Esempio n. 1
0
def logistic_skin():
    """Fit ``Logistic`` on the skin-cancer training split and print a report.

    For the training split and then the test split this prints the
    accuracy figure and calls ``confusionMatrix``; afterwards it calls
    ``plot_roc_curve`` with the test labels and test predictions.
    """
    print("\nLogistic Regression for Skin Cancer data:\n")
    x_train, x_test, y_train, y_test = get_data_skin()
    logistic = Logistic(x_train, y_train)

    # One identical report per split, training first. After the loop
    # `y_pred` holds the test-split predictions used for the ROC curve.
    report_plan = (
        ("\nTraining Classification accuracy: ", x_train, y_train),
        ("\nTesting Classification accuracy: ", x_test, y_test),
    )
    for heading, features, labels in report_plan:
        y_pred = logistic.predict(features)
        print(heading)
        # assumes predictions and labels are both 0/1, so the summed absolute
        # difference counts the misclassified samples -- TODO confirm
        mismatch_total = np.sum(np.abs(y_pred - labels))
        print(100 - 100 * mismatch_total / y_pred.shape[0])
        confusionMatrix(labels, y_pred)

    print("ROC Curve: ")
    plot_roc_curve(y_test, y_pred)
Esempio n. 2
0
def run_all(gpu=False, double_precision=False):
    """Run every example problem once and print the value it returns.

    Each example is called as ``example(a, b, gpu=..., double_precision=...)``
    and its return value is printed as the solve time in seconds.

    Only single-argument ``print(...)`` calls are used, so the function is
    valid on Python 3 and prints the same text on Python 2 (the original
    mixed ``print "..."`` statements with ``print(...)`` calls, which does
    not compile on Python 3).

    :param gpu: forwarded unchanged to every example as ``gpu``
    :param double_precision: forwarded unchanged to every example as
        ``double_precision``
    """

    def report(title, example, *dims):
        # Print the title before running so the output order is unchanged
        # even if the example prints something itself.
        print(title)
        solve_time = example(*dims, gpu=gpu, double_precision=double_precision)
        print("Solve time:\t{:.2e} seconds\n".format(solve_time))

    report("\nLasso.", Lasso, 200, 2000)
    report("\nLasso Path.", LassoPath, 200, 1000)
    report("\nLogistic Regression.", Logistic, 1000, 100)
    report("\nLinear Program in Equality Form.", LpEq, 1000, 200)
    report("\nLinear Program in Inequality Form.", LpIneq, 1000, 200)
    report("\nNon-Negative Least Squares.", NonNegL2, 1000, 200)
    report("\nSupport Vector Machine.", Svm, 1000, 200)
Esempio n. 3
0
 def __init__(self, m, n, k, eta, lambd):
     '''
     Store the model dimensions and hyper-parameters and allocate the state.

     :param m: Number of fields
     :param n: Number of features
     :param k: Number of latent factors
     :param eta: learning rate
     :param lambd: regularization coefficient
     '''
     # Model dimensions
     self.m, self.n, self.k = m, n, k
     # Hyper-parameters
     self.eta, self.lambd = eta, lambd
     # 3-D weight tensor of shape (n, m, k), initialised as w ~ U(0, 1/sqrt(k))
     uniform_draws = np.random.rand(n, m, k)
     self.w = uniform_draws / math.sqrt(k)
     # Accumulated squared gradients used by AdaGrad; starting at one keeps
     # the later division by sqrt(G) away from zero
     self.G = np.ones((n, m, k), dtype=np.float64)
     self.log = Logistic()
Esempio n. 4
0
class FFM(object):
    '''
    Field-aware factorization machine trained sample by sample with AdaGrad.

    ``w[j, f]`` is the length-``k`` latent vector that feature ``j`` uses when
    it interacts with a feature of field ``f``; ``G`` has the same shape and
    accumulates squared gradients for the AdaGrad step size.
    '''

    def __init__(self, m, n, k, eta, lambd):
        '''
        :param m: Number of fields
        :param n: Number of features
        :param k: Number of latent factors
        :param eta: learning rate
        :param lambd: regularization coefficient
        '''
        self.m = m
        self.n = n
        self.k = k
        # Hyper-parameters
        self.eta = eta
        self.lambd = lambd
        # 3-D weight tensor of shape (n, m, k), initialised as w ~ U(0, 1/sqrt(k))
        self.w = np.random.rand(n, m, k) / math.sqrt(k)
        # Accumulated squared gradients used by AdaGrad; starting at one keeps
        # the later division by sqrt(G) away from zero
        self.G = np.ones(shape=(n, m, k), dtype=np.float64)
        self.log = Logistic()

    def phi(self, node_list):
        '''
        Weighted sum over all pairwise feature interactions of one sample.

        :param node_list: indexable sequence holding the non-zero entries of
            x; each node exposes ``j`` (feature index), ``f`` (field index)
            and ``v`` (value)
        :return: the scalar sum of ``dot(w[j1, f2], w[j2, f1]) * v1 * v2`` over
            every unordered pair of nodes (``0.0`` for fewer than two nodes)
        '''
        z = 0.0
        count = len(node_list)
        for a in range(count):
            node1 = node_list[a]
            for b in range(a + 1, count):
                node2 = node_list[b]
                # Each feature contributes the latent vector it keeps for the
                # *other* feature's field.
                w1 = self.w[node1.j, node2.f]
                w2 = self.w[node2.j, node1.f]
                z += np.dot(w1, w2) * node1.v * node2.v
        return z

    def predict(self, node_list):
        '''
        Predict y for one sample.

        :param node_list: sequence holding the non-zero entries of x
        :return: ``self.log.decide_by_tanh(phi(node_list))``; according to the
            original author's notes this lies in (0, 1)
        '''
        z = self.phi(node_list)
        y = self.log.decide_by_tanh(z)
        return y

    def sgd(self, node_list, y):
        '''
        Update the model parameters from a single sample (one AdaGrad step).

        :param node_list: sequence holding the non-zero entries of x
        :param y: label, 1 for a positive sample and -1 for a negative one
        :return: None
        '''
        margin = y * self.phi(node_list)
        try:
            kappa = -y / (1 + math.exp(margin))
        except OverflowError:
            # exp(margin) exceeds the float range, so -y / (1 + exp(margin))
            # is zero to machine precision; use that instead of crashing.
            kappa = 0.0
        count = len(node_list)
        for a in range(count):
            node1 = node_list[a]
            j1 = node1.j
            f1 = node1.f
            v1 = node1.v
            for b in range(a + 1, count):
                node2 = node_list[b]
                j2 = node2.j
                f2 = node2.f
                v2 = node2.v
                c = kappa * v1 * v2
                # w[j1, f2] and w[j2, f1] are vectors, so both gradients are
                # vectors too. Both are computed before either weight is
                # changed.
                g_j1_f2 = self.lambd * self.w[j1, f2] + c * self.w[j2, f1]
                g_j2_f1 = self.lambd * self.w[j2, f1] + c * self.w[j1, f2]
                # Accumulate the per-dimension squared gradients; G stays
                # strictly positive because it was initialised to one.
                self.G[j1, f2] += g_j1_f2**2
                self.G[j2, f1] += g_j2_f1**2
                # AdaGrad step. np.sqrt (unlike math.sqrt) works element-wise
                # on arrays, and G > 0 makes the division safe.
                self.w[j1, f2] -= self.eta / np.sqrt(self.G[j1, f2]) * g_j1_f2
                self.w[j2, f1] -= self.eta / np.sqrt(self.G[j2, f1]) * g_j2_f1

    def train(self, sample_generator, max_echo, max_r2):
        '''
        Train the model on a collection of samples.

        :param sample_generator: iterable yielding ``(node_list, y)`` with
            ``y`` in {-1, 1}. It is iterated once per epoch, so pass something
            re-iterable (e.g. a list) when ``max_echo`` > 1. The original
            author notes that normalising x to unit length beforehand gives
            slightly better accuracy.
        :param max_echo: maximum number of passes over the samples
        :param max_r2: stop early once the coefficient of determination r2
            of an epoch exceeds this threshold
        :return: None
        :raises ValueError: if an epoch receives no samples at all
        '''
        for itr in range(max_echo):
            print("echo", itr)
            y_sum = 0.0
            y_square_sum = 0.0
            err_square_sum = 0.0  # sum of squared errors
            population = 0  # number of samples seen in this epoch
            for node_list, y in sample_generator:
                # The gradient step needs the original {-1, 1} label: with 0
                # the loss gradient vanishes and negative samples would only
                # ever apply the regularisation shrinkage.
                self.sgd(node_list, y)
                y_hat = self.predict(node_list)
                # Labels are {-1, 1} but predictions lie in (0, 1); map -1 to
                # 0 only for measuring the quality of the fit.
                target = 0.0 if y == -1 else y
                y_sum += target
                y_square_sum += target**2
                err_square_sum += (target - y_hat)**2
                population += 1
            if population == 0:
                raise ValueError(
                    "sample_generator yielded no samples in epoch %d; pass a "
                    "re-iterable collection (e.g. a list) rather than a "
                    "one-shot generator" % itr)
            # Total sum of squares of the targets around their mean.
            total_ss = y_square_sum - y_sum * y_sum / population
            if total_ss == 0:
                # All targets identical: r2 is undefined. NaN never compares
                # greater than max_r2, so training simply continues.
                r2 = float("nan")
            else:
                r2 = 1 - err_square_sum / total_ss
            print("r2=", r2)
            if r2 > max_r2:  # a larger r2 means a better fit
                print("r2 have reach", r2)
                break

    def save_model(self, outfile):
        '''
        Serialize the model weights with ``np.save``.

        Only ``self.w`` is written; the AdaGrad accumulator ``self.G`` is not.
        Note that ``np.save`` appends ``.npy`` to a file name that lacks it.

        :param outfile: file name or file object handed to ``np.save``
        :return: None
        '''
        np.save(outfile, self.w)

    def load_model(self, infile):
        '''
        Load model weights previously written by ``save_model``.

        Only ``self.w`` is replaced; ``self.G`` is left as it is.

        :param infile: file name or file object handed to ``np.load``
        :return: None
        '''
        self.w = np.load(infile)
Esempio n. 5
0
# Evaluation settings: which model to build and which saved epoch to use.
model_type = 'dnn'
test_epoch = 9
maxseq_length = 100
embedding_size = 300
batch_size = 32
keep_prob = 1.0

# Column 0 of the loaded table is the input; the last column is kept as a
# 2-D (N, 1) slice because it is selected with the index list [-1].
test_data = read_data('data/test.txt')
test_data = np.array(test_data)
test_X = test_data[:,0]
test_Y = test_data[:,[-1]]

word2vec = word2vec_load()

if model_type == 'logistic':
    model = Logistic(maxseq_length, embedding_size)
elif model_type == 'dnn':
    model = DNN(maxseq_length, embedding_size)
elif model_type == 'rnn':
    model = RNN(batch_size, maxseq_length, embedding_size)
elif model_type == 'lstm':
    model = LSTM(batch_size, maxseq_length, embedding_size, keep_prob)
elif model_type == 'cnn':
    model = CNN(batch_size, maxseq_length, embedding_size)
else:
    # Fail here with a clear message instead of leaving `model` unbound and
    # hitting a NameError at its first use.
    raise ValueError("unknown model_type: %r" % (model_type,))

with tf.Session() as sess:
    # Number of complete batches; a trailing partial batch is not counted.
    total_batch = len(test_X) // batch_size
    # presumably the checkpoint written for `test_epoch` -- confirm against
    # the training script
    save_path = './saved/' + model_type + '/model-' + str(test_epoch)

    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
Esempio n. 6
0
from lib.handle2 import Handle2

from logistic import Logistic

# Build the project's Logistic object and hand it to a Handle2 wrapper.
# NOTE(review): `(4)` is just the int 4 in parentheses, not a 1-tuple; if
# `params` is meant to be a sequence this should read `(4,)` -- confirm
# against Logistic's constructor.
l = Logistic(params=(4), size=1)
h = Handle2(l)
# Print whatever lyapunov_exponent_1() returns, then call the handle's graph().
lam1 = h.lyapunov_exponent_1()
print(lam1)
h.graph()
Esempio n. 7
0
    # Parse the command line and load the CSV named by ARGS.data.
    ARGS = PARSER.parse_args()

    PITCHES = pd.read_csv(ARGS.data)

    # Optional down-sampling: ARGS.sample is passed as the sample size and
    # ARGS.seed as the random state.
    if ARGS.sample:
        PITCHES = PITCHES.sample(ARGS.sample, random_state=ARGS.seed)

    # Benchmark mode: fit each model on a train split, time the fit, and
    # record the log likelihood on the held-out split.
    if ARGS.perf:
        # NOTE(review): assert statements are stripped under `python -O`.
        assert ARGS.predict == 'ptype'
        ITRS = 1
        # Both dicts map a model's class name to an array with one slot per
        # iteration.
        TIMES = {}
        LIKELIHOODS = {}
        for i in range(0, ITRS):
            # The iteration index doubles as the split's random state.
            TRAIN, TEST = train_test_split(PITCHES, random_state=i)
            for m in [SimpleCategorical(), CategoricalNeuralNetwork(), Logistic()]:
                cname = m.__class__.__name__
                if i == 0:
                    # np.empty leaves the slots uninitialised; each one is
                    # written below before it is read.
                    TIMES[cname] = np.empty(ITRS)
                    LIKELIHOODS[cname] = np.empty(ITRS)
                # Only the fit() call is timed.
                start = time.time()
                m.fit(TRAIN)
                end = time.time()
                # NOTE(review): this result is discarded and log_likelihood
                # is evaluated again two lines below -- confirm whether the
                # extra call is intentional.
                m.log_likelihood(TEST)
                TIMES[cname][i] = end - start
                LIKELIHOODS[cname][i] = m.log_likelihood(TEST)
        for m in TIMES:
            # NOTE(review): the figure printed after "+/-" is twice the
            # variance, not a standard deviation.
            print("%s training time: %f +/- %f, Log Likelihood: %f +/- %f" %
                  (m, TIMES[m].mean(), TIMES[m].var() * 2,
                   LIKELIHOODS[m].mean(), LIKELIHOODS[m].var() * 2))
        print(TIMES)
Esempio n. 8
0

def draw_line(w, col):
    """Plot the straight line ``w[0] * x + w[1] * y + w[2] = 0``.

    The line is sampled at 300 evenly spaced x values between -1 and 7 and
    drawn with ``plt.plot`` in colour ``col``.

    :param w: indexable with at least three entries; ``w[1]`` is the divisor
    :param col: colour passed to ``plt.plot``
    """
    # Solve for y: y = slope * x + intercept
    slope = -w[0] / w[1]
    intercept = -w[2] / w[1]
    line = np.poly1d([slope, intercept])
    xs = np.linspace(-1, 7, 300)
    plt.plot(xs, line(xs), color=col)


if __name__ == "__main__":
    # Generate the data and append a column of ones to x
    gen = Generator()
    x, y = gen.data_generator()
    ones_column = [[1]] * x.shape[0]
    x = np.hstack((x, ones_column))

    logist = Logistic(x.T, y)  # logistic regression without a regularisation term
    logist_regu = Logistic(x.T, y, lamb=0.003)  # logistic regression with a regularisation term

    # Fit and draw, in this order:
    #   black - gradient descent, no regularisation
    #   blue  - gradient descent, with regularisation
    #   red   - Newton's method, no regularisation
    for fit, colour in ((logist.gradient_descent, 'black'),
                        (logist_regu.gradient_descent, 'blue'),
                        (logist.newton, 'red')):
        w = fit()
        draw_line(w, colour)

    # Newton's method, with regularisation
Esempio n. 9
0
def program_parser():
    """Parse the command line and run the selected algorithm.

    ``--algorithm`` picks ``least_square``, ``perceptron`` or ``logistic``;
    ``--n`` (default ``run``) picks which ``Logistic`` method is called when
    the algorithm is ``logistic``. If no known algorithm is given, the help
    text is printed.
    """
    parser = argparse.ArgumentParser(description='Assignment 2')
    parser.add_argument('--algorithm',
                        choices=["least_square", "perceptron", "logistic"],
                        help='the algorithms')
    parser.add_argument('--n',
                        choices=["run", "batch", "lambda", "alpha", "check"],
                        default="run",
                        help='the algorithms of logistic')
    args = parser.parse_args()

    # The linear dataset and both linear models are built up front,
    # whichever algorithm was requested.
    linear_dataset = get_linear_seperatable_2d_2c_dataset()
    lsm = LSM(linear_dataset)
    perceptron = Perceptron(linear_dataset)
    algos = {"least_square": lsm.run, "perceptron": perceptron.run}

    if args.algorithm != "logistic":
        runner = algos.get(args.algorithm)
        if runner is None:
            parser.print_help()
        else:
            runner()
        return

    np.random.seed(2333)
    dataset_train, dataset_test = get_text_classification_datasets()
    logistic = Logistic(dataset_train, dataset_test)
    # Mode -> name of the Logistic method to call. Looked up by name so that
    # only the selected method is ever accessed.
    method_by_mode = {
        "run": "show",
        "check": "check_gradient",
        "batch": "show_batch_diff",
        "lambda": "show_lamb_diff",
        "alpha": "show_alpha_diff",
    }
    method_name = method_by_mode.get(args.n)
    if method_name is not None:
        getattr(logistic, method_name)()
Esempio n. 10
0
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from logistic import Logistic


if __name__ == '__main__':
    # Synthetic classification data (5000 samples, flip_y=0.5), with 30 %
    # held out for testing.
    X, y = make_classification(n_samples=5000, flip_y=0.5)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3)

    # Project implementation: fit with the test split as validation data,
    # then report the fraction of matching test predictions.
    clf = Logistic(X.shape[1], 2)
    clf.fit(X_train, y_train, val_data=(X_test, y_test))
    y_pred = clf.predict(X_test)
    final_acc = (y_pred == y_test).mean()
    print("logistic (tensorflow): %.4f" % final_acc)

    # scikit-learn baseline on the same split.
    clf = LogisticRegression()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print("logistic (sklearn):", (y_pred == y_test).mean())
Esempio n. 11
0
def builder(c, vectors, classes):
    """Return ``Logistic(c, vectors, classes)``.

    The three arguments are forwarded positionally and unchanged.
    NOTE(review): their meaning is defined by ``Logistic``'s constructor,
    which is not visible here -- confirm before relying on the names.
    """
    return Logistic(c, vectors, classes)
Esempio n. 12
0
# Training settings.
maxseq_length = 100
embedding_size = 300
training_epochs = 10
batch_size = 32
learning_rate = 0.001
keep_prob = 0.7

# Column 0 of the loaded table is the input; the last column is kept as a
# 2-D (N, 1) slice because it is selected with the index list [-1].
train_data = read_data('data/train.txt')
train_data = np.array(train_data)
train_X = train_data[:, 0]
train_Y = train_data[:, [-1]]

word2vec = word2vec_load()

# `model_type` is expected to be defined before this point.
if model_type == 'logistic':
    model = Logistic(maxseq_length, embedding_size, learning_rate)
elif model_type == 'dnn':
    model = DNN(maxseq_length, embedding_size, learning_rate)
elif model_type == 'rnn':
    model = RNN(batch_size, maxseq_length, embedding_size, learning_rate)
elif model_type == 'lstm':
    model = LSTM(batch_size, maxseq_length, embedding_size, keep_prob,
                 learning_rate)
elif model_type == 'cnn':
    model = CNN(batch_size, maxseq_length, embedding_size, learning_rate)
else:
    # Fail here with a clear message instead of leaving `model` unbound and
    # hitting a NameError at its first use.
    raise ValueError("unknown model_type: %r" % (model_type,))

with tf.Session() as sess:
    # Merge all registered summaries and write the graph to a per-model log
    # directory.
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter('./logs/' + model_type)
    writer.add_graph(sess.graph)
Esempio n. 13
0
    ax[0].set_title("Cross Entropy")
    ax[0].set_xlabel("Iteration")

    ax[1].plot(accuracy, marker=".")
    ax[1].set_title("Accuracy")
    ax[1].set_xlabel("Iteration")

    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    x, y = load_data()

    # Momentum optimiser; the plain alternative tried earlier was
    # GradientDescent(learning_rate=0.1).
    optimizer = Momentum(learning_rate=0.5, beta=0.9)
    lr = Logistic(size=x.shape[1], optimizer=optimizer, iteration=100)

    # train() returns two series, which are passed straight to plot().
    ce, ac = lr.train(x, y)
    plot(ce, ac)