def logistic_skin():
    """Evaluate the ``Logistic`` model on the skin-cancer data set.

    Loads the train/test split with ``get_data_skin``, builds a ``Logistic``
    object from the training split and then, for the training split and the
    test split in turn, prints an accuracy percentage and calls
    ``confusionMatrix``.  Finally the ROC curve of the test predictions is
    plotted with ``plot_roc_curve``.
    """

    def report(title, truth, predicted):
        # Percentage = 100 minus the summed absolute label differences per
        # sample (assumes labels and predictions are 0/1 -- TODO confirm).
        print(title)
        print(100 - 100 * np.sum(np.abs(predicted - truth)) / predicted.shape[0])
        confusionMatrix(truth, predicted)

    print("\nLogistic Regression for Skin Cancer data:\n")
    x_train, x_test, y_train, y_test = get_data_skin()
    logistic = Logistic(x_train, y_train)

    report("\nTraining Classification accuracy: ", y_train,
           logistic.predict(x_train))

    test_predictions = logistic.predict(x_test)
    report("\nTesting Classification accuracy: ", y_test, test_predictions)

    # The ROC curve is drawn from the test-split predictions only.
    print("ROC Curve: ")
    plot_roc_curve(y_test, test_predictions)
def run_all(gpu=False, double_precision=False):
    """Run every example problem once and print the value each one returns.

    Each example is called with two fixed positional size arguments plus the
    ``gpu`` and ``double_precision`` keywords; whatever it returns is printed
    as "Solve time" (presumably the solve time in seconds -- the examples
    themselves are defined outside this block).

    :param gpu: forwarded unchanged to every example
    :param double_precision: forwarded unchanged to every example
    """
    # (heading, example callable, positional size arguments)
    examples = (
        ("\nLasso.", Lasso, (200, 2000)),
        ("\nLasso Path.", LassoPath, (200, 1000)),
        ("\nLogistic Regression.", Logistic, (1000, 100)),
        ("\nLinear Program in Equality Form.", LpEq, (1000, 200)),
        ("\nLinear Program in Inequality Form.", LpIneq, (1000, 200)),
        ("\nNon-Negative Least Squares.", NonNegL2, (1000, 200)),
        ("\nSupport Vector Machine.", Svm, (1000, 200)),
    )
    for heading, solve, sizes in examples:
        print(heading)
        # A single parenthesised argument prints identically under Python 2
        # (print statement) and Python 3 (print function); the former bare
        # `print "..."` form is a SyntaxError on Python 3.
        print("Solve time:\t{:.2e} seconds\n".format(
            solve(*sizes, gpu=gpu, double_precision=double_precision)))
def __init__(self, m, n, k, eta, lambd):
    '''
    Store the model dimensions and hyper-parameters and allocate the
    parameter arrays.

    :param m: Number of fields
    :param n: Number of features
    :param k: Number of latent factors
    :param eta: learning rate
    :param lambd: regularization coefficient
    '''
    # Model dimensions, then the hyper-parameters.
    self.m, self.n, self.k = m, n, k
    self.eta, self.lambd = eta, lambd

    tensor_shape = (n, m, k)
    # 3-D weight tensor, drawn uniformly from [0, 1/sqrt(k)).
    self.w = np.random.rand(*tensor_shape) / math.sqrt(k)
    # Accumulated squared gradients used by AdaGrad; starting from one keeps
    # the later division by sqrt(G) away from zero.
    self.G = np.ones(tensor_shape, dtype=np.float64)
    self.log = Logistic()
class FFM(object):
    '''
    Field-aware factorization machine updated one sample at a time with
    AdaGrad.

    A sample is a sequence of nodes, one per non-zero entry of x.  Every node
    exposes ``j`` (feature index), ``f`` (field index) and ``v`` (value).
    '''

    def __init__(self, m, n, k, eta, lambd):
        '''
        :param m: Number of fields
        :param n: Number of features
        :param k: Number of latent factors
        :param eta: learning rate
        :param lambd: regularization coefficient
        '''
        self.m = m
        self.n = n
        self.k = k
        # Hyper-parameters
        self.eta = eta
        self.lambd = lambd
        # 3-D weight tensor, drawn uniformly from [0, 1/sqrt(k)).
        self.w = np.random.rand(n, m, k) / math.sqrt(k)
        # Accumulated squared gradients used by AdaGrad; starting from one
        # keeps the division by sqrt(G) away from zero.
        self.G = np.ones(shape=(n, m, k), dtype=np.float64)
        self.log = Logistic()

    def phi(self, node_list):
        '''
        Weighted sum over all pairwise feature interactions.

        :param node_list: sequence holding the non-zero entries of x
        :return: the raw model output (0.0 when there are fewer than two nodes)
        '''
        z = 0.0
        size = len(node_list)
        for a in range(size):
            node1 = node_list[a]
            for b in range(a + 1, size):
                node2 = node_list[b]
                # Each feature uses the latent vector it keeps for the
                # *other* feature's field.
                w1 = self.w[node1.j, node2.f]
                w2 = self.w[node2.j, node1.f]
                z += np.dot(w1, w2) * node1.v * node2.v
        return z

    def predict(self, node_list):
        '''
        Predict y for the input x.

        :param node_list: sequence holding the non-zero entries of x
        :return: ``self.log.decide_by_tanh`` applied to ``phi(node_list)``
        '''
        z = self.phi(node_list)
        y = self.log.decide_by_tanh(z)
        return y

    def sgd(self, node_list, y):
        '''
        Update the model parameters from a single sample.

        :param node_list: sequence holding the non-zero entries of x
        :param y: 1 for a positive sample, -1 for a negative sample
        :return: None
        '''
        # Derivative of log(1 + exp(-y * phi)) with respect to phi.
        kappa = -y / (1 + math.exp(y * self.phi(node_list)))
        size = len(node_list)
        for a in range(size):
            node1 = node_list[a]
            j1 = node1.j
            f1 = node1.f
            v1 = node1.v
            for b in range(a + 1, size):
                node2 = node_list[b]
                j2 = node2.j
                f2 = node2.f
                v2 = node2.v
                c = kappa * v1 * v2
                # self.w[j1, f2] and self.w[j2, f1] are vectors, so both
                # gradients are vectors too.  Both are computed before either
                # weight vector is modified.
                g_j1_f2 = self.lambd * self.w[j1, f2] + c * self.w[j2, f1]
                g_j2_f1 = self.lambd * self.w[j2, f1] + c * self.w[j1, f2]
                # Accumulate the squared gradient per dimension.  G stays
                # strictly positive because it starts at one.
                self.G[j1, f2] += g_j1_f2 ** 2
                self.G[j2, f1] += g_j2_f1 ** 2
                # AdaGrad step.  np.sqrt is applied element-wise to the
                # array (math.sqrt only accepts a scalar).
                self.w[j1, f2] -= self.eta / np.sqrt(self.G[j1, f2]) * g_j1_f2
                self.w[j2, f1] -= self.eta / np.sqrt(self.G[j2, f1]) * g_j2_f1

    def train(self, sample_generator, max_echo, max_r2):
        '''
        Train the model on a stream of samples.

        :param sample_generator: iterable yielding (node_list, y) with y in
            {-1, 1}; node_list holds the non-zero entries of x.  Normalising
            x to unit length beforehand usually gives slightly better
            accuracy.  The iterable is consumed once per epoch, so a one-shot
            generator only feeds the first epoch.
        :param max_echo: maximum number of epochs
        :param max_r2: training stops once the fit coefficient r2 exceeds
            this threshold
        :return: None
        '''
        for itr in range(max_echo):
            print("echo", itr)
            y_sum = 0.0
            y_square_sum = 0.0
            err_square_sum = 0.0  # sum of squared errors
            population = 0  # number of samples seen in this epoch
            for node_list, y in sample_generator:
                # sgd needs the label in {-1, 1}, so it must see the
                # original value, not the remapped one below.
                self.sgd(node_list, y)
                y_hat = self.predict(node_list)
                # Labels are {-1, 1} while predictions lie in (0, 1); map
                # the label to {0, 1} only for measuring the fit.
                y = 0.0 if y == -1 else y
                y_sum += y
                y_square_sum += y ** 2
                err_square_sum += (y - y_hat) ** 2
                population += 1
            if population == 0:
                # Nothing was yielded (e.g. an exhausted generator); further
                # epochs cannot learn anything and r2 would divide by zero.
                print("no samples available, training stopped")
                break
            # Sum of squared deviations of y from its mean.
            var_y = y_square_sum - y_sum * y_sum / population
            if var_y == 0:
                # All labels identical: r2 is undefined, keep training.
                print("r2 is undefined (labels have zero variance)")
                continue
            r2 = 1 - err_square_sum / var_y
            print("r2=", r2)
            if r2 > max_r2:  # a larger r2 means a better fit
                print("r2 have reach", r2)
                break

    def save_model(self, outfile):
        '''
        Serialise the weight tensor with ``np.save``.

        :param outfile: target file; note that ``np.save`` appends ".npy" to
            a file name that does not already end with it
        :return: None
        '''
        np.save(outfile, self.w)

    def load_model(self, infile):
        '''
        Load the weight tensor with ``np.load``.

        :param infile: file previously written by ``save_model``
        :return: None
        '''
        self.w = np.load(infile)
# Evaluation settings.
model_type = 'dnn'
test_epoch = 9
maxseq_length = 100
embedding_size = 300
batch_size = 32
keep_prob = 1.0

# Test set: first column goes to test_X, last column (kept 2-D) to test_Y.
test_data = np.array(read_data('data/test.txt'))
test_X = test_data[:, 0]
test_Y = test_data[:, [-1]]
word2vec = word2vec_load()

# One lazy factory per supported model type; only the selected model is
# constructed, and an unknown type leaves `model` unset.
_model_factories = {
    'logistic': lambda: Logistic(maxseq_length, embedding_size),
    'dnn': lambda: DNN(maxseq_length, embedding_size),
    'rnn': lambda: RNN(batch_size, maxseq_length, embedding_size),
    'lstm': lambda: LSTM(batch_size, maxseq_length, embedding_size, keep_prob),
    'cnn': lambda: CNN(batch_size, maxseq_length, embedding_size),
}
_factory = _model_factories.get(model_type)
if _factory is not None:
    model = _factory()

with tf.Session() as sess:
    # Number of whole batches in the test set.
    total_batch = int(len(test_X) / batch_size)
    # Checkpoint path for the chosen model type and epoch.
    save_path = './saved/' + model_type + '/model-' + str(test_epoch)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
from lib.handle2 import Handle2
from logistic import Logistic

# Build a Logistic object, wrap it in a Handle2, print the value returned by
# lyapunov_exponent_1() and then call graph().
# NOTE(review): `(4)` is just the integer 4, not a one-element tuple; if
# `params` is meant to be a tuple it would need to be `(4,)` -- confirm
# against the Logistic constructor.
l = Logistic(params=(4), size=1)
h = Handle2(l)
lam1 = h.lyapunov_exponent_1()
print(lam1)
h.graph()
# Parse the command line and load the data set named by --data.
ARGS = PARSER.parse_args()
PITCHES = pd.read_csv(ARGS.data)
if ARGS.sample:
    # Optional down-sampling, reproducible through the given seed.
    PITCHES = PITCHES.sample(ARGS.sample, random_state=ARGS.seed)

if ARGS.perf:
    # Benchmark mode: time `fit` and record the test log-likelihood of each
    # model over ITRS train/test splits.
    # NOTE(review): `assert` is stripped under `python -O`; consider a real
    # argument check if this validation must always run.
    assert ARGS.predict == 'ptype'
    ITRS = 1
    TIMES = {}
    LIKELIHOODS = {}
    for i in range(ITRS):
        # The iteration index doubles as the split seed.
        TRAIN, TEST = train_test_split(PITCHES, random_state=i)
        for m in [SimpleCategorical(), CategoricalNeuralNetwork(), Logistic()]:
            cname = m.__class__.__name__
            if i == 0:
                # One slot per iteration; every slot is filled below.
                TIMES[cname] = np.empty(ITRS)
                LIKELIHOODS[cname] = np.empty(ITRS)
            # Only the call to fit() is timed.
            start = time.time()
            m.fit(TRAIN)
            end = time.time()
            TIMES[cname][i] = end - start
            # Evaluated exactly once; a second call whose result was thrown
            # away used to run before this line.
            LIKELIHOODS[cname][i] = m.log_likelihood(TEST)
    for m in TIMES:
        # NOTE(review): the "+/-" figures are twice the variance, not a
        # standard deviation -- confirm this is the intended spread measure.
        print("%s training time: %f +/- %f, Log Likelihood: %f +/- %f" %
              (m, TIMES[m].mean(), TIMES[m].var() * 2,
               LIKELIHOODS[m].mean(), LIKELIHOODS[m].var() * 2))
    print(TIMES)
def draw_line(w, col):
    """Plot the straight line w[0]*x + w[1]*y + w[2] = 0 for x in [-1, 7].

    :param w: indexable with three entries (coefficients of x, y and the
        constant term); w[1] is used as a divisor
    :param col: colour passed to ``plt.plot``
    """
    # Solve the line equation for y: y = slope * x + intercept.
    slope = -w[0] / w[1]
    intercept = -w[2] / w[1]
    line = np.poly1d([slope, intercept])
    xs = np.linspace(-1, 7, 300)
    plt.plot(xs, line(xs), color=col)


if __name__ == "__main__":
    # Generate the data.
    gen = Generator()
    x, y = gen.data_generator()
    # Append a constant column of ones to x.
    x = np.hstack((x, [[1] for _ in range(x.shape[0])]))

    logist = Logistic(x.T, y)  # logistic regression without regularisation
    logist_regu = Logistic(x.T, y, lamb=0.003)  # with a regularisation term

    # Fit and draw, in order: gradient descent without regularisation (black),
    # gradient descent with regularisation (blue), Newton's method without
    # regularisation (red).
    for fit, colour in ((logist.gradient_descent, 'black'),
                        (logist_regu.gradient_descent, 'blue'),
                        (logist.newton, 'red')):
        w = fit()
        draw_line(w, colour)
    # Newton's method with regularisation
def program_parser():
    """Parse the command line and run the selected algorithm.

    ``--algorithm`` selects least squares, the perceptron or logistic
    regression; ``--n`` (default "run") selects which ``Logistic`` method is
    called when the algorithm is "logistic".  Without a recognised
    ``--algorithm`` the help text is printed.
    """
    parser = argparse.ArgumentParser(description='Assignment 2')
    parser.add_argument(
        '--algorithm',
        choices=["least_square", "perceptron", "logistic"],
        help='the algorithms')
    parser.add_argument(
        '--n',
        choices=["run", "batch", "lambda", "alpha", "check"],
        default="run",
        help='the algorithms of logistic')
    args = parser.parse_args()

    # The two linear models are always built on the 2-D data set, even when
    # the logistic branch is taken.
    linear_dataset = get_linear_seperatable_2d_2c_dataset()
    lsm = LSM(linear_dataset)
    perceptron = Perceptron(linear_dataset)
    algos = {"least_square": lsm.run, "perceptron": perceptron.run}

    if args.algorithm == "logistic":
        np.random.seed(2333)
        dataset_train, dataset_test = get_text_classification_datasets()
        logistic = Logistic(dataset_train, dataset_test)
        # --n value -> name of the Logistic method to invoke.
        method_by_mode = {
            "run": "show",
            "check": "check_gradient",
            "batch": "show_batch_diff",
            "lambda": "show_lamb_diff",
            "alpha": "show_alpha_diff",
        }
        method_name = method_by_mode.get(args.n)
        if method_name is not None:
            getattr(logistic, method_name)()
        return

    runner = algos.get(args.algorithm)
    if runner is None:
        # No (known) algorithm requested.
        parser.print_help()
        return
    runner()
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from logistic import Logistic

if __name__ == '__main__':
    # Synthetic data set of 5000 samples with flip_y=0.5, 70/30 split.
    X, y = make_classification(5000, flip_y=0.5)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3)

    # Project implementation: constructed from the feature count and 2, and
    # given the test split as validation data.
    clf = Logistic(X.shape[1], 2)
    clf.fit(X_train, y_train, val_data=(X_test, y_test))
    y_pred = clf.predict(X_test)
    final_acc = (y_pred == y_test).mean()
    print("logistic (tensorflow): %.4f" % final_acc)

    # scikit-learn baseline on the same split.
    clf = LogisticRegression()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print("logistic (sklearn):", (y_pred == y_test).mean())
def builder(c, vectors, classes):
    """Create a ``Logistic`` object from the given arguments.

    :param c: first positional argument passed to ``Logistic``
    :param vectors: second positional argument passed to ``Logistic``
    :param classes: third positional argument passed to ``Logistic``
    :return: the newly constructed ``Logistic`` instance
    """
    model = Logistic(c, vectors, classes)
    return model
# Training settings.
maxseq_length = 100
embedding_size = 300
training_epochs = 10
batch_size = 32
learning_rate = 0.001
keep_prob = 0.7

# Training set: first column goes to train_X, last column (kept 2-D) to
# train_Y.
train_data = np.array(read_data('data/train.txt'))
train_X = train_data[:, 0]
train_Y = train_data[:, [-1]]
word2vec = word2vec_load()

# One lazy factory per supported model type; only the selected model is
# constructed, and an unknown type leaves `model` unset.
_model_factories = {
    'logistic': lambda: Logistic(maxseq_length, embedding_size,
                                 learning_rate),
    'dnn': lambda: DNN(maxseq_length, embedding_size, learning_rate),
    'rnn': lambda: RNN(batch_size, maxseq_length, embedding_size,
                       learning_rate),
    'lstm': lambda: LSTM(batch_size, maxseq_length, embedding_size,
                         keep_prob, learning_rate),
    'cnn': lambda: CNN(batch_size, maxseq_length, embedding_size,
                       learning_rate),
}
_factory = _model_factories.get(model_type)
if _factory is not None:
    model = _factory()

with tf.Session() as sess:
    # Summaries and the graph are written to a per-model log directory.
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter('./logs/' + model_type)
    writer.add_graph(sess.graph)
# NOTE(review): the statements up to plt.show() look like the tail of a
# plotting helper (presumably the `plot` called at the bottom) whose header
# lies outside this view; they label the two axes, draw the accuracy curve
# on the second axis and show the figure.
ax[0].set_title("Cross Entropy")
ax[0].set_xlabel("Iteration")
ax[1].plot(accuracy, marker=".")
ax[1].set_title("Accuracy")
ax[1].set_xlabel("Iteration")
plt.tight_layout()
plt.show()


if __name__ == "__main__":
    # Load the data, train a Logistic model with a Momentum optimizer for
    # 100 iterations and plot the two sequences returned by train().
    x, y = load_data()
    lr = Logistic(
        size=x.shape[1],  # taken from the second dimension of x
        # Alternative optimizer, kept for reference:
        # optimizer=GradientDescent(
        #     learning_rate=0.1
        # ),
        optimizer=Momentum(
            learning_rate=0.5,
            beta=0.9
        ),
        iteration=100
    )
    # train() returns two values, passed straight to plot(); judging by the
    # axis titles above they are presumably cross entropy and accuracy.
    ce, ac = lr.train(
        x,
        y
    )
    plot(ce, ac)