def testSelectedFeatures2():
    """Run feature selection on the normalized RW data plus engineered columns.

    The matrix returned by ``genRWNormalized()`` is extended with the square
    of every column except the last one, followed by four pairwise products
    of fixed column pairs. Each new column is inserted immediately before the
    last column, so the last column of the input stays last (presumably the
    label column -- confirm against ``genRWNormalized``).

    The extended matrix is handed to ``featureSelection`` together with a
    ``lr.LogisticRegression(0.001, 100)`` model. Nothing is returned.
    """
    data1 = genRWNormalized()
    square = np.copy(data1)

    # Squared terms: one new column per original column, last column excluded.
    for i in range(len(data1[0]) - 1):
        colToAdd = np.power(data1[:, i], 2)
        square = np.insert(square, -1, colToAdd, axis=1)

    # Interaction terms: element-wise products of hand-picked column pairs.
    for left, right in ((0, 8), (0, 7), (0, 2), (5, 6)):
        product = np.multiply(data1[:, left], data1[:, right])
        square = np.insert(square, -1, product, axis=1)

    # Only this model is used; an earlier, immediately overwritten
    # LogisticRegression(0.001, 500) instance was dead code and was removed.
    LRModel = lr.LogisticRegression(0.001, 100)
    featureSelection(LRModel, square, 3)
def testSelectedFeatures1():
    """Compare LR and LDA accuracy on three variants of the normalized RW data.

    Variants:
        data1 -- the full matrix returned by ``genRWNormalized()``.
        data2 -- columns 10, 1, 9 and 6 of data1 followed by its last column.
        data3 -- ``addSquareFeature(data1, [10, 1, 9, 6])`` (helper semantics
                 are not visible here; presumably adds squared columns).

    Each variant is shuffled and scored with 5-fold validation three times;
    the mean accuracies are printed in the order
    LR/data1, LDA/data1, LR/data2, LDA/data2, LR/data3, LDA/data3.
    """
    print("start testAdditionlSquaredFeatures()")
    LRModel = lr.LogisticRegression(0.001, 500)
    LDAModel = LDA.LDA()
    data1 = genRWNormalized()
    data2 = np.append(data1[:, [10, 1, 9, 6]],
                      np.array([data1[:, -1]]).T,
                      axis=1)
    data3 = addSquareFeature(data1, [10, 1, 9, 6])

    repeats = 3
    a1 = b1 = 0
    a2 = b2 = 0
    a3 = b3 = 0
    for _ in range(repeats):
        np.random.shuffle(data1)
        np.random.shuffle(data2)
        np.random.shuffle(data3)
        a1 += LRKFoldValidation(LRModel, data1, 5)
        # Fixed: this score previously used data2, which made the "full data"
        # LDA figure a duplicate of the data2 LDA figure below.
        b1 += LDAKFoldValidation(LDAModel, data1, 5)
        a2 += LRKFoldValidation(LRModel, data2, 5)
        b2 += LDAKFoldValidation(LDAModel, data2, 5)
        a3 += LRKFoldValidation(LRModel, data3, 5)
        b3 += LDAKFoldValidation(LDAModel, data3, 5)

    # The labels are identical for all three variants; the order is
    # data1, data2, data3 (LR then LDA for each).
    print("Accuracy for lr in rw is {}".format(a1 / repeats))
    print("Accuracy for LDA in rw is {}".format(b1 / repeats))
    print("Accuracy for lr in rw is {}".format(a2 / repeats))
    print("Accuracy for LDA in rw is {}".format(b2 / repeats))
    print("Accuracy for lr in rw is {}".format(a3 / repeats))
    print("Accuracy for LDA in rw is {}".format(b3 / repeats))
def __init__(self, rng, input, n_in, n_hidden, n_out):
    """Wire a tanh hidden layer into a logistic-regression output layer.

    Args:
        rng: random generator forwarded to ``HiddenLayer``.
        input: symbolic input fed to the hidden layer.
        n_in: input size of the hidden layer.
        n_hidden: output size of the hidden layer and input size of the
            output layer.
        n_out: output size of the logistic-regression layer.
    """
    # Hidden layer.
    hidden = HiddenLayer(rng=rng,
                         input=input,
                         n_in=n_in,
                         n_out=n_hidden,
                         activation=T.tanh)
    self.hiddenLayer = hidden

    # Output layer, fed by the hidden layer's output.
    output_layer = logistic_regression.LogisticRegression(
        input=hidden.output, n_in=n_hidden, n_out=n_out)
    self.logRegressionLayer = output_layer

    # L1 regularization term: sum of absolute weights of both layers.
    self.L1 = abs(hidden.W).sum() + abs(output_layer.W).sum()

    # L2 regularization term: sum of squared weights of both layers.
    self.L2_sqr = (hidden.W**2).sum() + (output_layer.W**2).sum()

    # The loss depends only on the output layer, so the logistic-regression
    # loss is reused as is.
    self.negative_log_likelihood = output_layer.negative_log_likelihood

    # Error-count symbol, likewise taken from the output layer.
    self.errors = output_layer.errors

    # Parameters of both layers, hidden layer first.
    self.params = hidden.params + output_layer.params

    # Keep track of the model input.
    self.input = input
def testAlphaAndEpochs():
    """Grid-search learning rate and iteration count for logistic regression.

    Every (learning rate, iteration count) pair is scored with three runs of
    5-fold validation on the data from ``genRWClear()``. Per-run and mean
    accuracies are printed as the search proceeds; the best learning rate,
    best iteration count and best mean accuracy are printed at the end.
    """
    rwClear = genRWClear()

    # Candidate grid (the original note mentions a wider range:
    # learning rate 0.0001 - 1, iterations 50 - 100000).
    learning_rates = [0.001, 0.01, 0.1, 1]
    iteration_counts = [100, 500, 1000, 5000]

    best_rate = 0
    best_iterations = 0
    best_mean = 0

    for rate in learning_rates:
        for iterations in iteration_counts:
            model = lr.LogisticRegression(rate, iterations)
            total = 0.0
            for _ in range(3):
                accuracy = LRKFoldValidation(model, rwClear, 5)
                print("per k fold:", accuracy)
                total += accuracy
            mean = total / 3.0
            print("ave:", mean)
            # Strict comparison: the first combination reaching a given mean
            # accuracy is the one that is kept.
            if mean > best_mean:
                best_mean = mean
                best_rate = rate
                best_iterations = iterations
            print(mean, " ", rate, " ", iterations)

    print(best_rate)
    print(best_iterations)
    print(best_mean)
def test_init(self):
    """Constructor keyword arguments are stored as same-named attributes."""
    expected = {
        "max_iter": 10,
        "tol": 1e-6,
        "learning_rate": 1e-3,
        "random_state": 2,
    }
    lr_model = lr.LogisticRegression(**expected)
    for attribute, value in expected.items():
        assert getattr(lr_model, attribute) == value
def test_predictions():
    """With the known coefficients injected, predictions match the labels.

    ``dataset`` and ``coef`` come from the enclosing module. The last column
    of ``dataset`` is compared, cast to bool, against ``predict`` on the
    remaining columns.
    """
    samples = np.array(dataset)
    features = samples[:, :-1]
    labels = samples[:, -1]

    model = logistic_regression.LogisticRegression(learning_rate=0.01,
                                                   num_iterations=10**6,
                                                   verbose=10000)
    # Inject the coefficients directly; the model is never fitted here.
    model.coef_ = coef

    expected = np.array(labels, dtype=bool)
    assert all(model.predict(features) == expected)
def testLRWithWine(a, epochs):
    """Score logistic regression on the wine data with 5-fold validation.

    Args:
        a: first argument passed to ``lr.LogisticRegression``.
        epochs: second argument passed to ``lr.LogisticRegression``.

    Returns:
        Whatever ``LRKFoldValidation`` returns for the shuffled data set.
    """
    wine = genDataWOHeader(file_path1)
    # Return value is ignored, so this presumably converts the quality
    # column in place -- confirm against qualityToCategory.
    qualityToCategory(wine)
    np.random.shuffle(wine)

    # NOTE(review): the split below is computed but never used; validation
    # runs on the complete data set. Confirm whether trainSet was intended.
    testSet, trainSet = seperateTestSet(wine)

    model = lr.LogisticRegression(a, epochs)
    return LRKFoldValidation(model, wine, 5)
def MyLogRegTester(data, num_iters=10):
    """Train and score ``LogReg.LogisticRegression`` on random splits.

    Args:
        data: iterable of rows. For every row, the first element is skipped,
            the last element is used as the label, and everything in between
            is converted to ``float`` and used as the feature vector.
        num_iters: number of shuffle / split / train / score trials.

    Side effects:
        Truncates ``predictions.txt`` and writes a header line to it. If that
        fails, predictions are not written during scoring. Prints the
        accuracy of each trial and the mean accuracy over all trials.
    """
    filename = "predictions.txt"

    X, y = [], []
    for line in data:
        X.append([float(value) for value in line[1:-1]])
        y.append(line[-1])
    X = LogReg.min_max_normalization(np.asarray(X))

    # Pair every normalized feature row with its label so both can be
    # shuffled together.
    samples = [[X[i], y[i]] for i in range(len(X))]

    all_acc = 0
    print("My Logistic Regression implementation : ")

    file_exception = False
    try:
        with open(filename, 'w') as f:
            f.write("#y_predicted,y_actual\n")
    except OSError as e:
        print("Unable to do file operations. Error : ", e)
        file_exception = True

    for i in range(num_iters):
        random.shuffle(samples)
        X = np.asarray([sample[0] for sample in samples])
        y = [sample[1] for sample in samples]
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.33)
        classifier = LogReg.LogisticRegression(X_train, y_train)
        classifier.train()
        if not file_exception:
            acc = classifier.calculate_accuracy(
                X_test,
                y_test,
                write_predictions_to_file=True,
                filename=filename)
        else:
            acc = classifier.calculate_accuracy(X_test, y_test)
        print("Accuracy of trial ", i + 1, " : ", acc)
        all_acc += acc

    # Average over the trials actually run (this used to divide by a
    # hard-coded 10 regardless of num_iters).
    mean_acc = all_acc / num_iters if num_iters > 0 else 0.0
    print(
        "Mean Accuracy of {} trials of My Logistic Regression implementation : "
        .format(num_iters), mean_acc)
sign = int(0.5*sign+0.5) # print(sign, Category[i]) if sign != Category[i][0]: count+=1 return count/(2*N) N, d, Ntime, dl = 1000, 10, 100, 0.1 initialState = LinearInseparableData(N, d, Ntime, dl) initialState.run() xList = initialState.xList_4d # [x, y, x2, y2] category = initialState.category colorLine = initialState.Color cycleMax, stepSize = 50000, 0.05 LRsolution = logistic_regression.LogisticRegression(xList, category, cycleMax, stepSize) theta_solu = LRsolution.run() xBound, yBound = boundaryLine(theta_solu) plotResult(xList)
def test_has_converged(self, coef, X):
    """``_has_converged`` accepts the fixture coefficients and rejects others.

    The probabilities are computed once from the fixture coefficients and
    reused for both checks.
    """
    lr_model = lr.LogisticRegression()
    probabilities = lr.predict_proba(coef, X)

    assert lr_model._has_converged(coef, X, probabilities)

    other_coef = np.array([1, 1000])
    assert not lr_model._has_converged(other_coef, X, probabilities)
def test_gradient_descent_computes_gradient(self, X, y):
    """Fitting with ``max_iter=5`` calls ``logistic_gradient`` at least 5 times."""
    iterations = 5
    with patch_with_mock(lr, "logistic_gradient"):
        lr_model = lr.LogisticRegression(max_iter=iterations)
        lr_model.fit(X, y)
        # Checked inside the patch context, while the mock is still in place.
        assert lr.logistic_gradient.call_count >= iterations
d = feature_keeper(d, ['pay_1', 'limit_bal', 'any_late_pay'], 'default') X, y = pipeline.get_X_y(d) return d, X, y d_train, X_train, y_train = process_data(d_train) d_valid, X_valid, y_valid = process_data(d_valid) d_test, X_test, y_test = process_data(d_test) ## train model scaler = pipeline.MinMaxScaler() scaler.fit(X_train) X_train = scaler.transform(X_train) lr = logistic_regression.LogisticRegression(learning_rate=0.1, num_iterations=10**3, verbose=100) lr.fit(X_train, y_train) # plot relative feature importance pd.Series( lr.coef_[1:] / np.abs(lr.coef_[1:]).sum(), index=d_train.columns[:-1]).plot.bar(title='Relative feature importance') pd.Series(lr.coef_[1:] / np.abs(lr.coef_[1:]).sum(), index=d_train.columns[:-1]).abs().sort_values( ascending=False).plot.bar(title='Relative feature importance') ## validate model X_valid_scaled = scaler.transform(X_valid) pm = pipeline.PerformanceMetric(lr.predict(X_valid_scaled), y_valid) print('f1:', pm.f1_score)
# Rank of this process and total number of processes in the communicator.
node_id = comm.Get_rank()
nb_nodes = comm.size
log = get_logger(node_id)

# Communication graph: a grid with one node per process.
grid_graph = graph.Grid(nb_nodes, args.seed, tau=args.tau)

# Synthetic classification data; the total size scales with the node count.
# NOTE: this rebinds the name `dataset` from the module to the instance.
total_points = args.nb_points_per_node * nb_nodes
dataset = dataset.ClassificationDataset(seed=args.seed,
                                        nb_points=total_points,
                                        d=args.d)

# Model built on that data set.
model = logistic_regression.LogisticRegression(dataset, nb_nodes * args.c)

# Algorithm instance tying together communicator, graph, model and logger.
algo = adfs.ADFS(comm=comm,
                 seed=args.seed,
                 graph=grid_graph,
                 model=model,
                 log=log)

algo.run(args.n_steps)

# Only node 0 plots the error curve.
if node_id == 0:
    min_error = min(algo.error)
    plot(algo.time, algo.error, min_error)
import numpy as np
import logistic_regression as lg

# Four rows in which the third value is the XOR of the first two
# (presumably two inputs followed by the label -- confirm against
# lg.LogisticRegression).
xor_rows = [
    [0, 0, 0],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 0],
]
train_data = np.array(xor_rows, dtype=np.float16)
test_data = np.array([])  # no test samples

model = lg.LogisticRegression(lr=0.01,
                              datas=train_data,
                              epoch=10000,
                              err=0.01)

# Snapshot the weights before training so both states can be drawn.
init_w = np.copy(model.w)
model.train()

lg.draw(model=model,
        init_w=init_w,
        train_data=train_data,
        test_data=test_data)
import load_mnist import logistic_regression as lr from theano import tensor as T mnist_file = 'mnist.pkl.gz' y = T.ivector('y') x = T.matrix('x') classifier = lr.LogisticRegression(input=x, n_in=28 * 28, n_out=10) cost = classifier.negative_log_likelihood(y) g_W = T.grad(cost=cost, wrt=classifier.W) g_b = T.grad(cost=cost, wrt=classifier.b) updates = [(classifier.W, classifier.W - learning_rate * g_W), (classifier.b, classifier.b - learning_rate * g_b)] train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={
for i in learn: for j in ite: LRModel = lr.LogisticRegression(i, j) ave = 0.0 for k in range(3): ac = LRKFoldValidation(LRModel, rwClear, 5) print("per k fold:", ac) ave += ac ave = ave / 3.0 print("ave:", ave) if ave > max_acc: max_acc = ave bestLearn = i bestIte = j print(ave, " ", i, " ", j) print(bestLearn) print(bestIte) print(max_acc) LRModel = lr.LogisticRegression(0.001, 500) LDAModel = LDA.LDA() rwNormalized = genRWNormalized() cancerNormalized = genCancerNormalized() rwNormalized = genRWNormalized() print(LRKFoldValidation(LRModel, cancerNormalized, 5)) print(LDAKFoldValidation(LDAModel, cancerNormalized, 5)) print(LRKFoldValidation(LRModel, rwNormalized, 5)) print(LDAKFoldValidation(LDAModel, rwNormalized, 5))
def _reportMeanAccuracies(LRModel, LDAModel, rwSet, cancerSet, repeats=3):
    """Print mean LR/LDA 5-fold accuracies for one pair of data sets.

    Both data sets are shuffled in place before every repeat. Four values are
    printed, one per line, in this order: LR on rwSet, LDA on rwSet,
    LR on cancerSet, LDA on cancerSet.
    """
    lrOnRw = 0
    ldaOnRw = 0
    lrOnCancer = 0
    ldaOnCancer = 0
    for _ in range(repeats):
        np.random.shuffle(rwSet)
        np.random.shuffle(cancerSet)
        lrOnRw += LRKFoldValidation(LRModel, rwSet, 5)
        ldaOnRw += LDAKFoldValidation(LDAModel, rwSet, 5)
        lrOnCancer += LRKFoldValidation(LRModel, cancerSet, 5)
        ldaOnCancer += LDAKFoldValidation(LDAModel, cancerSet, 5)
    print(lrOnRw / repeats)
    print(ldaOnRw / repeats)
    print(lrOnCancer / repeats)
    print(ldaOnCancer / repeats)


def testDataPreprocess():
    """Compare LR and LDA accuracy across four preprocessing variants.

    All eight data sets are generated first, then one LR model and one LDA
    model are reused for every evaluation. The variants are reported in this
    order: raw, normalized, "clear", outliers removed. For each variant four
    mean accuracies (three repeats of 5-fold validation) are printed; see
    ``_reportMeanAccuracies`` for their order.
    """
    rwData = genRW()
    cancerData = genCancer()
    rwNormalized = genRWNormalized()
    cancerNormalized = genCancerNormalized()
    rwRemovedOL = genRWRemovedOL()
    cancerRemovedOL = genCancerRemovedOL()
    rwClear = genRWClear()
    cancerClear = genCancerClear()

    LRModel = lr.LogisticRegression(0.001, 500)
    LDAModel = LDA.LDA()

    variants = (
        (rwData, cancerData),
        (rwNormalized, cancerNormalized),
        (rwClear, cancerClear),
        (rwRemovedOL, cancerRemovedOL),
    )
    for rwSet, cancerSet in variants:
        _reportMeanAccuracies(LRModel, LDAModel, rwSet, cancerSet)
def featureSelection(data, isLR):
    """Greedy forward feature selection scored by 5-fold validation.

    In every round each not-yet-selected column is tried together with the
    columns selected so far, and the candidate with the highest accuracy is
    kept (ties go to the later column). The first round always keeps its best
    candidate; later rounds stop the search as soon as the best candidate
    does not strictly improve on the accuracy reached so far.

    Args:
        data: 2-D array. Every column except the last is a candidate feature;
            the last column is appended to each candidate matrix handed to
            the validation routine (presumably the label column).
        isLR: if truthy, score with ``LRKFoldValidation`` and
            ``lr.LogisticRegression(0.001, 500)``; otherwise with
            ``LDAKFoldValidation`` and ``LDA.LDA()``.

    Returns:
        Tuple ``(selectedFeatureNum, selectedFeatureArray)``: the selected
        column indices in selection order, and a 2-D array holding those
        columns in the same order. When nothing was selected the second item
        is ``-1``.
    """
    selectedFeatureNum = []
    selectedFeatureArray = -1
    bestAccuracyAll = 0
    y_2d = np.array([data[:, -1]]).T
    featureCount = data.shape[1] - 1

    for i in range(featureCount):
        featureToAdd = -1
        bestAccuracy = 0
        print("select feature{}".format(i))

        for j in range(featureCount):
            if j in selectedFeatureNum:
                continue
            column_2d = np.array([data[:, j]]).T
            nums = selectedFeatureNum + [j]
            if i == 0:
                candidate = np.concatenate((column_2d, y_2d), axis=1)
            else:
                candidate = np.concatenate(
                    (selectedFeatureArray, column_2d, y_2d), axis=1)

            # TODO: the fold count (5) is hard-coded.
            if isLR:
                # Fixed: later rounds used `lr.lr.LogisticRegression`, which
                # disagreed with the first round.
                model = lr.LogisticRegression(0.001, 500)
                accuracy = LRKFoldValidation(model, candidate, 5)
            else:
                # Fixed: later rounds passed the class `LDA.LDA` itself
                # instead of an instance.
                model = LDA.LDA()
                accuracy = LDAKFoldValidation(model, candidate, 5)

            print("Using feature(s){} accuracy is{}".format(nums, accuracy))
            if accuracy >= bestAccuracy:
                bestAccuracy = accuracy
                featureToAdd = j

        # Past the first round, stop when the extra feature does not help.
        if i > 0 and bestAccuracyAll >= bestAccuracy:
            print("maxima reached")
            break
        # No candidate could be scored (e.g. every accuracy was NaN): stop
        # rather than record the placeholder index -1 as a feature.
        if featureToAdd == -1:
            break

        bestAccuracyAll = bestAccuracy
        selectedFeatureNum.append(featureToAdd)
        chosenColumn = np.array([data[:, featureToAdd]]).T
        if i == 0:
            selectedFeatureArray = chosenColumn
        else:
            selectedFeatureArray = np.concatenate(
                (selectedFeatureArray, chosenColumn), axis=1)

    print(
        "feature selection ended, best performing features are {}, the accuracy is {}"
        .format(selectedFeatureNum, bestAccuracyAll))
    return selectedFeatureNum, selectedFeatureArray
# A simple example
import logistic_regression as lr

if __name__ == '__main__':
    # Replace the two label names with the label values of your data set,
    # e.g. 'Iris-setosa' and 'Iris-versicolor'.
    classifier = lr.LogisticRegression('Iris-setosa', 'Iris-versicolor')

    # Replace the path with the location of your 'iris.data' file.
    # After this step the data is loaded and initialized.
    classifier.set_data_from_file('iris.data')

    # `calculate` implements logistic regression with Newton's method.
    classifier.calculate()

    # Draw a simple diagram of the labels and input vectors, together with
    # the line that represents the logistic-regression result.
    classifier.show()