def test_lr_newton_method():
    """Check the fitted parameters produced by the ``newton_method`` solver.

    Fits ``LogisticRegression(solver="newton_method")`` on the data returned
    by ``read_data()`` and asserts that the intercept and the first two
    coefficients are within 0.01 of the reference values.
    """
    tolerance = 0.01
    expected_intercept = -2.618
    expected_coefficients = (0.76, 1.17)

    features, targets = read_data()
    model = LogisticRegression(solver="newton_method")
    model.fit(features, targets)

    # Intercept first, then each coefficient by position.
    assert abs(model.intercept_ - expected_intercept) < tolerance

    weights = model.coef_
    for position, expected in enumerate(expected_coefficients):
        assert abs(weights[position] - expected) < tolerance
def test_lr_stochastic_gradient_descent():
    """Check the fitted parameters produced by the SGD solver.

    Fits ``LogisticRegression`` with ``learning_rate=0.001``,
    ``max_iter=10000`` and ``solver="stochastic_gradient_descent"`` on the
    data returned by ``read_data()``, then asserts that the intercept and the
    first two coefficients are within 0.01 of the reference values.
    """

    def within_tolerance(actual, expected):
        # Absolute-difference comparison with the fixed 0.01 tolerance.
        return abs(actual - expected) < 0.01

    settings = {
        "learning_rate": 0.001,
        "max_iter": 10000,
        "solver": "stochastic_gradient_descent",
    }
    X, y = read_data()
    classifier = LogisticRegression(**settings)
    classifier.fit(X, y)

    # Intercept check.
    assert within_tolerance(classifier.intercept_, -2.618)

    # Coefficient checks.
    coefficients = classifier.coef_
    assert within_tolerance(coefficients[0], 0.76)
    assert within_tolerance(coefficients[1], 1.17)
import numpy as np import matplotlib.pyplot as plt from logistic import LogisticRegression # read data X = np.loadtxt('logistic_x.txt') y = np.loadtxt('logistic_y.txt') # build model lr = LogisticRegression() lr.fit(X, y) y_ = lr.predict(X) # create a mesh to plot in x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1 y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1 h = 0.1 # step_size xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) data = np.vstack((xx.ravel(), yy.ravel())).T labels = lr.predict(data) # plot fig, ax = plt.subplots() ax.scatter(data[:, 0], data[:, 1], c=np.where(labels == 1, 'green', 'red'), alpha=0.01) plt.title('Decision Boundary of Logistic Regression') ax.scatter(X[y == 1, 0], X[y == 1, 1], c='green',
def _last_columns(rows, count=5):
    """Return ``row[len(row) - count:]`` for every row in *rows*.

    For rows holding at least *count* values this is the trailing *count*
    values of the row.
    """
    return [row[len(row) - count:] for row in rows]


def _print_report(title, learning_rate, y_true, y_pred):
    """Print a header, the classification report and the Matthews coefficient."""
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("{}, LR: {}".format(title, learning_rate))
    print(classification_report(y_true=list(y_true), y_pred=list(y_pred)))
    print(matthews_corrcoef(y_true, y_pred))


def main():
    """Train and evaluate logistic and linear regression on the stance data.

    Steps:
      1. Load the training bodies/stances with ``generateSentences`` and
         encode the labels with a ``LabelEncoder`` fitted on
         ``label_headers``.
      2. Fit the count/tf-idf vectorisers (``vectorise``) on the first
         ``index + 1`` samples.
      3. Compute distance features with ``calcDistances`` for the training
         data and for the competition test data.
      4. For every learning rate, fit ``LogisticRegression`` on the full
         distance features and ``LinearRegression`` on the last five
         columns, then print ``classification_report`` and
         ``matthews_corrcoef`` for the test predictions.
    """
    bodies, stances, index, body_IDs, stance_IDs, labels = generateSentences(
        'train_bodies.csv', 'train_stances.csv')

    encoder = LabelEncoder()
    encoder.fit(label_headers)
    encoded_labels = encoder.transform(labels)

    # The result is not used below; the call is kept in case matchStance has
    # side effects -- TODO confirm and drop if it is pure.
    matchStance(bodies, stances, body_IDs, stance_IDs)
    sorted_bodies = linkBodies(body_IDs, stance_IDs, bodies)

    # The first ``index + 1`` samples form the training portion.
    train_end = index + 1
    training_bodies = sorted_bodies[:train_end]
    training_stances = stances[:train_end]
    training_labels = encoded_labels[:train_end]

    cv, tfidf = vectorise(training_bodies, training_stances, training_labels)

    # Distance features are computed over *all* bodies/stances and sliced to
    # the training portion afterwards.
    body_tf = list(tfidf.transform(cv.transform(sorted_bodies)).toarray())
    stance_tf = list(tfidf.transform(cv.transform(stances)).toarray())
    dists = calcDistances(body_tf, stance_tf)
    distanceShow(dists[:train_end], training_labels)
    linear_dists = _last_columns(dists)

    test_b, test_s, test_index, test_b_ids, test_s_ids, test_labels = generateSentences(
        'competition_test_bodies.csv', 'competition_test_stances.csv')
    encoded_test = encoder.transform(test_labels)
    test_sorted_b = linkBodies(test_b_ids, test_s_ids, test_b)
    test_b_tf = list(tfidf.transform(cv.transform(test_sorted_b)).toarray())
    test_s_tf = list(tfidf.transform(cv.transform(test_s)).toarray())
    test_dists = calcDistances(test_b_tf, test_s_tf)
    test_linear_dists = _last_columns(test_dists)

    learning_rates = [0.0005, 0.001, 0.005, 0.01, 0.05, 0.1]
    for rate in learning_rates:
        logistic = LogisticRegression(lr=rate, steps=10000)
        logistic.fit(input=dists[:train_end], labels=training_labels)
        y_pred = logistic.predict(test_dists)

        linear = LinearRegression(lr=rate, steps=50)
        linear.fit(input=linear_dists[:train_end], labels=training_labels)
        y_pred2 = linear.predict(test_linear_dists)

        _print_report("Logistic Classification", rate, encoded_test, y_pred)
        _print_report("Linear Classification", rate, encoded_test, y_pred2)