コード例 #1
0
def test_lr_newton_method():
    """Fit with the ``newton_method`` solver and check the learned parameters.

    The fitted intercept and the first two coefficients must each lie
    within 0.01 of their reference values.
    """
    features, targets = read_data()

    model = LogisticRegression(solver="newton_method")
    model.fit(features, targets)

    tolerance = 0.01

    # Intercept first, then the coefficients, each against its reference.
    expected_intercept = -2.618
    assert abs(model.intercept_ - expected_intercept) < tolerance

    weights = model.coef_
    for position, expected in enumerate((0.76, 1.17)):
        assert abs(weights[position] - expected) < tolerance
コード例 #2
0
def test_lr_stochastic_gradient_descent():
    """Fit with the ``stochastic_gradient_descent`` solver and check the result.

    The model is trained with learning rate 0.001 for at most 10000
    iterations; the intercept and the first two coefficients must each lie
    within 0.01 of their reference values.
    """
    features, targets = read_data()

    settings = {
        "learning_rate": 0.001,
        "max_iter": 10000,
        "solver": "stochastic_gradient_descent",
    }
    model = LogisticRegression(**settings)
    model.fit(features, targets)

    tolerance = 0.01

    # Intercept first, then the coefficients, each against its reference.
    expected_intercept = -2.618
    assert abs(model.intercept_ - expected_intercept) < tolerance

    weights = model.coef_
    for position, expected in enumerate((0.76, 1.17)):
        assert abs(weights[position] - expected) < tolerance
コード例 #3
0
ファイル: plot.py プロジェクト: wp-lai/xmachinelearning
import numpy as np
import matplotlib.pyplot as plt
from logistic import LogisticRegression

# Load the feature matrix and the labels from text files; the paths are
# relative to the current working directory.
X = np.loadtxt('logistic_x.txt')
y = np.loadtxt('logistic_y.txt')

# Fit the classifier with its default settings, then predict on the
# training inputs.
# NOTE(review): y_ is not referenced in the visible part of this script.
lr = LogisticRegression()
lr.fit(X, y)
y_ = lr.predict(X)

# Build a grid spanning the first two feature columns, padded by 1 on every
# side, so the classifier can be evaluated over the whole plotting area.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
h = 0.1  # grid spacing along both axes
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Flatten the grid into one (x, y) row per grid point before predicting.
data = np.vstack((xx.ravel(), yy.ravel())).T
labels = lr.predict(data)

# Draw the grid points coloured by predicted label (green where the
# prediction equals 1, red otherwise); the very low alpha keeps them as a
# faint background tint.
fig, ax = plt.subplots()
ax.scatter(data[:, 0],
           data[:, 1],
           c=np.where(labels == 1, 'green', 'red'),
           alpha=0.01)
plt.title('Decision Boundary of Logistic Regression')
ax.scatter(X[y == 1, 0],
           X[y == 1, 1],
           c='green',
コード例 #4
0
def _tail_features(rows, width=5):
    """Return the trailing ``width`` entries of every row in ``rows``."""
    return [row[len(row) - width:] for row in rows]


def _print_report(heading, y_true, y_pred):
    """Print ``heading``, the classification report and the Matthews score."""
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(heading)
    print(classification_report(y_true=list(y_true), y_pred=list(y_pred)))
    print(matthews_corrcoef(y_true, y_pred))


def main():
    """Train logistic and linear models on distance features and report metrics.

    Steps:
      1. Load the training bodies/stances and encode their labels.
      2. Fit the count and tf-idf vectorisers on rows ``[0, index]``.
      3. Turn bodies and stances into tf-idf vectors and compute their
         distances with ``calcDistances``; the linear model only sees the
         last five entries of each distance row.
      4. Repeat the vectorising and distance steps for the competition test set.
      5. For each learning rate, fit both models on the training rows,
         predict on the test set and print a classification report plus
         the Matthews correlation coefficient.
    """
    bodies, stances, index, body_IDs, stance_IDs, labels = generateSentences(
        'train_bodies.csv', 'train_stances.csv')
    encoder = LabelEncoder()
    encoder.fit(label_headers)
    encoded_labels = encoder.transform(labels)

    # The return value is not used below; the call is kept in case
    # matchStance has side effects.
    matchStance(bodies, stances, body_IDs, stance_IDs)
    sorted_bodies = linkBodies(body_IDs, stance_IDs, bodies)

    # Rows [0, index] are the training split; the remainder is held out.
    train_end = index + 1
    training_bodies = sorted_bodies[:train_end]
    training_stances = stances[:train_end]
    training_labels = encoded_labels[:train_end]

    cv, tfidf = vectorise(training_bodies, training_stances, training_labels)

    # NOTE: the cosineSim / kldivergence plotting experiments over the count
    # and tf-idf vectors that used to live here were already disabled.

    # Vectorise every row (training and held-out alike); the training rows
    # are sliced back out of the resulting distance table below.
    all_b_tf = list(tfidf.transform(cv.transform(sorted_bodies)).toarray())
    all_s_tf = list(tfidf.transform(cv.transform(stances)).toarray())

    dists = calcDistances(all_b_tf, all_s_tf)
    distanceShow(dists[:train_end], training_labels)
    linear_dists = _tail_features(dists)

    test_b, test_s, _test_index, test_b_ids, test_s_ids, test_labels = generateSentences(
        'competition_test_bodies.csv', 'competition_test_stances.csv')
    encoded_test = encoder.transform(test_labels)
    test_sorted_b = linkBodies(test_b_ids, test_s_ids, test_b)

    test_b_tf = list(tfidf.transform(cv.transform(test_sorted_b)).toarray())
    test_s_tf = list(tfidf.transform(cv.transform(test_s)).toarray())
    test_dists = calcDistances(test_b_tf, test_s_tf)
    test_linear_dists = _tail_features(test_dists)

    # The training slices do not depend on the learning rate, so take them once.
    train_dists = dists[:train_end]
    train_linear_dists = linear_dists[:train_end]

    for rate in (0.0005, 0.001, 0.005, 0.01, 0.05, 0.1):
        logistic = LogisticRegression(lr=rate, steps=10000)
        logistic.fit(input=train_dists, labels=training_labels)
        y_pred = logistic.predict(test_dists)

        linear = LinearRegression(lr=rate, steps=50)
        linear.fit(input=train_linear_dists, labels=training_labels)
        y_pred2 = linear.predict(test_linear_dists)

        _print_report("Logistic Classification, LR: {}".format(rate),
                      encoded_test, y_pred)
        _print_report("Linear Classification, LR: {}".format(rate),
                      encoded_test, y_pred2)