Python train_test_splitの例、scratch.machine_learning.train_test_split Pythonの例

コード例 #1

0

ファイルを表示

ファイル: logistic_regression.py プロジェクト: rshu/MLOptimization

def main():
    from matplotlib import pyplot as plt
    plt.close()
    plt.clf()
    plt.gca().clear()

    from matplotlib import pyplot as plt
    from scratch.working_with_data import rescale
    from scratch.multiple_regression import least_squares_fit, predict
    from scratch.gradient_descent import gradient_step

    learning_rate = 0.001
    rescaled_xs = rescale(xs)
    beta = least_squares_fit(rescaled_xs, ys, learning_rate, 1000, 1)
    # [0.26, 0.43, -0.43]
    predictions = [predict(x_i, beta) for x_i in rescaled_xs]

    plt.scatter(predictions, ys)
    plt.xlabel("predicted")
    plt.ylabel("actual")
    # plt.show()

    plt.savefig('im/linear_regression_for_probabilities.png')
    plt.close()

    from scratch.machine_learning import train_test_split
    import random
    import tqdm

    random.seed(0)
    x_train, x_test, y_train, y_test = train_test_split(rescaled_xs, ys, 0.33)

    learning_rate = 0.01

    # pick a random starting point
    beta = [random.random() for _ in range(3)]

    with tqdm.trange(5000) as t:
        for epoch in t:
            gradient = negative_log_gradient(x_train, y_train, beta)
            beta = gradient_step(beta, gradient, -learning_rate)
            loss = negative_log_likelihood(x_train, y_train, beta)
            t.set_description(f"loss: {loss:.3f} beta: {beta}")

    from scratch.working_with_data import scale

    means, stdevs = scale(xs)
    beta_unscaled = [(beta[0] - beta[1] * means[1] / stdevs[1] -
                      beta[2] * means[2] / stdevs[2]), beta[1] / stdevs[1],
                     beta[2] / stdevs[2]]
    # [8.9, 1.6, -0.000288]

    assert (negative_log_likelihood(xs, ys,
                                    beta_unscaled) == negative_log_likelihood(
                                        rescaled_xs, ys, beta))

    true_positives = false_positives = true_negatives = false_negatives = 0

    for x_i, y_i in zip(x_test, y_test):
        prediction = logistic(dot(beta, x_i))

        if y_i == 1 and prediction >= 0.5:  # TP: paid and we predict paid
            true_positives += 1
        elif y_i == 1:  # FN: paid and we predict unpaid
            false_negatives += 1
        elif prediction >= 0.5:  # FP: unpaid and we predict paid
            false_positives += 1
        else:  # TN: unpaid and we predict unpaid
            true_negatives += 1

    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)

    print(precision, recall)

    assert precision == 0.75
    assert recall == 0.8

    plt.clf()
    plt.gca().clear()

    predictions = [logistic(dot(beta, x_i)) for x_i in x_test]
    plt.scatter(predictions, y_test, marker='+')
    plt.xlabel("predicted probability")
    plt.ylabel("actual outcome")
    plt.title("Logistic Regression Predicted vs. Actual")
    # plt.show()

    plt.savefig('im/logistic_regression_predicted_vs_actual.png')
    plt.gca().clear()

コード例 #2

0

ファイルを表示

ファイル: logistic_regression.py プロジェクト: joelgrus/data-science-from-scratch

def main():
    
    from matplotlib import pyplot as plt
    plt.close()
    plt.clf()
    plt.gca().clear()
    
    from matplotlib import pyplot as plt
    from scratch.working_with_data import rescale
    from scratch.multiple_regression import least_squares_fit, predict
    from scratch.gradient_descent import gradient_step
    
    learning_rate = 0.001
    rescaled_xs = rescale(xs)
    beta = least_squares_fit(rescaled_xs, ys, learning_rate, 1000, 1)
    # [0.26, 0.43, -0.43]
    predictions = [predict(x_i, beta) for x_i in rescaled_xs]
    
    plt.scatter(predictions, ys)
    plt.xlabel("predicted")
    plt.ylabel("actual")
    # plt.show()
    
    
    plt.savefig('im/linear_regression_for_probabilities.png')
    plt.close()
    
    from scratch.machine_learning import train_test_split
    import random
    import tqdm
    
    random.seed(0)
    x_train, x_test, y_train, y_test = train_test_split(rescaled_xs, ys, 0.33)
    
    learning_rate = 0.01
    
    # pick a random starting point
    beta = [random.random() for _ in range(3)]
    
    with tqdm.trange(5000) as t:
        for epoch in t:
            gradient = negative_log_gradient(x_train, y_train, beta)
            beta = gradient_step(beta, gradient, -learning_rate)
            loss = negative_log_likelihood(x_train, y_train, beta)
            t.set_description(f"loss: {loss:.3f} beta: {beta}")
    
    from scratch.working_with_data import scale
    
    means, stdevs = scale(xs)
    beta_unscaled = [(beta[0]
                      - beta[1] * means[1] / stdevs[1]
                      - beta[2] * means[2] / stdevs[2]),
                     beta[1] / stdevs[1],
                     beta[2] / stdevs[2]]
    # [8.9, 1.6, -0.000288]
    
    
    
    assert (negative_log_likelihood(xs, ys, beta_unscaled) ==
            negative_log_likelihood(rescaled_xs, ys, beta))
    
    true_positives = false_positives = true_negatives = false_negatives = 0
    
    for x_i, y_i in zip(x_test, y_test):
        prediction = logistic(dot(beta, x_i))
    
        if y_i == 1 and prediction >= 0.5:  # TP: paid and we predict paid
            true_positives += 1
        elif y_i == 1:                      # FN: paid and we predict unpaid
            false_negatives += 1
        elif prediction >= 0.5:             # FP: unpaid and we predict paid
            false_positives += 1
        else:                               # TN: unpaid and we predict unpaid
            true_negatives += 1
    
    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)
    
    
    
    print(precision, recall)
    
    assert precision == 0.75
    assert recall == 0.8
    
    
    
    plt.clf()
    plt.gca().clear()
    
    predictions = [logistic(dot(beta, x_i)) for x_i in x_test]
    plt.scatter(predictions, y_test, marker='+')
    plt.xlabel("predicted probability")
    plt.ylabel("actual outcome")
    plt.title("Logistic Regression Predicted vs. Actual")
    # plt.show()
    
    
    
    plt.savefig('im/logistic_regression_predicted_vs_actual.png')
    plt.gca().clear()

コード例 #3

0

ファイルを表示

ファイル: sklearn.py プロジェクト: thoonk/DataScience

    (2.9, 59000, 0),
    (2.1, 52000, 0),
    (8.2, 87000, 0),
    (4.8, 73000, 0),
    (2.2, 42000, 1),
    (9.1, 98000, 0),
    (6.5, 84000, 0),
    (6.9, 73000, 0),
    (5.1, 72000, 0),
    (9.1, 69000, 1),
    (9.8, 79000, 1),
]
data = [list(row) for row in tuples]

xs = [[1.0] + row[:2] for row in data]  # [1, experience, salary]
ys = [row[2] for row in data]  # paid_account

from sklearn.linear_model import LogisticRegression
from scratch.working_with_data import rescale
from scratch.machine_learning import train_test_split

rescaled_xs = rescale(xs)
x_train, x_test, y_train, y_test = train_test_split(rescaled_xs, ys, 0.33)

clf = LogisticRegression(fit_intercept=False,
                         solver='liblinear',
                         max_iter=1000).fit(x_train, y_train)
print(clf.coef_)
print(clf.classes_)
print(clf.intercept_)

コード例 #4

0

ファイルを表示

def main():

    from matplotlib import pyplot as plt
    plt.close()
    plt.clf()
    plt.gca().clear()

    from matplotlib import pyplot as plt
    from scratch.working_with_data import rescale
    from scratch.multiple_regression import least_squares_fit, predict
    from scratch.gradient_descent import gradient_step

    learning_rate = 0.001
    rescaled_xs = rescale(xs)
    beta = least_squares_fit(rescaled_xs, ys, learning_rate, 1000, 1)
    # [0.26, 0.43, -0.43]
    predictions = [predict(x_i, beta) for x_i in rescaled_xs]

    plt.scatter(predictions, ys)
    plt.xlabel("wartosc przewidywana")
    plt.ylabel("wartosc rzeczywista")
    # plt.show()

    plt.savefig('im/linear_regression_for_probabilities.png')
    plt.close()

    from scratch.machine_learning import train_test_split
    import random
    import tqdm

    random.seed(0)
    x_train, x_test, y_train, y_test = train_test_split(rescaled_xs, ys, 0.33)

    learning_rate = 0.01

    # Wybierz losowy punkt początkowy.
    beta = [random.random() for _ in range(3)]

    with tqdm.trange(5000) as t:
        for epoch in t:
            gradient = negative_log_gradient(x_train, y_train, beta)
            beta = gradient_step(beta, gradient, -learning_rate)
            loss = negative_log_likelihood(x_train, y_train, beta)
            t.set_description(f"loss: {loss:.3f} beta: {beta}")

    from scratch.working_with_data import scale

    means, stdevs = scale(xs)
    beta_unscaled = [(beta[0] - beta[1] * means[1] / stdevs[1] -
                      beta[2] * means[2] / stdevs[2]), beta[1] / stdevs[1],
                     beta[2] / stdevs[2]]
    # [8.9, 1.6, -0.000288]

    assert (negative_log_likelihood(xs, ys,
                                    beta_unscaled) == negative_log_likelihood(
                                        rescaled_xs, ys, beta))

    true_positives = false_positives = true_negatives = false_negatives = 0

    for x_i, y_i in zip(x_test, y_test):
        prediction = logistic(dot(beta, x_i))

        if y_i == 1 and prediction >= 0.5:  # Wynik prawdziwie dodatni: użytkownik zapłacił i klasyfikator przewidział to poprawnie.
            true_positives += 1
        elif y_i == 1:  # Wynik fałszywie ujemny: użytkownik zapłacił, a klasyfikator tego nie przewidział.
            false_negatives += 1
        elif prediction >= 0.5:  # Wynik fałszywie dodatni: użytkownik nie zapłacił, a klasyfikator przewidział opłatę.
            false_positives += 1
        else:  # Wynik prawdziwie negatywny: użytkownik nie zapłacił i zostało to przewidziane przez klasyfikator.
            true_negatives += 1

    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)

    print(precision, recall)

    assert precision == 0.75
    assert recall == 0.8

    plt.clf()
    plt.gca().clear()

    predictions = [logistic(dot(beta, x_i)) for x_i in x_test]
    plt.scatter(predictions, y_test, marker='+')
    plt.xlabel("przewidywane prawdopodobienstwo")
    plt.ylabel("wynik rzeczywisty")
    plt.title("Porownanie wartosci rzeczywistych i przewidywanych")
    plt.show()

    plt.savefig('im/logistic_regression_predicted_vs_actual.png')
    plt.gca().clear()