def main():
    """Run the logistic-regression walkthrough and save its two plots.

    Steps, in order:

    1. Fit a least-squares model on the rescaled inputs and save a
       predicted-vs-actual scatter plot.
    2. Split the rescaled data into train and test portions, then fit the
       logistic coefficients by gradient descent on the negative log
       likelihood.
    3. Convert the fitted coefficients back to the original feature scale
       and check that they give the same loss on the unscaled data.
    4. Compute precision and recall on the test portion using a 0.5
       probability threshold and save a predicted-probability scatter plot.

    The function takes no arguments and returns nothing.  It reads ``xs``,
    ``ys``, ``negative_log_gradient``, ``negative_log_likelihood``,
    ``logistic`` and ``dot`` from the enclosing module, and writes two PNG
    files under ``im/``.

    Raises:
        AssertionError: if the rescaled/unscaled losses disagree, or if
            precision/recall differ from the expected 0.75 / 0.8.
    """
    from matplotlib import pyplot as plt

    # Start from a clean plotting state before drawing anything.
    plt.close()
    plt.clf()
    plt.gca().clear()

    from scratch.working_with_data import rescale
    from scratch.multiple_regression import least_squares_fit, predict
    from scratch.gradient_descent import gradient_step

    # --- Linear regression on the rescaled inputs ---
    learning_rate = 0.001
    rescaled_xs = rescale(xs)
    # NOTE(review): the trailing positional arguments (1000, 1) are presumably
    # the number of steps and the batch size -- confirm against
    # least_squares_fit.
    beta = least_squares_fit(rescaled_xs, ys, learning_rate, 1000, 1)
    # [0.26, 0.43, -0.43]
    predictions = [predict(x_i, beta) for x_i in rescaled_xs]

    plt.scatter(predictions, ys)
    plt.xlabel("predicted")
    plt.ylabel("actual")
    plt.savefig('im/linear_regression_for_probabilities.png')
    plt.close()

    from scratch.machine_learning import train_test_split
    import math
    import random
    import tqdm

    # --- Logistic regression via gradient descent ---
    random.seed(0)
    # NOTE(review): 0.33 is presumably the fraction held out for testing --
    # confirm against train_test_split.
    x_train, x_test, y_train, y_test = train_test_split(rescaled_xs, ys, 0.33)

    learning_rate = 0.01

    # pick a random starting point
    beta = [random.random() for _ in range(3)]

    with tqdm.trange(5000) as t:
        for _ in t:
            gradient = negative_log_gradient(x_train, y_train, beta)
            # Step with a negative rate: we are minimising the loss.
            beta = gradient_step(beta, gradient, -learning_rate)
            loss = negative_log_likelihood(x_train, y_train, beta)
            t.set_description(f"loss: {loss:.3f} beta: {beta}")

    from scratch.working_with_data import scale

    # --- Map the coefficients back to the original feature scale ---
    # Assumes rescale() maps feature j to (x_j - means[j]) / stdevs[j];
    # under that assumption the expressions below undo the transformation.
    means, stdevs = scale(xs)
    beta_unscaled = [(beta[0]
                      - beta[1] * means[1] / stdevs[1]
                      - beta[2] * means[2] / stdevs[2]),
                     beta[1] / stdevs[1],
                     beta[2] / stdevs[2]]
    # [8.9, 1.6, -0.000288]

    # The two losses are computed along different floating-point paths, so
    # compare with a tolerance rather than with exact equality.
    assert math.isclose(negative_log_likelihood(xs, ys, beta_unscaled),
                        negative_log_likelihood(rescaled_xs, ys, beta))

    # --- Precision and recall on the held-out data ---
    true_positives = false_positives = true_negatives = false_negatives = 0

    for x_i, y_i in zip(x_test, y_test):
        prediction = logistic(dot(beta, x_i))

        if y_i == 1 and prediction >= 0.5:  # TP: paid and we predict paid
            true_positives += 1
        elif y_i == 1:                      # FN: paid and we predict unpaid
            false_negatives += 1
        elif prediction >= 0.5:             # FP: unpaid and we predict paid
            false_positives += 1
        else:                               # TN: unpaid and we predict unpaid
            true_negatives += 1

    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)

    print(precision, recall)

    assert math.isclose(precision, 0.75)
    assert math.isclose(recall, 0.8)

    # --- Predicted probability vs. actual outcome ---
    plt.clf()
    plt.gca().clear()

    predictions = [logistic(dot(beta, x_i)) for x_i in x_test]
    plt.scatter(predictions, y_test, marker='+')
    plt.xlabel("predicted probability")
    plt.ylabel("actual outcome")
    plt.title("Logistic Regression Predicted vs. Actual")
    plt.savefig('im/logistic_regression_predicted_vs_actual.png')
    plt.gca().clear()
def main():
    """Run the logistic-regression walkthrough and save its two plots.

    Steps, in order:

    1. Fit a least-squares model on the rescaled inputs and save a
       predicted-vs-actual scatter plot.
    2. Split the rescaled data into train and test portions, then fit the
       logistic coefficients by gradient descent on the negative log
       likelihood.
    3. Convert the fitted coefficients back to the original feature scale
       and check that they give the same loss on the unscaled data.
    4. Compute precision and recall on the test portion using a 0.5
       probability threshold and save a predicted-probability scatter plot.

    The function takes no arguments and returns nothing.  It reads ``xs``,
    ``ys``, ``negative_log_gradient``, ``negative_log_likelihood``,
    ``logistic`` and ``dot`` from the enclosing module, and writes two PNG
    files under ``im/``.

    Raises:
        AssertionError: if the rescaled/unscaled losses disagree, or if
            precision/recall differ from the expected 0.75 / 0.8.
    """
    from matplotlib import pyplot as plt

    # Start from a clean plotting state before drawing anything.
    plt.close()
    plt.clf()
    plt.gca().clear()

    from scratch.working_with_data import rescale
    from scratch.multiple_regression import least_squares_fit, predict
    from scratch.gradient_descent import gradient_step

    # --- Linear regression on the rescaled inputs ---
    learning_rate = 0.001
    rescaled_xs = rescale(xs)
    # NOTE(review): the trailing positional arguments (1000, 1) are presumably
    # the number of steps and the batch size -- confirm against
    # least_squares_fit.
    beta = least_squares_fit(rescaled_xs, ys, learning_rate, 1000, 1)
    # [0.26, 0.43, -0.43]
    predictions = [predict(x_i, beta) for x_i in rescaled_xs]

    plt.scatter(predictions, ys)
    plt.xlabel("predicted")
    plt.ylabel("actual")
    plt.savefig('im/linear_regression_for_probabilities.png')
    plt.close()

    from scratch.machine_learning import train_test_split
    import math
    import random
    import tqdm

    # --- Logistic regression via gradient descent ---
    random.seed(0)
    # NOTE(review): 0.33 is presumably the fraction held out for testing --
    # confirm against train_test_split.
    x_train, x_test, y_train, y_test = train_test_split(rescaled_xs, ys, 0.33)

    learning_rate = 0.01

    # pick a random starting point
    beta = [random.random() for _ in range(3)]

    with tqdm.trange(5000) as t:
        for _ in t:
            gradient = negative_log_gradient(x_train, y_train, beta)
            # Step with a negative rate: we are minimising the loss.
            beta = gradient_step(beta, gradient, -learning_rate)
            loss = negative_log_likelihood(x_train, y_train, beta)
            t.set_description(f"loss: {loss:.3f} beta: {beta}")

    from scratch.working_with_data import scale

    # --- Map the coefficients back to the original feature scale ---
    # Assumes rescale() maps feature j to (x_j - means[j]) / stdevs[j];
    # under that assumption the expressions below undo the transformation.
    means, stdevs = scale(xs)
    beta_unscaled = [(beta[0]
                      - beta[1] * means[1] / stdevs[1]
                      - beta[2] * means[2] / stdevs[2]),
                     beta[1] / stdevs[1],
                     beta[2] / stdevs[2]]
    # [8.9, 1.6, -0.000288]

    # The two losses are computed along different floating-point paths, so
    # compare with a tolerance rather than with exact equality.
    assert math.isclose(negative_log_likelihood(xs, ys, beta_unscaled),
                        negative_log_likelihood(rescaled_xs, ys, beta))

    # --- Precision and recall on the held-out data ---
    true_positives = false_positives = true_negatives = false_negatives = 0

    for x_i, y_i in zip(x_test, y_test):
        prediction = logistic(dot(beta, x_i))

        if y_i == 1 and prediction >= 0.5:  # TP: paid and we predict paid
            true_positives += 1
        elif y_i == 1:                      # FN: paid and we predict unpaid
            false_negatives += 1
        elif prediction >= 0.5:             # FP: unpaid and we predict paid
            false_positives += 1
        else:                               # TN: unpaid and we predict unpaid
            true_negatives += 1

    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)

    print(precision, recall)

    assert math.isclose(precision, 0.75)
    assert math.isclose(recall, 0.8)

    # --- Predicted probability vs. actual outcome ---
    plt.clf()
    plt.gca().clear()

    predictions = [logistic(dot(beta, x_i)) for x_i in x_test]
    plt.scatter(predictions, y_test, marker='+')
    plt.xlabel("predicted probability")
    plt.ylabel("actual outcome")
    plt.title("Logistic Regression Predicted vs. Actual")
    plt.savefig('im/logistic_regression_predicted_vs_actual.png')
    plt.gca().clear()
(2.9, 59000, 0), (2.1, 52000, 0), (8.2, 87000, 0), (4.8, 73000, 0),
(2.2, 42000, 1), (9.1, 98000, 0), (6.5, 84000, 0), (6.9, 73000, 0),
(5.1, 72000, 0), (9.1, 69000, 1), (9.8, 79000, 1), ]
# NOTE(review): the opening of the list above is outside this view; the next
# statement reads it as `tuples`, so it is presumably `tuples = [...]`.

# Turn every record into a mutable list so it can be sliced below.
data = [list(row) for row in tuples]
# Feature rows: a constant 1.0 followed by the first two fields of each record.
xs = [[1.0] + row[:2] for row in data] # [1, experience, salary]
# Targets: the third field of each record.
ys = [row[2] for row in data] # paid_account

from sklearn.linear_model import LogisticRegression
from scratch.working_with_data import rescale
from scratch.machine_learning import train_test_split

rescaled_xs = rescale(xs)
# NOTE(review): 0.33 is presumably the fraction held out for testing, and no
# random seed is set in the visible code, so the split may differ between
# runs -- confirm against train_test_split.
x_train, x_test, y_train, y_test = train_test_split(rescaled_xs, ys, 0.33)

# fit_intercept=False: each feature row already starts with the constant 1.0
# added above, so (presumably) that column plays the role of the intercept.
clf = LogisticRegression(fit_intercept=False, solver='liblinear', max_iter=1000).fit(x_train, y_train)

# Show the fitted coefficients, the class labels and the intercept attribute.
print(clf.coef_)
print(clf.classes_)
print(clf.intercept_)
def main():
    """Run the logistic-regression walkthrough and save its two plots.

    Steps, in order:

    1. Fit a least-squares model on the rescaled inputs and save a
       predicted-vs-actual scatter plot.
    2. Split the rescaled data into train and test portions, then fit the
       logistic coefficients by gradient descent on the negative log
       likelihood.
    3. Convert the fitted coefficients back to the original feature scale
       and check that they give the same loss on the unscaled data.
    4. Compute precision and recall on the test portion using a 0.5
       probability threshold, save a predicted-probability scatter plot
       and then display it.

    The function takes no arguments and returns nothing.  It reads ``xs``,
    ``ys``, ``negative_log_gradient``, ``negative_log_likelihood``,
    ``logistic`` and ``dot`` from the enclosing module, and writes two PNG
    files under ``im/``.  Axis labels and the title are in Polish.

    Raises:
        AssertionError: if the rescaled/unscaled losses disagree, or if
            precision/recall differ from the expected 0.75 / 0.8.
    """
    from matplotlib import pyplot as plt

    # Start from a clean plotting state before drawing anything.
    plt.close()
    plt.clf()
    plt.gca().clear()

    from scratch.working_with_data import rescale
    from scratch.multiple_regression import least_squares_fit, predict
    from scratch.gradient_descent import gradient_step

    # --- Linear regression on the rescaled inputs ---
    learning_rate = 0.001
    rescaled_xs = rescale(xs)
    # NOTE(review): the trailing positional arguments (1000, 1) are presumably
    # the number of steps and the batch size -- confirm against
    # least_squares_fit.
    beta = least_squares_fit(rescaled_xs, ys, learning_rate, 1000, 1)
    # [0.26, 0.43, -0.43]
    predictions = [predict(x_i, beta) for x_i in rescaled_xs]

    plt.scatter(predictions, ys)
    plt.xlabel("wartosc przewidywana")
    plt.ylabel("wartosc rzeczywista")
    plt.savefig('im/linear_regression_for_probabilities.png')
    plt.close()

    from scratch.machine_learning import train_test_split
    import math
    import random
    import tqdm

    # --- Logistic regression via gradient descent ---
    random.seed(0)
    # NOTE(review): 0.33 is presumably the fraction held out for testing --
    # confirm against train_test_split.
    x_train, x_test, y_train, y_test = train_test_split(rescaled_xs, ys, 0.33)

    learning_rate = 0.01

    # Pick a random starting point.
    beta = [random.random() for _ in range(3)]

    with tqdm.trange(5000) as t:
        for _ in t:
            gradient = negative_log_gradient(x_train, y_train, beta)
            # Step with a negative rate: we are minimising the loss.
            beta = gradient_step(beta, gradient, -learning_rate)
            loss = negative_log_likelihood(x_train, y_train, beta)
            t.set_description(f"loss: {loss:.3f} beta: {beta}")

    from scratch.working_with_data import scale

    # --- Map the coefficients back to the original feature scale ---
    # Assumes rescale() maps feature j to (x_j - means[j]) / stdevs[j];
    # under that assumption the expressions below undo the transformation.
    means, stdevs = scale(xs)
    beta_unscaled = [(beta[0]
                      - beta[1] * means[1] / stdevs[1]
                      - beta[2] * means[2] / stdevs[2]),
                     beta[1] / stdevs[1],
                     beta[2] / stdevs[2]]
    # [8.9, 1.6, -0.000288]

    # The two losses are computed along different floating-point paths, so
    # compare with a tolerance rather than with exact equality.
    assert math.isclose(negative_log_likelihood(xs, ys, beta_unscaled),
                        negative_log_likelihood(rescaled_xs, ys, beta))

    # --- Precision and recall on the held-out data ---
    true_positives = false_positives = true_negatives = false_negatives = 0

    for x_i, y_i in zip(x_test, y_test):
        prediction = logistic(dot(beta, x_i))

        if y_i == 1 and prediction >= 0.5:
            # True positive: the user paid and the classifier predicted it.
            true_positives += 1
        elif y_i == 1:
            # False negative: the user paid but the classifier missed it.
            false_negatives += 1
        elif prediction >= 0.5:
            # False positive: the user did not pay but a payment was
            # predicted.
            false_positives += 1
        else:
            # True negative: the user did not pay and the classifier
            # predicted that.
            true_negatives += 1

    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)

    print(precision, recall)

    assert math.isclose(precision, 0.75)
    assert math.isclose(recall, 0.8)

    # --- Predicted probability vs. actual outcome ---
    plt.clf()
    plt.gca().clear()

    predictions = [logistic(dot(beta, x_i)) for x_i in x_test]
    plt.scatter(predictions, y_test, marker='+')
    plt.xlabel("przewidywane prawdopodobienstwo")
    plt.ylabel("wynik rzeczywisty")
    plt.title("Porownanie wartosci rzeczywistych i przewidywanych")
    # Save BEFORE showing: with an interactive backend the figure may be
    # closed once show() returns, and a later savefig() could write an
    # empty image.
    plt.savefig('im/logistic_regression_predicted_vs_actual.png')
    plt.show()
    plt.gca().clear()