    (4.2, 78000, 0), (1.1, 54000, 0), (6.2, 60000, 0), (2.9, 59000, 0), (2.1, 52000, 0),
    (8.2, 87000, 0), (4.8, 73000, 0), (2.2, 42000, 1), (9.1, 98000, 0), (6.5, 84000, 0),
    (6.9, 73000, 0), (5.1, 72000, 0), (9.1, 69000, 1), (9.8, 79000, 1),
]

##### 16.1 The Problem

data = list(map(list, data))         # change tuples to lists
x = [[1] + row[:2] for row in data]  # each element is [1, experience, salary]
y = [row[2] for row in data]         # each element is paid_account

print("linear regression:")
rescaled_x = rescale(x)
beta = estimate_beta(rescaled_x, y)
print(beta)

##### 16.2 The Logistic Function
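This section centers on the logistic function, which maps any real number into (0, 1) so that the linear expression can be read as a probability. A minimal sketch of the standard definition:

import math

def logistic(x: float) -> float:
    """Map any real number into the open interval (0, 1)."""
    return 1 / (1 + math.exp(-x))

assert logistic(0) == 0.5  # midpoint; output approaches 1 for large x, 0 for very negative x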
paid_experience = [experience[i] for i in paid_indexes]
unpaid_experience = [experience[i] for i in unpaid_indexes]
paid_salary = [salary[i] for i in paid_indexes]
unpaid_salary = [salary[i] for i in unpaid_indexes]

# Plot it. Notice the apparent separation of the data.
plt.scatter(paid_experience, paid_salary, marker='x', color='black')
plt.scatter(unpaid_experience, unpaid_salary, marker='o', color='grey')
plt.title("Paid and Unpaid Users")
plt.xlabel("experience")
plt.ylabel("salary")
plt.show()

# Rescale the data so the features are on comparable scales.
rescale_x = rescale(x)
beta_0 = [1, 1, 1]
beta_hat = logistic_batch(rescale_x, y, beta_0)
beta_hat

# The result should be approximately [-2.11, 4.54, -4.41].
# This translates to paid = logistic(-2.11 + 4.54 * experience_rescaled - 4.41 * salary_rescaled).
# Take the linear expression, set it equal to zero, translate it back to
# unscaled units, and we get roughly:
#     salary = 5615.11 * experience + 31167.71
# Plot this equation along with our original data.
# How well does it separate the two types of data?
# How does the logistic function come into play?
# NOTE: This is not the least squares line for the original data.
# It is the linear expression found to maximize the likelihood of the
# logistic model given the data.
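The "translate it back to unscaled units" step can be made concrete. A sketch, assuming rescale standardizes each column to mean 0 / stdev 1 and that scale returns those column means and stdevs (as in working_with_data); column 0 is the constant 1:

means, stdevs = scale(x)
b0, b1, b2 = beta_hat

# The boundary is b0 + b1 * (exp - m1) / s1 + b2 * (sal - m2) / s2 = 0.
# Expanding and solving for salary in raw units:
slope = -(b1 / stdevs[1]) / (b2 / stdevs[2])
intercept = -(b0
              - b1 * means[1] / stdevs[1]
              - b2 * means[2] / stdevs[2]) / (b2 / stdevs[2])
# With beta_hat of roughly [-2.11, 4.54, -4.41] this works out to about
# salary = 5615.11 * experience + 31167.71, the line quoted above.

# Overlay the boundary on the scatter plot from before:
plt.scatter(paid_experience, paid_salary, marker='x', color='black')
plt.scatter(unpaid_experience, unpaid_salary, marker='o', color='grey')
plt.plot([0, 10], [intercept, slope * 10 + intercept], color='black')
plt.show()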
def test_rescale(self):
    self.assertEqual([[-0.5 / math.sqrt(0.5), 3 / math.sqrt(18)],
                      [0.5 / math.sqrt(0.5), -3 / math.sqrt(18)]],
                     working_with_data.rescale([[1, 5], [2, -1]]))
    self.assertEqual([[3]], working_with_data.rescale([[3]]))
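The expected values pin down what rescale must do: shift each column by its mean and divide by its sample standard deviation, leaving any column whose standard deviation is zero or undefined (the single-row [[3]] case) untouched. A minimal sketch consistent with this test:

import math
from typing import List

def rescale(data: List[List[float]]) -> List[List[float]]:
    """Standardize each column to mean 0 / sample stdev 1;
    leave columns with zero or undefined stdev as-is."""
    n, dim = len(data), len(data[0])
    means = [sum(row[j] for row in data) / n for j in range(dim)]
    stdevs = [math.sqrt(sum((row[j] - means[j]) ** 2 for row in data) / (n - 1))
              if n > 1 else 0
              for j in range(dim)]
    return [[(row[j] - means[j]) / stdevs[j] if stdevs[j] > 0 else row[j]
             for j in range(dim)]
            for row in data]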
    return minimum_stochastic(negate(target_fn), negate_all(gradient_fn),
                              x, y, theta_0, step_size)

if __name__ == "__main__":
    data = [
        (0.7,48000,1),(1.9,48000,0),(2.5,60000,1),(4.2,63000,0),(6,76000,0),(6.5,69000,0),(7.5,76000,0),(8.1,88000,0),(8.7,83000,1),(10,83000,1),
        (0.8,43000,0),(1.8,60000,0),(10,79000,1),(6.1,76000,0),(1.4,50000,0),(9.1,92000,0),(5.8,75000,0),(5.2,69000,0),(1,56000,0),(6,67000,0),
        (4.9,74000,0),(6.4,63000,1),(6.2,82000,0),(3.3,58000,0),(9.3,90000,1),(5.5,57000,1),(9.1,102000,0),(2.4,54000,0),(8.2,65000,1),(5.3,82000,0),
        (9.8,107000,0),(1.8,64000,0),(0.6,46000,1),(0.8,48000,0),(8.6,84000,1),(0.6,45000,0),(0.5,30000,1),(7.3,89000,0),(2.5,48000,1),(5.6,76000,0),
        (7.4,77000,0),(2.7,56000,0),(0.7,48000,0),(1.2,42000,0),(0.2,32000,1),(4.7,56000,1),(2.8,44000,1),(7.6,78000,0),(1.1,63000,0),(8,79000,1),
        (2.7,56000,0),(6,52000,1),(4.6,56000,0),(2.5,51000,0),(5.7,71000,0),(2.9,65000,0),(1.1,33000,1),(3,62000,0),(4,71000,0),(2.4,61000,0),
        (7.5,75000,0),(9.7,81000,1),(3.2,62000,0),(7.9,88000,0),(4.7,44000,1),(2.5,55000,0),(1.6,41000,0),(6.7,64000,1),(6.9,66000,1),(7.9,78000,1),
        (8.1,102000,0),(5.3,48000,1),(8.5,66000,1),(0.2,56000,0),(6,69000,0),(7.5,77000,0),(8,86000,0),(4.4,68000,0),(4.9,75000,0),(1.5,60000,0),
        (2.2,50000,0),(3.4,49000,1),(4.2,70000,0),(7.7,98000,0),(8.2,85000,0),(5.4,88000,0),(0.1,46000,0),(1.5,37000,0),(6.3,86000,0),(3.7,57000,0),
        (8.4,85000,0),(2,42000,0),(5.8,69000,1),(2.7,64000,0),(3.1,63000,0),(1.9,48000,0),(10,72000,1),(0.2,45000,0),(8.6,95000,0),(1.5,64000,0),
        (9.8,95000,0),(5.3,65000,0),(7.5,80000,0),(9.9,91000,0),(9.7,50000,1),(2.8,68000,0),(3.6,58000,0),(3.9,74000,0),(4.4,76000,0),(2.5,49000,0),
        (7.2,81000,0),(5.2,60000,1),(2.4,62000,0),(8.9,94000,0),(2.4,63000,0),(6.8,69000,1),(6.5,77000,0),(7,86000,0),(9.4,94000,0),(7.8,72000,1),
        (0.2,53000,0),(10,97000,0),(5.5,65000,0),(7.7,71000,1),(8.1,66000,1),(9.8,91000,0),(8,84000,0),(2.7,55000,0),(2.8,62000,0),(9.4,79000,0),
        (2.5,57000,0),(7.4,70000,1),(2.1,47000,0),(5.3,62000,1),(6.3,79000,0),(6.8,58000,1),(5.7,80000,0),(2.2,61000,0),(4.8,62000,0),(3.7,64000,0),
        (4.1,85000,0),(2.3,51000,0),(3.5,58000,0),(0.9,43000,0),(0.9,54000,0),(4.5,74000,0),(6.5,55000,1),(4.1,41000,1),(7.1,73000,0),(1.1,66000,0),
        (9.1,81000,1),(8,69000,1),(7.3,72000,1),(3.3,50000,0),(3.9,58000,0),(2.6,49000,0),(1.6,78000,0),(0.7,56000,0),(2.1,36000,1),(7.5,90000,0),
        (4.8,59000,1),(8.9,95000,0),(6.2,72000,0),(6.3,63000,0),(9.1,100000,0),(7.3,61000,1),(5.6,74000,0),(0.5,66000,0),(1.1,59000,0),(5.1,61000,0),
        (6.2,70000,0),(6.6,56000,1),(6.3,76000,0),(6.5,78000,0),(5.1,59000,0),(9.5,74000,1),(4.5,64000,0),(2,54000,0),(1,52000,0),(4,69000,0),
        (6.5,76000,0),(3,60000,0),(4.5,63000,0),(7.8,70000,0),(3.9,60000,1),(0.8,51000,0),(4.2,78000,0),(1.1,54000,0),(6.2,60000,0),(2.9,59000,0),
        (2.1,52000,0),(8.2,87000,0),(4.8,73000,0),(2.2,42000,1),(9.1,98000,0),(6.5,84000,0),(6.9,73000,0),(5.1,72000,0),(9.1,69000,1),(9.8,79000,1)]

    data = list(map(list, data))               # change tuples to lists
    x = [[1] + data_i[:2] for data_i in data]  # each element is [1, experience, salary]
    y = [data_i[2] for data_i in data]         # each element is paid_account
    # pdb.set_trace()

    rescaled_x = rescale(x)
    x_train, y_train, x_test, y_test = train_test_split(rescaled_x, y, 0.33)

    print("batch gradient descent")
    target_fn = partial(logistic_log_likelihood, x_train, y_train)
    gradient_fn = partial(logistic_log_gradient, x_train, y_train)
    theta_0 = [1, 1, 1]
    theta = maximum_batch(target_fn, gradient_fn, theta_0)
    print(theta)

    print("stochastic gradient descent")
    theta_0 = [1, 1, 1]
    theta = maximum_stochastic(logistic_log_likelihood_i, logistic_log_gradient_i,
                               x_train, y_train, theta_0)
    print(theta)
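The return minimum_stochastic(negate(target_fn), negate_all(gradient_fn), ...) line at the top of this fragment shows the pattern: maximizing a function is delegated to minimizing its negation. A minimal sketch of those two helpers, assuming the gradient function returns a list of partial derivatives:

def negate(f):
    """Return a function that computes -f(*args)."""
    return lambda *args, **kwargs: -f(*args, **kwargs)

def negate_all(f):
    """For f returning a list (e.g. a gradient), negate every element."""
    return lambda *args, **kwargs: [-y for y in f(*args, **kwargs)]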
    (2.1, 52000, 0), (8.2, 87000, 0), (4.8, 73000, 0), (2.2, 42000, 1), (9.1, 98000, 0),
    (6.5, 84000, 0), (6.9, 73000, 0), (5.1, 72000, 0), (9.1, 69000, 1), (9.8, 79000, 1),
]

import numpy as np

data = list(map(list, data))            # change tuples to lists
x = [row[:2] + [-1] for row in data]    # each element is [experience, salary, -1];
                                        # the trailing -1 folds the bias into the features
y = np.array([row[2] for row in data])  # each element is paid_account
X = np.array(rescale(x))
y = np.array([y_i if y_i == 1 else -1 for y_i in y])  # SVM labels live in {-1, +1}

def svm_sgd(X, Y):
    """Stochastic (sub)gradient descent for a linear SVM: hinge-loss updates
    plus a regularization term whose weight decays as 1 / epoch."""
    w = np.zeros(len(X[0]))
    eta = 1
    epochs = 100000
    for epoch in range(1, epochs):
        for i, x_i in enumerate(X):
            if Y[i] * np.dot(x_i, w) < 1:
                # misclassified or inside the margin: hinge-loss gradient step
                w = w + eta * ((x_i * Y[i]) + (-2 * (1 / epoch) * w))
            else:
                # correctly classified: only shrink w (regularization)
                w = w + eta * (-2 * (1 / epoch) * w)
    return w
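A possible usage sketch: train on the rescaled features and classify by the sign of the dot product. Here svm_predict is a hypothetical helper, not part of the fragment above:

w = svm_sgd(X, y)

def svm_predict(x_i, w):
    """Hypothetical helper: +1 = paid account, -1 = unpaid."""
    return 1 if np.dot(x_i, w) >= 0 else -1

train_accuracy = sum(svm_predict(x_i, w) == y_i for x_i, y_i in zip(X, y)) / len(y)
print(f"training accuracy: {train_accuracy:.2f}")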
    (2.1, 52000, 0), (8.2, 87000, 0), (4.8, 73000, 0), (2.2, 42000, 1), (9.1, 98000, 0),
    (6.5, 84000, 0), (6.9, 73000, 0), (5.1, 72000, 0), (9.1, 69000, 1), (9.8, 79000, 1),
]

data = list(map(list, data))         # change tuples to lists
x = [[1] + row[:2] for row in data]  # each element is [1, experience, salary]
y = [row[2] for row in data]         # each element is paid_account

rescaled_x = rescale(x)              # rescale the features

print("logistic regression:")
random.seed(0)
x_train, x_test, y_train, y_test = train_test_split(rescaled_x, y, 0.33)

# we want to maximize log likelihood on the training data
fn = partial(logistic_log_likelihood, x_train, y_train)
gradient_fn = partial(logistic_log_gradient, x_train, y_train)

# pick a starting point
beta_0 = [1, 1, 1]

# and maximize using gradient descent
beta_hat = maximize_batch(fn, gradient_fn, beta_0)
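logistic_log_likelihood is the quantity being maximized here: the log likelihood of a point is log f(x_i · beta) when y_i = 1 and log(1 - f(x_i · beta)) when y_i = 0. A sketch consistent with its use above (logistic and dot as defined elsewhere in the chapter):

import math

def logistic_log_likelihood_i(x_i, y_i, beta):
    """Log likelihood of one labeled point under the logistic model."""
    if y_i == 1:
        return math.log(logistic(dot(x_i, beta)))
    else:
        return math.log(1 - logistic(dot(x_i, beta)))

def logistic_log_likelihood(x, y, beta):
    return sum(logistic_log_likelihood_i(x_i, y_i, beta)
               for x_i, y_i in zip(x, y))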
def main():
    from matplotlib import pyplot as plt
    plt.close()
    plt.clf()
    plt.gca().clear()

    from working_with_data import rescale
    from src.scratch_dir import least_squares_fit, predict
    from src.scratch_dir import gradient_step

    learning_rate = 0.001
    rescaled_xs = rescale(xs)
    beta = least_squares_fit(rescaled_xs, ys, learning_rate, 1000, 1)
    # beta is approximately [0.26, 0.43, -0.43]
    predictions = [predict(x_i, beta) for x_i in rescaled_xs]

    plt.scatter(predictions, ys)
    plt.xlabel("predicted")
    plt.ylabel("actual")
    # plt.show()
    plt.savefig('im/linear_regression_for_probabilities.png')
    plt.close()

    from src.scratch_dir import train_test_split
    import random
    import tqdm

    random.seed(0)
    x_train, x_test, y_train, y_test = train_test_split(rescaled_xs, ys, 0.33)

    learning_rate = 0.01

    # pick a random starting point
    beta = [random.random() for _ in range(3)]

    with tqdm.trange(5000) as t:
        for epoch in t:
            gradient = negative_log_gradient(x_train, y_train, beta)
            beta = gradient_step(beta, gradient, -learning_rate)
            loss = negative_log_likelihood(x_train, y_train, beta)
            t.set_description(f"loss: {loss:.3f} beta: {beta}")

    from working_with_data import scale
    means, stdevs = scale(xs)
    beta_unscaled = [(beta[0]
                      - beta[1] * means[1] / stdevs[1]
                      - beta[2] * means[2] / stdevs[2]),
                     beta[1] / stdevs[1],
                     beta[2] / stdevs[2]]
    # beta_unscaled is approximately [8.9, 1.6, -0.000288]

    assert (negative_log_likelihood(xs, ys, beta_unscaled) ==
            negative_log_likelihood(rescaled_xs, ys, beta))

    true_positives = false_positives = true_negatives = false_negatives = 0

    for x_i, y_i in zip(x_test, y_test):
        prediction = logistic(dot(beta, x_i))

        if y_i == 1 and prediction >= 0.5:  # TP: paid and we predict paid
            true_positives += 1
        elif y_i == 1:                      # FN: paid and we predict unpaid
            false_negatives += 1
        elif prediction >= 0.5:             # FP: unpaid and we predict paid
            false_positives += 1
        else:                               # TN: unpaid and we predict unpaid
            true_negatives += 1

    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)
    print(precision, recall)
    assert precision == 0.75
    assert recall == 0.8

    plt.clf()
    plt.gca().clear()
    predictions = [logistic(dot(beta, x_i)) for x_i in x_test]
    plt.scatter(predictions, y_test, marker='+')
    plt.xlabel("predicted probability")
    plt.ylabel("actual outcome")
    plt.title("Logistic Regression Predicted vs. Actual")
    # plt.show()
    plt.savefig('im/logistic_regression_predicted_vs_actual.png')
    plt.gca().clear()
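main() leans on a loss and gradient defined earlier in the chapter. For orientation, a sketch of the negative log likelihood being minimized and its gradient (logistic and dot as used above; vector_sum assumed from the book's linear-algebra helpers):

import math

def _negative_log_likelihood(x, y, beta):
    """Negative log likelihood for one labeled point."""
    if y == 1:
        return -math.log(logistic(dot(x, beta)))
    else:
        return -math.log(1 - logistic(dot(x, beta)))

def negative_log_likelihood(xs, ys, beta):
    return sum(_negative_log_likelihood(x, y, beta)
               for x, y in zip(xs, ys))

def negative_log_gradient(xs, ys, beta):
    # the j-th partial for one point is -(y - logistic(dot(x, beta))) * x[j]
    return vector_sum([[-(y - logistic(dot(x, beta))) * x_j for x_j in x]
                       for x, y in zip(xs, ys)])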
plt.xlabel('years of experience')
plt.ylabel('annual salary')
plt.legend(loc=8)  # loc=8 is 'lower center'
plt.show()

####################################################################################################
## Least squares fit
####################################################################################################

from matplotlib import pyplot as plt
from working_with_data import rescale
from multiple_regression import least_squares_fit, predict
from gradient_descent import gradient_step

learning_rate = 0.01
rescaled_xs = rescale(xs)
beta = least_squares_fit(rescaled_xs, ys, learning_rate, 10, 1)
predictions = [predict(x_i, beta) for x_i in rescaled_xs]

plt.scatter(predictions, ys)
plt.xlabel("predicted")
plt.ylabel("actual")
plt.show()

####################################################################################################

import math
from Vector_operations_on_data import Vector, dot
from typing import List

def logistic(x: float) -> float:
    """Map any real number into the open interval (0, 1)."""
    return 1 / (1 + math.exp(-x))
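The chapter also uses the derivative of the logistic function, which has the convenient closed form f'(x) = f(x) * (1 - f(x)); a short sketch consistent with the definition just above:

def logistic_prime(x: float) -> float:
    """Derivative of logistic(x): logistic(x) * (1 - logistic(x))."""
    y = logistic(x)
    return y * (1 - y)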