(4.2, 78000, 0),
        (1.1, 54000, 0),
        (6.2, 60000, 0),
        (2.9, 59000, 0),
        (2.1, 52000, 0),
        (8.2, 87000, 0),
        (4.8, 73000, 0),
        (2.2, 42000, 1),
        (9.1, 98000, 0),
        (6.5, 84000, 0),
        (6.9, 73000, 0),
        (5.1, 72000, 0),
        (9.1, 69000, 1),
        (9.8, 79000, 1),
    ]

    ##### 16.1 The Problem
    # change tuples to lists
    data = list(map(list, data))
    # each element is [1, experience, salary]
    x = [[1] + row[:2] for row in data]
    # each element is paid_account
    y = [row[2] for row in data]

    print("linear regression:")
    rescaled_x = rescale(x)
    beta = estimate_beta(rescaled_x, y)
    print(beta)
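
    # Why the chapter moves on from this model: a quick sketch (assuming the
    # predict(x_i, beta) helper from the multiple-regression chapter, the same one
    # estimate_beta is built around) shows the linear fit produces "probabilities"
    # that are not confined to [0, 1].
    predictions = [predict(x_i, beta) for x_i in rescaled_x]
    print(min(predictions), max(predictions))  # some values fall below 0 or above 1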

    ##### 16.2 The Logistic Function
paid_experience = [experience[i] for i in paid_indexes]
unpaid_experience = [experience[i] for i in unpaid_indexes]
paid_salary = [salary[i] for i in paid_indexes]
unpaid_salary = [salary[i] for i in unpaid_indexes]

# Plot it. Notice the apparent separation of the data.
plt.scatter(paid_experience, paid_salary, marker='x', color='black')
plt.scatter(unpaid_experience, unpaid_salary, marker='o', color='grey')
plt.title("Paid and Unpaid Users")
plt.xlabel("experience")
plt.ylabel("salary")
plt.show()

# Rescale the data so the features are on comparable scales; otherwise the
# salary column (tens of thousands) dominates the gradient steps.
rescale_x = rescale(x)
beta_0 = [1,1,1]
beta_hat = logistic_batch(rescale_x,y,beta_0)

beta_hat

# The result should be approximately [-2.11, 4.54, -4.41].
# This translates to paid = logistic(-2.11 + 4.54 * experience_rescaled - 4.41 * salary_rescaled).
# Take the linear expression, set equal to zero, translate it back to unscaled units, and we get roughly:
# salary = 5615.11 * experience + 31167.71

# Plot this equation along with our original data.
# How well does it separate the two types of data?
# How does the logistic function come into play?
# NOTE: This is not the least squares line for the original data.
# It is the linear expression that maximizes the likelihood of the logistic model given the data.
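
# A minimal sketch of that plot (an assumption about how to visualize it, reusing
# the paid/unpaid lists from the scatter plot above and the unscaled boundary
# salary = 5615.11 * experience + 31167.71 derived above):
boundary_experience = [0, 10]  # roughly the experience range of the data
boundary_salary = [5615.11 * exp + 31167.71 for exp in boundary_experience]

plt.scatter(paid_experience, paid_salary, marker='x', color='black')
plt.scatter(unpaid_experience, unpaid_salary, marker='o', color='grey')
plt.plot(boundary_experience, boundary_salary, linestyle='--', color='black')  # decision boundary
plt.title("Paid and Unpaid Users with Decision Boundary")
plt.xlabel("experience")
plt.ylabel("salary")
plt.show()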
    def test_rescale(self):
        self.assertEqual([[-0.5 / math.sqrt(0.5), 3 / math.sqrt(18)],
                          [0.5 / math.sqrt(0.5), -3 / math.sqrt(18)]],
                         working_with_data.rescale([[1, 5], [2, -1]]))
        self.assertEqual([[3]], working_with_data.rescale([[3]]))

    return minimum_stochastic(negate(target_fn),
                              negate_all(gradient_fn), x, y, theta_0, step_size)





if __name__ == "__main__":

    data = [(0.7,48000,1),(1.9,48000,0),(2.5,60000,1),(4.2,63000,0),(6,76000,0),(6.5,69000,0),(7.5,76000,0),(8.1,88000,0),(8.7,83000,1),(10,83000,1),(0.8,43000,0),(1.8,60000,0),(10,79000,1),(6.1,76000,0),(1.4,50000,0),(9.1,92000,0),(5.8,75000,0),(5.2,69000,0),(1,56000,0),(6,67000,0),(4.9,74000,0),(6.4,63000,1),(6.2,82000,0),(3.3,58000,0),(9.3,90000,1),(5.5,57000,1),(9.1,102000,0),(2.4,54000,0),(8.2,65000,1),(5.3,82000,0),(9.8,107000,0),(1.8,64000,0),(0.6,46000,1),(0.8,48000,0),(8.6,84000,1),(0.6,45000,0),(0.5,30000,1),(7.3,89000,0),(2.5,48000,1),(5.6,76000,0),(7.4,77000,0),(2.7,56000,0),(0.7,48000,0),(1.2,42000,0),(0.2,32000,1),(4.7,56000,1),(2.8,44000,1),(7.6,78000,0),(1.1,63000,0),(8,79000,1),(2.7,56000,0),(6,52000,1),(4.6,56000,0),(2.5,51000,0),(5.7,71000,0),(2.9,65000,0),(1.1,33000,1),(3,62000,0),(4,71000,0),(2.4,61000,0),(7.5,75000,0),(9.7,81000,1),(3.2,62000,0),(7.9,88000,0),(4.7,44000,1),(2.5,55000,0),(1.6,41000,0),(6.7,64000,1),(6.9,66000,1),(7.9,78000,1),(8.1,102000,0),(5.3,48000,1),(8.5,66000,1),(0.2,56000,0),(6,69000,0),(7.5,77000,0),(8,86000,0),(4.4,68000,0),(4.9,75000,0),(1.5,60000,0),(2.2,50000,0),(3.4,49000,1),(4.2,70000,0),(7.7,98000,0),(8.2,85000,0),(5.4,88000,0),(0.1,46000,0),(1.5,37000,0),(6.3,86000,0),(3.7,57000,0),(8.4,85000,0),(2,42000,0),(5.8,69000,1),(2.7,64000,0),(3.1,63000,0),(1.9,48000,0),(10,72000,1),(0.2,45000,0),(8.6,95000,0),(1.5,64000,0),(9.8,95000,0),(5.3,65000,0),(7.5,80000,0),(9.9,91000,0),(9.7,50000,1),(2.8,68000,0),(3.6,58000,0),(3.9,74000,0),(4.4,76000,0),(2.5,49000,0),(7.2,81000,0),(5.2,60000,1),(2.4,62000,0),(8.9,94000,0),(2.4,63000,0),(6.8,69000,1),(6.5,77000,0),(7,86000,0),(9.4,94000,0),(7.8,72000,1),(0.2,53000,0),(10,97000,0),(5.5,65000,0),(7.7,71000,1),(8.1,66000,1),(9.8,91000,0),(8,84000,0),(2.7,55000,0),(2.8,62000,0),(9.4,79000,0),(2.5,57000,0),(7.4,70000,1),(2.1,47000,0),(5.3,62000,1),(6.3,79000,0),(6.8,58000,1),(5.7,80000,0),(2.2,61000,0),(4.8,62000,0),(3.7,64000,0),(4.1,85000,0),(2.3,51000,0),(3.5,58000,0),(0.9,43000,0),(0.9,54000,0),(4.5,74000,0),(6.5,55000,1),(4.1,41000,1),(7.1,73000,0),(1.1,66000,0),(9.1,81000,1),(8,69000,1),(7.3,72000,1),(3.3,50000,0),(3.9,58000,0),(2.6,49000,0),(1.6,78000,0),(0.7,56000,0),(2.1,36000,1),(7.5,90000,0),(4.8,59000,1),(8.9,95000,0),(6.2,72000,0),(6.3,63000,0),(9.1,100000,0),(7.3,61000,1),(5.6,74000,0),(0.5,66000,0),(1.1,59000,0),(5.1,61000,0),(6.2,70000,0),(6.6,56000,1),(6.3,76000,0),(6.5,78000,0),(5.1,59000,0),(9.5,74000,1),(4.5,64000,0),(2,54000,0),(1,52000,0),(4,69000,0),(6.5,76000,0),(3,60000,0),(4.5,63000,0),(7.8,70000,0),(3.9,60000,1),(0.8,51000,0),(4.2,78000,0),(1.1,54000,0),(6.2,60000,0),(2.9,59000,0),(2.1,52000,0),(8.2,87000,0),(4.8,73000,0),(2.2,42000,1),(9.1,98000,0),(6.5,84000,0),(6.9,73000,0),(5.1,72000,0),(9.1,69000,1),(9.8,79000,1),]
    data = list(map(list, data))                # change tuples to lists
    x = [[1] + data_i[:2] for data_i in data]   # each element is [1, experience, salary]
    y = [data_i[2] for data_i in data]          # each element is paid_account

    # pdb.set_trace()
    rescaled_x = rescale(x)
    x_train,y_train,x_test,y_test = train_test_split(rescaled_x,y,0.33)


    print "batch gradient down"
    target_fn = partial(logistic_log_likelihood,x_train,y_train)
    gradient_fn = partial(logistic_log_gradient,x_train,y_train)
    theta_0 = [1,1,1]
    theta = maximum_batch(target_fn,gradient_fn,theta_0)
    print theta

    print "stochastic gradient down"
    theta_0 = [1,1,1]
    theta = maximum_stochastic(logistic_log_likelihood_i,logistic_log_gradient_i,x_train,y_train,theta_0)
    print theta
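
    # A sketch of how the fitted theta could be checked on the held-out split
    # (assumes the logistic and dot helpers from earlier in the chapter):
    correct = sum(1 for x_i, y_i in zip(x_test, y_test)
                  if (logistic(dot(theta, x_i)) >= 0.5) == (y_i == 1))
    print("test accuracy:", correct / len(y_test))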
    
    (2.1, 52000, 0),
    (8.2, 87000, 0),
    (4.8, 73000, 0),
    (2.2, 42000, 1),
    (9.1, 98000, 0),
    (6.5, 84000, 0),
    (6.9, 73000, 0),
    (5.1, 72000, 0),
    (9.1, 69000, 1),
    (9.8, 79000, 1),
]
data = list(map(list, data))  # change tuples to lists

import numpy as np  # needed for the array operations below

x = [row[:2] + [-1] for row in data]    # each element is [experience, salary, -1] (constant bias feature)
y = np.array([row[2] for row in data])  # each element is paid_account
X = np.array(rescale(x))
y = [yi if yi == 1 else -1 for yi in y]  # SVM labels are +1 / -1 rather than 1 / 0


def svm_sgd(X, Y):
    """Fit a linear soft-margin SVM by stochastic gradient descent."""
    w = np.zeros(len(X[0]))
    eta = 1
    epochs = 100000

    for epoch in range(1, epochs):
        for i, x in enumerate(X):
            if (Y[i] * np.dot(X[i], w)) < 1:
                # misclassified or inside the margin:
                # hinge-loss subgradient step plus a shrinking regularization term
                w = w + eta * ((X[i] * Y[i]) + (-2 * (1 / epoch) * w))
            else:
                # correctly classified outside the margin: regularization step only
                w = w + eta * (-2 * (1 / epoch) * w)

    return w
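
# A minimal usage sketch (an assumption about how svm_sgd is meant to be called,
# with X and y being the rescaled features and +1/-1 labels built above):
w = svm_sgd(X, np.array(y))
predictions = [1 if np.dot(x_i, w) >= 0 else -1 for x_i in X]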
        (2.1, 52000, 0),
        (8.2, 87000, 0),
        (4.8, 73000, 0),
        (2.2, 42000, 1),
        (9.1, 98000, 0),
        (6.5, 84000, 0),
        (6.9, 73000, 0),
        (5.1, 72000, 0),
        (9.1, 69000, 1),
        (9.8, 79000, 1),
    ]
    data = list(map(list, data))  # change tuples to lists

    x = [[1] + row[:2] for row in data]  # each element is [1, experience, salary]
    y = [row[2] for row in data]  # each element is paid_account
    rescaled_x = rescale(x)  # rescale the features

    print("logistic regression:")

    random.seed(0)
    x_train, x_test, y_train, y_test = train_test_split(rescaled_x, y, 0.33)

    # want to maximize log likelihood on the training data
    fn = partial(logistic_log_likelihood, x_train, y_train)
    gradient_fn = partial(logistic_log_gradient, x_train, y_train)

    # pick a random starting point
    beta_0 = [1, 1, 1]

    # and maximize using gradient descent
    beta_hat = maximize_batch(fn, gradient_fn, beta_0)
Example #7
def main():

    from matplotlib import pyplot as plt
    plt.close()
    plt.clf()
    plt.gca().clear()

    from working_with_data import rescale
    from src.scratch_dir import least_squares_fit, predict
    from src.scratch_dir import gradient_step

    learning_rate = 0.001
    rescaled_xs = rescale(xs)
    beta = least_squares_fit(rescaled_xs, ys, learning_rate, 1000, 1)
    # [0.26, 0.43, -0.43]
    predictions = [predict(x_i, beta) for x_i in rescaled_xs]

    plt.scatter(predictions, ys)
    plt.xlabel("predicted")
    plt.ylabel("actual")
    # plt.show()

    plt.savefig('im/linear_regression_for_probabilities.png')
    plt.close()

    from src.scratch_dir import train_test_split
    import random
    import tqdm

    random.seed(0)
    x_train, x_test, y_train, y_test = train_test_split(rescaled_xs, ys, 0.33)

    learning_rate = 0.01

    # pick a random starting point
    beta = [random.random() for _ in range(3)]

    with tqdm.trange(5000) as t:
        for epoch in t:
            gradient = negative_log_gradient(x_train, y_train, beta)
            beta = gradient_step(beta, gradient, -learning_rate)
            loss = negative_log_likelihood(x_train, y_train, beta)
            t.set_description(f"loss: {loss:.3f} beta: {beta}")

    from working_with_data import scale

    means, stdevs = scale(xs)
    beta_unscaled = [(beta[0] - beta[1] * means[1] / stdevs[1] -
                      beta[2] * means[2] / stdevs[2]), beta[1] / stdevs[1],
                     beta[2] / stdevs[2]]
    # [8.9, 1.6, -0.000288]
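    # Why this unscaling works: each rescaled feature is (x_j - mean_j) / stdev_j, so
    #   dot(beta, rescaled_x) = beta[0] + sum_j beta[j] * (x_j - mean_j) / stdev_j
    # and collecting terms gives the unscaled intercept and slopes used above.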

    assert (negative_log_likelihood(xs, ys,
                                    beta_unscaled) == negative_log_likelihood(
                                        rescaled_xs, ys, beta))

    true_positives = false_positives = true_negatives = false_negatives = 0

    for x_i, y_i in zip(x_test, y_test):
        prediction = logistic(dot(beta, x_i))

        if y_i == 1 and prediction >= 0.5:  # TP: paid and we predict paid
            true_positives += 1
        elif y_i == 1:  # FN: paid and we predict unpaid
            false_negatives += 1
        elif prediction >= 0.5:  # FP: unpaid and we predict paid
            false_positives += 1
        else:  # TN: unpaid and we predict unpaid
            true_negatives += 1

    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)

    print(precision, recall)

    assert precision == 0.75
    assert recall == 0.8
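
    # In words: 75% of the users we predicted as paid really were paid (precision),
    # and we caught 80% of the users who actually paid (recall).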

    plt.clf()
    plt.gca().clear()

    predictions = [logistic(dot(beta, x_i)) for x_i in x_test]
    plt.scatter(predictions, y_test, marker='+')
    plt.xlabel("predicted probability")
    plt.ylabel("actual outcome")
    plt.title("Logistic Regression Predicted vs. Actual")
    # plt.show()

    plt.savefig('im/logistic_regression_predicted_vs_actual.png')
    plt.gca().clear()
plt.xlabel('years of experience')
plt.ylabel('annual salary')
plt.legend(loc = 8)
plt.show()

####################################################################################################
## Least squares fit
####################################################################################################

from matplotlib import pyplot as plt
from working_with_data import rescale
from multiple_regression import least_squares_fit, predict
from gradient_descent import gradient_step

learning_rate = 0.01
rescaled_xs = rescale(xs)
beta = least_squares_fit(rescaled_xs, ys, learning_rate, 10, 1)

predictions = [predict(x_i, beta) for x_i in rescaled_xs]

plt.scatter(predictions,ys)
plt.xlabel("predicted")
plt.ylabel("actual")
plt.show()

####################################################################################################
import math
from Vector_operations_on_data import Vector, dot
from typing import List

def logistic(x: float) -> float:
    return 1.0 / (1 + math.exp(-x))
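
# Its derivative is also handy for the gradient computations later in the chapter;
# a minimal sketch under the same conventions as logistic above:
def logistic_prime(x: float) -> float:
    y = logistic(x)
    return y * (1 - y)

assert logistic(0) == 0.5         # the curve passes through (0, 0.5)
assert logistic_prime(0) == 0.25  # and its slope there is 1/4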