def _negative_log_partial_j(x: Vector, y: float, beta: Vector,
                            j: int) -> float:
    """
    The j-th partial derivative for one data pont
    here i is the index of the data point
    """
    return -(y - logistic(dot(x, beta))) * x[j]
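A hedged sketch of how these partials can be assembled into gradients, assuming dot, logistic, and vector_sum are the chapter's usual helpers:

def _negative_log_gradient(x: Vector, y: float, beta: Vector) -> Vector:
    """The gradient of the negative log likelihood for one data point"""
    return [_negative_log_partial_j(x, y, beta, j) for j in range(len(beta))]

def negative_log_gradient(xs: List[Vector], ys: List[float],
                          beta: Vector) -> Vector:
    """The gradient for the whole dataset"""
    return vector_sum([_negative_log_gradient(x, y, beta)
                       for x, y in zip(xs, ys)])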
Example #2
def sqerror_gradients(network: List[List[Vector]],
                      input_vector: Vector,
                      target_vector: Vector) -> List[List[Vector]]:
    """Given a neural network, an input vector, and a target vector,
       make a prediction and compute the gradient of the squared error
       loss with respect to the neuron weights."""
    # forward pass
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # gradients with respect to output neuron pre-activation outputs
    output_deltas = [output * (1 - output) * (output - target)
                     for output, target in zip(outputs, target_vector)]

    # gradients with respect to output neuron weights
    output_grads = [[output_deltas[i] * hidden_output
                     for hidden_output in hidden_outputs + [1]]
                    for i, output_neuron in enumerate(network[-1])]

    # gradients with respect to hidden neuron pre-activation outputs
    hidden_deltas = [hidden_output * (1 - hidden_output) *
                        dot(output_deltas, [n[i] for n in network[-1]])
                     for i, hidden_output in enumerate(hidden_outputs)]

    # gradients with respect to hidden neuron weights
    hidden_grads = [[hidden_deltas[i] * input for input in input_vector + [1]]
                    for i, hidden_neuron in enumerate(network[0])]

    return [hidden_grads, output_grads]
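These layer-by-layer gradients plug straight into gradient descent. A minimal training-loop sketch, assuming gradient_step from the chapter and a training set of (xs, ys) pairs:

learning_rate = 1.0  # illustrative value
for x, y in zip(xs, ys):
    gradients = sqerror_gradients(network, x, y)

    # take a gradient step for each neuron in each layer
    network = [[gradient_step(neuron, grad, -learning_rate)
                for neuron, grad in zip(layer, layer_grads)]
               for layer, layer_grads in zip(network, gradients)]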
Example #3
def sum_of_squares(v: Vector) -> float:
    """ Computes the sum of squared elements in v"""
    return dot(v, v)
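For example, since dot(v, v) multiplies each element by itself and sums:

assert sum_of_squares([1, 2, 3]) == 14  # 1*1 + 2*2 + 3*3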
def main():

    from matplotlib import pyplot as plt
    plt.close()
    plt.clf()
    plt.gca().clear()

    from datascience.working_data import rescale
    from datascience.multiple_regression import least_squares_fit, predict
    from datascience.gradient_descent import gradient_step

    learning_rate = 0.001
    rescaled_xs = rescale(xs)
    beta = least_squares_fit(rescaled_xs, ys, learning_rate, 1000, 1)
    # [0.26, 0.43, -0.43]
    predictions = [predict(x_i, beta) for x_i in rescaled_xs]

    plt.scatter(predictions, ys)
    plt.xlabel("predicted")
    plt.ylabel("actual")
    # plt.show()

    plt.savefig('im/linear_regression_for_probabilities.png')
    plt.close()

    from datascience.machine_learning import train_test_split

    import random
    import tqdm

    random.seed(0)
    x_train, x_test, y_train, y_test = train_test_split(rescaled_xs, ys, 0.33)

    learning_rate = 0.01

    # pick a random starting point
    beta = [random.random() for _ in range(3)]

    with tqdm.trange(5000) as t:
        for epoch in t:
            gradient = negative_log_gradient(x_train, y_train, beta)
            beta = gradient_step(beta, gradient, -learning_rate)
            loss = negative_log_likelihood(x_train, y_train, beta)
            t.set_description(f"loss: {loss:.3f} beta: {beta}")

    from datascience.working_data import scale

    means, stdevs = scale(xs)
    beta_unscaled = [(beta[0]
                      - beta[1] * means[1] / stdevs[1]
                      - beta[2] * means[2] / stdevs[2]),
                     beta[1] / stdevs[1],
                     beta[2] / stdevs[2]]
    # [8.9, 1.6, -0.000288]

    assert (negative_log_likelihood(xs, ys, beta_unscaled)
            == negative_log_likelihood(rescaled_xs, ys, beta))

    true_positives = false_positives = true_negatives = false_negatives = 0

    for x_i, y_i in zip(x_test, y_test):
        prediction = logistic(dot(beta, x_i))

        if y_i == 1 and prediction >= 0.5:  # TP: paid and we predict paid
            true_positives += 1
        elif y_i == 1:  # FN: paid and we predict unpaid
            false_negatives += 1
        elif prediction >= 0.5:  # FP: unpaid and we predict paid
            false_positives += 1
        else:  # TN: unpaid and we predict unpaid
            true_negatives += 1

    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)

    print(precision, recall)

    assert precision == 0.75
    assert recall == 0.8

    plt.clf()
    plt.gca().clear()

    predictions = [logistic(dot(beta, x_i)) for x_i in x_test]
    plt.scatter(predictions, y_test, marker='+')
    plt.xlabel("predicted probability")
    plt.ylabel("actual outcome")
    plt.title("Logistic Regression Predicted vs. Actual")
    # plt.show()

    plt.savefig('im/logistic_regression_predicted_vs_actual.png')
    plt.gca().clear()
def _negative_log_likelihood(x: Vector, y: float, beta: Vector) -> float:
    """The negative log likelihood for one data point"""
    if y == 1:
        return -math.log(logistic(dot(x, beta)))
    else:
        return -math.log(1 - logistic(dot(x, beta)))
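Summing this over the dataset gives the loss reported by the training loop above. A sketch, assuming the same List/Vector aliases:

def negative_log_likelihood(xs: List[Vector], ys: List[float],
                            beta: Vector) -> float:
    """The negative log likelihood for the whole dataset"""
    return sum(_negative_log_likelihood(x, y, beta)
               for x, y in zip(xs, ys))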
Example #6
def predict(x: Vector, beta: Vector) -> float:
    """ assume that the first element of x is 1"""
    return dot(x, beta)
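This prediction feeds the squared-error loss used to fit the model. A sketch of the per-point error terms, assuming the same conventions:

def error(x: Vector, y: float, beta: Vector) -> float:
    """The prediction error for one data point"""
    return predict(x, beta) - y

def squared_error(x: Vector, y: float, beta: Vector) -> float:
    return error(x, y, beta) ** 2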
Example #7
def ridge_penalty(beta: Vector, alpha: float) -> float:
    return alpha * dot(beta[1:], beta[1:])
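The penalty is simply added to the per-point squared error, so larger alpha shrinks the non-constant coefficients harder. A sketch, assuming an error helper like the one sketched above:

def squared_error_ridge(x: Vector, y: float, beta: Vector,
                        alpha: float) -> float:
    """estimate error plus ridge penalty on beta"""
    return error(x, y, beta) ** 2 + ridge_penalty(beta, alpha)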
Example #8
import random

from datascience.linear_algebra import add

def ridge_penalty_gradient(beta: Vector, alpha: float) -> Vector:
    """gradient of just the ridge penalty"""
    return [0.] + [2 * alpha * beta_j for beta_j in beta[1:]]

def sqerror_ridge_gradient(x: Vector, y: float, beta: Vector,
                           alpha: float) -> Vector:
    """the gradient corresponding to the ith squared error term,
       including the ridge penalty"""
    return add(sqerror_gradient(x, y, beta),
               ridge_penalty_gradient(beta, alpha))
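
The fitting call used below, least_squares_fit_ridge, is not shown in these examples. A minimal sketch of such a minibatch gradient-descent fit, assuming vector_mean and gradient_step from the chapter's helpers:

def least_squares_fit_ridge(xs: List[Vector], ys: List[float], alpha: float,
                            learning_rate: float, num_steps: int,
                            batch_size: int = 1) -> Vector:
    # start with a random guess for beta
    guess = [random.random() for _ in xs[0]]

    for _ in range(num_steps):
        for start in range(0, len(xs), batch_size):
            batch_xs = xs[start:start + batch_size]
            batch_ys = ys[start:start + batch_size]

            # average the ridge-penalized gradient over the minibatch
            gradient = vector_mean([sqerror_ridge_gradient(x, y, guess, alpha)
                                    for x, y in zip(batch_xs, batch_ys)])
            guess = gradient_step(guess, gradient, -learning_rate)

    return guess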

random.seed(0)
beta_0 = least_squares_fit_ridge(inputs, daily_minutes_good, 0.0,  # alpha
                                 learning_rate, 5000, 25)

assert 5 < dot(beta_0[1:], beta_0[1:]) < 6
assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_0) < 0.69

beta_0_1 = least_squares_fit_ridge(inputs, daily_minutes_good, 0.1,  # alpha
                                   learning_rate, 5000, 25)
# [30.8, 0.95, -1.83, 0.54]
assert 4 < dot(beta_0_1[1:], beta_0_1[1:]) < 5
assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_0_1) < 0.69

beta_1 = least_squares_fit_ridge(inputs, daily_minutes_good, 1,  # alpha
                                 learning_rate, 5000, 25)
# [30.6, 0.90, -1.68, 0.10]
assert 3 < dot(beta_1[1:], beta_1[1:]) < 4
assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_1) < 0.69

beta_10 = least_squares_fit_ridge(inputs, daily_minutes_good, 10,  # alpha
                                  learning_rate, 5000, 25)
Example #9
def transform_vector(v: Vector, components: List[Vector]) -> Vector:
    return [dot(v, w) for w in components]
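Applied row by row, the same dot products transform an entire dataset. A one-line sketch:

def transform(data: List[Vector], components: List[Vector]) -> List[Vector]:
    return [transform_vector(v, components) for v in data]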
Example #10
def project(v: Vector, w: Vector) -> Vector:
    """return the projection of v onto the direction w"""
    projection_length = dot(v, w)
    return scalar_multiply(projection_length, w)
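Subtracting this projection removes the component of v that lies along w, which is how subsequent principal components are found. A sketch, assuming subtract from the linear algebra helpers:

def remove_projection_from_vector(v: Vector, w: Vector) -> Vector:
    """projects v onto w and subtracts the result from v"""
    return subtract(v, project(v, w))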
Example #11
def directional_variance_gradient(data: List[Vector], w: Vector) -> Vector:
    """The gradient of directional variance with respect to w"""
    w_dir = direction(w)
    return [sum(2 * dot(v, w_dir) * v[i] for v in data) for i in range(len(w))]
Example #12
def directional_variance(data: List[Vector], w: Vector) -> float:
    """ Returns the variance of x in the direction of w"""
    w_dir = direction(w)
    return sum(dot(v, w_dir)**2 for v in data)
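Maximizing this objective with gradient ascent (using the gradient from Example #11) yields the first principal component. A sketch, assuming gradient_step, direction, and tqdm as used elsewhere in these examples:

def first_principal_component(data: List[Vector],
                              n: int = 100,
                              step_size: float = 0.1) -> Vector:
    # start with an arbitrary guess for the direction
    guess = [1.0 for _ in data[0]]

    with tqdm.trange(n) as t:
        for _ in t:
            dv = directional_variance(data, guess)
            gradient = directional_variance_gradient(data, guess)
            guess = gradient_step(guess, gradient, step_size)  # positive step: ascent
            t.set_description(f"dv: {dv:.3f}")

    return direction(guess)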
Example #13
def perceptron_output(weights: Vector, bias: float, x: Vector) -> float:
    """Returns 1 if the perceptron 'fires', 0 if not"""
    calculation = dot(weights, x) + bias
    return step_function(calculation)
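For instance, with weights [2., 2.] and bias -3. this perceptron computes an AND gate, assuming step_function returns 1 for non-negative inputs:

and_weights = [2., 2.]
and_bias = -3.

assert perceptron_output(and_weights, and_bias, [1, 1]) == 1
assert perceptron_output(and_weights, and_bias, [0, 1]) == 0
assert perceptron_output(and_weights, and_bias, [1, 0]) == 0
assert perceptron_output(and_weights, and_bias, [0, 0]) == 0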
Example #14
def neuron_output(weights: Vector, inputs: Vector) -> float:
    # weights includes the bias term, inputs includes a 1
    return sigmoid(dot(weights, inputs))
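This is the building block of the feed_forward call that Example #2 relies on. A sketch, assuming a network is a list of layers, each a list of weight vectors:

def feed_forward(neural_network: List[List[Vector]],
                 input_vector: Vector) -> List[Vector]:
    """Feeds the input vector through the network.
       Returns the outputs of all layers (not just the last one)."""
    outputs: List[Vector] = []

    for layer in neural_network:
        input_with_bias = input_vector + [1]              # append the bias input
        output = [neuron_output(neuron, input_with_bias)  # one output per neuron
                  for neuron in layer]
        outputs.append(output)

        # the output of this layer is the input to the next layer
        input_vector = output

    return outputs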