Exemple #1
0
    def forward(self, input: Tensor) -> Tensor:
        """
        Advance the layer by one step and return the new hidden state.

        Both the incoming `input` and the hidden state held before this call
        are stored on the instance (`self.input`, `self.prev_hidden`) so they
        are available for backpropagation.
        """
        self.input = input
        self.prev_hidden = self.hidden

        # One pre-activation per hidden unit:
        #   w[h] . input  +  u[h] . (current hidden state)  +  b[h]
        pre_activations = []
        for h in range(self.hidden_dim):
            from_input = dot(self.w[h], input)
            from_hidden = dot(self.u[h], self.hidden)
            pre_activations.append(from_input + from_hidden + self.b[h])

        # Apply tanh to the pre-activations; the result is the new hidden state.
        self.hidden = tensor_apply(tanh, pre_activations)
        return self.hidden
Exemple #2
0
def _negative_log_partial_j(x: Vector, y: float, beta: Vector,
                            j: int) -> float:
    """
    The j-th partial derivative of the negative log likelihood
    for a single data point (x, y).
    """
    # Difference between the label and the logistic prediction for this point.
    residual = y - logistic(dot(x, beta))
    return -residual * x[j]
Exemple #3
0
def sqerror_gradients(network: List[List[Vector]], input_vector: Vector,
                      target_vector: Vector) -> List[List[Vector]]:
    """
    Run the network forward on `input_vector`, then compute the gradient of
    the squared error loss (against `target_vector`) with respect to every
    neuron weight.

    Returns [hidden_grads, output_grads]: one list of per-weight gradients
    for each neuron in network[0] and in network[-1] respectively.
    """
    # Forward pass.
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # Gradients with respect to the output neurons' pre-activation outputs.
    output_deltas = []
    for out, target in zip(outputs, target_vector):
        output_deltas.append(out * (1 - out) * (out - target))

    # Gradients with respect to the output neurons' weights.
    # The trailing 1 is presumably the input paired with the bias weight.
    output_grads = []
    for i, _ in enumerate(network[-1]):
        output_grads.append(
            [output_deltas[i] * h for h in hidden_outputs + [1]])

    # Gradients with respect to the hidden neurons' pre-activation outputs.
    hidden_deltas = []
    for i, h in enumerate(hidden_outputs):
        # Weight that each output neuron applies to hidden neuron i.
        outgoing_weights = [neuron[i] for neuron in network[-1]]
        hidden_deltas.append(
            h * (1 - h) * dot(output_deltas, outgoing_weights))

    # Gradients with respect to the hidden neurons' weights.
    hidden_grads = []
    for i, _ in enumerate(network[0]):
        hidden_grads.append(
            [hidden_deltas[i] * x for x in input_vector + [1]])

    return [hidden_grads, output_grads]
    def forward(self, input: Tensor) -> Tensor:
        """
        Store `input` on the instance (for the backward pass) and return one
        output per neuron: dot(input, w[o]) + b[o].
        """
        self.input = input

        outputs = []
        for o in range(self.output_dim):
            outputs.append(dot(input, self.w[o]) + self.b[o])
        return outputs
 def loop(dataset: List[Rating],
          learning_rate: float = None) -> None:
     """
     Make one pass over `dataset`, showing the running average squared error
     in the progress bar description.

     When `learning_rate` is not None, the user and movie vectors for each
     rating are also updated in place with a gradient step; when it is None
     the pass only measures the loss.
     """
     with tqdm.tqdm(dataset) as progress:
         loss = 0.0
         for i, rating in enumerate(progress):
             movie_vector = movie_vectors[rating.movie_id]
             user_vector = user_vectors[rating.user_id]

             # Prediction is the dot product of the two embeddings.
             error = dot(user_vector, movie_vector) - rating.rating
             loss += error ** 2

             if learning_rate is not None:
                 # predicted = m_0 * u_0 + ... + m_k * u_k, so each u_j
                 # enters the output with coefficient m_j and each m_j with
                 # coefficient u_j. Both gradients are computed before either
                 # vector is modified.
                 user_gradient = [error * m_j for m_j in movie_vector]
                 movie_gradient = [error * u_j for u_j in user_vector]

                 # Gradient step on both vectors, in place.
                 for j in range(EMBEDDING_DIM):
                     user_vector[j] -= learning_rate * user_gradient[j]
                     movie_vector[j] -= learning_rate * movie_gradient[j]

             progress.set_description(f"avg loss: {loss / (i + 1)}")
def predict(x: Vector, beta: Vector) -> float:
    """
    Return the dot product of x with the coefficients beta.

    Assumes that the first element of x is 1.
    """
    prediction = dot(x, beta)
    return prediction
Exemple #7
0
def perceptron_output(weights: Vector, bias: float, x: Vector) -> float:
    """
    Returns 1 if the perceptron 'fires', 0 if not.

    The result is step_function applied to dot(weights, x) + bias.
    """
    return step_function(dot(weights, x) + bias)
Exemple #8
0
def neuron_output(weights: Vector, inputs: Vector) -> float:
    """
    Return sigmoid(dot(weights, inputs)).

    `weights` includes the bias term and `inputs` includes a 1 to match it.
    """
    weighted_sum = dot(weights, inputs)
    return sigmoid(weighted_sum)
def transform_vector(v: Vector, components: List[Vector]) -> Vector:
    """Return the dot product of v with each vector in components, in order."""
    transformed = []
    for component in components:
        transformed.append(dot(v, component))
    return transformed
def directional_variance_gradient(data: List[Vector], w: Vector) -> Vector:
    """
    The gradient of directional variance with respect to w.

    Component i of the result is the sum, over every v in data, of
    2 * dot(v, w_dir) * v[i], where w_dir = direction(w).
    """
    w_dir = direction(w)

    # dot(v, w_dir) does not depend on the component index i, so compute it
    # once per data point instead of once per (component, data point) pair.
    projections = [dot(v, w_dir) for v in data]

    return [
        sum(2 * projection * v[i] for v, projection in zip(data, projections))
        for i in range(len(w))
    ]
def project(v: Vector, w: Vector) -> Vector:
    """
    Return the projection of v onto the direction w,
    i.e. w scaled by dot(v, w).
    """
    return scalar_multiply(dot(v, w), w)
def matrix_times_vector(m: Matrix, v: Vector) -> Vector:
    """
    Multiply matrix m by vector v: one dot(row, v) per row of m.

    Asserts that the column count reported by shape(m) equals len(v).
    """
    _, num_cols = shape(m)
    assert num_cols == len(v), "must have (# of cols in m) == (# of elements in v)"

    # One entry per row of m.
    product = []
    for row in m:
        product.append(dot(row, v))
    return product
def directional_variance(data: List[Vector], w: Vector) -> float:
    """
    Returns the variance of the data in the direction of w:
    the sum of squared dot products of each data vector with direction(w).
    """
    w_dir = direction(w)
    squared_projections = (dot(v, w_dir) ** 2 for v in data)
    return sum(squared_projections)
def main():
    """
    Fit the regression on the daily-minutes data and sanity-check the results.

    In order:
      1. a least-squares fit, with assertions on each coefficient and on
         R-squared;
      2. a bootstrap of the coefficients, printing their standard errors;
      3. four ridge fits (alpha = 0.0, 0.1, 1, 10), with assertions on the
         squared size of the non-constant coefficients and on R-squared.

    This takes a while to run.
    """
    from typing import Tuple

    # NOTE(review): presumably a project-local `statistics` module -- the
    # standard-library one has no `daily_minutes_good`; confirm.
    from statistics import daily_minutes_good

    # Trailing positional arguments shared by every fit below. The learning
    # rate and the other two values were chosen by trial and error.
    learning_rate = 0.001
    fit_args = (learning_rate, 5000, 25)

    # --- Plain least-squares fit ---------------------------------------
    random.seed(0)
    beta = least_squares_fit(inputs, daily_minutes_good, *fit_args)

    coefficient_ranges = [
        (30.50, 30.70),  # constant
        (0.96, 1.00),    # num friends
        (-1.89, -1.85),  # work hours per day
        (0.91, 0.94),    # has PhD
    ]
    for k, (low, high) in enumerate(coefficient_ranges):
        assert low < beta[k] < high

    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta) < 0.68

    # --- Bootstrap standard errors -------------------------------------
    def estimate_sample_beta(pairs: List[Tuple[Vector, float]]):
        # Refit the model on one resampled set of (x, y) pairs.
        x_sample = [x for x, _ in pairs]
        y_sample = [y for _, y in pairs]
        beta = least_squares_fit(x_sample, y_sample, *fit_args)
        print("bootstrap sample", beta)
        return beta

    random.seed(0)  # so the results are reproducible

    # This will take a couple of minutes!
    labelled_points = list(zip(inputs, daily_minutes_good))
    bootstrap_betas = bootstrap_statistic(labelled_points,
                                          estimate_sample_beta, 100)

    bootstrap_standard_errors = []
    for i in range(4):
        ith_coefficients = [sample_beta[i] for sample_beta in bootstrap_betas]
        bootstrap_standard_errors.append(standard_deviation(ith_coefficients))

    print(bootstrap_standard_errors)

    # [1.272,    # constant term, actual error = 1.19
    #  0.103,    # num_friends,   actual error = 0.080
    #  0.155,    # work_hours,    actual error = 0.127
    #  1.249]    # phd,           actual error = 0.998

    # --- Ridge regression ----------------------------------------------
    # Each row: alpha, bounds on dot(beta[1:], beta[1:]), bounds on R-squared.
    ridge_cases = [
        (0.0, (5, 6), (0.67, 0.69)),  # [30.51, 0.97, -1.85, 0.91]
        (0.1, (4, 5), (0.67, 0.69)),  # [30.8, 0.95, -1.83, 0.54]
        (1, (3, 4), (0.67, 0.69)),    # [30.6, 0.90, -1.68, 0.10]
        (10, (1, 2), (0.5, 0.6)),     # [28.3, 0.67, -0.90, -0.01]
    ]

    # Seed once; the four fits then consume the random stream back to back.
    random.seed(0)
    for alpha, (sq_low, sq_high), (r2_low, r2_high) in ridge_cases:
        ridge_beta = least_squares_fit_ridge(inputs, daily_minutes_good,
                                             alpha, *fit_args)
        assert sq_low < dot(ridge_beta[1:], ridge_beta[1:]) < sq_high
        assert r2_low < multiple_r_squared(inputs, daily_minutes_good,
                                           ridge_beta) < r2_high
def ridge_penalty(beta: Vector, alpha: float) -> float:
    """
    Return alpha times the sum of squares of beta[1:].

    The first element of beta is excluded from the penalty.
    """
    penalized = beta[1:]
    return alpha * dot(penalized, penalized)
Exemple #16
0
def main():
    """
    Run the logistic-regression walkthrough end to end.

    In order:
      1. Fit a plain linear regression to the rescaled data and save a
         predicted-vs-actual scatter plot.
      2. Split the rescaled data into train/test sets and fit logistic
         regression coefficients by gradient descent on the training set.
      3. Convert the coefficients back to the unscaled data and check that
         the negative log likelihood is unchanged.
      4. Compute precision and recall on the test set, and save a scatter
         plot of predicted probability vs. actual outcome.

    Writes two PNG files under 'im/' and prints precision and recall.
    """
    import math

    from matplotlib import pyplot as plt
    # Reset any figure state left over from earlier plotting.
    plt.close()
    plt.clf()
    plt.gca().clear()

    from working_with_data import rescale
    from src.scratch_dir import least_squares_fit, predict
    from src.scratch_dir import gradient_step

    # --- 1. Linear regression on the rescaled data ----------------------
    learning_rate = 0.001
    rescaled_xs = rescale(xs)
    beta = least_squares_fit(rescaled_xs, ys, learning_rate, 1000, 1)
    # [0.26, 0.43, -0.43]
    predictions = [predict(x_i, beta) for x_i in rescaled_xs]

    plt.scatter(predictions, ys)
    plt.xlabel("predicted")
    plt.ylabel("actual")
    # plt.show()

    plt.savefig('im/linear_regression_for_probabilities.png')
    plt.close()

    # --- 2. Logistic regression by gradient descent ---------------------
    from src.scratch_dir import train_test_split
    import random
    import tqdm

    random.seed(0)
    x_train, x_test, y_train, y_test = train_test_split(rescaled_xs, ys, 0.33)

    learning_rate = 0.01

    # pick a random starting point
    beta = [random.random() for _ in range(3)]

    with tqdm.trange(5000) as t:
        for epoch in t:
            gradient = negative_log_gradient(x_train, y_train, beta)
            # Negative step size: move against the gradient to minimize.
            beta = gradient_step(beta, gradient, -learning_rate)
            loss = negative_log_likelihood(x_train, y_train, beta)
            t.set_description(f"loss: {loss:.3f} beta: {beta}")

    # --- 3. Express the coefficients for the unscaled data --------------
    from working_with_data import scale

    means, stdevs = scale(xs)
    beta_unscaled = [(beta[0] - beta[1] * means[1] / stdevs[1] -
                      beta[2] * means[2] / stdevs[2]), beta[1] / stdevs[1],
                     beta[2] / stdevs[2]]
    # [8.9, 1.6, -0.000288]

    # The two likelihoods are meant to be equal, but they are computed
    # through different floating-point operations, so compare them with a
    # tolerance instead of exact equality.
    assert math.isclose(negative_log_likelihood(xs, ys, beta_unscaled),
                        negative_log_likelihood(rescaled_xs, ys, beta))

    # --- 4. Precision / recall on the test set --------------------------
    true_positives = false_positives = true_negatives = false_negatives = 0

    for x_i, y_i in zip(x_test, y_test):
        prediction = logistic(dot(beta, x_i))

        if y_i == 1 and prediction >= 0.5:  # TP: paid and we predict paid
            true_positives += 1
        elif y_i == 1:  # FN: paid and we predict unpaid
            false_negatives += 1
        elif prediction >= 0.5:  # FP: unpaid and we predict paid
            false_positives += 1
        else:  # TN: unpaid and we predict unpaid
            true_negatives += 1

    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)

    print(precision, recall)

    assert precision == 0.75
    assert recall == 0.8

    plt.clf()
    plt.gca().clear()

    predictions = [logistic(dot(beta, x_i)) for x_i in x_test]
    plt.scatter(predictions, y_test, marker='+')
    plt.xlabel("predicted probability")
    plt.ylabel("actual outcome")
    plt.title("Logistic Regression Predicted vs. Actual")
    # plt.show()

    plt.savefig('im/logistic_regression_predicted_vs_actual.png')
    plt.gca().clear()
Exemple #17
0
def cosine_similarity(v1: Vector, v2: Vector) -> float:
    """
    Return dot(v1, v2) divided by the square root of
    dot(v1, v1) * dot(v2, v2).
    """
    numerator = dot(v1, v2)
    denominator = math.sqrt(dot(v1, v1) * dot(v2, v2))
    return numerator / denominator
Exemple #18
0
def _negative_log_likelihood(x: Vector, y: float, beta: Vector) -> float:
    """
    The negative log likelihood for one data point (x, y).

    Uses the logistic prediction p = logistic(dot(x, beta)): the likelihood
    is p when y == 1 and 1 - p otherwise.
    """
    p = logistic(dot(x, beta))
    likelihood = p if y == 1 else 1 - p
    return -math.log(likelihood)