Beispiel #1
0
    def forward(self, input: Tensor) -> Tensor:
        """
        Advance the recurrent layer one step and return the new hidden state.

        The input and the hidden state from before this call are stored on
        the instance so they can be used in backpropagation.
        """
        self.input = input
        self.prev_hidden = self.hidden

        pre_activations = []
        for h in range(self.hidden_dim):
            from_input = dot(self.w[h], input)          # input weights
            from_hidden = dot(self.u[h], self.hidden)   # hidden-state weights
            pre_activations.append(from_input + from_hidden + self.b[h])

        # tanh is the activation function; the result becomes the new state.
        self.hidden = tensor_apply(tanh, pre_activations)
        return self.hidden
    def forward(self, input: Tensor) -> Tensor:
        """
        Compute the next hidden state from input and the current hidden state.

        Stores input and the outgoing hidden state on the instance (they are
        needed for backprop), then returns the new hidden state.
        """
        self.input = input
        self.prev_hidden = self.hidden

        def pre_activation(h):
            # weights @ input + weights @ hidden + bias for hidden unit h
            return dot(self.w[h], input) + dot(self.u[h], self.hidden) + self.b[h]

        a = [pre_activation(h) for h in range(self.hidden_dim)]

        self.hidden = tensor_apply(tanh, a)  # tanh activation
        return self.hidden
Beispiel #3
0
    def forward(self, input: Tensor) -> Tensor:
        """
        Run one recurrent step: combine input with the current hidden state,
        apply tanh, and return (and store) the resulting hidden state.

        self.input and self.prev_hidden are saved for use in backprop.
        """
        self.input = input
        previous_hidden = self.hidden
        self.prev_hidden = previous_hidden

        weighted_sums = []
        for h in range(self.hidden_dim):
            total = dot(self.w[h], input)                     # weights @ input
            total = total + dot(self.u[h], previous_hidden)   # weights @ hidden
            total = total + self.b[h]                         # bias
            weighted_sums.append(total)

        new_hidden = tensor_apply(tanh, weighted_sums)
        self.hidden = new_hidden
        return self.hidden
Beispiel #4
0
def _negative_log_partial_j(x: Vector, y: float, beta: Vector,
                            j: int) -> float:
    """
    Return the j-th partial derivative of the negative log likelihood
    for a single observation (x, y): -(y - logistic(dot(x, beta))) * x[j].
    """
    predicted = logistic(dot(x, beta))
    residual = y - predicted
    return -residual * x[j]
def _negative_log_partial_j(x: Vector, y: float, beta: Vector,
                            j: int) -> float:
    """
    The j-th partial derivative of the negative log likelihood
    for one data point (x, y); j indexes the component of beta.
    """
    error = y - logistic(dot(x, beta))
    negated_error = -error
    return negated_error * x[j]
def directional_variance_gradient(data: List[Vector], w: Vector) -> Vector:
    """
    Return the gradient of the directional variance with respect to w.

    Component i is the sum over all data points v of
    2 * dot(v, w_dir) * v[i], where w_dir = direction(w).
    """
    w_dir = direction(w)
    # dot(v, w_dir) does not depend on i, so compute it once per data point
    # rather than once per (data point, component) pair.
    projections = [dot(v, w_dir) for v in data]
    return [sum(2 * projection * v[i]
                for projection, v in zip(projections, data))
            for i in range(len(w))]
def sqerror_gradients(network: List[List[Vector]],
                      input_vector: Vector,
                      target_vector: Vector) -> List[List[Vector]]:
    """
    Takes a neural network, an input vector, and a target vector.
    Runs the network forward, then computes the gradient of the squared
    error with respect to the neuron weights.

    Returns [hidden_grads, output_grads].
    """
    # forward pass
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # gradients with respect to the output neurons' pre-activation values
    output_deltas = []
    for output, target in zip(outputs, target_vector):
        output_deltas.append(output * (1 - output) * (output - target))

    output_layer = network[-1]

    # gradients with respect to the output neurons' weights
    # (the appended 1 pairs with each neuron's bias weight)
    output_grads = [[output_deltas[k] * fed_in
                     for fed_in in hidden_outputs + [1]]
                    for k, _ in enumerate(output_layer)]

    # gradients with respect to the hidden neurons' pre-activation values
    hidden_deltas = []
    for k, hidden_output in enumerate(hidden_outputs):
        outgoing_weights = [neuron[k] for neuron in output_layer]
        hidden_deltas.append(hidden_output * (1 - hidden_output) *
                             dot(output_deltas, outgoing_weights))

    # gradients with respect to the hidden neurons' weights
    hidden_grads = [[hidden_deltas[k] * value
                     for value in input_vector + [1]]
                    for k, _ in enumerate(network[0])]

    return [hidden_grads, output_grads]
def sqerror_gradients(network: List[List[Vector]],
                      input_vector: Vector,
                      target_vector: Vector) -> List[List[Vector]]:
    """
    Feed input_vector through the network and return the gradient of the
    squared error loss (against target_vector) with respect to the neuron
    weights, as [hidden_grads, output_grads].
    """
    # forward pass
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # delta per output neuron: gradient w.r.t. its pre-activation output
    output_deltas = [out * (1 - out) * (out - wanted)
                     for out, wanted in zip(outputs, target_vector)]

    last_layer = network[-1]

    # output weight gradient = neuron delta * value on that weight's input
    # (hidden outputs followed by a constant 1 for the bias)
    output_grads = []
    for idx, _ in enumerate(last_layer):
        delta = output_deltas[idx]
        output_grads.append([delta * h_out for h_out in hidden_outputs + [1]])

    # delta per hidden neuron: gradient w.r.t. its pre-activation output
    hidden_deltas = [h_out * (1 - h_out) *
                     dot(output_deltas, [neuron[idx] for neuron in last_layer])
                     for idx, h_out in enumerate(hidden_outputs)]

    # hidden weight gradient = neuron delta * value on that weight's input
    hidden_grads = []
    for idx, _ in enumerate(network[0]):
        delta = hidden_deltas[idx]
        hidden_grads.append([delta * value for value in input_vector + [1]])

    return [hidden_grads, output_grads]
    def forward(self, input: Tensor) -> Tensor:
        """Store the input and return one weighted sum plus bias per output neuron."""
        self.input = input  # kept for the backward pass

        outputs = []
        for o in range(self.output_dim):
            outputs.append(dot(input, self.w[o]) + self.b[o])
        return outputs
Beispiel #10
0
def sqerror_gradients(network: List[List[Vector]],
                      input_vector: Vector,
                      target_vector: Vector) -> List[List[Vector]]:
    """
    Make a prediction for input_vector and compute the gradient of the
    squared error loss, relative to target_vector, with respect to the
    neuron weights.

    The result is the two-element list [hidden_grads, output_grads].
    """
    # forward pass
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # gradients with respect to output neuron pre-activation outputs
    output_deltas = []
    for prediction, target in zip(outputs, target_vector):
        slope = prediction * (1 - prediction)
        output_deltas.append(slope * (prediction - target))

    output_neurons = network[-1]

    # gradients with respect to output neuron weights
    output_grads = [[output_deltas[n] * activation
                     for activation in hidden_outputs + [1]]
                    for n, _ in enumerate(output_neurons)]

    # gradients with respect to hidden neuron pre-activation outputs
    hidden_deltas = []
    for n, activation in enumerate(hidden_outputs):
        slope = activation * (1 - activation)
        downstream = dot(output_deltas,
                         [neuron[n] for neuron in output_neurons])
        hidden_deltas.append(slope * downstream)

    # gradients with respect to hidden neuron weights
    hidden_grads = [[hidden_deltas[n] * component
                     for component in input_vector + [1]]
                    for n, _ in enumerate(network[0])]

    return [hidden_grads, output_grads]
def directional_variance_gradient(data: List[Vector], w: Vector) -> Vector:
    """
    The gradient of directional variance with respect to w.

    Entry i of the result is sum(2 * dot(v, w_dir) * v[i] for v in data),
    with w_dir = direction(w).
    """
    w_dir = direction(w)
    # The projection of each data point is the same for every component i,
    # so it is computed once up front instead of inside the per-i sum.
    projections = [dot(v, w_dir) for v in data]
    return [sum(2 * projection * v[i]
                for projection, v in zip(projections, data))
            for i in range(len(w))]
    def forward(self, input: Tensor) -> Tensor:
        """Return the vector of neuron outputs, dot(input, w[o]) + b[o] for each o."""
        # Remember the input; the backward pass uses it.
        self.input = input

        def neuron_value(o):
            return dot(input, self.w[o]) + self.b[o]

        return [neuron_value(o) for o in range(self.output_dim)]
    def forward(self, input: Tensor) -> Tensor:
        """Save the input for backpropagation and return every neuron's output."""
        self.input = input

        results = []
        for o in range(self.output_dim):
            weighted = dot(input, self.w[o])
            results.append(weighted + self.b[o])
        return results
Beispiel #14
0
def predict(x: Vector, beta: Vector) -> float:
    """
    Return the multiple-regression prediction dot(x, beta).

    :param x: vector [1, x_1, ..., x_n]; x_1..x_n are the input data values
    :param beta: vector [alpha, beta_1, ..., beta_n] of regression coefficients
    :return: the dot product of x and beta
    """
    prediction = dot(x, beta)
    return prediction
Beispiel #15
0
def covariance(xs: List[float], ys: List[float]) -> float:
    """Return the sample covariance (n - 1 denominator) of two equal-length vectors."""

    assert len(xs) == len(ys),"Vectors must be of the same length"

    def deviations(values):
        # Differences of each value from the mean of its own vector.
        mean = sum(values)/len(values)
        return [value - mean for value in values]

    return dot(deviations(xs), deviations(ys))/(len(xs) - 1)
Beispiel #16
0
    def loop(dataset: List[Rating], learning_rate: float = None) -> None:
        """
        Make one pass over dataset, showing the running average squared error.

        When learning_rate is given, every rating also triggers a gradient
        step on the user and movie vectors involved; with the default None
        the vectors are only evaluated, not changed.
        """
        loss = 0.0
        with tqdm.tqdm(dataset) as t:
            for i, rating in enumerate(t):
                movie_vector = movie_vectors[rating.movie_id]
                user_vector = user_vectors[rating.user_id]
                error = dot(user_vector, movie_vector) - rating.rating
                loss += error**2

                if learning_rate is not None:
                    # The prediction is sum_j m_j * u_j, so its derivative
                    # with respect to u_j is m_j and with respect to m_j is u_j.
                    # Both gradients are built before either vector changes.
                    user_gradient = [error * m_j for m_j in movie_vector]
                    movie_gradient = [error * u_j for u_j in user_vector]

                    # In-place gradient steps on both embeddings.
                    for j in range(EMBEDDING_DIM):
                        user_vector[j] -= learning_rate * user_gradient[j]
                        movie_vector[j] -= learning_rate * movie_gradient[j]

                t.set_description(f"avg loss: {loss / (i + 1)}")
 def loop(dataset: List[Rating],
          learning_rate: float = None) -> None:
     """
     Iterate once over dataset, accumulating squared prediction error and
     reporting its running average on the progress bar.

     If learning_rate is not None, the user and movie vectors for each
     rating are updated in place by one gradient step.
     """
     loss = 0.0
     training = learning_rate is not None
     with tqdm.tqdm(dataset) as t:
         for i, rating in enumerate(t):
             movie_vector = movie_vectors[rating.movie_id]
             user_vector = user_vectors[rating.user_id]
             predicted = dot(user_vector, movie_vector)
             error = predicted - rating.rating
             loss += error ** 2

             if training:
                 # predicted = m_0 * u_0 + ... + m_k * u_k, so u_j enters
                 # with coefficient m_j and m_j with coefficient u_j.
                 # Gradients are taken before any component is modified.
                 user_gradient = [error * m_j for m_j in movie_vector]
                 movie_gradient = [error * u_j for u_j in user_vector]

                 for j in range(EMBEDDING_DIM):
                     user_vector[j] -= learning_rate * user_gradient[j]
                     movie_vector[j] -= learning_rate * movie_gradient[j]

             t.set_description(f"avg loss: {loss / (i + 1)}")
Beispiel #18
0
 def loop(dataset: List[Rating],
          learning_rate: float = None) -> None:
     """
     Run through dataset once and display the average squared error so far.

     Passing a learning_rate turns the pass into a training pass: after
     each rating the matching user and movie vectors take a gradient step.
     """
     loss = 0.0
     with tqdm.tqdm(dataset) as t:
         for i, rating in enumerate(t):
             movie_vector = movie_vectors[rating.movie_id]
             user_vector = user_vectors[rating.user_id]
             error = dot(user_vector, movie_vector) - rating.rating
             loss += error ** 2

             if learning_rate is not None:
                 # predicted = m_0 * u_0 + ... + m_k * u_k,
                 # so each u_j contributes with coefficient m_j
                 # and each m_j contributes with coefficient u_j.
                 user_gradient = [error * m_j for m_j in movie_vector]
                 movie_gradient = [error * u_j for u_j in user_vector]

                 # Take a gradient step on every embedding component.
                 for j in range(EMBEDDING_DIM):
                     user_vector[j] -= learning_rate * user_gradient[j]
                     movie_vector[j] -= learning_rate * movie_gradient[j]

             t.set_description(f"avg loss: {loss / (i + 1)}")
def main():
    """
    Run the logistic-regression walkthrough end to end.

    Uses the names xs and ys (plus logistic, dot, negative_log_likelihood
    and negative_log_gradient) from outside this function. Fits a
    least-squares model and a logistic model, saves two scatter plots
    under im/, prints precision and recall on the test split, and asserts
    their expected values.
    """

    from matplotlib import pyplot as plt
    # Start from a clean figure and axes.
    plt.close()
    plt.clf()
    plt.gca().clear()

    from matplotlib import pyplot as plt
    from scratch.working_with_data import rescale
    from scratch.multiple_regression import least_squares_fit, predict
    from scratch.gradient_descent import gradient_step

    # Part 1: least-squares fit on the rescaled inputs.
    learning_rate = 0.001
    rescaled_xs = rescale(xs)
    # NOTE(review): the trailing 1000 and 1 are presumably the step count
    # and batch size -- confirm against least_squares_fit's signature.
    beta = least_squares_fit(rescaled_xs, ys, learning_rate, 1000, 1)
    # [0.26, 0.43, -0.43]
    predictions = [predict(x_i, beta) for x_i in rescaled_xs]

    plt.scatter(predictions, ys)
    plt.xlabel("predicted")
    plt.ylabel("actual")
    # plt.show()


    plt.savefig('im/linear_regression_for_probabilities.png')
    plt.close()

    from scratch.machine_learning import train_test_split
    import random
    import tqdm

    # Part 2: logistic regression trained by gradient descent.
    # Seeded so the split and the starting point are reproducible.
    random.seed(0)
    # NOTE(review): 0.33 is presumably the test fraction -- confirm.
    x_train, x_test, y_train, y_test = train_test_split(rescaled_xs, ys, 0.33)

    learning_rate = 0.01

    # pick a random starting point
    beta = [random.random() for _ in range(3)]

    with tqdm.trange(5000) as t:
        for epoch in t:
            gradient = negative_log_gradient(x_train, y_train, beta)
            # The learning rate is negated so the step moves against the
            # gradient (minimizing the negative log likelihood).
            beta = gradient_step(beta, gradient, -learning_rate)
            loss = negative_log_likelihood(x_train, y_train, beta)
            t.set_description(f"loss: {loss:.3f} beta: {beta}")

    from scratch.working_with_data import scale

    # Convert the coefficients fitted on rescaled data back to the
    # original units using the per-column means and standard deviations.
    means, stdevs = scale(xs)
    beta_unscaled = [(beta[0]
                      - beta[1] * means[1] / stdevs[1]
                      - beta[2] * means[2] / stdevs[2]),
                     beta[1] / stdevs[1],
                     beta[2] / stdevs[2]]
    # [8.9, 1.6, -0.000288]



    # NOTE(review): this is an exact float equality between two different
    # computations; it may be fragile across platforms.
    assert (negative_log_likelihood(xs, ys, beta_unscaled) ==
            negative_log_likelihood(rescaled_xs, ys, beta))

    # Confusion-matrix counts on the test split, thresholding at 0.5.
    true_positives = false_positives = true_negatives = false_negatives = 0

    for x_i, y_i in zip(x_test, y_test):
        prediction = logistic(dot(beta, x_i))

        if y_i == 1 and prediction >= 0.5:  # TP: paid and we predict paid
            true_positives += 1
        elif y_i == 1:                      # FN: paid and we predict unpaid
            false_negatives += 1
        elif prediction >= 0.5:             # FP: unpaid and we predict paid
            false_positives += 1
        else:                               # TN: unpaid and we predict unpaid
            true_negatives += 1

    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)



    print(precision, recall)

    # Expected values for this seed and data set.
    assert precision == 0.75
    assert recall == 0.8



    # Clear the previous plot before drawing the second figure.
    plt.clf()
    plt.gca().clear()

    predictions = [logistic(dot(beta, x_i)) for x_i in x_test]
    plt.scatter(predictions, y_test, marker='+')
    plt.xlabel("predicted probability")
    plt.ylabel("actual outcome")
    plt.title("Logistic Regression Predicted vs. Actual")
    # plt.show()



    plt.savefig('im/logistic_regression_predicted_vs_actual.png')
    plt.gca().clear()
def covariance(xs: List[float], ys: List[float]) -> float:
    """Return the sample covariance of xs and ys (n - 1 in the denominator)."""
    assert len(xs) == len(ys), "xs and ys must have same number of elements"

    x_deviations = de_mean(xs)
    y_deviations = de_mean(ys)
    return dot(x_deviations, y_deviations) / (len(xs) - 1)
def perceptron_output(weights: Vector, bias: float, x: Vector) -> float:
    """Return step_function(dot(weights, x) + bias): 1 if the perceptron 'fires', 0 if not."""
    return step_function(dot(weights, x) + bias)
def _negative_log_partial_j(x: Vector, y: float, beta: Vector, j: int) -> float:
    """
    Partial derivative, with respect to beta[j], of the negative log
    likelihood for the single data point (x, y).
    """
    difference = y - logistic(dot(x, beta))
    return (-difference) * x[j]
def project(v: Vector, w: Vector) -> Vector:
    """Return w scaled by dot(v, w), i.e. the projection of v onto the direction w."""
    return scalar_multiply(dot(v, w), w)
def neuron_output(weights: Vector, inputs: Vector) -> float:
    """
    Return the sigmoid of dot(weights, inputs).

    weights includes the bias term and inputs includes a matching 1.
    """
    weighted_sum = dot(weights, inputs)
    return sigmoid(weighted_sum)
def matrix_times_vector(m: Matrix, v: Vector) -> Vector:
    """Multiply matrix m by vector v, returning one dot product per row of m."""
    _, num_cols = shape(m)
    num_elements = len(v)
    assert num_cols == num_elements, "must have (# of cols in m) == (# of elements in v)"

    product = []
    for row in m:
        product.append(dot(row, v))
    return product
Beispiel #26
0
def transform_vector(v: Vector, components: List[Vector]) -> Vector:
    """Return the dot product of v with each vector in components, in order."""
    coordinates = []
    for component in components:
        coordinates.append(dot(v, component))
    return coordinates
Beispiel #27
0
def directional_variance(data: List[Vector], w: Vector) -> float:
    """
    Return the variance of the data in the direction of w, computed as
    the sum of squared dot products of each data point with direction(w).
    """
    w_dir = direction(w)
    projections = [dot(v, w_dir) for v in data]
    return sum(projection**2 for projection in projections)
def directional_variance(data: List[Vector], w: Vector) -> float:
    """
    Sum, over every vector in data, of its squared dot product with
    direction(w): the variance of the data in the direction of w.
    """
    heading = direction(w)

    def squared_projection(v):
        return dot(v, heading) ** 2

    return sum(squared_projection(v) for v in data)
def sum_of_squares(v: Vector) -> float:
    """Return the sum of the squared elements of v (v dotted with itself)."""
    total = dot(v, v)
    return total
def matrix_times_vector(m: Matrix, v: Vector) -> Vector:
    """Return the matrix-vector product of m and v as a list with one entry per row."""
    dimensions = shape(m)
    nr, nc = dimensions
    assert nc == len(v), "must have (# of cols in m) == (# of elements in v)"

    def row_product(row):
        return dot(row, v)

    return [row_product(row) for row in m]  # one entry per row of m
Beispiel #31
0
def predict(x: Vector, beta: Vector) -> float:
    """Return dot(x, beta); assumes the first element of each x is 1."""
    predicted = dot(x, beta)
    return predicted
Beispiel #32
0
def neuron(w: Vector, x: Vector) -> float:
    """
    Return sigmoid(dot(w, x)).

    The weights include the bias term and the inputs include a 1.
    """
    activation = dot(w, x)
    return sigmoid(activation)
Beispiel #33
0
def cosine_similarity(v1: Vector, v2: Vector) -> float:
    """Return dot(v1, v2) divided by the square root of dot(v1, v1) * dot(v2, v2)."""
    numerator = dot(v1, v2)
    squared_norms = dot(v1, v1) * dot(v2, v2)
    return numerator / math.sqrt(squared_norms)
Beispiel #34
0
def perceptron(w: Vector, bias: float, x: Vector) -> float:
    """
    Return the perceptron's output for input x.

    The weighted sum dot(w, x) + bias is passed through step();
    the result is 1 if the perceptron 'fires', 0 if not.
    """
    # Use the parameter `w`; the name `weights` is not defined in this
    # function, so referencing it raised NameError (or silently picked up
    # an unrelated global).
    z = dot(w, x) + bias
    return step(z)
def neuron_output(weights: Vector, inputs: Vector) -> float:
    """
    Return the sigmoid of the dot product of weights and inputs.

    The last position of weights holds the bias, and the last position
    of inputs holds the value 1.
    """
    total = dot(weights, inputs)
    return sigmoid(total)
def perceptron_output(weights: Vector, bias: float, x: Vector) -> float:
    """Apply step_function to dot(weights, x) + bias; 1 means the perceptron 'fires', 0 means it does not."""
    weighted_sum = dot(weights, x)
    return step_function(weighted_sum + bias)
def transform_vector(v: Vector, components: List[Vector]) -> Vector:
    """Return [dot(v, w) for each w in components]."""
    def along(w):
        return dot(v, w)

    return [along(w) for w in components]
def cosine_similarity(v1: Vector, v2: Vector) -> float:
    """Cosine similarity of v1 and v2: their dot product over the product of their lengths."""
    inner = dot(v1, v2)
    denominator = math.sqrt(dot(v1, v1) * dot(v2, v2))
    return inner / denominator
def neuron_output(weights: Vector, inputs: Vector) -> float:
    """Return sigmoid(dot(weights, inputs)); weights carries the bias term and inputs carries a 1."""
    pre_activation = dot(weights, inputs)
    return sigmoid(pre_activation)
def perceptron_output(weights: Vector, bias: float, x: Vector) -> float:
    """The perceptron returns 1 or 0: step_function of dot(weights, x) + bias."""
    total = dot(weights, x) + bias
    fired = step_function(total)
    return fired
def _negative_log_likelihood(x: Vector, y: float, beta: Vector) -> float:
    """Return the negative log likelihood of one data point (x, y) under beta."""
    probability = logistic(dot(x, beta))
    # y == 1 is scored against the predicted probability,
    # any other y against its complement.
    likelihood = probability if y == 1 else 1 - probability
    return -math.log(likelihood)
Beispiel #42
0
def sum_of_squares(v: Vector) -> float:
    """Compute the sum of the squares of the elements of v."""
    squared_total = dot(v, v)
    return squared_total
Beispiel #43
0
def predict(x: Vector, beta: Vector) -> float:
    """Return the dot product of x and beta; the first element of x is assumed to be 1."""
    result = dot(x, beta)
    return result
Beispiel #44
0
def project(v: Vector, w: Vector) -> Vector:
    """Project v onto the direction w: scale w by the dot product of v and w."""
    length_along_w = dot(v, w)
    projected = scalar_multiply(length_along_w, w)
    return projected
Beispiel #45
0
def ridge_penalty(beta: Vector, alpha: float) -> float:
    """Return alpha times the sum of squares of beta[1:]; beta[0] is not penalized."""
    penalized = beta[1:]
    return alpha * dot(penalized, penalized)
Beispiel #46
0
def covariance(xs: List[float], ys: List[float]):
    """Return dot(de_mean(xs), de_mean(ys)) / (n - 1) for two vectors of equal length n."""
    assert len(xs) == len(ys), "xs and ys must have same number of elements"

    centered_xs, centered_ys = de_mean(xs), de_mean(ys)
    return dot(centered_xs, centered_ys) / (len(xs) - 1)
Beispiel #47
0
def main():
    """
    Run the multiple-regression walkthrough.

    Fits a least-squares model to inputs / daily_minutes_good, checks the
    coefficients and R-squared, estimates coefficient standard errors by
    bootstrapping, and then fits ridge regressions for several alpha values.
    Relies on names from outside this function (inputs, least_squares_fit,
    multiple_r_squared, bootstrap_statistic, standard_deviation,
    least_squares_fit_ridge, dot, random).
    """
    from scratch.statistics import daily_minutes_good
    from scratch.gradient_descent import gradient_step

    random.seed(0)
    # I used trial and error to choose num_iters and step_size.
    # This may take a while.
    learning_rate = 0.001

    # NOTE(review): 5000 and 25 are presumably the step count and batch
    # size -- confirm against least_squares_fit's signature.
    beta = least_squares_fit(inputs, daily_minutes_good, learning_rate, 5000,
                             25)
    assert 30.50 < beta[0] < 30.70  # constant
    assert 0.96 < beta[1] < 1.00  # number of friends
    assert -1.89 < beta[2] < -1.85  # daily work hours
    assert 0.91 < beta[3] < 0.94  # has a PhD

    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta) < 0.68

    from typing import Tuple

    import datetime

    def estimate_sample_beta(pairs: List[Tuple[Vector, float]]):
        # Refit the model on one bootstrap sample of (x, y) pairs.
        x_sample = [x for x, _ in pairs]
        y_sample = [y for _, y in pairs]
        beta = least_squares_fit(x_sample, y_sample, learning_rate, 5000, 25)
        print("bootstrap sample", beta)
        return beta

    random.seed(0)  # so that you get the same results as I did

    # This may take a while.
    bootstrap_betas = bootstrap_statistic(
        list(zip(inputs, daily_minutes_good)), estimate_sample_beta, 100)

    # Standard deviation of each of the four coefficients across the
    # bootstrap fits.
    bootstrap_standard_errors = [
        standard_deviation([beta[i] for beta in bootstrap_betas])
        for i in range(4)
    ]

    print(bootstrap_standard_errors)

    # [1.272,    # constant term,     actual error = 1.19
    #  0.103,    # number of friends, actual error = 0.080
    #  0.155,    # third coefficient (daily work hours per the asserts above),
    #            #                    actual error = 0.127
    #  1.249]    # PhD,               actual error = 0.998

    # Ridge regression with increasing alpha; the asserts below check the
    # sum of squares of the non-constant coefficients and the R-squared.
    random.seed(0)
    beta_0 = least_squares_fit_ridge(
        inputs,
        daily_minutes_good,
        0.0,  # alpha
        learning_rate,
        5000,
        25)
    # [30.51, 0.97, -1.85, 0.91]
    assert 5 < dot(beta_0[1:], beta_0[1:]) < 6
    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_0) < 0.69

    beta_0_1 = least_squares_fit_ridge(
        inputs,
        daily_minutes_good,
        0.1,  # alpha
        learning_rate,
        5000,
        25)
    # [30.8, 0.95, -1.83, 0.54]
    assert 4 < dot(beta_0_1[1:], beta_0_1[1:]) < 5
    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good,
                                     beta_0_1) < 0.69

    beta_1 = least_squares_fit_ridge(
        inputs,
        daily_minutes_good,
        1,  # alpha
        learning_rate,
        5000,
        25)
    # [30.6, 0.90, -1.68, 0.10]
    assert 3 < dot(beta_1[1:], beta_1[1:]) < 4
    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_1) < 0.69

    beta_10 = least_squares_fit_ridge(
        inputs,
        daily_minutes_good,
        10,  # alpha
        learning_rate,
        5000,
        25)
    # [28.3, 0.67, -0.90, -0.01]
    assert 1 < dot(beta_10[1:], beta_10[1:]) < 2
    assert 0.5 < multiple_r_squared(inputs, daily_minutes_good, beta_10) < 0.6