def forward(self, input: Tensor) -> Tensor:
    """Advance the recurrent layer by one step and return the new hidden state.

    The input and the previous hidden state are stored on the instance so
    they can be used during backpropagation.
    """
    self.input = input
    self.prev_hidden = self.hidden

    # Pre-activation of every hidden unit:
    # (input weights @ input) + (hidden weights @ hidden state) + bias.
    pre_activations = []
    for h in range(self.hidden_dim):
        from_input = dot(self.w[h], input)
        from_hidden = dot(self.u[h], self.hidden)
        pre_activations.append(from_input + from_hidden + self.b[h])

    # tanh is the activation function; its output is the new hidden state.
    self.hidden = tensor_apply(tanh, pre_activations)
    return self.hidden
def forward(self, input: Tensor) -> Tensor:
    """Compute the next hidden state from ``input`` and the current one.

    Both ``input`` and the hidden state held before this call are saved on
    the instance for use in backprop.
    """
    # Remember what went in, and what the hidden state was beforehand.
    self.input = input
    self.prev_hidden = self.hidden

    weighted_sums = []
    for unit in range(self.hidden_dim):
        weighted_sums.append(
            dot(self.w[unit], input)            # weights @ input
            + dot(self.u[unit], self.hidden)    # weights @ hidden
            + self.b[unit]                      # bias
        )

    # Apply the tanh activation and hand back the result.
    self.hidden = tensor_apply(tanh, weighted_sums)
    return self.hidden
def forward(self, input: Tensor) -> Tensor:
    """Run one recurrent step: update and return ``self.hidden``.

    ``self.input`` and ``self.prev_hidden`` are set as a side effect so the
    backward pass can read them.
    """
    self.input = input
    self.prev_hidden = self.hidden

    def pre_activation(h: int):
        # weights @ input + weights @ hidden + bias for hidden unit h
        return dot(self.w[h], input) + dot(self.u[h], self.hidden) + self.b[h]

    # self.hidden is only rebound after every unit has been evaluated, so
    # each unit sees the same (previous) hidden state.
    new_hidden = tensor_apply(
        tanh, [pre_activation(h) for h in range(self.hidden_dim)]
    )
    self.hidden = new_hidden
    return new_hidden
def _negative_log_partial_j(x: Vector, y: float, beta: Vector, j: int) -> float:
    """
    The j-th partial derivative of the negative log likelihood
    for a single data point (x, y), evaluated at beta.
    """
    residual = y - logistic(dot(x, beta))
    return -residual * x[j]
def _negative_log_partial_j(x: Vector, y: float, beta: Vector, j: int) -> float:
    """
    The j-th partial derivative of the negative log likelihood for one
    data point; j indexes the component of beta being differentiated.
    """
    predicted = logistic(dot(x, beta))
    error = y - predicted
    return -error * x[j]
def directional_variance_gradient(data: List[Vector], w: Vector) -> Vector:
    """
    The gradient of directional variance with respect to w.

    Component i is the sum over all data points v of
    2 * dot(v, direction(w)) * v[i].
    """
    w_dir = direction(w)

    # The projection of each point onto w_dir does not depend on the
    # coordinate i, so compute it once per point rather than once per
    # (point, coordinate) pair.
    projections = [dot(v, w_dir) for v in data]

    return [sum(2 * projection * v[i]
                for projection, v in zip(projections, data))
            for i in range(len(w))]
def sqerror_gradients(network: List[List[Vector]],
                      input_vector: Vector,
                      target_vector: Vector) -> List[List[Vector]]:
    """
    Given a neural network, an input vector, and a target vector,
    run the network forward and then compute the gradient of the squared
    error with respect to the neuron weights.

    Returns [hidden_grads, output_grads].
    """
    # Forward pass.
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # Gradients with respect to the output neurons' pre-activation values.
    output_deltas = []
    for out, target in zip(outputs, target_vector):
        output_deltas.append(out * (1 - out) * (out - target))

    # Gradients with respect to the output neurons' weights
    # (the appended 1 is the bias input).
    output_grads = []
    for i, _ in enumerate(network[-1]):
        output_grads.append([output_deltas[i] * h
                             for h in hidden_outputs + [1]])

    # Gradients with respect to the hidden neurons' pre-activation values.
    hidden_deltas = []
    for i, h in enumerate(hidden_outputs):
        weights_from_i = [neuron[i] for neuron in network[-1]]
        hidden_deltas.append(h * (1 - h) * dot(output_deltas, weights_from_i))

    # Gradients with respect to the hidden neurons' weights.
    hidden_grads = []
    for i, _ in enumerate(network[0]):
        hidden_grads.append([hidden_deltas[i] * x
                             for x in input_vector + [1]])

    return [hidden_grads, output_grads]
def sqerror_gradients(network: List[List[Vector]],
                      input_vector: Vector,
                      target_vector: Vector) -> List[List[Vector]]:
    """
    Make a prediction for input_vector with the given network and return
    the gradient of the squared error loss with respect to the neuron
    weights, as [hidden layer gradients, output layer gradients].
    """
    # forward pass
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # d(loss)/d(pre-activation) for each output neuron
    output_deltas = [o * (1 - o) * (o - t)
                     for o, t in zip(outputs, target_vector)]

    # d(loss)/d(weight) for each output neuron; the trailing 1 is the
    # input that multiplies the bias weight
    output_grads = [[output_deltas[k] * h for h in hidden_outputs + [1]]
                    for k in range(len(network[-1]))]

    # d(loss)/d(pre-activation) for each hidden neuron: back-propagate the
    # output deltas through the weights leaving that hidden neuron
    hidden_deltas = [h * (1 - h) * dot(output_deltas,
                                       [neuron[k] for neuron in network[-1]])
                     for k, h in enumerate(hidden_outputs)]

    # d(loss)/d(weight) for each hidden neuron
    hidden_grads = [[hidden_deltas[k] * x for x in input_vector + [1]]
                    for k in range(len(network[0]))]

    return [hidden_grads, output_grads]
def forward(self, input: Tensor) -> Tensor:
    """Return the vector of neuron outputs for ``input``.

    The input is stored on the instance for use in the backward pass.
    """
    self.input = input

    outputs = []
    for o in range(self.output_dim):
        outputs.append(dot(input, self.w[o]) + self.b[o])
    return outputs
def forward(self, input: Tensor) -> Tensor:
    """Compute one output per neuron: dot(input, weights) + bias."""
    # Keep the input around; the backward pass needs it.
    self.input = input

    def neuron_output(o: int):
        return dot(input, self.w[o]) + self.b[o]

    return [neuron_output(o) for o in range(self.output_dim)]
def forward(self, input: Tensor) -> Tensor:
    """Return the outputs of all neurons in the layer for ``input``."""
    # Save the input to use in the backward pass.
    self.input = input

    # Build the vector of neuron outputs one neuron at a time.
    result = []
    for index in range(self.output_dim):
        weighted_input = dot(input, self.w[index])
        result.append(weighted_input + self.b[index])
    return result
def predict(x: Vector, beta: Vector) -> float:
    """
    Return the multiple-regression prediction for one input.

    :param x: vector [1, x_1, ..., x_n]; the leading 1 pairs with the
        constant term and x_1..x_n are the input data values
    :param beta: vector [alpha, beta_1, ..., beta_n] of regression
        coefficients, alpha being the constant term
    :return: the dot product of x and beta
    """
    prediction = dot(x, beta)
    return prediction
def covariance(xs: List[float], ys: List[float]) -> float:
    """Compute the sample covariance of two equal-length vectors.

    Each vector has its own mean subtracted; the dot product of the
    resulting deviations is divided by n - 1.
    """
    n = len(xs)
    assert n == len(ys), "Vectors must be of the same length"

    x_bar = sum(xs) / n
    y_bar = sum(ys) / n

    x_deviations = [x - x_bar for x in xs]
    y_deviations = [y - y_bar for y in ys]

    return dot(x_deviations, y_deviations) / (n - 1)
def loop(dataset: List[Rating], learning_rate: float = None) -> None:
    """Make one pass over ``dataset``, showing the running average squared error.

    The predicted rating is the dot product of the user's and the movie's
    embedding vectors. When ``learning_rate`` is not None, both vectors are
    also updated in place with one gradient step per rating; when it is
    None the pass only measures the loss.
    """
    with tqdm.tqdm(dataset) as progress:
        total_sq_error = 0.0
        for seen, rating in enumerate(progress, start=1):
            movie_vector = movie_vectors[rating.movie_id]
            user_vector = user_vectors[rating.user_id]

            error = dot(user_vector, movie_vector) - rating.rating
            total_sq_error += error ** 2

            if learning_rate is not None:
                # predicted = m_0 * u_0 + ... + m_k * u_k, so each u_j
                # enters the output with coefficient m_j and vice versa.
                # Both gradients are built before either vector changes.
                user_gradient = [error * m_j for m_j in movie_vector]
                movie_gradient = [error * u_j for u_j in user_vector]

                # Take the gradient steps.
                for j in range(EMBEDDING_DIM):
                    user_vector[j] -= learning_rate * user_gradient[j]
                    movie_vector[j] -= learning_rate * movie_gradient[j]

            progress.set_description(f"avg loss: {total_sq_error / seen}")
def loop(dataset: List[Rating], learning_rate: float = None) -> None:
    """Iterate once over ``dataset`` and report the average squared error.

    With a non-None ``learning_rate`` the user and movie vectors of each
    rating are adjusted in place by a gradient step; with None, the vectors
    are left untouched and only the loss is accumulated.
    """
    update_vectors = learning_rate is not None

    with tqdm.tqdm(dataset) as t:
        squared_error_sum = 0.0

        for i, rating in enumerate(t):
            movie_vec = movie_vectors[rating.movie_id]
            user_vec = user_vectors[rating.user_id]

            predicted = dot(user_vec, movie_vec)
            error = predicted - rating.rating
            squared_error_sum += error ** 2

            if update_vectors:
                # predicted = m_0 * u_0 + ... + m_k * u_k
                # So each u_j enters the output with coefficient m_j
                # and each m_j enters the output with coefficient u_j.
                grad_wrt_user = [error * m_j for m_j in movie_vec]
                grad_wrt_movie = [error * u_j for u_j in user_vec]

                # Gradient step on both embeddings.
                for j in range(EMBEDDING_DIM):
                    user_vec[j] -= learning_rate * grad_wrt_user[j]
                    movie_vec[j] -= learning_rate * grad_wrt_movie[j]

            t.set_description(f"avg loss: {squared_error_sum / (i + 1)}")
def loop(dataset: List[Rating], learning_rate: float = None) -> None:
    """Process every rating in ``dataset`` once, displaying the average loss.

    If ``learning_rate`` is given (not None), the embedding vectors for the
    rating's user and movie are modified in place by one gradient step.
    """
    with tqdm.tqdm(dataset) as bar:
        running_loss = 0.0
        for count, rating in enumerate(bar, start=1):
            movie_vector = movie_vectors[rating.movie_id]
            user_vector = user_vectors[rating.user_id]
            error = dot(user_vector, movie_vector) - rating.rating
            running_loss += error ** 2

            if learning_rate is not None:
                # predicted = m_0 * u_0 + ... + m_k * u_k,
                # so each u_j contributes to the output with coefficient m_j
                # and each m_j contributes with coefficient u_j.
                user_gradient = [error * m_j for m_j in movie_vector]
                movie_gradient = [error * u_j for u_j in user_vector]

                # Take a step along the gradient.
                for j in range(EMBEDDING_DIM):
                    user_vector[j] -= learning_rate * user_gradient[j]
                    movie_vector[j] -= learning_rate * movie_gradient[j]

            bar.set_description(f"avg loss: {running_loss / count}")
def main():
    """Fit linear and logistic regression models to xs/ys and save two plots.

    Steps, in order: fit a least-squares model on the rescaled inputs and
    save a predicted-vs-actual scatter plot; train a logistic regression by
    gradient steps on a train split; convert the coefficients back to the
    unscaled inputs; compute precision and recall on the test split; save a
    predicted-probability-vs-outcome scatter plot.

    ``xs`` and ``ys`` are not defined in this function; presumably they are
    module-level data -- verify against the rest of the file.
    """
    from matplotlib import pyplot as plt
    # Reset any existing matplotlib state before plotting.
    plt.close()
    plt.clf()
    plt.gca().clear()

    # NOTE(review): pyplot is imported a second time here; the repeat is redundant.
    from matplotlib import pyplot as plt
    from scratch.working_with_data import rescale
    from scratch.multiple_regression import least_squares_fit, predict
    from scratch.gradient_descent import gradient_step

    learning_rate = 0.001
    rescaled_xs = rescale(xs)
    beta = least_squares_fit(rescaled_xs, ys, learning_rate, 1000, 1)
    # [0.26, 0.43, -0.43]
    predictions = [predict(x_i, beta) for x_i in rescaled_xs]

    plt.scatter(predictions, ys)
    plt.xlabel("predicted")
    plt.ylabel("actual")
    # plt.show()

    plt.savefig('im/linear_regression_for_probabilities.png')
    plt.close()

    from scratch.machine_learning import train_test_split
    import random
    import tqdm

    # Fixed seed so the split and the starting point are reproducible.
    random.seed(0)
    x_train, x_test, y_train, y_test = train_test_split(rescaled_xs, ys, 0.33)

    learning_rate = 0.01

    # pick a random starting point
    beta = [random.random() for _ in range(3)]

    with tqdm.trange(5000) as t:
        for epoch in t:
            gradient = negative_log_gradient(x_train, y_train, beta)
            # Step with -learning_rate: move against the gradient of the
            # negative log likelihood.
            beta = gradient_step(beta, gradient, -learning_rate)
            loss = negative_log_likelihood(x_train, y_train, beta)
            t.set_description(f"loss: {loss:.3f} beta: {beta}")

    from scratch.working_with_data import scale

    # Express the coefficients in terms of the original (unscaled) inputs.
    means, stdevs = scale(xs)
    beta_unscaled = [(beta[0]
                      - beta[1] * means[1] / stdevs[1]
                      - beta[2] * means[2] / stdevs[2]),
                     beta[1] / stdevs[1],
                     beta[2] / stdevs[2]]
    # [8.9, 1.6, -0.000288]

    # NOTE(review): exact float equality between two separately computed
    # likelihoods is fragile; a tolerance-based comparison may be safer.
    assert (negative_log_likelihood(xs, ys, beta_unscaled) ==
            negative_log_likelihood(rescaled_xs, ys, beta))

    true_positives = false_positives = true_negatives = false_negatives = 0

    # Classify each test point with a 0.5 probability threshold.
    for x_i, y_i in zip(x_test, y_test):
        prediction = logistic(dot(beta, x_i))

        if y_i == 1 and prediction >= 0.5:  # TP: paid and we predict paid
            true_positives += 1
        elif y_i == 1:                      # FN: paid and we predict unpaid
            false_negatives += 1
        elif prediction >= 0.5:             # FP: unpaid and we predict paid
            false_positives += 1
        else:                               # TN: unpaid and we predict unpaid
            true_negatives += 1

    # NOTE(review): either division raises ZeroDivisionError if its
    # denominator count is zero.
    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)

    print(precision, recall)

    # These exact values depend on the seed and data used above.
    assert precision == 0.75
    assert recall == 0.8

    plt.clf()
    plt.gca().clear()

    predictions = [logistic(dot(beta, x_i)) for x_i in x_test]
    plt.scatter(predictions, y_test, marker='+')
    plt.xlabel("predicted probability")
    plt.ylabel("actual outcome")
    plt.title("Logistic Regression Predicted vs. Actual")
    # plt.show()

    plt.savefig('im/logistic_regression_predicted_vs_actual.png')
    plt.gca().clear()
def covariance(xs: List[float], ys: List[float]) -> float:
    """Return the sample covariance of xs and ys (n - 1 in the denominator).

    Both inputs must contain the same number of elements.
    """
    assert len(xs) == len(ys), "xs and ys must have same number of elements"

    x_deviations = de_mean(xs)
    y_deviations = de_mean(ys)
    return dot(x_deviations, y_deviations) / (len(xs) - 1)
def perceptron_output(weights: Vector, bias: float, x: Vector) -> float:
    """Returns 1 if the perceptron 'fires', 0 if not.

    The result is step_function applied to dot(weights, x) + bias.
    """
    return step_function(dot(weights, x) + bias)
def project(v: Vector, w: Vector) -> Vector:
    """Return the projection of v onto the direction w.

    Computed as dot(v, w) * w, which is the geometric projection when w has
    unit length.
    """
    return scalar_multiply(dot(v, w), w)
def neuron_output(weights: Vector, inputs: Vector) -> float:
    """Return the sigmoid of the weighted sum of the inputs.

    weights includes the bias term and inputs includes a matching 1.
    """
    weighted_sum = dot(weights, inputs)
    return sigmoid(weighted_sum)
def matrix_times_vector(m: Matrix, v: Vector) -> Vector:
    """Multiply matrix m by vector v.

    The number of columns of m must equal the length of v. The result has
    one entry per row of m: the dot product of that row with v.
    """
    _, num_cols = shape(m)
    assert num_cols == len(v), "must have (# of cols in m) == (# of elements in v)"

    product = []
    for row in m:
        product.append(dot(row, v))
    return product
def transform_vector(v: Vector, components: List[Vector]) -> Vector:
    """Return the dot product of v with each vector in components, in order."""
    transformed = []
    for component in components:
        transformed.append(dot(v, component))
    return transformed
def directional_variance(data: List[Vector], w: Vector) -> float:
    """
    Returns the variance of the data in the direction of w, computed as the
    sum of squared projections of each point onto direction(w)
    (the sum is not divided by the number of points).
    """
    w_dir = direction(w)
    total = 0
    for v in data:
        total += dot(v, w_dir) ** 2
    return total
def directional_variance(data: List[Vector], w: Vector) -> float:
    """
    Returns the sum, over every point in data, of the squared length of the
    point's projection onto direction(w).
    """
    unit_w = direction(w)
    squared_projections = [dot(point, unit_w) ** 2 for point in data]
    return sum(squared_projections)
def sum_of_squares(v: Vector) -> float:
    """Return v_1 * v_1 + ... + v_n * v_n, i.e. the dot product of v with itself."""
    total = dot(v, v)
    return total
def predict(x: Vector, beta: Vector) -> float:
    """Return dot(x, beta); assumes the first element of each x is 1."""
    predicted = dot(x, beta)
    return predicted
def neuron(w: Vector, x: Vector) -> float:
    """Return the sigmoid of dot(w, x).

    w includes the bias term and x includes a matching 1.
    """
    activation_input = dot(w, x)
    return sigmoid(activation_input)
def cosine_similarity(v1: Vector, v2: Vector) -> float:
    """Return dot(v1, v2) divided by the product of the two vectors' lengths.

    Raises ZeroDivisionError when dot(v1, v1) * dot(v2, v2) is zero.
    """
    numerator = dot(v1, v2)
    denominator = math.sqrt(dot(v1, v1) * dot(v2, v2))
    return numerator / denominator
def perceptron(w: Vector, bias: float, x: Vector) -> float:
    """Returns 1 if the perceptron 'fires', 0 if not.

    w is the weight vector, bias the offset added to the weighted sum, and
    x the input vector. The result is step(dot(w, x) + bias).
    """
    # The weight vector is the parameter `w`; the previous body read an
    # undefined name `weights` here.
    z = dot(w, x) + bias
    return step(z)
def neuron_output(weights: Vector, inputs: Vector) -> float:
    """Return sigmoid(dot(weights, inputs)).

    The last entry of weights is the bias, and the last entry of inputs
    is the constant 1 that multiplies it.
    """
    pre_activation = dot(weights, inputs)
    return sigmoid(pre_activation)
def perceptron_output(weights: Vector, bias: float, x: Vector) -> float:
    """The perceptron returns 1 or 0: step_function of dot(weights, x) + bias."""
    weighted_input = dot(weights, x)
    return step_function(weighted_input + bias)
def _negative_log_likelihood(x: Vector, y: float, beta: Vector) -> float:
    """The negative log likelihood for one data point.

    Uses the predicted probability p = logistic(dot(x, beta)) when y == 1,
    and 1 - p for any other y.
    """
    p = logistic(dot(x, beta))
    likelihood = p if y == 1 else 1 - p
    return -math.log(likelihood)
def sum_of_squares(v: Vector) -> float:
    """Computes the sum of the squares of the elements of v."""
    squared_sum = dot(v, v)
    return squared_sum
def predict(x: Vector, beta: Vector) -> float:
    """Return the prediction dot(x, beta).

    Assumes that the first element of x is 1, so that beta[0] acts as the
    constant term.
    """
    y_hat = dot(x, beta)
    return y_hat
def ridge_penalty(beta: Vector, alpha: float) -> float:
    """Return alpha times the sum of squares of beta[1:].

    The first coefficient, beta[0], is excluded from the penalty.
    """
    penalized = beta[1:]
    return alpha * dot(penalized, penalized)
def covariance(xs: List[float], ys: List[float]) -> float:
    """Return the sample covariance of xs and ys.

    Computed as the dot product of the de-meaned vectors divided by n - 1.
    Both inputs must have the same number of elements.
    """
    assert len(xs) == len(ys), "xs and ys must have same number of elements"

    return dot(de_mean(xs), de_mean(ys)) / (len(xs) - 1)
def main():
    """Fit and sanity-check multiple regression models on daily_minutes_good.

    Steps, in order: a least-squares fit checked against expected
    coefficient ranges; bootstrap estimates of the coefficient standard
    errors; ridge fits for alpha = 0.0, 0.1, 1 and 10, each checked for the
    squared size of its non-constant coefficients and for R-squared.

    ``inputs`` is not defined in this function; presumably it is
    module-level data -- verify against the rest of the file.
    """
    from scratch.statistics import daily_minutes_good
    from scratch.gradient_descent import gradient_step

    random.seed(0)
    # I used trial and error to choose num_iters and step_size.
    # This may take a while.
    learning_rate = 0.001

    beta = least_squares_fit(inputs, daily_minutes_good, learning_rate, 5000, 25)
    assert 30.50 < beta[0] < 30.70  # constant
    assert 0.96 < beta[1] < 1.00  # number of friends
    assert -1.89 < beta[2] < -1.85  # work hours per day
    assert 0.91 < beta[3] < 0.94  # has a PhD

    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta) < 0.68

    from typing import Tuple

    import datetime

    def estimate_sample_beta(pairs: List[Tuple[Vector, float]]):
        # Refit the model on one bootstrap sample of (x, y) pairs.
        x_sample = [x for x, _ in pairs]
        y_sample = [y for _, y in pairs]
        beta = least_squares_fit(x_sample, y_sample, learning_rate, 5000, 25)
        print("bootstrap sample", beta)
        return beta

    random.seed(0)  # so that you get the same results as me

    # This may take a while.
    bootstrap_betas = bootstrap_statistic(
        list(zip(inputs, daily_minutes_good)),
        estimate_sample_beta,
        100)

    # Standard deviation of each of the 4 coefficients across the
    # bootstrap fits.
    bootstrap_standard_errors = [
        standard_deviation([beta[i] for beta in bootstrap_betas])
        for i in range(4)
    ]

    print(bootstrap_standard_errors)

    # [1.272,    # constant term,     actual error = 1.19
    #  0.103,    # number of friends, actual error = 0.080
    #  0.155,    # work hours,        actual error = 0.127
    #  1.249]    # PhD,               actual error = 0.998

    random.seed(0)
    beta_0 = least_squares_fit_ridge(
        inputs, daily_minutes_good,
        0.0,  # alpha
        learning_rate, 5000, 25)
    # [30.51, 0.97, -1.85, 0.91]
    assert 5 < dot(beta_0[1:], beta_0[1:]) < 6
    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_0) < 0.69

    beta_0_1 = least_squares_fit_ridge(
        inputs, daily_minutes_good,
        0.1,  # alpha
        learning_rate, 5000, 25)
    # [30.8, 0.95, -1.83, 0.54]
    assert 4 < dot(beta_0_1[1:], beta_0_1[1:]) < 5
    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_0_1) < 0.69

    beta_1 = least_squares_fit_ridge(
        inputs, daily_minutes_good,
        1,  # alpha
        learning_rate, 5000, 25)
    # [30.6, 0.90, -1.68, 0.10]
    assert 3 < dot(beta_1[1:], beta_1[1:]) < 4
    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_1) < 0.69

    beta_10 = least_squares_fit_ridge(
        inputs, daily_minutes_good,
        10,  # alpha
        learning_rate, 5000, 25)
    # [28.3, 0.67, -0.90, -0.01]
    assert 1 < dot(beta_10[1:], beta_10[1:]) < 2
    assert 0.5 < multiple_r_squared(inputs, daily_minutes_good, beta_10) < 0.6