def backpropagate(network, input_vector, targets):
    """
    Adjust the weights of network in place so that the error between the
    network output and targets is reduced (one gradient step, step size 1).

    network[0] is treated as the hidden layer and network[-1] as the output
    layer. Each neuron is a mutable list of weights whose last entry is the
    bias weight (it is paired with a constant input of 1 below).

    input_vector must be a list, because a bias input is appended with
    ``input_vector + [1]``; the same holds for the hidden outputs returned
    by feed_forward. Nothing is returned.
    """
    # get the outputs of the network layers
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # compute the deltas for the output layer. note the derivative of the
    # logistic is logistic * (1 - logistic)
    output_deltas = [output * (1 - output) * (output - target)
                     for output, target in zip(outputs, targets)]

    # update the jth weight of the ith output neuron along the gradient;
    # the inputs of an output neuron are the hidden outputs plus the bias
    for i, output_neuron in enumerate(network[-1]):
        for j, hidden_output in enumerate(hidden_outputs + [1]):
            output_neuron[j] -= output_deltas[i] * hidden_output

    # back-propagate the errors to the hidden layer: o * (1 - o) times the
    # output deltas weighted by the ith weight of every output neuron.
    # The original referenced an undefined name `output_layer` here; the
    # output layer is network[-1].
    # NOTE: as in the original, this reads the output weights after they
    # were updated above.
    hidden_deltas = [hidden_output * (1 - hidden_output) *
                     dot_product(output_deltas, [n[i] for n in network[-1]])
                     for i, hidden_output in enumerate(hidden_outputs)]

    # update the weights (j) of the hidden layer neurons (i)
    for i, hidden_neuron in enumerate(network[0]):
        for j, input_value in enumerate(input_vector + [1]):
            hidden_neuron[j] -= hidden_deltas[i] * input_value


# (truncated in the original text)
# A much cleaner way of doing this is to compute the gradient of the
def perceptron(weights, bias, x):
    """
    Evaluate a perceptron on the input x.

    The weighted sum dot_product(weights, x) plus bias is passed through
    step_function and that result is returned (intended to be 1 when the
    sum exceeds 0 and 0 otherwise; the exact threshold rule is whatever
    step_function implements).
    """
    weighted_sum = dot_product(weights, x)
    return step_function(weighted_sum + bias)
def predict(x_i, beta):
    """
    Return the linear prediction dot_product(x_i, beta).

    beta is expected to have the same length as x_i, and x_i is assumed
    to carry a constant 1 entry so that one coefficient of beta acts as
    the intercept.
    """
    prediction = dot_product(x_i, beta)
    return prediction
def ridge_penalty(beta, alpha):
    """
    Return the ridge penalty for the coefficient vector beta.

    The penalty is alpha times the sum of squares of beta[1:], i.e. the
    first (constant-term) coefficient is left unpenalized. alpha is the
    hyperparameter that scales how strong the penalty is.
    """
    penalized = beta[1:]
    sum_of_squares = dot_product(penalized, penalized)
    return alpha * sum_of_squares
bootstrap_betas = [[float(beta_ls[i]) for i in range(3)] for beta_ls in bootstrap_betas] # calculate the standard errors bootstrap_standard_errors = [ standard_deviation([beta[i] for beta in bootstrap_betas]) for i in range(3) ] print "The Bootstrapped Standard Error is: " print bootstrap_standard_errors # Beta estimate for various ridge penalties # ############################################# print "Stochastic Gradient Descent: Ridge Penalty = 0.01" beta_0_01 = estimate_beta_ridge(xs, ys, alpha=decimal.Decimal(0.01)) print beta_0_01 print " Sum of squares of beta = %.5f " % (dot_product( beta_0_01[1:], beta_0_01[1:])) print "Stochastic Gradient Descent: Ridge Penalty = 0.1" beta_0_1 = estimate_beta_ridge(xs, ys, alpha=decimal.Decimal(0.1)) print beta_0_1 print " Sum of squares of beta = %.5f " % (dot_product( beta_0_1[1:], beta_0_1[1:])) print "Stochastic Gradient Descent: Ridge Penalty = 1.0" beta_1 = estimate_beta_ridge(xs, ys, alpha=decimal.Decimal(1.0)) print beta_1 print " Sum of squares of beta = %.5f " % (dot_product( beta_1[1:], beta_1[1:])) print "Stochastic Gradient Descent: Ridge Penalty = 10.0" beta_10 = estimate_beta_ridge(xs, ys, alpha=decimal.Decimal(10.0))
def logistic_log_partial_ij(x_i, y_i, beta, j):
    """
    Return the jth partial derivative term contributed by one data point.

    Here i is the point index and j is the gradient index: the result is
    (y_i - logistic(dot_product(x_i, beta))) multiplied by x_i[j].
    """
    predicted = logistic(dot_product(x_i, beta))
    residual = y_i - predicted
    return residual * x_i[j]
def logistic_log_likelihood_i(x_i, y_i, beta):
    """
    Return the log likelihood of the ith Bernoulli trial.

    With p = logistic(dot_product(x_i, beta)), the result is log(p) when
    y_i equals 1 and log(1 - p) for any other y_i.
    """
    probability = logistic(dot_product(x_i, beta))
    if y_i != 1:
        # the observed outcome is the complementary event
        probability = 1 - probability
    return math.log(probability)
# beta_i i!=0 has the following transform beta_i = beta_i_scaled/sigma_i # and beta_0 is beta_hat_unscaled =[beta_hat[0], beta_hat[1]/stds_x[1], beta_hat[2]/stds_x[2]] print "beta_hat_unscaled", beta_hat_unscaled # Fit Quality # ############### # Examine the test data true_positives = false_positives = true_negatives = false_negatives = 0 for x_i, y_i in zip(x_test, y_test): # For the test data get a prediction for y. This will be a # probability between 0 and 1 predict = logistic(dot_product(beta_hat, x_i)) # Set a threshold of 0.5 to make our prediction a binary 0 or 1 if y_i == 1 and predict >= 0.5: # increment true positives true_positives += 1 elif y_i == 1: # increment false negatives false_negatives += 1 elif predict >= 0.5: # increment false positives false_positives += 1 else: true_negatives += 1 # Get the precision and recall
def cosine_similarity(v, w):
    """
    Return the normalized projection of v onto w.

    This is dot_product(v, w) divided by the product of the magnitudes of
    v and w; the product is converted to float before dividing.
    """
    numerator = dot_product(v, w)
    denominator = float(magnitude(v) * magnitude(w))
    return numerator / denominator
def ridge_penalty(beta, alpha):
    """
    Return the ridge penalty for the coefficient vector beta.

    The penalty is alpha times the sum of squares of beta[1:], i.e. the
    first (constant-term) coefficient is left unpenalized. alpha is the
    hyperparameter that scales how strong the penalty is.
    """
    penalized = beta[1:]
    sum_of_squares = dot_product(penalized, penalized)
    return alpha * sum_of_squares
def predict(x_i, beta):
    """
    Return the linear prediction dot_product(x_i, beta).

    beta is expected to have the same length as x_i, and x_i is assumed
    to carry a constant 1 entry so that one coefficient of beta acts as
    the intercept.
    """
    prediction = dot_product(x_i, beta)
    return prediction
# convert the bootstrap betas back to floats bootstrap_betas = [[float(beta_ls[i]) for i in range(3)] for beta_ls in bootstrap_betas] # calculate the standard errors bootstrap_standard_errors = [standard_deviation([beta[i] for beta in bootstrap_betas]) for i in range(3)] print "The Bootstrapped Standard Error is: " print bootstrap_standard_errors # Beta estimate for various ridge penalties # ############################################# print "Stochastic Gradient Descent: Ridge Penalty = 0.01" beta_0_01 = estimate_beta_ridge(xs, ys, alpha=decimal.Decimal(0.01)) print beta_0_01 print " Sum of squares of beta = %.5f " %(dot_product(beta_0_01[1:], beta_0_01[1:])) print "Stochastic Gradient Descent: Ridge Penalty = 0.1" beta_0_1 = estimate_beta_ridge(xs, ys, alpha=decimal.Decimal(0.1)) print beta_0_1 print " Sum of squares of beta = %.5f " %(dot_product(beta_0_1[1:], beta_0_1[1:])) print "Stochastic Gradient Descent: Ridge Penalty = 1.0" beta_1= estimate_beta_ridge(xs, ys, alpha=decimal.Decimal(1.0)) print beta_1 print " Sum of squares of beta = %.5f " %(dot_product(beta_1[1:], beta_1[1:]))
def logistic_log_partial_ij(x_i, y_i, beta, j):
    """
    Return the jth partial derivative term contributed by one data point.

    Here i is the point index and j is the gradient index: the result is
    (y_i - logistic(dot_product(x_i, beta))) multiplied by x_i[j].
    """
    predicted = logistic(dot_product(x_i, beta))
    residual = y_i - predicted
    return residual * x_i[j]
def logistic_log_likelihood_i(x_i, y_i, beta):
    """
    Return the log likelihood of the ith Bernoulli trial.

    With p = logistic(dot_product(x_i, beta)), the result is log(p) when
    y_i equals 1 and log(1 - p) for any other y_i.
    """
    probability = logistic(dot_product(x_i, beta))
    if y_i != 1:
        # the observed outcome is the complementary event
        probability = 1 - probability
    return math.log(probability)
def neuron_output(weights, inputs):
    """
    Return the smoothed weighted sum of inputs for a single neuron.

    The weighted sum dot_product(weights, inputs) is passed through
    sigmoid. weights holds one extra slot for the bias term; presumably
    the caller supplies a matching constant input for it (confirm
    against the call sites).
    """
    activation = dot_product(weights, inputs)
    return sigmoid(activation)
# beta_i i!=0 has the following transform beta_i = beta_i_scaled/sigma_i # and beta_0 is beta_hat_unscaled = [ beta_hat[0], beta_hat[1] / stds_x[1], beta_hat[2] / stds_x[2] ] print "beta_hat_unscaled", beta_hat_unscaled # Fit Quality # ############### # Examine the test data true_positives = false_positives = true_negatives = false_negatives = 0 for x_i, y_i in zip(x_test, y_test): # For the test data get a prediction for y. This will be a # probability between 0 and 1 predict = logistic(dot_product(beta_hat, x_i)) # Set a threshold of 0.5 to make our prediction a binary 0 or 1 if y_i == 1 and predict >= 0.5: # increment true positives true_positives += 1 elif y_i == 1: # increment false negatives false_negatives += 1 elif predict >= 0.5: # increment false positives false_positives += 1 else: true_negatives += 1 # Get the precision and recall
def cosine_similarity(v, w):
    """
    Return the normalized projection of v onto w.

    This is dot_product(v, w) divided by the product of the magnitudes of
    v and w; the product is converted to float before dividing.
    """
    numerator = dot_product(v, w)
    denominator = float(magnitude(v) * magnitude(w))
    return numerator / denominator