Example no. 1
def backpropagate(network, input_vector, targets):
    """ tunes the weights in network so that erro between network output and
        targets is minimized. """
    # get the outputs of the network layers
    hidden_outputs, outputs = feed_forward(network, input_vector)
    
    # compute the deltas for the output layer. note the derivative of the
    # logistic function is logistic * (1 - logistic)
    output_deltas = [output * (1 - output) * (output-target)
                     for output, target in zip(outputs, targets)]
    
    # update the weights (j) of each of the output neurons (i)
    for i, output_neuron in enumerate(network[-1]):
        # for each output neuron (i) in the output layer (network[-1]), take
        # its inputs (the hidden outputs plus a bias input, j of them) and
        # adjust the jth weight of the ith output neuron along the gradient
        for j, hidden_output in enumerate(hidden_outputs + [1]):
            output_neuron[j] -= output_deltas[i] * hidden_output

    # back-propagate the errors to the hidden layer. this mirrors the step
    # above: each hidden neuron's delta is o * (1 - o) times the dot product
    # of the output deltas with the weights that connect this hidden neuron
    # to each output neuron
    hidden_deltas = [hidden_output * (1 - hidden_output) * 
                     dot_product(output_deltas, 
                                  [n[i] for n in network[-1]])
                     for i, hidden_output in enumerate(hidden_outputs)]

    # update the weights (j) of the hidden layer neurons (i)
    for i, hidden_neuron in enumerate(network[0]):
        for j, input in enumerate(input_vector + [1]):
            hidden_neuron[j] -= hidden_deltas[i] * input

    """ A much cleaner way of doing this is to compute the gradient of the
Example no. 2
def perceptron(weights, bias, x):
    """ returns 1 if the weighted sum of x exceeds 0 and 0 if not """
    output = dot_product(weights, x) + bias
    return step_function(output)
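# illustrative usage (not part of the original snippet), assuming
# step_function returns 1 for non-negative input: weights [2, 2] with
# bias -3 make the perceptron behave as an AND gate.
# perceptron([2, 2], -3, [1, 1])  -> step_function(2 + 2 - 3) -> 1
# perceptron([2, 2], -3, [1, 0])  -> step_function(2 - 3)     -> 0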
def predict(x_i, beta):
    """ assumes x_i1 = 1 and beta is a vector the length of x_i """
    return dot_product(x_i, beta)
def ridge_penalty(beta, alpha):
    """ alpha is a hyperparameter that scales how strong the penalty is. The
        penalty is chosen to be the square of the betas ignoring the
        constant term """
    return alpha * dot_product(beta[1:], beta[1:])
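# ridge_penalty is typically added to the per-point squared error to form the
# objective that estimate_beta_ridge (used below) minimizes. A hypothetical
# sketch of that combination, not taken from the original source:
def squared_error_ridge(x_i, y_i, beta, alpha):
    """ squared prediction error for one point plus the ridge penalty """
    return (y_i - predict(x_i, beta)) ** 2 + ridge_penalty(beta, alpha)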
    # (fragment of a driver script: bootstrap_betas, xs and ys are built
    # earlier in the original source)
    # convert the bootstrap betas back to floats
    bootstrap_betas = [[float(beta_ls[i]) for i in range(3)]
                       for beta_ls in bootstrap_betas]
    # calculate the standard errors
    bootstrap_standard_errors = [
        standard_deviation([beta[i] for beta in bootstrap_betas])
        for i in range(3)
    ]
    print "The Bootstrapped Standard Error is: "
    print bootstrap_standard_errors

    # Beta estimate for various ridge penalties #
    #############################################
    print "Stochastic Gradient Descent: Ridge Penalty = 0.01"
    beta_0_01 = estimate_beta_ridge(xs, ys, alpha=decimal.Decimal(0.01))
    print beta_0_01
    print " Sum of squares of beta = %.5f " % (dot_product(
        beta_0_01[1:], beta_0_01[1:]))

    print "Stochastic Gradient Descent: Ridge Penalty = 0.1"
    beta_0_1 = estimate_beta_ridge(xs, ys, alpha=decimal.Decimal(0.1))
    print beta_0_1
    print " Sum of squares of beta = %.5f " % (dot_product(
        beta_0_1[1:], beta_0_1[1:]))

    print "Stochastic Gradient Descent: Ridge Penalty = 1.0"
    beta_1 = estimate_beta_ridge(xs, ys, alpha=decimal.Decimal(1.0))
    print beta_1
    print " Sum of squares of beta = %.5f " % (dot_product(
        beta_1[1:], beta_1[1:]))

    print "Stochastic Gradient Descent: Ridge Penalty = 10.0"
    beta_10 = estimate_beta_ridge(xs, ys, alpha=decimal.Decimal(10.0))
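# estimate_beta_ridge is not defined in this snippet. Judging from the headers
# printed above, it fits beta by stochastic gradient descent on the ridge
# objective; a rough, hypothetical sketch (alpha is cast to float so that
# Decimal hyperparameters mix with float data) might look like:
import random

def estimate_beta_ridge(xs, ys, alpha, learning_rate=0.001, num_epochs=100):
    alpha = float(alpha)
    beta = [random.random() for _ in xs[0]]
    for _ in range(num_epochs):
        for x_i, y_i in zip(xs, ys):
            error = predict(x_i, beta) - y_i
            for j in range(len(beta)):
                # the constant term beta[0] is not penalized
                penalty_gradient = 0 if j == 0 else 2 * alpha * beta[j]
                beta[j] -= learning_rate * (2 * error * x_i[j] + penalty_gradient)
    return beta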
Example no. 6
def logistic_log_partial_ij(x_i, y_i, beta, j):
    """ here i is the point index and j is the gradient index """
    return (y_i - logistic(dot_product(x_i, beta))) * x_i[j]
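# logistic is assumed to be defined elsewhere; a minimal sketch of it, plus
# the usual assembly of the per-point gradient from these partials (both are
# assumptions, not part of the original snippet):
import math

def logistic(x):
    return 1.0 / (1 + math.exp(-x))

def logistic_log_gradient_i(x_i, y_i, beta):
    """ one partial derivative per coefficient j for the ith data point """
    return [logistic_log_partial_ij(x_i, y_i, beta, j)
            for j, _ in enumerate(beta)]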
Example no. 7
def logistic_log_likelihood_i(x_i, y_i, beta):
    """ returns the log likelihood of the ith bernoulli trial """
    if y_i == 1:
        return math.log(logistic(dot_product(x_i, beta)))
    else:
        return math.log(1 - logistic(dot_product(x_i, beta)))
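# the per-point likelihoods would typically be summed over the whole data set;
# a sketch of that wrapper (an assumption, not part of the original snippet):
def logistic_log_likelihood(x, y, beta):
    return sum(logistic_log_likelihood_i(x_i, y_i, beta)
               for x_i, y_i in zip(x, y))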
Example no. 15
def neuron_output(weights, inputs):
    """ returns the smoothed weighted sum of inputs. Here len(weights) is 1
        greater than inputs to hold a spot for the bias term """
    return sigmoid(dot_product(weights, inputs))
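# illustrative usage (not part of the original snippet): with a trailing bias
# weight, the caller appends a constant 1 to the inputs, as feed_forward does.
and_neuron = [2.0, 2.0, -3.0]                    # two input weights plus a bias weight
output = neuron_output(and_neuron, [1, 1, 1])    # sigmoid(2 + 2 - 3) = sigmoid(1) ~ 0.73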
Example no. 16
    # (fragment of a driver script: beta_hat, stds_x, x_test and y_test come
    # from earlier in the original source)
    # for i != 0, the unscaled coefficient is beta_i = beta_i_scaled / sigma_i;
    # beta_0 is left unchanged
    beta_hat_unscaled = [
        beta_hat[0], beta_hat[1] / stds_x[1], beta_hat[2] / stds_x[2]
    ]
    print "beta_hat_unscaled", beta_hat_unscaled

    # Fit Quality #
    ###############
    # Examine the test data
    true_positives = false_positives = true_negatives = false_negatives = 0

    for x_i, y_i in zip(x_test, y_test):
        # For the test data get a prediction for y. This will be a
        # probability between 0 and 1
        predict = logistic(dot_product(beta_hat, x_i))

        # Set a threshold of 0.5 to make our prediction a binary 0 or 1
        if y_i == 1 and predict >= 0.5:
            # increment true positives
            true_positives += 1
        elif y_i == 1:
            # increment false negatives
            false_negatives += 1
        elif predict >= 0.5:
            # increment false positives
            false_positives += 1
        else:
            true_negatives += 1

    # Get the precision and recall
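    # (hypothetical continuation, not in the original snippet) the four counts
    # above would feed the standard precision / recall formulas:
    precision = true_positives / float(true_positives + false_positives)
    recall = true_positives / float(true_positives + false_negatives)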
def cosine_similarity(v, w):
    """ computes the cosine of the angle between v and w """
    return dot_product(v, w) / float(magnitude(v) * magnitude(w))
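# cosine_similarity assumes a magnitude helper; a minimal sketch and a quick
# illustration (both assumptions, not part of the original snippet):
import math

def magnitude(v):
    return math.sqrt(dot_product(v, v))

# identical directions give 1.0, orthogonal directions give 0.0:
# cosine_similarity([1, 0], [1, 0])  -> 1.0
# cosine_similarity([1, 0], [0, 1])  -> 0.0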