Example #1
def forward_backward_prop(data, labels, params):
    """ Forward and backward propagation for a two-layer sigmoidal network """
    ###################################################################
    # Compute the forward propagation and the cross entropy cost,     #
    # and backward propagation for the gradients of all parameters.   #
    ###################################################################

    ### Unpack network parameters (do not modify)
    t = 0
    W1 = np.reshape(params[t:t + dimensions[0] * dimensions[1]],
                    (dimensions[0], dimensions[1]))
    t += dimensions[0] * dimensions[1]
    b1 = np.reshape(params[t:t + dimensions[1]], (1, dimensions[1]))
    t += dimensions[1]
    W2 = np.reshape(params[t:t + dimensions[1] * dimensions[2]],
                    (dimensions[1], dimensions[2]))
    t += dimensions[1] * dimensions[2]
    b2 = np.reshape(params[t:t + dimensions[2]], (1, dimensions[2]))

    ### YOUR CODE HERE: forward propagation
    N, D = data.shape
    h = sigmoid(data.dot(W1) + b1)  # hidden layer activations
    scores = softmax(h.dot(W2) + b2)  # output class probabilities
    cost = np.sum(-np.log(scores[labels == 1])) / N  # mean cross-entropy (labels are one-hot)
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    dscores = scores - labels  # gradient of the cross-entropy loss w.r.t. the softmax input
    dscores /= N
    gradb2 = np.sum(dscores, axis=0)
    gradW2 = np.dot(h.T, dscores)

    gradh = np.dot(dscores, W2.T)  # backpropagate into the hidden layer
    gradh = sigmoid_grad(h) * gradh  # through the sigmoid non-linearity
    gradb1 = np.sum(gradh, axis=0)
    gradW1 = np.dot(data.T, gradh)
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
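
A quick note on usage: forward_backward_prop relies on a module-level dimensions list and on sigmoid, sigmoid_grad and softmax helpers defined elsewhere in the assignment. The following is only a minimal driving sketch, assuming straightforward definitions of those helpers and arbitrary sizes:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_grad(s):
    # derivative expressed in terms of the sigmoid output s
    return s * (1.0 - s)

def softmax(x):
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)

dimensions = [10, 5, 3]  # input, hidden and output layer sizes (chosen only for this sketch)
param_count = (dimensions[0] + 1) * dimensions[1] + (dimensions[1] + 1) * dimensions[2]

np.random.seed(0)
params = np.random.randn(param_count)
data = np.random.randn(20, dimensions[0])
labels = np.zeros((20, dimensions[2]))
labels[np.arange(20), np.random.randint(0, dimensions[2], 20)] = 1  # one-hot rows

cost, grad = forward_backward_prop(data, labels, params)
print(cost, grad.shape)  # grad has the same length as params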
Example #2
def softmaxRegression(features,
                      labels,
                      weights,
                      regularization=0.0,
                      nopredictions=False):
    """ Softmax Regression """
    ###################################################################
    # Implement softmax regression with weight regularization.        #
    # Inputs:                                                         #
    #   - features: feature vectors, each row is a feature vector     #
    #   - labels: labels corresponding to the feature vectors         #
    #   - weights: weights of the regressor                           #
    #   - regularization: L2 regularization constant                  #
    # Output:                                                         #
    #   - cost: cost of the regressor                                 #
    #   - grad: gradient of the regressor cost with respect to its    #
    #           weights                                               #
    #   - pred: label predictions of the regressor (you might find    #
    #           np.argmax helpful)                                    #
    ###################################################################

    prob = softmax(features.dot(weights))

    if len(features.shape) > 1:
        N = features.shape[0]
    else:
        N = 1
    # A vectorized implementation of  1/N * sum(cross_entropy(x_i, y_i)) + regularization/2 * ||w||^2
    cost = np.sum(-np.log(prob[range(N), labels])) / N
    cost += 0.5 * regularization * np.sum(weights**2)

    ### YOUR CODE HERE: compute the gradients and predictions
    dscores = prob.copy()
    dscores[range(N), labels] -= 1
    dscores /= N
    grad = features.T.dot(dscores) + regularization * weights
    pred = np.argmax(prob, axis=1)
    ### END YOUR CODE
    if nopredictions:
        return cost, grad
    else:
        return cost, grad, pred
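
softmaxRegression expects integer class labels and a softmax helper from the rest of the assignment. A minimal sketch of a call, assuming a standard row-wise softmax and arbitrary sizes:

import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)

np.random.seed(1)
N, D, C = 50, 10, 5  # samples, feature dimension, classes (arbitrary)
features = np.random.randn(N, D)
labels = np.random.randint(0, C, N)  # integer class labels
weights = 0.01 * np.random.randn(D, C)

cost, grad, pred = softmaxRegression(features, labels, weights, regularization=1e-3)
print(cost, grad.shape, pred.shape)  # grad has the same shape as weights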
Example #3
def softmaxCostAndGradient(predicted, target, outputVectors):
    """ Softmax cost function for word2vec models """
    ###################################################################
    # Implement the cost and gradients for one predicted word vector  #
    # and one target word vector as a building block for word2vec     #
    # models, assuming the softmax prediction function and cross      #
    # entropy loss.                                                   #
    # Inputs:                                                         #
    #   - predicted: numpy ndarray, predicted word vector (\hat{r} in #
    #           the written component) (V_wi)                         #
    #   - target: integer, the index of the target word               #
    #   - outputVectors: "output" vectors for all tokens              #
    # Outputs:                                                        #
    #   - cost: cross entropy cost for the softmax word prediction    #
    #   - gradPred: the gradient with respect to the predicted word   #
    #           vector                                                #
    #   - grad: the gradient with respect to all the other word       #
    #           vectors                                               #
    # We will not provide starter code for this function, but feel    #
    # free to reference the code you previously wrote for this        #
    # assignment!                                                     #
    ###################################################################

    ### YOUR CODE HERE
    V, D = outputVectors.shape

    # softmax scores over the whole vocabulary for the predicted (center) vector
    scores = softmax(outputVectors.dot(predicted).reshape(1, V)).reshape(V,)
    cost = -np.log(scores[target])

    # one-hot distribution for the target word
    labels = np.zeros(V)
    labels[target] = 1
    dscores = scores - labels
    gradPred = dscores.dot(outputVectors)  # gradient w.r.t. the predicted vector
    grad = dscores.reshape(V, 1).dot(predicted.reshape(D, 1).T)  # gradient w.r.t. all output vectors
    ### END YOUR CODE

    return cost, gradPred, grad
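
For context, a minimal sketch of calling softmaxCostAndGradient on a tiny vocabulary, again assuming a standard softmax helper (all sizes here are arbitrary):

import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)

np.random.seed(2)
V, D = 7, 4  # vocabulary size and embedding dimension (arbitrary)
predicted = np.random.randn(D)  # \hat{r}, the predicted (center) word vector
outputVectors = np.random.randn(V, D)  # one "output" vector per token
target = 3  # index of the target word

cost, gradPred, grad = softmaxCostAndGradient(predicted, target, outputVectors)
print(cost, gradPred.shape, grad.shape)  # (D,) and (V, D) respectively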
def conv(image, label, params, conv_s, pool_f, pool_s):
    '''
    Combine the forward and backward propagation into a single routine that takes the
    input image, its label, the network parameters and the hyperparameters, and returns
    the gradients and the loss.
    '''

    [f1, f2, w3, w4, b1, b2, b3, b4] = params  # filters, weights and biases

    #############################################
    ###########Forward operation#################
    #############################################

    conv1 = convolution(image, f1, b1, conv_s)
    conv1[conv1 <= 0] = 0  # apply ReLU non-linearity

    conv2 = convolution(conv1, f2, b2, conv_s)
    conv2[conv2 <= 0] = 0

    pooled = maxpool(conv2, pool_f, pool_s)  # max pooling

    (nf2, dim2, _) = pooled.shape
    fc = pooled.reshape((nf2 * dim2 * dim2, 1))  # flatten pooled layer

    z = w3.dot(fc) + b3  # pass flattened pool through first fully connected layer
    z[z <= 0] = 0  # apply ReLU

    out = w4.dot(z) + b4  # pass through second fully connected layer

    probs = softmax(out)  # apply softmax to obtain predicted class probabilities

    #############################################
    ############# Loss ##########################
    #############################################
    loss = categoricalCrossEntropy(probs, label)

    ##############################################
    ############ Backward operation ##############
    ##############################################

    d_out = probs - label  # derivative of the loss w.r.t. the final dense layer output

    dw4 = d_out.dot(z.T)  # loss gradient of final layer weights
    db4 = np.sum(d_out, axis=1).reshape(b4.shape)  # loss gradient of final layer biases

    dz = w4.T.dot(d_out)  # loss gradient of first dense layer outputs
    dz[z <= 0] = 0  # backprop through ReLU
    dw3 = dz.dot(fc.T)  # loss gradient of first dense layer weights
    db3 = np.sum(dz, axis=1).reshape(b3.shape)

    dfc = w3.T.dot(dz)  # loss gradient of the flattened (fully connected) pooling layer
    dpool = dfc.reshape(pooled.shape)  # reshape into the dimensions of the pooling layer

    dconv2 = maxpoolBackward(dpool, conv2, pool_f, pool_s)  # backprop through max pooling
    dconv2[conv2 <= 0] = 0

    dconv1, df2, db2 = convolutionBackward(dconv2, conv1, f2, conv_s)
    dconv1[conv1 <= 0] = 0

    dimage, df1, db1 = convolutionBackward(dconv1, image, f1, conv_s)

    grads = [df1, df2, dw3, dw4, db1, db2, db3, db4]

    return grads, loss
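
The conv routine above leans on helpers (convolution, maxpool, maxpoolBackward, convolutionBackward, softmax, categoricalCrossEntropy) defined elsewhere in that project. The step d_out = probs - label only holds when softmax is paired with categorical cross-entropy; a small self-contained sketch of that pairing, using assumed column-vector definitions of the two helpers, is:

import numpy as np

def softmax(out):
    # out: (num_classes, 1) column of logits
    e = np.exp(out - np.max(out))
    return e / np.sum(e)

def categoricalCrossEntropy(probs, label):
    # label: one-hot column vector of the same shape as probs
    return -np.sum(label * np.log(probs))

logits = np.array([[2.0], [0.5], [-1.0]])
label = np.array([[1.0], [0.0], [0.0]])
probs = softmax(logits)

print(categoricalCrossEntropy(probs, label))  # loss for this single example
print(probs - label)  # gradient of that loss w.r.t. the logits, matching d_out above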