Example #1
def negSamplingCostAndGradient(predicted, target, outputVectors, dataset,
    K=10):
    """ Negative sampling cost function for word2vec models """

    # Implement the cost and gradients for one predicted word vector
    # and one target word vector as a building block for word2vec
    # models, using the negative sampling technique. K is the sample
    # size. You might want to use dataset.sampleTokenIdx() to sample
    # a random word index.
    #
    # Note: See test_word2vec below for dataset's initialization.
    #
    # Input/Output Specifications: same as softmaxCostAndGradient
    # We will not provide starter code for this function, but feel
    # free to reference the code you previously wrote for this
    # assignment!

    ### YOUR CODE HERE
    grad = np.zeros(outputVectors.shape)

    s = sigmoid(np.dot(outputVectors[target,:], predicted))
    cost = -np.log(s)
    gradPred = - sigmoid_grad(s)/s*outputVectors[target,:]
    grad[target,:] = - sigmoid_grad(s)/s*predicted

    for k in range(K):
        i = dataset.sampleTokenIdx()
        s = sigmoid( - np.dot(outputVectors[i,:], predicted))
        cost -= np.log(s)
        gradPred += sigmoid_grad(s)/s*outputVectors[i,:]
        grad[i,:] += sigmoid_grad(s)/s*predicted

    ### END YOUR CODE

    return cost, gradPred, grad
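
A minimal usage sketch for the function above (not from the original assignment): the DummyDataset class, the 5-word vocabulary, and the vector dimension are illustrative assumptions, and numpy, sigmoid, and negSamplingCostAndGradient are assumed to be defined as in this listing.

class DummyDataset(object):
    """Hypothetical stand-in for the assignment's dataset object."""
    def sampleTokenIdx(self):
        # sample a random word index from a 5-word vocabulary
        return np.random.randint(0, 5)

np.random.seed(42)
outputVectors = np.random.randn(5, 3)   # 5 output word vectors of dimension 3
predicted = np.random.randn(3)          # one predicted (center) word vector
cost, gradPred, grad = negSamplingCostAndGradient(
    predicted, 1, outputVectors, DummyDataset())
print(cost, gradPred.shape, grad.shape)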
Example #2
def test_sigmoid_gradient(dim_1, dim_2):
    a1    = np.random.normal(loc=0., scale=20., size=(dim_1,dim_2))
    shift = np.random.uniform(low=1e-9, high=1e-5, size=(dim_1,dim_2))
    ap = a1 + shift
    am = a1 - shift

    dsigmoid = (sigmoid(ap) - sigmoid(am)) / (2*shift)
    assert np.abs(np.max(dsigmoid - sigmoid_grad(sigmoid(a1)))) <= 1e-7
    assert np.abs(np.min(dsigmoid - sigmoid_grad(sigmoid(a1)))) <= 1e-7
Example #3
def test_sigmoidgrad():
    """ Original sigmoid gradient test defined in q2_sigmoid.py; """
    x = np.array([[1, 2], [-1, -2]])
    f = sigmoid(x)
    g = sigmoid_grad(f)
    assert rel_error(g, np.array([[0.19661193, 0.10499359],
        [0.19661193, 0.10499359]])) <= 1e-7
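
All of the examples in this listing call sigmoid and sigmoid_grad from q2_sigmoid.py without showing them. A minimal sketch consistent with how they are used here (sigmoid_grad takes the already-computed sigmoid value, as several comments below point out, and reproduces the 0.19661193 values tested above):

import numpy as np

def sigmoid(x):
    # element-wise logistic function
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_grad(s):
    # derivative of the sigmoid expressed in terms of its output s = sigmoid(x)
    return s * (1.0 - s)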
Example #4
def forward_backward_prop(data, labels, params, dimensions):
    """ 
    Forward and backward propagation for a two-layer sigmoidal network 
    
    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    hidden = sigmoid(data.dot(W1) + b1)
    prediction = softmax(hidden.dot(W2) + b2)
    cost = -np.sum(np.log(prediction) * labels)
    ### END YOUR CODE

    
    ### YOUR CODE HERE: backward propagation
    delta = prediction - labels
    gradW2 = hidden.T.dot(delta)
    gradb2 = np.sum(delta, axis = 0)
    hidden_delta = delta.dot(W2.T) * sigmoid_grad(hidden)
    gradW1 = data.T.dot(hidden_delta)
    gradb1 = np.sum(hidden_delta, axis = 0)
    ### END YOUR CODE
    
    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 
        gradW2.flatten(), gradb2.flatten()))
    
    return cost, grad
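
A sketch of the kind of numerical gradient check these implementations are written for. The shapes (20 examples, dimensions 10/5/10) follow the comments in Example #13, and gradcheck_naive is the checker used in the test method further down; this is illustrative, not the assignment's exact sanity-check code, and it assumes forward_backward_prop and gradcheck_naive are already defined.

import random
import numpy as np

N = 20
dimensions = [10, 5, 10]
data = np.random.randn(N, dimensions[0])                 # 20 training examples
labels = np.zeros((N, dimensions[2]))
for i in range(N):
    labels[i, random.randint(0, dimensions[2] - 1)] = 1  # random one-hot targets
params = np.random.randn((dimensions[0] + 1) * dimensions[1] +
                         (dimensions[1] + 1) * dimensions[2])
gradcheck_naive(lambda p: forward_backward_prop(data, labels, p, dimensions),
                params)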
Example #5
def forward_backward_prop(data, labels, params, dimensions):
    """ 
    Forward and backward propagation for a two-layer sigmoidal network 
    
    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    # data: N x Dx, W1: Dx x H, b: 1 x H 
    a = data.dot(W1) + b1
    h = sigmoid(a)
    # h: N x H, W2: H x Dy, b2: 1 x Dy
    t = h.dot(W2) + b2
    y_hat = softmax(t)
    # y_hat: N x Dy, labels: N x Dy (as int)
    probs = labels * y_hat
    cost = np.sum(-np.log(probs.sum(axis=1)))
    ### END YOUR CODE
    
    ### YOUR CODE HERE: backward propagation
    # obtain the softmax gradient
    dJdt = (y_hat - labels) # N x Dy

    # b2 grad is sum along each index of the Dy vectors
    gradb2 = np.sum(dJdt, 0) 

    # h: N x H, dJdt: N x Dy
    gradW2 = h.T.dot(dJdt) # H x Dy

    # dJdt: N x Dy, W2: H x Dy
    dJdh = dJdt.dot(W2.T)
    # h: N x H
    dhda = sigmoid_grad(h)

    # data: N x Dx, dhda: N x H, DJdh: N x H
    gradW1 = data.T.dot(dhda * dJdh)
    
    # dhda: N x H, DJdh: N x H
    gradb1 = np.sum(dhda * dJdh, 0)
    ### END YOUR CODE
    
    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 
        gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #6
def forward_backward_prop(data, labels, params, dimensions):
    """ 
    Forward and backward propagation for a two-layer sigmoidal network 
    
    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])
    print(Dx, H, Dy)
    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    h = sigmoid(data.dot(W1) + b1)
    y = softmax(h.dot(W2) + b2)
    cost = -np.sum(labels * np.log(y))
    ### END YOUR CODE
    
    ### YOUR CODE HERE: backward propagation
    gradW1 = data.T.dot((y - labels).dot(W2.T) * sigmoid_grad(h))
    gradW2 = h.T.dot(y - labels)
    print((y - labels).shape)
    gradb1 = np.sum((y - labels).dot(W2.T) * sigmoid_grad(h), axis=0)
    gradb2 = np.sum(y - labels, axis=0)

    #delta = prediction - labels
    #gradW2 = hidden.T.dot(delta)
    #gradb2 = np.sum(delta,axis=0)
    #delta = delta.dot(W2.T)*sigmoid_grad(hidden)
    #gradW1=data.T.dot(delta)
    #gradb1=np.sum(delta,axis=0)
    ### END YOUR CODE
    
    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 
        gradW2.flatten(), gradb2.flatten()))
    
    return cost, grad
Example #7
def test_sigmoid(dim_1, dim_2):
    a1       = np.random.normal(loc=0., scale=20., size=(dim_1,dim_2))
    a1_copy  = a1.copy()

    s_a1     = sigmoid(a1)
    s_sol_a1 = sigmoid_sol(a1_copy)

    assert rel_error(sigmoid_grad(s_a1), sigmoid_grad_sol(s_sol_a1)) <= 1e-10
Exemple #8
0
def forward_backward_prop(X, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    the backward propagation for the gradients for all parameters.

    Notice the gradients computed here are different from the gradients in
    the assignment sheet: they are w.r.t. weights, not inputs.

    Arguments:
    X -- M x Dx matrix, where each row is a training example x.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    # Note: compute cost based on `sum` not `mean`.
    ### YOUR CODE HERE: forward propagation
    h = sigmoid(np.dot(X, W1) + b1)
    # [m, Dy]
    yhat = softmax(np.dot(h, W2) + b2)
    # /m
    cost = np.sum(-np.log(yhat[labels == 1])) / X.shape[0]
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation

    d3 = (yhat - labels) / X.shape[0]
    gradW2 = np.dot(h.T, d3)
    gradb2 = np.sum(d3, 0, keepdims=True)

    dh = np.dot(d3, W2.T)
    grad_h = sigmoid_grad(h) * dh

    gradW1 = np.dot(X.T, grad_h)
    gradb1 = np.sum(grad_h, 0)

    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
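
The examples differ in how they pick out the correct-class probabilities: some use labels * np.log(yhat), others np.log(yhat[labels == 1]). For one-hot labels the two are equivalent, as this small illustrative check (not from the original code) shows:

import numpy as np

yhat = np.array([[0.7, 0.2, 0.1],
                 [0.1, 0.6, 0.3]])
labels = np.array([[1, 0, 0],
                   [0, 1, 0]])
mask_form = -np.sum(labels * np.log(yhat))        # used in Example #10 and others
index_form = np.sum(-np.log(yhat[labels == 1]))   # used in Example #8 above
assert np.allclose(mask_form, index_form)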
Example #9
def forward_backward_prop(X, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    the backward propagation for the gradients for all parameters.

    Notice the gradients computed here are different from the gradients in
    the assignment sheet: they are w.r.t. weights, not inputs.

    Arguments:
    X -- M x Dx matrix, where each row is a training example x.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    # Note: compute cost based on `sum` not `mean`.
    ### YOUR CODE HERE: forward propagation
    z1 = np.dot(X, W1) + b1  # M*H
    h = sigmoid(z1)  # M*H
    z2 = np.dot(h, W2) + b2  # M*Dy
    Y = softmax(z2)  # M*Dy
    cost = np.sum(-labels * np.log(Y))
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    delta1 = Y - labels  # M*Dy
    gradb2 = np.sum(delta1, 0, keepdims=True)  # M*Dy
    gradW2 = np.dot(h.T, delta1)  # H*Dy

    delta2 = np.dot(delta1, W2.T)  # M*H
    ## Take care!! The argument of sigmoid_grad is sigmoid function value!!
    delta3 = np.multiply(delta2, sigmoid_grad(h))  # M*H

    gradW1 = np.dot(X.T, delta3)  # Dx*H
    gradb1 = np.sum(delta3, 0, keepdims=True)  # 1*H

    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #10
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    z1 = np.dot(data, W1) + b1
    h = sigmoid(z1)

    z2 = np.dot(h, W2) + b2
    y_hat = softmax(z2)

    cost = -np.sum(labels * np.log(y_hat))
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    # d1 = dJ/dz2
    d1 = y_hat - labels

    # d2 = dJ/dh
    d2 = np.dot(d1, W2.T)

    # d3 = dJ/dz1
    d3 = d2 * sigmoid_grad(h)

    gradW2 = np.dot(h.T, d1)
    gradb2 = np.sum(d1, axis=0)
    gradW1 = np.dot(data.T, d3)
    gradb1 = np.sum(d3, axis=0)
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #11
def forward_backward_prop(X, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    the backward propagation for the gradients for all parameters.

    Notice the gradients computed here are different from the gradients in
    the assignment sheet: they are w.r.t. weights, not inputs.

    Arguments:
    X -- M x Dx matrix, where each row is a training example x.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    # Note: compute cost based on `sum` not `mean`.
    ### YOUR CODE HERE: forward propagation
    z1 = np.dot(X, W1) + b1
    a = sigmoid(z1)
    z2 = np.dot(a, W2) + b2
    y = softmax(z2)

    cost = -np.sum(np.log(y) * labels)

    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    grady = y - labels
    gradW2 = np.dot(a.T, grady)
    gradb2 = np.sum(grady, axis=0)

    grada = np.dot(grady, W2.T)
    gradz1 = grada * sigmoid_grad(a)

    gradW1 = np.dot(X.T, gradz1)
    gradb1 = np.sum(gradz1, axis=0)
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #12
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    z = data.dot(W1) + b1
    h = sigmoid(z)
    yPredict = softmax(h.dot(W2) + b2)

    # raise NotImplementedError
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    n = data.shape[0]
    costs = -np.log(yPredict[labels == 1])
    cost = np.sum(costs) / n

    gradTheta = yPredict - labels  # N*Dy
    gradTheta_aver = gradTheta / n
    gradW2 = h.T.dot(gradTheta_aver)  # H*N dot N*Dy = H*Dy
    gradb2 = np.sum(gradTheta_aver, axis=0)  # 1*Dy

    gradh = gradTheta_aver.dot(W2.T)  # N*Dy dot Dy*H = N*H
    gradz = sigmoid_grad(h) * gradh  # N*H
    gradW1 = data.T.dot(gradz)  # Dx*N dot N*H = Dx*H
    gradb1 = np.sum(gradz, axis=0)  # 1*H

    # raise NotImplementedError
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #13
def forward_backward_prop(X, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    the backward propagation for the gradients for all parameters.

    Notice the gradients computed here are different from the gradients in
    the assignment sheet: they are w.r.t. weights, not inputs.

    Arguments:
    X -- M x Dx matrix, where each row is a training example x.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])  # 10, 5, 10

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))  # 10, 5
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))  # 1, 5
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))  # 5, 10
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))  # 1, 10

    # Note: compute cost based on `sum` not `mean`.
    ### YOUR CODE HERE: forward propagation
    z2 = X.dot(W1) + b1  # (20, 5) - 20 is the number of training examples
    a2 = sigmoid(z2)  # (20, 5)
    z3 = a2.dot(W2) + b2  # (20, 10)
    a3 = softmax(z3)  # (20, 10)
    cost = -np.sum(labels * np.log(a3))  # cross entropy cost
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    delta3 = a3 - labels  # 20, 10 - the derivative of cross entropy
    gradb2 = np.sum(
        delta3, 0, keepdims=True
    )  # summing over training examples (1,  10), derivative over b is 1
    gradW2 = np.dot(
        a2.T, delta3
    )  # works similar to the derivative with respect to input x or hidden layer h

    delta2 = sigmoid_grad(a2) * np.dot(delta3, W2.T)  # see assign1, 2(c)
    gradb1 = np.sum(delta2, 0, keepdims=True)
    gradW1 = np.dot(X.T, delta2)
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #14
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    if len(data.shape) >= 2:
        (N, _) = data.shape

    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    a1 = sigmoid(data.dot(W1) + b1)
    a2 = softmax(a1.dot(W2) + b2)

    cost = -np.sum(np.log(a2[labels == 1])) / N
    #raise NotImplementedError
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    grad_a2 = (a2 - labels)

    gradW2 = np.dot(a1.T, grad_a2) * (1.0 / N)
    gradb2 = np.sum(grad_a2, axis=0, keepdims=True) * (1.0 / N)

    grad_a1 = np.dot(grad_a2, W2.T) * sigmoid_grad(a1)

    gradW1 = np.dot(data.T, grad_a1) * (1.0 / N)
    gradb1 = np.sum(grad_a1, axis=0, keepdims=True) * (1.0 / N)

    #raise NotImplementedError
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #15
    def test_sigmoid(self):
        sig = lambda x: (sigmoid(x), sigmoid_grad(sigmoid(x)))

        random_ints = np.random.randint(1, 100, 100)
        random_floats = np.random.random_sample((100, ))
        random_floats = random_ints * random_floats
        for number in random_floats:
            result = gradcheck_naive(sig, np.array(number))
            self.assertTrue(float(result) <= 1e-5)
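
The test above calls gradcheck_naive without showing it. Below is a minimal sketch of a naive central-difference checker compatible with how it is called here (f returns a (value, gradient) pair and the return value is compared against a tolerance); the assignment's actual gradcheck_naive may differ.

import numpy as np

def gradcheck_naive(f, x, h=1e-4):
    # compare f's analytic gradient to a central-difference numerical gradient
    _, grad = f(x)
    max_rel_err = 0.0
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        old = x[ix]
        x[ix] = old + h
        val_plus, _ = f(x)
        x[ix] = old - h
        val_minus, _ = f(x)
        x[ix] = old                      # restore the perturbed entry
        numgrad = (val_plus - val_minus) / (2 * h)
        rel_err = abs(numgrad - grad[ix]) / max(1.0, abs(numgrad), abs(grad[ix]))
        max_rel_err = max(max_rel_err, rel_err)
        it.iternext()
    return max_rel_err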
Example #16
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    z1 = data.dot(W1) + b1
    h1 = sigmoid(z1)
    z2 = h1.dot(W2) + b2
    y_pred = softmax(z2)
    cost = -np.sum(np.log(y_pred) * labels)
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    delta = y_pred - labels

    gradW2 = h1.T.dot(delta)
    gradb2 = np.sum(delta, axis=0)

    gradh1 = delta.dot(W2.T)
    gradz1 = gradh1 * sigmoid_grad(h1)

    gradW1 = data.T.dot(gradz1)
    gradb1 = np.sum(gradz1, axis=0)
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #17
def forward_backward_prop(X, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    the backward propagation for the gradients for all parameters.

    Notice the gradients computed here are different from the gradients in
    the assignment sheet: they are w.r.t. weights, not inputs.

    Arguments:
    X          -- M x Dx matrix, where each row is a training example x.
    labels     -- M x Dy matrix, where each row is a one-hot vector.
    params     -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    # Note: compute cost based on `sum` not `mean`.
    ### Forward propagation
    z = X.dot(W1) + b1
    h = sigmoid(z)
    theta = h.dot(W2) + b2
    y_hat = softmax(theta)
    cost = -np.sum(labels * np.log(y_hat))

    ### Backward propagation
    # Note: the gradients computed here are w.r.t.weights.
    grad_theta = y_hat - labels
    grad_b2 = np.sum(grad_theta, axis=0, keepdims=True)
    grad_W2 = np.dot(h.T, grad_theta)
    grad_h = np.dot(grad_theta, W2.T)
    grad_sigmoid = grad_h * sigmoid_grad(h)
    grad_b1 = np.sum(grad_sigmoid, axis=0, keepdims=True)
    grad_W1 = np.dot(X.T, grad_sigmoid)

    assert grad_b2.shape == b2.shape
    assert grad_W2.shape == W2.shape
    assert grad_b1.shape == b1.shape
    assert grad_W1.shape == W1.shape

    ### Stack gradients (do not modify)
    grad = np.concatenate((grad_W1.flatten(), grad_b1.flatten(),
                           grad_W2.flatten(), grad_b2.flatten()))

    return cost, grad
Example #18
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    x = data
    #print x.shape
    y = labels
    h = sigmoid(x.dot(W1) + b1)
    y_pred = softmax(h.dot(W2) + b2)
    cost = np.sum(-np.log(y_pred[labels == 1])) / data.shape[0]

    print('x: ', x.shape)
    print('y: ', y.shape)
    print('h: ', h.shape)
    print('y_pred: ', y_pred.shape)
    print('cost: ', cost.shape, ' = ', cost)
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    d3 = (y_pred - y) / data.shape[0]
    gradW2 = np.dot(h.T, d3)
    gradb2 = np.sum(d3, 0, keepdims=True)

    d2 = np.dot(d3, W2.T)
    d1 = d2 * sigmoid_grad(h)
    gradW1 = np.dot(x.T, d1)
    gradb1 = np.sum(d1, 0)
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #19
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    # data : N * Dx
    # W1   : Dx * H
    # b1   : 1 * H
    # W2   : H * Dy
    # b2   : 1 * Dy
    N = data.shape[0]

    z1 = data.dot(W1) + b1
    a1 = sigmoid(z1)  # N * H
    z2 = a1.dot(W2) + b2
    a2 = softmax(z2)  # N * Dy

    cost = np.sum(-np.log(a2[labels == 1])) / N

    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    delta_score = a2 - labels  # N * Dy
    delta_score /= N

    gradW2 = np.dot(a1.T, delta_score)  # H * N dot N * Dy = H * Dy
    gradb2 = np.sum(delta_score, axis=0)

    grad_h = np.dot(delta_score, W2.T)  # N * Dy dot Dy * H = N * H
    grad_h = sigmoid_grad(a1) * grad_h

    gradW1 = np.dot(data.T, grad_h)
    gradb1 = np.sum(grad_h, axis=0)

    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #20
def forward_backward_prop(X, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    the backward propagation for the gradients for all parameters.

    Notice the gradients computed here are different from the gradients in
    the assignment sheet: they are w.r.t. weights, not inputs.

    Arguments:
    X -- M x Dx matrix, where each row is a training example x.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    # Note: compute cost based on `sum` not `mean`.
    ### YOUR CODE HERE: forward propagation
    fc1 = X.dot(W1) + b1  # [M,H]
    sig1 = sigmoid(fc1)  # [M,H]
    scores = sig1.dot(W2) + b2  # [M,Dy]
    shifted_scores = scores - np.max(scores, axis=-1, keepdims=True)  # [M,Dy]
    z = np.exp(shifted_scores).sum(axis=-1, keepdims=True)  # [M,1]
    log_probs = shifted_scores - np.log(z)
    cost = -1 * (log_probs * labels).sum()
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    dout = np.exp(log_probs)
    dout[labels == 1] -= 1
    gradW2 = sig1.T.dot(dout)
    gradb2 = dout.sum(axis=0)
    dsig1 = dout.dot(W2.T)
    dfc1 = sigmoid_grad(sig1) * dsig1
    gradW1 = X.T.dot(dfc1)
    gradb1 = dfc1.sum(axis=0)
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
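
Example #20 computes the cost through a shifted log-softmax rather than calling softmax directly, which avoids overflow for large scores. A short illustrative check (not part of the original code):

import numpy as np

scores = np.array([[1000.0, 1001.0, 999.0]])   # large enough to overflow a naive exp
shifted = scores - np.max(scores, axis=-1, keepdims=True)
log_probs = shifted - np.log(np.exp(shifted).sum(axis=-1, keepdims=True))
probs = np.exp(log_probs)                      # rows form a valid softmax distribution
assert np.allclose(probs.sum(axis=-1), 1.0)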
Example #21
def forward_backward_prop(X, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    the backward propagation for the gradients for all parameters.

    Notice the gradients computed here are different from the gradients in
    the assignment sheet: they are w.r.t. weights, not inputs.

    Arguments:
    X -- M x Dx matrix, where each row is a training example x.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    # Note: compute cost based on `sum` not `mean`.
    ### YOUR CODE HERE: forward propagation
    z1 = np.dot(X, W1) + b1  # R - M * H
    h = sigmoid(z1)  # R - M * H
    z2 = np.dot(h, W2) + b2  # R - M * Dy
    y_pred = softmax(z2)  # R - M * Dy
    cost = -np.sum(labels * np.log(y_pred))  # cross-entropy
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    dz2 = y_pred - labels  # R - M * Dy
    dh = dz2.dot(W2.T)  # R - M * H
    # note: sigmoid_grad takes sigmoid(x) as input value
    dz1 = dh * sigmoid_grad(h)  # R - M * H

    gradW2 = h.T.dot(dz2)  # R - H * Dy
    gradb2 = np.sum(dz2, 0)  # R - 1 * Dy
    gradW1 = X.T.dot(dz1)  # R - Dx * H
    gradb1 = np.sum(dz1, 0)  # R - 1 * H
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
        gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #22
def forward_backward_prop(X, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    the backward propagation for the gradients for all parameters.

    Notice the gradients computed here are different from the gradients in
    the assignment sheet: they are w.r.t. weights, not inputs.

    Arguments:
    X -- M x Dx matrix, where each row is a training example x.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    # Note: compute cost based on `sum` not `mean`.
    ### YOUR CODE HERE: forward propagation
    h = X.dot(W1) + b1  # (M, Dx) * (Dx, H) -> (M, H)
    sig_h = sigmoid(h)
    y = sig_h.dot(W2) + b2  # (M, H) * (H, Dy) -> (M, Dy)
    softmax_y = softmax(y)
    cost = -np.sum(labels * np.log(softmax_y))
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    d_y = softmax_y - labels  # (M, Dy) https://math.stackexchange.com/questions/945871/derivative-of-softmax-loss-function
    d_W2 = sig_h.T.dot(d_y)  # (H, M) * (M, Dy) -> (H, Dy)
    d_b2 = np.sum(d_y, axis=0, keepdims=True)  # (M, Dy) -> (, Dy)

    d_sig_h = d_y.dot(W2.T)  # (M, Dy) * (Dy, H) -> (M, H)
    d_h = sigmoid_grad(sig_h) * d_sig_h  # (M, H)
    d_W1 = X.T.dot(d_h)  # (Dx, M) * (M, H) = (Dx, H)
    d_b1 = np.sum(d_h, axis=0, keepdims=True)  # (M, H) -> (, H)
    gradW1, gradb1, gradW2, gradb2 = d_W1, d_b1, d_W2, d_b2
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #23
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])
    N = data.shape[0]

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    hidden = np.dot(data, W1) + b1
    layer1_a = sigmoid(hidden)
    layer2 = np.dot(layer1_a, W2) + b2
    # need to calculate the softmax loss
    probs = softmax(layer2)
    cost = -np.sum(np.log(probs[np.arange(N), np.argmax(labels, axis=1)]))
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    #There is no regularization :/
    # backprop path: dx -> softmax(W2 * layer1_a + b2) -> sigmoid(W1 * data + b1) -> data
    dx = probs.copy()
    dx -= labels

    dlayer2 = np.zeros_like(dx)
    gradW2 = np.zeros_like(W2)
    gradW1 = np.zeros_like(W1)
    gradb2 = np.zeros_like(b2)
    gradb1 = np.zeros_like(b1)

    gradW2 = np.dot(layer1_a.T, dx)
    gradb2 = np.sum(dx, axis=0)
    dlayer2 = np.dot(dx, W2.T)
    dlayer1 = sigmoid_grad(layer1_a) * dlayer2
    gradW1 = np.dot(data.T, dlayer1)
    gradb1 = np.sum(dlayer1, axis=0)

    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #24
def forward_backward_prop(X, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    the backward propagation for the gradients for all parameters.

    Notice the gradients computed here are different from the gradients in
    the assignment sheet: they are w.r.t. weights, not inputs.

    Arguments:
    X -- M x Dx matrix, where each row is a training example x.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    # Note: compute cost based on `sum` not `mean`.
    ### YOUR CODE HERE: forward propagation
    X1_out = sigmoid(X.dot(W1) + b1)  # (M, H)
    softmax_output = softmax(X1_out.dot(W2) + b2)  # shape(M, Dy)
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    M = X.shape[0]
    cost = -np.sum(np.log(softmax_output[labels == 1])) / M
    # labels shape: (M, Dy); the boolean mask labels == 1 also has shape (M, Dy)
    # softmax_output[labels == 1] has shape (M,)
    dSoftmax = (softmax_output - labels) / M  # (M, Dy)
    gradW2 = np.dot(X1_out.T, dSoftmax)
    gradb2 = np.sum(dSoftmax, axis=0, keepdims=True)

    dX1_out = np.dot(dSoftmax, W2.T)  # (M, H)
    dsigmoid = sigmoid_grad(X1_out) * dX1_out  # important!
    gradW1 = np.dot(X.T, dsigmoid)  #(Dx, H) = (Dx, M)(M, H)
    gradb1 = np.sum(dsigmoid, axis=0, keepdims=True)
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #25
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    h = sigmoid(np.dot(data, W1) + b1)
    yhat = softmax(np.dot(h, W2) + b2)
    cost = -np.sum(np.log(yhat[labels == 1])) / data.shape[0]
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    """
        here we compute gradb1, gradW1, gradb2, gradW2
    """
    cost_grad = (yhat - labels) / data.shape[0]  # M x Dy

    gradW2 = np.dot(h.T, cost_grad)  # (H x M) * (M x Dy) = H x Dy
    gradb2 = np.sum(cost_grad, axis=0,
                    keepdims=True)  # Dy, summing over M training set

    dJdh = np.dot(cost_grad, W2.T)  # (M x Dy) . (Dy x H) = M x H
    gradb1_single = sigmoid_grad(h) * dJdh  # M x H  (element-wise)

    gradW1 = np.dot(data.T, gradb1_single)  # (M x Dx).T . (M x H) = Dx x H
    gradb1 = np.sum(gradb1_single, axis=0)  # sum along M data set

    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #26
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    hidden = np.dot(data, W1) + b1
    hidden_act = sigmoid(hidden)
    output = np.dot(hidden_act, W2) + b2
    output_act = softmax(output)

    logprobs = -np.log(output_act[np.arange(data.shape[0]),
                                  np.argmax(labels, axis=1)])
    cost = np.sum(logprobs) / data.shape[0]
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation

    dscores = (output_act - labels) / data.shape[0]
    #print('dscore.shape', dscores.shape)
    gradW2 = np.dot(hidden_act.T, dscores)
    gradb2 = np.sum(dscores, axis=0)
    dhidden_act = np.dot(dscores, W2.T)

    dhidden = sigmoid_grad(hidden_act) * dhidden_act
    gradW1 = np.dot(data.T, dhidden)
    gradb1 = np.sum(dhidden, axis=0)
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #27
def forward_backward_prop(X, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    the backward propagation for the gradients for all parameters.

    Notice the gradients computed here are different from the gradients in
    the assignment sheet: they are w.r.t. weights, not inputs.

    Arguments:
    X -- M x Dx matrix, where each row is a training example x.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    # Note: compute cost based on `sum` not `mean`.
    ### YOUR CODE HERE: forward propagation
    z1 = X.dot(W1) + b1
    h = sigmoid(z1)
    z2 = h.dot(W2) + b2
    yhat = softmax(z2)
    cost = -np.sum(labels * np.log(yhat))
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    delta1 = yhat - labels  # M x Dy
    delta2 = delta1.dot(W2.transpose())  # M x H
    # sigmoid_grad takes the sigmoid output as input
    # so the input should be sigmoid(z1) which is h
    delta3 = delta2 * sigmoid_grad(h)  # M x H
    gradW1 = X.transpose().dot(delta3)  # Dx x H (sums over M examples)
    gradb1 = np.sum(delta3, 0)  # 1 x H (sums over M examples)
    gradW2 = h.transpose().dot(delta1)  # H x Dy (sums over M examples)
    gradb2 = np.sum(delta1, 0)  # 1 x Dy (sums over M examples)
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #28
def test_sigmoid_shape(dim, sigmoid_f):
    testing_shape = []
    for y in range(0, dim):
        testing_shape.append(np.random.randint(3, 8))
    shape = tuple(testing_shape)
    #z = np.random.randn(*testing_shape)
    x = np.random.standard_normal(shape)
    y = np.copy(x)
    assert x.shape == sigmoid(y).shape
    assert x.shape == sigmoid_grad(sigmoid(y)).shape
Example #29
def test_sigmoid_shape(dim):
    testing_shape = []
    for y in range(0,dim):
        testing_shape.append(np.random.randint(3,8))
    shape = tuple(testing_shape)
    #z = np.random.randn(*testing_shape)
    x = np.random.standard_normal(shape)
    y = np.copy(x)
    assert x.shape == sigmoid(y).shape
    assert x.shape == sigmoid_grad(sigmoid(y)).shape
Example #30
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    M = len(data)

    z1 = np.dot(data, W1) + b1  # (M, H)
    h1 = sigmoid(z1)  # (M, H)
    z2 = np.dot(h1, W2) + b2  # (M, Dy)
    y_hat = softmax(z2)  # (M, Dy)
    CE = -np.log(y_hat[np.arange(M), np.argmax(labels, axis=1)])  # (M, 1)
    cost = np.mean(CE)

    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    gradz2 = (y_hat - labels) / M  # (M, Dy)
    gradh1 = np.dot(gradz2, W2.T)  # (M, H)
    gradW2 = np.dot(h1.T, gradz2)  # (H, Dy)
    gradb2 = np.sum(gradz2, axis=0)  # (1, Dy)
    gradz1 = gradh1 * sigmoid_grad(h1)  # (M, H)
    gradW1 = np.dot(data.T, gradz1)  # (Dx, H)
    gradb1 = np.sum(gradz1, axis=0)  # (1, H)

    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #31
def forward_backward_prop(data, labels, params, dimensions):
    """ 
    Forward and backward propagation for a two-layer sigmoidal network 
    
    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    N = data.shape[0]

    l1 = data.dot(W1) + b1
    h = sigmoid(l1)
    l2 = h.dot(W2) + b2
    y_hat = softmax(l2)

    cost = -np.sum(labels * np.log(y_hat)) / N  # cross entropy
    ### raise NotImplementedError
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    dl2 = y_hat - labels
    dW2 = np.dot(h.T, dl2)
    db2 = np.sum(dl2, axis=0)

    dh = np.dot(dl2, W2.T)

    dl1 = dh * sigmoid_grad(h)
    dW1 = np.dot(data.T, dl1)
    db1 = np.sum(dl1, axis=0)

    gradW2 = dW2 / N
    gradb2 = db2 / N
    gradW1 = dW1 / N
    gradb1 = db1 / N
    ### raise NotImplementedError
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #32
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    gradW1 = np.zeros_like(W1)
    gradb1 = np.zeros_like(b1)
    gradW2 = np.zeros_like(W2)
    gradb2 = np.zeros_like(b2)

    N = data.shape[0]
    ### YOUR CODE HERE: forward propagation
    h = sigmoid(np.dot(data, W1) + b1)
    out = softmax(np.dot(h, W2) + b2)
    cost = -np.sum(labels * np.log(out))
    #cost /= N
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    temp0 = out - labels
    gradb2 += np.sum(temp0, axis=0)
    gradW2 += np.dot(h.T, temp0)

    temp1 = np.dot(temp0, W2.T) * sigmoid_grad(h)
    gradb1 += np.sum(temp1, axis=0)
    gradW1 += np.dot(data.T, temp1)
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #33
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))  # Dx * H
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))  # 1 * H
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))  # H * Dy
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    x = data  # M * Dx
    y = labels  # M * Dy
    M = x.shape[0]

    ### forward propagation
    z1 = np.dot(x, W1) + b1
    a1 = sigmoid(z1)

    z2 = np.dot(a1, W2) + b2
    y_hat = a2 = softmax(z2)

    cost = -np.sum(np.log(a2[np.arange(M),
                             np.argmax(y, axis=1)]))  # Cross Entropy

    ### backward propagation
    gradz2 = y_hat - y
    gradW2 = np.dot(a1.T, gradz2)
    gradb2 = np.sum(gradz2, axis=0)
    grada2 = np.dot(gradz2, W2.T)
    gradz1 = sigmoid_grad(a1) * grada2
    gradW1 = np.dot(x.T, gradz1)
    gradb1 = np.sum(gradz1, axis=0)

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #34
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    h = sigmoid(data.dot(W1) + b1)
    y_beta = softmax(h.dot(W2) + b2)

    cost = -np.sum(np.log(y_beta[labels == 1]))
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    # Backpropagate through the softmax output layer:
    # analytic gradient of the cross entropy loss w.r.t. the output scores
    d3 = (y_beta - labels)

    # Backpropagate through the second latent layer
    gradW2 = np.dot(h.T, d3)
    gradb2 = np.sum(d3, axis=0, keepdims=True)

    # Backpropagate through the first latent layer
    d2 = np.dot(d3, W2.T) * sigmoid_grad(h)

    gradW1 = np.dot(data.T, d2)
    gradb1 = np.sum(d2, axis=0, keepdims=True)
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example #35
def forward_backward_prop(data, labels, params, dimensions):
    """ 
    Forward and backward propagation for a two-layer sigmoidal network 
    
    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    layer1_output = np.dot(data, W1) + b1
    layer1_activations = sigmoid(layer1_output)

    output_scores = np.dot(layer1_activations, W2) + b2
    softmax_scores = softmax(output_scores)

    cross_entropy_loss = -1 * np.sum(labels * np.log(softmax_scores))

    #print cross_entropy_loss.shape

    cost = cross_entropy_loss

    doutput_scores = softmax_scores

    #doutput_scores-=labels

    label_index = np.argmax(labels, axis=1)

    doutput_scores[np.arange(data.shape[0]), label_index] -= 1

    gradW2 = np.dot(layer1_activations.T, doutput_scores)
    gradb2 = np.sum(doutput_scores, axis=0)

    dlayer1_activations = np.dot(doutput_scores, W2.T)

    dlayer1_output = sigmoid_grad(layer1_activations) * dlayer1_activations

    gradW1 = np.dot(data.T, dlayer1_output)
    gradb1 = np.sum(dlayer1_output, axis=0)

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
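Similarly, softmax is assumed to be available from the assignment code; a minimal, numerically stabilized row-wise version consistent with how the examples apply it to the (M, Dy) score matrices might look like:

import numpy as np

def softmax(x):
    # Row-wise softmax with max subtraction for numerical stability.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=-1, keepdims=True)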
Exemple #36
0
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])
    N = len(data)

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    z1s = np.dot(data, W1) + b1
    hs = sigmoid(z1s)
    ys = softmax(np.dot(hs, W2) + b2)
    cost = -np.sum(np.log(ys[np.arange(N), np.argmax(labels, axis=1)]))
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    gradW1 = np.zeros_like(W1)
    gradW2 = np.zeros_like(W2)
    gradb1 = np.zeros_like(b1)
    gradb2 = np.zeros_like(b2)
    errors = ys - labels

    gradW2 = np.dot(hs.T, errors)
    gradb2 = np.sum(errors, axis=0)
    # you should input the output of sigmoid into sigmoid_grad
    tmpb1 = sigmoid_grad(hs) * np.dot(errors, W2.T) 
    gradW1 = np.dot(data.T, tmpb1)
    gradb1 = np.sum(tmpb1, axis=0)
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
        gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Exemple #37
0
def forward_backward_prop(X, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    the backward propagation for the gradients for all parameters.

    Notice the gradients computed here are different from the gradients in
    the assignment sheet: they are w.r.t. weights, not inputs.

    Arguments:
    X -- M x Dx matrix, where each row is a training example x.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])
    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    # Note: compute cost based on `sum` not `mean`.
    ### YOUR CODE HERE: forward propagation
    Z1 = np.matmul(X, W1) + b1  # M by H
    A1 = sigmoid(Z1)  # M by H

    Z2 = np.matmul(A1, W2) + b2  # M by Dy
    A2 = softmax(Z2)  # M by Dy
    m = X.shape[0]
    cost = np.sum(-labels * np.log(A2)) / m
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    DZ2 = A2 - labels  # M by Dy
    gradb2 = np.sum(DZ2, axis=0, keepdims=True) / m  # 1 by Dy
    gradW2 = np.matmul(np.transpose(A1), DZ2) / m  # H by Dy
    DA1 = np.dot(DZ2, np.transpose(W2))
    DZ1 = DA1 * sigmoid_grad(A1)  # sigmoid_grad takes the result from sigmoid
    gradb1 = np.sum(DZ1, axis=0, keepdims=True) / m
    gradW1 = np.matmul(np.transpose(X), DZ1) / m
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
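To actually call one of these implementations, the flat params vector has to be packed in the same order the unpacking block reads it (W1, then b1, then W2, then b2). A hypothetical driver with made-up sizes, assuming one of the forward_backward_prop definitions above together with the sigmoid / sigmoid_grad / softmax helpers, could be:

import numpy as np

N, Dx, H, Dy = 20, 10, 5, 10          # made-up sizes for illustration
dimensions = (Dx, H, Dy)

data = np.random.randn(N, Dx)
labels = np.zeros((N, Dy))
labels[np.arange(N), np.random.randint(0, Dy, size=N)] = 1.0   # random one-hot rows

# Flat parameter vector in the same order the unpacking code expects: W1, b1, W2, b2.
params = np.random.randn(Dx * H + H + H * Dy + Dy)

cost, grad = forward_backward_prop(data, labels, params, dimensions)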
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    h1 = data.dot(W1) + b1  # (M, H)
    a1 = sigmoid(h1)

    h2 = a1.dot(W2) + b2  # (M, Dy)
    scores = softmax(h2)
    cost = -np.sum(np.log(scores) * labels)
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    gradh2 = scores - labels

    gradW2 = a1.T.dot(gradh2)
    gradb2 = np.sum(gradh2, axis=0)

    grada1 = gradh2.dot(W2.T)

    gradh1 = grada1 * sigmoid_grad(a1)

    gradW1 = data.T.dot(gradh1)
    gradb1 = np.sum(gradh1, axis=0)
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Exemple #39
0
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    z1 = np.dot(data, W1) + b1
    h = sigmoid(z1)
    y = softmax(np.dot(h, W2) + b2)
    # print y.shape
    # print labels.shape

    cost = -np.sum(np.multiply(np.log(y), labels))
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    # print W2.shape
    delta1 = y - labels

    gradW2 = np.dot(h.transpose(), delta1)
    gradb2 = np.sum(delta1, axis=0)

    delta2 = np.multiply(np.dot(delta1, W2.transpose()), sigmoid_grad(h))
    gradW1 = np.dot(data.transpose(), delta2)
    gradb1 = np.sum(delta2, axis=0)

    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    fc_out = np.dot(data, W1) + b1  # shape (M, H)
    fc_sigmoid_out = sigmoid(fc_out)  # shape (M, H)
    scores = np.dot(fc_sigmoid_out, W2) + b2  # shape (M, Dy)
    y_hat = softmax(scores)  # shape (M, Dy)
    # M = data.shape[0]
    cost = -np.sum(labels * np.log(y_hat))  # / M
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    dscores = y_hat - labels  # / M  # shape (M, Dy)

    gradW2 = np.dot(fc_sigmoid_out.T, dscores)  # shape (H, Dy)
    gradb2 = np.sum(dscores, axis=0)  # shape (Dy,)
    dfc_sigmoid_out = np.dot(dscores, W2.T)  # shape (M, H)
    dfc_out = dfc_sigmoid_out * sigmoid_grad(fc_sigmoid_out)  # shape (M, H)

    gradW1 = np.dot(data.T, dfc_out)  # shape (Dx, H)
    gradb1 = np.sum(dfc_out, axis=0)  # shape (H,)

    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Exemple #41
0
def sigmoid_backward(dout, cache):
    """
    Computes the backward pass for an sigmoid layer.

    Inputs:
    - dout: Upstream derivative, same shape as the input
            to the sigmoid layer (x)
    - cache: sigmoid(x)
    Returns a tuple of:
    - dx: back propagated gradient with respect to x
    """
    s = cache  # the cache holds sigmoid(x), which is exactly what sigmoid_grad expects
    return sigmoid_grad(s) * dout
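sigmoid_backward pairs with a forward function that caches its own output. A hypothetical counterpart, assuming the sigmoid helper sketched earlier, might be:

def sigmoid_forward(x):
    # Forward pass for a sigmoid layer; the cache is the activation itself,
    # which is the cache format sigmoid_backward above expects.
    out = sigmoid(x)
    return out, out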
Exemple #42
0
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    h = sigmoid(np.dot(data,W1) + b1)
    yhat = softmax(np.dot(h,W2) + b2)
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    cost = np.sum(-np.log(yhat[labels==1])) / data.shape[0]

    d3 = (yhat - labels) / data.shape[0]
    gradW2 = np.dot(h.T, d3)
    gradb2 = np.sum(d3,0,keepdims=True)

    dh = np.dot(d3,W2.T)
    grad_h = sigmoid_grad(h) * dh

    gradW1 = np.dot(data.T,grad_h)
    gradb1 = np.sum(grad_h,0)
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
        gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Exemple #43
0
def forward_backward_prop(data, labels, params, dimensions):
    """ 
    Forward and backward propagation for a two-layer sigmoidal network 
    
    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### forward propagation
    N = data.shape[0]

    l1 = data.dot(W1) + b1
    h = sigmoid(l1)
    l2 = h.dot(W2) + b2
    y_hat = softmax(l2)

    cost = -np.sum(labels * np.log(y_hat)) / N # cross entropy
    
    ### backward propagation
    dl2 = y_hat - labels
    dW2 = np.dot(h.T, dl2)
    db2 = np.sum(dl2, axis=0)

    dh = np.dot(dl2, W2.T)

    dl1 = dh * sigmoid_grad(h)
    dW1 = np.dot(data.T, dl1)
    db1 = np.sum(dl1, axis=0)

    gradW2 = dW2/N
    gradb2 = db2/N
    gradW1 = dW1/N
    gradb1 = db1/N
    
    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 
        gradW2.flatten(), gradb2.flatten()))
    
    return cost, grad
Exemple #44
0
def forward_backward_prop(data, labels, params, dimensions):
    """ 
    Forward and backward propagation for a two-layer sigmoidal network 
    
    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    N = data.shape[0]
    Z1 = data.dot(W1) + b1     # (N, H)
    A1 = sigmoid(Z1)           # (N, H)
    scores = A1.dot(W2) + b2   # (N, Dy)
    probs = softmax(scores)    # (N, Dy)
    cost = -np.sum(np.log(probs[labels==1])) / N
    ### END YOUR CODE
    
    ### YOUR CODE HERE: backward propagation
    dscores = (probs - labels) / N
    dW2 = A1.T.dot(dscores)
    db2 = np.sum(dscores, axis=0)
    dA1 = dscores.dot(W2.T)
    dZ1 = sigmoid_grad(A1) * dA1
    dW1 = data.T.dot(dZ1)
    db1 = np.sum(dZ1, axis=0)
    
    gradW1 = dW1
    gradW2 = dW2
    gradb1 = db1
    gradb2 = db2
    ### END YOUR CODE
    
    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 
        gradW2.flatten(), gradb2.flatten()))
    
    return cost, grad
def forward_backward_prop(data, labels, params, dimensions):
    """ 
    Forward and backward propagation for a two-layer sigmoidal network 
    
    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    # print data.shape, W1.shape, b1.shape, W2.shape, b2.shape, labels.shape
    # (20, 10) (10, 5) (1, 5) (5, 10) (1, 10) (20, 10)
    z1 = data.dot(W1) + b1
    h = sigmoid(z1)
    z2 = h.dot(W2) + b2
    y = softmax(z2)
    cost = -1 * np.sum(np.log(y) * labels)
    ### END YOUR CODE
    
    ### YOUR CODE HERE: backward propagation
    dEdz2 = y - labels
    dEdh = dEdz2.dot(W2.T)

    gradW2 = h.T.dot(dEdz2)
    gradb2 = np.sum(dEdz2, axis = 0)

    dEdz1 = dEdh * sigmoid_grad(h)
    dEdx = dEdz1.dot(W1.T)  # gradient w.r.t. the input; not needed for the parameter gradients

    gradW1 = data.T.dot(dEdz1)
    gradb1 = np.sum(dEdz1, axis = 0)
    ### END YOUR CODE
    
    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 
        gradW2.flatten(), gradb2.flatten()))
    
    return cost, grad
def forward_backward_prop(data, labels, params, dimensions):
    """ 
    Forward and backward propagation for a two-layer sigmoidal network 
    
    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    N, D = data.shape
    
    h = sigmoid(data.dot(W1) + b1)
    scores = softmax(h.dot(W2) + b2)
    cost = np.sum(- np.log(scores[labels == 1])) / N
    ### END YOUR CODE
    
    ### YOUR CODE HERE: backward propagation
    dscores = scores - labels  # good
    
    dscores /= N
    
    gradb2 = np.sum(dscores, axis=0)
    gradW2 = np.dot(h.T, dscores)
    
    
    grad_h = np.dot(dscores, W2.T)
    grad_h = sigmoid_grad(h) * grad_h
    
    gradb1 = np.sum(grad_h, axis=0)
    gradW1 = np.dot(data.T, grad_h)
    ### END YOUR CODE
    
    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 
        gradW2.flatten(), gradb2.flatten()))
    
    return cost, grad
Exemple #47
0
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    z = np.dot(data,W1) + b1
    a = sigmoid(z)
    z2 = np.dot(a,W2) + b2
    a2 = softmax(z2)
    # correct = np.argmax(labels)
    cost = -np.sum(np.log(a2) * labels)
    # print "sizes: "
    # print cost
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    d1 = a2 - labels                        # (M, Dy)
    d2 = np.dot(d1, W2.T)                   # (M, H)
    d3 = np.multiply(d2, sigmoid_grad(a))   # (M, H)
    # print "sizes: " + str(d3.shape)
    gradW2 = np.dot(a.T,d1)
    gradb2 = np.sum(d1,axis=0)
    gradW1 = np.dot(data.T,d3)
    gradb1 = np.sum(d3,axis=0)
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), gradW2.flatten(), gradb2.flatten()))
    # grad = 1

    return cost, grad
def forward_backward_prop(data, labels, params, dimensions):
    """ 
    Forward and backward propagation for a two-layer sigmoidal network 
    
    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    # Dimension of data: N * Dx
    # Dimension of W1: Dx * H
    # Dimension of W2: H * Dy
    hidden = sigmoid(data.dot(W1) + b1) # Dimensions: (N*Dx) * (Dx*H)
    prediction = softmax(hidden.dot(W2) + b2) # Dimensions: (N*H) * (H*Dy)
    cost = -np.sum(np.log(prediction) * labels) # Dimensions: 1
    ### END YOUR CODE
    
    ### YOUR CODE HERE: backward propagation
    delta = prediction - labels # Dimesions: N * Dy
    # first backpropagate into parameters W2 and b2
    gradW2 = np.dot(hidden.T, delta) # Dimensions: H * Dy
    gradb2 = np.sum(delta, axis=0,keepdims=True) 
    # next backprop into hidden layer
    gradHidden = np.dot(delta, W2.T) * sigmoid_grad(hidden) # Dimensions: (N x Dy) x (Dy x H) = N x H
    # finally into W1, b1
    gradW1 = np.dot(data.T, gradHidden) # Dimensions: Dx * H
    gradb1 = np.sum(gradHidden, axis=0,keepdims=True)
    ### END YOUR CODE
    
    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 
        gradW2.flatten(), gradb2.flatten()))
    
    return cost, grad
def forward_backward_prop(data, labels, params, dimensions):
    """ 
    Forward and backward propagation for a two-layer sigmoidal network 
    
    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    # raise NotImplementedError
    # Forward pass: compute the predictions (y) and the cost
    hidden = sigmoid(np.dot(data, W1) + b1)
    prediction = softmax(np.dot(hidden, W2) + b2)
    cost = - np.sum(labels * np.log(prediction))
    ### END YOUR CODE
    
    ### YOUR CODE HERE: backward propagation
    # raise NotImplementedError
    # Backward pass: compute the gradient of each parameter
    delta1 = prediction - labels
    gradW2 = np.dot(hidden.T, delta1)
    gradb2 = np.sum(delta1, axis=0)
    delta2 = np.dot(delta1, W2.T)
    delta3 = delta2 * sigmoid_grad(hidden)
    gradW1 = np.dot(data.T, delta3)
    gradb1 = np.sum(delta3, axis=0)
    ### END YOUR CODE
    
    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 
        gradW2.flatten(), gradb2.flatten()))
    
    return cost, grad
Exemple #50
0
def forward_backward_prop(data, labels, params, dimensions):
    """ 
    Forward and backward propagation for a two-layer sigmoidal network 
    
    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    a1 = sigmoid(data.dot(W1)+b1)
    a2 = softmax(a1.dot(W2)+b2)
    
    cost = -np.sum(labels * np.log(a2)) / data.shape[0]  # cross entropy; scaled by 1/N to match the gradients below
    ### END YOUR CODE
    
    ### YOUR CODE HERE: backward propagation
    grad_a2 = (a2-labels) / data.shape[0]
    
    grad_W2 = a1.T.dot(grad_a2)
    grad_b2 = np.sum(grad_a2, axis=0, keepdims=True)
    
    grad_a1 = grad_a2.dot(W2.T)*sigmoid_grad(a1)
    
    grad_W1 = data.T.dot(grad_a1)
    grad_b1 = np.sum(grad_a1, axis=0, keepdims=True)
    ### END YOUR CODE
    
    ### Stack gradients (do not modify)
    grad = np.concatenate((grad_W1.flatten(), grad_b1.flatten(), 
        grad_W2.flatten(), grad_b2.flatten()))
    
    return cost, grad
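A numerical gradient check catches mistakes in the cost (such as taking the log of the labels instead of the predictions) immediately. A small central-difference check of the (cost, grad) pair returned by any of these implementations, written as an illustrative sketch rather than the assignment's own checker, could be:

import numpy as np

def numeric_grad_check(f, params, eps=1e-5, n_checks=10):
    # Compare a few entries of the analytic gradient against central differences.
    _, grad = f(params)
    for _ in range(n_checks):
        i = np.random.randint(params.size)
        p_plus, p_minus = params.copy(), params.copy()
        p_plus[i] += eps
        p_minus[i] -= eps
        numeric = (f(p_plus)[0] - f(p_minus)[0]) / (2 * eps)
        assert abs(numeric - grad[i]) <= 1e-5 * max(1.0, abs(numeric), abs(grad[i]))

# e.g. numeric_grad_check(lambda p: forward_backward_prop(data, labels, p, dimensions), params)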
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    z1 = np.dot(data , W1) + b1
    f1 = sigmoid(z1)
    z2 = np.dot(f1, W2) + b2
    ### END YOUR CODE

    output = softmax(z2)

    cost = -np.sum(np.log(np.sum(output * labels, axis=1)))

    ### YOUR CODE HERE: backward propagation
    delta = output - labels
    gradW2 = np.dot(f1.T, delta)
    gradb2 = np.sum(delta, axis=0)
    delta1 =  delta.dot(W2.T) * sigmoid_grad(f1)
    gradb1 = np.sum(delta1, axis=0)
    gradW1 =  np.dot(data.T, delta1)
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
        gradW2.flatten(), gradb2.flatten()))

    return cost, grad
def forward_backward_prop(data, labels, params, dimensions):
    """ 
    Forward and backward propagation for a two-layer sigmoidal network 
    
    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    #pdb.set_trace()
    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    h1 = np.dot(data, W1)+b1
    a1 = sigmoid(h1)
    scores = np.dot(a1, W2)+b2
    probs = softmax(scores)
    
    cost = np.sum(-scores*labels+labels*np.log(np.sum(np.exp(scores), axis=1, keepdims=True)))

    #gradscores = -labels+np.exp(scores)/np.sum(np.exp(scores), axis=1, keepdims=True)
    gradscores = -labels+probs

    gradb2 = np.sum(gradscores, axis=0)
    gradW2 = np.dot(a1.T,gradscores)
    grada1 = np.dot(gradscores,W2.T)

    gradh1 = grada1*sigmoid_grad(a1)
    gradb1 = np.sum(gradh1, axis=0)
    gradW1 = np.dot(data.T,gradh1)
    
    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 
        gradW2.flatten(), gradb2.flatten()))
    
    return cost, grad
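This variant writes the cross-entropy cost directly in terms of the raw scores (minus the score of the true class plus the log-sum-exp), instead of taking the log of the softmax output. For one-hot labels the two forms agree, which a quick numerical check with made-up shapes confirms:

import numpy as np

scores = np.random.randn(4, 3)
labels = np.eye(3)[np.random.randint(0, 3, size=4)]   # one-hot rows

lse_cost = np.sum(-scores * labels
                  + labels * np.log(np.sum(np.exp(scores), axis=1, keepdims=True)))

probs = np.exp(scores - scores.max(axis=1, keepdims=True))
probs /= probs.sum(axis=1, keepdims=True)
ce_cost = -np.sum(labels * np.log(probs))

assert np.allclose(lse_cost, ce_cost)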
def forward_backward_prop(data, labels, params, dimensions):
    """ 
    Forward and backward propagation for a two-layer sigmoidal network 
    
    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """
    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))
    
    ### YOUR CODE HERE: forward propagation
    out1 = data.dot(W1) + b1          #(N,D) * (D,H) + (H,)
    out1_act = sigmoid(out1)    
    out2 = out1_act.dot(W2) + b2            # (N,Dy)
    score = softmax(out2)
    cost = np.sum(-1 * labels * np.log(score)) 
    ### END YOUR CODE
    
    ### YOUR CODE HERE: backward propagation
    dscore = score - labels
    gradW2 = out1_act.T.dot(dscore)
    gradb2 = np.sum(dscore,axis=0)
    dout1_act = dscore.dot(W2.T)      #(N,dy) * (Dy,H) = (N,H)
    dout1 = sigmoid_grad(out1_act)*(dout1_act)    
    gradW1 = data.T.dot(dout1)
    gradb1 = np.sum(dout1,axis=0)
     
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 
        gradW2.flatten(), gradb2.flatten()))
    
    return cost, grad
Exemple #54
0
def forward_backward_prop(data, labels, params, dimensions):
    """ 
    Forward and backward propagation for a two-layer sigmoidal network 
    
    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H)) # Dx * H
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy)) # H * Dy
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    h1 = np.dot(data, W1) + b1  # N * H
    h1 = sigmoid(h1)
    h2 = np.dot(h1, W2) + b2    # N * Dy
    y_hat = softmax(h2)
    cost = -np.sum(np.multiply(np.log(y_hat), labels))
    ### END YOUR CODE
    
    ### YOUR CODE HERE: backward propagation
    phi = y_hat - labels # N * Dy
    gradW2 = np.dot(h1.T,  phi)  # H * N * N * Dy = H * Dy
    gradb2 = np.sum(phi, 0, keepdims=True) # 1 * Dy
    dhidden = np.dot(phi, W2.T) * sigmoid_grad(h1) # N * H
    gradW1 = np.dot(data.T, dhidden) # Dx * N * N * H
    gradb1 = np.sum(dhidden, 0, keepdims=True)
    ### END YOUR CODE
    
    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 
        gradW2.flatten(), gradb2.flatten()))
    
    return cost, grad
Exemple #55
0
def forward_backward_prop(data, labels, params, dimensions):
    """ 
    Forward and backward propagation for a two-layer sigmoidal network 
    
    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    z_1 = data.dot(W1) + b1
    h = sigmoid(z_1)
    z_2 = h.dot(W2) + b2
    y_hat = softmax(z_2)
    cost = -np.sum(np.log(y_hat) * labels)
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    delta_3 = y_hat - labels
    gradW2 = h.T.dot(delta_3)
    gradb2 = np.sum(delta_3, axis=0)
    delta_2 = delta_3.dot(W2.T) * sigmoid_grad(h)
    gradW1 = data.T.dot(delta_2)
    gradb1 = np.sum(delta_2, axis=0)
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
def forward_backward_prop(data, labels, params, dimensions):
    """ 
    Forward and backward propagation for a two-layer sigmoidal network 
    
    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])
    n_sample,_ = data.shape

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### forward pass
    hiddens = sigmoid(data.dot(W1) + b1)
    probs = softmax(hiddens.dot(W2) + b2)
    true_labels = np.argmax(labels, axis = 1)
    cost = -1.0 * np.sum(np.log(probs[range(n_sample),true_labels])) / n_sample
    ### backward pass
    dscores = probs
    dscores[range(n_sample),true_labels] -= 1
    dscores /= n_sample
    gradW2 = np.dot(hiddens.T, dscores)
    gradb2 = np.sum(dscores,axis = 0)
    gradHiddens = np.dot(dscores, W2.T) * sigmoid_grad(hiddens)
    gradW1 = np.dot(data.T, gradHiddens)
    gradb1 = np.sum(gradHiddens, axis = 0)

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 
        gradW2.flatten(), gradb2.flatten()))
    
    return cost, grad
Exemple #57
0
def forward_backward_prop(data, labels, params, dimensions):
    """ 
    Forward and backward propagation for a two-layer sigmoidal network 
    
    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))



    h1 = sigmoid(np.dot(data,W1) + b1)
    out = softmax(np.dot(h1,W2) + b2)
    cost=-np.sum(np.log(out)*labels)

    dout = np.copy(out)
    dout-=labels
    dh1 = dout.dot(W2.T)
    gradW2 = h1.T.dot(dout)
    gradb2=np.sum(dout,axis=0)

    dsigmoid = sigmoid_grad(h1)
    dh1*=dsigmoid
    gradW1 =data.T.dot(dh1)
    gradb1=np.sum(dh1,axis=0)
    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
        gradW2.flatten(), gradb2.flatten()))

    return cost, grad
def forward_backward_prop(data, labels, params, dimensions):
    """ 
    Forward and backward propagation for a two-layer sigmoidal network 
    
    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))
    
    z1 = np.dot(data, W1) + b1
    h = sigmoid(z1)
    z2 = np.dot(h, W2) + b2
    y_hat = softmax(z2)

    cost = -np.sum( labels * np.log(y_hat) )
    
    delta2 = y_hat - labels
    gradW2 = np.dot(h.T, delta2)
    gradb2 = np.sum(delta2, axis=0)
    
    delta1 = np.dot(delta2, W2.T)*sigmoid_grad(h)
    gradW1 = np.dot(data.T, delta1)
    gradb1 = np.sum(delta1, axis=0)
    
    ### Stack gradients
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))
    
    return cost, grad
Exemple #59
0
def forward_backward_prop(data, labels, params, dimensions):
    """ 
    Forward and backward propagation for a two-layer sigmoidal network 
    
    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    labels = labels.astype("int64")
    a_1_0 = data.dot(W1) + b1
    a_1   = sigmoid(a_1_0)
    a_2_0 = (a_1.dot(W2) + b2)

    loss, dx = softmax_loss(a_2_0, labels)

    gradb2 = np.sum(dx, axis=0, keepdims=True)
    gradW2 = a_1.T.dot(dx)
    da_1 = sigmoid_grad(a_1)*dx.dot(W2.T)
    gradb1 = np.sum(da_1, axis=0, keepdims=True)
    gradW1 = data.T.dot(da_1)

    # fb2 = lambda x: (softmax_loss(a_1.dot(W2)+x, labels)[0])
    # print "+++++++++++++++++++++++++++"
    # print gradb2
    # print "---------------------------"
    # print gradcheck(fb2,b2)
    # print "***************************"

    # fW2 = lambda x: (softmax_loss(a_1.dot(x)+b2, labels)[0])
    # print "+++++++++++++++++++++++++++"
    # print gradW2
    # print "---------------------------"
    # print gradcheck(fW2,W2)
    # print "***************************"

    assert(gradb2.shape == b2.shape)
    assert(gradW2.shape == W2.shape)
    assert(gradb1.shape == b1.shape)
    assert(gradW1.shape == W1.shape)

    cost = loss
    ### END YOUR CODE
    
    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 
        gradW2.flatten(), gradb2.flatten()))
    
    return cost, grad
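This version delegates the loss and its score gradient to a softmax_loss helper that is not shown in the listing. A hypothetical implementation consistent with how it is used here, one-hot labels in, (loss, dscores) out, unscaled by the batch size, might be:

import numpy as np

def softmax_loss(scores, labels):
    # Hypothetical stand-in for the softmax_loss helper used above:
    # cross-entropy loss for one-hot `labels` plus the gradient of the loss
    # with respect to `scores` (probs - labels), matching how dx is consumed
    # by the weight and bias gradients in the example.
    probs = np.exp(scores - scores.max(axis=1, keepdims=True))
    probs /= probs.sum(axis=1, keepdims=True)
    loss = -np.sum(labels * np.log(probs))
    dscores = probs - labels
    return loss, dscores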
Exemple #60
0
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network
    
    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])
    N = data.shape[0]

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    hidden   = np.dot(data,W1) + b1
    layer1_a = sigmoid(hidden)
    layer2   = np.dot(layer1_a, W2) + b2
    # need to calculate the softmax loss
    probs = softmax(layer2)
    cost  = - np.sum(np.log(probs[np.arange(N), np.argmax(labels, axis=1)]))
    ### END YOUR CODE
    
    ### YOUR CODE HERE: backward propagation
    #There is no regularization :/
    # dx -> softmax -> W2 * layer1_a + b2 -> sigmoid -> W1 * data + b1 -> ..
    dx     = probs.copy()
    dx    -= labels

    dlayer2   = np.zeros_like(dx)
    gradW2    = np.zeros_like(W2)
    gradW1    = np.zeros_like(W1)
    gradb2    = np.zeros_like(b2)
    gradb1    = np.zeros_like(b1)

    gradW2    = np.dot(layer1_a.T, dx)
    gradb2    = np.sum(dx, axis=0)
    dlayer2   = np.dot(dx, W2.T)
    dlayer1   = sigmoid_grad(layer1_a) * dlayer2
    gradW1    = np.dot(data.T, dlayer1)
    gradb1    = np.sum(dlayer1, axis=0)

    # Decided to implement affine (forward and backward function)
    #                      sigmoid (forward and backward function)
    # These should work properly;
    # scores, cache_1  = affine_forward(data, W1, b1)
    # scores, cache_s1 = sigmoid_forward(scores)
    # scores, cache_2  = affine_forward(scores, W2, b2)

    # # need to calculate the softmax loss
    # probs = softmax(scores)
    # cost  = -np.sum(np.log(probs[np.arange(N), np.argmax(labels)] + 1e-12)) / N
    # softmax_dx    = probs.copy()
    # softmax_dx[np.arange(N), np.argmax(labels,axis=1)] -= 1
    # softmax_dx /= N

    # grads = {}

    # dlayer2, grads['W2'], grads['b2'] = affine_backward(softmax_dx, cache_2)
    # dlayer1s                          = sigmoid_backward(dlayer2, cache_s1)
    # dlayer1, grads['W1'], grads['b1'] = affine_backward(dlayer1s, cache_1)
    #softmax_dx is the gradient of the loss w.r.t. y_{est}
    ### END YOUR CODE
    
    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
        gradW2.flatten(), gradb2.flatten()))
    
    return cost, grad
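The commented-out alternative above relies on affine_forward / affine_backward helpers that are likewise not shown. Hypothetical versions consistent with that usage (and with the sigmoid_forward / sigmoid_backward pair sketched earlier) could look like:

import numpy as np

def affine_forward(x, W, b):
    # Linear layer forward pass; cache everything needed for the backward pass.
    out = x.dot(W) + b
    return out, (x, W, b)

def affine_backward(dout, cache):
    # Gradients of a linear layer with respect to its input, weights and bias.
    x, W, b = cache
    dx = dout.dot(W.T)
    dW = x.T.dot(dout)
    db = np.sum(dout, axis=0, keepdims=True)
    return dx, dW, db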