Example #1
def negSamplingCostAndGradient(dataset, predicted, target, outputVectors, K=10):
    """ Negative sampling cost function for word2vec models """
    ###################################################################
    # Implement the cost and gradients for one predicted word vector  #
    # and one target word vector as a building block for word2vec     #
    # models, using the negative sampling technique. K is the sample  #
    # size. You might want to use dataset.sampleTokenIdx() to sample  #
    # a random word index.                                            #
    # Input/Output Specifications: same as softmaxCostAndGradient     #
    # We will not provide starter code for this function, but feel    #
    # free to reference the code you previously wrote for this        #
    # assignment!                                                     #
    ###################################################################

    ### YOUR CODE HERE
    # Draw K negative sample indices (and the corresponding output vectors).
    samples = []
    sample_indices = []
    for i in range(K):
        index = dataset.sampleTokenIdx()
        samples.append(outputVectors[index])
        sample_indices.append(index)

    samples = np.array(samples)
    N, D = outputVectors.shape

    # cost = -log(sigmoid(outputVectors[target] . predicted))
    #        - \sum_{k=1}^{K} log(sigmoid(-samples[k] . predicted))
    samples_dot_predicted = sigmoid(-samples.dot(predicted.reshape((D, 1))))  # sigmoid(-samples[k] . predicted)
    predicted_dot_target = sigmoid(predicted.dot(outputVectors[target]))      # sigmoid(outputVectors[target] . predicted)
    cost = -np.log(predicted_dot_target) - np.sum(np.log(samples_dot_predicted))

    # Gradient w.r.t. predicted:
    #   dJ/d(predicted) = (sigmoid(outputVectors[target] . predicted) - 1) * outputVectors[target]
    #                     + \sum_k sigmoid(samples[k] . predicted) * samples[k]

    sig = predicted_dot_target - 1.0  # sigmoid(outputVectors[target] . predicted) - 1
    # Note: 1 - sigmoid(-x) = sigmoid(x), so (1 - samples_dot_predicted) is sigmoid(samples[k] . predicted).
    gradPred = sig * outputVectors[target].reshape(1, D) + (1.0 - samples_dot_predicted).reshape(1, K).dot(samples)
    gradPred = gradPred.reshape(D,)

    # Gradient w.r.t. the output vectors: only the target row and the sampled
    # rows receive a non-zero gradient (a sampled row may be hit more than once).
    grad = np.zeros(outputVectors.shape)
    grad[target, :] = predicted * sig
    for sample, k in zip(samples, sample_indices):
        grad[k, :] += sigmoid(predicted.dot(sample)) * predicted

    ### END YOUR CODE


    assert grad.shape == outputVectors.shape
    assert gradPred.shape == predicted.shape

    return cost, gradPred, grad
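
A quick way to exercise this function is with a tiny mock corpus. Everything below (the MockDataset class, the vocabulary size, the vector dimension, the seed) is illustrative only and assumes numpy and a sigmoid helper are already in scope; it is not part of the assignment code.

import random
import numpy as np

class MockDataset:
    """Hypothetical stand-in for the assignment's dataset object."""
    def sampleTokenIdx(self):
        return random.randint(0, 4)   # 5 words in the toy vocabulary

np.random.seed(0)
outputVectors = np.random.randn(5, 3)   # 5 output word vectors of dimension 3
predicted = np.random.randn(3)          # one predicted (center) word vector

cost, gradPred, grad = negSamplingCostAndGradient(
    MockDataset(), predicted, target=2, outputVectors=outputVectors, K=10)
print(cost, gradPred.shape, grad.shape)
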
def predict(network, x):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']

    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = softmax(a3)

    return y
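
predict expects the network as a dict of weight matrices W1..W3 and bias vectors b1..b3. The layer sizes below (2 -> 3 -> 2 -> 2) and the random weights are made up purely to show the expected shapes, and they assume sigmoid and softmax helpers are in scope.

import numpy as np

# Hypothetical toy network: 2 inputs -> 3 hidden -> 2 hidden -> 2 outputs
network = {
    'W1': np.random.randn(2, 3), 'b1': np.zeros(3),
    'W2': np.random.randn(3, 2), 'b2': np.zeros(2),
    'W3': np.random.randn(2, 2), 'b3': np.zeros(2),
}
x = np.array([1.0, 0.5])
y = predict(network, x)   # class probabilities
print(y, y.sum())         # sums to 1.0 because of the softmax
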
Example #3
 def test_sigmoid(self):
     x = np.array([[1, 2], [-1, -2]])
     f = sigmoid(x)
     g = sigmoid_grad(f)
     np.testing.assert_array_almost_equal(
         f, np.array([[0.73105858, 0.88079708], [0.26894142, 0.11920292]]))
     np.testing.assert_array_almost_equal(
         g, np.array([[0.19661193, 0.10499359], [0.19661193, 0.10499359]]))
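
The expected values in this test match the standard logistic sigmoid, with sigmoid_grad taking the already-computed output f = sigmoid(x) rather than x itself. A minimal sketch of helpers consistent with those values (the actual assignment implementation may differ, e.g. in how it handles numerical stability):

import numpy as np

def sigmoid(x):
    """Element-wise logistic sigmoid: 1 / (1 + exp(-x))."""
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_grad(f):
    """Gradient of the sigmoid, expressed in terms of f = sigmoid(x): f * (1 - f)."""
    return f * (1.0 - f)
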
def compare_functions():
    x = np.arange(-5.0, 5.0, 0.1)
    y1 = sigmoid(x)
    y2 = step_function(x)
    y3 = relu(x)
    plt.plot(x, y1, label="sigmoid")
    plt.plot(x, y2, label="step", linestyle="--")
    plt.plot(x, y3, label="ReLU", linestyle=":")
    plt.ylim(-0.1, 1.1)
    plt.title("sigmoid & step")
    plt.legend()
    plt.show()
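
compare_functions assumes step_function and relu helpers alongside sigmoid. The originals are not shown in this listing; below is a minimal sketch of what they could look like, based only on how they are called above.

import numpy as np

def step_function(x):
    """Step activation: 1 where x > 0, else 0."""
    return np.array(x > 0, dtype=np.int64)

def relu(x):
    """Rectified linear unit: max(0, x), element-wise."""
    return np.maximum(0, x)
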
def forward(network, X):
    print(network)

    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    B1, B2, B3 = network['B1'], network['B2'], network['B3']

    A1 = np.dot(X, W1) + B1
    Z1 = sigmoid(A1)

    print(A1)
    print(Z1)

    A2 = np.dot(Z1, W2) + B2
    Z2 = sigmoid(A2)

    print(A2)
    print(Z2)

    A3 = np.dot(Z2, W3) + B3
    Y = softmax(A3)

    print(A3)

    return Y
Example #6
def forward_backward_prop(dimensions, data, labels, params):
    """ Forward and backward propagation for a two-layer sigmoidal network """
    ###################################################################
    # Compute the forward propagation and the cross entropy cost, and #
    # the backward propagation for the gradients of all parameters.   #
    ###################################################################

    ### Unpack network parameters (do not modify)
    t = 0
    W1 = np.reshape(params[t : t + dimensions[0] * dimensions[1]], (dimensions[0], dimensions[1]))
    t += dimensions[0] * dimensions[1]
    b1 = np.reshape(params[t : t + dimensions[1]], (1, dimensions[1]))
    t += dimensions[1]
    W2 = np.reshape(params[t : t + dimensions[1] * dimensions[2]], (dimensions[1], dimensions[2]))
    t += dimensions[1] * dimensions[2]
    b2 = np.reshape(params[t : t + dimensions[2]], (1, dimensions[2]))

    ### YOUR CODE HERE: forward propagation

    # cost = ...
    # labels is (20, 10) (20 1-hot vectors) - this is y
    # data is (20, 10) - this is x
    # W1 is (10, 5)
    # W2 is (5, 10)
    # b1 is (1, 5)
    # b2 is (1, 10)

    a = data.dot(W1) + b1
    h = sigmoid(a)  # hidden layer
    y_hat = softmax(h.dot(W2) + b2)  # Top classifier layer
    N, D = data.shape
    (Dx, H) = W1.shape

    # Cross-entropy cost, summed over the N training examples.
    cost_per_datapoint = -np.sum(labels * np.log(y_hat), axis=1).reshape((N, 1))  # sum over classes
    cost = np.sum(cost_per_datapoint)

    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation

    # gradW1 = ...
    # gradb1 = ...
    # gradW2 = ...
    # gradb2 = ...

    # delta3 = dJ/d(scores) for the softmax + cross-entropy output layer
    delta3 = y_hat - labels
    # delta2: backpropagate delta3 through W2 and the sigmoid hidden layer
    delta2 = delta3.dot(W2.T) * h * (1.0 - h)

    gradW2 = h.T.dot(delta3)
    gradb2 = np.sum(delta3, axis=0).reshape(b2.shape)
    gradW1 = data.T.dot(delta2)
    gradb1 = np.sum(delta2, axis=0).reshape(b1.shape)

    assert gradW1.shape == W1.shape
    assert gradb1.shape == b1.shape
    assert W2.shape == gradW2.shape
    assert gradb2.shape == b2.shape

    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), gradW2.flatten(), gradb2.flatten()))

    return cost, grad
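
The params vector packs W1, b1, W2 and b2 in that order, so its length must be Dx*H + H + H*Dy + Dy for dimensions = [Dx, H, Dy]. Below is a toy invocation with made-up sizes and random data; it assumes the sigmoid and softmax helpers are in scope, and none of the values come from the assignment.

import numpy as np

dimensions = [10, 5, 10]                 # [Dx, H, Dy]
N = 20
data = np.random.randn(N, dimensions[0])
labels = np.zeros((N, dimensions[2]))
labels[np.arange(N), np.random.randint(0, dimensions[2], N)] = 1  # one-hot rows

n_params = (dimensions[0] + 1) * dimensions[1] + (dimensions[1] + 1) * dimensions[2]
params = np.random.randn(n_params)

cost, grad = forward_backward_prop(dimensions, data, labels, params)
print(cost, grad.shape)                  # grad is flat, same length as params
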
def draw_sigmoid():
    x = np.arange(-5.0, 5.0, 0.1)
    y = sigmoid(x)
    plt.plot(x, y)
    plt.ylim(-0.1, 1.1)
    plt.show()