Example #1
def nplm_cost_gradient(parameters, input, output):
    """
    Cost and gradient function for the neural probabilistic language model (NPLM)
    :param parameters: tuple of (W, U, H, C)
    :param input: indices of the context words
    :param output: index of current word
    :return: cost and gradient
    """
    W, U, H, C = parameters
    context_size = len(input)
    x = np.concatenate([C[input[i]] for i in range(context_size)])
    x = np.append(x, 1.)  # Append bias term
    x = x.reshape(-1, 1)
    hidden_layer = np.tanh(H.dot(x))
    y = W.dot(x) + U.dot(hidden_layer)
    prediction = softmax(y.reshape(-1)).reshape(-1, 1)
    cost = -np.sum(np.log(prediction[output]))

    one_hot = np.zeros_like(prediction)
    one_hot[output] = 1
    delta = prediction - one_hot
    gradient_W = delta.dot(x.T)
    gradient_U = delta.dot(hidden_layer.T)
    gradient_H = tanh_gradient(hidden_layer) * U.T.dot(delta).dot(x.T)
    gradient_C = np.zeros_like(C)
    gradient_y_x = W + U.dot(tanh_gradient(hidden_layer) * H)
    gradient_x = gradient_y_x.T.dot(delta)
    gradient_x = gradient_x[:-1, :]

    gradient_x_split = np.split(gradient_x, context_size)
    for i in range(context_size):
        gradient_C[input[i]] += gradient_x_split[i].flatten()

    gradient = [gradient_W, gradient_U, gradient_H, gradient_C]
    return cost, gradient
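
This example relies on two helpers that are not part of the listing: softmax (a stable version is sketched after the softmax test further down) and tanh_gradient. Judging from how it is applied to hidden_layer, tanh_gradient takes the tanh activations themselves rather than the pre-activations. Below is a minimal sketch under that assumption, followed by a hypothetical call that makes the expected parameter shapes visible; all sizes are made up for illustration.

import numpy as np

def tanh_gradient(tanh_output):
    # Derivative of tanh expressed in terms of its output: d/da tanh(a) = 1 - tanh(a)**2
    return 1.0 - tanh_output ** 2

# Hypothetical sizes, chosen only to make the shapes visible
vocabulary_size, vector_size, hidden_size, context_size = 100, 16, 32, 3
context_width = context_size * vector_size + 1  # +1 for the bias appended to x
W = np.random.normal(scale=0.1, size=(vocabulary_size, context_width))
U = np.random.normal(scale=0.1, size=(vocabulary_size, hidden_size))
H = np.random.normal(scale=0.1, size=(hidden_size, context_width))
C = np.random.normal(scale=0.1, size=(vocabulary_size, vector_size))

cost, gradient = nplm_cost_gradient((W, U, H, C), input=[4, 8, 15], output=23)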
Example #2
def neural_network_cost_gradient(parameters, input, output):
    """
    3-layer network cost and gradient function
    :param parameters: pair of (W1, W2)
    :param input: input vector
    :param output: index to correct label
    :return: cross entropy cost and gradient
    """
    W1, W2 = parameters
    input = input.reshape(-1, 1)

    hidden_layer = expit(W1.dot(input))
    inside_softmax = W2.dot(hidden_layer)

    # TODO: allow softmax to normalize column vector
    prediction = softmax(inside_softmax.reshape(-1)).reshape(-1, 1)
    cost = -np.sum(np.log(prediction[output]))

    one_hot = np.zeros_like(prediction)
    one_hot[output] = 1
    delta = prediction - one_hot
    gradient_W2 = delta.dot(hidden_layer.T)
    gradient_W1 = sigmoid_gradient(hidden_layer) * W2.T.dot(delta).dot(input.T)

    gradient = [gradient_W1, gradient_W2]
    return cost, gradient
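
Like the NPLM example, this function uses a sigmoid_gradient helper that is not shown. Since it is applied to hidden_layer (the expit output), it presumably takes the sigmoid activations rather than the pre-activations; a minimal sketch under that assumption:

def sigmoid_gradient(sigmoid_output):
    # Derivative of the logistic sigmoid written in terms of its output:
    # if s = expit(a), then ds/da = s * (1 - s)
    return sigmoid_output * (1.0 - sigmoid_output)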
Example #3
    def assertMultinomialLogisticRegression(self, sampler):
        data_size = 3
        input_size = 5
        output_size = 4
        inputs = np.random.uniform(-10.0, 10.0, size=(data_size, input_size))
        outputs = np.random.randint(0, output_size, size=data_size)
        initial_parameters = np.random.normal(size=(input_size, output_size))

        # Create cost and gradient function for gradient descent and check its gradient
        cost_gradient = bind_cost_gradient(
            multinomial_logistic_regression_cost_gradient,
            inputs,
            outputs,
            sampler=sampler)
        result = gradient_check(cost_gradient, initial_parameters)
        self.assertEqual([], result)

        # Train multinomial logistic regression and see if it predicts the correct labels
        final_parameters, cost_history = gradient_descent(
            cost_gradient, initial_parameters, 100)
        predictions = np.argmax(softmax(np.dot(final_parameters.T, inputs.T)),
                                axis=0)

        for output, prediction in zip(outputs, predictions):
            self.assertEqual(output, prediction)
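
bind_cost_gradient, batch_sampler, gradient_check and gradient_descent come from the surrounding project and are not shown in this listing. Based purely on how they are called in these tests, bind_cost_gradient appears to turn a per-example cost/gradient function into a function of the parameters alone, accumulating over the examples chosen by the sampler. The following is a sketch under those assumptions; the sampler protocol and the averaging are guesses, not the project's actual API.

import numpy as np

def batch_sampler(inputs, outputs):
    # Full-batch sampling: yield every (input, output) pair once
    for x, y in zip(inputs, outputs):
        yield x, y

def bind_cost_gradient(cost_gradient, inputs, outputs, sampler=batch_sampler):
    # Bind the data set so the result can be handed to gradient_check / gradient_descent
    def bound(parameters):
        total_cost, total_gradient, count = 0.0, np.zeros_like(parameters), 0
        for x, y in sampler(inputs, outputs):
            cost, gradient = cost_gradient(parameters, x, y)
            total_cost += cost
            total_gradient += gradient
            count += 1
        return total_cost / count, total_gradient / count
    return bound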
Example #4
    def test_softmax(self):
        # softmax should receive numpy array and return normalized vector
        expect = np.array([exp(1) / (exp(1) + exp(2)), exp(2) / (exp(1) + exp(2))])
        actual = softmax(np.array([1, 2]))
        self.assertDistribution(actual)
        self.assertNumpyEqual(expect, actual)

        # softmax should be invariant to constant offsets in the input
        # softmax should be able to handle very large or small values
        actual = softmax(np.array([1001, 1002]))
        self.assertNumpyEqual(expect, actual)
        actual = softmax(np.array([-1002, -1001]))
        self.assertNumpyEqual(expect, actual)

        # softmax should receive matrix and return matrix of same size
        expect = np.array([[exp(1) / (exp(1) + exp(2)), exp(2) / (exp(1) + exp(2))],
                           [exp(1) / (exp(1) + exp(2)), exp(2) / (exp(1) + exp(2))]])
        actual = softmax(np.array([[1, 2], [3, 4]]))
        self.assertNumpyEqual(expect, actual)
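
The softmax implementation itself is not included in this listing. A minimal, numerically stable sketch that satisfies the three properties checked above (normalization, invariance to constant offsets, and row-wise handling of matrices) could look like this:

import numpy as np

def softmax(x):
    x = np.asarray(x, dtype=float)
    if x.ndim == 1:
        # Subtract the maximum so exp() cannot overflow; softmax is
        # invariant to constant offsets, so the result is unchanged.
        exps = np.exp(x - np.max(x))
        return exps / np.sum(exps)
    # For matrices, normalize each row into its own distribution
    exps = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exps / np.sum(exps, axis=1, keepdims=True)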
Example #5
    def test_neural_network(self):
        np.random.seed(0)
        input_size = 2
        hidden_size = 2
        output_size = 2

        # Classic XOR test data
        inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
        outputs = np.array([0, 1, 1, 0])

        # Create cost and gradient function for gradient descent
        shapes = [(hidden_size, input_size), (output_size, hidden_size)]
        flatten_neural_network_cost_gradient = flatten_cost_gradient(
            neural_network_cost_gradient, shapes)
        cost_gradient = bind_cost_gradient(
            flatten_neural_network_cost_gradient,
            inputs,
            outputs,
            sampler=batch_sampler)

        # Check gradient with initial parameters
        parameters_size = sum(np.prod(shape) for shape in shapes)
        initial_parameters = np.random.normal(size=parameters_size)
        result = gradient_check(cost_gradient, initial_parameters)
        self.assertEqual([], result)

        # Train neural network (this is slow even for such a simple task!)
        final_parameters, cost_history = gradient_descent(
            cost_gradient, initial_parameters, 1000)

        # Check if cost monotonically decreases (no guarantee in theory, but works in practice)
        previous_cost = None
        for cost in cost_history:
            if previous_cost is not None:
                self.assertLessEqual(cost, previous_cost)
            previous_cost = cost

        # TODO: extract duplicated code for prediction to reusable component
        split_index = hidden_size * input_size
        W1, W2 = np.split(final_parameters, [split_index])
        W1 = W1.reshape((hidden_size, input_size))
        W2 = W2.reshape((output_size, hidden_size))

        for input, output in zip(inputs, outputs):
            input = input.reshape(-1, 1)
            hidden_layer = expit(W1.dot(input))
            inside_softmax = W2.dot(hidden_layer)
            prediction = softmax(inside_softmax.reshape(-1)).reshape(-1, 1)
            label = np.argmax(prediction)

            # Check if output is correctly predicted
            self.assertEqual(output, label)
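
flatten_cost_gradient is another project helper that is not shown. From its use above, it evidently adapts neural_network_cost_gradient, which expects a list of weight matrices, so that it accepts a single flat parameter vector and returns a flat gradient of the given shapes. A sketch under that assumption:

import numpy as np

def flatten_cost_gradient(cost_gradient, shapes):
    def flat_cost_gradient(flat_parameters, input, output):
        # Split the flat vector into one block per shape and reshape each block
        sizes = [int(np.prod(shape)) for shape in shapes]
        pieces = np.split(flat_parameters, np.cumsum(sizes)[:-1])
        parameters = [piece.reshape(shape) for piece, shape in zip(pieces, shapes)]
        cost, gradients = cost_gradient(parameters, input, output)
        # Flatten the per-matrix gradients back into one vector
        return cost, np.concatenate([g.reshape(-1) for g in gradients])
    return flat_cost_gradient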
Example #6
def multinomial_logistic_regression_cost_gradient(parameters, input, output):
    """
    Cost and gradient for multinomial logistic regression
    :param parameters: weight vector
    :param input: feature vector
    :param output: integer label
    :return: cost and gradient for the input and output
    """
    prediction = softmax(np.dot(parameters.T, input))
    cost = -np.log(prediction[output])
    # Create one-hot vector
    one_hot = np.zeros_like(prediction)
    one_hot[output] = 1
    gradient = np.dot(input.reshape(-1, 1), (prediction - one_hot).reshape(-1, 1).T)
    return cost, gradient
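
Below is a small, hypothetical check of the returned gradient against a one-sided finite difference; this is essentially what the project's gradient_check presumably automates over every parameter entry. The sizes, seed and tolerance are made up, and softmax is assumed as sketched above.

import numpy as np

input_size, output_size = 5, 4
rng = np.random.default_rng(0)
parameters = rng.normal(size=(input_size, output_size))
x = rng.normal(size=input_size)
label = 2

cost, gradient = multinomial_logistic_regression_cost_gradient(parameters, x, label)

# Perturb a single weight and compare the analytic gradient to the finite difference
epsilon = 1e-6
perturbed = parameters.copy()
perturbed[1, 3] += epsilon
cost_plus, _ = multinomial_logistic_regression_cost_gradient(perturbed, x, label)
assert abs((cost_plus - cost) / epsilon - gradient[1, 3]) < 1e-4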
Example #7
def softmax_cost_gradient(parameters, input, output):
    """
    Softmax cost and gradient function for word2vec models
    :param parameters: word vectors for input and output (shape: (2, vocabulary_size, vector_size))
    :param input: index to input word vectors
    :param output: index to output word vectors
    :return: cross entropy cost and gradient
    """
    input_vectors, output_vectors = parameters
    input_vector = input_vectors[input]
    prediction = softmax(output_vectors.dot(input_vector))

    one_hot_vector = np.zeros_like(prediction)
    one_hot_vector[output] = 1

    gradient_input = np.zeros_like(input_vectors)
    gradient_input[input] = output_vectors.T.dot(prediction - one_hot_vector)
    gradient_output = (prediction - one_hot_vector).reshape(-1, 1).dot(input_vector.reshape(-1, 1).T)
    gradient = np.array([gradient_input, gradient_output])

    cost = -np.log(prediction[output])
    return cost, gradient
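
A hypothetical call showing the parameter layout the docstring describes; the vocabulary and vector sizes below are made up, and softmax is assumed to normalize a 1-D array as sketched earlier:

import numpy as np

vocabulary_size, vector_size = 50, 8
rng = np.random.default_rng(1)
parameters = rng.normal(scale=0.1, size=(2, vocabulary_size, vector_size))

# Cost and gradient for predicting the word with index 7 given the word with index 3
cost, gradient = softmax_cost_gradient(parameters, input=3, output=7)
# cost is a scalar cross-entropy; gradient has the same (2, 50, 8) layout as parameters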
Example #8
    def test_multinomial_logistic_regression(self):
        input_size = 10
        output_size = 5
        input = np.random.normal(size=(input_size,))
        output = np.random.randint(0, output_size)

        def multinomial_logistic_regression_wrapper(parameters):
            return multinomial_logistic_regression_cost_gradient(parameters, input, output)

        initial_parameters = np.random.normal(size=(input_size, output_size))
        result = gradient_check(multinomial_logistic_regression_wrapper, initial_parameters)
        self.assertEqual([], result)

        # Train multinomial logistic regression and see if it predicts the correct label
        final_parameters, cost_history = gradient_descent(
            multinomial_logistic_regression_wrapper, initial_parameters, 100)
        prediction = softmax(np.dot(final_parameters.T, input)) > 0.5
        for i in range(len(prediction)):
            if output == i:
                self.assertEqual(1, prediction[i])
            else:
                self.assertEqual(0, prediction[i])
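
gradient_descent is only referenced in these tests; from its call sites it takes a bound cost/gradient function, initial parameters and an iteration count, and returns the final parameters together with a cost history. A minimal sketch under those assumptions (the step size is a made-up default, not the project's actual value):

import numpy as np

def gradient_descent(cost_gradient, initial_parameters, iterations, learning_rate=0.1):
    # Plain gradient descent: step against the gradient and record each cost
    parameters = np.copy(initial_parameters)
    cost_history = []
    for _ in range(iterations):
        cost, gradient = cost_gradient(parameters)
        cost_history.append(cost)
        parameters -= learning_rate * gradient
    return parameters, cost_history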