Example #1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('input_file', type=argparse.FileType())
    parser.add_argument('output_file', type=argparse.FileType('w'))
    parser.add_argument('vector_size', type=int)
    parser.add_argument('context_size', type=int)
    parser.add_argument('vocabulary_size', type=int)
    args = parser.parse_args()

    # Preprocess input text into word indices and context/target pairs
    sentences = list(lower(tokenize(args.input_file)))
    dictionary = build_dictionary(sentences, args.vocabulary_size)
    indices = to_indices(sentences, dictionary)
    inputs, outputs = create_context(indices, args.context_size)

    # Create cost and gradient function for stochastic gradient descent
    cost_gradient = bind_cost_gradient(skip_gram_cost_gradient, inputs, outputs,
                                       sampler=get_stochastic_sampler(100))
    # Train skip-gram word vectors
    initial_parameters = np.random.normal(size=(2, len(dictionary) + 1, args.vector_size))
    parameters, cost_history = gradient_descent(cost_gradient, initial_parameters, 10000)
    input_vectors, output_vectors = parameters
    word_vectors = input_vectors + output_vectors
    sorted_pairs = sorted(dictionary.items(), key=operator.itemgetter(1))
    words = [word for word, index in sorted_pairs]

    # Write each word and its vector to the output file
    for word in words:
        vector = word_vectors[dictionary[word]]
        vector_string = ' '.join(str(element) for element in vector)
        print(word, vector_string, file=args.output_file)
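
Every example on this page routes its data through bind_cost_gradient before handing the result to gradient_check or gradient_descent. The following is a minimal sketch of that contract, assuming the helper simply averages the per-example cost and gradient over whatever the sampler yields; the function bodies here are illustrative guesses, not the project's actual implementations of bind_cost_gradient, batch_sampler, or get_stochastic_sampler.

import numpy as np

def batch_sampler(inputs, outputs):
    # Visit the whole data set on every call (batch gradient descent)
    return zip(inputs, outputs)

def get_stochastic_sampler(sample_size):
    # Return a sampler that draws sample_size random examples per call
    def sampler(inputs, outputs):
        indices = np.random.randint(0, len(inputs), size=sample_size)
        return ((inputs[i], outputs[i]) for i in indices)
    return sampler

def bind_cost_gradient(cost_gradient, inputs, outputs, sampler=batch_sampler):
    # Turn a per-example (parameters, input, output) -> (cost, gradient) function
    # into a parameters -> (cost, gradient) function by averaging over sampled data
    def bound_cost_gradient(parameters):
        total_cost = 0.0
        total_gradient = np.zeros_like(parameters)
        count = 0
        for input, output in sampler(inputs, outputs):
            cost, gradient = cost_gradient(parameters, input, output)
            total_cost += cost
            total_gradient += gradient
            count += 1
        return total_cost / count, total_gradient / count
    return bound_cost_gradient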
Example #2
    def train(self, sentences, iterations=1000):
        # Preprocess sentences to create indices of context and next words
        self.dictionary = build_dictionary(sentences, self.vocabulary_size)
        indices = to_indices(sentences, self.dictionary)
        self.reverse_dictionary = {
            index: word
            for word, index in self.dictionary.items()
        }
        inputs, outputs = self.create_context(indices)

        # Create cost and gradient function for gradient descent
        shapes = [self.W_shape, self.U_shape, self.H_shape, self.C_shape]
        flatten_nplm_cost_gradient = flatten_cost_gradient(
            nplm_cost_gradient, shapes)
        cost_gradient = bind_cost_gradient(flatten_nplm_cost_gradient,
                                           inputs,
                                           outputs,
                                           sampler=get_stochastic_sampler(10))

        # Train neural network
        parameters_size = sum(np.prod(shape) for shape in shapes)
        initial_parameters = np.random.normal(size=parameters_size)
        self.parameters, cost_history = gradient_descent(
            cost_gradient, initial_parameters, iterations)
        return cost_history
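
The train method above (and several examples below) wraps nplm_cost_gradient with flatten_cost_gradient so gradient descent can operate on a single flat parameter vector. Here is a minimal sketch of that adapter, under the assumption that the wrapped function takes a list of weight matrices and returns a matching list of gradients; it is an illustration, not the project's actual flatten_cost_gradient.

import numpy as np

def flatten_cost_gradient(cost_gradient, shapes):
    # Adapt a cost/gradient function over a list of weight matrices so it can be
    # driven by one flat parameter vector, as gradient_descent expects
    def flat_cost_gradient(flat_parameters, input, output):
        sizes = [int(np.prod(shape)) for shape in shapes]
        split_points = np.cumsum(sizes)[:-1]
        chunks = np.split(flat_parameters, split_points)
        matrices = [chunk.reshape(shape) for chunk, shape in zip(chunks, shapes)]
        cost, gradients = cost_gradient(matrices, input, output)
        # Flatten the per-matrix gradients back into one vector
        flat_gradient = np.concatenate([gradient.reshape(-1) for gradient in gradients])
        return cost, flat_gradient
    return flat_cost_gradient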
Example #3
    def test_supervised_gradient_descent(self):
        def linear_regression_cost_gradient(parameters, input, output):
            prediction = np.dot(parameters, input)
            cost = (prediction - output)**2
            gradient = 2.0 * (prediction - output) * input
            return cost, gradient

        inputs = np.random.normal(0.0, size=(10, 2))
        outputs = np.random.normal(0.0, size=10)
        initial_parameters = np.random.uniform(-1.0, 1.0, size=2)

        # Create cost and gradient function for supervised SGD and check its gradient
        cost_gradient = bind_cost_gradient(linear_regression_cost_gradient,
                                           inputs,
                                           outputs,
                                           sampler=batch_sampler)
        result = gradient_check(cost_gradient, initial_parameters)
        self.assertEqual([], result)

        # Run gradient descent on the function and see if it minimizes cost function
        actual, cost_history = gradient_descent(cost_gradient,
                                                initial_parameters, 10)

        # Compute exact solution of linear regression by closed form
        expected = np.linalg.solve(np.dot(inputs.T, inputs),
                                   np.dot(inputs.T, outputs))

        for e, a in zip(expected, actual):
            self.assertAlmostEqual(e, a, places=0)
    def assertMultinomialLogisticRegression(self, sampler):
        data_size = 3
        input_size = 5
        output_size = 4
        inputs = np.random.uniform(-10.0, 10.0, size=(data_size, input_size))
        outputs = np.random.randint(0, output_size, size=data_size)
        initial_parameters = np.random.normal(size=(input_size, output_size))

        # Create cost and gradient function for gradient descent and check its gradient
        cost_gradient = bind_cost_gradient(
            multinomial_logistic_regression_cost_gradient,
            inputs,
            outputs,
            sampler=sampler)
        result = gradient_check(cost_gradient, initial_parameters)
        self.assertEqual([], result)

        # Train multinomial logistic regression and see if it predicts correct labels
        final_parameters, cost_history = gradient_descent(
            cost_gradient, initial_parameters, 100)
        predictions = np.argmax(softmax(np.dot(final_parameters.T, inputs.T)),
                                axis=0)

        for output, prediction in zip(outputs, predictions):
            self.assertEqual(output, prediction)
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('input_file', type=argparse.FileType())
    parser.add_argument('output_file', type=argparse.FileType('w'))
    parser.add_argument('vector_size', type=int)
    parser.add_argument('context_size', type=int)
    parser.add_argument('vocabulary_size', type=int)
    args = parser.parse_args()

    # Preprocess input text into word indices and context/target pairs
    sentences = list(lower(tokenize(args.input_file)))
    dictionary = build_dictionary(sentences, args.vocabulary_size)
    indices = to_indices(sentences, dictionary)
    inputs, outputs = create_context(indices, args.context_size)

    # Create cost and gradient function for stochastic gradient descent
    cost_gradient = bind_cost_gradient(skip_gram_cost_gradient,
                                       inputs,
                                       outputs,
                                       sampler=get_stochastic_sampler(100))
    # Train skip-gram word vectors
    initial_parameters = np.random.normal(size=(2, len(dictionary) + 1,
                                                args.vector_size))
    parameters, cost_history = gradient_descent(cost_gradient,
                                                initial_parameters, 10000)
    input_vectors, output_vectors = parameters
    word_vectors = input_vectors + output_vectors
    sorted_pairs = sorted(dictionary.items(), key=operator.itemgetter(1))
    words = [word for word, index in sorted_pairs]

    # Write each word and its vector to the output file
    for word in words:
        vector = word_vectors[dictionary[word]]
        vector_string = ' '.join(str(element) for element in vector)
        print(word, vector_string, file=args.output_file)
Example #6
    def test_supervised_gradient_descent(self):
        def linear_regression_cost_gradient(parameters, input, output):
            prediction = np.dot(parameters, input)
            cost = (prediction - output) ** 2
            gradient = 2.0 * (prediction - output) * input
            return cost, gradient

        inputs = np.random.normal(0.0, size=(10, 2))
        outputs = np.random.normal(0.0, size=10)
        initial_parameters = np.random.uniform(-1.0, 1.0, size=2)

        # Create cost and gradient function for supervised SGD and check its gradient
        cost_gradient = bind_cost_gradient(linear_regression_cost_gradient,
                                           inputs, outputs, sampler=batch_sampler)
        result = gradient_check(cost_gradient, initial_parameters)
        self.assertEqual([], result)

        # Run gradient descent on the function and see if it minimizes cost function
        actual, cost_history = gradient_descent(cost_gradient, initial_parameters, 10)

        # Compute exact solution of linear regression by closed form
        expected = np.linalg.solve(np.dot(inputs.T, inputs), np.dot(inputs.T, outputs))

        for e, a in zip(expected, actual):
            self.assertAlmostEqual(e, a, places=0)
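
For reference, the "closed form" that this test compares gradient descent against is the ordinary least-squares solution of the normal equations X^T X theta = X^T y. The short check below is an illustration only (X, y and the variable names are invented here); it confirms that solving the normal equations agrees with NumPy's dedicated least-squares routine.

import numpy as np

X = np.random.normal(size=(10, 2))
y = np.random.normal(size=10)

# Solve X^T X theta = X^T y directly, then compare with np.linalg.lstsq
normal_equations = np.linalg.solve(X.T.dot(X), X.T.dot(y))
least_squares, *_ = np.linalg.lstsq(X, y, rcond=None)

assert np.allclose(normal_equations, least_squares)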
Example #7
    def gradient_check(self, inputs, outputs):
        # Create cost and gradient function for gradient check
        shapes = [self.W_shape, self.U_shape, self.H_shape, self.C_shape]
        flatten_nplm_cost_gradient = flatten_cost_gradient(nplm_cost_gradient, shapes)
        cost_gradient = bind_cost_gradient(flatten_nplm_cost_gradient, inputs, outputs)

        # Gradient check!
        parameters_size = sum(np.prod(shape) for shape in shapes)
        initial_parameters = np.random.normal(size=parameters_size)
        result = gradient_check(cost_gradient, initial_parameters)
        return result
Example #8
    def test_neural_network(self):
        np.random.seed(0)
        input_size = 2
        hidden_size = 2
        output_size = 2

        # Classic XOR test data
        inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
        outputs = np.array([0, 1, 1, 0])

        # Create cost and gradient function for gradient descent
        shapes = [(hidden_size, input_size), (output_size, hidden_size)]
        flatten_neural_network_cost_gradient = flatten_cost_gradient(
            neural_network_cost_gradient, shapes)
        cost_gradient = bind_cost_gradient(
            flatten_neural_network_cost_gradient,
            inputs,
            outputs,
            sampler=batch_sampler)

        # Check gradient with initial parameters
        parameters_size = sum(np.prod(shape) for shape in shapes)
        initial_parameters = np.random.normal(size=parameters_size)
        result = gradient_check(cost_gradient, initial_parameters)
        self.assertEqual([], result)

        # Train neural network (this is slow even for such a simple task!)
        final_parameters, cost_history = gradient_descent(
            cost_gradient, initial_parameters, 1000)

        # Check that the cost decreases monotonically (no guarantee in theory, but it works in practice)
        previous_cost = None
        for cost in cost_history:
            if previous_cost is not None:
                self.assertLessEqual(cost, previous_cost)
            previous_cost = cost

        # TODO: extract duplicated code for prediction to reusable component
        split_index = hidden_size * input_size
        W1, W2 = np.split(final_parameters, [split_index])
        W1 = W1.reshape((hidden_size, input_size))
        W2 = W2.reshape((output_size, hidden_size))

        for input, output in zip(inputs, outputs):
            input = input.reshape(-1, 1)
            hidden_layer = expit(W1.dot(input))
            inside_softmax = W2.dot(hidden_layer)
            prediction = softmax(inside_softmax.reshape(-1)).reshape(-1, 1)
            label = np.argmax(prediction)

            # Check if output is correctly predicted
            self.assertEqual(output, label)
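
The prediction loop above spells out the forward pass (sigmoid hidden layer, softmax output). As a companion, here is a sketch of a matching cost/gradient function with cross-entropy loss; treat it as an assumption about what neural_network_cost_gradient computes, and note that the name two_layer_cost_gradient and the scipy imports are chosen here purely for illustration.

import numpy as np
from scipy.special import expit, softmax

def two_layer_cost_gradient(parameters, input, output):
    W1, W2 = parameters                                # shapes (hidden, in) and (out, hidden)
    x = np.asarray(input, dtype=float).reshape(-1, 1)
    hidden = expit(W1.dot(x))                          # sigmoid hidden layer
    probabilities = softmax(W2.dot(hidden).reshape(-1))
    cost = -np.log(probabilities[output])              # cross-entropy for the true label

    delta = probabilities.reshape(-1, 1)               # gradient of softmax + cross-entropy
    delta[output] -= 1.0
    gradient_W2 = delta.dot(hidden.T)
    gradient_hidden = W2.T.dot(delta)
    gradient_W1 = (gradient_hidden * hidden * (1.0 - hidden)).dot(x.T)
    return cost, [gradient_W1, gradient_W2]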
Example #9
    def gradient_check(self, inputs, outputs):
        # Create cost and gradient function for gradient check
        shapes = [self.W_shape, self.U_shape, self.H_shape, self.C_shape]
        flatten_nplm_cost_gradient = flatten_cost_gradient(
            nplm_cost_gradient, shapes)
        cost_gradient = bind_cost_gradient(flatten_nplm_cost_gradient, inputs,
                                           outputs)

        # Gradient check!
        parameters_size = sum(np.prod(shape) for shape in shapes)
        initial_parameters = np.random.normal(size=parameters_size)
        result = gradient_check(cost_gradient, initial_parameters)
        return result
    def test_neural_network(self):
        np.random.seed(0)
        input_size = 2
        hidden_size = 2
        output_size = 2

        # Classic XOR test data
        inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
        outputs = np.array([0, 1, 1, 0])

        # Create cost and gradient function for gradient descent
        shapes = [(hidden_size, input_size), (output_size, hidden_size)]
        flatten_neural_network_cost_gradient = flatten_cost_gradient(neural_network_cost_gradient, shapes)
        cost_gradient = bind_cost_gradient(flatten_neural_network_cost_gradient, inputs, outputs, sampler=batch_sampler)

        # Check gradient with initial parameters
        parameters_size = sum(np.prod(shape) for shape in shapes)
        initial_parameters = np.random.normal(size=parameters_size)
        result = gradient_check(cost_gradient, initial_parameters)
        self.assertEqual([], result)

        # Train neural network (this is slow even for such a simple task!)
        final_parameters, cost_history = gradient_descent(cost_gradient, initial_parameters, 1000)

        # Check that the cost decreases monotonically (no guarantee in theory, but it works in practice)
        previous_cost = None
        for cost in cost_history:
            if previous_cost is not None:
                self.assertLessEqual(cost, previous_cost)
            previous_cost = cost

        # TODO: extract duplicated code for prediction to reusable component
        split_index = hidden_size * input_size
        W1, W2 = np.split(final_parameters, [split_index])
        W1 = W1.reshape((hidden_size, input_size))
        W2 = W2.reshape((output_size, hidden_size))

        for input, output in zip(inputs, outputs):
            input = input.reshape(-1, 1)
            hidden_layer = expit(W1.dot(input))
            inside_softmax = W2.dot(hidden_layer)
            prediction = softmax(inside_softmax.reshape(-1)).reshape(-1, 1)
            label = np.argmax(prediction)

            # Check if output is correctly predicted
            self.assertEqual(output, label)
Example #11
    def train(self, sentences, iterations=1000):
        # Preprocess sentences to create indices of context and next words
        self.dictionary = build_dictionary(sentences, self.vocabulary_size)
        indices = to_indices(sentences, self.dictionary)
        self.reverse_dictionary = {index: word for word, index in self.dictionary.items()}
        inputs, outputs = self.create_context(indices)

        # Create cost and gradient function for gradient descent
        shapes = [self.W_shape, self.U_shape, self.H_shape, self.C_shape]
        flatten_nplm_cost_gradient = flatten_cost_gradient(nplm_cost_gradient, shapes)
        cost_gradient = bind_cost_gradient(flatten_nplm_cost_gradient, inputs, outputs,
                                           sampler=get_stochastic_sampler(10))

        # Train neural network
        parameters_size = sum(np.prod(shape) for shape in shapes)
        initial_parameters = np.random.normal(size=parameters_size)
        self.parameters, cost_history = gradient_descent(cost_gradient, initial_parameters, iterations)
        return cost_history
    def assertMultinomialLogisticRegression(self, sampler):
        data_size = 3
        input_size = 5
        output_size = 4
        inputs = np.random.uniform(-10.0, 10.0, size=(data_size, input_size))
        outputs = np.random.randint(0, output_size, size=data_size)
        initial_parameters = np.random.normal(size=(input_size, output_size))

        # Create cost and gradient function for gradient descent and check its gradient
        cost_gradient = bind_cost_gradient(multinomial_logistic_regression_cost_gradient,
                                           inputs, outputs, sampler=sampler)
        result = gradient_check(cost_gradient, initial_parameters)
        self.assertEqual([], result)

        # Train multinomial logistic regression and see if it predicts correct labels
        final_parameters, cost_history = gradient_descent(cost_gradient, initial_parameters, 100)
        predictions = np.argmax(softmax(np.dot(final_parameters.T, inputs.T)), axis=0)

        for output, prediction in zip(outputs, predictions):
            self.assertEqual(output, prediction)
    def assertLogisticRegression(self, sampler):
        data_size = 3
        input_size = 5
        inputs = np.random.uniform(-10.0, 10.0, size=(data_size, input_size))
        outputs = np.random.randint(0, 2, size=data_size)
        initial_parameters = np.random.normal(scale=1e-5, size=input_size)

        # Create cost and gradient function for gradient descent and check its gradient
        cost_gradient = bind_cost_gradient(logistic_regression_cost_gradient,
                                           inputs, outputs, sampler=sampler)
        result = gradient_check(cost_gradient, initial_parameters)
        self.assertEqual([], result)

        # Train logistic regression and see if it predicts correct labels
        final_parameters, cost_history = gradient_descent(cost_gradient, initial_parameters, 100)
        predictions = expit(np.dot(inputs, final_parameters)) > 0.5

        # Binary classification of 3 data points in 5 dimensions is always linearly separable
        for output, prediction in zip(outputs, predictions):
            self.assertEqual(output, prediction)