Example #1
    def test_multi_step_gradient(self):
        input_size = 5
        hidden_size = 6
        n = Gru(input_size, hidden_size)

        xs = [frand(size=input_size) for _ in range(10)]
        h0 = frand(hidden_size)

        intermediate_results = {}
        hs = n.forward_prop(xs, h0, intermediate_results)
        n.back_prop([derr(h) for h in hs], intermediate_results)

        for index in scalar_indices(n):
            array_name = gru_array_names(n)[index[0]]
            delta = 1e-4

            slightly_less = clone(n)
            gru_arrays(slightly_less)[index[0]][index[1]] -= delta
            slightly_less_hs = slightly_less.forward_prop(xs, h0, {})
            err_slightly_less = sum([err(h) for h in slightly_less_hs])

            slightly_more = clone(n)
            gru_arrays(slightly_more)[index[0]][index[1]] += delta
            slightly_more_hs = slightly_more.forward_prop(xs, h0, {})
            err_slightly_more = sum([err(h) for h in slightly_more_hs])

            # central-difference estimate of the gradient
            numeric_grad = (err_slightly_more - err_slightly_less) / (2 * delta)
            # gradient accumulated by back_prop for this parameter
            backprop_grad = gru_results_arrays(intermediate_results)[index[0]][index[1]]

            self.assertTrue(abs(numeric_grad - backprop_grad) < 0.01,
                            "{}: {} not within threshold of {}".format(array_name, backprop_grad, numeric_grad))
Example #2
def clone(gru: Gru):
    gru_clone = Gru(0, 0)
    gru_clone.w_rx = np.copy(gru.w_rx)
    gru_clone.w_rh = np.copy(gru.w_rh)
    gru_clone.b_r = np.copy(gru.b_r)
    gru_clone.w_zx = np.copy(gru.w_zx)
    gru_clone.w_zh = np.copy(gru.w_zh)
    gru_clone.b_z = np.copy(gru.b_z)
    gru_clone.w_hx = np.copy(gru.w_hx)
    gru_clone.w_hh = np.copy(gru.w_hh)
    gru_clone.b_h = np.copy(gru.b_h)
    return gru_clone
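
The tests on this page also depend on a handful of helpers that are not shown: frand, err, derr, gru_array_names, gru_arrays, gru_results_arrays and scalar_indices. Below is a minimal sketch of definitions consistent with how the tests call them; the bodies, and in particular the "d" + name gradient keys, are assumptions inferred from the call sites rather than the repository's actual code. The only hard requirement is that err and derr form a matched loss/gradient pair.

import numpy as np

GRU_ARRAY_NAMES = ["w_rx", "w_rh", "b_r",
                   "w_zx", "w_zh", "b_z",
                   "w_hx", "w_hh", "b_h"]

def frand(size):
    # small random values around zero; the tests only need a generic
    # perturbation point, so the exact distribution is a guess
    return np.random.uniform(-0.5, 0.5, size)

def err(h):
    # a simple scalar loss over a hidden state
    return 0.5 * np.sum(np.square(h))

def derr(h):
    # gradient of err with respect to h
    return h

def gru_array_names(gru):
    return GRU_ARRAY_NAMES

def gru_arrays(gru):
    # the parameter arrays, in the order of gru_array_names
    return [getattr(gru, name) for name in GRU_ARRAY_NAMES]

def gru_results_arrays(intermediate_results):
    # the matching gradients accumulated by back_prop; the "d" + name
    # key convention is an assumption
    return [intermediate_results["d" + name] for name in GRU_ARRAY_NAMES]

def scalar_indices(gru):
    # (array index, element index) pairs covering every scalar parameter
    for i, array in enumerate(gru_arrays(gru)):
        for j in np.ndindex(array.shape):
            yield (i, j)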
Example #3
    def test_learn_word_vectors_from_char_vector_sequence(self):
        text = "please learn how to infer word vectors from sequences of character vectors"

        index_to_word = list(set(text.split()))
        index_to_char = list(set(text))

        word_to_index = {
            word: index
            for index, word in enumerate(index_to_word)
        }
        char_to_index = {
            char: index
            for index, char in enumerate(index_to_char)
        }

        def to_char_vector_sequence(word):
            sequence = []
            for char in word:
                vector = np.ones(len(char_to_index)) * -1
                vector[char_to_index[char]] = 1
                sequence.append(vector)
            sequence.append(np.zeros(len(char_to_index)))

            return sequence

        def to_word_vector(word):
            vector = np.ones(len(word_to_index)) * -1
            vector[word_to_index[word]] = 1
            return vector

        training_data = [(to_char_vector_sequence(word), to_word_vector(word))
                         for word in text.split()]
        n = Gru(len(index_to_char), len(index_to_word))

        for i in range(1000):
            for char_vectors, word_vector in training_data:
                intermediate_results = {}
                hs = n.forward_prop(char_vectors, np.zeros(len(index_to_word)),
                                    intermediate_results)
                dhs = [
                    np.zeros(shape=word_vector.shape) for _ in range(len(hs))
                ]
                dhs[-1] = ce_err_prime(hs[-1], word_vector)
                n.back_prop(dhs, intermediate_results)
                n.train(0.1, intermediate_results)

            if i % 200 == 0:
                total_err = 0
                for char_vectors, word_vector in training_data:
                    hs = n.forward_prop(char_vectors,
                                        np.zeros(len(index_to_word)), {})
                    total_err += mathutils.mse(hs[-1], word_vector)
                print(total_err / len(training_data))

        result = n.forward_prop(to_char_vector_sequence("infer"),
                                np.zeros(len(index_to_word)), {})[-1]
        self.assertEqual("infer", index_to_word[np.argmax(result)])
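
This test imports two more helpers that are not shown: ce_err_prime and mathutils.mse. Plausible stand-ins follow, hedged as assumptions; the residual form of ce_err_prime matches a softmax-plus-cross-entropy output layer (and also plain squared error), which may not be exactly what the repository uses.

import numpy as np

def ce_err_prime(h, target):
    # assumed gradient of the output error with respect to h: the residual
    return h - target

def mse(h, target):
    # mean squared error, used only for the periodic progress printout
    return np.mean(np.square(h - target))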
Example #4
    def test_single_step_gradient(self):
        input_size = 5
        hidden_size = 6
        n = Gru(input_size, hidden_size)

        xs = [frand(size=input_size)]
        h0 = frand(hidden_size)

        intermediate_results = {}
        hs = n.forward_prop(xs, h0, intermediate_results)
        dh0 = n.back_prop([derr(hs[-1])], intermediate_results)

        delta = 1e-4

        for index in scalar_indices(n):
            array_name = gru_array_names(n)[index[0]]

            slightly_less = clone(n)
            gru_arrays(slightly_less)[index[0]][index[1]] -= delta
            err_slightly_less = err(slightly_less.forward_prop(xs, h0, {})[-1])

            slightly_more = clone(n)
            gru_arrays(slightly_more)[index[0]][index[1]] += delta
            err_slightly_more = err(slightly_more.forward_prop(xs, h0, {})[-1])

            # central-difference estimate of the gradient
            numeric_grad = (err_slightly_more - err_slightly_less) / (2 * delta)
            # gradient accumulated by back_prop for this parameter
            backprop_grad = gru_results_arrays(intermediate_results)[index[0]][index[1]]

            self.assertTrue(abs(numeric_grad - backprop_grad) < 0.01,
                            "{}: {} not within threshold of {}".format(array_name, backprop_grad, numeric_grad))

        for index in np.ndindex(h0.shape):
            slightly_less_h0 = np.copy(h0)
            slightly_less_h0[index] -= delta
            err_slightly_less_h0 = err(n.forward_prop(xs, slightly_less_h0, {})[-1])

            slightly_more_h0 = np.copy(h0)
            slightly_more_h0[index] += delta
            err_slightly_more_h0 = err(n.forward_prop(xs, slightly_more_h0, {})[-1])

            numeric_grad = (err_slightly_more_h0 - err_slightly_less_h0) / (2 * delta)
            backprop_grad = dh0[index]

            self.assertTrue(abs(numeric_grad - backprop_grad) < 0.01,
                            "h0: {} not within threshold of {}".format(backprop_grad, numeric_grad))
Example #5
    def testTranslateWordsIntoInitialisms(self):
        text = "Born in Vienna into one of Europe's richest families, he inherited a large fortune " \
               "from his father in 1913. He gave some considerable sums to poor artists. In a period " \
               "of severe personal depression after the first World War, he then gave away his entire " \
               "fortune to his brothers and sisters. Three of his brothers committed suicide, with " \
               "Wittgenstein contemplating it too. He left academia several times—serving as an " \
               "officer on the front line during World War I, where he was decorated a number of times " \
               "for his courage; teaching in schools in remote Austrian villages where he encountered " \
               "controversy for hitting children when they made mistakes in mathematics; and working " \
               "as a hospital porter during World War II in London where he told patients not to take " \
               "the drugs they were prescribed while largely managing to keep secret the fact that he " \
               "was one of the world's most famous philosophers."

        index_to_word = sorted(list(set(text.split(sep=" "))))
        word_to_index = {word: i for i, word in enumerate(index_to_word)}

        index_to_char = sorted(
            list(set([word[0].upper() for word in index_to_word])))
        char_to_index = {char: i for i, char in enumerate(index_to_char)}

        def vector_from_word(word):
            index = word_to_index[word]
            vec = np.zeros(len(index_to_word))
            vec[index] = 1
            return vec

        def word_from_vector(vector):
            index = vector.argmax()
            if vector[index] < 0.3:
                return "?"
            else:
                return index_to_word[index]

        def vector_from_char(char):
            vec = np.zeros(len(index_to_char))
            upper = char.upper()
            if upper in char_to_index:
                index = char_to_index[upper]
                vec[index] = 1
            return vec

        def char_from_vector(vector):
            index = vector.argmax()
            if vector[index] < -0.3:
                return " "
            elif vector[index] < 0.3:
                return "?"
            else:
                return index_to_char[index]

        max_seq_size = 5

        training_set = []
        for _ in range(500):
            seq_size = random.randint(1, max_seq_size)
            word_indices = [
                random.randrange(0, len(index_to_word))
                for _ in range(seq_size)
            ]
            words = [index_to_word[index] for index in word_indices]

            initials = [word[0].upper() for word in words]

            training_set.append(
                ([vector_from_word(word) for word in words],
                 [vector_from_char(char) for char in initials]))

        encoder_hidden_state_size = 50
        encoder = Gru(len(index_to_word), encoder_hidden_state_size)
        decoder = Gru(
            len(index_to_char) + encoder_hidden_state_size, len(index_to_char))

        encoder_h0 = np.random.uniform(-0.2, 0.2, encoder_hidden_state_size)
        decoder_h0 = np.random.uniform(-0.2, 0.2, len(index_to_char))

        end_of_sequence = np.ones(len(index_to_char)) * -1

        for epoch in range(10000):
            debug = epoch % 5000 == 0
            for word_vectors, char_vectors in random.sample(training_set, 30):
                encoder_results = {}
                encoded_state = encoder.forward_prop(word_vectors, encoder_h0,
                                                     encoder_results)[-1]

                decoder_results = {}

                def decoder_input_generator(max_len):
                    yield np.concatenate(
                        [encoded_state,
                         np.zeros(len(index_to_char))])

                    # feed each decoded character back in as the next input,
                    # stopping at a space or at the length cap
                    resulting_char = char_from_vector(decoder_results["hs"][-1])

                    i = 0
                    while resulting_char != " " and i <= max_len:
                        yield np.concatenate(
                            [encoded_state,
                             vector_from_char(resulting_char)])
                        resulting_char = char_from_vector(decoder_results["hs"][-1])
                        i += 1

                hs = decoder.forward_prop(
                    decoder_input_generator(len(char_vectors)), decoder_h0,
                    decoder_results)
                if len(hs) <= len(char_vectors):
                    targets = char_vectors[:len(hs)]
                else:
                    targets = char_vectors + [
                        end_of_sequence
                        for _ in range(len(hs) - len(char_vectors))
                    ]

                decoder_errors = [h - target for h, target in zip(hs, targets)]

                decoder.back_prop(decoder_errors, decoder_results)
                decoder.train(0.1, decoder_results)

                encoded_state_error = decoder_results["dx"][:len(encoded_state)]
                encoder_errors = (
                    [np.zeros(encoder_hidden_state_size)] *
                    (len(word_vectors) - 1)) + [encoded_state_error]

                encoder.back_prop(encoder_errors, encoder_results)
                encoder.train(0.1, encoder_results)

                if debug:
                    print((" ".join([
                        word_from_vector(word_vector)
                        for word_vector in word_vectors
                    ])))
                    print(("".join([char_from_vector(h) for h in hs])))
                    print((sum(
                        [np.sum(np.square(err)) for err in decoder_errors])))

                    encoder.save("encoder")
                    decoder.save("decoder")
                    debug = False
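
After training, the encoder/decoder pair can be run on new word sequences with the same greedy feed-back decoding used in decoder_input_generator. A sketch, assuming encoder, decoder, the h0 vectors and the vector helpers above are in scope; translate_to_initials is a hypothetical name, not part of the test.

def translate_to_initials(words, max_len=10):
    # encode the word sequence into a single state vector
    encoded = encoder.forward_prop(
        [vector_from_word(w) for w in words], encoder_h0, {})[-1]

    results = {}

    def inputs():
        # same greedy feed-back scheme as decoder_input_generator above
        yield np.concatenate([encoded, np.zeros(len(index_to_char))])
        c = char_from_vector(results["hs"][-1])
        i = 0
        while c != " " and i <= max_len:
            yield np.concatenate([encoded, vector_from_char(c)])
            c = char_from_vector(results["hs"][-1])
            i += 1

    hs = decoder.forward_prop(inputs(), decoder_h0, results)
    return "".join(char_from_vector(h) for h in hs)

print(translate_to_initials(["World", "War"]))  # ideally prints "WW"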