예제 #1
0
    def test_iris_data_set(self):
        """Train a feed-forward network on the iris data and check its held-out error.

        Reads ``iris.data`` (path relative to the working directory), shuffles
        the samples, keeps the last 30 as a test set and trains a
        ``FeedForwardNetwork([4, 50, 3])`` for 10000 single-sample updates
        with a learning rate of 0.5.  The test passes when the mean of the
        squared per-sample test errors is below 0.05.
        """
        classes = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"]

        def create_data_entry(line):
            """Turn one comma-separated line into an ``(input, target)`` pair."""
            split = line.strip().split(",")
            # Every column but the last is a numeric feature, scaled by 1/7.
            data_input = np.array([float(value) / 7 for value in split[:-1]]).astype(TYPE)
            # The last column is the class label; encode it as a 0/1 vector
            # with a 1 at the position of the matching class name.
            data_target = np.array([float(split[-1] == class_) for class_ in classes]).astype(TYPE)

            return data_input, data_target

        # The context manager closes the file even if parsing a line raises.
        with open("iris.data") as iris_data_file:
            # Blank lines are skipped.
            data_set = [create_data_entry(line) for line in iris_data_file if line.strip()]
        random.shuffle(data_set)

        training_set = data_set[:-30]
        test_set = data_set[-30:]

        n = FeedForwardNetwork([4, 50, 3])
        learning_rate = 0.5

        for _ in range(10000):
            # One randomly chosen training sample per update.
            training_input, training_target = training_set[random.randrange(0, len(training_set))]
            intermediate_results = {}
            y = n.forward_prop(training_input, intermediate_results)
            dy = mathutils.mse_prime(y, training_target)
            n.back_prop(dy, intermediate_results)
            n.train(learning_rate, intermediate_results)

        errors = [mathutils.mean_squared_error(n.forward_prop(test_input, {}), test_target)
                  for test_input, test_target in test_set]
        # NOTE: the per-sample errors are squared once more before averaging,
        # so the asserted quantity is the mean of the squared errors.
        mean_squared_error = np.mean(np.square(errors))
        npt.assert_array_less(mean_squared_error, 0.05)
예제 #2
0
    def test_learn_word_vectors_from_char_vector_sequence(self):
        """Check that a ``NoOutputLstm`` learns to map character sequences to words.

        Every word of ``text`` is encoded as a sequence of +1/-1 character
        vectors followed by one all-zero vector; the target is a +1/-1 vector
        with +1 at the word's index.  The network is trained for 1000 passes
        over the words with a learning rate of 0.1, and the averaged
        ``mathutils.mean_squared_error`` is printed every 200 passes.  The
        test passes when ``argmax`` of the activation for "infer" points at
        "infer".
        """
        text = "please learn how to infer word vectors from sequences of character vectors"

        # Vocabularies: index -> item as lists, item -> index as dicts.
        index_to_word = list(set(text.split()))
        index_to_char = list(set(text))

        word_to_index = {
            word: index
            for index, word in enumerate(index_to_word)
        }
        char_to_index = {
            char: index
            for index, char in enumerate(index_to_char)
        }

        def to_char_vector_sequence(word):
            """Encode *word* as one +1/-1 vector per character plus a zero vector."""
            sequence = []
            for char in word:
                vector = np.ones(len(char_to_index)) * -1
                vector[char_to_index[char]] = 1
                sequence.append(vector)
            # An all-zero vector is appended after the last character.
            sequence.append(np.zeros(len(char_to_index)))

            return np.asarray(sequence)

        def to_word_vector(word):
            """Encode *word* as a vector of -1 with +1 at the word's index."""
            vector = np.ones(len(word_to_index)) * -1
            vector[word_to_index[word]] = 1
            return vector

        training_data = [(to_char_vector_sequence(word), to_word_vector(word))
                         for word in text.split()]
        # Second constructor argument equals the number of words, so the
        # network result can be compared with the word vector directly.
        n = NoOutputLstm(len(index_to_char), len(index_to_word))

        for i in range(1000):
            for char_vectors, word_vector in training_data:
                intermediate_results = {}
                # The zero vector is presumably the initial hidden state -
                # TODO confirm against NoOutputLstm.forward_prop.
                h_last = n.forward_prop(char_vectors,
                                        np.zeros(len(index_to_word)),
                                        intermediate_results)
                n.back_prop(ce_err_prime(h_last, word_vector),
                            intermediate_results)
                n.train(0.1, intermediate_results)

            if i % 200 == 0:
                # Progress report: error averaged over all training words.
                total_err = 0
                for char_vectors, word_vector in training_data:
                    h = n.activate(char_vectors, np.zeros(len(index_to_word)))
                    total_err += mathutils.mean_squared_error(h, word_vector)
                print(total_err / len(training_data))

        result = n.activate(to_char_vector_sequence("infer"),
                            np.zeros(len(index_to_word)))
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual("infer", index_to_word[np.argmax(result)])
예제 #3
0
    def test_learn_word_vectors_from_char_vector_sequence(self):
        """Check that a ``NoOutputLstm`` learns to map character sequences to words.

        Words of ``text`` become sequences of +1/-1 character vectors that end
        with an all-zero vector; targets are +1/-1 vectors with +1 at the
        word's index.  Training runs for 1000 passes over the words with a
        learning rate of 0.1, printing the averaged
        ``mathutils.mean_squared_error`` every 200 passes.  Finally the
        activation for "infer" must have its ``argmax`` at the index of
        "infer".
        """
        text = "please learn how to infer word vectors from sequences of character vectors"

        # Vocabularies: index -> item as lists, item -> index as dicts.
        index_to_word = list(set(text.split()))
        index_to_char = list(set(text))

        word_to_index = {word: index for index, word in enumerate(index_to_word)}
        char_to_index = {char: index for index, char in enumerate(index_to_char)}

        def to_char_vector_sequence(word):
            """Encode *word* as one +1/-1 vector per character plus a zero vector."""
            sequence = []
            for char in word:
                vector = np.ones(len(char_to_index)) * -1
                vector[char_to_index[char]] = 1
                sequence.append(vector)
            # An all-zero vector is appended after the last character.
            sequence.append(np.zeros(len(char_to_index)))

            return np.asarray(sequence)

        def to_word_vector(word):
            """Encode *word* as a vector of -1 with +1 at the word's index."""
            vector = np.ones(len(word_to_index)) * -1
            vector[word_to_index[word]] = 1
            return vector

        training_data = [(to_char_vector_sequence(word), to_word_vector(word)) for word in text.split()]
        # Second constructor argument equals the number of words, so the
        # network result can be compared with the word vector directly.
        n = NoOutputLstm(len(index_to_char), len(index_to_word))

        for i in range(1000):
            for char_vectors, word_vector in training_data:
                intermediate_results = {}
                # The zero vector is presumably the initial hidden state -
                # TODO confirm against NoOutputLstm.forward_prop.
                h_last = n.forward_prop(char_vectors, np.zeros(len(index_to_word)), intermediate_results)
                n.back_prop(ce_err_prime(h_last, word_vector), intermediate_results)
                n.train(0.1, intermediate_results)

            if i % 200 == 0:
                # Progress report: error averaged over all training words.
                total_err = 0
                for char_vectors, word_vector in training_data:
                    h = n.activate(char_vectors, np.zeros(len(index_to_word)))
                    total_err += mathutils.mean_squared_error(h, word_vector)
                print(total_err / len(training_data))

        result = n.activate(to_char_vector_sequence("infer"), np.zeros(len(index_to_word)))
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual("infer", index_to_word[np.argmax(result)])
예제 #4
0
def err(y):
    """Return ``mathutils.mean_squared_error`` of *y* against an all-zero target.

    The target is a fresh ``np.zeros`` array with the same shape as *y*.
    """
    zero_target = np.zeros(y.shape)
    return mathutils.mean_squared_error(y, zero_target)
    def test_learn_word_vectors_from_char_vector_sequence_2(self):
        """Check that an LSTM followed by a feed-forward net maps characters to words.

        Every word of ``text`` is encoded as a sequence of +1/-1 character
        vectors followed by one all-zero vector; the target is a +1/-1 vector
        with +1 at the word's index.  A ``NoOutputLstm`` with 50 hidden units
        feeds ``FeedForwardNetwork([50, number of words])``.  Both are trained
        for 2000 passes over the words with a learning rate of 5, and the
        averaged ``mathutils.mean_squared_error`` is printed every 200 passes.
        The test passes when ``argmax`` of the network output for "infer"
        points at "infer".
        """
        # Each fragment ends with a space: adjacent literals are concatenated
        # verbatim, and without the separator the last word of one fragment
        # would fuse with the first word of the next ("vectorsgiving", ...).
        text = ("please learn how to infer word vectors from sequences of character vectors "
                "giving it more words to try and confuse it "
                "how evil "
                "much diabolical "
                "many genius "
                "the doge of venice gives his regards")

        # Vocabularies: index -> item as lists, item -> index as dicts.
        index_to_word = list(set(text.split()))
        index_to_char = list(set(text))

        word_to_index = {word: index for index, word in enumerate(index_to_word)}
        char_to_index = {char: index for index, char in enumerate(index_to_char)}

        def to_char_vector_sequence(word):
            """Encode *word* as one +1/-1 vector per character plus a zero vector."""
            sequence = []
            for char in word:
                vector = np.ones(len(char_to_index)) * -1
                vector[char_to_index[char]] = 1
                sequence.append(vector)
            # An all-zero vector is appended after the last character.
            sequence.append(np.zeros(len(char_to_index)))

            return np.asarray(sequence)

        def to_word_vector(word):
            """Encode *word* as a vector of -1 with +1 at the word's index."""
            vector = np.ones(len(word_to_index)) * -1
            vector[word_to_index[word]] = 1
            return vector

        hidden_size = 50

        training_data = [(to_char_vector_sequence(word), to_word_vector(word)) for word in text.split()]
        lstm = NoOutputLstm(len(index_to_char), hidden_size)
        ffn = FeedForwardNetwork([hidden_size, len(index_to_word)])

        # Random vector handed to every LSTM call as its second argument
        # (presumably the initial hidden state - TODO confirm).
        h0 = np.random.uniform(-1, 1, size=hidden_size)

        learning_rate = 5

        for i in range(2000):
            for char_vectors, word_vector in training_data:
                hs, f_gs, i_gs, cs, h = lstm.forward_prop(char_vectors, h0)
                res = {}
                y = ffn.forward_prop(h, res)
                # dy is what gets back-propagated, so it has to be the
                # derivative of the error with respect to y rather than the
                # error value itself.
                dy = mathutils.mean_squared_error_prime(y, word_vector)
                dx = ffn.dx(h, dy, res)
                ffn.train(learning_rate, h, dy, res)
                # The value returned by ffn.dx is handed on to the LSTM.
                dh = dx
                dw_xf_g, dw_hf_g, db_f_g, dw_xi_g, dw_hi_g, db_i_g, dw_xc, dw_hc, db_c = lstm.back_prop(
                    char_vectors, hs, f_gs, i_gs, cs, dh)
                # Plain gradient-descent step on every LSTM parameter.
                lstm.w_xf_g -= dw_xf_g * learning_rate
                lstm.w_hf_g -= dw_hf_g * learning_rate
                lstm.b_f_g -= db_f_g * learning_rate
                lstm.w_xi_g -= dw_xi_g * learning_rate
                lstm.w_hi_g -= dw_hi_g * learning_rate
                lstm.b_i_g -= db_i_g * learning_rate
                lstm.w_xc -= dw_xc * learning_rate
                lstm.w_hc -= dw_hc * learning_rate
                lstm.b_c -= db_c * learning_rate

            if i % 200 == 0:
                # Progress report: error averaged over all training words.
                total_err = 0
                for char_vectors, word_vector in training_data:
                    h = lstm.activate(char_vectors, h0)
                    y = ffn.forward_prop(h, {})
                    total_err += mathutils.mean_squared_error(y, word_vector)
                print(total_err / len(training_data))

        h = lstm.activate(to_char_vector_sequence("infer"), h0)
        y = ffn.forward_prop(h, {})
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual("infer", index_to_word[np.argmax(y)])
    def test_learn_word_vectors_from_char_vector_sequence(self):
        """Check that an LSTM followed by a feed-forward net maps characters to words.

        Every word of ``text`` is encoded as a sequence of +1/-1 character
        vectors followed by one all-zero vector; the target is a +1/-1 vector
        with +1 at the word's index.  A ``NoOutputLstm`` whose hidden size
        equals the number of words feeds
        ``FeedForwardNetwork([hidden_size, 50, 20, number of words])``.
        Training runs for 1000 passes over the words with a learning rate of
        0.5, printing the averaged ``mathutils.mean_squared_error`` every 200
        passes.  The test passes when ``argmax`` of the network output for
        "infer" points at "infer".
        """
        text = "please learn how to infer word vectors from sequences of character vectors"

        # Vocabularies: index -> item as lists, item -> index as dicts.
        index_to_word = list(set(text.split()))
        index_to_char = list(set(text))

        word_to_index = {word: index for index, word in enumerate(index_to_word)}
        char_to_index = {char: index for index, char in enumerate(index_to_char)}

        def to_char_vector_sequence(word):
            """Encode *word* as one +1/-1 vector per character plus a zero vector."""
            sequence = []
            for char in word:
                vector = np.ones(len(char_to_index)) * -1
                vector[char_to_index[char]] = 1
                sequence.append(vector)
            # An all-zero vector is appended after the last character.
            sequence.append(np.zeros(len(char_to_index)))

            return np.asarray(sequence)

        def to_word_vector(word):
            """Encode *word* as a vector of -1 with +1 at the word's index."""
            vector = np.ones(len(word_to_index)) * -1
            vector[word_to_index[word]] = 1
            return vector

        training_data = [(to_char_vector_sequence(word), to_word_vector(word)) for word in text.split()]
        # The hidden size is tied to the vocabulary size (a fixed size of 100
        # was tried before); the error derivative below relies on this.
        hidden_size = len(index_to_word)
        lstm = NoOutputLstm(len(index_to_char), hidden_size)
        ffn = FeedForwardNetwork([hidden_size, 50, 20, len(index_to_word)])

        # Random vector handed to every LSTM call as its second argument
        # (presumably the initial hidden state - TODO confirm).
        h0 = np.random.uniform(-1, 1, size=hidden_size)

        learning_rate = 0.5

        for i in range(1000):
            for char_vectors, word_vector in training_data:
                hs, f_gs, i_gs, cs, lstm_output = lstm.forward_prop(char_vectors, h0)
                res = {}
                y = ffn.forward_prop(lstm_output, res)
                # NOTE(review): the error derivative is taken on the LSTM
                # output instead of the network output, i.e. instead of
                #   mathutils.mean_squared_error_prime(y, word_vector)
                # The lengths agree only because hidden_size equals the
                # number of words.  Confirm this is intended.
                dy = mathutils.mean_squared_error_prime(lstm_output, word_vector)
                dx = ffn.dx(lstm_output, dy, res)
                ffn.train(learning_rate, lstm_output, dy, res)

                # NOTE(review): the LSTM is given dy directly; the alternative
                # of passing dx (the value returned by ffn.dx) is disabled.
                dw_xf_g, dw_hf_g, db_f_g, dw_xi_g, dw_hi_g, db_i_g, dw_xc, dw_hc, db_c = lstm.back_prop(
                    char_vectors, hs, f_gs, i_gs, cs, dy)

                # Plain gradient-descent step on every LSTM parameter.
                lstm.w_xf_g -= dw_xf_g * learning_rate
                lstm.w_hf_g -= dw_hf_g * learning_rate
                lstm.b_f_g -= db_f_g * learning_rate
                lstm.w_xi_g -= dw_xi_g * learning_rate
                lstm.w_hi_g -= dw_hi_g * learning_rate
                lstm.b_i_g -= db_i_g * learning_rate
                lstm.w_xc -= dw_xc * learning_rate
                lstm.w_hc -= dw_hc * learning_rate
                lstm.b_c -= db_c * learning_rate

            if i % 200 == 0:
                # Progress report: error averaged over all training words.
                total_err = 0
                for char_vectors, word_vector in training_data:
                    h = lstm.activate(char_vectors, h0)
                    # Feed the LSTM result to the network unchanged, exactly
                    # as the final check below does.
                    output_vector = ffn.forward_prop(h, {})
                    total_err += mathutils.mean_squared_error(output_vector, word_vector)
                print(total_err / len(training_data))

        lstm_out = lstm.activate(to_char_vector_sequence("infer"), h0)
        result = ffn.forward_prop(lstm_out, {})

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual("infer", index_to_word[np.argmax(result)])
    def test_learn_word_vectors_from_char_vector_sequence_2(self):
        """Check that an LSTM followed by a feed-forward net maps characters to words.

        Every word of ``text`` is encoded as a sequence of +1/-1 character
        vectors followed by one all-zero vector; the target is a +1/-1 vector
        with +1 at the word's index.  A ``NoOutputLstm`` with 50 hidden units
        feeds ``FeedForwardNetwork([50, number of words])``.  Both are trained
        for 2000 passes over the words with a learning rate of 5, and the
        averaged ``mathutils.mean_squared_error`` is printed every 200 passes.
        The test passes when ``argmax`` of the network output for "infer"
        points at "infer".
        """
        # Each fragment ends with a space: adjacent literals are concatenated
        # verbatim, and without the separator the last word of one fragment
        # would fuse with the first word of the next ("vectorsgiving", ...).
        text = ("please learn how to infer word vectors from sequences of character vectors "
                "giving it more words to try and confuse it "
                "how evil "
                "much diabolical "
                "many genius "
                "the doge of venice gives his regards")

        # Vocabularies: index -> item as lists, item -> index as dicts.
        index_to_word = list(set(text.split()))
        index_to_char = list(set(text))

        word_to_index = {
            word: index
            for index, word in enumerate(index_to_word)
        }
        char_to_index = {
            char: index
            for index, char in enumerate(index_to_char)
        }

        def to_char_vector_sequence(word):
            """Encode *word* as one +1/-1 vector per character plus a zero vector."""
            sequence = []
            for char in word:
                vector = np.ones(len(char_to_index)) * -1
                vector[char_to_index[char]] = 1
                sequence.append(vector)
            # An all-zero vector is appended after the last character.
            sequence.append(np.zeros(len(char_to_index)))

            return np.asarray(sequence)

        def to_word_vector(word):
            """Encode *word* as a vector of -1 with +1 at the word's index."""
            vector = np.ones(len(word_to_index)) * -1
            vector[word_to_index[word]] = 1
            return vector

        hidden_size = 50

        training_data = [(to_char_vector_sequence(word), to_word_vector(word))
                         for word in text.split()]
        lstm = NoOutputLstm(len(index_to_char), hidden_size)
        ffn = FeedForwardNetwork([hidden_size, len(index_to_word)])

        # Random vector handed to every LSTM call as its second argument
        # (presumably the initial hidden state - TODO confirm).
        h0 = np.random.uniform(-1, 1, size=hidden_size)

        learning_rate = 5

        for i in range(2000):
            for char_vectors, word_vector in training_data:
                hs, f_gs, i_gs, cs, h = lstm.forward_prop(char_vectors, h0)
                res = {}
                y = ffn.forward_prop(h, res)
                # dy is what gets back-propagated, so it has to be the
                # derivative of the error with respect to y rather than the
                # error value itself.
                dy = mathutils.mean_squared_error_prime(y, word_vector)
                dx = ffn.dx(h, dy, res)
                ffn.train(learning_rate, h, dy, res)
                # The value returned by ffn.dx is handed on to the LSTM.
                dh = dx
                dw_xf_g, dw_hf_g, db_f_g, dw_xi_g, dw_hi_g, db_i_g, dw_xc, dw_hc, db_c = lstm.back_prop(
                    char_vectors, hs, f_gs, i_gs, cs, dh)
                # Plain gradient-descent step on every LSTM parameter.
                lstm.w_xf_g -= dw_xf_g * learning_rate
                lstm.w_hf_g -= dw_hf_g * learning_rate
                lstm.b_f_g -= db_f_g * learning_rate
                lstm.w_xi_g -= dw_xi_g * learning_rate
                lstm.w_hi_g -= dw_hi_g * learning_rate
                lstm.b_i_g -= db_i_g * learning_rate
                lstm.w_xc -= dw_xc * learning_rate
                lstm.w_hc -= dw_hc * learning_rate
                lstm.b_c -= db_c * learning_rate

            if i % 200 == 0:
                # Progress report: error averaged over all training words.
                total_err = 0
                for char_vectors, word_vector in training_data:
                    h = lstm.activate(char_vectors, h0)
                    y = ffn.forward_prop(h, {})
                    total_err += mathutils.mean_squared_error(y, word_vector)
                print(total_err / len(training_data))

        h = lstm.activate(to_char_vector_sequence("infer"), h0)
        y = ffn.forward_prop(h, {})
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual("infer", index_to_word[np.argmax(y)])
    def test_learn_word_vectors_from_char_vector_sequence(self):
        """Check that an LSTM followed by a feed-forward net maps characters to words.

        Every word of ``text`` is encoded as a sequence of +1/-1 character
        vectors followed by one all-zero vector; the target is a +1/-1 vector
        with +1 at the word's index.  A ``NoOutputLstm`` whose hidden size
        equals the number of words feeds
        ``FeedForwardNetwork([hidden_size, 50, 20, number of words])``.
        Training runs for 1000 passes over the words with a learning rate of
        0.5, printing the averaged ``mathutils.mean_squared_error`` every 200
        passes.  The test passes when ``argmax`` of the network output for
        "infer" points at "infer".
        """
        text = "please learn how to infer word vectors from sequences of character vectors"

        # Vocabularies: index -> item as lists, item -> index as dicts.
        index_to_word = list(set(text.split()))
        index_to_char = list(set(text))

        word_to_index = {
            word: index
            for index, word in enumerate(index_to_word)
        }
        char_to_index = {
            char: index
            for index, char in enumerate(index_to_char)
        }

        def to_char_vector_sequence(word):
            """Encode *word* as one +1/-1 vector per character plus a zero vector."""
            sequence = []
            for char in word:
                vector = np.ones(len(char_to_index)) * -1
                vector[char_to_index[char]] = 1
                sequence.append(vector)
            # An all-zero vector is appended after the last character.
            sequence.append(np.zeros(len(char_to_index)))

            return np.asarray(sequence)

        def to_word_vector(word):
            """Encode *word* as a vector of -1 with +1 at the word's index."""
            vector = np.ones(len(word_to_index)) * -1
            vector[word_to_index[word]] = 1
            return vector

        training_data = [(to_char_vector_sequence(word), to_word_vector(word))
                         for word in text.split()]
        # The hidden size is tied to the vocabulary size (a fixed size of 100
        # was tried before); the error derivative below relies on this.
        hidden_size = len(index_to_word)
        lstm = NoOutputLstm(len(index_to_char), hidden_size)
        ffn = FeedForwardNetwork([hidden_size, 50, 20, len(index_to_word)])

        # Random vector handed to every LSTM call as its second argument
        # (presumably the initial hidden state - TODO confirm).
        h0 = np.random.uniform(-1, 1, size=hidden_size)

        learning_rate = 0.5

        for i in range(1000):
            for char_vectors, word_vector in training_data:
                hs, f_gs, i_gs, cs, lstm_output = lstm.forward_prop(
                    char_vectors, h0)
                res = {}
                y = ffn.forward_prop(lstm_output, res)
                # NOTE(review): the error derivative is taken on the LSTM
                # output instead of the network output, i.e. instead of
                #   mathutils.mean_squared_error_prime(y, word_vector)
                # The lengths agree only because hidden_size equals the
                # number of words.  Confirm this is intended.
                dy = mathutils.mean_squared_error_prime(
                    lstm_output, word_vector)
                dx = ffn.dx(lstm_output, dy, res)
                ffn.train(learning_rate, lstm_output, dy, res)

                # NOTE(review): the LSTM is given dy directly; the alternative
                # of passing dx (the value returned by ffn.dx) is disabled.
                dw_xf_g, dw_hf_g, db_f_g, dw_xi_g, dw_hi_g, db_i_g, dw_xc, dw_hc, db_c = lstm.back_prop(
                    char_vectors, hs, f_gs, i_gs, cs, dy)

                # Plain gradient-descent step on every LSTM parameter.
                lstm.w_xf_g -= dw_xf_g * learning_rate
                lstm.w_hf_g -= dw_hf_g * learning_rate
                lstm.b_f_g -= db_f_g * learning_rate
                lstm.w_xi_g -= dw_xi_g * learning_rate
                lstm.w_hi_g -= dw_hi_g * learning_rate
                lstm.b_i_g -= db_i_g * learning_rate
                lstm.w_xc -= dw_xc * learning_rate
                lstm.w_hc -= dw_hc * learning_rate
                lstm.b_c -= db_c * learning_rate

            if i % 200 == 0:
                # Progress report: error averaged over all training words.
                total_err = 0
                for char_vectors, word_vector in training_data:
                    h = lstm.activate(char_vectors, h0)
                    # Feed the LSTM result to the network unchanged, exactly
                    # as the final check below does.
                    output_vector = ffn.forward_prop(h, {})
                    total_err += mathutils.mean_squared_error(
                        output_vector, word_vector)
                print(total_err / len(training_data))

        lstm_out = lstm.activate(to_char_vector_sequence("infer"), h0)
        result = ffn.forward_prop(lstm_out, {})

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual("infer", index_to_word[np.argmax(result)])