Example #1
    def test_grad_bs(self):
        n = FeedForwardNetwork([4, 7, 2, 3])
        x0 = np.random.uniform(size=4).astype(TYPE)

        intermediate_results = {}
        y = n.forward_prop(x0, intermediate_results)
        t = np.zeros(3).astype(TYPE)
        dy = mathutils.mean_squared_error_prime(y, t)
        n.back_prop(dy, intermediate_results)

        dbs = intermediate_results["dbs"]

        delta = 1e-4

        exp_dbs = []
        for i in range(len(n.bs)):
            b = n.bs[i]
            exp_db = np.zeros(b.shape)
            for index in np.ndindex(b.shape):
                n1 = clone(n)
                n2 = clone(n)

                n1.bs[i][index] -= delta
                n2.bs[i][index] += delta

                exp_grad = (err(n2.forward_prop(x0, {})) - err(n1.forward_prop(x0, {}))) / (2 * delta)
                exp_db[index] = exp_grad

            exp_dbs.append(exp_db)

        for db, exp_db in zip(dbs, exp_dbs):
            npt.assert_array_almost_equal(db, exp_db, decimal=3)
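Note that these tests lean on two helpers not shown in this listing: clone, which must deep-copy a network so each perturbation starts from identical parameters, and err, which reduces a network output to the scalar loss being differentiated. A minimal sketch of plausible definitions, assuming the loss is the usual mean squared error against the all-zeros target t used above:

import copy

import numpy as np


def clone(network):
    # Deep copy so perturbing one copy's parameters cannot leak
    # into the other copy or the original network.
    return copy.deepcopy(network)


def err(y):
    # Scalar mean squared error against an all-zeros target, mirroring
    # the mean_squared_error_prime(y, t) call in the tests.
    t = np.zeros_like(y)
    return float(np.mean((y - t) ** 2))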
Example #2
    def test_grad_ws(self):
        n = FeedForwardNetwork([5, 4, 3, 2])
        x0 = np.random.uniform(size=5).astype(TYPE)

        intermediate_results = {}
        y = n.forward_prop(x0, intermediate_results)
        t = np.zeros(2).astype(TYPE)
        dy = mathutils.mean_squared_error_prime(y, t)
        n.back_prop(dy, intermediate_results)

        dws = intermediate_results["dws"]

        delta = 1e-4

        exp_dws = []
        for i in range(len(n.ws)):
            w = n.ws[i]
            exp_dw = np.zeros(w.shape)
            for index in np.ndindex(w.shape):
                n1 = clone(n)
                n2 = clone(n)

                n1.ws[i][index] -= delta
                n2.ws[i][index] += delta

                exp_grad = (err(n2.forward_prop(x0, {})) - err(n1.forward_prop(x0, {}))) / (2 * delta)
                exp_dw[index] = exp_grad

            exp_dws.append(exp_dw)

        for dw, exp_dw in zip(dws, exp_dws):
            npt.assert_array_almost_equal(dw, exp_dw, decimal=3)
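Both parameter tests apply the same central-difference scheme: nudge one entry by ±delta, evaluate the loss twice, and take (err_plus - err_minus) / (2 * delta) as the expected gradient. Here is the idea in isolation, as a self-contained sketch on a plain function rather than a network:

import numpy as np


def numerical_grad(f, x, delta=1e-4):
    # Central-difference estimate of df/dx, one component at a time.
    grad = np.zeros_like(x)
    for index in np.ndindex(x.shape):
        x_a, x_b = np.copy(x), np.copy(x)
        x_a[index] -= delta
        x_b[index] += delta
        grad[index] = (f(x_b) - f(x_a)) / (2 * delta)
    return grad


x = np.array([1.0, -2.0, 3.0])
print(numerical_grad(lambda v: np.sum(v ** 2), x))  # ~ [ 2. -4.  6.]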
Example #3
    def test_grad_x(self):
        n = FeedForwardNetwork([3, 4, 4, 2])
        x0 = np.random.uniform(size=3).astype(TYPE)

        intermediate_results = {}
        y = n.forward_prop(x0, intermediate_results)
        t = np.zeros(2).astype(TYPE)
        dy = mathutils.mean_squared_error_prime(y, t)
        dx = n.back_prop(dy, intermediate_results)

        delta = 1e-4

        exp_dx = np.zeros(x0.shape)
        for index in np.ndindex(x0.shape):
            x0_a = np.copy(x0)
            x0_b = np.copy(x0)

            x0_a[index] -= delta
            x0_b[index] += delta

            exp_grad = (err(n.forward_prop(x0_b, {})) - err(n.forward_prop(x0_a, {}))) / (2 * delta)
            exp_dx[index] = exp_grad

        npt.assert_array_almost_equal(dx, exp_dx, decimal=3)
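assert_array_almost_equal with decimal=3 bounds the absolute error, which can be too strict for large gradient entries and too forgiving for tiny ones. A relative-error criterion is a common alternative; a sketch (not part of this test suite):

import numpy as np


def max_relative_error(analytic, numeric, eps=1e-12):
    # Scale the difference by the magnitude of the gradients so the
    # tolerance is meaningful for both large and small components.
    denom = np.maximum(np.abs(analytic) + np.abs(numeric), eps)
    return float(np.max(np.abs(analytic - numeric) / denom))


# e.g. accept the check when max_relative_error(dx, exp_dx) < 1e-4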
Example #4

    def test_learn_word_vectors_from_char_vector_sequence(self):
        text = "please learn how to infer word vectors from sequences of character vectors"

        index_to_word = list(set(text.split()))
        index_to_char = list(set(text))

        word_to_index = {word: index for index, word in enumerate(index_to_word)}
        char_to_index = {char: index for index, char in enumerate(index_to_char)}

        def to_char_vector_sequence(word):
            sequence = []
            for char in word:
                vector = np.ones(len(char_to_index)) * -1
                vector[char_to_index[char]] = 1
                sequence.append(vector)
            sequence.append(np.zeros(len(char_to_index)))

            return np.asarray(sequence)

        def to_word_vector(word):
            vector = np.ones(len(word_to_index)) * -1
            vector[word_to_index[word]] = 1
            return vector

        training_data = [(to_char_vector_sequence(word), to_word_vector(word)) for word in text.split()]
        hidden_size = len(index_to_word)
        lstm = NoOutputLstm(len(index_to_char), hidden_size)
        ffn = FeedForwardNetwork([hidden_size, 50, 20, len(index_to_word)])

        h0 = np.random.uniform(-1, 1, size=hidden_size)

        learning_rate = 0.5

        for i in range(1000):
            for char_vectors, word_vector in training_data:
                hs, f_gs, i_gs, cs, lstm_output = lstm.forward_prop(char_vectors, h0)
                res = {}
                y = ffn.forward_prop(lstm_output, res)
                # The loss gradient is taken at the FFN output, pushed back
                # through the FFN to get dx, and dx is what flows into the LSTM.
                dy = mathutils.mean_squared_error_prime(y, word_vector)
                dx = ffn.dx(lstm_output, dy, res)
                ffn.train(learning_rate, lstm_output, dy, res)

                dw_xf_g, dw_hf_g, db_f_g, dw_xi_g, dw_hi_g, db_i_g, dw_xc, dw_hc, db_c = \
                    lstm.back_prop(char_vectors, hs, f_gs, i_gs, cs, dx)

                lstm.w_xf_g -= dw_xf_g * learning_rate
                lstm.w_hf_g -= dw_hf_g * learning_rate
                lstm.b_f_g -= db_f_g * learning_rate
                lstm.w_xi_g -= dw_xi_g * learning_rate
                lstm.w_hi_g -= dw_hi_g * learning_rate
                lstm.b_i_g -= db_i_g * learning_rate
                lstm.w_xc -= dw_xc * learning_rate
                lstm.w_hc -= dw_hc * learning_rate
                lstm.b_c -= db_c * learning_rate

            if i % 200 == 0:
                total_err = 0
                for char_vectors, word_vector in training_data:
                    h = lstm.activate(char_vectors, h0)
                    output_vector = ffn.forward_prop(h[-1], {})
                    total_err += mathutils.mean_squared_error(output_vector, word_vector)
                print(total_err / len(training_data))

        lstm_out = lstm.activate(to_char_vector_sequence("infer"), h0)
        result = ffn.forward_prop(lstm_out[-1], {})

        self.assertEqual("infer", index_to_word[np.argmax(result)])
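The ±1 one-hot encoding is easiest to see on a tiny, hypothetical alphabet: the matching index gets +1, every other component -1, and a trailing all-zeros vector marks the end of the word:

import numpy as np

char_to_index = {"a": 0, "b": 1, "c": 2}  # hypothetical 3-char alphabet


def to_char_vector_sequence(word):
    sequence = []
    for char in word:
        vector = np.ones(len(char_to_index)) * -1
        vector[char_to_index[char]] = 1
        sequence.append(vector)
    sequence.append(np.zeros(len(char_to_index)))  # end-of-word marker
    return np.asarray(sequence)


print(to_char_vector_sequence("ab"))
# [[ 1. -1. -1.]
#  [-1.  1. -1.]
#  [ 0.  0.  0.]]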