Ejemplo n.º 1
0
class TestSimpleCBOW(unittest.TestCase):
    """Checks the forward loss and the gradient updates of ``SimpleCBOW``."""

    def setUp(self):
        """Build the model and one-hot contexts/targets from a fixed sentence."""
        sentence = 'You said good-bye and I said hello.'
        counter = CountBasedMethod()
        words = counter.text_to_word_list(sentence)
        word_to_id, _, self.corpus = counter.preprocess(words)
        self.vocab_size = len(word_to_id)

        # The model is created before the word2vec helper, as in the
        # original fixture, so any construction-time side effects keep
        # their order.
        self.simple_cbow = SimpleCBOW(self.vocab_size, 3)
        self.simple_word2vec = SimpleWord2Vec()

        pairs = self.simple_word2vec.create_contexts_target(self.corpus)
        self.contexts_array, self.target_array = pairs

        to_one_hot = self.simple_word2vec.convert_to_one_hot
        self.contexts = to_one_hot(self.contexts_array, self.vocab_size)
        self.target = to_one_hot(self.target_array, self.vocab_size)

    def _current_grads(self):
        """Return the single gradient array of each layer, in layer order.

        Each ``grads`` container is unpacked as a one-element sequence, so
        this raises if a layer exposes any other number of gradients.
        """
        model = self.simple_cbow
        (grad_in_0,) = model.in_layer_0.grads
        (grad_in_1,) = model.in_layer_1.grads
        (grad_out,) = model.out_layer.grads
        return grad_in_0, grad_in_1, grad_out

    def test_forward(self):
        """The forward pass returns a loss that rounds to 1.946."""
        # NOTE(review): 1.946 is about ln(7); presumably the loss of a
        # near-uniform prediction over a 7-word vocabulary -- confirm.
        rounded_loss = round(
            self.simple_cbow.forward(self.contexts, self.target), 3)
        self.assertEqual(1.946, rounded_loss)

    def test_grads_diff(self):
        """Compare every gradient entry before and after one forward/backward.

        A ``True`` entry in a mask means that gradient element kept its
        value; ``False`` means the backward pass changed it.
        """
        # Copy the gradients first so the comparison below is made against
        # the earlier values even if the arrays are modified in place.
        snapshot = [copy.copy(grad) for grad in self._current_grads()]

        self.simple_cbow.forward(self.contexts, self.target)
        self.simple_cbow.backward()

        unchanged_in_0, unchanged_in_1, unchanged_out = [
            old == new
            for old, new in zip(snapshot, self._current_grads())
        ]

        # First input layer: only the last two rows (5 and 6) are untouched.
        # NOTE(review): presumably those word ids never occur as the first
        # context word -- confirm against create_contexts_target.
        expected_in_0 = np.zeros((7, 3), dtype=bool)
        expected_in_0[5:] = True
        assert_array_equal(expected_in_0, unchanged_in_0)

        # Second input layer: only row 0 is untouched.
        expected_in_1 = np.zeros((7, 3), dtype=bool)
        expected_in_1[0] = True
        assert_array_equal(expected_in_1, unchanged_in_1)

        # Output layer: every entry of the 3x7 gradient must have changed.
        expected_out = np.zeros((3, 7), dtype=bool)
        assert_array_equal(expected_out, unchanged_out)
Ejemplo n.º 2
0
# coding: utf-8
import sys
sys.path.append('..')

import numpy as np
from simple_cbow import SimpleCBOW
from utils.layers import MatMul
from utils.tools import create_contexts_target, preprocess, convert_one_hot

if __name__ == '__main__':
    # Demo: preprocess one sentence, build one-hot CBOW training data, show a
    # sample hidden-layer projection, then run one forward pass of SimpleCBOW.
    text = 'You say goodbye and I say hello.'
    corpus, word_to_id, id_to_word = preprocess(text)
    # Values noted by the original author for this sentence:
    #   corpus       -> [0 1 2 3 4 1 5 6]
    #   word_to_id   -> {'you': 0, 'say': 1, 'goodbye': 2, 'and': 3,
    #                    'i': 4, 'hello': 5, '.': 6}
    #   id_to_word   -> {0: 'you', 1: 'say', 2: 'goodbye', 3: 'and',
    #                    4: 'i', 5: 'hello', 6: '.'}
    #   corpus[1:-1] -> [1 2 3 4 1 5]

    # These two names were previously used without being defined, which made
    # the script fail with NameError when constructing the model.
    vocab_size = len(word_to_id)
    # Matches the 5-column random weight matrix used in the projection below.
    hidden_size = 5

    contexts, target = create_contexts_target(corpus)
    # Author's notes, apparently for the id form before one-hot conversion:
    #   contexts       -> [[0 2][1 3][2 4][3 1][4 5][1 6]]
    #   contexts[:, 1] -> [2 3 4 1 5 6]
    contexts = convert_one_hot(contexts, vocab_size)
    target = convert_one_hot(target, vocab_size)
    print(contexts.shape)

    # Project the second context word of every sample through a small random
    # weight matrix, the same kind of product an input layer would compute.
    sample_weights = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
    print(np.dot(contexts[:, 1], sample_weights))

    model = SimpleCBOW(vocab_size, hidden_size)
    # A single forward pass; the original repeated this identical call and
    # discarded the result both times.
    model.forward(contexts, target)