class TestSimpleCBOW(unittest.TestCase):
    """Tests for SimpleCBOW's forward and backward passes on a one-sentence corpus."""

    def setUp(self):
        """Create the model and the one-hot contexts/target built from the sample text."""
        counter = CountBasedMethod()
        words = counter.text_to_word_list('You said good-bye and I said hello.')
        word_to_id, _, self.corpus = counter.preprocess(words)
        self.vocab_size = len(word_to_id)

        hidden_size = 3
        self.simple_cbow = SimpleCBOW(self.vocab_size, hidden_size)

        self.simple_word2vec = SimpleWord2Vec()
        w2v = self.simple_word2vec
        self.contexts_array, self.target_array = w2v.create_contexts_target(
            self.corpus)
        self.contexts = w2v.convert_to_one_hot(
            self.contexts_array, self.vocab_size)
        self.target = w2v.convert_to_one_hot(
            self.target_array, self.vocab_size)

    def test_forward(self):
        """forward() returns a loss that rounds to 1.946 at three decimals."""
        loss = self.simple_cbow.forward(self.contexts, self.target)
        self.assertEqual(1.946, round(loss, 3))

    def test_grads_diff(self):
        """A forward/backward pass changes the layer gradients in the expected cells."""
        model = self.simple_cbow
        layers = (model.in_layer_0, model.in_layer_1, model.out_layer)

        # Snapshot each layer's single gradient array before running the model.
        snapshots = []
        for layer in layers:
            grad, = layer.grads
            snapshots.append(copy.copy(grad))

        model.forward(self.contexts, self.target)
        model.backward()

        # Element-wise masks: True where a gradient entry is unchanged.
        unchanged = []
        for layer, before in zip(layers, snapshots):
            after, = layer.grads
            unchanged.append(before == after)
        in_grads_0, in_grads_1, out_grads = unchanged

        # First input layer: only rows 5 and 6 keep their original gradient.
        expected_in_0 = np.zeros((7, 3), dtype=bool)
        expected_in_0[5:] = True
        assert_array_equal(expected_in_0, in_grads_0)

        # Second input layer: only row 0 keeps its original gradient.
        expected_in_1 = np.zeros((7, 3), dtype=bool)
        expected_in_1[0] = True
        assert_array_equal(expected_in_1, in_grads_1)

        # Output layer: every gradient entry is expected to change.
        expected_out = np.zeros((3, 7), dtype=bool)
        assert_array_equal(expected_out, out_grads)
# coding: utf-8
import sys
sys.path.append('..')
import numpy as np
from simple_cbow import SimpleCBOW
from utils.layers import MatMul
from utils.tools import create_contexts_target, preprocess, convert_one_hot


# Demo script: preprocess a sample sentence, build one-hot contexts/targets,
# print an example input-layer projection, and run SimpleCBOW's forward pass.
if __name__ == '__main__':
    text = 'You say goodbye and I say hello.'
    corpus, word_to_id, id_to_word = preprocess(text)
    # Values noted by the original author for this sentence:
    #   corpus       -> [0 1 2 3 4 1 5 6]
    #   word_to_id   -> {'you': 0, 'say': 1, 'goodbye': 2, 'and': 3,
    #                    'i': 4, 'hello': 5, '.': 6}
    #   id_to_word   -> {0: 'you', 1: 'say', 2: 'goodbye', 3: 'and',
    #                    4: 'i', 5: 'hello', 6: '.'}
    #   corpus[1:-1] -> [1 2 3 4 1 5]

    # These two names were previously used below without being defined,
    # which raised NameError when the model was constructed.
    vocab_size = len(word_to_id)
    hidden_size = 5  # same hidden width the projection demo below already used

    contexts, target = create_contexts_target(corpus)
    # Original notes (they appear to describe the id form, before one-hot
    # conversion -- TODO confirm):
    #   contexts       -> [[0 2][1 3][2 4][3 1][4 5][1 6]]
    #   contexts[:, 1] -> [2 3 4 1 5 6]
    contexts = convert_one_hot(contexts, vocab_size)
    target = convert_one_hot(target, vocab_size)

    print(contexts.shape)
    # Project the second context word of every sample through a small random
    # weight matrix of shape (vocab_size, hidden_size).
    print(
        np.dot(contexts[:, 1],
               0.01 * np.random.randn(vocab_size, hidden_size).astype('f')))

    model = SimpleCBOW(vocab_size, hidden_size)
    # NOTE(review): forward is invoked twice, as in the original, and the
    # return value is discarded both times -- confirm whether one is enough.
    model.forward(contexts, target)
    model.forward(contexts, target)