import copy
import unittest

import numpy as np
from numpy.testing import assert_array_equal

# NOTE: the imports below are assumed from the class names used in this test;
# adjust the module paths to the actual project layout.
from count_based_method import CountBasedMethod
from simple_cbow import SimpleCBOW
from simple_word2vec import SimpleWord2Vec


class TestSimpleCBOW(unittest.TestCase):
    def setUp(self):
        text = 'You said good-bye and I said hello.'
        cbm = CountBasedMethod()
        word_list = cbm.text_to_word_list(text)
        word_to_id, _, self.corpus = cbm.preprocess(word_list)
        self.vocab_size = len(word_to_id)
        hidden_size = 3
        self.simple_cbow = SimpleCBOW(self.vocab_size, hidden_size)
        self.simple_word2vec = SimpleWord2Vec()
        self.contexts_array, self.target_array = self.simple_word2vec.create_contexts_target(
            self.corpus)
        self.contexts = self.simple_word2vec.convert_to_one_hot(
            self.contexts_array, self.vocab_size)
        self.target = self.simple_word2vec.convert_to_one_hot(
            self.target_array, self.vocab_size)

    def test_forward(self):
        loss = self.simple_cbow.forward(self.contexts, self.target)
        self.assertEqual(1.946, round(loss, 3))

    def test_grads_diff(self):
        # Snapshot the gradients before the pass so we can check which rows
        # are actually touched by one forward/backward step.
        before_in_grads_0, = self.simple_cbow.in_layer_0.grads
        before_in_grads_0 = copy.copy(before_in_grads_0)
        before_in_grads_1, = self.simple_cbow.in_layer_1.grads
        before_in_grads_1 = copy.copy(before_in_grads_1)
        before_out_grads, = self.simple_cbow.out_layer.grads
        before_out_grads = copy.copy(before_out_grads)

        self.simple_cbow.forward(self.contexts, self.target)
        self.simple_cbow.backward()

        after_in_grads_0, = self.simple_cbow.in_layer_0.grads
        after_in_grads_1, = self.simple_cbow.in_layer_1.grads
        after_out_grads, = self.simple_cbow.out_layer.grads

        # Element-wise comparison: True means the gradient entry is unchanged.
        in_grads_0 = before_in_grads_0 == after_in_grads_0
        in_grads_1 = before_in_grads_1 == after_in_grads_1
        out_grads = before_out_grads == after_out_grads

        assert_array_equal(
            np.array([[False, False, False],
                      [False, False, False],
                      [False, False, False],
                      [False, False, False],
                      [False, False, False],
                      [True, True, True],
                      [True, True, True]]),
            in_grads_0)
        assert_array_equal(
            np.array([[True, True, True],
                      [False, False, False],
                      [False, False, False],
                      [False, False, False],
                      [False, False, False],
                      [False, False, False],
                      [False, False, False]]),
            in_grads_1)
        assert_array_equal(
            np.array([[False, False, False, False, False, False, False],
                      [False, False, False, False, False, False, False],
                      [False, False, False, False, False, False, False]]),
            out_grads)
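# For reference: a minimal, self-contained sketch of the CBOW model that the
# test above exercises (two input MatMul layers sharing W_in, one output
# MatMul layer, softmax cross-entropy loss). The attribute names follow the
# ones used in the test; the project's actual SimpleCBOW may differ in
# initialization details and in reusing its own layer classes.
import numpy as np


class _MatMul:
    """Fully connected layer without bias; grads[0] accumulates dL/dW."""

    def __init__(self, W):
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        self.x = None

    def forward(self, x):
        self.x = x
        return np.dot(x, self.params[0])

    def backward(self, dout):
        W, = self.params
        dx = np.dot(dout, W.T)
        self.grads[0][...] = np.dot(self.x.T, dout)
        return dx


class _SoftmaxWithLoss:
    """Softmax followed by cross-entropy against one-hot targets."""

    def forward(self, score, target_one_hot):
        score = score - score.max(axis=1, keepdims=True)
        exp = np.exp(score)
        self.y = exp / exp.sum(axis=1, keepdims=True)
        self.t = target_one_hot
        # Mean cross-entropy over the batch.
        return float(-np.sum(self.t * np.log(self.y + 1e-7)) / score.shape[0])

    def backward(self, dout=1):
        return (self.y - self.t) * (dout / self.t.shape[0])


class SimpleCBOWSketch:
    def __init__(self, vocab_size, hidden_size):
        W_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        W_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f')
        self.in_layer_0 = _MatMul(W_in)    # projects the left context word
        self.in_layer_1 = _MatMul(W_in)    # projects the right context word
        self.out_layer = _MatMul(W_out)    # scores every vocabulary word
        self.loss_layer = _SoftmaxWithLoss()
        self.params, self.grads = [], []
        for layer in (self.in_layer_0, self.in_layer_1, self.out_layer):
            self.params += layer.params
            self.grads += layer.grads
        self.word_vecs = W_in              # learned word vectors = rows of W_in

    def forward(self, contexts, target):
        # contexts: (N, 2, vocab_size) one-hot, target: (N, vocab_size) one-hot
        h = 0.5 * (self.in_layer_0.forward(contexts[:, 0]) +
                   self.in_layer_1.forward(contexts[:, 1]))
        return self.loss_layer.forward(self.out_layer.forward(h), target)

    def backward(self, dout=1):
        da = 0.5 * self.out_layer.backward(self.loss_layer.backward(dout))
        self.in_layer_1.backward(da)
        self.in_layer_0.backward(da)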
def main():
    window_size = 1
    hidden_size = 5
    batch_size = 3
    max_epoch = 1000

    text = 'You say goodbye and I say hello.'
    corpus, word_to_id, id_to_word = preprocess(text)

    vocab_size = len(word_to_id)
    contexts, target = create_context_target(corpus, window_size)
    one_hot_target = convert_one_hot(target, vocab_size)
    one_hot_contexts = convert_one_hot(contexts, vocab_size)

    model = SimpleCBOW(vocab_size, hidden_size)
    optimizer = Adam()
    trainer = Trainer(model, optimizer)

    trainer.fit(one_hot_contexts, one_hot_target, max_epoch, batch_size)
    # trainer.plot()

    word_vecs = model.word_vecs
    for word_id, word in id_to_word.items():
        print(word, word_vecs[word_id])

    print('DONE')
def main():
    window_size = 1
    hidden_size = 5
    batch_size = 3
    max_epoch = 1000

    text = 'You say goodbye and I say hello.'
    corpus, word_to_id, id_to_word = preprocess(text)

    vocab_size = len(word_to_id)
    contexts, target = create_contexts_target(corpus, window_size)
    target = convert_one_hot(target, vocab_size)
    contexts = convert_one_hot(contexts, vocab_size)

    model = SimpleCBOW(vocab_size, hidden_size)
    optimizer = Adam()
    trainer = Trainer(model, optimizer)

    trainer.fit(contexts, target, max_epoch, batch_size)
    trainer.plot()
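# A minimal sketch of how the (contexts, target) pairs consumed above can be
# built from a word-ID corpus, assuming a symmetric window of window_size
# words; the project's create_contexts_target utility may differ in naming
# and in the exact types it returns.
import numpy as np


def create_contexts_target_sketch(corpus, window_size=1):
    # Targets are all words that have a full context window on both sides.
    target = corpus[window_size:-window_size]
    contexts = []
    for idx in range(window_size, len(corpus) - window_size):
        cs = []
        for offset in range(-window_size, window_size + 1):
            if offset == 0:
                continue  # skip the target word itself
            cs.append(corpus[idx + offset])
        contexts.append(cs)
    return np.array(contexts), np.array(target)


# For the corpus [0 1 2 3 4 1 5 6] of 'You say goodbye and I say hello.' this
# yields contexts [[0 2] [1 3] [2 4] [3 1] [4 5] [1 6]] and targets
# [1 2 3 4 1 5], matching the commented outputs further below.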
import sys
sys.path.append(
    '/home/hiromasa/deep-learning-from-scratch-2')  # so that files in the parent repository can be imported
from common.trainer import Trainer
from common.optimizer import Adam
from simple_cbow import SimpleCBOW
from common.util import preprocess, create_contexts_target, convert_one_hot

window_size = 1
hidden_size = 5
batch_size = 3
max_epoch = 1000

text = 'You say goodbye and I say hello.'
corpus, word_to_id, id_to_word = preprocess(text)

vocab_size = len(word_to_id)
contexts, target = create_contexts_target(corpus, window_size)
target = convert_one_hot(target, vocab_size)
contexts = convert_one_hot(contexts, vocab_size)

model = SimpleCBOW(vocab_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)

trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()

word_vecs = model.word_vecs
for word_id, word in id_to_word.items():
    print(word, word_vecs[word_id])
[[[1 0 0 0 0 0 0]
  [0 0 1 0 0 0 0]]
<BLANKLINE>
 [[0 1 0 0 0 0 0]
  [0 0 0 1 0 0 0]]
<BLANKLINE>
 [[0 0 1 0 0 0 0]
  [0 0 0 0 1 0 0]]
<BLANKLINE>
 [[0 0 0 1 0 0 0]
  [0 1 0 0 0 0 0]]
<BLANKLINE>
 [[0 0 0 0 1 0 0]
  [0 0 0 0 0 1 0]]
<BLANKLINE>
 [[0 1 0 0 0 0 0]
  [0 0 0 0 0 0 1]]]
"""
print(contexts_one_hot)

model = SimpleCBOW(vocabulary_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)
trainer.fit(contexts_one_hot, target_one_hot, max_epoch, batch_size)
trainer.plot()

word_vecs = model.word_vecs
for word_id, word in id_to_word.items():
    print(word, word_vecs[word_id])
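# A minimal sketch of the one-hot conversion that produces the doctest output
# above: each word ID becomes a one-hot row, and an (N, 2) context array
# becomes (N, 2, vocab_size). The project's convert_one_hot utility may differ
# in dtype and edge-case handling.
import numpy as np


def convert_one_hot_sketch(corpus, vocab_size):
    if corpus.ndim == 1:
        # target: (N,) -> (N, vocab_size)
        one_hot = np.zeros((corpus.shape[0], vocab_size), dtype=np.int32)
        for idx, word_id in enumerate(corpus):
            one_hot[idx, word_id] = 1
    else:
        # contexts: (N, 2 * window_size) -> (N, 2 * window_size, vocab_size)
        one_hot = np.zeros(corpus.shape + (vocab_size,), dtype=np.int32)
        for idx_0, word_ids in enumerate(corpus):
            for idx_1, word_id in enumerate(word_ids):
                one_hot[idx_0, idx_1, word_id] = 1
    return one_hot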
# coding: utf-8
import sys
sys.path.append('..')
import numpy as np
from simple_cbow import SimpleCBOW
from utils.layers import MatMul
from utils.tools import create_contexts_target, preprocess, convert_one_hot

if __name__ == '__main__':
    text = 'You say goodbye and I say hello.'
    corpus, word_to_id, id_to_word = preprocess(text)
    # print(corpus)      # [0 1 2 3 4 1 5 6]
    # print(word_to_id)  # {'you': 0, 'say': 1, 'goodbye': 2, 'and': 3, 'i': 4, 'hello': 5, '.': 6}
    # id_to_word is the inverse mapping: {0: 'you', 1: 'say', 2: 'goodbye', 3: 'and', 4: 'i', 5: 'hello', 6: '.'}
    # print(corpus[1:-1])  # [1 2 3 4 1 5]

    contexts, target = create_contexts_target(corpus)
    contexts = convert_one_hot(contexts, len(word_to_id))
    target = convert_one_hot(target, len(word_to_id))
    # Before the one-hot conversion:
    # print(contexts)        # [[0 2] [1 3] [2 4] [3 1] [4 5] [1 6]]
    # print(contexts[:, 1])  # [2 3 4 1 5 6]

    print(contexts.shape)
    # Manually project the one-hot right-context words with a random weight
    # matrix, i.e. what the input MatMul layer of the model does internally.
    print(
        np.dot(contexts[:, 1],
               0.01 * np.random.randn(len(word_to_id), 5).astype('f')))

    vocab_size = len(word_to_id)
    hidden_size = 5  # matches the 5-dimensional projection used above
    model = SimpleCBOW(vocab_size, hidden_size)
    model.forward(contexts, target)
    model.forward(contexts, target)
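# Side note on the manual np.dot check above: because each row of
# contexts[:, 1] is one-hot, multiplying by a (vocab_size, hidden_size) weight
# matrix just selects one row of that matrix per context word. A tiny sketch,
# assuming only numpy:
import numpy as np

W_in = 0.01 * np.random.randn(7, 5).astype('f')  # (vocab_size, hidden_size)
one_hot = np.zeros((1, 7), dtype=np.int32)
one_hot[0, 2] = 1                                # word ID 2, e.g. 'goodbye'
h = np.dot(one_hot, W_in)                        # (1, hidden_size) projection
assert np.allclose(h[0], W_in[2])                # same as picking row 2 of W_in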
import sys
sys.path.append('..')
from common.trainer import Trainer
from common.optimizer import Adam
from simple_cbow import SimpleCBOW
from common.util import preprocess, create_contexts_target, convert_one_hot

window_size = 1
hidden_size = 5
batch_size = 3
max_epoch = 1000

text = 'You say goodbye and I say hello.'
corpus, word_to_id, id_to_word = preprocess(text)

vocab_size = len(word_to_id)
contexts, target = create_contexts_target(corpus, window_size)
target = convert_one_hot(target, vocab_size)
contexts = convert_one_hot(contexts, vocab_size)

# Train the CBOW model with Adam; hidden_size (not vocab_size) is the
# dimensionality of the learned word vectors.
model = SimpleCBOW(vocab_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)
trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()