def test_charngram_composer(self):
    """Smoke test: run the segmenting encoder with a CharNGramComposer.

    The test makes no assertion; it passes as long as ``transduce`` on the
    embedded input completes without raising.
    """
    encoder = self.segmenting_encoder
    vocab_of_words = Vocab(vocab_file="examples/data/head.ja.vocab")
    ngram_composer = CharNGramComposer(
        word_vocab=vocab_of_words,
        char_vocab=self.src_reader.vocab,
        hidden_dim=self.layer_dim,
    )
    encoder.segment_composer = ngram_composer
    embedded_input = self.inp_emb(0)
    encoder.transduce(embedded_input)
def test_add_multiple_segment_composer(self):
    """Smoke test: run the encoder with a SumMultipleComposer.

    The composite wraps a LookupComposer and a CharNGramComposer built from
    the same vocabularies and hidden size. The test makes no assertion; it
    passes as long as ``transduce`` completes without raising.
    """
    encoder = self.segmenting_encoder
    vocab_of_words = Vocab(vocab_file="examples/data/head.ja.vocab")
    # Sub-composers are built in the same order they appear in the list.
    lookup_part = LookupComposer(
        word_vocab=vocab_of_words,
        char_vocab=self.src_reader.vocab,
        hidden_dim=self.layer_dim,
    )
    ngram_part = CharNGramComposer(
        word_vocab=vocab_of_words,
        char_vocab=self.src_reader.vocab,
        hidden_dim=self.layer_dim,
    )
    encoder.segment_composer = SumMultipleComposer(
        composers=[lookup_part, ngram_part],
    )
    embedded_input = self.inp_emb(0)
    encoder.transduce(embedded_input)
def test_chargram_composer_learn(self):
    """Check the n-gram cache of a CharNGramComposer in training mode.

    A composer with ``ngram_size=2`` and ``vocab_size=5`` is given two
    words (as tuples of character ids over the vocab a, b, c, d). After
    each word is set and transduced, the contents of ``lrucache`` are
    compared against the expected n-gram -> id mapping.
    """
    encoder = self.segmenting_encoder
    alphabet = Vocab(i2w=['a', 'b', 'c', 'd'])
    encoder.segment_composer = CharNGramComposer(
        word_vocab=None,
        char_vocab=alphabet,
        hidden_dim=self.layer_dim,
        ngram_size=2,
        vocab_size=5,
    )
    event_trigger.set_train(True)

    # Each entry: (character ids of the word, expected cache afterwards).
    scenarios = (
        # "abc" -> a:0, ab:1, b:2, bc:3, c:4
        ((0, 1, 2), {'a': 0, 'ab': 1, 'b': 2, 'bc': 3, 'c': 4}),
        # "cd" -> c, cd, d; per the expected mapping 'cd' and 'd' take
        # over ids 0 and 1, which 'a' and 'ab' held before.
        ((2, 3), {'cd': 0, 'd': 1, 'b': 2, 'bc': 3, 'c': 4}),
    )
    for char_ids, expected_cache in scenarios:
        encoder.segment_composer.set_word(char_ids)
        encoder.segment_composer.transduce([])
        actual_cache = dict(encoder.segment_composer.lrucache.items())
        self.assertDictEqual(actual_cache, expected_cache)