예제 #1
0
 def setUp(self):
     """Create the text corpora, vocab file, and numberized reference data.

     The raw text files and the temporary vocab path come from
     ``test_utils``. The value returned by
     ``dictionary.Dictionary.build_vocab_file`` over both corpora is
     stored as ``self.d``.
     """
     text_files = test_utils.create_test_text_files()
     self.src_txt, self.trg_txt = text_files
     self.vocab_file_path = test_utils.make_temp_file()

     corpora = [self.src_txt, self.trg_txt]
     self.d = dictionary.Dictionary.build_vocab_file(
         corpus_files=corpora,
         vocab_file=self.vocab_file_path,
         # NOTE(review): 0 presumably means "no vocab size limit" -- confirm.
         max_vocab_size=0,
         # A padding factor of 1 avoids adding extra padding symbols.
         padding_factor=1,
     )

     # Reference source rows are stored reversed (matching
     # reverse_source=True below); runs are spelled as value * count.
     self.src_ref = [
         [106, 104, 102, 100],
         [104] * 2 + [102] * 2 + [100] * 2,
         [102] * 4 + [100] * 4,
         [100] * 10,
     ]
     # Reference target rows, in the same run-length form.
     self.trg_ref = [
         [101] * 10,
         [101] * 4 + [103] * 4,
         [101] * 2 + [103] * 2 + [105] * 2,
         [101, 103, 105, 107],
     ]

     numberized_files = test_utils.create_test_numberized_data_files(
         self.src_ref, self.trg_ref, reverse_source=True
     )
     self.src_txt_numberized, self.trg_txt_numberized = numberized_files
     # One reference row per sentence.
     self.num_sentences = len(self.src_ref)
예제 #2
0
 def setUp(self):
     """Create the text corpora, vocab file, and Lua-offset reference data.

     The raw text files and the temporary vocab path come from
     ``test_utils``. The value returned by
     ``dictionary.Dictionary.build_vocab_file`` over both corpora is
     stored as ``self.d``.
     """
     text_files = test_utils.create_test_text_files()
     self.src_txt, self.trg_txt = text_files
     self.vocab_file_path = test_utils.make_temp_file()

     corpora = [self.src_txt, self.trg_txt]
     self.d = dictionary.Dictionary.build_vocab_file(
         corpus_files=corpora,
         vocab_file=self.vocab_file_path,
         # NOTE(review): 0 presumably means "no vocab size limit" -- confirm.
         max_vocab_size=0,
     )

     # Reference source rows are stored reversed (matching
     # reverse_source=True below) and carry a +1 offset for Lua;
     # runs are spelled as value * count.
     self.src_ref = [
         [107, 105, 103, 101],
         [105] * 2 + [103] * 2 + [101] * 2,
         [103] * 4 + [101] * 4,
         [101] * 10,
     ]
     # Reference target rows, same +1 offset and run-length form.
     self.trg_ref = [
         [102] * 10,
         [102] * 4 + [104] * 4,
         [102] * 2 + [104] * 2 + [106] * 2,
         [102, 104, 106, 108],
     ]

     numberized_files = test_utils.create_test_numberized_data_files(
         self.src_ref, self.trg_ref, reverse_source=True
     )
     self.src_txt_numberized, self.trg_txt_numberized = numberized_files
     # The Lua-side EOS id is the dictionary's eos_index shifted by one.
     self.lua_eos = self.d.eos_index + 1
     # One reference row per sentence.
     self.num_sentences = len(self.src_ref)