def setUp(self):
    """Prepare the fixtures used by the tests in this class.

    Obtains a source/target text file pair from ``test_utils``, builds a
    vocabulary file from both via ``Dictionary.build_vocab_file``, and
    writes numberized data files from the hard-coded reference id lists.
    """
    source_path, target_path = test_utils.create_test_text_files()
    self.src_txt = source_path
    self.trg_txt = target_path

    vocab_path = test_utils.make_temp_file()
    self.vocab_file_path = vocab_path
    self.d = dictionary.Dictionary.build_vocab_file(
        corpus_files=[source_path, target_path],
        vocab_file=vocab_path,
        max_vocab_size=0,
        padding_factor=1,  # don't add extra padding symbols
    )

    # The source reference ids are stored reversed (see reverse_source below).
    source_ids = [
        [106, 104, 102, 100],
        [104, 104, 102, 102, 100, 100],
        [102, 102, 102, 102, 100, 100, 100, 100],
        [100, 100, 100, 100, 100, 100, 100, 100, 100, 100],
    ]
    target_ids = [
        [101, 101, 101, 101, 101, 101, 101, 101, 101, 101],
        [101, 101, 101, 101, 103, 103, 103, 103],
        [101, 101, 103, 103, 105, 105],
        [101, 103, 105, 107],
    ]
    self.src_ref = source_ids
    self.trg_ref = target_ids
    # One entry per reference sentence in each list above.
    self.num_sentences = 4

    numberized_paths = test_utils.create_test_numberized_data_files(
        source_ids, target_ids, reverse_source=True
    )
    self.src_txt_numberized, self.trg_txt_numberized = numberized_paths
def setUp(self):
    """Prepare the fixtures used by the Lua-oriented tests in this class.

    Obtains a source/target text file pair from ``test_utils``, builds a
    vocabulary file from both via ``Dictionary.build_vocab_file``, and
    writes numberized data files from the hard-coded reference id lists.
    Also records ``lua_eos`` as the dictionary's EOS index plus one.
    """
    source_path, target_path = test_utils.create_test_text_files()
    self.src_txt = source_path
    self.trg_txt = target_path

    vocab_path = test_utils.make_temp_file()
    self.vocab_file_path = vocab_path
    self.d = dictionary.Dictionary.build_vocab_file(
        corpus_files=[source_path, target_path],
        vocab_file=vocab_path,
        max_vocab_size=0,
    )

    # The source reference ids are stored reversed, and every id carries
    # the "+1 for lua" offset noted by the original author.
    source_ids = [
        [107, 105, 103, 101],
        [105, 105, 103, 103, 101, 101],
        [103, 103, 103, 103, 101, 101, 101, 101],
        [101, 101, 101, 101, 101, 101, 101, 101, 101, 101],
    ]
    target_ids = [
        [102, 102, 102, 102, 102, 102, 102, 102, 102, 102],
        [102, 102, 102, 102, 104, 104, 104, 104],
        [102, 102, 104, 104, 106, 106],
        [102, 104, 106, 108],
    ]
    self.src_ref = source_ids
    self.trg_ref = target_ids

    numberized_paths = test_utils.create_test_numberized_data_files(
        source_ids, target_ids, reverse_source=True
    )
    self.src_txt_numberized, self.trg_txt_numberized = numberized_paths

    # Same +1 offset applied to the EOS index (presumably because Lua
    # indexing is 1-based; confirm against the Lua side).
    self.lua_eos = self.d.eos_index + 1
    # One entry per reference sentence in each list above.
    self.num_sentences = 4