예제 #1
0
 def test_two_word_seq(self):
     # Build the split over the long word fixture. The third argument (2)
     # is presumably the input window length -- confirm against
     # DomainAdaptationSplit's signature.
     stream = getStream(self.test_words_long)
     split = split_corpus_dataset.DomainAdaptationSplit(
         stream, self.vocab, 2, 0.5)

     # Drain the split through its iterator protocol.
     produced = [pair for pair in split]

     # Exactly one (input, target) pair is expected.
     expected_input = torch.LongTensor([0, 1])
     expected_target = torch.LongTensor([2])
     self.assertEqual(produced, [(expected_input, expected_target)])
예제 #2
0
 def test_single_word_retrieval(self):
     # Split over the short word fixture; end_portion is passed by keyword.
     stream = getStream(self.test_words_short)
     split = split_corpus_dataset.DomainAdaptationSplit(
         stream, self.vocab, 1, end_portion=0.5)

     # input_words() should hand back only the input-side words.
     retrieved = list(split.input_words())
     expected_words = ['a']
     self.assertEqual(retrieved, expected_words)
예제 #3
0
 def test_single_word(self):
     # Split over the short word fixture with 1 as the third argument
     # (presumably the input window length -- confirm against the class).
     stream = getStream(self.test_words_short)
     split = split_corpus_dataset.DomainAdaptationSplit(
         stream, self.vocab, 1, 0.5)

     # Only the first item yielded by the split is checked.
     first_pair = next(iter(split))

     expected_input = torch.LongTensor([0])
     expected_target = torch.LongTensor([1])
     self.assertEqual(first_pair, (expected_input, expected_target))
예제 #4
0
 def test_two_word_retrieval(self):
     # Split over the long word fixture with 2 as the third argument.
     stream = getStream(self.test_words_long)
     split = split_corpus_dataset.DomainAdaptationSplit(
         stream, self.vocab, 2, 0.5)

     # input_words() is expected to give a single entry holding the two
     # input words joined by a space.
     retrieved = list(split.input_words())
     expected_words = ['a a']
     self.assertEqual(retrieved, expected_words)
예제 #5
0
 def test_len_no_output(self):
     # With 3 as the third argument over the short fixture, the split is
     # expected to report a length of zero.
     stream = getStream(self.test_words_short)
     split = split_corpus_dataset.DomainAdaptationSplit(
         stream, self.vocab, 3, 0.5)

     reported_length = len(split)
     self.assertEqual(reported_length, 0)
예제 #6
0
 def test_single_word_len(self):
     # With 1 as the third argument over the short fixture, the split is
     # expected to report a length of two.
     stream = getStream(self.test_words_short)
     split = split_corpus_dataset.DomainAdaptationSplit(
         stream, self.vocab, 1, 0.5)

     reported_length = len(split)
     self.assertEqual(reported_length, 2)