def test_one_line_n_bigger_than_seq_len(self):
    """Consuming the generator with n=5 > seq_len=4 must raise AssertionError.

    The generator is created first and only consumed inside the
    ``assertRaisesRegex`` context, so the error is expected on iteration.
    """
    expected_message = '.*cannot overlap more than sequence length.*'
    with open('test_files/one_line') as source:
        pairs = extract_x_y_words_with_x_shifting_by_n_each_yield(
            opened_file=source, seq_len=4, n=5)
        with self.assertRaisesRegex(AssertionError, expected_message):
            list(pairs)
def test_one_line_with_leftover(self):
    """With seq_len=7 and n=5 the one-line fixture yields exactly two pairs.

    Each y sequence is the matching x sequence shifted forward by one word.
    """
    first_x = ['WONDER', 'HOW', 'MUCH', 'OF', 'THE', 'MEETINGS', 'IS']
    first_y = ['HOW', 'MUCH', 'OF', 'THE', 'MEETINGS', 'IS', 'TALKING']
    second_x = ['MEETINGS', 'IS', 'TALKING', 'ABOUT', 'THE', 'STUFF', 'AT']
    second_y = ['IS', 'TALKING', 'ABOUT', 'THE', 'STUFF', 'AT', 'THE']
    expected = [(first_x, first_y), (second_x, second_y)]

    with open('test_files/one_line') as source:
        pairs = extract_x_y_words_with_x_shifting_by_n_each_yield(
            opened_file=source, seq_len=7, n=5)
        produced = list(pairs)

    self.assertEqual(produced, expected)
def test_one_line_with_seq_len_of_1(self):
    """With seq_len=1 and n=1 every word is paired with its successor."""
    # Expected token stream of the fixture, ending with the '</s>' marker.
    words = [
        'WONDER', 'HOW', 'MUCH', 'OF', 'THE', 'MEETINGS', 'IS', 'TALKING',
        'ABOUT', 'THE', 'STUFF', 'AT', 'THE', 'MEETINGS', '</s>',
    ]
    # One ([word], [next_word]) pair per adjacent couple of tokens.
    expected = [
        ([current], [following])
        for current, following in zip(words, words[1:])
    ]

    with open('test_files/one_line') as source:
        pairs = extract_x_y_words_with_x_shifting_by_n_each_yield(
            opened_file=source, seq_len=1, n=1)
        produced = list(pairs)

    self.assertEqual(produced, expected)
def test_short_lines_with_n_equal_to_seq_len(self):
    """With n == seq_len == 7 the x sequences are back-to-back windows.

    The expected windows span several lines of the fixture, with '</s>'
    tokens appearing between them.
    """
    expected = [
        (
            ['WONDER', 'HOW', 'MUCH', 'OF', 'THE', 'MEETINGS', 'IS'],
            ['HOW', 'MUCH', 'OF', 'THE', 'MEETINGS', 'IS', 'TALKING'],
        ),
        (
            ['TALKING', 'ABOUT', 'THE', 'STUFF', 'AT', 'THE', 'MEETINGS'],
            ['ABOUT', 'THE', 'STUFF', 'AT', 'THE', 'MEETINGS', '</s>'],
        ),
        (
            ['</s>', 'YEAH', '</s>', 'NOT', 'A', 'LOT', '</s>'],
            ['YEAH', '</s>', 'NOT', 'A', 'LOT', '</s>', 'NO'],
        ),
        (
            ['NO', '</s>', 'HMM', '</s>', 'OKAY', '</s>', 'SOUNDS'],
            ['</s>', 'HMM', '</s>', 'OKAY', '</s>', 'SOUNDS', 'LIKE'],
        ),
    ]

    with open('test_files/short_lines') as source:
        pairs = extract_x_y_words_with_x_shifting_by_n_each_yield(
            opened_file=source, seq_len=7, n=7)
        produced = list(pairs)

    self.assertEqual(produced, expected)
def test_short_lines_with_seq_len_and_n_that_overlaps_lines(self):
    """With seq_len=17 and n=16 each window spans several fixture lines.

    Two pairs are expected; the second x sequence starts on the last
    token of the first one.
    """
    first_x = [
        'WONDER', 'HOW', 'MUCH', 'OF', 'THE', 'MEETINGS', 'IS', 'TALKING',
        'ABOUT', 'THE', 'STUFF', 'AT', 'THE', 'MEETINGS', '</s>', 'YEAH',
        '</s>',
    ]
    first_y = [
        'HOW', 'MUCH', 'OF', 'THE', 'MEETINGS', 'IS', 'TALKING', 'ABOUT',
        'THE', 'STUFF', 'AT', 'THE', 'MEETINGS', '</s>', 'YEAH', '</s>',
        'NOT',
    ]
    second_x = [
        '</s>', 'NOT', 'A', 'LOT', '</s>', 'NO', '</s>', 'HMM', '</s>',
        'OKAY', '</s>', 'SOUNDS', 'LIKE', "YOU'VE", 'DONE', 'SOME', 'STUFF',
    ]
    second_y = [
        'NOT', 'A', 'LOT', '</s>', 'NO', '</s>', 'HMM', '</s>', 'OKAY',
        '</s>', 'SOUNDS', 'LIKE', "YOU'VE", 'DONE', 'SOME', 'STUFF', '</s>',
    ]
    expected = [(first_x, first_y), (second_x, second_y)]

    with open('test_files/short_lines') as source:
        pairs = extract_x_y_words_with_x_shifting_by_n_each_yield(
            opened_file=source, seq_len=17, n=16)
        produced = list(pairs)

    self.assertEqual(produced, expected)
def test_one_line_seq_len_bigger_than_words_in_file(self):
    """With seq_len=16 and n=16 the one-line fixture yields no pairs."""
    with open('test_files/one_line') as source:
        pairs = extract_x_y_words_with_x_shifting_by_n_each_yield(
            opened_file=source, seq_len=16, n=16)
        produced = list(pairs)

    self.assertEqual(produced, [])
def test_empty_file_with_n_0(self):
    """An empty fixture read with seq_len=2 and n=0 yields no pairs."""
    with open('test_files/empty') as source:
        pairs = extract_x_y_words_with_x_shifting_by_n_each_yield(
            opened_file=source, seq_len=2, n=0)
        produced = list(pairs)

    self.assertEqual(len(produced), 0)