def test_one_line_n_bigger_than_seq_len(self):
        # A shift `n` (5) larger than `seq_len` (4) must be rejected with an
        # AssertionError whose message mentions that the windows cannot
        # overlap more than the sequence length.
        with open('test_files/one_line') as f:
            g = extract_x_y_words_with_x_shifting_by_n_each_yield(
                opened_file=f, seq_len=4, n=5)
            # Consume `g` while the file is still open, like every other test
            # in this class does. Draining it after the `with` block would
            # make the test depend on the assertion firing before any read
            # from the (by then closed) file.
            with self.assertRaisesRegex(
                    AssertionError,
                    '.*cannot overlap more than sequence length.*'):
                list(g)
 def test_one_line_with_leftover(self):
     # seq_len=7 with a shift of 5 on the one-line fixture: exactly two
     # (x, y) pairs are expected, each y being its x advanced by one word.
     expected = [
         (['WONDER', 'HOW', 'MUCH', 'OF', 'THE', 'MEETINGS', 'IS'],
          ['HOW', 'MUCH', 'OF', 'THE', 'MEETINGS', 'IS', 'TALKING']),
         (['MEETINGS', 'IS', 'TALKING', 'ABOUT', 'THE', 'STUFF', 'AT'],
          ['IS', 'TALKING', 'ABOUT', 'THE', 'STUFF', 'AT', 'THE']),
     ]
     with open('test_files/one_line') as handle:
         pairs = list(
             extract_x_y_words_with_x_shifting_by_n_each_yield(
                 opened_file=handle, seq_len=7, n=5))
     self.assertEqual(pairs, expected)
 def test_one_line_with_seq_len_of_1(self):
     # With seq_len=1 and n=1 every expected pair is ([token], [next token]),
     # so the expectation is built by pairing the token stream with itself
     # shifted by one position.
     tokens = [
         'WONDER', 'HOW', 'MUCH', 'OF', 'THE', 'MEETINGS', 'IS', 'TALKING',
         'ABOUT', 'THE', 'STUFF', 'AT', 'THE', 'MEETINGS', '</s>',
     ]
     expected = [([current], [following])
                 for current, following in zip(tokens, tokens[1:])]
     with open('test_files/one_line') as handle:
         pairs = list(
             extract_x_y_words_with_x_shifting_by_n_each_yield(
                 opened_file=handle, seq_len=1, n=1))
     self.assertEqual(pairs, expected)
 def test_short_lines_with_n_equal_to_seq_len(self):
     # seq_len=7 and n=7 on the short-lines fixture: four pairs are
     # expected, and each x starts with the last token of the previous y.
     expected = [
         (['WONDER', 'HOW', 'MUCH', 'OF', 'THE', 'MEETINGS', 'IS'],
          ['HOW', 'MUCH', 'OF', 'THE', 'MEETINGS', 'IS', 'TALKING']),
         (['TALKING', 'ABOUT', 'THE', 'STUFF', 'AT', 'THE', 'MEETINGS'],
          ['ABOUT', 'THE', 'STUFF', 'AT', 'THE', 'MEETINGS', '</s>']),
         (['</s>', 'YEAH', '</s>', 'NOT', 'A', 'LOT', '</s>'],
          ['YEAH', '</s>', 'NOT', 'A', 'LOT', '</s>', 'NO']),
         (['NO', '</s>', 'HMM', '</s>', 'OKAY', '</s>', 'SOUNDS'],
          ['</s>', 'HMM', '</s>', 'OKAY', '</s>', 'SOUNDS', 'LIKE']),
     ]
     with open('test_files/short_lines') as handle:
         pairs = list(
             extract_x_y_words_with_x_shifting_by_n_each_yield(
                 opened_file=handle, seq_len=7, n=7))
     self.assertEqual(pairs, expected)
 def test_short_lines_with_seq_len_and_n_that_overlaps_lines(self):
     # seq_len=17 with n=16 on the short-lines fixture: two pairs are
     # expected, and the second x begins with the final token of the first x.
     first_x = [
         'WONDER', 'HOW', 'MUCH', 'OF', 'THE', 'MEETINGS', 'IS', 'TALKING',
         'ABOUT', 'THE', 'STUFF', 'AT', 'THE', 'MEETINGS', '</s>', 'YEAH',
         '</s>',
     ]
     first_y = [
         'HOW', 'MUCH', 'OF', 'THE', 'MEETINGS', 'IS', 'TALKING', 'ABOUT',
         'THE', 'STUFF', 'AT', 'THE', 'MEETINGS', '</s>', 'YEAH', '</s>',
         'NOT',
     ]
     second_x = [
         '</s>', 'NOT', 'A', 'LOT', '</s>', 'NO', '</s>', 'HMM', '</s>',
         'OKAY', '</s>', 'SOUNDS', 'LIKE', "YOU'VE", 'DONE', 'SOME',
         'STUFF',
     ]
     second_y = [
         'NOT', 'A', 'LOT', '</s>', 'NO', '</s>', 'HMM', '</s>', 'OKAY',
         '</s>', 'SOUNDS', 'LIKE', "YOU'VE", 'DONE', 'SOME', 'STUFF',
         '</s>',
     ]
     expected = [(first_x, first_y), (second_x, second_y)]
     with open('test_files/short_lines') as handle:
         pairs = list(
             extract_x_y_words_with_x_shifting_by_n_each_yield(
                 opened_file=handle, seq_len=17, n=16))
     self.assertEqual(pairs, expected)
 def test_one_line_seq_len_bigger_than_words_in_file(self):
     # With seq_len=16 and n=16 on the one-line fixture, no pairs at all
     # are expected.
     with open('test_files/one_line') as handle:
         pairs = list(
             extract_x_y_words_with_x_shifting_by_n_each_yield(
                 opened_file=handle, seq_len=16, n=16))
     self.assertEqual(pairs, [])
 def test_empty_file_with_n_0(self):
     # An empty fixture with n=0 must produce zero pairs.
     with open('test_files/empty') as handle:
         pairs = list(
             extract_x_y_words_with_x_shifting_by_n_each_yield(
                 opened_file=handle, seq_len=2, n=0))
     produced = len(pairs)
     self.assertEqual(produced, 0)