def test_n_gram_slices_range(self):
    """Check the unpadded n-gram slices produced for orders 1 through 3.

    Calls ``n_gram_slices_range`` with ``self.tokens`` and ``range(1, 4)``
    and expects a mapping from each order ``n`` to a list of ``n``-tuples.
    """
    # NOTE(review): the expected values imply self.tokens holds the tokens
    # "a", "nice", "sentence"; the fixture is defined outside this view.
    slices_by_order = n_gram_slices_range(self.tokens, range(1, 4))

    expected = {
        1: [("a",), ("nice",), ("sentence",)],
        2: [("a", "nice"), ("nice", "sentence")],
        3: [("a", "nice", "sentence")],
    }
    self.assertEqual(slices_by_order, expected)
def test_n_gram_slices_range(self):
    """Verify ``n_gram_slices_range`` without padding for n = 1, 2, 3.

    The result is compared against hand-written unigram, bigram and
    trigram tuples, keyed by their order.
    """
    orders = range(1, 4)
    actual = n_gram_slices_range(self.tokens, orders)

    # Expected slices per order; presumably derived from a fixture of
    # "a", "nice", "sentence" (self.tokens is set up elsewhere).
    unigrams = [("a",), ("nice",), ("sentence",)]
    bigrams = [("a", "nice"), ("nice", "sentence")]
    trigrams = [("a", "nice", "sentence")]

    self.assertEqual(actual, {1: unigrams, 2: bigrams, 3: trigrams})
def test_n_gram_slices_range_padded(self):
    """Check the padded n-gram slices produced for orders 1 through 3.

    Calls ``n_gram_slices_range`` with ``pad=True``. In the expected
    values, unigrams carry no filler, while order-``n`` slices for
    ``n >= 2`` start and end with up to ``n - 1`` ``"_"`` entries.
    """
    # NOTE(review): the expected values imply self.tokens holds the tokens
    # "a", "nice", "sentence"; the fixture is defined outside this view.
    padded_by_order = n_gram_slices_range(
        self.tokens, range(1, 4), pad=True
    )

    expected = {
        1: [("a",), ("nice",), ("sentence",)],
        2: [
            ("_", "a"),
            ("a", "nice"),
            ("nice", "sentence"),
            ("sentence", "_"),
        ],
        3: [
            ("_", "_", "a"),
            ("_", "a", "nice"),
            ("a", "nice", "sentence"),
            ("nice", "sentence", "_"),
            ("sentence", "_", "_"),
        ],
    }
    self.assertEqual(padded_by_order, expected)
def test_n_gram_slices_range_padded(self):
    """Verify ``n_gram_slices_range`` with ``pad=True`` for n = 1, 2, 3.

    The result is compared against hand-written tuples in which the
    bigram and trigram windows run over ``"_"`` filler at both ends.
    """
    orders = range(1, 4)
    actual = n_gram_slices_range(self.tokens, orders, pad=True)

    # Unigrams are expected to be unaffected by padding.
    unigrams = [("a",), ("nice",), ("sentence",)]
    # One filler slot on each side for bigrams.
    bigrams = [
        ("_", "a"),
        ("a", "nice"),
        ("nice", "sentence"),
        ("sentence", "_"),
    ]
    # Two filler slots on each side for trigrams.
    trigrams = [
        ("_", "_", "a"),
        ("_", "a", "nice"),
        ("a", "nice", "sentence"),
        ("nice", "sentence", "_"),
        ("sentence", "_", "_"),
    ]

    self.assertEqual(actual, {1: unigrams, 2: bigrams, 3: trigrams})