def test_get_tokens_from_line_should_omit_numbers_2(self):
    """Expect '$200' and '100items' to be absent from the returned tokens."""
    nlp = get_spacy_instance('en')
    sentence = 'Bob owes me $200 dollars full of 100items'
    actual = get_tokens_from_line(sentence, nlp)
    self.assertEqual(
        actual,
        ['bob', 'owes', 'me', 'dollars', 'full', 'of'],
    )
def test_get_tokens_from_line_should_omit_nums(self):
    """Expect only 'cohesion' and 'fund' to survive a reference-heavy title."""
    nlp = get_spacy_instance('en')
    sentence = (
        'Cohesion Fund: 2000-2006 '
        '[COM(1999)344 - C5-0122/1999 - 1999/2127(COS)]'
    )
    actual = get_tokens_from_line(sentence, nlp)
    self.assertEqual(actual, ['cohesion', 'fund'])
def test_get_tokens_from_line_should_omit_numbers_1(self):
    """Expect '$300' and '200-PO' to be absent from the returned tokens."""
    nlp = get_spacy_instance('en')
    sentence = 'I like my $300 200-PO jacket'
    actual = get_tokens_from_line(sentence, nlp)
    self.assertEqual(actual, ['i', 'like', 'my', 'jacket'])
def test_get_tokens_from_line_should_make_lowercase(self):
    """Expect mixed-case input to come back as lowercase tokens.

    The trailing '!' and ':)' are also expected to be absent from the
    result.
    """
    input_str = "I AM HERE nOWww! :)"
    expected_tokens = ['i', 'am', 'here', 'nowww']
    spacy_instance = get_spacy_instance('en')
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(
        get_tokens_from_line(input_str, spacy_instance),
        expected_tokens,
    )
def test_get_tokens_from_line_should_not_break_contractions_correctly_4(self):
    """Expect a plural possessive apostrophe to yield no token of its own."""
    nlp = get_spacy_instance('en')
    phrase = "citizens' representatives"
    actual = get_tokens_from_line(phrase, nlp)
    self.assertEqual(actual, ['citizens', 'representatives'])
def test_get_tokens_from_line_should_break_contractions_correctly_2(self):
    """Expect "didn't" and the detached "' s" to yield "n't" and "'s" tokens."""
    nlp = get_spacy_instance('en')
    sentence = "didn't tomorrow' s lesson cancel?"
    actual = get_tokens_from_line(sentence, nlp)
    self.assertEqual(
        actual,
        ['did', "n't", 'tomorrow', "'s", 'lesson', 'cancel'],
    )
def test_get_tokens_from_line_should_not_omit_appostope(self):
    """Expect apostrophes to be kept on the clitic tokens "'m" and "'s"."""
    input_str = 'I\'m enjoying my brother\'s dinner'
    expected_tokens = [
        'i', '\'m', 'enjoying', 'my', 'brother', '\'s', 'dinner'
    ]
    spacy_instance = get_spacy_instance('en')
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(
        get_tokens_from_line(input_str, spacy_instance),
        expected_tokens,
    )
def test_get_tokens_from_line_should_omit_punctuations_1(self):
    """Expect parentheses, the comma and '!!' to be absent from the tokens."""
    input_str = '(In the meantime), I should like to observe!!'
    expected_tokens = [
        'in', 'the', 'meantime', 'i', 'should', 'like', 'to', 'observe'
    ]
    spacy_instance = get_spacy_instance('en')
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(
        get_tokens_from_line(input_str, spacy_instance),
        expected_tokens,
    )
def test_get_tokens_from_line_should_break_contractions_correctly_3(self):
    """Expect French elisions ("l'", "j'") to stay as apostrophe-bearing tokens.

    Uses the 'fr' spaCy instance rather than the English one.
    """
    nlp = get_spacy_instance('fr')
    sentence = "de l' avis de l' AFET, dont j' ai été le rapporteur"
    wanted = [
        'de', "l'", 'avis', 'de', "l'", 'afet',
        'dont', "j'", 'ai', 'été', 'le', 'rapporteur',
    ]
    actual = get_tokens_from_line(sentence, nlp)
    self.assertEqual(actual, wanted)
def test_get_tokens_from_line_should_break_contractions_correctly(self):
    """Expect the detached possessive "minute' s" to yield 'minute' and "'s"."""
    nlp = get_spacy_instance('en')
    sentence = "I should like to observe a minute' s silence"
    wanted = [
        'i', 'should', 'like', 'to', 'observe',
        'a', 'minute', "'s", 'silence',
    ]
    actual = get_tokens_from_line(sentence, nlp)
    self.assertEqual(actual, wanted)
def test_get_tokens_from_line_should_break_contractions(self):
    """Expect "He'll" and "won't" to split into 'he' + "'ll" and 'wo' + "n't"."""
    nlp = get_spacy_instance('en')
    sentence = "He'll explain that I won't break the shell?"
    wanted = [
        'he', "'ll", 'explain', 'that', 'i',
        'wo', "n't", 'break', 'the', 'shell',
    ]
    actual = get_tokens_from_line(sentence, nlp)
    self.assertEqual(actual, wanted)
def test_get_tokens_from_line_should_omit_punctuations_2(self):
    """Expect periods, parentheses, hyphens, comma and colon to be dropped.

    The hyphenated place name is expected to come back as separate
    lowercase words.
    """
    input_str = 'Ms. Marlene Jennings (Notre-Dame-de-Grâce-Lachine, Lib.):'
    expected_tokens = [
        'ms', 'marlene', 'jennings', 'notre', 'dame', 'de', 'grâce',
        'lachine', 'lib'
    ]
    spacy_instance = get_spacy_instance('en')
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(
        get_tokens_from_line(input_str, spacy_instance),
        expected_tokens,
    )