def test_nouns_only_retain_case(self):
    """
    Check that the nouns returned by `_pos` keep the capitalisation they had in the text.
    """

    noun_tags = ['NN', 'NNS', 'NNP', 'NNPS']
    tokenizer = Tokenizer(pos=noun_tags)
    sentence = "A week in the life of Arsenal Football Club"

    # Common nouns stay lower-case, whereas the capitalised nouns stay capitalised.
    expected = ['week', 'life', 'Arsenal', 'Football', 'Club']
    self.assertEqual(expected, tokenizer._pos(sentence))
def test_nouns_proper(self):
    """
    Check that `_pos` returns proper nouns (here, people's names) alongside common nouns.
    """

    noun_tags = ['NN', 'NNS', 'NNP', 'NNPS']
    tokenizer = Tokenizer(pos=noun_tags)
    sentence = "Tanguy Ndombele told Jose Mourinho he never wants to play for him again following a clash earlier this week"

    # The four name tokens come first, followed by the two common nouns, in text order.
    expected = ['Tanguy', 'Ndombele', 'Jose', 'Mourinho', 'clash', 'week']
    self.assertEqual(expected, tokenizer._pos(sentence))
def test_nouns_only_multiple_sentence(self):
    """
    Check that `_pos` returns the nouns of every sentence when the text has more than one.
    """

    noun_tags = ['NN', 'NNS', 'NNP', 'NNPS']
    tokenizer = Tokenizer(pos=noun_tags)
    paragraph = "Night Call is now out on Xbox One and Nintendo Switch! We're so proud to see the game there and hope you'll enjoy your ride in Paris."

    # Nouns from the first sentence, then nouns from the second, in text order.
    expected = [
        'Night', 'Call', 'Xbox', 'One', 'Nintendo', 'Switch',
        'game', 'ride', 'Paris',
    ]
    self.assertEqual(expected, tokenizer._pos(paragraph))
def test_nouns_only_punctuation(self):
    """
    Check that none of the nouns returned by `_pos` contains an exclamation mark, a comma or a full stop.
    """

    noun_tags = ['NN', 'NNS', 'NNP', 'NNPS']
    tokenizer = Tokenizer(pos=noun_tags)
    paragraph = "Night Call is now out on Xbox One, Android and Nintendo Switch! We're so proud to see the game there and hope you'll enjoy your ride in Paris."

    punctuation = ['!', ',', '.']
    nouns = tokenizer._pos(paragraph)

    # A single flattened scan: for each noun, look for any of the punctuation marks inside it.
    has_punctuation = any(mark in noun for noun in nouns for mark in punctuation)
    self.assertFalse(has_punctuation)
def test_nouns_only_one_sentence(self):
    """
    Check that `_pos` returns the nouns of a text made up of a single sentence.
    """

    noun_tags = ['NN', 'NNS', 'NNP', 'NNPS']

    tokenizer = Tokenizer(pos=noun_tags)
    sentence = "I have no idea how we balance this but with a midfield with Pogba and Ndombele its curtains for the low blocks that once haunted us."
    expected_nouns = ['idea', 'midfield', 'Pogba', 'Ndombele', 'curtains', 'blocks']
    self.assertEqual(expected_nouns, tokenizer._pos(sentence))

    # Second scenario: the public `tokenize` method with a fresh noun-only tokenizer.
    # The expected tokens are lower-case and 'yards' is expected as 'yard'.
    # NOTE(review): this may have been a separate test originally - confirm.
    tokenizer = Tokenizer(pos=noun_tags)
    question = "Is Rojo really starting if he doesn't shoot from 40 yards"
    expected_tokens = ['rojo', 'yard']
    self.assertEqual(expected_tokens, tokenizer.tokenize(question))