예제 #1
0
    def test_nouns_only_retain_case(self):
        """
        Check that nouns picked out by part-of-speech keep their original capitalization.

        Common nouns are expected in lowercase as written, and the capitalized
        words of the club name are expected with their capitals intact.
        """

        tokenizer = Tokenizer(pos=['NN', 'NNS', 'NNP', 'NNPS'])
        sentence = "A week in the life of Arsenal Football Club"
        expected = ['week', 'life', 'Arsenal', 'Football', 'Club']

        nouns = tokenizer._pos(sentence)
        self.assertEqual(expected, nouns)
예제 #2
0
    def test_nouns_proper(self):
        """
        Check that proper nouns (here, people's names) are returned alongside common nouns.
        """

        tokenizer = Tokenizer(pos=['NN', 'NNS', 'NNP', 'NNPS'])
        sentence = "Tanguy Ndombele told Jose Mourinho he never wants to play for him again following a clash earlier this week"
        expected = ['Tanguy', 'Ndombele', 'Jose', 'Mourinho', 'clash', 'week']

        nouns = tokenizer._pos(sentence)
        self.assertEqual(expected, nouns)
예제 #3
0
    def test_nouns_only_multiple_sentence(self):
        """
        Check noun extraction over a text made up of more than one sentence.

        The nouns of both sentences are expected in a single flat list, in
        the order in which they appear in the text.
        """

        tokenizer = Tokenizer(pos=['NN', 'NNS', 'NNP', 'NNPS'])
        sentence = "Night Call is now out on Xbox One and Nintendo Switch! We're so proud to see the game there and hope you'll enjoy your ride in Paris."

        # Nouns from the first sentence, followed by those from the second.
        expected = ['Night', 'Call', 'Xbox', 'One', 'Nintendo', 'Switch']
        expected += ['game', 'ride', 'Paris']

        nouns = tokenizer._pos(sentence)
        self.assertEqual(expected, nouns)
예제 #4
0
    def test_nouns_only_punctuation(self):
        """
        Check that none of the extracted nouns contains an exclamation mark, comma or period.
        """

        tokenizer = Tokenizer(pos=['NN', 'NNS', 'NNP', 'NNPS'])
        sentence = "Night Call is now out on Xbox One, Android and Nintendo Switch! We're so proud to see the game there and hope you'll enjoy your ride in Paris."
        delimiters = ['!', ',', '.']

        nouns = tokenizer._pos(sentence)

        # True as soon as any delimiter shows up inside any of the nouns.
        has_punctuation = any(mark in noun
                              for noun in nouns
                              for mark in delimiters)
        self.assertFalse(has_punctuation)
예제 #5
0
    def test_nouns_only_one_sentence(self):
        """
        Check noun extraction from single-sentence inputs.

        The first sentence goes through ``_pos`` directly; the second goes
        through the public ``tokenize`` method of a tokenizer built with the
        same part-of-speech tags.
        """

        noun_tags = ['NN', 'NNS', 'NNP', 'NNPS']

        # Part-of-speech filtering on its own: the nouns come back as written.
        tokenizer = Tokenizer(pos=list(noun_tags))
        sentence = "I have no idea how we balance this but with a midfield with Pogba and Ndombele its curtains for the low blocks that once haunted us."
        expected = ['idea', 'midfield', 'Pogba', 'Ndombele', 'curtains', 'blocks']
        self.assertEqual(expected, tokenizer._pos(sentence))

        # Full tokenization with a fresh tokenizer and its own list of tags.
        # NOTE(review): the lowercase, singular expected tokens presumably come
        # from the tokenizer's default normalization - confirm against Tokenizer.
        tokenizer = Tokenizer(pos=list(noun_tags))
        question = "Is Rojo really starting if he doesn't shoot from 40 yards"
        tokens = tokenizer.tokenize(question)
        self.assertEqual(['rojo', 'yard'], tokens)