Esempio n. 1
0
 def test_stem(self):
     # Exercise vector.stem() on one upper-case token with stemmer=None,
     # PORTER, LEMMA, a custom callable, and Word inputs (pattern.en.Word).
     token = "WOLVES"
     # (actual, expected) pairs. All stem() calls run here, in order, before
     # any assertion is evaluated.
     outcomes = [
         (vector.stem(token, stemmer=None), "wolves"),
         (vector.stem(token, stemmer=vector.PORTER), "wolv"),
         (vector.stem(token, stemmer=vector.LEMMA), "wolf"),
         # A callable stemmer: its return value is expected back verbatim.
         (vector.stem(token, stemmer=lambda w: "wolf*"), "wolf*"),
         # A Word carrying an explicit lemma: that lemma is the expected result.
         (vector.stem(Word(None, token, lemma=u"wolf*"), stemmer=vector.LEMMA), "wolf*"),
         # A Word carrying only a part-of-speech tag.
         (vector.stem(Word(None, token, type="NNS"), stemmer=vector.LEMMA), "wolf"),
     ]
     for actual, expected in outcomes:
         self.assertEqual(actual, expected)
     # Every result must be a unicode string.
     for actual, _ in outcomes:
         self.assertTrue(isinstance(actual, unicode))
     print("pattern.vector.stem()")
Esempio n. 2
0
# Walk through the attributes of an existing `sentence` object; the trailing
# comments describe what each attribute is expected to hold.
print(sentence.words)     # Word objects, as a list.
print(sentence.lemmata)   # Lemmata of the words, as a list.
print(sentence.chunks)    # Chunk objects, as a list.
print(sentence.subjects)  # NP-SBJ chunks, as a list.
print(sentence.objects)   # NP-OBJ chunks, as a list.
print(sentence.verbs)     # VP chunks, as a list.
# Example value:
# {'SBJ': {1: Chunk('the cat/NP-SBJ-1')}, 'VP': {1: Chunk('sat/VP-1')}, 'OBJ': {}}
print(sentence.relations)
print(sentence.pnp)       # PNPChunks, e.g. [Chunk('on the mat/PNP')]
print(sentence.constituents(pnp=False))
# print sentence.slice([0], [1])
print(sentence.copy())
print(sentence.xml)

# Word: built directly here, with the same text passed for both positional
# arguments and no lemma or part-of-speech tag.
word = Word(
    'The cat sat on the mat.',
    'The cat sat on the mat.',
    lemma=None,
    type=None,
    index=0,
)
print(word.sentence)  # Parent sentence.
print(word.index)     # Position of the word within the sentence.
print(word.string)    # The word's string (Unicode).
print(word.lemma)     # Lemma string, e.g. 'sat' => 'sit'.
print(word.type)      # Part-of-speech tag (NN, JJ, VBD, ...).
print(word.chunk)     # Parent Chunk, or None.
print(word.pnp)       # Parent PNPChunk, or None.

# Chunk: built directly here, with no words, type, role or relation.
chunk = Chunk(
    'The cat sat on the mat.',
    words=[],
    type=None,
    role=None,
    relation=None,
)