def test_stem(self):
    # Assert stem with PORTER, LEMMA and pattern.en.Word.
    # Every call is made up front; the assertions below only inspect results.
    token = "WOLVES"
    stemmed = [
        # No stemmer: the test expects the lowercased input.
        vector.stem(token, stemmer=None),
        # Porter stemmer.
        vector.stem(token, stemmer=vector.PORTER),
        # Lemmatizer.
        vector.stem(token, stemmer=vector.LEMMA),
        # Custom callable used as the stemmer.
        vector.stem(token, stemmer=lambda _: "wolf*"),
        # Word with a lemma already set.
        vector.stem(Word(None, token, lemma=u"wolf*"), stemmer=vector.LEMMA),
        # Word with only a part-of-speech tag set.
        vector.stem(Word(None, token, type="NNS"), stemmer=vector.LEMMA),
    ]
    expected = ["wolves", "wolv", "wolf", "wolf*", "wolf*", "wolf"]
    for actual, wanted in zip(stemmed, expected):
        self.assertEqual(actual, wanted)
    # Assert unicode output.
    for actual in stemmed:
        self.assertTrue(isinstance(actual, unicode))
    # Parenthesized single argument: same output as the bare print statement.
    print("pattern.vector.stem()")
# Sentence attributes, printed one per line.
# List of Word objects.
print(sentence.words)
# List of word lemmata.
print(sentence.lemmata)
# List of Chunk objects.
print(sentence.chunks)
# List of NP-SBJ chunks.
print(sentence.subjects)
# List of NP-OBJ chunks.
print(sentence.objects)
# List of VP chunks.
print(sentence.verbs)
# {'SBJ': {1: Chunk('the cat/NP-SBJ-1')}, 'VP': {1: Chunk('sat/VP-1')}, 'OBJ': {}}
print(sentence.relations)
# List of PNPChunks: [Chunk('on the mat/PNP')]
print(sentence.pnp)
print(sentence.constituents(pnp=False))
# NOTE(review): the slice() call is left disabled; confirm whether it was meant to run.
# print sentence.slice([0], [1])
print(sentence.copy())
print(sentence.xml)

# sentence words
word = Word(
    'The cat sat on the mat.',
    'The cat sat on the mat.',
    lemma=None,
    type=None,
    index=0,
)
# Sentence parent.
print(word.sentence)
# Sentence index of word.
print(word.index)
# String (Unicode).
print(word.string)
# String lemma, e.g. 'sat' => 'sit'.
print(word.lemma)
# Part-of-speech tag (NN, JJ, VBD, ...)
print(word.type)
# Chunk parent, or None.
print(word.chunk)
# PNPChunk parent, or None.
print(word.pnp)

# sentence chunk
chunk = Chunk(
    'The cat sat on the mat.',
    words=[],
    type=None,
    role=None,
    relation=None,
)