Ejemplo n.º 1
0
 def test_tag_pattern_defaults(self):
     """Tag ``self.text`` with a default ``PatternTagger`` and check words.

     ``self.text`` is set outside this method (presumably in the test
     fixture -- confirm); the expected token list below contains no
     punctuation token.
     """
     expected_words = ["Das", "ist", "ein", "schönes", "Auto"]
     tags = PatternTagger().tag(self.text)
     logging.debug("tags: {0}".format(tags))
     # Without this check an empty or truncated result would skip some or
     # all of the per-word assertions and the test would pass vacuously.
     assert_equal(len(tags), len(expected_words))
     for word_tag, expected in zip(tags, expected_words):
         assert_equal(word_tag[0], expected)
Ejemplo n.º 2
0
 def test_tag_nltk_tok(self):
     """Tag ``self.text`` using ``PatternTagger`` with an NLTK tokenizer.

     The tagger is built with ``tokenizer=NLTKPunktTokenizer()``; only the
     word part (index 0) of each returned pair is checked.
     ``self.text`` is set outside this method (presumably in the test
     fixture -- confirm).
     """
     expected_words = ["Das", "ist", "ein", "schönes", "Auto"]
     nltk_tagger = PatternTagger(tokenizer=NLTKPunktTokenizer())
     tags = nltk_tagger.tag(self.text)
     logging.debug("tags: {0}".format(tags))
     # Without this check an empty or truncated result would skip some or
     # all of the per-word assertions and the test would pass vacuously.
     assert_equal(len(tags), len(expected_words))
     for word_tag, expected in zip(tags, expected_words):
         assert_equal(word_tag[0], expected)
Ejemplo n.º 3
0
 def test_tag_blob_pattern_tok_include_punc(self):
     """Check ``blob.tags`` words when the tagger keeps punctuation.

     The blob is built from ``self.text`` with a ``PatternTokenizer`` and a
     ``PatternTagger(include_punc=True)``; the expected token list ends
     with ``"."``. Only the word part (index 0) of each pair is checked.
     ``self.text`` is set outside this method (presumably in the test
     fixture -- confirm).
     """
     expected_words = ["Das", "ist", "ein", "schönes", "Auto", "."]
     blob = TextBlob(self.text, tokenizer=PatternTokenizer(),
                     pos_tagger=PatternTagger(include_punc=True))
     tags = blob.tags
     logging.debug("tags: {0}".format(tags))
     # Without this check an empty or truncated result would skip some or
     # all of the per-word assertions and the test would pass vacuously.
     assert_equal(len(tags), len(expected_words))
     for word_tag, expected in zip(tags, expected_words):
         assert_equal(word_tag[0], expected)
Ejemplo n.º 4
0
 def test_tag_blob_nltk_tok_include_punc_stts(self):
     """Check words and tags of ``blob.tags`` with the ``'stts'`` tagset.

     The blob is built from ``self.text`` with an ``NLTKPunktTokenizer``
     and a ``PatternTagger(include_punc=True, tagset='stts')``. Every
     returned pair is compared, word first and then tag, against the
     expectation at the same position; finally the last word must be
     ``"."``. ``self.text`` is set outside this method (presumably in the
     test fixture -- confirm).
     """
     punkt_tokenizer = NLTKPunktTokenizer()
     stts_tagger = PatternTagger(include_punc=True, tagset='stts')
     blob = TextBlob(self.text,
                     tokenizer=punkt_tokenizer,
                     pos_tagger=stts_tagger)
     tagged = blob.tags
     logging.debug("tags: {0}".format(tagged))
     # (word, tag) expectations, in sentence order.
     expected = [
         ("Das", "PDS"),
         ("ist", "VVFIN"),
         ("ein", "ARTIND"),
         ("schönes", "ADJA"),
         ("Auto", "NN"),
         (".", "S"),
     ]
     for position, pair in enumerate(tagged):
         # Indexing (not zip) so extra tokens raise IndexError rather than
         # being silently ignored.
         expected_word, expected_tag = expected[position]
         assert_equal(pair[0], expected_word)
         assert_equal(pair[1], expected_tag)
     # The sentence-final token must be the full stop.
     assert_equal(tagged[-1][0], ".")