def test_tag(self):
    """Check that tagging ``self.text`` yields the expected word sequence.

    A newly constructed ``PerceptronTagger`` tags ``self.text``; only the
    word half of each ``(word, tag)`` pair is compared against the expected
    tokens, so the tags themselves are not verified here.
    """
    expected_words = [
        'Simple', 'is', 'better', 'than', 'complex', '.',
        'Complex', 'is', 'better', 'than', 'complicated', '.',
    ]
    tagged_pairs = PerceptronTagger().tag(self.text)
    # Two-name unpacking also requires every item to be exactly a pair.
    actual_words = [word for word, _pos in tagged_pairs]
    assert_equal(actual_words, expected_words)
if word in SYMBOLS: pos = 'SYM' elif word == "'" and pos == 'POS' and has_open_left_single_quote: pos = "''" has_open_left_single_quote = False elif word == "'" and pos == "''": has_open_left_single_quote = False elif word == '`' and pos == '``': has_open_left_single_quote = True word = reverse_map_paren(word) tokens.append((word, pos)) prev2 = prev prev = pos return tokens perceptron_tagger.tag = partial(_tag, perceptron_tagger) except: # pragma: no cover raise NotImplementedError( 'PerceptronTagger from textblob_aptagger does not exist!') def tag(text): """Returns the POS tags of the text using PerceptronTagger Parameters ---------- text : str or iterable This is the text to be processed. If it's a str, it will be sentence tokenized and word tokenized using nltk If it's an iterable, it will be assumed to be a list of tokens
def test_tag(self):
    """Tag ``self.text`` and verify the words come back in order.

    The result of ``PerceptronTagger().tag`` is treated as a sequence of
    ``(word, tag)`` pairs. The words are collected and compared with the
    expected token list; the tag values are ignored by this test.
    """
    tagger = PerceptronTagger()
    tagged = tagger.tag(self.text)

    # Keep only the first element of each pair (unpacking enforces length 2).
    words = []
    for word, _tag in tagged:
        words.append(word)

    assert_equal(
        words,
        [
            'Simple', 'is', 'better', 'than', 'complex', '.',
            'Complex', 'is', 'better', 'than', 'complicated', '.',
        ],
    )