class Term(object):
    """A word paired with its lowercased Porter stem.

    Two terms are equal, and hash identically, when their stems match, so
    different surface forms that share a stem collapse to one entry in sets
    and dict keys.

    Attributes:
        full_word: the word exactly as passed to the constructor.
        stem: ``PorterStemmer().stem(full_word).lower()``.
    """

    def __init__(self, full_word):
        """Store ``full_word`` and compute its lowercased stem."""
        self.full_word = full_word
        # TODO: Lemmatization requires downloads
        # wnl = WordNetLemmatizer()
        # lemmas = [wnl.lemmatize(token) for token in tokens]
        self.stem = PorterStemmer().stem(full_word).lower()

    def __eq__(self, other):
        """Return True when ``other`` is a Term with the same stem.

        Returns ``NotImplemented`` for non-Term operands so that comparing
        against ``None`` or a plain string yields False instead of raising
        AttributeError.
        """
        if not isinstance(other, Term):
            return NotImplemented
        return self.stem == other.stem

    def __ne__(self, other):
        """Return the negation of ``__eq__``.

        Defined explicitly because Python 2 does not derive ``!=`` from
        ``__eq__``; without it ``!=`` would compare by identity.
        """
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    def __hash__(self):
        """Hash on the stem, consistent with ``__eq__``."""
        return hash(self.stem)

    def __repr__(self):
        """Return ``Term <stem>(<full_word>)`` with both parts UTF-8 encoded."""
        # NOTE(review): on Python 3 ``encode`` yields bytes, so the output
        # contains b'...' literals -- confirm the target interpreter before
        # changing this.
        return "Term {}({})".format(self.stem.encode('utf8'),
                                    self.full_word.encode('utf8'))

    def __str__(self):
        """Return the same text as ``repr(self)``."""
        return repr(self)

    def is_punctuation(self):
        """Return True when every character of the stem is ASCII punctuation.

        A per-character check is used rather than a substring test against
        ``string.punctuation`` so multi-character tokens such as ``...`` or
        ``--`` are recognised. An empty stem is vacuously True, which is what
        the substring test returned as well.
        """
        return all(char in string.punctuation for char in self.stem)

    def is_stop_word(self):
        """Return True when ``full_word`` is contained in ``_stop_words``."""
        # NOTE(review): the lookup uses the original casing of the word;
        # presumably ``_stop_words`` is lowercase -- verify whether
        # capitalised stop words should match.
        return self.full_word in _stop_words
class Term(object):
    """A word paired with its lowercased Porter stem.

    Two terms are equal, and hash identically, when their stems match, so
    different surface forms that share a stem collapse to one entry in sets
    and dict keys.

    Attributes:
        fullWord: the word exactly as passed to the constructor.
        stem: ``PorterStemmer().stem(fullWord).lower()``.
    """

    def __init__(self, fullWord):
        """Store ``fullWord`` and compute its lowercased stem."""
        self.fullWord = fullWord
        # TODO: Lemmatization
        self.stem = PorterStemmer().stem(fullWord).lower()

    def __eq__(self, other):
        """Return True when ``other`` is a Term with the same stem.

        Returns ``NotImplemented`` for non-Term operands so that comparing
        against ``None`` or a plain string yields False instead of raising
        AttributeError.
        """
        if not isinstance(other, Term):
            return NotImplemented
        return self.stem == other.stem

    def __ne__(self, other):
        """Return the negation of ``__eq__``.

        Defined explicitly because Python 2 does not derive ``!=`` from
        ``__eq__``; without it ``!=`` would compare by identity.
        """
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    def __hash__(self):
        """Hash on the stem, consistent with ``__eq__``."""
        return hash(self.stem)

    def __repr__(self):
        """Return ``Term <stem>(<fullWord>)`` with both parts UTF-8 encoded."""
        # NOTE(review): on Python 3 ``encode`` yields bytes, so the output
        # contains b'...' literals -- confirm the target interpreter before
        # changing this.
        return "Term {}({})".format(self.stem.encode('utf8'),
                                    self.fullWord.encode('utf8'))

    def __str__(self):
        """Return the same text as ``repr(self)``."""
        return repr(self)

    def is_punctuation(self):
        """Return True when every character of the stem is ASCII punctuation.

        A per-character check is used rather than a substring test against
        ``string.punctuation`` so multi-character tokens such as ``...`` or
        ``--`` are recognised. An empty stem is vacuously True, which is what
        the substring test returned as well.
        """
        return all(char in string.punctuation for char in self.stem)

    def is_stop_word(self):
        """Return True when ``fullWord`` is contained in ``_stop_words``."""
        # NOTE(review): the lookup uses the original casing of the word;
        # presumably ``_stop_words`` is lowercase -- verify whether
        # capitalised stop words should match.
        return self.fullWord in _stop_words