Example #1
0
 def get_terms(self):
     """Return the normalized terms extracted from ``self.desc``.

     Punctuation is removed from the description, the text is split on
     whitespace and the first four tokens are discarded.  Stopwords are
     then removed, purely numeric tokens are dropped, and every remaining
     token is passed through ``util.normalize_token``.

     Returns:
         list: the normalized tokens, in their original order.
     """
     text = util.remove_punctuations(self.desc)
     # NOTE(review): the first four tokens are always skipped -- presumably
     # a fixed prefix in the description; confirm against the data source.
     # str.split() without a separator never yields empty strings, so no
     # empty-token filter (and no identity test against '') is required.
     tokens = text.split()[4:]
     return [
         util.normalize_token(token)
         for token in util.remove_stopwords(tokens)
         if not token.isdigit()
     ]
Example #2
0
 def get_terms(self):
     """Return the normalized terms of the title and abstract.

     The title (``self.fields["title"]``) is stripped of punctuation and
     split on whitespace; those tokens are concatenated with
     ``self.abstract`` (presumably an already tokenized list, since it is
     appended to a list -- confirm against the caller).  Stopwords are
     removed and each remaining token goes through
     ``util.normalize_token``.
     """
     title_tokens = util.remove_punctuations(self.fields["title"]).split()
     candidates = title_tokens + self.abstract
     kept = util.remove_stopwords(candidates)
     return [util.normalize_token(token) for token in kept]