def apply(self, term_list):
    '''
    Filter term_list against this object.

    Each term is conflated with termproc.conflate() and the conflated form
    is tested for membership in self. Terms whose conflated form is NOT
    contained in self are kept, in their original (unconflated) form and
    in their original order.

    Returns a new list; term_list itself is not modified.
    '''
    return [
        candidate
        for candidate in term_list
        if termproc.conflate(candidate) not in self
    ]
def count_term(self, term, denominator=1, weight=1):
    '''
    Record one occurrence of term and return its stem.

    When a term is added it is assumed to be prepared except for
    stemming, i.e. it must be lowercase if necessary, and
    stoplist-checked.

    The term is first conflated with termproc.conflate(), then stemmed.
    Stems are cached in self.stem_map (conflated term -> stem) so each
    distinct conflated term is passed to the stemmer only once.

    self.terms_dict maps each stem to a three-element list:
        [0] accumulated weighted count (weight / denominator per call),
        [1] number of calls that produced this stem,
        [2] set of conflated (pre-stemming) forms seen for this stem.

    Parameters:
        term        -- the prepared term to count.
        denominator -- divisor applied to this occurrence's weight.
        weight      -- multiplier applied to this occurrence.

    Returns the stem under which the term was counted.
    '''
    orig_term = termproc.conflate(term)

    # `in` replaces dict.has_key(), which no longer exists in Python 3.
    if orig_term in self.stem_map:
        stem = self.stem_map[orig_term]
    else:
        # NOTE(review): the (word, start, end) call shape looks like the
        # classic Porter stemmer interface with inclusive indices -- confirm.
        stem = self.stemmer.stem(orig_term, 0, len(orig_term) - 1)
        self.stem_map[orig_term] = stem

    # float(1) forces true division even for integer weight/denominator.
    increment = (float(1) * weight) / denominator

    if stem in self.terms_dict:
        entry = self.terms_dict[stem]
        entry[0] += increment
        entry[1] += 1
        entry[2].add(orig_term)
    else:
        self.terms_dict[stem] = [increment, 1, set([orig_term])]
    return stem