def filter_inferred(self, result_vec, candidates, pos):
    """Keep only the inferred words that match one of the candidates.

    Each inferred word is tried against ``candidates`` in four surface
    forms, in this order: its WordNet lemma, the title-cased lemma, the
    raw word, and the title-cased raw word.  Every form that matches is
    reported through ``self.add_inference_result``.

    Candidates that were never matched get a negative fallback score
    derived from their unigram count in ``self.w2counts``.

    Args:
        result_vec: iterable of ``(word, weight)`` pairs, or ``None`` when
            there is nothing to filter.
        candidates: set of candidate strings (set difference is applied to
            it, so it must be a set-like object).
        pos: part-of-speech key, translated through ``to_wordnet_pos``.

    Returns:
        dict mapping candidate -> score.
    """
    filtered_results = {}
    candidates_found = set()

    # Identity test: `!= None` is ambiguous for array-like inputs.
    if result_vec is not None:
        # One lemmatizer for the whole pass instead of one per word.
        lemmatizer = WordNetLemmatizer()
        for word, weight in result_vec:
            # Looked up per item so an empty result_vec never touches the
            # POS map (same as the previous behaviour).
            wn_pos = to_wordnet_pos[pos]
            lemma = lemmatizer.lemmatize(word, wn_pos)
            # Order matters: lemma first, then its capitalized form, then
            # the raw word (a few candidates are not lemmatized) and its
            # capitalized form.
            for form in (lemma, lemma.title(), word, word.title()):
                if form in candidates:
                    self.add_inference_result(
                        form, weight, filtered_results, candidates_found)

    # Assign negative weights to candidates with no score; they will appear
    # last, sorted according to their unigram count.
    for candidate in candidates - candidates_found:
        count = self.w2counts.get(candidate, 1)
        # For count >= 1 the score lies in [-2, -1): more frequent
        # candidates end up closer to -1.
        filtered_results[candidate] = -1 - (1.0 / count)

    return filtered_results
def filter_inferred(self, result_vec, candidates, pos):
    """Keep only the inferred words that match one of the candidates.

    Each inferred word is tried against ``candidates`` in four surface
    forms, in this order: its WordNet lemma, the title-cased lemma, the
    raw word, and the title-cased raw word.  Every form that matches is
    reported through ``self.add_inference_result``.

    NOTE: candidates that are never matched are omitted from the result;
    the negative-score fallback based on unigram counts is disabled in
    this variant.

    Args:
        result_vec: iterable of ``(word, weight)`` pairs, or ``None`` when
            there is nothing to filter.
        candidates: container of candidate strings (membership-tested).
        pos: part-of-speech key, translated through ``to_wordnet_pos``.

    Returns:
        dict filled in by ``self.add_inference_result``; empty when
        ``result_vec`` is ``None``.
    """
    filtered_results = {}
    candidates_found = set()

    # Identity test: `!= None` is ambiguous for array-like inputs.
    if result_vec is None:
        return filtered_results

    # One lemmatizer for the whole pass instead of one per word.
    lemmatizer = WordNetLemmatizer()
    for word, weight in result_vec:
        # Looked up per item so an empty result_vec never touches the POS
        # map (same as the previous behaviour).
        wn_pos = to_wordnet_pos[pos]
        lemma = lemmatizer.lemmatize(word, wn_pos)
        # Order matters: lemma, capitalized lemma, then the raw word (a few
        # candidates are not lemmatized) and its capitalized form.
        for form in (lemma, lemma.title(), word, word.title()):
            if form in candidates:
                self.add_inference_result(
                    form, weight, filtered_results, candidates_found)

    return filtered_results
def filter_inferred(self, result_vec, candidates, pos):
    """Keep only the inferred words that match one of the candidates.

    Each inferred word is tried against ``candidates`` in four surface
    forms, in this order: its WordNet lemma, the title-cased lemma, the
    raw word, and the title-cased raw word.  Every form that matches is
    reported through ``self.add_inference_result``.

    NOTE: candidates that are never matched are omitted from the result;
    the negative-score fallback based on unigram counts is disabled in
    this variant.

    NOTE(review): open question from the original author -- is there no
    way a composite (multi-word) candidate can be matched here?

    TODO: an alternative that matches only the raw word against
    ``candidates`` (no lemmatization, no capitalization) was tried to
    test a hypothesis about differences between implementations.

    Args:
        result_vec: iterable of ``(word, weight)`` pairs, or ``None`` when
            there is nothing to filter.
        candidates: container of candidate strings (membership-tested).
        pos: part-of-speech key, translated through ``to_wordnet_pos``.

    Returns:
        dict filled in by ``self.add_inference_result``; empty when
        ``result_vec`` is ``None``.
    """
    filtered_results = {}
    candidates_found = set()

    # Identity test: `!= None` is ambiguous for array-like inputs.
    if result_vec is None:
        return filtered_results

    # One lemmatizer for the whole pass instead of one per word.
    lemmatizer = WordNetLemmatizer()
    for word, weight in result_vec:
        # Looked up per item so an empty result_vec never touches the POS
        # map (same as the previous behaviour).
        wn_pos = to_wordnet_pos[pos]
        lemma = lemmatizer.lemmatize(word, wn_pos)
        surface_forms = (
            lemma,
            lemma.title(),  # match also capitalized words
            word,           # a few candidates are not lemmatized
            word.title(),
        )
        for form in surface_forms:
            if form in candidates:
                self.add_inference_result(
                    form, weight, filtered_results, candidates_found)

    return filtered_results
def filter_inferred(self, result_vec, candidates, pos):
    """Keep only the inferred words that match one of the candidates.

    Byte-string words are decoded as UTF-8 first.  Each word is then tried
    against ``candidates`` in four surface forms, in this order: its
    WordNet lemma, the title-cased lemma, the (decoded) raw word, and the
    title-cased raw word.  Every form that matches is reported through
    ``self.add_inference_result``.

    Words that cannot be decoded or lemmatized because of a
    ``UnicodeDecodeError`` are printed and skipped.

    NOTE: candidates that are never matched are omitted from the result;
    the negative-score fallback based on unigram counts is disabled in
    this variant.

    Args:
        result_vec: iterable of ``(word, weight)`` pairs, or ``None`` when
            there is nothing to filter.  ``word`` may be ``bytes`` or text.
        candidates: container of candidate strings (membership-tested).
        pos: part-of-speech key, translated through ``to_wordnet_pos``.

    Returns:
        dict filled in by ``self.add_inference_result``; empty when
        ``result_vec`` is ``None``.
    """
    filtered_results = {}
    candidates_found = set()

    # Identity test: `!= None` is ambiguous for array-like inputs.
    if result_vec is None:
        return filtered_results

    # One lemmatizer for the whole pass instead of one per word.
    lemmatizer = WordNetLemmatizer()
    for word, weight in result_vec:
        # Looked up per item so an empty result_vec never touches the POS
        # map (same as the previous behaviour).
        wn_pos = to_wordnet_pos[pos]
        try:
            # Only byte strings need decoding; text has no usable
            # ``decode`` on Python 3.  Decoding inside the ``try`` lets a
            # malformed byte sequence take the same skip path as a
            # lemmatizer failure instead of aborting the whole pass.
            if isinstance(word, bytes):
                word = word.decode('utf-8')
            lemma = lemmatizer.lemmatize(word, wn_pos)
        except UnicodeDecodeError as e:
            print(word, e)
            continue

        # Order matters: lemma, capitalized lemma, then the raw word (a few
        # candidates are not lemmatized) and its capitalized form.
        for form in (lemma, lemma.title(), word, word.title()):
            if form in candidates:
                self.add_inference_result(
                    form, weight, filtered_results, candidates_found)

    return filtered_results