def handle_candidate(self, reference, info={}):
    """Index one Gold Standard entry in the in-memory lookup tables.

    The entry is filed in the global `pre_gs` mapping under its string
    form (lowercased when `ignore_case` is set). When `lemma_or_surface`
    is set, it is additionally filed in `fuzzy_pre_gs` under its sequence
    of lemmas and under its sequence of surface forms. `ref_counter` is
    incremented once per call.

    @param reference A `Pattern` contained in the reference Gold Standard.
    When `ignore_pos` is set, its parts of speech are overwritten in place
    with `WILDCARD` before it is indexed.

    @param info Not used by this method.
    """
    global ignore_pos, ignore_case, lemma_or_surface
    global pre_gs, fuzzy_pre_gs, ref_counter

    if ignore_pos:
        reference.set_all(pos=WILDCARD)

    exact_key = reference.to_string()
    if ignore_case:
        exact_key = exact_key.lower()

    # Fetch-or-create the bucket, then store it back under the key.
    bucket = pre_gs.get(exact_key, [])
    bucket.append(reference)
    pre_gs[exact_key] = bucket

    if lemma_or_surface:
        # Same indexing step for both word attributes, lemma first.
        for attribute in ("lemma", "surface"):
            fuzzy_key = WORD_SEPARATOR.join(
                [getattr(w, attribute) for w in reference])
            fuzzy_pre_gs.setdefault(fuzzy_key, []).append(reference)

    ref_counter += 1
def handle_candidate(self, reference, info={}):
    """Store a Gold Standard entry in main memory for later lookup.

    Each entry goes into the global `pre_gs` mapping, keyed by the result
    of `reference.to_string()` (lowercased if `ignore_case` is set). If
    `lemma_or_surface` is set, the entry is also appended to the
    `fuzzy_pre_gs` buckets keyed by its joined lemmas and by its joined
    surface forms. The global `ref_counter` grows by one on every call.

    @param reference A `Pattern` contained in the reference Gold Standard.
    If `ignore_pos` is set, `set_all(pos=WILDCARD)` is applied to it
    first, which modifies the pattern itself.

    @param info Not used by this method.
    """
    global ignore_pos
    global ignore_case
    global lemma_or_surface
    global pre_gs
    global fuzzy_pre_gs
    global ref_counter

    if ignore_pos:
        reference.set_all(pos=WILDCARD)

    gold_key = reference.to_string()
    gold_key = gold_key.lower() if ignore_case else gold_key

    same_key_refs = pre_gs.get(gold_key, [])
    same_key_refs.append(reference)
    pre_gs[gold_key] = same_key_refs

    if lemma_or_surface:
        lemma_key = WORD_SEPARATOR.join([w.lemma for w in reference])
        fuzzy_pre_gs.setdefault(lemma_key, []).append(reference)
        surface_key = WORD_SEPARATOR.join([w.surface for w in reference])
        fuzzy_pre_gs.setdefault(surface_key, []).append(reference)

    ref_counter = ref_counter + 1
def handle_candidate(self, candidate_i, info={}):
    """Tag a candidate as a true or false positive and pass it on.

    For each candidate, verifies whether it is contained in the reference
    list (in which case it is a *True* positive) or else, it is not in the
    reference list (in which case it is a *False* positive, i.e. a random
    ngram that does not constitute a MWE). The verdict is attached to the
    candidate as a `TPClass` named after `gs_name`, the candidate is
    forwarded to `self.chain`, and the global counters `tp_counter` (true
    positives only) and `entity_counter` (every candidate) are updated.

    @param candidate_i The `Candidate` that is being read from the XML
    file. It is not modified apart from the added `TPClass`.

    @param info Passed through unchanged to the next handler in the chain.
    """
    global ignore_pos, ignore_case, lemma_or_surface, gs_name
    global pre_gs, fuzzy_pre_gs, entity_counter, tp_counter

    # Match against a word-by-word copy: `set_all` below then touches the
    # copy and not `candidate_i`, which is the object forwarded downstream.
    candidate = self.candidate_factory.make()
    for w in candidate_i:
        candidate.append(Word(w.surface, w.lemma, w.pos, w.syn))
    if ignore_pos:
        candidate.set_all(pos=WILDCARD)

    pre_gs_key = candidate.to_string()
    if ignore_case:
        pre_gs_key = pre_gs_key.lower()

    # Copy the bucket before extending it. `pre_gs.get` returns the very
    # list stored in `pre_gs`, so extending it in place would append the
    # fuzzy entries to the Gold Standard bucket again on every call.
    entries_to_check = list(pre_gs.get(pre_gs_key, []))
    if lemma_or_surface:
        lemma_key = WORD_SEPARATOR.join([w.lemma for w in candidate])
        surface_key = WORD_SEPARATOR.join([w.surface for w in candidate])
        entries_to_check.extend(fuzzy_pre_gs.get(lemma_key, []))
        entries_to_check.extend(fuzzy_pre_gs.get(surface_key, []))

    # `any` stops at the first positive match.
    true_positive = any(
        gold_entry.match(candidate, ignore_case=ignore_case,
                         lemma_or_surface=lemma_or_surface)
        for gold_entry in entries_to_check)

    if true_positive:
        candidate_i.add_tpclass(TPClass(gs_name, "True"))
        tp_counter += 1
    else:
        candidate_i.add_tpclass(TPClass(gs_name, "False"))

    self.chain.handle_candidate(candidate_i, info)
    entity_counter += 1
def handle_candidate(self, candidate_i, info={}):
    """Classify a candidate against the Gold Standard and forward it.

    For each candidate, verifies whether it is contained in the reference
    list (in which case it is a *True* positive) or else, it is not in the
    reference list (in which case it is a *False* positive, i.e. a random
    ngram that does not constitute a MWE). A `TPClass` carrying `gs_name`
    and the string "True" or "False" is added to the candidate, which is
    then handed to `self.chain`. `tp_counter` is incremented for true
    positives and `entity_counter` for every candidate.

    @param candidate_i The `Candidate` that is being read from the XML
    file.

    @param info Passed through unchanged to the next handler in the chain.
    """
    global ignore_pos
    global gs_name
    global ignore_case
    global entity_counter
    global tp_counter
    global pre_gs
    global lemma_or_surface
    global fuzzy_pre_gs

    # The comparison runs on a copy of the words, so the POS wildcarding
    # below does not alter `candidate_i` itself.
    candidate = self.candidate_factory.make()
    for w in candidate_i:
        copy_w = Word(w.surface, w.lemma, w.pos, w.syn)
        candidate.append(copy_w)
    if ignore_pos:
        candidate.set_all(pos=WILDCARD)

    pre_gs_key = candidate.to_string()
    if ignore_case:
        pre_gs_key = pre_gs_key.lower()

    # Build a fresh list. Using `+=` on the list returned by `pre_gs.get`
    # would extend the bucket stored in `pre_gs` itself, so the Gold
    # Standard would accumulate duplicate entries with every candidate.
    entries_to_check = list(pre_gs.get(pre_gs_key, []))
    if lemma_or_surface:
        entries_to_check += fuzzy_pre_gs.get(
            WORD_SEPARATOR.join([w.lemma for w in candidate]), [])
        entries_to_check += fuzzy_pre_gs.get(
            WORD_SEPARATOR.join([w.surface for w in candidate]), [])

    true_positive = False
    for gold_entry in entries_to_check:
        if gold_entry.match(candidate, ignore_case=ignore_case,
                            lemma_or_surface=lemma_or_surface):
            true_positive = True
            break  # Stop at first positive match

    if true_positive:
        candidate_i.add_tpclass(TPClass(gs_name, "True"))
        tp_counter = tp_counter + 1
    else:
        candidate_i.add_tpclass(TPClass(gs_name, "False"))

    self.chain.handle_candidate(candidate_i, info)
    entity_counter += 1
def key(ngram):
    """Build the string key for a list of words.

    The words (strings) are joined with `WORD_SEPARATOR`. A single string
    is needed because shelves can only be indexed by strings and integers.

    @param ngram The list of words (strings) to build the key from.

    @return The words of `ngram` joined by `WORD_SEPARATOR`.
    """
    separator = WORD_SEPARATOR
    joined = separator.join(ngram)
    return joined