import random

def run_all_algorithms(RESLVE_alg, site, use_cache):
    '''
    @param RESLVE_alg: A constructed reslve_algorithm object
    @param site: The site whose labeled entities and judgments should be loaded
    @param use_cache: False if still working on algorithms and boosting
    performance and therefore don't want to cache their rankings in a file yet;
    True if ready to cache algorithms' rankings
    '''
    # Valid entities and their labels annotated by Mechanical Turk workers
    entities_to_evaluate = entity_dataset_mgr.get_valid_ne_candidates(site)
    entity_judgments = entity_dataset_mgr.get_entity_judgements(site)
    if (entities_to_evaluate is None or len(entities_to_evaluate)==0
        or entity_judgments is None or len(entity_judgments)==0):
        print "No labeled ambiguous entities + candidates available. "+\
            "Run appropriate scripts first."
        return {}

    # Entities that have been labeled by human judges
    entities_to_resolve = [ne_obj for ne_obj in entities_to_evaluate
                           if ne_obj.get_entity_id() in entity_judgments]
    print str(len(entities_to_evaluate))+" entities and "+\
        str(len(entity_judgments))+" judgments available, resulting in "+\
        str(len(entities_to_resolve))+" entities to resolve"

    # Usernames that do not belong to the same individual on the site and
    # Wikipedia and that we'll use as a baseline for no background knowledge
    nonmatch_usernames = crosssite_username_dataset_mgr.get_confirmed_nonmatch_usernames(site)

    resolved_entities = []
    for ne_obj in entities_to_resolve:
        print str(len(resolved_entities))+" out of "+\
            str(len(entities_to_resolve))+" resolved.."
        entity_id = ne_obj.get_entity_id()
        evaluated_candidates = entity_judgments[entity_id]

        # Construct a ResolvedEntity object to represent this
        # ambiguous entity and its various candidate rankings
        resolved_entity = ResolvedEntity(ne_obj, evaluated_candidates)
        resolved_entities.append(resolved_entity)

        reslve_algorithms = [RESLVE_alg]
        for reslve_alg in reslve_algorithms:
            print "Ranking candidates using RESLVE's "+\
                str(reslve_alg.alg_type)+" algorithm..."
            candidate_titles = ne_obj.get_candidate_titles()

            # Perform the RESLVE ranking..
            reslve_ranking_user_match = reslve_alg.rank_candidates(
                candidate_titles, ne_obj.username)

            # Perform the same algorithm's ranking again, but this time use
            # a non-match user's interest model as background information,
            # which according to our hypothesis should provide less relevant
            # semantic background knowledge and thus have lower performance
            random_nonmatch_username = random.choice(nonmatch_usernames)
            reslve_ranking_user_nonmatch = reslve_alg.rank_candidates(
                candidate_titles, random_nonmatch_username)

            resolved_entity.add_reslve_ranking(reslve_alg.alg_id,
                                               reslve_ranking_user_match,
                                               reslve_ranking_user_nonmatch)

        # Cache intermittently in case we need to exit..
        __save_resolved_entities__(resolved_entities, site, use_cache)

    __save_resolved_entities__(resolved_entities, site, use_cache) # Cache resolved entities
    return resolved_entities
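# Hedged sketch of the caching step called above. The real
# __save_resolved_entities__ helper is defined elsewhere in the RESLVE
# codebase; the name _save_resolved_entities_sketch and the pickle file
# naming below are illustrative assumptions, not the project's actual
# cache format.
import pickle

def _save_resolved_entities_sketch(resolved_entities, site, use_cache):
    if not use_cache:
        return # still tuning algorithms, so skip writing rankings to disk
    cache_path = 'resolved_entities_'+str(site)+'.pkl' # assumed file naming
    with open(cache_path, 'wb') as cache_file:
        pickle.dump(resolved_entities, cache_file)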
def compute_annotator_agreement_libs(site):
    ''' Computes various annotator agreement measures using NLTK's
    metrics.agreement and SegEval's Agreement metrics '''
    # NLTK's AnnotationTask expects a list of triples, each containing one
    # rater's label for one candidate, ie [(rater, item, label),(rater, item, label),...]
    # See http://nltk.org/api/nltk.metrics.html#nltk.metrics.agreement
    nltk_dataArray = []
    # SegEval expects an items_masses dict { item -> { coder -> [label] } }
    # See http://packages.python.org/segeval/segeval.agreement/
    segeval_items_masses = {}

    # 3 raters for each candidate judgment
    raters = ['rater1', 'rater2', 'rater3']
    judgments = entity_dataset_mgr.get_entity_judgements(site)
    true_label = 'true_label'
    false_label = 'false_label'

    # Iterate through the raters' labels to put them into the
    # proper data formats that the different agreement libs require
    for entity_id in judgments:
        candidate_labels = judgments[entity_id]
        for candidate_title in candidate_labels:
            (num_true, num_false) = candidate_labels[candidate_title]
            if num_true+num_false!=3:
                # we ask for 3 turkers per candidate eval task
                raise ValueError("Expected exactly 3 judgments per candidate")

            # Check for the case when all raters chose the same label
            unanimous_decision = None
            if num_true==0:
                unanimous_decision = false_label
            elif num_false==0:
                unanimous_decision = true_label

            if unanimous_decision is not None:
                # All raters labeled this candidate either
                # unanimously with true or unanimously with false
                segeval_rater_map = {}
                for rater in raters:
                    # data=[(rater, item, label),(rater, item, label),...]
                    nltk_dataArray.append((rater, candidate_title, unanimous_decision))
                    # { coder -> [labels] }
                    segeval_rater_map[rater] = [unanimous_decision]
                # add to items masses, which looks like { item -> { coder -> [label] } }
                segeval_items_masses[str(entity_id+candidate_title)] = segeval_rater_map
            else:
                # Decision was split, so randomly select a number of raters
                # equal to num_true and a number of raters equal to num_false
                raters_copy = raters[:]
                random.shuffle(raters_copy)
                segeval_rater_map = {}
                for rater in raters_copy[:num_true]:
                    nltk_dataArray.append((rater, candidate_title, true_label)) # [(rater, item, label)...]
                    segeval_rater_map[rater] = [true_label] # { coder -> [labels] }
                for rater in raters_copy[num_true:]:
                    nltk_dataArray.append((rater, candidate_title, false_label)) # [(rater, item, label)...]
                    segeval_rater_map[rater] = [false_label] # { coder -> [labels] }
                segeval_items_masses[str(entity_id+candidate_title)] = segeval_rater_map

    compute_annotator_agreement_nltkmetrics(nltk_dataArray)
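# Hedged sketch of what compute_annotator_agreement_nltkmetrics (called above)
# might compute from the [(rater, item, label), ...] triples. NLTK's
# AnnotationTask and its avg_Ao/pi/kappa/alpha methods are the library's real
# API; the helper name _nltk_agreement_sketch and the particular statistics
# printed here are assumptions for illustration.
from nltk.metrics.agreement import AnnotationTask

def _nltk_agreement_sketch(nltk_dataArray):
    task = AnnotationTask(data=nltk_dataArray)
    print "Average observed agreement (avg_Ao): "+str(task.avg_Ao())
    print "Scott's pi (multi-pi): "+str(task.pi())
    print "Cohen's kappa (averaged over rater pairs): "+str(task.kappa())
    print "Krippendorff's alpha: "+str(task.alpha())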