def make_usernames_csv_for_turk(): print "Creating csv file of usernames for evaluation on Mechanical Turk..." twitter_site = short_text_websites.get_twitter_site() usernames_to_evaluate = crosssite_username_dataset_mgr.get_usernames_to_evaluate_mturk(twitter_site) print str(len(usernames_to_evaluate))+" unconfirmed usernames available:" print usernames_to_evaluate # dump to csv username_rows = [[username] for username in usernames_to_evaluate] csv_util.write_to_spreadsheet(__usernames_to_judge_csv_path__, username_rows, None)
def make_tweet_entities_csv_for_turk(): twitter_site = short_text_websites.get_twitter_site() entities_to_evaluate = entity_dataset_mgr.get_valid_ne_candidates(twitter_site) if entities_to_evaluate is None: print "No ambiguous entities + candidates in cache. Run run_all_dataset_generators "+\ "script and choose to first fetch and store more entities from short texts." return judged_row_plus_headers = csv_util.query_csv_for_rows(__entities_results_csv_path__, False) judged_row_num = 0 already_judged = [] # list of (entity id, candidate link) for judge_row in judged_row_plus_headers: try: if judged_row_num==0: # row 0 is header entity_id_col = judge_row.index('Input.entity_id') candidate_link_col = judge_row.index('Input.candidate_link') else: judged_tuple = (judge_row[entity_id_col], judge_row[candidate_link_col]) if not judged_tuple in already_judged: already_judged.append(judged_tuple) judged_row_num = judged_row_num+1 except: continue # just ignore a problematic row # Determine what entity+candidate tasks we actually want to write to a spreadsheet # and send to mturk since we don't have resources for unlimited mturk tasks tasks = {} # NamedEntity object -> candidate judgment tasks we actually want performed user_entities = defaultdict(list) # username -> [NamedEntity obj] done_shorttexts = [] # list of shorttext id random.shuffle(entities_to_evaluate) # so we get a random subset of a user's entities for ne_obj in entities_to_evaluate: # "40 nouns usually enough to establish statistically significant # differences between WSD algorithms" (Santamaria et al., 2010) username = ne_obj.username if len(user_entities[username]) > 50: continue # have enough entities for this user # limiting our dataset to one named entity per short text shorttext_id = ne_obj.shorttext_id if shorttext_id in done_shorttexts: continue # no need to create tasks for candidates we already have annotator judgments for entity_id = ne_obj.get_entity_id() candidate_URLs = 
ne_obj.get_candidate_wikiURLs() valid_candidate_tasks = [] for candidate_URL in candidate_URLs: if ((entity_id, candidate_URL) in already_judged): continue valid_candidate_tasks.append(candidate_URL) if len(valid_candidate_tasks)==0: continue # already have annotator judgments for all of this entity's candidates if len(candidate_URLs)+len(valid_candidate_tasks) < 2: # this would be a non-ambiguous entity, and we should never reach this # point because such entities should have been filtered out by now raise tasks[entity_id] = valid_candidate_tasks user_entities[username].append(ne_obj) done_shorttexts.append(shorttext_id) # put valid entities + candidates in the spreadsheet until reach our limit of tasks task_max = 1400 rows = [] headers = ['entity_id', 'short_text', 'ambiguous_entity', 'candidate_link'] rows.append(headers) for username in user_entities: # add users until reach our limit on the number of tasks we can afford, # but break at this point in the loop rather than in the inner loop to # ensure that we do have at least 50 entities per user (even if this # means we go over our task limit a little in order to reach that amount) if len(rows) > task_max: break # bypass users who haven't written the minimum number of valid entities # required to establish statistical significance between the algorithms if len(user_entities[username]) < 50: continue # should be 50 NamedEntity objects per user, and we'll make tasks for their candidates for ne_obj in user_entities[username]: entity_id = ne_obj.get_entity_id() # make sure the entity presented to a Turker looks the same as # it appears in the short text (ie with the same capitalization) original_shorttext = ne_obj.shorttext_str.decode('latin-1') surface_form = ne_obj.surface_form if not surface_form in original_shorttext: surface_form = __match_appearance__(surface_form, original_shorttext) # shuffle candidates so that they don't appear # in wikiminer's/dbpedia's ranking order and bias the turker candidate_URLs = 
tasks[entity_id] random.shuffle(candidate_URLs) choices = candidate_URLs[:] # copy (list slicing) for choice in choices: # make a separate row for each candidate link # rather than putting all links in a single cell row = [entity_id, original_shorttext, surface_form, choice] rows.append(row) if len(rows)%50==0: # write the rows every once in a while in case we reach an error print "Updating spreadsheet..."+str(len(rows)) csv_util.write_to_spreadsheet(__entities_to_judge_csv_path__, rows) # dump to csv csv_util.write_to_spreadsheet(__entities_to_judge_csv_path__, rows)
def prompt_for_site(): site_input = raw_input('Work with (A) Twitter or (B) Flickr? (Enter A or B): ') site = {'A':short_text_websites.get_twitter_site(), 'B':'SITE_FLICKR'}[site_input] return site
def update_usernames_csv_with_judgments():
    ''' Handles updating the cross-site-usernames spreadsheet given new
    Mechanical Turker evaluations.

    Reads the Turk results CSV, tallies each worker's True/False/Unknown
    answer per username into a Turker_Username_Evaluation, then pushes
    majority-positive and unanimous-negative confirmations back to the
    cross-site-usernames spreadsheet via crosssite_username_dataset_mgr. '''
    print "Updating cross-site-usernames spreadsheet with Mechanical Turker "+\
    "judgments about username matching from spreadsheet of results..."
    twitter_site = short_text_websites.get_twitter_site()
    evaluated_usernames = crosssite_username_dataset_mgr.get_usernames_to_evaluate_mturk(twitter_site)
    judgments = {}  # username -> Turker_Username_Evaluation accumulating worker votes
    row_num = 0
    rows_plus_headers = csv_util.query_csv_for_rows(__usernames_results_csv_path__, False)
    for row in rows_plus_headers:
        try:
            if row_num==0: # row 0 is header
                username_col = row.index('Input.user')
                turkerID_col = row.index('WorkerId')
                answer_col = row.index('Answer.Q1')
            else:
                judged_username = row[username_col]
                if not judged_username in evaluated_usernames:
                    # Deliberate control flow: the bare raise lands in the
                    # blanket except below, which drops this row.
                    raise # this shouldn't happen so see what's going on..
                if judged_username in judgments:
                    evaluations = judgments[judged_username]
                else:
                    evaluations = Turker_Username_Evaluation(judged_username)
                    judgments[judged_username] = evaluations
                workerID = row[turkerID_col]
                judgment = row[answer_col]
                # Answers arrive as the literal strings 'True'/'False';
                # anything else is tallied as an unknown evaluation.
                if 'True'==judgment:
                    evaluations.add_true_eval(workerID)
                elif 'False'==judgment:
                    evaluations.add_false_eval(workerID)
                else:
                    evaluations.add_unknown_eval(workerID)
            row_num = row_num+1
        except:
            continue # just ignore a problematic assignment row

    # usernames that meet a threshold indicating that enough
    # of a majority of turkers agreed this is the same individual
    likely_usernames = []
    conflicting_judgments = []  # at least one True vote but below the threshold
    # usernames that all workers agreed belonged to a single
    # person (ie ignore usernames that were rejected by any worker)
    unanimous_confirmed_usernames = []
    # Also get usernames that all workers unanimously
    # agreed do NOT belong to a single individual
    unanimous_nonmatch_usernames = []
    for username in judgments:
        # Manual overrides short-circuit the scoring entirely.
        if username in __MANUAL_OVERRIDES_TRUE__:
            likely_usernames.append(username)
            continue
        evaluation = judgments[username]
        eval_score = evaluation.get_eval_measure()
        print "Match score for "+str(username)+": "+str(eval_score)
        if eval_score>=__JUDGMENT_THRESHOLD__:
            likely_usernames.append(username)
        elif evaluation.get_number_true_evals()>0:
            conflicting_judgments.append(username)

        # Each username given to 5 turkers to evaluate.
        if (evaluation.get_number_true_evals()>0 and
            evaluation.get_number_false_evals()==0 and
            evaluation.get_number_unknown_evals()==0):
            # all turkers unanimously confirmed this username
            unanimous_confirmed_usernames.append(username)
        # NOTE(review): the negative-override list is checked with
        # username.lower(), while the positive list above is matched
        # case-sensitively — presumably intentional, but worth confirming.
        if (evaluation.get_number_false_evals()>0 and
            evaluation.get_number_true_evals()==0 and
            evaluation.get_number_unknown_evals()==0 and
            not username.lower() in __MANUAL_OVERRIDES_FALSE_UNCLEAR__):
            unanimous_nonmatch_usernames.append(username)

    print "Judged "+str(len(judgments))+" usernames"
    # NOTE(review): output reads "Likely matchesN" — missing ": " separator
    # in the literal below (left untouched since it is runtime output).
    print "Likely matches"+str(len(likely_usernames))
    print "Conflicting judgments:"+str(conflicting_judgments)

    # Update the judgment cell in the spreadsheet for unanimously confirmed usernames
    crosssite_username_dataset_mgr.update_confirmed_positive_usernames(twitter_site, likely_usernames)
    print "Updated cross-site-usernames spreadsheet to reflect majority positive confirmations"
    crosssite_username_dataset_mgr.update_confirmed_negative_usernames(twitter_site, unanimous_nonmatch_usernames)
    print "Updated cross-site-usernames spreadsheet to reflect unanimous negative confirmations"