def make_usernames_csv_for_turk():
    ''' Dumps the usernames still awaiting a Mechanical Turk 
    judgment into a csv file, one username per row '''
    print("Creating csv file of usernames for evaluation on Mechanical Turk...")
    site = short_text_websites.get_twitter_site()
    pending_usernames = crosssite_username_dataset_mgr.get_usernames_to_evaluate_mturk(site)
    print(str(len(pending_usernames))+" unconfirmed usernames available:")
    print(pending_usernames)

    # every username becomes its own single-cell spreadsheet row
    csv_rows = []
    for pending_username in pending_usernames:
        csv_rows.append([pending_username])
    csv_util.write_to_spreadsheet(__usernames_to_judge_csv_path__, csv_rows, None)
def update_usernames_csv_with_judgments():
    ''' Handles updating the cross-site-usernames 
    spreadsheet given new Mechanical Turker evaluations.
    
    Reads the Mechanical Turk results csv, groups the worker answers 
    by username, and then reports to the dataset manager:
      - confirmed positive: usernames listed in __MANUAL_OVERRIDES_TRUE__ 
        or whose match score is at least __JUDGMENT_THRESHOLD__
      - confirmed negative: usernames that received only 'False' answers 
        and are not listed in __MANUAL_OVERRIDES_FALSE_UNCLEAR__
    Usernames below the threshold that still received at least one 
    'True' answer are only printed as conflicting judgments. '''
    print("Updating cross-site-usernames spreadsheet with Mechanical Turker "+
          "judgments about username matching from spreadsheet of results...")
    
    twitter_site = short_text_websites.get_twitter_site()
    evaluated_usernames = crosssite_username_dataset_mgr.get_usernames_to_evaluate_mturk(twitter_site)
    
    rows_plus_headers = csv_util.query_csv_for_rows(__usernames_results_csv_path__, False)
    judgments = _collect_turker_judgments(rows_plus_headers, evaluated_usernames)
        
    # usernames that meet a threshold indicating that enough
    # of a majority of turkers agreed this is the same individual
    likely_usernames = []
    conflicting_judgments = []
    
    # usernames that all workers unanimously agreed 
    # do NOT belong to a single individual
    unanimous_nonmatch_usernames = []
    
    for username in judgments:
        
        # a manual override wins over whatever the turkers answered
        if username in __MANUAL_OVERRIDES_TRUE__:
            likely_usernames.append(username)
            continue
        
        evaluation = judgments[username]
        eval_score = evaluation.get_eval_measure()
        print("Match score for "+str(username)+": "+str(eval_score))
        
        if eval_score>=__JUDGMENT_THRESHOLD__:
            likely_usernames.append(username)
        elif evaluation.get_number_true_evals()>0:
            # some turkers said yes, but not enough to reach the threshold
            conflicting_judgments.append(username)
            
        if (evaluation.get_number_false_evals()>0 and
            evaluation.get_number_true_evals()==0 and 
            evaluation.get_number_unknown_evals()==0 and
            not username.lower() in __MANUAL_OVERRIDES_FALSE_UNCLEAR__):
            unanimous_nonmatch_usernames.append(username)
        
    print("Judged "+str(len(judgments))+" usernames")
    print("Likely matches"+str(len(likely_usernames)))
    print("Conflicting judgments:"+str(conflicting_judgments))
            
    # Update the judgment cell in the spreadsheet for usernames confirmed by 
    # manual override or by reaching the threshold (unanimity is not required)
    crosssite_username_dataset_mgr.update_confirmed_positive_usernames(twitter_site, likely_usernames)
    print("Updated cross-site-usernames spreadsheet to reflect majority positive confirmations")
    
    crosssite_username_dataset_mgr.update_confirmed_negative_usernames(twitter_site, unanimous_nonmatch_usernames)
    print("Updated cross-site-usernames spreadsheet to reflect unanimous negative confirmations")


def _collect_turker_judgments(rows_plus_headers, evaluated_usernames):
    ''' Groups the answers in a Mechanical Turk results 
    spreadsheet by the username that was judged.
    
    rows_plus_headers: iterable of rows, where the first row is the header 
        naming the 'Input.user', 'WorkerId' and 'Answer.Q1' columns
    evaluated_usernames: the usernames that were sent out for evaluation; 
        answers about any other username are reported and skipped
        
    Returns a dict mapping each judged username to the 
    Turker_Username_Evaluation holding the answers it received. 
    A row that cannot be processed is reported and skipped, 
    so one bad assignment row never aborts the whole update. '''
    judgments = {}
    
    rows = iter(rows_plus_headers)
    header = next(rows, None) # row 0 is header
    if header is None:
        print("No rows found in the Mechanical Turk results spreadsheet")
        return judgments
    
    # Locate the columns once, up front. Without them no row can be 
    # interpreted, so stop rather than guessing at a later row as header.
    try:
        username_col = header.index('Input.user')
        turkerID_col = header.index('WorkerId')
        answer_col = header.index('Answer.Q1')
    except ValueError:
        print("Results spreadsheet header is missing an expected column: "+repr(header))
        return judgments
    
    # hash-based lookup rather than rescanning the usernames for every row
    known_usernames = set(evaluated_usernames)
    
    for row in rows:
        try:
            # Read every field before touching judgments so that a 
            # short row cannot leave an empty evaluation entry behind
            judged_username = row[username_col]
            workerID = row[turkerID_col]
            judgment = row[answer_col]
            
            if judged_username not in known_usernames:
                # this shouldn't happen so make it visible
                print("Skipping judgment of unexpected username: "+repr(judged_username))
                continue
            
            evaluations = judgments.get(judged_username)
            if evaluations is None:
                evaluations = Turker_Username_Evaluation(judged_username)
                judgments[judged_username] = evaluations
                
            if 'True'==judgment:
                evaluations.add_true_eval(workerID)
            elif 'False'==judgment:
                evaluations.add_false_eval(workerID)
            else:
                evaluations.add_unknown_eval(workerID)
        except Exception as e:
            # deliberately best-effort: ignore a problematic 
            # assignment row, but say which one and why
            print("Skipping problematic assignment row "+repr(row)+": "+repr(e))
            continue
        
    return judgments