def analyze_2016_algo():
    """Validate a saved match set and print its match-count/utility reports.

    Loads the saved random population (users, features, preferences) through
    the ``user`` module, reads the match set from
    'ip_matches_1500_real_False.txt' via ``format_2016_matches``, checks every
    listed pair with ``is_compatibile``, calls ``user.sort_all_match_lists``,
    and then prints the match-count analysis followed by the rank and distance
    utility analyses for the top 1, 2 and 3 matches.

    Raises:
        AssertionError: if any listed pair fails ``is_compatibile``.

    NOTE(review): a second definition of ``analyze_2016_algo`` follows
    directly after this one in the file and rebinds the name -- confirm which
    copy should be kept.
    """
    # real users
    # users = user.load_users('anon_data_2016.txt')
    # users = user.load_features(users, 'features_2016.txt')
    # users = user.load_prefs(users, 'preferences_2016.txt')
    # users = user.filter_prefs(users)
    #
    # users_dict = user.map_users_list_to_dict(users)
    #
    # matches = format_2016_matches('matches_2016.txt')
    # for u_id in matches:
    #     if (3148 in matches[u_id]):
    #         matches[u_id].remove(3148)
    #
    # del matches[3148]

    # or saved random users
    users = user.load_users('random_data_1500.txt')
    users = user.load_features(users, 'random_features_1500.txt')
    users = user.load_prefs(users, 'random_prefs_1500.txt')
    users = user.filter_prefs(users)

    users_dict = user.map_users_list_to_dict(users)

    matches = format_2016_matches('ip_matches_1500_real_False.txt')

    # Double check that we have a valid match set. This is an explicit raise
    # rather than an ``assert`` so the check still runs under ``python -O``;
    # the exception type is kept as AssertionError for existing callers.
    for u_id in matches:
        for m_id in matches[u_id]:
            if not users_dict[u_id].is_compatibile(users_dict[m_id]):
                raise AssertionError(
                    'incompatible match pair: %s -> %s' % (u_id, m_id))

    user.sort_all_match_lists(matches, users_dict)

    # check on how many matches people actually have
    user.analyze_num_matches(matches, users_dict)

    for top_k in range(1, 4):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('\033[95m#####################################')
        print('TOP %s MATCHES' % top_k)
        print('#####################################\033[0m')
        # check the utility values from rank perspective and distance
        # perspective -- in two separate functions
        user.analyze_rank_utility(matches, users_dict, top_k)
        user.analyze_distance_utility(matches, users_dict, top_k)
def analyze_2016_algo():
    """Validate a saved match set and print its match-count/utility reports.

    Loads the saved random population (users, features, preferences) through
    the ``user`` module, reads the match set from
    'ip_matches_1500_real_False.txt' via ``format_2016_matches``, checks every
    listed pair with ``is_compatibile``, calls ``user.sort_all_match_lists``,
    and then prints the match-count analysis followed by the rank and distance
    utility analyses for the top 1, 2 and 3 matches.

    Raises:
        AssertionError: if any listed pair fails ``is_compatibile``.

    NOTE(review): a definition with the same name and the same logic appears
    directly before this one in the file; this later definition is the one
    that stays bound -- confirm the duplicate is intentional.
    """
    # real users
    # users = user.load_users('anon_data_2016.txt')
    # users = user.load_features(users, 'features_2016.txt')
    # users = user.load_prefs(users, 'preferences_2016.txt')
    # users = user.filter_prefs(users)
    #
    # users_dict = user.map_users_list_to_dict(users)
    #
    # matches = format_2016_matches('matches_2016.txt')
    # for u_id in matches:
    #     if (3148 in matches[u_id]):
    #         matches[u_id].remove(3148)
    #
    # del matches[3148]

    # or saved random users
    users = user.load_users('random_data_1500.txt')
    users = user.load_features(users, 'random_features_1500.txt')
    users = user.load_prefs(users, 'random_prefs_1500.txt')
    users = user.filter_prefs(users)

    users_dict = user.map_users_list_to_dict(users)

    matches = format_2016_matches('ip_matches_1500_real_False.txt')

    # Double check that we have a valid match set. This is an explicit raise
    # rather than an ``assert`` so the check still runs under ``python -O``;
    # the exception type is kept as AssertionError for existing callers.
    for u_id in matches:
        for m_id in matches[u_id]:
            if not users_dict[u_id].is_compatibile(users_dict[m_id]):
                raise AssertionError(
                    'incompatible match pair: %s -> %s' % (u_id, m_id))

    user.sort_all_match_lists(matches, users_dict)

    # check on how many matches people actually have
    user.analyze_num_matches(matches, users_dict)

    for top_k in range(1, 4):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('\033[95m#####################################')
        print('TOP %s MATCHES' % top_k)
        print('#####################################\033[0m')
        # check the utility values from rank perspective and distance
        # perspective -- in two separate functions
        user.analyze_rank_utility(matches, users_dict, top_k)
        user.analyze_distance_utility(matches, users_dict, top_k)
def run_mtm_da_for_all():
    """Run the six-stage ``mtm_da_*`` matching on the saved users and report.

    Loads the saved random population through the ``user`` module, splits it
    into six groups by ``gender`` and ``seeking``, and feeds those groups to
    ``mtm_da_within_group`` / ``mtm_da_between_groups`` in six stages, each of
    which receives and returns the shared ``matches`` dict (user id -> set).
    Afterwards it calls ``user.sort_all_match_lists`` and prints the
    match-count analysis plus the rank and distance utility analyses for the
    top 1, 2 and 3 matches.
    """

    def announce(text):
        # Stage banner, bold magenta.
        print(colored(text, 'magenta', attrs=['bold']))

    # random users...
    # users = user.gen_users(500)
    # users = user.calc_prefs(users, save=False)
    # users = user.filter_prefs(users)

    # saved random users...
    users = user.load_users('random_data_1500.txt')
    users = user.load_features(users, 'random_features_1500.txt')
    users = user.load_prefs(users, 'random_prefs_1500.txt')
    users = user.filter_prefs(users)

    # or actual users...?
    # users = user.load_users('anon_data_2016.txt')
    # users = user.load_features(users, 'features_2016.txt')
    # users = user.load_prefs(users, 'preferences_2016.txt')
    # users = user.filter_prefs(users)

    users_dict = user.map_users_list_to_dict(users)
    all_users_ids = users_dict.keys()

    # print "id: %d, prefs: %s" % (users[0].id, users[0].prefs[0:10])
    # for u in users[0].prefs[0:40]:
    #     print "id: %d, dist: %s" % (u, users[0].dist(users_dict[u]))

    # Parameters.
    overall_target_min = 10
    mixing_ratio = 0.5
    # Per-stage targets: each stage is called with one of these two values.
    ratio_target = int(overall_target_min * mixing_ratio)
    remainder_target = int(overall_target_min * (1.0 - mixing_ratio))

    # Every user starts with an empty match set and is filed into exactly one
    # of six groups; each group keeps the user objects and, in parallel,
    # their ids.
    matches = {}
    homo_male, homo_m_id = [], []
    homo_female, homo_f_id = [], []
    heter_male, heter_m_id = [], []
    heter_female, heter_f_id = [], []
    bi_male, bi_m_id = [], []
    bi_female, bi_f_id = [], []

    for person in users:
        matches[person.id] = set()
        is_male = person.gender == 0  # any other gender value -> female lists
        if person.seeking == 0:
            if is_male:
                members, member_ids = homo_male, homo_m_id
            else:
                members, member_ids = heter_female, heter_f_id
        elif person.seeking == 1:
            if is_male:
                members, member_ids = heter_male, heter_m_id
            else:
                members, member_ids = homo_female, homo_f_id
        else:
            # any other seeking value is filed under the "bi" groups
            if is_male:
                members, member_ids = bi_male, bi_m_id
            else:
                members, member_ids = bi_female, bi_f_id
        members.append(person)
        member_ids.append(person.id)

    # temporarily truncate for test reasons
    # heter_male = heter_male[:100]
    # heter_female = heter_female[:150]
    # heter_m_id = heter_m_id[:100]
    # heter_f_id = heter_f_id[:150]

    # Call the iterated DA functions in 6 stages.
    announce("Computing matchings for homosexual & bisexual males...")
    matches = mtm_da_within_group(
        homo_male + bi_male, homo_m_id + bi_m_id,
        ratio_target, matches, all_users_ids)

    announce("Computing matchings for homosexual & bisexual females...")
    matches = mtm_da_within_group(
        homo_female + bi_female, homo_f_id + bi_f_id,
        ratio_target, matches, all_users_ids)

    announce("Computing matchings for homosexual males...")
    matches = mtm_da_within_group(
        homo_male, homo_m_id, remainder_target, matches, all_users_ids)

    announce("Computing matchings for homosexual females...")
    matches = mtm_da_within_group(
        homo_female, homo_f_id, remainder_target, matches, all_users_ids)

    announce(
        "Computing matchings for bisexual & heterosexual males & females...")
    matches = mtm_da_between_groups(
        heter_male + bi_male, heter_m_id + bi_m_id,
        heter_female + bi_female, heter_f_id + bi_f_id,
        remainder_target, matches, all_users_ids)
    # Variant with the two groups swapped:
    # matches = mtm_da_between_groups(
    #     (heter_female + bi_female), (heter_f_id + bi_f_id),
    #     (heter_male + bi_male), (heter_m_id + bi_m_id),
    #     int(overall_target_min * (1.0 - mixing_ratio)),
    #     matches, all_users_ids)

    announce("Computing matchings for heterosexual males & females...")
    matches = mtm_da_between_groups(
        heter_male, heter_m_id, heter_female, heter_f_id,
        ratio_target, matches, all_users_ids)
    # Variant with the two groups swapped:
    # matches = mtm_da_between_groups(
    #     heter_female, heter_f_id, heter_male, heter_m_id,
    #     int(overall_target_min * mixing_ratio), matches, all_users_ids)

    # create ranked list for each person by sorting their matches
    user.sort_all_match_lists(matches, users_dict)

    print(colored("Matching completed!", 'red', 'on_green', attrs=['bold']))

    # check on how many matches people actually have
    user.analyze_num_matches(matches, users_dict)

    for top_k in range(1, 4):
        print('\033[95m#####################################')
        print('TOP %s MATCHES' % top_k)
        print('#####################################\033[0m')
        # check the utility values from rank perspective and distance
        # perspective -- in two separate functions
        user.analyze_rank_utility(matches, users_dict, top_k)
        user.analyze_distance_utility(matches, users_dict, top_k)
def run_iter_da_for_all():
    """Run the six-stage ``iter_da_*`` matching on the saved users and report.

    Loads the saved random population through the ``user`` module, splits it
    into six groups by ``gender`` and ``seeking``, and feeds those groups to
    ``iter_da_within_group`` / ``iter_da_between_groups`` in six stages, each
    of which receives and returns the shared ``matches`` dict
    (user id -> list). Every stage is given a minimum target and a maximum
    equal to that minimum times a dropout factor. Afterwards it calls
    ``user.sort_all_match_lists`` and prints the match-count analysis plus the
    rank and distance utility analyses for the top 1, 2 and 3 matches.
    """

    def announce(text):
        # Stage banner, bold magenta.
        print(colored(text, 'magenta', attrs=['bold']))

    # random users...
    # users = user.gen_users(500)
    # users = user.calc_prefs(users, save=False)
    # users = user.filter_prefs(users)

    # saved random users...
    users = user.load_users('random_data_1500.txt')
    users = user.load_features(users, 'random_features_1500.txt')
    users = user.load_prefs(users, 'random_prefs_1500.txt')
    users = user.filter_prefs(users)

    # or actual users...?
    # users = user.load_users('anon_data_2016.txt')
    # users = user.load_features(users, 'features_2016.txt')
    # users = user.load_prefs(users, 'preferences_2016.txt')
    # users = user.filter_prefs(users)

    users_dict = user.map_users_list_to_dict(users)
    all_users_ids = users_dict.keys()

    # ---------------- PARAMETERS ----------------
    # Proportion of the matches that come from the different stages.
    mixing_ratio = 0.6
    # overall_X_min * mixing_ratio and overall_X_min * (1 - mixing_ratio) are
    # the lower bounds on the number of matches handed to the stages: females
    # and males in the between-groups algo, and everyone in the within-group
    # algo. (These approximately translate to lower bounds on the number of
    # matches overall.)
    overall_female_min = 8
    overall_male_min = 1
    overall_within_group_min = 9
    # Scalar multiples that set the gap between the min and max number of
    # matches (max number of matches <=> user dropping out in the algo).
    dropout_female_factor = 1.2
    dropout_male_factor = 18.0
    dropout_within_group_factor = 1.3

    # ---------------- GROUPING ----------------
    # Sort users into preference groups, record ids for each group, and set
    # up the dictionary that holds everyone's matchings.
    matches = {}
    homo_male, homo_m_id = [], []
    homo_female, homo_f_id = [], []
    heter_male, heter_m_id = [], []
    heter_female, heter_f_id = [], []
    bi_male, bi_m_id = [], []
    bi_female, bi_f_id = [], []

    for person in users:
        matches[person.id] = []
        is_male = person.gender == 0  # any other gender value -> female lists
        if person.seeking == 0:
            if is_male:
                members, member_ids = homo_male, homo_m_id
            else:
                members, member_ids = heter_female, heter_f_id
        elif person.seeking == 1:
            if is_male:
                members, member_ids = heter_male, heter_m_id
            else:
                members, member_ids = homo_female, homo_f_id
        else:
            # any other seeking value is filed under the "bi" groups
            if is_male:
                members, member_ids = bi_male, bi_m_id
            else:
                members, member_ids = bi_female, bi_f_id
        members.append(person)
        member_ids.append(person.id)

    # print len(homo_m_id)
    # print len(bi_m_id)
    # print len(homo_f_id)
    # print len(bi_f_id)
    # print len(heter_m_id)
    # print len(heter_f_id)

    # temporarily truncate for test reasons
    # heter_male = heter_male[:163]
    # heter_female = heter_female[:199]
    # heter_m_id = heter_m_id[:163]
    # heter_f_id = heter_f_id[:199]

    # ---------------- ITERATED DA ----------------
    # Find matches in 6 stages.

    # Stages 1-2: homosexual and bisexual users together, per gender.
    within_min = overall_within_group_min * mixing_ratio
    within_max = within_min * dropout_within_group_factor

    announce("Computing matchings for homosexual & bisexual males...")
    matches = iter_da_within_group(
        homo_male + bi_male, homo_m_id + bi_m_id,
        within_min, within_max, matches, all_users_ids)

    announce("Computing matchings for homosexual & bisexual females...")
    matches = iter_da_within_group(
        homo_female + bi_female, homo_f_id + bi_f_id,
        within_min, within_max, matches, all_users_ids)

    # Stages 3-4: homosexual users only, per gender.
    within_min = overall_within_group_min * (1.0 - mixing_ratio)
    within_max = within_min * dropout_within_group_factor

    announce("Computing matchings for homosexual males...")
    matches = iter_da_within_group(
        homo_male, homo_m_id,
        within_min, within_max, matches, all_users_ids)

    announce("Computing matchings for homosexual females...")
    matches = iter_da_within_group(
        homo_female, homo_f_id,
        within_min, within_max, matches, all_users_ids)

    # Stage 5: heterosexual and bisexual users, female group passed first.
    announce(
        "Computing matchings for bisexual & heterosexual males & females...")
    female_min = overall_female_min * (1.0 - mixing_ratio)
    male_min = overall_male_min * (1.0 - mixing_ratio)
    # Variant with the male group passed first:
    # matches = iter_da_between_groups(
    #     (heter_male + bi_male), (heter_m_id + bi_m_id),
    #     (heter_female + bi_female), (heter_f_id + bi_f_id),
    #     min_target_male, min_target_female,
    #     min_target_male * dropout_male_factor,
    #     min_target_female * dropout_female_factor,
    #     matches, all_users_ids)
    matches = iter_da_between_groups(
        heter_female + bi_female, heter_f_id + bi_f_id,
        heter_male + bi_male, heter_m_id + bi_m_id,
        female_min, male_min,
        female_min * dropout_female_factor,
        male_min * dropout_male_factor,
        matches, all_users_ids)

    # Stage 6: heterosexual users only, female group passed first.
    announce("Computing matchings for heterosexual males & females...")
    female_min = overall_female_min * (mixing_ratio)
    male_min = overall_male_min * (mixing_ratio)
    # Variant with the male group passed first:
    # matches = iter_da_between_groups(
    #     heter_male, heter_m_id, heter_female, heter_f_id,
    #     min_target_male, min_target_female,
    #     min_target_male * dropout_male_factor,
    #     min_target_female * dropout_female_factor,
    #     matches, all_users_ids)
    matches = iter_da_between_groups(
        heter_female, heter_f_id, heter_male, heter_m_id,
        female_min, male_min,
        female_min * dropout_female_factor,
        male_min * dropout_male_factor,
        matches, all_users_ids)

    # create ranked list for each person by sorting their matches
    user.sort_all_match_lists(matches, users_dict)

    print(colored("Matching completed!", 'red', 'on_green', attrs=['bold']))

    # check on how many matches people actually have
    user.analyze_num_matches(matches, users_dict)

    for top_k in range(1, 4):
        print('\033[95m#####################################')
        print('TOP %s MATCHES' % top_k)
        print('#####################################\033[0m')
        # check the utility values from rank perspective and distance
        # perspective -- in two separate functions
        user.analyze_rank_utility(matches, users_dict, top_k)
        user.analyze_distance_utility(matches, users_dict, top_k)