Ejemplo n.º 1
0
def analyze_2016_algo():
    """Validate a saved match set and print statistics about it.

    Loads the saved random population (users, features, preferences) and the
    match file 'ip_matches_1500_real_False.txt', verifies that every matched
    pair passes ``is_compatibile``, sorts each match list, and then prints
    the match-count analysis followed by rank- and distance-utility reports
    for the top 1, 2 and 3 matches.

    Raises:
        AssertionError: if any matched pair is reported as incompatible.
    """
    # To analyze the real 2016 data instead, load these files in place of
    # the random ones below:
    #   users = user.load_users('anon_data_2016.txt')
    #   users = user.load_features(users, 'features_2016.txt')
    #   users = user.load_prefs(users, 'preferences_2016.txt')
    #   users = user.filter_prefs(users)
    #   users_dict = user.map_users_list_to_dict(users)
    #   matches = format_2016_matches('matches_2016.txt')
    # and then drop user 3148 from the match data:
    #   for u_id in matches:
    #       if 3148 in matches[u_id]:
    #           matches[u_id].remove(3148)
    #   del matches[3148]

    # Saved random users.
    users = user.load_users('random_data_1500.txt')
    users = user.load_features(users, 'random_features_1500.txt')
    users = user.load_prefs(users, 'random_prefs_1500.txt')
    users = user.filter_prefs(users)
    users_dict = user.map_users_list_to_dict(users)
    matches = format_2016_matches('ip_matches_1500_real_False.txt')

    # Double check that we have a valid match set. This is an explicit raise
    # rather than an `assert` statement so the check still runs under
    # `python -O`; the exception type is unchanged.
    for u_id in matches:
        for m_id in matches[u_id]:
            if not users_dict[u_id].is_compatibile(users_dict[m_id]):
                raise AssertionError(
                    'incompatible match: user %s <-> user %s' % (u_id, m_id))

    user.sort_all_match_lists(matches, users_dict)
    # Check on how many matches people actually have.
    user.analyze_num_matches(matches, users_dict)

    # Single-argument print(...) behaves identically on Python 2 and 3.
    for top_k in range(1, 4):
        print('\033[95m#####################################')
        print('TOP %s MATCHES' % top_k)
        print('#####################################\033[0m')
        # Utility from the rank perspective and from the distance
        # perspective are reported by two separate functions.
        user.analyze_rank_utility(matches, users_dict, top_k)
        user.analyze_distance_utility(matches, users_dict, top_k)
Ejemplo n.º 2
0
def analyze_2016_algo():
    """Check a stored match set for validity and report its utility numbers.

    Reads the saved random users together with their features and
    preferences, reads the matches from 'ip_matches_1500_real_False.txt',
    confirms each matched pair passes ``is_compatibile``, sorts all match
    lists, and prints the number-of-matches analysis plus the rank and
    distance utility analyses for the top 1, 2 and 3 matches.

    Raises:
        AssertionError: if a matched pair is reported as incompatible.
    """
    # Real-user variant (swap in for the random data below):
    #   users = user.load_users('anon_data_2016.txt')
    #   users = user.load_features(users, 'features_2016.txt')
    #   users = user.load_prefs(users, 'preferences_2016.txt')
    #   users = user.filter_prefs(users)
    #   users_dict = user.map_users_list_to_dict(users)
    #   matches = format_2016_matches('matches_2016.txt')
    # The real-user variant also removes user 3148 from the matches:
    #   for u_id in matches:
    #       if 3148 in matches[u_id]:
    #           matches[u_id].remove(3148)
    #   del matches[3148]

    # Saved random users.
    users = user.load_users('random_data_1500.txt')
    users = user.load_features(users, 'random_features_1500.txt')
    users = user.load_prefs(users, 'random_prefs_1500.txt')
    users = user.filter_prefs(users)
    users_dict = user.map_users_list_to_dict(users)
    matches = format_2016_matches('ip_matches_1500_real_False.txt')

    # Validate the match set. Raised explicitly (not via `assert`) so that
    # the check is not compiled away under `python -O`; callers still see
    # an AssertionError.
    for user_id in matches:
        owner = users_dict[user_id]
        for partner_id in matches[user_id]:
            if not owner.is_compatibile(users_dict[partner_id]):
                raise AssertionError(
                    'incompatible match: user %s <-> user %s'
                    % (user_id, partner_id))

    user.sort_all_match_lists(matches, users_dict)
    # Check on how many matches people actually have.
    user.analyze_num_matches(matches, users_dict)

    # Parenthesised single-argument print works on both Python 2 and 3.
    for top_k in range(1, 4):
        print('\033[95m#####################################')
        print('TOP %s MATCHES' % top_k)
        print('#####################################\033[0m')
        # Rank-based and distance-based utility are analyzed separately.
        user.analyze_rank_utility(matches, users_dict, top_k)
        user.analyze_distance_utility(matches, users_dict, top_k)
Ejemplo n.º 3
0
Archivo: mtm_da.py Proyecto: frw/CS136
def run_mtm_da_for_all():
    """Run the six-stage MTM DA matching over the saved users and report on it.

    Loads the saved random population, splits it into six groups by
    ``gender`` and ``seeking``, calls ``mtm_da_within_group`` /
    ``mtm_da_between_groups`` once per stage (each call receives and returns
    the shared ``matches`` dict), sorts every match list, and prints the
    match-count analysis followed by rank and distance utility reports for
    the top 1, 2 and 3 matches.
    """
    # Alternative population: freshly generated random users.
    #   users = user.gen_users(500)
    #   users = user.calc_prefs(users, save=False)
    #   users = user.filter_prefs(users)

    # Saved random users.
    users = user.load_users('random_data_1500.txt')
    users = user.load_features(users, 'random_features_1500.txt')
    users = user.load_prefs(users, 'random_prefs_1500.txt')
    users = user.filter_prefs(users)

    # Alternative population: the actual 2016 users.
    #   users = user.load_users('anon_data_2016.txt')
    #   users = user.load_features(users, 'features_2016.txt')
    #   users = user.load_prefs(users, 'preferences_2016.txt')
    #   users = user.filter_prefs(users)

    users_dict = user.map_users_list_to_dict(users)
    # Materialize the ids so the value is a real list on Python 3 as well
    # (on Python 2 this is the same list `.keys()` already produced).
    all_users_ids = list(users_dict.keys())

    # Debug aid: inspect the first user's preferences and distances.
    #   print("id: %d, prefs: %s" % (users[0].id, users[0].prefs[0:10]))
    #   for u in users[0].prefs[0:40]:
    #       print("id: %d, dist: %s" % (u, users[0].dist(users_dict[u])))

    # Parameters.
    overall_target_min = 10
    mixing_ratio = 0.5
    # The two per-stage targets, computed once instead of at every call.
    ratio_target = int(overall_target_min * mixing_ratio)
    remainder_target = int(overall_target_min * (1.0 - mixing_ratio))

    # Every user starts with an empty match set and is placed into exactly
    # one of six groups: gender == 0 users go to the *_male lists, everyone
    # else to the *_female lists; `seeking` (0, 1, anything else) selects
    # the homo/heter/bi list within that gender.
    matches = {}
    homo_male, homo_female = [], []
    heter_male, heter_female = [], []
    bi_male, bi_female = [], []
    for u in users:
        matches[u.id] = set()
        if u.gender == 0:
            if u.seeking == 0:
                homo_male.append(u)
            elif u.seeking == 1:
                heter_male.append(u)
            else:
                bi_male.append(u)
        else:
            if u.seeking == 0:
                heter_female.append(u)
            elif u.seeking == 1:
                homo_female.append(u)
            else:
                bi_female.append(u)

    # Id lists parallel to the group lists above (same order).
    homo_m_id = [u.id for u in homo_male]
    homo_f_id = [u.id for u in homo_female]
    heter_m_id = [u.id for u in heter_male]
    heter_f_id = [u.id for u in heter_female]
    bi_m_id = [u.id for u in bi_male]
    bi_f_id = [u.id for u in bi_female]

    # Debug aid: group sizes.
    #   print(len(homo_m_id))
    #   print(len(bi_m_id))
    #   print(len(homo_f_id))
    #   print(len(bi_f_id))
    #   print(len(heter_m_id))
    #   print(len(heter_f_id))

    # Temporarily truncate for test reasons.
    #   heter_male = heter_male[:100]
    #   heter_female = heter_female[:150]
    #   heter_m_id = heter_m_id[:100]
    #   heter_f_id = heter_f_id[:150]

    def announce(stage):
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print(colored(stage, 'magenta', attrs=['bold']))

    # Call the DA functions in 6 stages.
    announce("Computing matchings for homosexual & bisexual males...")
    matches = mtm_da_within_group(
        homo_male + bi_male, homo_m_id + bi_m_id,
        ratio_target, matches, all_users_ids)

    announce("Computing matchings for homosexual & bisexual females...")
    matches = mtm_da_within_group(
        homo_female + bi_female, homo_f_id + bi_f_id,
        ratio_target, matches, all_users_ids)

    announce("Computing matchings for homosexual males...")
    matches = mtm_da_within_group(
        homo_male, homo_m_id, remainder_target, matches, all_users_ids)

    announce("Computing matchings for homosexual females...")
    matches = mtm_da_within_group(
        homo_female, homo_f_id, remainder_target, matches, all_users_ids)

    announce(
        "Computing matchings for bisexual & heterosexual males & females...")
    matches = mtm_da_between_groups(
        heter_male + bi_male, heter_m_id + bi_m_id,
        heter_female + bi_female, heter_f_id + bi_f_id,
        remainder_target, matches, all_users_ids)
    # Alternative with the female group passed first:
    #   matches = mtm_da_between_groups(
    #       heter_female + bi_female, heter_f_id + bi_f_id,
    #       heter_male + bi_male, heter_m_id + bi_m_id,
    #       remainder_target, matches, all_users_ids)

    announce("Computing matchings for heterosexual males & females...")
    matches = mtm_da_between_groups(
        heter_male, heter_m_id, heter_female, heter_f_id,
        ratio_target, matches, all_users_ids)
    # Alternative with the female group passed first:
    #   matches = mtm_da_between_groups(
    #       heter_female, heter_f_id, heter_male, heter_m_id,
    #       ratio_target, matches, all_users_ids)

    # Create a ranked list for each person by sorting their matches.
    user.sort_all_match_lists(matches, users_dict)
    print(colored("Matching completed!", 'red', 'on_green', attrs=['bold']))
    # Check on how many matches people actually have.
    user.analyze_num_matches(matches, users_dict)

    for top_k in range(1, 4):
        print('\033[95m#####################################')
        print('TOP %s MATCHES' % top_k)
        print('#####################################\033[0m')
        # Utility from the rank perspective and from the distance
        # perspective are reported by two separate functions.
        user.analyze_rank_utility(matches, users_dict, top_k)
        user.analyze_distance_utility(matches, users_dict, top_k)
Ejemplo n.º 4
0
Archivo: iter_da.py Proyecto: frw/CS136
def run_iter_da_for_all():
    """Run the six-stage iterated DA matching over the saved users and report.

    Loads the saved random population, splits it into six groups by
    ``gender`` and ``seeking``, calls ``iter_da_within_group`` /
    ``iter_da_between_groups`` once per stage with a minimum target and a
    larger maximum derived from it (each call receives and returns the
    shared ``matches`` dict), sorts every match list, and prints the
    match-count analysis followed by rank and distance utility reports for
    the top 1, 2 and 3 matches.
    """
    # Alternative population: freshly generated random users.
    #   users = user.gen_users(500)
    #   users = user.calc_prefs(users, save=False)
    #   users = user.filter_prefs(users)

    # Saved random users.
    users = user.load_users('random_data_1500.txt')
    users = user.load_features(users, 'random_features_1500.txt')
    users = user.load_prefs(users, 'random_prefs_1500.txt')
    users = user.filter_prefs(users)

    # Alternative population: the actual 2016 users.
    #   users = user.load_users('anon_data_2016.txt')
    #   users = user.load_features(users, 'features_2016.txt')
    #   users = user.load_prefs(users, 'preferences_2016.txt')
    #   users = user.filter_prefs(users)

    users_dict = user.map_users_list_to_dict(users)
    # Materialize the ids so the value is a real list on Python 3 as well
    # (on Python 2 this is the same list `.keys()` already produced).
    all_users_ids = list(users_dict.keys())

    # ---------------- PARAMETERS ----------------
    # Proportion of the matches that come from the different stages.
    mixing_ratio = 0.6

    # overall_X_min * mixing_ratio and overall_X_min * (1 - mixing_ratio)
    # are the lower bounds on the number of matches in the respective
    # stages (females / males in the between-groups algo, everyone in the
    # within-group algo). These approximately translate to lower bounds on
    # the number of matches overall.
    overall_female_min = 8
    overall_male_min = 1
    overall_within_group_min = 9

    # Scalar multiples setting the gap between the min and max number of
    # matches (reaching the max <=> the user dropping out in the algo).
    dropout_female_factor = 1.2
    dropout_male_factor = 18.0
    dropout_within_group_factor = 1.3

    # ---------------- GROUPING ----------------
    # Every user starts with an empty match list and is placed into exactly
    # one of six groups: gender == 0 users go to the *_male lists, everyone
    # else to the *_female lists; `seeking` (0, 1, anything else) selects
    # the homo/heter/bi list within that gender.
    matches = {}
    homo_male, homo_female = [], []
    heter_male, heter_female = [], []
    bi_male, bi_female = [], []
    for u in users:
        matches[u.id] = []
        if u.gender == 0:
            if u.seeking == 0:
                homo_male.append(u)
            elif u.seeking == 1:
                heter_male.append(u)
            else:
                bi_male.append(u)
        else:
            if u.seeking == 0:
                heter_female.append(u)
            elif u.seeking == 1:
                homo_female.append(u)
            else:
                bi_female.append(u)

    # Id lists parallel to the group lists above (same order).
    homo_m_id = [u.id for u in homo_male]
    homo_f_id = [u.id for u in homo_female]
    heter_m_id = [u.id for u in heter_male]
    heter_f_id = [u.id for u in heter_female]
    bi_m_id = [u.id for u in bi_male]
    bi_f_id = [u.id for u in bi_female]

    # Debug aid: group sizes.
    #   print(len(homo_m_id))
    #   print(len(bi_m_id))
    #   print(len(homo_f_id))
    #   print(len(bi_f_id))
    #   print(len(heter_m_id))
    #   print(len(heter_f_id))

    # Temporarily truncate for test reasons.
    #   heter_male = heter_male[:163]
    #   heter_female = heter_female[:199]
    #   heter_m_id = heter_m_id[:163]
    #   heter_f_id = heter_f_id[:199]

    def announce(stage):
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print(colored(stage, 'magenta', attrs=['bold']))

    # ---------------- ITERATED DA: 6 stages ----------------
    # Within-group stages: the two combined (homo + bi) stages use the
    # mixing_ratio share, the two homo-only stages use the remainder.
    combined_min = (overall_within_group_min * mixing_ratio)
    combined_max = combined_min * dropout_within_group_factor
    homo_only_min = (overall_within_group_min * (1.0 - mixing_ratio))
    homo_only_max = homo_only_min * dropout_within_group_factor

    announce("Computing matchings for homosexual & bisexual males...")
    matches = iter_da_within_group(
        homo_male + bi_male, homo_m_id + bi_m_id,
        combined_min, combined_max, matches, all_users_ids)

    announce("Computing matchings for homosexual & bisexual females...")
    matches = iter_da_within_group(
        homo_female + bi_female, homo_f_id + bi_f_id,
        combined_min, combined_max, matches, all_users_ids)

    announce("Computing matchings for homosexual males...")
    matches = iter_da_within_group(
        homo_male, homo_m_id,
        homo_only_min, homo_only_max, matches, all_users_ids)

    announce("Computing matchings for homosexual females...")
    matches = iter_da_within_group(
        homo_female, homo_f_id,
        homo_only_min, homo_only_max, matches, all_users_ids)

    # Between-group stages. The female group and its limits are passed
    # first; the alternative ordering (male group and limits first) is:
    #   iter_da_between_groups(males, male_ids, females, female_ids,
    #                          min_target_male, min_target_female,
    #                          min_target_male * dropout_male_factor,
    #                          min_target_female * dropout_female_factor,
    #                          matches, all_users_ids)
    announce(
        "Computing matchings for bisexual & heterosexual males & females...")
    min_target_female = (overall_female_min * (1.0 - mixing_ratio))
    min_target_male = (overall_male_min * (1.0 - mixing_ratio))
    matches = iter_da_between_groups(
        heter_female + bi_female, heter_f_id + bi_f_id,
        heter_male + bi_male, heter_m_id + bi_m_id,
        min_target_female, min_target_male,
        min_target_female * dropout_female_factor,
        min_target_male * dropout_male_factor,
        matches, all_users_ids)

    announce("Computing matchings for heterosexual males & females...")
    min_target_female = (overall_female_min * (mixing_ratio))
    min_target_male = (overall_male_min * (mixing_ratio))
    matches = iter_da_between_groups(
        heter_female, heter_f_id, heter_male, heter_m_id,
        min_target_female, min_target_male,
        min_target_female * dropout_female_factor,
        min_target_male * dropout_male_factor,
        matches, all_users_ids)

    # Create a ranked list for each person by sorting their matches.
    user.sort_all_match_lists(matches, users_dict)

    print(colored("Matching completed!", 'red', 'on_green', attrs=['bold']))

    # Check on how many matches people actually have.
    user.analyze_num_matches(matches, users_dict)

    for top_k in range(1, 4):
        print('\033[95m#####################################')
        print('TOP %s MATCHES' % top_k)
        print('#####################################\033[0m')
        # Utility from the rank perspective and from the distance
        # perspective are reported by two separate functions.
        user.analyze_rank_utility(matches, users_dict, top_k)
        user.analyze_distance_utility(matches, users_dict, top_k)