Example #1
    configParser = configparser.RawConfigParser()
    configParser.read(configFilePath)
    dataset = configParser.get('shared', 'd')
    num_users = int(configParser.get('shared', 'num_users'))
    dimension = int(configParser.get('shared', 'dimension'))
    rec_per_user = int(configParser.get('shared', 'rec_per_user'))
    num_votes = int(configParser.get('shared', 'v'))
    train_ratio = float(configParser.get('experiments', 'train_ratio'))

    res_path = 'YOUR PATH'
    graph_prefix = 'graph_d_' + str(dimension)
    scores_prefix = 'pr_users_scores_d_' + str(dimension)
    if experiment_mode == 'S':
        graph_prefix += '_rec_' + str(rec_per_user) + '_v_' + str(num_votes)
        scores_prefix += '_rec_' + str(rec_per_user) + '_v_' + str(num_votes)
    graph_file = res_path + get_file_name(graph_prefix, num_users, train_ratio,
                                          'gml')
    graph = nx.read_gml(graph_file, destringizer=literal_destringizer)

    users = []
    items = []
    for node in graph.nodes():
        if graph.nodes[node]['type'] == 'user':
            users.append(node)
        elif graph.nodes[node]['type'] == 'item':
            items.append(node)

    # ------------ user pagerank --------------
    print('start computing pagerank')
    t1 = time.time()
    res = compute_pagerank(graph, users, alpha, epsilon)
    print('finished user pagerank', time.time() - t1)
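
The helper get_file_name is never shown in these excerpts. Judging from its call sites, and from the hard-coded 'train_users_' + str(num_users) + '_partition_100.txt' path in Example #4, a plausible reconstruction (hypothetical, for orientation only) would be:

    # Hypothetical sketch of the naming helper used throughout these examples.
    # A ratio of 1.0 maps to 'partition_100', matching the literal path in Example #4.
    def get_file_name(prefix, num_users, ratio, ext='txt'):
        partition = int(ratio * 100)
        return '%s_users_%s_partition_%d.%s' % (prefix, num_users, partition, ext)
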
Example #2
    configParser.read(configFilePath)
    dataset = configParser.get('shared', 'd')
    exp_per_rec = int(configParser.get('shared', 'e'))
    item_per_pair = int(configParser.get('shared', 'i'))
    num_votes = int(configParser.get('shared', 'v'))
    dimension = int(configParser.get('shared', 'dimension'))
    rec_per_user = int(configParser.get('shared', 'rec_per_user'))
    folder = configParser.get('shared', 'folder')
    num_users = configParser.get('shared', 'num_users')
    feedback_ratio = float(configParser.get('experiments', 'feedback_ratio'))
    train_ratio = float(configParser.get('experiments', 'train_ratio'))
    test_ratio = float(configParser.get('experiments', 'test_ratio'))

    res_path = 'YOUR PATH'
    setup_postfix = '_rec_' + str(rec_per_user) + '_v_' + str(num_votes)
    test_file = res_path + get_file_name('test', num_users, test_ratio)
    if test_strategy == "sample":
        sample_test_file = res_path + get_file_name('test_sample', num_users,
                                                    test_ratio)
        users_samples = read_random_recs(sample_test_file)
    #rwr_feedback_file = res_path + folder + get_file_name('RWR_simulated_feedback', num_users, feedback_ratio)
    graph_file = res_path + get_file_name('graph', num_users, train_ratio,
                                          'gml')
    graph = nx.read_gml(graph_file, destringizer=literal_destringizer)
    updated_graph_file = res_path + get_file_name(
        'graph_d_' + str(dimension) + setup_postfix, num_users, train_ratio,
        'gml')
    if os.path.exists(updated_graph_file):
        updated_graph = nx.read_gml(updated_graph_file,
                                    destringizer=literal_destringizer)
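
These snippets pass literal_destringizer to nx.read_gml. In NetworkX 2.x that helper lives in networkx.readwrite.gml and parses GML string attributes back into Python literals; it was removed in NetworkX 3.0. A minimal reading sketch, assuming NetworkX 2.x and a hypothetical graph.gml:

    import networkx as nx
    from networkx.readwrite.gml import literal_destringizer

    graph = nx.read_gml('graph.gml', destringizer=literal_destringizer)
    print(graph.number_of_nodes(), graph.number_of_edges())
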
Example #3
        # read the book description
        book_desc_file = path + "books_descriptions.txt"
        books_desc_text = {}
        with open(book_desc_file, 'r') as f_in:
            next(f_in)
            for line in f_in:
                tabs = line.strip().split('\t')
                if len(tabs) >= 2:
                    books_desc_text[tabs[0]] = tabs[1].replace('&#39;', '')  # strip HTML-escaped apostrophes
                else:
                    books_desc_text[tabs[0]] = 'No description found.'

    # ---------------------------------- phase2 ---------------------------------
    if phase == 2:
        # read recs and explanations for RWR
        users_scores_file = path + get_file_name('pr_users_scores_d_'+str(dimension), num_users, train_ratio)
        items_scores_file = path + get_file_name('pr_items_scores_d_'+str(dimension), num_users, train_ratio)
        graph_file = path + get_file_name('graph_d_'+str(dimension), num_users, train_ratio, 'gml')
        graph = nx.read_gml(graph_file, destringizer=literal_destringizer)
        train_data = get_users_data(path + get_file_name('train_d_' + str(dimension), num_users, train_ratio))
        rwr_ur = top_k_recs_rwr(users_scores_file, graph, rec_per_user)
        users = rwr_ur.keys()
        rwr_ure_good = explanation_items_rwr(items_scores_file, graph, rwr_ur, exp_per_rec)
        rwr_ure_bad = explanation_items_rwr(items_scores_file, graph, rwr_ur, exp_per_rec, location='bottom')

        # merging data {user: {rec: {explanations: {exp: {M: R: S: P: TW: BW: TF: BF:}} M: R:}}}
        output_data = {}
        for user in users:
            output_data[user] = {}
            # merge recs
            output_data = add_rec_exps(user, output_data, rwr_ure_good, model='R', location='TW')
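
The merge comment above is terse. Assuming add_rec_exps (not shown) fills one model/location slot per call, the merged structure expands to roughly this shape (hypothetical ids, values elided with ...):

    # Hypothetical expansion of the shape noted in the comment above.
    example_output = {
        'user_1': {                      # user id
            'item_9': {                  # recommended item
                'explanations': {
                    'item_3': {'M': ..., 'R': ..., 'S': ..., 'P': ...,
                               'TW': ..., 'BW': ..., 'TF': ..., 'BF': ...},
                },
                'M': ...,                # model tag, e.g. 'R' for RWR
                'R': ...,
            },
        },
    }
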
Example #4
    feature_reduction = 'nmf'
    model = 'RWR'

    # read features
    if model == 'RWR':
        feature_file = path + dataset + "-data-" + feature_reduction + "-features-" + str(
            dimension) + ".csv"
        features = utils.read_features(feature_file, normalized=True)

    # ---------------- only items ------------------------
    # generate extended training
    recs_file = path + 'phase2/' + model + '_rated_recs_phase_2.txt'
    main_train_file = path + 'train_users_' + str(
        num_users) + '_partition_100.txt'
    file_prefix = 'train_rec_' + str(rec_per_user) + '_v_' + str(num_votes)
    new_train_file = path + get_file_name(file_prefix, num_users, 1)
    lines_1 = []
    with open(main_train_file, 'r') as f_in:
        for line in f_in:
            lines_1.append(line.strip())
    lines_2 = []
    with open(recs_file, 'r') as f_in:
        next(f_in)
        for line in f_in:
            lines_2.append(line.strip())
    with open(new_train_file, 'w') as f_out:
        for line in lines_1:
            f_out.write(line)
            f_out.write('\n')
        for line in lines_2:
            f_out.write(line)
            f_out.write('\n')
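
The same concatenation can be written without buffering both files in memory. A compact equivalent under the same placeholder paths, assuming every input line already ends with a newline:

    # Stream-copy the main training file, then append the rated recs
    # without their header line.
    with open(new_train_file, 'w') as f_out:
        with open(main_train_file, 'r') as f_in:
            f_out.writelines(f_in)
        with open(recs_file, 'r') as f_in:
            next(f_in)  # skip the header
            f_out.writelines(f_in)
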
Example #5
    folder_name = configParser.get('shared', 'folder')
    sim_threshold = float(configParser.get('feedback_inc', 'sim_threshold'))
    num_vectors = int(configParser.get('feedback_inc', 'num_vectors'))
    mode = configParser.get('feedback_inc', 'mode')
    beta = float(configParser.get('shared', 'beta'))
    feedback_ratio = float(configParser.get('experiments', 'feedback_ratio'))
    train_ratio = float(configParser.get('experiments', 'train_ratio'))
    num_users = int(configParser.get('shared', 'num_users'))
    res_path = 'YOUR PATH'
    input_output_path = res_path + folder_name

    # build interaction graph
    graph_prefix = 'graph_d_' + str(dimension)
    if experiment_mode == 'SP':
        graph_prefix += '_rec_' + str(rec_per_user) + '_v_' + str(num_votes)
    graph_file = res_path + get_file_name(graph_prefix, num_users, train_ratio,
                                          'gml')
    graph_nx = nx.read_gml(graph_file, destringizer=literal_destringizer)
    graph = InteractionGraph()
    graph.set_graph(graph_nx)

    # read the weight vectors and item features
    weight_file = input_output_path + get_file_name(
        'user_weight_vector_learned', num_users, train_ratio)
    if exp_loc == 'bottom':
        weight_file = input_output_path + get_file_name(
            'user_weight_vector_learned_bottom', num_users, train_ratio)
    feature_file = res_path + dataset + "-" + feature_reduction + "-features-" + str(
        dimension) + ".csv"
    all_features = read_features(feature_file, normalized=True)
    user_weights = np.genfromtxt(weight_file, delimiter=',')
    print('read features and weights')
Example #6
    exp_per_rec = int(configParser.get('shared', 'e'))
    item_per_pair = int(configParser.get('shared', 'i'))
    num_votes = int(configParser.get('shared', 'v'))
    folder_name = configParser.get('shared', 'folder')
    learning_rate = float(configParser.get('update_sim_unconstr', 'learning_rate'))
    weight_decay = float(configParser.get('update_sim_unconstr', 'weight_decay'))
    n_epoch = int(configParser.get('update_sim_unconstr', 'n_epoch'))
    mode = configParser.get('feedback_inc', 'mode')
    feedback_ratio = float(configParser.get('experiments', 'feedback_ratio'))
    train_ratio = float(configParser.get('experiments', 'train_ratio'))


    path = 'YOUR PATH'
    res_path = path + folder_name
    feature_file = path + dataset + "-" + feature_reduction + "-features-" + str(dimension) + ".csv"
    simulated_feedback_file = res_path + get_file_name(model + '_simulated_feedback_d_'+ str(dimension), num_users,
                                                       feedback_ratio)
    if exp_loc == 'bottom':
        simulated_feedback_file = res_path + get_file_name(model + '_simulated_feedback_bottom', num_users,
                                                           feedback_ratio)
        print('read bottom')
    points_numpy = utils.read_features(feature_file, normalized=True)
    item_id_map = {int(points_numpy[i, 0]): i for i in range(points_numpy.shape[0])}
    points = torch.from_numpy(points_numpy[:, 1:].T)
    print('started computing cross products')
    points_cross_points = torch.matmul(torch.t(points), points)
    dimension = points.shape[0]
    num_items = points.shape[1]
    # read the feedback pairs
    users_feedback_pairs = {}
    users = []
    print('started reading the pairs')
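
For context, points_cross_points above is the Gram matrix of the item feature columns: entry (i, j) is the dot product of items i and j, which with normalized features serves as an item-item similarity. A toy illustration with made-up numbers:

    import torch

    # Columns are item vectors (dimension x num_items), as in the snippet above.
    points = torch.tensor([[1.0, 0.0, 0.6],
                           [0.0, 1.0, 0.8]])
    gram = torch.matmul(torch.t(points), points)
    print(gram)  # gram[i, j] == dot(item_i, item_j)
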
Example #7
    num_users = int(configParser.get('shared', 'num_users'))
    dimension = int(configParser.get('shared', 'dimension'))
    rec_per_user = int(configParser.get('shared', 'rec_per_user'))
    num_votes = int(configParser.get('shared', 'v'))
    train_partition = float(configParser.get('experiments', 'train_ratio'))
    sim_threshold = float(configParser.get('feedback_inc', 'sim_threshold'))
    beta = float(configParser.get('shared', 'beta'))

    sim_file = 'item-item-similarity-' + str(dimension) + '.txt'
    train_prefix = 'train'
    graph_prefix = 'graph_d_' + str(dimension)
    if experiment_mode == 'S':  # the graph contains user's feedback on recommendations
        train_prefix = 'train_d_' + str(dimension)
        train_prefix += '_rec_' + str(rec_per_user) + '_v_' + str(num_votes)
        graph_prefix += '_rec_' + str(rec_per_user) + '_v_' + str(num_votes)
    interactions_file = get_file_name(train_prefix, num_users, train_partition)
    print('interaction file', interactions_file)
    graph_file = get_file_name(graph_prefix, num_users, train_partition, 'gml')
    dataset_path = "YOUR_PATH" + dataset + "-data/"
    res_path = "YOUR_PATH" + dataset + "-data/"

    # load item names
    items_name = {}
    item_names_file = 'id_link_map.txt'  # items.txt file with reversed order of columns
    delimiter = '\t'
    name_location = 1

    with open(dataset_path + item_names_file, 'r') as f_in:
        next(f_in)
        for line in f_in:
            tabs = line.strip().split(delimiter)
            # excerpt truncated in the source; the setup above implies this mapping:
            items_name[tabs[0]] = tabs[name_location]