# Load the experiment settings from the configuration file.
configParser = ConfigParser.RawConfigParser()
configParser.read(configFilePath)

dataset = configParser.get('shared', 'd')
num_users = configParser.getint('shared', 'num_users')
dimension = configParser.getint('shared', 'dimension')
rec_per_user = configParser.getint('shared', 'rec_per_user')
num_votes = configParser.getint('shared', 'v')
train_ratio = configParser.getfloat('experiments', 'train_ratio')

res_path = 'YOUR PATH'

# File-name prefixes; in experiment mode 'S' they additionally encode the
# recommendations-per-user and votes settings.
graph_prefix = 'graph_d_' + str(dimension)
scores_prefix = 'pr_users_scores_d_' + str(dimension)
if experiment_mode == 'S':
    graph_prefix += '_rec_' + str(rec_per_user) + '_v_' + str(num_votes)
    scores_prefix += '_rec_' + str(rec_per_user) + '_v_' + str(num_votes)

graph_file = res_path + get_file_name(graph_prefix, num_users, train_ratio, 'gml')
graph = nx.read_gml(graph_file, destringizer=literal_destringizer)

# Split the graph nodes into users and items according to their 'type'
# attribute; nodes of any other type are ignored.
# NOTE(review): `graph.node` is the older networkx accessor; newer releases
# spell it `graph.nodes` -- confirm the pinned networkx version before changing.
users = []
items = []
for node in graph.nodes():
    node_type = graph.node[node]['type']
    if node_type == 'user':
        users.append(node)
    elif node_type == 'item':
        items.append(node)

# ------------ user pagerank --------------
# The prints are written so they parse on Python 2 and 3 and emit the same
# text as the original `print a, b` statements.
print('start computing pagerank')
t1 = time.time()
res = compute_pagerank(graph, users, alpha, epsilon)
print('%s %s' % ('finished user pagerank', time.time() - t1))
# Read the shared and per-experiment settings.
configParser.read(configFilePath)

dataset = configParser.get('shared', 'd')
exp_per_rec = configParser.getint('shared', 'e')
item_per_pair = configParser.getint('shared', 'i')
num_votes = configParser.getint('shared', 'v')
dimension = configParser.getint('shared', 'dimension')
rec_per_user = configParser.getint('shared', 'rec_per_user')
folder = configParser.get('shared', 'folder')
# num_users stays the raw configuration string here (no int conversion).
num_users = configParser.get('shared', 'num_users')

feedback_ratio = configParser.getfloat('experiments', 'feedback_ratio')
train_ratio = configParser.getfloat('experiments', 'train_ratio')
test_ratio = configParser.getfloat('experiments', 'test_ratio')

res_path = 'YOUR PATH'
setup_postfix = '_rec_' + str(rec_per_user) + '_v_' + str(num_votes)

# Test data; the sampled recommendations are only loaded for the "sample"
# test strategy.
test_file = res_path + get_file_name('test', num_users, test_ratio)
if test_strategy == "sample":
    sample_test_file = res_path + get_file_name('test_sample', num_users, test_ratio)
    users_samples = read_random_recs(sample_test_file)

#rwr_feedback_file = res_path + folder + get_file_name('RWR_simulated_feedback', num_users, feedback_ratio)

# Base interaction graph.
graph_file = res_path + get_file_name('graph', num_users, train_ratio, 'gml')
graph = nx.read_gml(graph_file, destringizer=literal_destringizer)

# Graph for the current dimension / rec / votes setup; it is loaded only when
# the file already exists on disk.
updated_graph_file = res_path + get_file_name(
    'graph_d_' + str(dimension) + setup_postfix, num_users, train_ratio, 'gml')
if os.path.exists(updated_graph_file):
    updated_graph = nx.read_gml(updated_graph_file, destringizer=literal_destringizer)
# Read the book descriptions into {first column: description text}.
book_desc_file = path + "books_descriptions.txt"
books_desc_text = {}
with open(book_desc_file, 'r') as f_in:
    # Skip the first line; the default keeps an empty file from raising
    # StopIteration.
    next(f_in, None)
    for line in f_in:
        tabs = line.strip().split('\t')
        if len(tabs) >= 2:
            # NOTE(review): the replaced literal was garbled in the source (it
            # read as three consecutive single quotes, which does not parse).
            # It is restored here as a plain apostrophe; confirm it was not
            # originally an HTML entity such as '&#39;' or a typographic quote.
            books_desc_text[tabs[0]] = tabs[1].replace("'", '')
        else:
            books_desc_text[tabs[0]] = 'No description found.'

# ---------------------------------- phase2 ---------------------------------
if phase == 2:
    # read recs and explanations for RWR
    users_scores_file = path + get_file_name('pr_users_scores_d_' + str(dimension), num_users, train_ratio)
    items_scores_file = path + get_file_name('pr_items_scores_d_' + str(dimension), num_users, train_ratio)
    graph_file = path + get_file_name('graph_d_' + str(dimension), num_users, train_ratio, 'gml')
    graph = nx.read_gml(graph_file, destringizer=literal_destringizer)
    train_data = get_users_data(path + get_file_name('train_d_' + str(dimension), num_users, train_ratio))

    rwr_ur = top_k_recs_rwr(users_scores_file, graph, rec_per_user)
    users = rwr_ur.keys()
    # Explanation items are requested twice: once with the default location
    # and once with location='bottom'.
    rwr_ure_good = explanation_items_rwr(items_scores_file, graph, rwr_ur, exp_per_rec)
    rwr_ure_bad = explanation_items_rwr(items_scores_file, graph, rwr_ur, exp_per_rec, location='bottom')

    # merging data {user: {rec: {explanations: {exp: {M: R: S: P: TW: BW: TF: BF:}} M: R:}}}
    output_data = {}
    for user in users:
        output_data[user] = {}
        # merge recs
        output_data = add_rec_exps(user, output_data, rwr_ure_good, model='R', location='TW')
feature_reduction = 'nmf'
model = 'RWR'

# read features
if model == 'RWR':
    feature_file = (path + dataset + "-data-" + feature_reduction
                    + "-features-" + str(dimension) + ".csv")
    features = utils.read_features(feature_file, normalized=True)

# ---------------- only items ------------------------
# generate extended training
recs_file = path + 'phase2/' + model + '_rated_recs_phase_2.txt'
main_train_file = path + 'train_users_' + str(num_users) + '_partition_100.txt'
file_prefix = 'train_rec_' + str(rec_per_user) + '_v_' + str(num_votes)
new_train_file = path + get_file_name(file_prefix, num_users, 1)

# Original training interactions, one stripped line per entry.
with open(main_train_file, 'r') as f_in:
    lines_1 = [line.strip() for line in f_in]

# Rated recommendations; the first line of that file is skipped.
with open(recs_file, 'r') as f_in:
    next(f_in)
    lines_2 = [line.strip() for line in f_in]

# Write the original interactions followed by the rated recommendations.
# NOTE(review): the visible code writes no newline after each rated
# recommendation -- confirm whether the loop continues beyond this excerpt.
with open(new_train_file, 'w') as f_out:
    for line in lines_1:
        f_out.write(line)
        f_out.write('\n')
    for line in lines_2:
        f_out.write(line)
# Settings for incorporating feedback.
folder_name = configParser.get('shared', 'folder')
sim_threshold = configParser.getfloat('feedback_inc', 'sim_threshold')
num_vectors = configParser.getint('feedback_inc', 'num_vectors')
mode = configParser.get('feedback_inc', 'mode')
beta = configParser.getfloat('shared', 'beta')
feedback_ratio = configParser.getfloat('experiments', 'feedback_ratio')
train_ratio = configParser.getfloat('experiments', 'train_ratio')
num_users = configParser.getint('shared', 'num_users')

res_path = 'YOUR PATH'
input_output_path = res_path + folder_name

# build interaction graph
graph_prefix = 'graph_d_' + str(dimension)
if experiment_mode == 'SP':
    graph_prefix += '_rec_' + str(rec_per_user) + '_v_' + str(num_votes)
graph_file = res_path + get_file_name(graph_prefix, num_users, train_ratio, 'gml')
graph_nx = nx.read_gml(graph_file, destringizer=literal_destringizer)
graph = InteractionGraph()
graph.set_graph(graph_nx)

# read the weight vectors and item features
weight_file = input_output_path + get_file_name(
    'user_weight_vector_learned', num_users, train_ratio)
if exp_loc == 'bottom':
    # Bottom-located explanations use a separate learned-weights file.
    weight_file = input_output_path + get_file_name(
        'user_weight_vector_learned_bottom', num_users, train_ratio)

feature_file = (res_path + dataset + "-" + feature_reduction
                + "-features-" + str(dimension) + ".csv")
all_features = read_features(feature_file, normalized=True)
user_weights = np.genfromtxt(weight_file, delimiter=',')
print('read features and weights')
# Shared settings.
exp_per_rec = configParser.getint('shared', 'e')
item_per_pair = configParser.getint('shared', 'i')
num_votes = configParser.getint('shared', 'v')
folder_name = configParser.get('shared', 'folder')

# Optimisation settings for the similarity update.
learning_rate = configParser.getfloat('update_sim_unconstr', 'learning_rate')
weight_decay = configParser.getfloat('update_sim_unconstr', 'weight_decay')
n_epoch = configParser.getint('update_sim_unconstr', 'n_epoch')

mode = configParser.get('feedback_inc', 'mode')
feedback_ratio = configParser.getfloat('experiments', 'feedback_ratio')
train_ratio = configParser.getfloat('experiments', 'train_ratio')

path = 'YOUR PATH'
res_path = path + folder_name

feature_file = path + dataset + "-" + feature_reduction + "-features-" + str(dimension) + ".csv"
simulated_feedback_file = res_path + get_file_name(
    model + '_simulated_feedback_d_' + str(dimension), num_users, feedback_ratio)
if exp_loc == 'bottom':
    simulated_feedback_file = res_path + get_file_name(
        model + '_simulated_feedback_bottom', num_users, feedback_ratio)
    print('read bottom')

# Column 0 of the feature matrix holds the item id; the remaining columns are
# the feature values.
points_numpy = utils.read_features(feature_file, normalized=True)
item_id_map = {int(item_id): row for row, item_id in enumerate(points_numpy[:, 0])}
# Transposed so that each column of `points` is one item's feature vector.
points = torch.from_numpy(points_numpy[:, 1:].T)

print('started computing cross products')
points_cross_points = torch.matmul(torch.t(points), points)
# From here on `dimension` is taken from the loaded feature matrix.
dimension = points.shape[0]
num_items = points.shape[1]

# read the feedback pairs
users_feedback_pairs = {}
users = []
print('started reading the pairs')
# Experiment settings.
num_users = configParser.getint('shared', 'num_users')
dimension = configParser.getint('shared', 'dimension')
rec_per_user = configParser.getint('shared', 'rec_per_user')
num_votes = configParser.getint('shared', 'v')
train_partition = configParser.getfloat('experiments', 'train_ratio')
sim_threshold = configParser.getfloat('feedback_inc', 'sim_threshold')
beta = configParser.getfloat('shared', 'beta')

sim_file = 'item-item-similarity-' + str(dimension) + '.txt'

train_prefix = 'train'
graph_prefix = 'graph_d_' + str(dimension)
if experiment_mode == 'S':
    # the graph contains user's feedback on recommendations
    train_prefix = 'train_d_' + str(dimension)
    train_prefix += '_rec_' + str(rec_per_user) + '_v_' + str(num_votes)
    graph_prefix += '_rec_' + str(rec_per_user) + '_v_' + str(num_votes)

interactions_file = get_file_name(train_prefix, num_users, train_partition)
# Written so it parses on Python 2 and 3 and prints the same text as the
# original `print a, b` statement.
print('%s %s' % ('interaction file', interactions_file))
graph_file = get_file_name(graph_prefix, num_users, train_partition, 'gml')

dataset_path = "YOUR_PATH" + dataset + "-data/"
res_path = "YOUR_PATH" + dataset + "-data/"

# load item names
items_name = {}
item_names_file = 'id_link_map.txt'  # items.txt file with reversed order of columns
delimiter = '\t'
name_location = 1
with open(dataset_path + item_names_file, 'r') as f_in:
    next(f_in)  # skip the first line of the file
    for line in f_in:
        tabs = line.strip().split(delimiter)