def execute(self):
    """Measure how well injected (fake) profiles mimic real ones.

    Loads the clean and the attacked rating matrices, isolates the
    injected rows, and returns the Total Variation Distance and the
    Jensen-Shannon divergence between the two item distributions.
    """
    # self.data_path_clean = './data/ml100k/ml100k_train.dat'
    # self.data_path_attacked = './results/data_attacked/ml100k/ml100k_AUSH_0.data'
    path_test = self.data_path_clean.replace('train', 'test')

    # Real profiles: the clean training matrix as a dense array.
    loader_clean = DataLoader(self.data_path_clean, path_test)
    df_clean, _, num_real_users, num_items = loader_clean.load_file_as_dataFrame()
    matrix_clean, _ = loader_clean.dataFrame_to_matrix(df_clean, num_real_users, num_items)
    real_profiles = matrix_clean.toarray()

    # Fake profiles: rows appended after the real users in the attacked file.
    loader_attacked = DataLoader(self.data_path_attacked, path_test)
    df_attacked, _, num_attacked_users, num_attacked_items = loader_attacked.load_file_as_dataFrame()
    matrix_attacked, _ = loader_attacked.dataFrame_to_matrix(df_attacked, num_attacked_users, num_attacked_items)
    fake_profiles = matrix_attacked.toarray()[num_real_users:, :]

    # Per-item rating distribution of each profile set.
    dist_real = self.get_item_distribution(real_profiles)
    dist_fake = self.get_item_distribution(fake_profiles)

    # Distances between the two distributions.
    TVD_distance = self.get_TVD_distance(dist_real, dist_fake)
    JS_distance = self.get_JS_distance(dist_real, dist_fake)

    res_str = 'TVD:%.4f\tJS:%.4f' % (TVD_distance, JS_distance)
    print('result begin', res_str, 'result end')
    return TVD_distance, JS_distance
def prepare_data(self):
    """Load train/test rating matrices and build the attack targets.

    Side effects: sets train/test dataframes and dense arrays, a torch
    DataLoader over the training rows, the set of users who have not
    rated the target item, and the one-hot attack-target tensor.
    """
    self.path_train = './data/%s/%s_train.dat' % (self.data_set, self.data_set)
    path_test = './data/%s/%s_test.dat' % (self.data_set, self.data_set)

    loader = DataLoader(self.path_train, path_test)
    (self.train_data_df, self.test_data_df,
     self.n_users, self.n_items) = loader.load_file_as_dataFrame()

    sparse_train, _ = loader.dataFrame_to_matrix(self.train_data_df, self.n_users, self.n_items)
    sparse_test, _ = loader.dataFrame_to_matrix(self.test_data_df, self.n_users, self.n_items)
    self.train_array = sparse_train.toarray()
    self.test_array = sparse_test.toarray()

    # Mini-batches of full user rating rows for the discriminator.
    self.data_loader = torch.utils.data.DataLoader(
        dataset=torch.from_numpy(self.train_array).type(torch.float32),
        batch_size=self.batch_size_D,
        shuffle=True,
        drop_last=True)

    # Users who have not yet rated the target item are the attack audience.
    self.target_users = np.where(self.train_array[:, self.target_id] == 0)[0]

    # Desired outcome: each of those users rates the target item with 1.0.
    one_hot_target = np.zeros((len(self.target_users), self.n_items))
    one_hot_target[:, self.target_id] = 1.0
    self.attack_target = torch.from_numpy(one_hot_target).type(torch.float32).to(self.device)
def execute(self):
    """Project attacked rating profiles into 2-D and save the embedding.

    Loads the attacked rating matrix, runs a 2-component PCA over all
    profiles, saves the 2-D coordinates, and stops via exit().
    The last 50 rows are treated as injected fake profiles (label 0),
    all earlier rows as genuine users (label 1).
    NOTE(review): everything after the exit() call is an unreachable
    hand-rolled t-SNE implementation, kept for reference.
    """
    import numpy as np
    import matplotlib.pyplot as plt
    import time

    # load data
    # =================================
    # path_dir = './results/performance/mid_results/%s' % (self.data_set)
    # user_embed_path = '%s/%s_NeuMF_%s_%d_user_embed.npy' % (
    #     path_dir, self.data_set, self.attacker, self.target_id)
    # self.x = np.load(user_embed_path)
    # =================================
    train_path = './results/data_attacked/%s/%s_%s_%d.data' % (
        self.data_set, self.data_set, self.attacker, self.target_id)
    test_path = './data/%s/%s_test.dat' % (self.data_set, self.data_set)
    dataset_class_attacked = DataLoader(train_path, test_path)
    train_data_df_attacked, _, n_users_attacked, n_items_attacked = \
        dataset_class_attacked.load_file_as_dataFrame()
    train_matrix_attacked, _ = dataset_class_attacked.dataFrame_to_matrix(
        train_data_df_attacked, n_users_attacked, n_items_attacked)
    self.x = train_matrix_attacked.toarray()
    # =================================
    # Labels: 1 = genuine user, 0 = injected profile (last 50 rows).
    Y = np.ones(self.x.shape[0])
    Y[-50:] = 0
    from sklearn.decomposition import PCA
    pca = PCA(n_components=2)
    pca.fit(self.x)
    data_2d = pca.transform(self.x)
    # plt.scatter(data_2d[:, 0], data_2d[:, 1], c=Y)
    # plt.show()
    # fig_path = "./results/performance/figs/%s/Tsne_%s_%s_%d_profile_pca.png" \
    #            % (self.data_set, self.attacker, self.recommender, self.target_id)
    # plt.savefig(fig_path)
    data_path = "./results/performance/figs/%s/Tsne_%s_%s_%d_profile_pca" \
                % (self.data_set, self.attacker, self.recommender, self.target_id)
    np.save(data_path, data_2d)
    exit()  # PCA-only run: terminate before the unreachable t-SNE pass below.

    # ================================== unreachable hand-rolled t-SNE
    Y = np.ones(self.x.shape[0])
    Y[-50:] = 0
    (n, d) = self.x.shape
    # Randomly initialize the low-dimensional embedding.
    y = np.random.randn(n, self.no_dims)
    dy = np.zeros((n, self.no_dims))     # gradient
    iy = np.zeros((n, self.no_dims))     # previous step (momentum term)
    gains = np.ones((n, self.no_dims))   # adaptive per-coordinate gains
    # Symmetrize the conditional probabilities into joint p_ij.
    P = self.seach_prob()
    P = P + np.transpose(P)
    P = P / np.sum(P)  # pij
    # early exaggeration
    # BUG FIX: time.clock() was removed in Python 3.8; use perf_counter().
    print("T-SNE DURING:%s" % time.perf_counter())
    P = P * 4
    P = np.maximum(P, 1e-12)

    # Run iterations ('it' instead of 'iter' to avoid shadowing the builtin).
    for it in range(self.max_iter):
        # Compute pairwise affinities with the Student-t kernel.
        sum_y = np.sum(np.square(y), 1)
        num = 1 / (1 + np.add(np.add(-2 * np.dot(y, y.T), sum_y).T, sum_y))
        num[range(n), range(n)] = 0
        Q = num / np.sum(num)  # qij
        Q = np.maximum(Q, 1e-12)

        # Compute gradient from (p_ij - q_ij).
        PQ = P - Q
        for i in range(n):
            dy[i, :] = np.sum(
                np.tile(PQ[:, i] * num[:, i], (self.no_dims, 1)).T * (y[i, :] - y), 0)

        # Perform the update with momentum and adaptive gains.
        if it < 20:
            momentum = self.initial_momentum
        else:
            momentum = self.final_momentum
        gains = (gains + 0.2) * ((dy > 0) != (iy > 0)) + \
                (gains * 0.8) * ((dy > 0) == (iy > 0))
        gains[gains < self.min_gain] = self.min_gain
        iy = momentum * iy - self.eta * (gains * dy)
        y = y + iy
        y = y - np.tile(np.mean(y, 0), (n, 1))

        # Evaluate the KL-divergence cost every 100 iterations; stop early
        # once the relative improvement stalls (ratio >= 0.95).
        if (it + 1) % 100 == 0:
            C = np.sum(P * np.log(P / Q))
            print("Iteration ", (it + 1), ": error is ", C)
            if (it + 1) != 100:
                ratio = C / oldC
                print("ratio ", ratio)
                if ratio >= 0.95:
                    break
            oldC = C
        # Stop lying about P-values (undo early exaggeration).
        if it == 100:
            P = P / 4
    print("finished training!")

    data_2d = y
    # plt.scatter(data_2d[:, 0], data_2d[:, 1], c=Y)
    # plt.show()
    # fig_path = "./results/performance/figs/%s/Tsne_%s_%s_%d.png" \
    #            % (self.data_set, self.attacker, self.recommender, self.target_id)
    # plt.savefig(fig_path)
    data_path = "./results/performance/figs/%s/Tsne_%s_%s_%d_profile" \
                % (self.data_set, self.attacker, self.recommender, self.target_id)
    np.save(data_path, data_2d)
class Recommender(object):
    """Abstract base class for recommender models under attack evaluation.

    Subclasses implement build_network/train/test/predict; this base
    handles argument parsing, data loading, model (re)storing, and
    per-target-item hit-ratio evaluation.
    """

    def __init__(self):
        self.args = self.parse_args()
        # NOTE(review): parse_args() returns the ArgumentParser itself, not a
        # parsed namespace, so reading self.args.train_path below only works
        # if a subclass overrides parse_args() to return parsed arguments —
        # confirm against subclasses before changing.
        # paths
        self.train_path = self.args.train_path
        self.test_path = self.args.test_path
        self.model_path = self.args.model_path
        self.target_prediction_path_prefix = self.args.target_prediction_path_prefix
        # attack settings: comma-separated lists of target items and cut-offs
        self.target_id_list = list(map(int, self.args.target_ids.split(',')))
        self.topk_list = list(map(int, self.args.topk.split(',')))
        #
        # os.environ["CUDA_VISIBLE_DEVICES"] = str(self.args.cuda_id)

    @staticmethod
    def parse_args():
        """Build and return the ArgumentParser with the common options."""
        parser = argparse.ArgumentParser(description="Run Recommender.")
        parser.add_argument('--data_set', type=str, default='ml100k')  # , required=True)
        # paths
        parser.add_argument('--train_path', type=str,
                            default='./data/ml100k/ml100k_train.dat')  # , required=True)
        parser.add_argument('--test_path', type=str,
                            default='./data/ml100k/ml100k_test.dat')  # , required=True)
        parser.add_argument('--model_path', type=str,
                            default='./results/model_saved/automotive/automotive_NeuMF_AUSHplus_round_119')  # , required=True)
        parser.add_argument('--target_prediction_path_prefix', type=str,
                            default='./results/performance/mid_results/ml100k_Recommender')  # , required=True)
        # attack
        parser.add_argument('--target_ids', type=str, default='0')  # , required=True)
        parser.add_argument('--topk', type=str, default='5,10,20,50')
        # parser.add_argument('--cuda_id', type=int, default=0)
        return parser

    def prepare_data(self):
        """Load train/test data as dataframes and sparse rating matrices."""
        self.dataset_class = DataLoader(self.train_path, self.test_path)
        (self.train_data_df, self.test_data_df,
         self.n_users, self.n_items) = self.dataset_class.load_file_as_dataFrame()
        self.train_matrix, _ = self.dataset_class.dataFrame_to_matrix(
            self.train_data_df, self.n_users, self.n_items)
        self.test_matrix, _ = self.dataset_class.dataFrame_to_matrix(
            self.test_data_df, self.n_users, self.n_items)

    def build_network(self):
        print('build Recommender model graph.')
        # BUG FIX: `raise NotImplemented` raises TypeError in Python 3;
        # the correct exception class is NotImplementedError.
        raise NotImplementedError

    def train(self):
        print('train.')
        raise NotImplementedError

    def test(self):
        print('test.')
        raise NotImplementedError

    def execute(self):
        print('generate target item performace on a trained Recommender model.')
        raise NotImplementedError

    def save(self, path):
        """Persist the TF session's variables to `path`."""
        saver = tf.train.Saver()
        saver.save(self.sess, path)

    def restore(self, path):
        """Restore the TF session's variables from `path`."""
        saver = tf.train.Saver()
        saver.restore(self.sess, path)

    def predict(self, user_id, item_id):
        raise NotImplementedError

    def generate_target_result(self):
        """Predict ratings for users unaware of each target item and save
        per-user [user_id, predicted rating, HR@k...] rows to .npy files."""
        train_data_array = self.train_matrix.toarray()
        for target_id in self.target_id_list:
            # Mask out users who already rated the target, and every
            # already-rated item of the remaining users.
            mask = np.zeros_like(train_data_array)
            mask[np.where(train_data_array[:, target_id])[0]] = float('inf')
            # Unrated (user, item) pairs form the test set.
            test_uids, test_iids = np.where((train_data_array + mask) == 0)
            # Predict ratings for those pairs.
            test_predRatings = self.predict(test_uids, test_iids)
            # Assemble into a dataframe for per-user grouping.
            predResults = pd.DataFrame({'user_id': test_uids,
                                        'item_id': test_iids,
                                        'rating': test_predRatings
                                        })
            # For each user: predicted score on the target and HR@k flags.
            predResults_target = np.zeros([len(predResults.user_id.unique()),
                                           len(self.topk_list) + 2])
            for idx, (user_id, pred_result) in enumerate(predResults.groupby('user_id')):
                pred_value = pred_result[pred_result.item_id == target_id].rating.values[0]
                sorted_recommend_list = pred_result.sort_values(
                    'rating', ascending=False).item_id.values
                new_line = [user_id, pred_value] + \
                           [1 if target_id in sorted_recommend_list[:k] else 0
                            for k in self.topk_list]
                predResults_target[idx] = new_line
            np.save('%s_%d' % (self.target_prediction_path_prefix, target_id),
                    predResults_target)