Example #1
0
    def execute(self):
        """Compare item-rating distributions of real vs. injected fake profiles.

        Loads the clean training data and the attacked training data, slices
        out the fake-user rows appended after the real users, and reports two
        divergence measures between the two item distributions.

        Returns:
            (TVD_distance, JS_distance) as floats.
        """
        test_path = self.data_path_clean.replace('train', 'test')

        # Real profiles: dense user-item matrix built from the clean training file.
        clean_loader = DataLoader(self.data_path_clean, test_path)
        clean_df, _, num_real_users, num_real_items = clean_loader.load_file_as_dataFrame()
        real_sparse, _ = clean_loader.dataFrame_to_matrix(clean_df, num_real_users, num_real_items)
        real_profiles = real_sparse.toarray()

        # Fake profiles: the rows appended after the real users in the attacked file.
        attacked_loader = DataLoader(self.data_path_attacked, test_path)
        attacked_df, _, num_attacked_users, num_attacked_items = attacked_loader.load_file_as_dataFrame()
        attacked_sparse, _ = attacked_loader.dataFrame_to_matrix(
            attacked_df, num_attacked_users, num_attacked_items)
        fake_profiles = attacked_sparse.toarray()[num_real_users:, :]

        # Per-item distributions for each profile set.
        real_dist = self.get_item_distribution(real_profiles)
        fake_dist = self.get_item_distribution(fake_profiles)

        # Divergences between the real and fake item distributions.
        TVD_distance = self.get_TVD_distance(real_dist, fake_dist)
        JS_distance = self.get_JS_distance(real_dist, fake_dist)

        res_str = 'TVD:%.4f\tJS:%.4f' % (TVD_distance, JS_distance)
        print('result begin', res_str, 'result end')
        return TVD_distance, JS_distance
Example #2
0
    def prepare_data(self):
        """Load the train/test split for self.data_set and build torch inputs.

        Side effects: sets the train/test dataframes and dense arrays, a
        shuffled batched DataLoader over training rows, the indices of users
        who have not rated the target item, and the attack-target tensor.
        """
        self.path_train = './data/%s/%s_train.dat' % (self.data_set, self.data_set)
        path_test = './data/%s/%s_test.dat' % (self.data_set, self.data_set)

        loader = DataLoader(self.path_train, path_test)
        self.train_data_df, self.test_data_df, self.n_users, self.n_items = loader.load_file_as_dataFrame()
        sparse_train, _ = loader.dataFrame_to_matrix(self.train_data_df, self.n_users, self.n_items)
        sparse_test, _ = loader.dataFrame_to_matrix(self.test_data_df, self.n_users, self.n_items)
        self.train_array = sparse_train.toarray()
        self.test_array = sparse_test.toarray()

        # Batched, shuffled iterator over full user rating rows.
        train_tensor = torch.from_numpy(self.train_array).type(torch.float32)
        self.data_loader = torch.utils.data.DataLoader(dataset=train_tensor,
                                                       batch_size=self.batch_size_D,
                                                       shuffle=True, drop_last=True)

        # Users who have not rated the target item are the attack candidates.
        self.target_users = np.where(self.train_array[:, self.target_id] == 0)[0]

        # Supervision target: rating 1.0 on the target item for every candidate.
        target_matrix = np.zeros((len(self.target_users), self.n_items))
        target_matrix[:, self.target_id] = 1.0
        self.attack_target = torch.from_numpy(target_matrix).type(torch.float32).to(self.device)
Example #3
0
    def execute(self):
        """Project (attacked) rating profiles to 2-D and save the coordinates.

        Reduces the attacked training matrix to two dimensions with PCA,
        saves the result, then exits.  The hand-rolled t-SNE implementation
        after the exit() call is currently unreachable dead code; it is left
        in place — with the Python-3.8-removed time.clock() call repaired —
        in case it is re-enabled later.
        """
        import numpy as np
        import matplotlib.pyplot as plt
        import time

        # ---- load the attacked rating matrix --------------------------------
        train_path = './results/data_attacked/%s/%s_%s_%d.data' % (
            self.data_set, self.data_set, self.attacker, self.target_id)
        test_path = './data/%s/%s_test.dat' % (self.data_set, self.data_set)
        dataset_class_attacked = DataLoader(train_path, test_path)
        train_data_df_attacked, _, n_users_attacked, n_items_attacked = dataset_class_attacked.load_file_as_dataFrame()
        train_matrix_attacked, _ = dataset_class_attacked.dataFrame_to_matrix(train_data_df_attacked, n_users_attacked,
                                                                              n_items_attacked)
        self.x = train_matrix_attacked.toarray()

        # ---- PCA projection --------------------------------------------------
        # NOTE(review): labels assume the last 50 rows are the injected fake
        # profiles — confirm the attack size really is 50.
        Y = np.ones(self.x.shape[0])
        Y[-50:] = 0
        from sklearn.decomposition import PCA
        pca = PCA(n_components=2)
        pca.fit(self.x)
        data_2d = pca.transform(self.x)
        data_path = "./results/performance/figs/%s/Tsne_%s_%s_%d_profile_pca" \
                    % (self.data_set, self.attacker, self.recommender, self.target_id)
        np.save(data_path, data_2d)
        exit()

        # ==== UNREACHABLE below: hand-rolled t-SNE ===========================
        Y = np.ones(self.x.shape[0])
        Y[-50:] = 0

        (n, d) = self.x.shape
        # Random initialisation of the low-dimensional embedding.
        y = np.random.randn(n, self.no_dims)
        # Gradient and its momentum-smoothed update.
        dy = np.zeros((n, self.no_dims))
        iy = np.zeros((n, self.no_dims))
        # Per-coordinate adaptive gain factors.
        gains = np.ones((n, self.no_dims))

        # Symmetrise the conditional probabilities into joint p_ij.
        P = self.seach_prob()
        P = P + np.transpose(P)
        P = P / np.sum(P)  # pij
        # Fix: time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement.
        print("T-SNE DURING:%s" % time.perf_counter())
        # Early exaggeration: inflate P for the first 100 iterations.
        P = P * 4
        P = np.maximum(P, 1e-12)

        # Gradient-descent iterations (renamed from `iter`, which shadows the builtin).
        for step in range(self.max_iter):
            # Low-dimensional affinities q_ij (Student-t kernel).
            sum_y = np.sum(np.square(y), 1)
            num = 1 / (1 + np.add(np.add(-2 * np.dot(y, y.T), sum_y).T, sum_y))
            num[range(n), range(n)] = 0
            Q = num / np.sum(num)  # qij
            Q = np.maximum(Q, 1e-12)

            # Gradient of KL(P||Q) with respect to y.
            PQ = P - Q
            for i in range(n):
                dy[i, :] = np.sum(np.tile(PQ[:, i] * num[:, i], (self.no_dims, 1)).T * (y[i, :] - y), 0)

            # Momentum schedule: smaller momentum for the first 20 iterations.
            if step < 20:
                momentum = self.initial_momentum
            else:
                momentum = self.final_momentum

            # Adaptive gains: grow when the gradient sign flips, shrink otherwise.
            gains = (gains + 0.2) * ((dy > 0) != (iy > 0)) + (gains * 0.8) * ((dy > 0) == (iy > 0))
            gains[gains < self.min_gain] = self.min_gain

            iy = momentum * iy - self.eta * (gains * dy)
            y = y + iy
            # Re-centre the embedding around the origin.
            y = y - np.tile(np.mean(y, 0), (n, 1))

            # Report the KL cost every 100 iterations; stop early once the
            # cost plateaus (ratio to the previous checkpoint >= 0.95).
            if (step + 1) % 100 == 0:
                C = np.sum(P * np.log(P / Q))
                print("Iteration ", (step + 1), ": error is ", C)
                if (step + 1) != 100:
                    ratio = C / oldC
                    print("ratio ", ratio)
                    if ratio >= 0.95:
                        break
                oldC = C
            # End of the early-exaggeration phase.
            if step == 100:
                P = P / 4
        print("finished training!")

        data_2d = y
        data_path = "./results/performance/figs/%s/Tsne_%s_%s_%d_profile" \
                    % (self.data_set, self.attacker, self.recommender, self.target_id)
        np.save(data_path, data_2d)
Example #4
0
class Recommender(object):
    """Base class for recommenders evaluated under shilling attacks.

    Handles CLI argument parsing, dataset loading, TensorFlow checkpointing,
    and exporting per-target-item prediction / hit-ratio results.  Subclasses
    must implement build_network / train / test / execute / predict.
    """

    def __init__(self):
        # NOTE(review): parse_args() returns the ArgumentParser itself, so the
        # attribute reads below only work if a subclass overrides parse_args()
        # to return parsed arguments — confirm against the subclasses.
        self.args = self.parse_args()
        # Paths
        self.train_path = self.args.train_path
        self.test_path = self.args.test_path
        self.model_path = self.args.model_path
        self.target_prediction_path_prefix = self.args.target_prediction_path_prefix
        # Attack settings
        self.target_id_list = list(map(int, self.args.target_ids.split(',')))
        self.topk_list = list(map(int, self.args.topk.split(',')))
        #
        # os.environ["CUDA_VISIBLE_DEVICES"] = str(self.args.cuda_id)

    @staticmethod
    def parse_args():
        """Build and return the CLI argument parser shared by recommenders."""
        parser = argparse.ArgumentParser(description="Run Recommender.")
        parser.add_argument('--data_set', type=str, default='ml100k')  # , required=True)
        # Paths
        parser.add_argument('--train_path', type=str,
                            default='./data/ml100k/ml100k_train.dat')  # , required=True)
        parser.add_argument('--test_path', type=str,
                            default='./data/ml100k/ml100k_test.dat')  # , required=True)
        parser.add_argument('--model_path', type=str,
                            default='./results/model_saved/automotive/automotive_NeuMF_AUSHplus_round_119')  # , required=True)
        parser.add_argument('--target_prediction_path_prefix', type=str,
                            default='./results/performance/mid_results/ml100k_Recommender')  # , required=True)

        # Attack settings
        parser.add_argument('--target_ids', type=str, default='0')  # , required=True)
        parser.add_argument('--topk', type=str, default='5,10,20,50')
        #
        parser.add_argument('--cuda_id', type=int, default=0)
        return parser

    def prepare_data(self):
        """Load train/test dataframes and their sparse rating matrices."""
        self.dataset_class = DataLoader(self.train_path, self.test_path)

        self.train_data_df, self.test_data_df, self.n_users, self.n_items = self.dataset_class.load_file_as_dataFrame()
        self.train_matrix, _ = self.dataset_class.dataFrame_to_matrix(self.train_data_df, self.n_users, self.n_items)
        self.test_matrix, _ = self.dataset_class.dataFrame_to_matrix(self.test_data_df, self.n_users, self.n_items)

    def build_network(self):
        """Construct the model graph; must be overridden."""
        print('build Recommender model graph.')
        # Fix: `raise NotImplemented` raises a TypeError (NotImplemented is a
        # constant, not an exception); NotImplementedError is the correct type.
        raise NotImplementedError

    def train(self):
        """Train the model; must be overridden."""
        print('train.')
        raise NotImplementedError

    def test(self):
        """Evaluate the model; must be overridden."""
        print('test.')
        raise NotImplementedError

    def execute(self):
        """Produce target-item performance for a trained model; override."""
        print('generate target item performace on a trained Recommender model.')
        raise NotImplementedError

    def save(self, path):
        """Checkpoint the TF session to `path`."""
        saver = tf.train.Saver()
        saver.save(self.sess, path)

    def restore(self, path):
        """Restore the TF session from `path`."""
        saver = tf.train.Saver()
        saver.restore(self.sess, path)

    def predict(self, user_id, item_id):
        """Predict ratings for (user, item) id arrays; must be overridden."""
        raise NotImplementedError

    def generate_target_result(self):
        """For each target item, predict scores for users who have not rated
        it and save [user_id, pred_score, HR@k...] rows to a .npy file."""
        train_data_array = self.train_matrix.toarray()
        for target_id in self.target_id_list:
            # Mask out users who already rated the target item, plus the
            # already-rated items of the remaining users.
            mask = np.zeros_like(train_data_array)
            mask[np.where(train_data_array[:, target_id])[0]] = float('inf')
            # Remaining zero cells are the (user, item) pairs to score.
            test_uids, test_iids = np.where((train_data_array + mask) == 0)
            # Predict ratings for all candidate pairs.
            test_predRatings = self.predict(test_uids, test_iids)
            # Collect predictions into a dataframe.
            predResults = pd.DataFrame({'user_id': test_uids,
                                        'item_id': test_iids,
                                        'rating': test_predRatings
                                        })
            # Per user: predicted score on the target item and HR@k flags.
            predResults_target = np.zeros([len(predResults.user_id.unique()), len(self.topk_list) + 2])
            for idx, (user_id, pred_result) in enumerate(predResults.groupby('user_id')):
                pred_value = pred_result[pred_result.item_id == target_id].rating.values[0]
                sorted_recommend_list = pred_result.sort_values('rating', ascending=False).item_id.values
                new_line = [user_id, pred_value] + [1 if target_id in sorted_recommend_list[:k] else 0 for k in
                                                    self.topk_list]
                predResults_target[idx] = new_line
            np.save('%s_%d' % (self.target_prediction_path_prefix, target_id), predResults_target)