Exemple #1
0
    def __init__(self, select_anchor, train_anchor, random_hash, jaccard_hash,
                 kmeans_hash, hash_u_num, *args, **kwargs):
        """Initialize the processor and build the per-user anchor mapping.

        The remaining positional/keyword arguments are forwarded unchanged to
        ``HistoryDP.__init__``, which runs before anything else.

        ``self.anchor_users`` ends up as ``None`` when no strategy is enabled;
        otherwise it is built by the enabled strategies, applied in this
        order:

        1. ``random_hash == 1``: every user id in
           ``range(self.data_loader.user_num)`` gets
           ``np.random.randint(self.hash_u_num)``.
        2. ``select_anchor == 1``: the result of ``self.select_anchor_users``
           overrides/extends the random assignment, or becomes the mapping
           when no random assignment exists.
        3. ``jaccard_hash == 1``: the mapping is replaced by the result of
           ``self.jaccard_hash_users``.
        4. ``kmeans_hash == 1``: the mapping is replaced by the result of
           ``self.kmeans_hash_users``.

        :param select_anchor: 1 to enable step 2.
        :param train_anchor: stored on the instance; not used here.
        :param random_hash: 1 to enable step 1.
        :param jaccard_hash: 1 to enable step 3.
        :param kmeans_hash: 1 to enable step 4.
        :param hash_u_num: upper bound passed to ``np.random.randint`` and
            ``anchor_num`` passed to the selection/hash helpers.
        :raises AssertionError: if ``jaccard_hash + kmeans_hash`` is 2 or more.
        """
        HistoryDP.__init__(self, *args, **kwargs)

        # Plain configuration values, kept on the instance.
        self.select_anchor = select_anchor
        self.train_anchor = train_anchor
        self.hash_u_num = hash_u_num
        self.random_hash = random_hash
        self.jaccard_hash = jaccard_hash
        self.kmeans_hash = kmeans_hash
        self.anchor_users = None

        # The jaccard and kmeans strategies are mutually exclusive.
        assert self.jaccard_hash + self.kmeans_hash < 2

        if self.random_hash == 1:
            self.anchor_users = {
                uid: np.random.randint(self.hash_u_num)
                for uid in range(self.data_loader.user_num)
            }

        if self.select_anchor == 1:
            selected = self.select_anchor_users(
                data_loader=self.data_loader, anchor_num=self.hash_u_num)
            if self.anchor_users is None:
                self.anchor_users = selected
            else:
                # Selected anchors take precedence over random buckets.
                for uid in selected:
                    self.anchor_users[uid] = selected[uid]

        # Either of the following replaces whatever was built above.
        if self.jaccard_hash == 1:
            self.anchor_users = self.jaccard_hash_users(
                data_loader=self.data_loader, anchor_num=self.hash_u_num)

        if self.kmeans_hash == 1:
            self.anchor_users = self.kmeans_hash_users(
                data_loader=self.data_loader, anchor_num=self.hash_u_num)
Exemple #2
0
 def parse_dp_args(parser):
     """
     Register the command-line arguments used when generating batches.

     Adds the integer anchor/hash options below, then hands the parser to
     ``HistoryDP.parse_dp_args`` and returns its result.

     :param parser: argument parser that receives the options
     :return: the value returned by ``HistoryDP.parse_dp_args(parser)``
     """
     # (flag, default, help text) -- every option is parsed as an int.
     int_options = (
         ('--select_anchor', 1, 'Whether select train anchor users.'),
         ('--train_anchor', 0, 'Whether hash train anchor users.'),
         ('--random_hash', 0, 'random hash all users.'),
         ('--jaccard_hash', 0,
          'select anchors and use jaccard similarity to match clusters.'),
         ('--kmeans_hash', 0,
          'use kmeans to form clusters and use cluster id as bucket id.'),
     )
     for flag, default_value, help_text in int_options:
         parser.add_argument(flag,
                             type=int,
                             default=default_value,
                             help=help_text)
     return HistoryDP.parse_dp_args(parser)
Exemple #3
0
 def parse_dp_args(parser):
     """
     Register the command-line arguments used when generating batches.

     Adds the integer ``--shuffle_his`` option (default 0), then hands the
     parser to ``HistoryDP.parse_dp_args`` and returns its result.

     :param parser: argument parser that receives the option
     :return: the value returned by ``HistoryDP.parse_dp_args(parser)``
     """
     shuffle_his_spec = {
         'type': int,
         'default': 0,
         'help': 'whether shuffle the his-list of each sent during training.',
     }
     parser.add_argument('--shuffle_his', **shuffle_his_spec)
     return HistoryDP.parse_dp_args(parser)
Exemple #4
0
 def parse_dp_args(parser):
     """
     Command-line parameters to generate batches in data processing.

     Registers the integer ``--shuffle_his`` option (default 0) and then
     delegates to ``HistoryDP.parse_dp_args`` for the remaining options.

     :param parser: argument parser that receives the option
     :return: the value returned by ``HistoryDP.parse_dp_args(parser)``
     """
     option_kwargs = dict(
         type=int,
         default=0,
         help='whether shuffle the his-list of each sent during training.')
     parser.add_argument('--shuffle_his', **option_kwargs)
     return HistoryDP.parse_dp_args(parser)
Exemple #5
0
    def get_feed_dict(self, *args, **kwargs):
        """Build the parent feed dict and add the anchor-user entry.

        All arguments are forwarded to ``HistoryDP.get_feed_dict``. The
        returned dict gains a ``K_ANCHOR_USER`` entry holding one int64 value
        per entry of ``feed_dict[UID]``: the value stored in
        ``self.anchor_users`` for that user id, or -1 when the id is not
        present. When ``self.anchor_users`` is ``None`` the entry is instead
        filled with ``TOTAL_BATCH_SIZE`` copies of -1.

        :return: the feed dict with ``K_ANCHOR_USER`` set.
        """
        feed_dict = HistoryDP.get_feed_dict(self, *args, **kwargs)
        batch_total = feed_dict[TOTAL_BATCH_SIZE]

        user_to_anchor = self.anchor_users
        if user_to_anchor is None:
            # No mapping configured: mark every row as "no anchor".
            anchor_ids = -np.ones(batch_total, dtype=np.int64)
        else:
            batch_uids = feed_dict[UID].cpu().numpy()
            anchor_ids = np.array(
                [
                    -1 if uid not in user_to_anchor else user_to_anchor[uid]
                    for uid in batch_uids
                ],
                dtype=np.int64)

        feed_dict[K_ANCHOR_USER] = utils.numpy_to_torch(anchor_ids, gpu=False)
        return feed_dict
Exemple #6
0
 def __init__(self, shuffle_his, *args, **kwargs):
     """Store the history-shuffling flag, then run the ``HistoryDP`` initializer.

     :param shuffle_his: value stored on ``self.shuffle_his``; presumably the
         0/1 ``--shuffle_his`` command-line flag -- confirm against the caller.
     :param args: positional arguments forwarded to ``HistoryDP.__init__``.
     :param kwargs: keyword arguments forwarded to ``HistoryDP.__init__``.
     :raises AssertionError: if ``self.sparse_his`` is not 0 once the parent
         initializer has finished.
     """
     # Assigned before the parent initializer runs.
     # NOTE(review): presumably the parent reads it during construction --
     # confirm before reordering.
     self.shuffle_his = shuffle_his
     HistoryDP.__init__(self, *args, **kwargs)
     # Only sparse_his == 0 is accepted by this processor.
     # NOTE(review): sparse_his is presumably set by HistoryDP.__init__ --
     # confirm.
     assert self.sparse_his == 0