def __init__(self, select_anchor, train_anchor, random_hash, jaccard_hash,
             kmeans_hash, hash_u_num, *args, **kwargs):
    """Set up the processor and build the user -> anchor/bucket mapping.

    Remaining positional and keyword arguments are forwarded unchanged to
    ``HistoryDP.__init__``.

    :param select_anchor: when 1, call ``self.select_anchor_users`` and merge
        its result into ``self.anchor_users``.
    :param train_anchor: stored on the instance; not otherwise used here.
    :param random_hash: when 1, give every user id in
        ``range(self.data_loader.user_num)`` a random bucket in
        ``[0, hash_u_num)``.
    :param jaccard_hash: when 1, replace ``self.anchor_users`` with the result
        of ``self.jaccard_hash_users``.
    :param kmeans_hash: when 1, replace ``self.anchor_users`` with the result
        of ``self.kmeans_hash_users``.
    :param hash_u_num: number of buckets / anchors passed to the helpers.
    """
    HistoryDP.__init__(self, *args, **kwargs)
    # NOTE(review): self.data_loader is presumably set by HistoryDP.__init__
    # -- confirm against the base class.

    self.anchor_users = None
    self.select_anchor = select_anchor
    self.train_anchor = train_anchor
    self.random_hash = random_hash
    self.hash_u_num = hash_u_num
    self.jaccard_hash = jaccard_hash
    self.kmeans_hash = kmeans_hash

    # The jaccard and kmeans strategies each overwrite the mapping, so at
    # most one of them may be switched on.
    assert self.jaccard_hash + self.kmeans_hash < 2

    if self.random_hash == 1:
        # One independent random draw per user id, in ascending id order.
        self.anchor_users = {
            uid: np.random.randint(self.hash_u_num)
            for uid in range(self.data_loader.user_num)
        }

    if self.select_anchor == 1:
        selected = self.select_anchor_users(
            data_loader=self.data_loader, anchor_num=self.hash_u_num)
        if self.anchor_users is None:
            self.anchor_users = selected
        else:
            # Selected anchors take precedence over the random assignment.
            for key in selected:
                self.anchor_users[key] = selected[key]

    if self.jaccard_hash == 1:
        self.anchor_users = self.jaccard_hash_users(
            data_loader=self.data_loader, anchor_num=self.hash_u_num)

    if self.kmeans_hash == 1:
        self.anchor_users = self.kmeans_hash_users(
            data_loader=self.data_loader, anchor_num=self.hash_u_num)
def parse_dp_args(parser):
    """Register the anchor/hash command-line options for batch generation.

    Adds the integer flags ``--select_anchor``, ``--train_anchor``,
    ``--random_hash``, ``--jaccard_hash`` and ``--kmeans_hash`` and then
    delegates to ``HistoryDP.parse_dp_args``.

    :param parser: argument parser to extend.
    :return: whatever ``HistoryDP.parse_dp_args(parser)`` returns.
    """
    # (flag, default, help) -- registered in this order, all typed as int.
    int_options = (
        ('--select_anchor', 1, 'Whether select train anchor users.'),
        ('--train_anchor', 0, 'Whether hash train anchor users.'),
        ('--random_hash', 0, 'random hash all users.'),
        ('--jaccard_hash', 0,
         'select anchors and use jaccard similarity to match clusters.'),
        ('--kmeans_hash', 0,
         'use kmeans to form clusters and use cluster id as bucket id.'),
    )
    for flag, default_value, help_text in int_options:
        parser.add_argument(
            flag, type=int, default=default_value, help=help_text)
    return HistoryDP.parse_dp_args(parser)
def parse_dp_args(parser):
    """Register the command-line options used when generating batches.

    Adds the integer flag ``--shuffle_his`` (default 0) and then delegates
    to ``HistoryDP.parse_dp_args``.

    :param parser: argument parser to extend.
    :return: whatever ``HistoryDP.parse_dp_args(parser)`` returns.
    """
    shuffle_option = {
        'type': int,
        'default': 0,
        'help': 'whether shuffle the his-list of each sent during training.',
    }
    parser.add_argument('--shuffle_his', **shuffle_option)
    return HistoryDP.parse_dp_args(parser)
def parse_dp_args(parser):
    """Add the batch-generation command-line parameters to ``parser``.

    Registers the integer flag ``--shuffle_his`` (default 0), then hands the
    parser to ``HistoryDP.parse_dp_args`` so the base options are added too.

    :param parser: argument parser to extend.
    :return: the result of ``HistoryDP.parse_dp_args(parser)``.
    """
    shuffle_help = 'whether shuffle the his-list of each sent during training.'
    parser.add_argument(
        '--shuffle_his', type=int, default=0, help=shuffle_help)
    extended = HistoryDP.parse_dp_args(parser)
    return extended
def get_feed_dict(self, *args, **kwargs):
    """Build the base feed dict and attach one anchor id per batch entry.

    All arguments are forwarded to ``HistoryDP.get_feed_dict``. The returned
    dict additionally holds ``K_ANCHOR_USER``: an int64 array converted with
    ``utils.numpy_to_torch(..., gpu=False)``. An entry is ``-1`` when the
    user has no anchor, or when no anchor mapping exists at all.

    :return: the feed dict with ``K_ANCHOR_USER`` filled in.
    """
    feed_dict = HistoryDP.get_feed_dict(self, *args, **kwargs)
    total_batch_size = feed_dict[TOTAL_BATCH_SIZE]

    if self.anchor_users is None:
        # No mapping available: mark every entry of the batch as anchor-less.
        anchor_ids = -np.ones(total_batch_size, dtype=np.int64)
    else:
        batch_uids = list(feed_dict[UID].cpu().numpy())
        looked_up = []
        for uid in batch_uids:
            if uid in self.anchor_users:
                looked_up.append(self.anchor_users[uid])
            else:
                looked_up.append(-1)
        anchor_ids = np.array(looked_up, dtype=np.int64)

    feed_dict[K_ANCHOR_USER] = utils.numpy_to_torch(anchor_ids, gpu=False)
    return feed_dict
def __init__(self, shuffle_his, *args, **kwargs):
    """Store the history-shuffling flag and initialise the base processor.

    :param shuffle_his: stored as ``self.shuffle_his`` before the base
        initialiser runs.
    :param args: positional arguments forwarded to ``HistoryDP.__init__``.
    :param kwargs: keyword arguments forwarded to ``HistoryDP.__init__``.
    :raises AssertionError: if ``self.sparse_his`` is not 0 afterwards.
    """
    # Assigned first so the attribute already exists while the base
    # initialiser runs.
    self.shuffle_his = shuffle_his
    HistoryDP.__init__(self, *args, **kwargs)

    # NOTE(review): sparse_his is presumably set by HistoryDP.__init__ --
    # confirm. This processor requires it to be 0.
    assert self.sparse_his == 0