def YouTubeEvaluationSampler(dataset, max_seq_len, user_feature, seed=100, sort=True):
    """Build a single-process sampler for leave-last-out sequence evaluation.

    For every user returned by ``dataset.warm_users()`` that has at least two
    positive items, one evaluation instance is produced: the input sequence
    holds the (up to) ``max_seq_len`` items that precede the user's last
    positive item, and the target is that last, held-out item.

    Args:
        dataset: Object providing ``warm_users()`` and
            ``get_positive_items(user_id, sort=...)``.
        max_seq_len: Length of the zero-padded ``seq_item_id`` field.
        user_feature: Container indexed by user id whose entries provide the
            keys ``'user_gender'`` and ``'user_geo'``.
        seed: Value passed to ``random.seed``.
        sort: Forwarded to ``dataset.get_positive_items``.

    Returns:
        The ``Sampler`` wrapping the generator, created with ``num_process=1``.

    Generator protocol (repeated forever, one pass per loop over warm users):
        * ``([target_item], input_npy)`` for each eligible user, immediately
          followed by ``([], [])`` (presumably an end-of-user marker for the
          consumer -- confirm against the evaluation loop);
        * ``(None, None)`` once every warm user has been visited.
    """
    random.seed(seed)

    def batch(dataset, user_feature=user_feature, max_seq_len=max_seq_len):
        record_dtype = [
            ('seq_item_id', (np.int32, max_seq_len)),
            ('seq_len', np.int32),
            ('user_gender', np.int32),
            ('user_geo', np.int32),
        ]
        while True:
            for user_id in dataset.warm_users():
                item_list = dataset.get_positive_items(user_id, sort=sort)
                # Need at least one input item plus one held-out target.
                if len(item_list) <= 1:
                    continue

                # Everything except the last item, truncated to the most
                # recent max_seq_len entries.
                train_items = item_list[-max_seq_len - 1:-1]
                pad_train_items = np.zeros(max_seq_len, np.int32)
                pad_train_items[:len(train_items)] = train_items

                # A fresh array per instance: the yielded object is handed
                # off to the consumer and must not be overwritten later.
                input_npy = np.zeros(1, dtype=record_dtype)
                input_npy[0] = (
                    pad_train_items,
                    len(train_items),
                    user_feature[user_id]['user_gender'],
                    user_feature[user_id]['user_geo'],
                )

                # The target is the held-out last item. Using
                # train_items[-1] here would evaluate on an item that is
                # already part of the input sequence.
                yield [item_list[-1]], input_npy
                yield [], []
            yield None, None

    s = Sampler(dataset=dataset, generate_batch=batch, num_process=1)
    return s
def TemporalSampler(dataset, batch_size, max_seq_len, num_process=5, seed=100):
    """Build a sampler of random (item-sequence prefix, next item) batches.

    Each batch row is produced by drawing a random user with more than one
    positive item, drawing a random position ``p >= 1`` in that user's
    sorted positive items, and using the up-to-``max_seq_len`` items that
    precede ``p`` as the input sequence with the item at ``p`` as the label.

    Args:
        dataset: Object providing ``total_users()`` and
            ``get_positive_items(user_id, sort=True)``.
        batch_size: Number of rows per yielded structured array.
        max_seq_len: Length of the zero-padded ``seq_item_id`` field.
        num_process: Forwarded to ``Sampler``.
        seed: Value passed to ``random.seed``.

    Returns:
        The ``Sampler`` wrapping the endless batch generator.
    """
    random.seed(seed)

    def batch(dataset, max_seq_len=max_seq_len, batch_size=batch_size):
        record_dtype = [
            ('seq_item_id', (np.int32, max_seq_len)),
            ('seq_len', np.int32),
            ('label', np.int32),
        ]
        while True:
            samples = np.zeros(batch_size, dtype=record_dtype)
            for row in range(batch_size):
                # Re-draw until the user has enough history to split into
                # a non-empty prefix and a label.
                while True:
                    user = random.randint(0, dataset.total_users() - 1)
                    history = dataset.get_positive_items(user, sort=True)
                    if len(history) > 1:
                        break

                target_pos = random.randint(1, len(history) - 1)
                window_start = max(0, target_pos - max_seq_len)
                window = history[window_start:target_pos]

                padded = np.zeros(max_seq_len, np.int32)
                padded[:len(window)] = window
                samples[row] = (padded, len(window), history[target_pos])
            yield samples

    return Sampler(dataset=dataset, generate_batch=batch, num_process=num_process)
def VBPREvaluationSampler(batch_size, dataset, item_vfeature, seed=100):
    """Build a single-process evaluation sampler that attaches item features.

    For each user from ``dataset.warm_users()`` the positive items followed
    by the negative items are split into chunks of at most ``batch_size``.
    Every chunk is yielded as a structured array with fields ``user_id``,
    ``item_id`` and ``item_vfeature`` (the row of ``item_vfeature`` for that
    item).

    Args:
        batch_size: Maximum number of rows per yielded array.
        dataset: Object providing ``warm_users()``,
            ``get_positive_items(user_id)`` and
            ``get_negative_items(user_id)``.
        item_vfeature: Two-dimensional array indexed by item id; its second
            dimension sets the width of the ``item_vfeature`` field.
        seed: Value passed to ``random.seed``.

    Returns:
        The ``Sampler`` wrapping the generator, created with ``num_process=1``.

    Generator protocol (repeated forever):
        * ``(range(k), chunk)`` while ``k``, the number of positives at or
          after the chunk's starting offset, is positive; otherwise
          ``([], chunk)``;
        * ``([], [])`` after the last chunk of each user;
        * ``(None, None)`` after the last user of a pass.
    """
    random.seed(seed)

    def batch(dataset, batch_size=batch_size, item_vfeature=item_vfeature):
        _rows, dim_v = item_vfeature.shape
        record_dtype = [
            ('user_id', np.int32),
            ('item_id', np.int32),
            ('item_vfeature', np.float32, dim_v),
        ]
        while True:
            for user in dataset.warm_users():
                positives = dataset.get_positive_items(user)
                negatives = dataset.get_negative_items(user)
                # Positives come first, so a positive's index in the
                # concatenation is its index among the positives.
                candidates = positives + negatives
                total = len(candidates)
                num_chunks = int(math.ceil(float(total) / batch_size))

                for chunk_no in range(num_chunks):
                    offset = chunk_no * batch_size
                    size = min(total - offset, batch_size)
                    chunk = np.zeros(size, dtype=record_dtype)
                    for slot in range(size):
                        item = candidates[offset + slot]
                        chunk[slot] = (user, item, item_vfeature[item])

                    positives_left = len(positives) - offset
                    marker = range(positives_left) if positives_left > 0 else []
                    yield marker, chunk
                yield [], []
            yield None, None

    return Sampler(dataset=dataset, generate_batch=batch, num_process=1)
def StratifiedPointwiseSampler(dataset, batch_size, pos_ratio=0.5, num_process=5, seed=100):
    """Build a pointwise sampler with a fixed share of positives per batch.

    The first ``int(batch_size * pos_ratio)`` rows of every batch come from
    ``dataset.next_random_record()`` and are labelled ``1.0``. The remaining
    rows are uniformly drawn (user, item) pairs, re-drawn until
    ``dataset.is_positive`` is false, and are labelled ``0.0``.

    Args:
        dataset: Object providing ``next_random_record()``,
            ``total_users()``, ``total_items()`` and
            ``is_positive(user_id, item_id)``.
        batch_size: Number of rows per yielded structured array.
        pos_ratio: Fraction of each batch filled with positive records.
        num_process: Forwarded to ``Sampler``.
        seed: Value passed to ``random.seed``.

    Returns:
        The ``Sampler`` wrapping the endless batch generator.
    """
    random.seed(seed)

    def batch(dataset, batch_size=batch_size, pos_ratio=pos_ratio, seed=seed):
        num_pos = int(batch_size * pos_ratio)
        record_dtype = [
            ('user_id', np.int32),
            ('item_id', np.int32),
            ('label', np.float32),
        ]
        while True:
            samples = np.zeros(batch_size, dtype=record_dtype)

            # Positive rows occupy the front of the batch.
            for row in range(num_pos):
                record = dataset.next_random_record()
                samples[row] = (record['user_id'], record['item_id'], 1.0)

            # Negative rows fill the rest; reject any pair that is positive.
            for row in range(num_pos, batch_size):
                while True:
                    user = random.randint(0, dataset.total_users() - 1)
                    item = random.randint(0, dataset.total_items() - 1)
                    if not dataset.is_positive(user, item):
                        break
                samples[row] = (user, item, 0.0)

            yield samples

    return Sampler(dataset=dataset, generate_batch=batch, num_process=num_process)
def RandomPointwiseSampler(dataset, batch_size, num_process=5, seed=100):
    """Build a pointwise sampler over uniformly random (user, item) pairs.

    Every row is an independently drawn user id and item id; the label is
    ``1.0`` when ``dataset.is_positive(user_id, item_id)`` is true and
    ``0.0`` otherwise.

    Args:
        dataset: Object providing ``total_users()``, ``total_items()`` and
            ``is_positive(user_id, item_id)``.
        batch_size: Number of rows per yielded structured array.
        num_process: Forwarded to ``Sampler``.
        seed: Value passed to ``random.seed``.

    Returns:
        The ``Sampler`` wrapping the endless batch generator.
    """
    random.seed(seed)

    def batch(dataset, batch_size=batch_size):
        record_dtype = [
            ('user_id', np.int32),
            ('item_id', np.int32),
            ('label', np.float32),
        ]
        while True:
            samples = np.zeros(batch_size, dtype=record_dtype)
            for row in range(batch_size):
                user = random.randint(0, dataset.total_users() - 1)
                item = random.randint(0, dataset.total_items() - 1)
                if dataset.is_positive(user, item):
                    label = 1.0
                else:
                    label = 0.0
                samples[row] = (user, item, label)
            yield samples

    return Sampler(dataset=dataset, generate_batch=batch, num_process=num_process)
def VBPRPairwiseSampler(dataset, batch_size, item_vfeature, num_process=5, seed=100):
    """Build a pairwise sampler that attaches item features to both items.

    Each row pairs the user and item of ``dataset.next_random_record()``
    (the positive item) with the first element of
    ``dataset.sample_negative_items(user_id)`` (the negative item), along
    with the ``item_vfeature`` rows of both items.

    Args:
        dataset: Object providing ``next_random_record()`` and
            ``sample_negative_items(user_id)``.
        batch_size: Number of rows per yielded structured array.
        item_vfeature: Two-dimensional array indexed by item id; its second
            dimension sets the width of the two feature fields.
        num_process: Forwarded to ``Sampler``.
        seed: Value passed to ``random.seed``.

    Returns:
        The ``Sampler`` wrapping the endless batch generator.
    """
    random.seed(seed)

    def batch(dataset, batch_size=batch_size, item_vfeature=item_vfeature, seed=seed):
        _rows, dim_v = item_vfeature.shape
        record_dtype = [
            ('user_id', np.int32),
            ('p_item_id', np.int32),
            ('n_item_id', np.int32),
            ('p_item_vfeature', np.float32, dim_v),
            ('n_item_vfeature', np.float32, dim_v),
        ]
        while True:
            samples = np.zeros(batch_size, dtype=record_dtype)
            for row in range(batch_size):
                record = dataset.next_random_record()
                user = record['user_id']
                pos_item = record['item_id']
                neg_item = dataset.sample_negative_items(user)[0]
                samples[row] = (
                    user,
                    pos_item,
                    neg_item,
                    item_vfeature[pos_item],
                    item_vfeature[neg_item],
                )
            yield samples

    return Sampler(dataset=dataset, generate_batch=batch, num_process=num_process)
def RandomPairwiseSampler(dataset, batch_size, num_process=5, seed=100):
    """Build a pairwise sampler of (user, positive item, negative item) rows.

    Each row takes its user and positive item from
    ``dataset.next_random_record()`` and its negative item from the first
    element of ``dataset.sample_negative_items(user_id)``.

    Args:
        dataset: Object providing ``next_random_record()`` and
            ``sample_negative_items(user_id)``.
        batch_size: Number of rows per yielded structured array.
        num_process: Forwarded to ``Sampler``.
        seed: Value passed to ``random.seed``.

    Returns:
        The ``Sampler`` wrapping the endless batch generator.
    """
    random.seed(seed)

    def batch(dataset, batch_size=batch_size, seed=seed):
        record_dtype = [
            ('user_id', np.int32),
            ('p_item_id', np.int32),
            ('n_item_id', np.int32),
        ]
        while True:
            samples = np.zeros(batch_size, dtype=record_dtype)
            for row in range(batch_size):
                record = dataset.next_random_record()
                user = record['user_id']
                pos_item = record['item_id']
                neg_item = dataset.sample_negative_items(user)[0]
                samples[row] = (user, pos_item, neg_item)
            yield samples

    return Sampler(dataset=dataset, generate_batch=batch, num_process=num_process)