def build_dev_iterator():
    """Create the dev-set DataIterator used for inference during training.

    When `config.bert` is set, precomputed BERT embedding buckets for the
    context and the question are loaded and attached to the iterator.
    """
    common_args = (dev_buckets, config.batch_size, config.para_limit,
                   config.ques_limit, config.char_limit, False,
                   config.sent_limit)
    if not config.bert:
        return DataIterator(*common_args, bert=False)
    ctx_buckets = get_buckets_bert(
        os.path.join(config.bert_dir, config.dev_bert_emb_context))
    ques_buckets = get_buckets_bert(
        os.path.join(config.bert_dir, config.dev_bert_emb_ques))
    return DataIterator(*common_args, bert=True,
                        bert_buckets=list(zip(ctx_buckets, ques_buckets)))
def train_model(self):
    # Adversarial training loop: alternately updates the discriminator and
    # the generator over batches of user indices.
    self.logger.info(self.evaluator.metrics_info())
    # Separate iterators so G and D can use different batch sizes.
    g_iter = DataIterator(np.arange(self.num_users), batch_size=self.batchSize_G, shuffle=True, drop_last=False)
    d_iter = DataIterator(np.arange(self.num_users), batch_size=self.batchSize_D, shuffle=True, drop_last=False)
    total_epochs = self.epochs
    # Each outer epoch performs step_G generator passes, so the outer loop
    # count is scaled down to keep the total number of G updates at `epochs`.
    total_epochs = int(total_epochs / self.step_G)
    for epoch in range(total_epochs):
        train_matrix, zr_matrix, pm_matrix = self.get_train_data()
        # training discriminator
        for d_epoch in range(self.step_D):
            for idx in d_iter:
                train_data = train_matrix[idx].toarray()  # sparse rows -> dense batch
                train_mask = pm_matrix[idx].toarray()
                feed = {self.real_data: train_data, self.mask: train_mask, self.condition: train_data}
                self.sess.run(self.trainer_d, feed_dict=feed)
        # training generator
        for g_epoch in range(self.step_G):
            for idx in g_iter:
                train_data = train_matrix[idx].toarray()
                # NOTE(review): zr/pm presumably stand for the zero-reconstruction
                # and partial-masking schemes of CFGAN — confirm against get_train_data.
                train_z_mask = zr_matrix[idx].toarray()
                train_p_mask = pm_matrix[idx].toarray()
                feed = {self.real_data: train_data, self.condition: train_data, self.mask: train_p_mask, self.g_zr_dims: train_z_mask}
                self.sess.run(self.trainer_g, feed_dict=feed)
        if epoch % self.verbose == 0:
            self.logger.info("epoch %d:\t%s" % (epoch, self.evaluate()))
def build_train_iterator():
    """Create the shuffled training DataIterator.

    With `config.bert` enabled, the precomputed BERT context/question
    embedding buckets are zipped together and handed to the iterator.
    """
    base_args = (train_buckets, config.batch_size, config.para_limit,
                 config.ques_limit, config.char_limit, True,
                 config.sent_limit)
    if not config.bert:
        return DataIterator(*base_args, bert=False, new_spans=True)
    ctx_buckets = get_buckets_bert(
        os.path.join(config.bert_dir, config.train_bert_emb_context))
    ques_buckets = get_buckets_bert(
        os.path.join(config.bert_dir, config.train_bert_emb_ques))
    return DataIterator(*base_args, bert=True,
                        bert_buckets=list(zip(ctx_buckets, ques_buckets)),
                        new_spans=True)
def __init__(self, dataset, neg_num=1, batch_size=1, shuffle=False, drop_last=False):
    """Initializes a new `PairwiseSampler` instance.

    Args:
        dataset (data.Dataset): An instance of `Dataset`.
        neg_num (int): How many negative items for each positive item.
            Defaults to `1`.
        batch_size (int): How many samples per batch to load.
            Defaults to `1`.
        shuffle (bool): Whether reshuffling the samples at every epoch.
            Defaults to `False`.
        drop_last (bool): Whether dropping the last incomplete batch.
            Defaults to `False`.
    """
    super(PairwiseSampler, self).__init__(batch_size=batch_size, drop_last=drop_last)
    self.shuffle = shuffle
    self.neg_num = neg_num
    # Fixed attribute typo: samplers elsewhere in this file read `self.item_num`
    # (see the negative-sampling __iter__), while this line originally assigned
    # `self.item_unm`. The misspelled name is kept as an alias so any existing
    # reader of `item_unm` keeps working.
    self.item_num = dataset.num_items
    self.item_unm = self.item_num
    user_pos_dict = dataset.get_user_train_dict()
    # Sets make per-user membership tests (exclusion lists) O(1).
    self.user_pos_dict = {
        user: set(items)
        for user, items in user_pos_dict.items()
    }
    user_list, item_list = dataset.get_train_interactions()
    self.ui_interactions = DataIterator(user_list, item_list, batch_size=1,
                                        shuffle=self.shuffle, drop_last=False)
def train_model(self):
    # Pointwise training: each epoch regenerates (user-history, item, label)
    # instances with fresh negatives, then optimizes over shuffled batches.
    self.logger.info(self.evaluator.metrics_info())
    for epoch in range(1, self.num_epochs + 1):
        user_input, num_idx, item_input, labels = \
            data_generator._get_pointwise_all_likefism_data(self.dataset, self.num_negatives, self.train_dict)
        data_iter = DataIterator(user_input, num_idx, item_input, labels, batch_size=self.batch_size, shuffle=True)
        num_training_instances = len(user_input)
        total_loss = 0.0
        training_start_time = time()
        for bat_users, bat_idx, bat_items, bat_labels in data_iter:
            # Histories are variable-length; pad with num_items (an out-of-range
            # item id) so the padding can be masked/ignored by the model.
            bat_users = pad_sequences(bat_users, value=self.num_items)
            feed_dict = {
                self.user_input: bat_users,
                self.num_idx: bat_idx,
                self.item_input: bat_items,
                self.labels: bat_labels,
                self.is_train_phase: True
            }
            loss, _ = self.sess.run((self.loss, self.optimizer), feed_dict=feed_dict)
            total_loss += loss  # accumulate batch losses for epoch-level reporting
        self.logger.info("[iter %d : loss : %f, time: %f]" % (epoch, total_loss / num_training_instances, time() - training_start_time))
        if epoch % self.verbose == 0:
            self.logger.info("epoch %d:\t%s" % (epoch, self.evaluate()))
def _sampling_negative_items(user_pos_len, neg_num, item_num, user_pos_dict):
    """Sample `neg_num` negative items for every positive interaction.

    Args:
        user_pos_len: Iterable of (user, num_positive_items) pairs.
        neg_num (int): Negatives to draw per positive item; must be > 0.
        item_num (int): Total number of items (sampling range).
        user_pos_dict (dict): Maps user -> positive items to exclude.

    Returns:
        list: One entry per user; when `neg_num > 1` each entry is reshaped
        to `(num_pos, neg_num)`.

    Raises:
        ValueError: If `neg_num` is not positive.
    """
    if neg_num <= 0:
        raise ValueError("'neg_num' must be a positive integer.")
    users, n_pos = list(zip(*user_pos_len))
    users_n_pos = DataIterator(users, n_pos, batch_size=1024, shuffle=False, drop_last=False)
    neg_items_list = []
    for bat_user, batch_num in users_n_pos:
        # Total negatives needed per user in this batch.
        batch_num = [num * neg_num for num in batch_num]
        exclusion = [user_pos_dict[u] for u in bat_user]
        bat_neg_items = batch_randint_choice(item_num, batch_num, replace=True, exclusion=exclusion)
        # Idiom fix: the original zipped in `bat_user` and `batch_num` but never
        # used those loop variables — only the sampled negatives are needed.
        for neg_items in bat_neg_items:
            if isinstance(neg_items, Iterable):
                if neg_num > 1:
                    # Group the flat draw into (num_pos, neg_num) rows.
                    neg_items = np.reshape(neg_items, newshape=[-1, neg_num])
                neg_items_list.extend(neg_items)
            else:
                # A single scalar draw (one positive, neg_num == 1).
                neg_items_list.append(neg_items)
    return neg_items_list
def train_model(self):
    """Sequence-model training loop: resample negatives each epoch, feed
    (user, seq, pos, neg) batches, then evaluate."""
    self.logger.info(self.evaluator.metrics_info())
    self.user_pos_train = csr_to_user_dict_bytime(
        self.dataset.time_matrix, self.dataset.train_matrix)
    users_list, item_seq_list, item_pos_list = self._generate_sequences()
    for epoch in range(self.epochs):
        negatives = self._sample_negative(users_list)
        batches = DataIterator(users_list, item_seq_list, item_pos_list,
                               negatives, batch_size=self.batch_size,
                               shuffle=True)
        for users, seqs, pos_items, neg_items in batches:
            feed = {
                self.user_ph: users,
                self.item_seq_ph: seqs,
                self.item_pos_ph: pos_items,
                self.item_neg_ph: neg_items,
                self.is_training: True,
            }
            self.sess.run(self.train_opt, feed_dict=feed)
        self.logger.info("epoch %d:\t%s" % (epoch, self.evaluate_model()))
def train_model(self):
    """Train on (item_seq, pos, neg) triples and evaluate after each epoch."""
    self.logger.info(self.evaluator.metrics_info())
    for epoch in range(self.epochs):
        seq_list, pos_list, neg_list = self.get_train_data()
        batches = DataIterator(seq_list, pos_list, neg_list,
                               batch_size=self.batch_size, shuffle=True)
        for seqs, pos_items, neg_items in batches:
            feed = {
                self.item_seq_ph: seqs,
                self.item_pos_ph: pos_items,
                self.item_neg_ph: neg_items,
                self.is_training: True,
            }
            self.sess.run(self.train_opt, feed_dict=feed)
        self.logger.info("epoch %d:\t%s" % (epoch, self.evaluate_model()))
def train_model(self):
    # Pairwise training that also feeds a social item and a per-instance
    # weight. NOTE(review): `suk` is presumably a social-similarity weight
    # s_{uk} — confirm against the model definition.
    self.logger.info(self.evaluator.metrics_info())
    for epoch in range(self.num_epochs):
        user_input, item_input_pos, item_input_social, item_input_neg, suk_input = self._get_pairwise_all_data()
        data_iter = DataIterator(user_input, item_input_pos, item_input_social, item_input_neg, suk_input, batch_size=self.batch_size, shuffle=True)
        total_loss = 0.0
        training_start_time = time()
        num_training_instances = len(user_input)
        for bat_users, bat_items_pos, bat_items_social, bat_items_neg, bat_suk_input in data_iter:
            feed_dict = {
                self.user_input: bat_users,
                self.item_input_pos: bat_items_pos,
                self.item_input_social: bat_items_social,
                self.item_input_neg: bat_items_neg,
                self.suk: bat_suk_input
            }
            loss, _ = self.sess.run((self.loss, self.optimizer), feed_dict=feed_dict)
            total_loss += loss  # accumulate for epoch-level average
        self.logger.info("[iter %d : loss : %f, time: %f]" % (epoch, total_loss / num_training_instances, time() - training_start_time))
        if epoch % self.verbose == 0:
            self.logger.info("epoch %d:\t%s" % (epoch, self.evaluate()))
def train_model(self):
    # Denoising-autoencoder style training: a fresh binomial corruption mask
    # is drawn each epoch and applied to user interaction vectors.
    self.logger.info(self.evaluator.metrics_info())
    for epoch in range(self.num_epochs):
        # Generate training instances
        # Entry is 1 with probability (1 - corruption_level), i.e. kept.
        mask_corruption_np = np.random.binomial(
            1, 1 - self.corruption_level, (self.num_users, self.num_items))
        total_loss = 0.0
        all_users = np.arange(self.num_users)
        users_iter = DataIterator(all_users, batch_size=self.batch_size, shuffle=True, drop_last=False)
        training_start_time = time()
        for batch_set_idx in users_iter:
            # Dense multi-hot interaction matrix for this user batch.
            batch_matrix = np.zeros((len(batch_set_idx), self.num_items))
            for idx, user_id in enumerate(batch_set_idx):
                items_by_user_id = self.train_dict[user_id]
                batch_matrix[idx, items_by_user_id] = 1
            feed_dict = {
                self.mask_corruption: mask_corruption_np[batch_set_idx, :],
                self.input_R: batch_matrix
            }
            _, loss = self.sess.run([self.optimizer, self.loss], feed_dict=feed_dict)
            total_loss += loss
        # Loss is averaged over all users, not over batches.
        self.logger.info("[iter %d : loss : %f, time: %f]" % (epoch, total_loss / self.num_users, time() - training_start_time))
        if epoch % self.verbose == 0:
            self.logger.info("epoch %d:\t%s" % (epoch, self.evaluate()))
def _shuffle_index(self, seq_index):
    """Yield shuffled mini-batches of indices while keeping rough locality.

    The sequence is first cut into large chunks (32 batches each), the
    chunk order is shuffled, and indices are shuffled within each chunk;
    incomplete trailing batches inside a chunk are dropped.
    """
    chunk_size = self.batch_size * 32
    # Materialize the chunking pass so the chunks themselves can be shuffled.
    chunks = list(DataIterator(seq_index, batch_size=chunk_size,
                               shuffle=False, drop_last=False))
    shuffled_chunks = DataIterator(chunks, batch_size=1,
                                   shuffle=True, drop_last=False)
    for (chunk,) in shuffled_chunks:  # batch_size=1 -> unwrap the singleton
        batches = DataIterator(chunk, batch_size=self.batch_size,
                               shuffle=True, drop_last=True)
        yield from batches
def __iter__(self):
    """Yield (users, items) mini-batches of the stored interactions."""
    batches = DataIterator(self.users_list, self.pos_items_list,
                           batch_size=self.batch_size,
                           shuffle=self.shuffle,
                           drop_last=self.drop_last)
    for user_batch, item_batch in batches:
        yield user_batch, item_batch
def build_dev_iterator():
    """Wrap the dev buckets in a HotpotDataset and return its DataIterator."""
    dataset = HotpotDataset(dev_buckets)
    return DataIterator(dataset,
                        config.para_limit,
                        config.ques_limit,
                        config.char_limit,
                        config.sent_limit,
                        batch_size=config.batch_size,
                        num_workers=2)
def __iter__(self):
    # Resample negatives on every pass so each epoch sees fresh negatives.
    # NOTE(review): this reads `self.item_num`, while the sampler __init__
    # code in this file assigns `dataset.num_items` to `self.item_unm` —
    # confirm which attribute the enclosing class actually defines.
    neg_items_list = _sampling_negative_items(self.user_pos_len, self.neg_num, self.item_num, self.user_pos_dict)
    data_iter = DataIterator(self.users_list, self.recent_items_list, self.pos_items_list, neg_items_list, batch_size=self.batch_size, shuffle=self.shuffle, drop_last=self.drop_last)
    for bat_users, bat_recent_items, bat_pos_items, bat_neg_items in data_iter:
        yield bat_users, bat_recent_items, bat_pos_items, bat_neg_items
def evaluate(self, model, test_users=None):
    """Evaluate `model`.

    Args:
        model: The model need to be evaluated. This model must have
            a method `predict(users, items)`, where `users` is a list of
            users and the return is a 2-D array that contains `users`
            rating/ranking scores on the candidate (or all) items.
        test_users: Optional subset of users to evaluate; defaults to all
            users with test positives.

    Returns:
        str: A single-line string consist of all results, such as
            `"0.18663847 0.11239596 0.35824192 0.21479650"`.
    """
    # B: batch size
    # N: the number of items
    test_users = test_users if test_users is not None else list(self.user_pos_test.keys())
    if not isinstance(test_users, (list, tuple, set, np.ndarray)):
        raise TypeError("'test_user' must be a list, tuple, set or numpy array!")
    test_users = DataIterator(test_users, batch_size=self.batch_size, shuffle=False, drop_last=False)
    batch_result = []
    for batch_users in test_users:
        if self.user_neg_test is not None:
            # Rank each user's positives against their pre-sampled negatives;
            # positives occupy the leading positions of each candidate list.
            candidate_items = [list(self.user_pos_test[u]) + self.user_neg_test[u] for u in batch_users]
            test_items = [set(range(len(self.user_pos_test[u]))) for u in batch_users]
            ranking_score = model.predict(batch_users, candidate_items)  # (B,N)
            # Candidate lists differ in length; pad with -inf so padding never ranks.
            ranking_score = pad_sequences(ranking_score, value=-np.inf, dtype=np.float32)
            ranking_score = np.array(ranking_score)
        else:
            test_items = [self.user_pos_test[u] for u in batch_users]
            ranking_score = model.predict(batch_users, None)  # (B,N)
            ranking_score = np.array(ranking_score)
        # set the ranking scores of training items to -inf,
        # then the training items will be sorted at the end of the ranking list.
        # NOTE(review): in the candidate-list branch the score columns index the
        # candidate list, yet `train_items` are global item ids — confirm this
        # masking is intended there.
        for idx, user in enumerate(batch_users):
            if user in self.user_pos_train and len(self.user_pos_train[user]) > 0:
                train_items = self.user_pos_train[user]
                ranking_score[idx][train_items] = -np.inf
        result = self.eval_score_matrix(ranking_score, test_items, self.metrics, top_k=self.max_top, thread_num=self.num_thread)  # (B,k*metric_num)
        batch_result.append(result)
    # concatenate the batch results to a matrix
    all_user_result = np.concatenate(batch_result, axis=0)  # (num_users, metrics_num*max_top)
    final_result = np.mean(all_user_result, axis=0)  # (1, metrics_num*max_top)
    final_result = np.reshape(final_result, newshape=[self.metrics_num, self.max_top])  # (metrics_num, max_top)
    # Keep only the requested cutoffs; top_show holds 1-based k values.
    final_result = final_result[:, self.top_show - 1]
    final_result = np.reshape(final_result, newshape=[-1])
    buf = '\t'.join([("%.8f" % x).ljust(12) for x in final_result])
    return buf
def build_train_iterator():
    """Build a randomly-sampled DataIterator over the training HotpotDataset."""
    dataset = HotpotDataset(train_buckets)
    sampler = RandomSampler(dataset)
    return DataIterator(dataset, config.para_limit, config.ques_limit,
                        config.char_limit, config.sent_limit,
                        batch_size=config.batch_size,
                        sampler=sampler,
                        num_workers=2)
def __init__(self, dataset, high_order=1, neg_num=1, batch_size=1, shuffle=False, drop_last=False):
    """Initializes a new `TimeOrderPairwiseSampler` instance.

    Args:
        dataset (data.Dataset): An instance of `Dataset`.
        high_order (int): The number of recent items. Defaults to `1`.
        neg_num (int): How many negative items for each positive item.
            Defaults to `1`.
        batch_size (int): How many samples per batch to load.
            Defaults to `1`.
        shuffle (bool): Whether reshuffling the samples at every epoch.
            Defaults to `False`.
        drop_last (bool): Whether dropping the last incomplete batch.
            Defaults to `False`.

    Raises:
        ValueError: If `high_order` is negative.
    """
    super(TimeOrderPairwiseSampler, self).__init__(batch_size=batch_size, drop_last=drop_last)
    if high_order < 0:
        # Fixed error message: the original read "can be a negative integer".
        raise ValueError("'high_order' cannot be a negative integer!")
    self.shuffle = shuffle
    self.neg_num = neg_num
    # Fixed attribute typo: negative sampling elsewhere reads `self.item_num`;
    # the misspelled `item_unm` is kept as a backward-compatible alias.
    self.item_num = dataset.num_items
    self.item_unm = self.item_num
    user_pos_dict = dataset.get_user_train_dict(by_time=True)
    # Sets make per-user exclusion membership tests O(1).
    self.user_pos_dict = {
        user: set(items)
        for user, items in user_pos_dict.items()
    }
    user_list, recent_items_list, next_item_list = [], [], []
    for user, seq_items in user_pos_dict.items():
        # Each instance pairs `high_order` recent items with the next item.
        num_instance = len(seq_items) - high_order
        user_list.extend([user] * num_instance)
        if high_order == 1:
            # Single recent item: store scalars, not length-1 lists.
            r_items = [seq_items[idx] for idx in range(num_instance)]
        else:
            r_items = [
                seq_items[idx:][:high_order] for idx in range(num_instance)
            ]
        recent_items_list.extend(r_items)
        next_item_list.extend(seq_items[high_order:])
    self.ui_interactions = DataIterator(user_list, recent_items_list, next_item_list, batch_size=1, shuffle=self.shuffle, drop_last=False)
def __iter__(self):
    """Yield pairwise (users, pos_items, neg_items) mini-batches."""
    users, pos_items, neg_items = _pairwise_sampling_v2(
        self.user_pos_dict, self.num_trainings, self.item_num)
    batches = DataIterator(users, pos_items, neg_items,
                           batch_size=self.batch_size,
                           shuffle=self.shuffle,
                           drop_last=self.drop_last)
    for user_batch, pos_batch, neg_batch in batches:
        yield user_batch, pos_batch, neg_batch
def __iter__(self):
    """Yield pairwise batches; users are sampled uniformly and `neg_num`
    may be larger than 1."""
    users, pos_items, neg_items = _pairwise_sampling_v3(
        self.user_pos_dict, self.num_trainings, self.item_num, self.neg_num)
    batches = DataIterator(users, pos_items, neg_items,
                           batch_size=self.batch_size,
                           shuffle=self.shuffle,
                           drop_last=self.drop_last)
    for user_batch, pos_batch, neg_batch in batches:
        yield user_batch, pos_batch, neg_batch
def __iter__(self):
    # Pointwise-style iteration: negatives are appended after the positives
    # so they align with self.all_labels.
    # NOTE(review): reads `self.item_num` — confirm the enclosing class sets
    # that attribute (the sampler __init__ code in this file writes `item_unm`).
    neg_items_list = _sampling_negative_items(self.user_pos_len, self.neg_num, self.item_num, self.user_pos_dict)
    neg_items = np.array(neg_items_list, dtype=np.int32)
    # Transpose-then-flatten orders the draws as "all first negatives, then
    # all second negatives, ..." — presumably the layout self.all_labels was
    # built for; verify against where all_labels is constructed.
    neg_items = np.reshape(neg_items.T, [-1]).tolist()
    all_next_items = self.pos_items_list + neg_items
    data_iter = DataIterator(self.users_list, self.recent_items_list, all_next_items, self.all_labels, batch_size=self.batch_size, shuffle=self.shuffle, drop_last=self.drop_last)
    for bat_users, bat_recent_items, bat_next_items, bat_labels in data_iter:
        yield bat_users, bat_recent_items, bat_next_items, bat_labels
def build_dev_iterator():
    """Return the dev-set DataIterator, attaching BERT buckets when enabled."""
    common = (dev_buckets, config.batch_size, para_limit, ques_limit,
              config.char_limit, False, config.sent_limit)
    if config.bert:
        bert_buckets = list(zip(dev_context_buckets, dev_ques_buckets))
        return DataIterator(*common, bert=True, bert_buckets=bert_buckets)
    return DataIterator(*common, bert=False)
def train_script(model):
    """Train a torch sequence model with BCE-with-logits on (seq, pos, neg) triples.

    Args:
        model: Torch module exposing `lr`, `dev`, `epochs`, `batch_size`,
            `l2_emb`, `item_emb`, `get_train_data()`, `evaluate_model()`
            and a forward returning (pos_logits, neg_logits, all_logits).
    """
    bce_criterion = torch.nn.BCEWithLogitsLoss()
    adam_optimizer = torch.optim.Adam(model.parameters(), lr=model.lr,
                                      betas=(0.9, 0.98))
    model = model.to(model.dev)
    for epoch in range(model.epochs):
        item_seq_list, item_pos_list, item_neg_list = model.get_train_data()
        data = DataIterator(item_seq_list, item_pos_list, item_neg_list,
                            batch_size=model.batch_size, shuffle=True)
        for seq, pos, neg in data:
            seq, pos, neg = np.array(seq), np.array(pos), np.array(neg)
            # Renamed third output: the original bound it to `all`,
            # shadowing the builtin. The value itself is unused here.
            pos_logits, neg_logits, all_logits = model(seq, pos, neg)
            pos_labels, neg_labels = torch.ones(pos_logits.shape, device=model.dev), torch.zeros(
                neg_logits.shape, device=model.dev)
            adam_optimizer.zero_grad()
            # Only score positions holding real items (0 is the padding id).
            indices = np.where(pos != 0)
            loss = bce_criterion(pos_logits[indices], pos_labels[indices])
            loss += bce_criterion(neg_logits[indices], neg_labels[indices])
            # L2 regularization on the item embedding table.
            for param in model.item_emb.parameters():
                loss += model.l2_emb * torch.norm(param)
            loss.backward()
            adam_optimizer.step()
        if epoch % 100 == 0:
            result = model.evaluate_model()
            model.logger.info("epoch %d:\t%s" % (epoch, result))
def evaluate(self, model):
    """Leave-one-out evaluation: every test user must have exactly one test item."""
    # B: batch size
    # N: the number of items
    test_users = DataIterator(list(self.user_pos_test.keys()), batch_size=self.batch_size, shuffle=False, drop_last=False)
    batch_result = []
    for batch_users in test_users:
        if self.user_neg_test is not None:
            # Rank the single positive against the pre-sampled negatives;
            # the positive is placed at index 0 of each candidate list.
            candidate_items = []
            for user in batch_users:
                num_item = len(self.user_pos_test[user])
                if num_item != 1:
                    raise ValueError("the number of test item of user %d is %d" % (user, num_item))
                candidate_items.append([self.user_pos_test[user][0]] + self.user_neg_test[user])
            test_items = [0] * len(batch_users)
            ranking_score = model.predict(batch_users, candidate_items)  # (B,N)
            ranking_score = np.array(ranking_score)
        else:
            # Rank the single positive against the full item catalogue.
            test_items = []
            for user in batch_users:
                num_item = len(self.user_pos_test[user])
                if num_item != 1:
                    raise ValueError("the number of test item of user %d is %d" % (user, num_item))
                test_items.append(self.user_pos_test[user][0])
            ranking_score = model.predict(batch_users, None)  # (B,N)
            ranking_score = np.array(ranking_score)
        # set the ranking scores of training items to -inf,
        # then the training items will be sorted at the end of the ranking list.
        # NOTE(review): in the candidate branch the columns index the candidate
        # list, while `train_items` are global item ids — confirm this masking
        # is intended there.
        for idx, user in enumerate(batch_users):
            train_items = self.user_pos_train[user]
            ranking_score[idx][train_items] = -np.inf
        result = eval_score_matrix_loo(ranking_score, test_items, top_k=self.max_top, thread_num=None)  # (B,k*metric_num)
        batch_result.append(result)
    # concatenate the batch results to a matrix
    all_user_result = np.concatenate(batch_result, axis=0)
    final_result = np.mean(all_user_result, axis=0)  # mean
    final_result = np.reshape(final_result, newshape=[self.metrics_num, self.max_top])
    # Keep only the requested cutoffs; top_show holds 1-based k values.
    final_result = final_result[:, self.top_show-1]
    final_result = np.reshape(final_result, newshape=[-1])
    buf = '\t'.join([("%.8f" % x).ljust(12) for x in final_result])
    return buf
def predict(self, users, items):
    # Score users by building a session graph from each user's most recent
    # `max_seq_len` training items. The final partial batch is padded to a
    # full batch because the graph placeholders expect a fixed batch size.
    users = DataIterator(users, batch_size=self.batch_size, shuffle=False, drop_last=False)
    all_ratings = []
    for bat_user in users:
        cur_batch_size = len(bat_user)
        bat_items = [
            self.user_pos_train[user][-self.max_seq_len:] for user in bat_user
        ]
        bat_adj_in, bat_adj_out, bat_alias, bat_items, bat_mask = self._build_session_graph(
            bat_items)
        if cur_batch_size < self.batch_size:  # padding
            # Repeat the last row of every tensor to fill the batch; the
            # padded rows are discarded below via [:cur_batch_size].
            pad_size = self.batch_size - cur_batch_size
            bat_adj_in = np.concatenate(
                [bat_adj_in, [bat_adj_in[-1]] * pad_size], axis=0)
            bat_adj_out = np.concatenate(
                [bat_adj_out, [bat_adj_out[-1]] * pad_size], axis=0)
            bat_alias = np.concatenate(
                [bat_alias, [bat_alias[-1]] * pad_size], axis=0)
            bat_items = np.concatenate(
                [bat_items, [bat_items[-1]] * pad_size], axis=0)
            bat_mask = np.concatenate(
                [bat_mask, [bat_mask[-1]] * pad_size], axis=0)
        feed = {
            self.item_ph: bat_items,
            self.adj_in_ph: bat_adj_in,
            self.adj_out_ph: bat_adj_out,
            self.alias_ph: bat_alias,
            self.mask_ph: bat_mask
        }
        bat_ratings = self.sess.run(self.all_logits, feed_dict=feed)
        all_ratings.extend(bat_ratings[:cur_batch_size])  # drop padded rows
    all_ratings = np.array(all_ratings)
    if items is not None:
        # Gather only each user's requested item scores.
        all_ratings = [
            all_ratings[idx][u_item] for idx, u_item in enumerate(items)
        ]
    return all_ratings
def predict(self, user_ids, items=None):
    """Predict scores for `user_ids` from each user's most recent item;
    optionally gather only the given per-user item scores."""
    user_batches = DataIterator(user_ids, batch_size=64,
                                shuffle=False, drop_last=False)
    ratings = []
    for batch in user_batches:
        recent_items = [self.train_dict[u][-1] for u in batch]
        feed = {
            self.user_input: batch,
            self.item_input_recent: recent_items
        }
        ratings.append(self.sess.run(self.prediction, feed_dict=feed))
    ratings = np.vstack(ratings)
    if items is None:
        return ratings
    return [ratings[idx][item] for idx, item in enumerate(items)]
def predict(self, users, items=None):
    """Score all items from each user's padded training sequence."""
    user_batches = DataIterator(users, batch_size=512,
                                shuffle=False, drop_last=False)
    scores = []
    for batch in user_batches:
        seqs = [self.user_pos_train[u] for u in batch]
        seqs = pad_sequences(seqs, value=self.items_num,
                             max_len=self.max_len,
                             padding='pre', truncating='pre')
        feed = {self.item_seq_ph: seqs, self.is_training: False}
        scores.extend(self.sess.run(self.all_logits, feed_dict=feed))
    scores = np.array(scores, dtype=np.float32)
    if items is not None:
        scores = [scores[idx][item] for idx, item in enumerate(items)]
    return scores
def predict_script(model, users, items=None):
    """Score items for `users` with a torch model that consumes
    (seq, pos, neg) triples; returns per-user numpy score arrays."""
    user_batches = DataIterator(users, batch_size=512,
                                shuffle=False, drop_last=False)
    # Shared padding configuration: pad/truncate from the front to max_len.
    pad_kwargs = dict(value=model.items_num, max_len=model.max_len,
                      padding='pre', truncating='pre')
    ratings = []
    for batch in user_batches:
        seqs = pad_sequences([model.user_pos_train[u] for u in batch],
                             **pad_kwargs)
        positives = [model.user_pos_train[u][1:] for u in batch]
        neg_counts = [len(p) for p in positives]
        exclusion = [model.user_pos_train[u] for u in batch]
        negatives = batch_randint_choice(model.items_num, neg_counts,
                                         replace=True, exclusion=exclusion)
        positives = pad_sequences(positives, **pad_kwargs)
        negatives = pad_sequences(negatives, **pad_kwargs)
        _, _unused, batch_scores = model(seqs, positives, negatives)
        ratings.extend(batch_scores)
    ratings = [t.detach().cpu().numpy() for t in ratings]
    if items is not None:
        ratings = [ratings[idx][item] for idx, item in enumerate(items)]
    return ratings
def _sample_negative(self, users_list):
    """Sample `neg_samples` negatives for every occurrence of each user,
    excluding the user's training positives."""
    neg_per_user = {}
    uniq_users, counts = np.unique(users_list, return_counts=True)
    batches = DataIterator(uniq_users, counts, batch_size=1024, shuffle=False)
    for users, user_counts in batches:
        # One draw covers all occurrences of the user at once.
        totals = [c * self.neg_samples for c in user_counts]
        exclusion = [self.user_pos_train[u] for u in users]
        sampled = batch_randint_choice(self.items_num, totals,
                                       replace=True, exclusion=exclusion)
        for u, neg in zip(users, sampled):
            neg_per_user[u] = neg
    neg_items_list = []
    for u, c in zip(uniq_users, counts):
        # Split the flat draw into one row of neg_samples per occurrence.
        neg_items_list.extend(
            np.reshape(neg_per_user[u], newshape=[c, self.neg_samples]))
    return neg_items_list
def predict(self, users, items=None):
    """Compute logits over all items from each user's test sequence."""
    user_batches = DataIterator(users, batch_size=512,
                                shuffle=False, drop_last=False)
    scores = []
    for batch in user_batches:
        seqs = [self.user_test_seq[u] for u in batch]
        feed = {
            self.user_ph: batch,
            self.item_seq_ph: seqs,
            self.is_training: False
        }
        scores.extend(self.sess.run(self.all_logits, feed_dict=feed))
    scores = np.array(scores, dtype=np.float32)
    if items is not None:
        scores = [scores[idx][item] for idx, item in enumerate(items)]
    return scores
def get_train_data(self):
    """Build padded (seq, pos, neg) training triples for every user.

    For each user: the input sequence drops the last item, the positive
    targets drop the first, and one negative is drawn per positive while
    excluding the user's positives.
    """
    item_seq_list, item_pos_list, item_neg_list = [], [], []

    def _pad(seqs):
        # Shared padding config: pad/truncate from the front to max_len.
        return pad_sequences(seqs, value=self.items_num, max_len=self.max_len,
                             padding='pre', truncating='pre')

    user_batches = DataIterator(list(self.user_pos_train.keys()),
                                batch_size=1024, shuffle=False)
    for batch in user_batches:
        seqs = [self.user_pos_train[u][:-1] for u in batch]
        positives = [self.user_pos_train[u][1:] for u in batch]
        neg_counts = [len(p) for p in positives]
        exclusion = [self.user_pos_train[u] for u in batch]
        negatives = batch_randint_choice(self.items_num, neg_counts,
                                         replace=True, exclusion=exclusion)
        item_seq_list.extend(_pad(seqs))
        item_pos_list.extend(_pad(positives))
        item_neg_list.extend(_pad(negatives))
    return item_seq_list, item_pos_list, item_neg_list