def _initialize_teacher(self, interactions):
    # initialize teacher model
    self._num_items = interactions.num_items
    self._num_users = interactions.num_users
    self._teacher_net = Caser(self._num_users,
                              self._num_items,
                              self._teacher_model_args)

    # load teacher model
    if os.path.isfile(self._teacher_model_path):
        output_str = ("loading teacher model from %s" %
                      self._teacher_model_path)
        print(output_str)
        checkpoint = torch.load(self._teacher_model_path,
                                map_location='cpu')
        self._teacher_net.load_state_dict(checkpoint['state_dict'])
        output_str = "loaded model %s (epoch %d)" % (
            self._teacher_model_path, checkpoint['epoch_num'])
        print(output_str)
    else:
        output_str = "no model found at %s" % self._teacher_model_path
        print(output_str)

    # set teacher model to evaluation mode
    self._teacher_net.eval()
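# The loader above expects a checkpoint dict with 'state_dict' and
# 'epoch_num' keys (the PyTorch training loop later in this file also
# stores 'optimizer'). A minimal sketch of a compatible _save_checkpoint
# helper, assuming plain torch.save serialization:
import os

import torch


def _save_checkpoint(state, filename):
    # create the target directory if needed, then serialize the dict
    directory = os.path.dirname(filename)
    if directory:
        os.makedirs(directory, exist_ok=True)
    torch.save(state, filename)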
def _initialize(self, interactions):
    self._num_items = interactions.num_items
    self._num_users = interactions.num_users
    self.test_sequence = interactions.test_sequences

    self._net = Caser(self._num_users, self._num_items, self.args)

    self.sess = tf.Session()
    init = tf.global_variables_initializer()
    self.sess.run(init)
def _initialize(self, interactions):
    self._num_items = interactions.num_items
    self._num_users = interactions.num_users
    self.test_sequence = interactions.test_sequences

    self._net = Caser(self._num_users, self._num_items, self.model_args)

    self._optimizer = optim.Adam(self._net.parameters(),
                                 weight_decay=self._l2,
                                 lr=self._learning_rate)
def test(args):
    data = Interactions(args.test_root)
    data.to_sequence(args.L, args.T)

    sequences_np = data.sequences.sequences
    targets_np = data.sequences.targets
    users_np = data.sequences.user_ids.reshape(-1, 1)

    n_test = sequences_np.shape[0]
    print('total test instances: %d' % n_test)

    num_users = data.num_users
    num_items = data.num_items
    NDCG, HR, MRR = 0.0, 0.0, 0.0

    # every row of item_ids is the full item catalog, so each test user
    # is scored against all items
    item_ids = np.zeros((args.batch_size, num_items))
    for i in range(args.batch_size):
        item_ids[i] = np.arange(num_items)

    test_batches = n_test // args.batch_size
    model = Caser(num_users, num_items, args)

    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    with tf.Session(config=gpu_config) as sess:
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(args.check_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('Restore model from {} successfully!'.format(args.check_dir))
        else:
            print('Restore model from {} failed!'.format(args.check_dir))
            return

        for batch_num in range(test_batches):
            sequences = sequences_np[batch_num * args.batch_size:
                                     (batch_num + 1) * args.batch_size]
            targets = targets_np[batch_num * args.batch_size:
                                 (batch_num + 1) * args.batch_size]
            users = users_np[batch_num * args.batch_size:
                             (batch_num + 1) * args.batch_size]
            _, top_k_index = model.predict(sess, sequences, users, item_ids)
            hr, ndcg, mrr = 0.0, 0.0, 0.0
            for i in range(args.batch_size):
                cur_top_k = top_k_index[i]
                for j in range(args.top_k):
                    if targets[i][0] == cur_top_k[j]:
                        # target found at 0-based rank j of the top-k list
                        hr += 1
                        mrr += 1 / (1 + j)
                        dcg = 1 / np.log2(1 + 1 + j)
                        idcg = 1 / np.log2(1 + 1)
                        ndcg += dcg / idcg
                        break
            HR += hr / args.batch_size
            NDCG += ndcg / args.batch_size
            MRR += mrr / args.batch_size

    return HR / test_batches, NDCG / test_batches, MRR / test_batches
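# Worked example of the per-hit metric math used above: if the target
# item appears at 0-based position j of the ranked list, the loop adds
# HR=1, MRR=1/(1+j), and NDCG=(1/log2(2+j))/(1/log2(2)), since with a
# single relevant item the ideal DCG is a hit at position 0. A
# hypothetical standalone sanity check (not part of the training code):
import numpy as np


def hit_metrics(target, top_k):
    """Return (hr, mrr, ndcg) for a single ranked list."""
    for j, item in enumerate(top_k):
        if item == target:
            dcg = 1 / np.log2(1 + 1 + j)
            idcg = 1 / np.log2(1 + 1)  # ideal: hit at position 0
            return 1.0, 1 / (1 + j), dcg / idcg
    return 0.0, 0.0, 0.0


assert hit_metrics(7, [7, 3, 5]) == (1.0, 1.0, 1.0)  # hit at rank 1
print(hit_metrics(5, [7, 3, 5]))  # (1.0, 0.333..., 0.5): hit at rank 3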
def _initialize_student(self, interactions):
    self._num_items = interactions.num_items
    self._num_users = interactions.num_users
    self.test_sequence = interactions.test_sequences

    self._net = gpu(Caser(self._num_users,
                          self._num_items,
                          self._student_model_args),
                    self._use_cuda)

    self._optimizer = optim.Adam(self._net.parameters(),
                                 weight_decay=self._l2,
                                 lr=self._learning_rate)

    self._loss_func = weighted_sigmoid_log_loss
def _initialize(self):
    print("train/test examination...")
    n_user, n_item, item_map = examination(self.train_path, self.test_path)
    self._num_items = len(item_map) + 1  # for 0 padding
    self._num_users = n_user
    self.item_map = item_map
    self.item_cumsum = self._get_item_cumsum()

    # get pre_train embeddings
    print(self.pre_train_path)
    print(os.path.isfile(self.pre_train_path))
    if self.pre_train_path and os.path.isfile(self.pre_train_path):
        print("loading pre_train value")
        w2v = Word2Vec.load(self.pre_train_path)
        dims = w2v.trainables.layer1_size
        pre_train_array = list()
        sort_index = list()
        for k, v in item_map.items():
            sort_index.append(v[0])
            try:
                pre_train_array.append(w2v.wv.get_vector(str(k)))
            except KeyError:
                # item missing from the word2vec vocabulary: random init
                pre_train_array.append(np.random.randn(dims))
        # add 0 padding:
        if 0 not in sort_index:
            sort_index.append(0)
            pre_train_array.append(np.random.randn(dims))
        # reorder rows so row index matches the mapped item id
        pre_train_array = np.array(pre_train_array)
        pre_train_array = pre_train_array[np.argsort(sort_index)]
    else:
        print("no pre_train value")
        pre_train_array = None

    self._net = gpu(Caser(self._num_users,
                          self._num_items,
                          self.model_args,
                          pre_train_array),
                    self._use_cuda)

    # only optimize parameters that require gradients
    par = filter(lambda p: p.requires_grad, self._net.parameters())
    self._optimizer = optim.Adam(par,
                                 weight_decay=self._l2,
                                 lr=self._learning_rate)
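# The pre_train file loaded above is a gensim Word2Vec model trained on
# item-id "sentences" (one interaction sequence per sentence, item ids
# as string tokens). A minimal sketch of producing such a file, assuming
# gensim 3.x (the w2v.trainables.layer1_size access above implies that
# version) and a hypothetical `user_sequences` iterable of item-id lists:
from gensim.models import Word2Vec

sentences = [[str(item) for item in seq] for seq in user_sequences]
w2v = Word2Vec(sentences, size=50, window=5, min_count=1, sg=1)
w2v.save('pre_train.w2v')  # this path is then passed as self.pre_train_path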
class Recommender(object):
    """
    Contains the attributes and methods needed to train a sequential
    recommendation model. Models are trained on many tuples of
    (users, sequences, targets, negatives), where the negatives come from
    negative sampling: for any known tuple of (user, sequence, targets),
    one or more items are randomly sampled to act as negatives.

    Parameters
    ----------
    n_iter: int,
        Number of iterations to run.
    batch_size: int,
        Minibatch size.
    l2: float,
        L2 loss penalty, also known as the 'lambda' of L2 regularization.
    neg_samples: int,
        Number of negative samples to generate for each target.
        If targets=3 and neg_samples=3, then it will sample 9 negatives.
    learning_rate: float,
        Initial learning rate.
    use_cuda: boolean,
        Run the model on a GPU or CPU.
    model_args: args,
        Model-related arguments, like latent dimensions.
    """

    def __init__(self,
                 n_iter=None,
                 batch_size=None,
                 l2=None,
                 neg_samples=None,
                 learning_rate=None,
                 use_cuda=False,
                 checkpoint=None,
                 model_args=None):
        # model related
        self._num_items = None
        self._num_users = None
        self._net = None
        self.model_args = model_args

        # learning related
        self._batch_size = batch_size
        self._n_iter = n_iter
        self._learning_rate = learning_rate
        self._l2 = l2
        self._neg_samples = neg_samples
        self._device = torch.device("cuda" if use_cuda else "cpu")
        self.checkpoint = checkpoint

        # rank evaluation related
        self.test_sequence = None
        self._candidate = dict()

    @property
    def _initialized(self):
        return self._net is not None

    def _initialize(self, interactions):
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users
        self.test_sequence = interactions.test_sequences

        self._net = Caser(self._num_users, self._num_items, self.model_args)

        self._optimizer = optim.Adam(self._net.parameters(),
                                     weight_decay=self._l2,
                                     lr=self._learning_rate)

    def fit(self, train, test, verbose=False):
        """
        The general training loop to fit the model.

        Parameters
        ----------
        train: :class:`interactions.Interactions`
            training instances, also contains test sequences
        test: :class:`interactions.Interactions`
            only contains targets for test sequences
        verbose: bool, optional
            print the logs
        """
        # convert sequences, targets and users to numpy arrays
        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids.reshape(-1, 1)

        L, T = train.sequences.L, train.sequences.T

        n_train = sequences_np.shape[0]
        print('total training instances: %d' % n_train)

        if not self._initialized:
            self._initialize(train)

        start_epoch = 1
        if self.checkpoint:
            print("loading checkpoint from %s" % self.checkpoint)
            checkpoint = torch.load(self.checkpoint)
            start_epoch = checkpoint['epoch_num']
            self._net.load_state_dict(checkpoint['state_dict'])
            self._optimizer.load_state_dict(checkpoint['optimizer'])
            print("loaded checkpoint %s (epoch %d)" % (self.checkpoint,
                                                       start_epoch))

        # compute number of parameters
        print("Number of params: %d" % compute_model_size(self._net))

        for epoch_num in range(start_epoch, self._n_iter + 1):
            t1 = time()

            # set model to training mode and move it to the corresponding device
            self._net.train()
            self._net = self._net.to(self._device)

            users_np, sequences_np, targets_np = shuffle(users_np,
                                                         sequences_np,
                                                         targets_np)

            negatives_np = self._generate_negative_samples(
                users_np, train, n=self._neg_samples)

            # convert numpy arrays to PyTorch tensors and move them to the
            # corresponding device
            users, sequences, targets, negatives = (
                torch.from_numpy(users_np).long(),
                torch.from_numpy(sequences_np).long(),
                torch.from_numpy(targets_np).long(),
                torch.from_numpy(negatives_np).long())
            users, sequences, targets, negatives = (users.to(self._device),
                                                    sequences.to(self._device),
                                                    targets.to(self._device),
                                                    negatives.to(self._device))

            epoch_loss = 0.0

            for (minibatch_num,
                 (batch_users, batch_sequences, batch_targets,
                  batch_negatives)) in enumerate(
                      minibatch(users, sequences, targets, negatives,
                                batch_size=self._batch_size)):
                # concatenate all variables to get predictions in one run
                items_to_predict = torch.cat((batch_targets,
                                              batch_negatives), 1)
                items_prediction = self._net(batch_sequences, batch_users,
                                             items_to_predict)
                (targets_prediction, negatives_prediction) = torch.split(
                    items_prediction,
                    [batch_targets.size(1), batch_negatives.size(1)], dim=1)

                self._optimizer.zero_grad()

                # compute the binary cross-entropy loss
                loss = sigmoid_log_loss(targets_prediction,
                                        negatives_prediction)
                epoch_loss += loss.item()

                loss.backward()
                self._optimizer.step()

            epoch_loss /= minibatch_num + 1

            t2 = time()
            if verbose and epoch_num % 10 == 0:
                precision, recall, ndcg, mean_aps = evaluate_ranking(
                    self, test, train, k=[3, 5, 10])
                str_precs = "precisions=%.4f,%.4f,%.4f" % tuple(
                    [np.mean(a) for a in precision])
                str_recalls = "recalls=%.4f,%.4f,%.4f" % tuple(
                    [np.mean(a) for a in recall])
                str_ndcgs = "ndcgs=%.4f,%.4f,%.4f" % tuple(
                    [np.mean(a) for a in ndcg])
                output_str = "Epoch %d [%.1f s]\tloss=%.4f, " \
                             "map=%.4f, %s, %s, %s[%.1f s]" % (
                                 epoch_num, t2 - t1, epoch_loss, mean_aps,
                                 str_precs, str_recalls, str_ndcgs,
                                 time() - t2)
                print(output_str)
            else:
                output_str = "Epoch %d [%.1f s]\tloss=%.4f [%.1f s]" % (
                    epoch_num, t2 - t1, epoch_loss, time() - t2)
                print(output_str)

            _save_checkpoint(
                {
                    'epoch_num': epoch_num,
                    'state_dict': self._net.state_dict(),
                    'optimizer': self._optimizer.state_dict(),
                },
                'checkpoints/gowalla-caser-dim=%d.pth.tar' % self.model_args.d)

    def _generate_negative_samples(self, users, interactions, n):
        """
        Sample negatives from a candidate set for each user. The candidate
        set of a user is {All Items} minus {Items Rated by User}.

        Parameters
        ----------
        users: array of np.int64
            sequence users
        interactions: :class:`interactions.Interactions`
            training instances, used to generate candidates
        n: int
            total number of negatives to sample for each sequence
        """
        users_ = users.squeeze()
        negative_samples = np.zeros((users_.shape[0], n), np.int64)
        if not self._candidate:
            all_items = np.arange(interactions.num_items - 1) + 1  # 0 for padding
            train = interactions.tocsr()
            for user, row in enumerate(train):
                self._candidate[user] = list(set(all_items) - set(row.indices))

        for i, u in enumerate(users_):
            for j in range(n):
                x = self._candidate[u]
                negative_samples[i, j] = x[np.random.randint(len(x))]

        return negative_samples

    def predict(self, user_id, item_ids=None, model=None):
        """
        Make predictions for evaluation: given a user id, first retrieve
        the test sequence associated with that user, then compute the
        recommendation scores for items.

        Parameters
        ----------
        user_id: int
            user id for which prediction scores are needed.
        item_ids: array, optional
            Array containing the item ids for which prediction scores
            are desired. If not supplied, predictions for all items
            will be computed.
        """
        if self.test_sequence is None:
            raise ValueError('Missing test sequences, cannot make predictions')

        if model is None:
            model = self._net

        # set model to evaluation mode
        model.eval()

        with torch.no_grad():
            sequences_np = self.test_sequence.sequences[user_id, :]
            sequences_np = np.atleast_2d(sequences_np)

            if item_ids is None:
                item_ids = np.arange(self._num_items).reshape(-1, 1)

            sequences = torch.from_numpy(
                sequences_np.astype(np.int64).reshape(1, -1))
            item_ids = torch.from_numpy(item_ids.astype(np.int64))
            user_id = torch.from_numpy(np.array([[user_id]]).astype(np.int64))

            user, sequences, items = (user_id.to(self._device),
                                      sequences.to(self._device),
                                      item_ids.to(self._device))

            out = model(sequences, user, items, for_pred=True)

        return cpu(out.data).numpy().flatten()
class DistilledRecommender(Recommender):
    """
    Contains the attributes and methods needed to train a sequential
    recommendation model with ranking distillation [1]. Models are trained
    on many tuples of (users, sequences, targets, negatives), where the
    negatives come from negative sampling: for any known tuple of
    (user, sequence, targets), one or more items are randomly sampled to
    act as negatives.

    [1] Ranking Distillation: Learning Compact Ranking Models With High
        Performance for Recommender System, Jiaxi Tang and Ke Wang, KDD '18

    Parameters
    ----------
    n_iter: int,
        Number of iterations to run.
    batch_size: int,
        Minibatch size.
    l2: float,
        L2 loss penalty, also known as the 'lambda' of L2 regularization.
    neg_samples: int,
        Number of negative samples to generate for each target.
    learning_rate: float,
        Initial learning rate.
    use_cuda: boolean,
        Run the model on a GPU or CPU.
    teacher_model_path: string,
        Path to teacher's model checkpoint.
    teacher_topk_path: string,
        Path to teacher's top-K ranking cache for each training instance.
    lamda: float
        Hyperparameter for tuning the sharpness of the position
        importance weight.
    mu: float
        Hyperparameter for tuning the sharpness of the ranking
        discrepancy weight.
    num_dynamic_samples: int
        Number of samples used for estimating the student's rank.
    dynamic_start_epoch: int
        Number of iterations before starting to use the hybrid of the
        two weights.
    K: int
        Length of teacher's exemplary ranking.
    teach_alpha: float
        Weight for balancing ranking loss and distillation loss.
    student_model_args: args,
        Student-model-related arguments, like latent dimensions.
    teacher_model_args: args,
        Teacher-model-related arguments, like latent dimensions.
    """

    def __init__(self,
                 n_iter=None,
                 batch_size=None,
                 l2=None,
                 neg_samples=None,
                 learning_rate=None,
                 use_cuda=False,
                 teacher_model_path=None,
                 teacher_topk_path=None,
                 lamda=None,
                 mu=None,
                 num_dynamic_samples=None,
                 dynamic_start_epoch=None,
                 K=None,
                 teach_alpha=None,
                 student_model_args=None,
                 teacher_model_args=None):
        # data related
        self.L = None
        self.T = None

        # model related
        self._num_items = None
        self._num_users = None
        self._teacher_net = None  # teacher model
        self._student_net = None  # student model
        self._student_model_args = student_model_args
        self._teacher_model_args = teacher_model_args

        # learning related
        self._batch_size = batch_size
        self._n_iter = n_iter
        self._learning_rate = learning_rate
        self._l2 = l2
        self._neg_samples = neg_samples
        self._device = torch.device("cuda" if use_cuda else "cpu")

        # ranking distillation related
        self._teach_alpha = teach_alpha
        self._lambda = lamda
        self._mu = mu
        self._num_dynamic_samples = num_dynamic_samples
        self._dynamic_start_epoch = dynamic_start_epoch
        self._K = K
        self._teacher_model_path = teacher_model_path
        self._teacher_topk_path = teacher_topk_path
        self._weight_renormalize = False

        # rank evaluation related
        self.test_sequence = None
        self._candidate = dict()

    @property
    def _teacher_initialized(self):
        return self._teacher_net is not None

    def _initialize_teacher(self, interactions):
        # initialize teacher model
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users
        self._teacher_net = Caser(self._num_users,
                                  self._num_items,
                                  self._teacher_model_args)
        # load teacher model
        if os.path.isfile(self._teacher_model_path):
            output_str = ("loading teacher model from %s" %
                          self._teacher_model_path)
            print(output_str)
            checkpoint = torch.load(self._teacher_model_path)
            self._teacher_net.load_state_dict(checkpoint['state_dict'])
            output_str = "loaded model %s (epoch %d)" % (
                self._teacher_model_path, checkpoint['epoch_num'])
            print(output_str)
        else:
            output_str = "no model found at %s" % self._teacher_model_path
            print(output_str)

        # set teacher model to evaluation mode
        self._teacher_net.eval()

    @property
    def _student_initialized(self):
        return self._student_net is not None

    def _initialize_student(self, interactions):
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users
        self.test_sequence = interactions.test_sequences

        self._student_net = Caser(self._num_users,
                                  self._num_items,
                                  self._student_model_args)

        self._optimizer = optim.Adam(self._student_net.parameters(),
                                     weight_decay=self._l2,
                                     lr=self._learning_rate)

    def fit(self, train, test, verbose=False):
        """
        The general training loop to fit the model.

        Parameters
        ----------
        train: :class:`interactions.Interactions`
            training instances, also contains test sequences
        test: :class:`interactions.Interactions`
            only contains targets for test sequences
        verbose: bool, optional
            print the logs
        """
        # convert sequences, targets and users to numpy arrays
        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids.reshape(-1, 1)

        self.L, self.T = train.sequences.L, train.sequences.T

        n_train = sequences_np.shape[0]
        output_str = 'total training instances: %d' % n_train
        print(output_str)

        if not self._teacher_initialized:
            self._initialize_teacher(train)
        if not self._student_initialized:
            self._initialize_student(train)

        # here we compute the teacher's top-K ranking for each training
        # instance in advance for faster training; we would have to
        # compute the top-K ranking on the fly if it were too large to
        # keep in memory
        if os.path.isfile(self._teacher_topk_path):
            print('found teacher topk file, loading..')
            teacher_ranking = np.load(self._teacher_topk_path)
        else:
            print('teacher topk file not found, generating..')
            teacher_ranking = self._get_teacher_topk(sequences_np, users_np,
                                                     targets_np, k=self._K)

        # initialize static weight (position importance weight)
        weight_static = np.array(range(1, self._K + 1), dtype=np.float32)
        weight_static = np.exp(-weight_static / self._lambda)
        weight_static = weight_static / np.sum(weight_static)
        weight_static = torch.from_numpy(weight_static).to(self._device)
        weight_static = weight_static.unsqueeze(0)

        # initialize dynamic weight (ranking discrepancy weight)
        weight_dynamic = None

        # count number of parameters
        print("Number of params in teacher model: %d" %
              compute_model_size(self._teacher_net))
        print("Number of params in student model: %d" %
              compute_model_size(self._student_net))

        indices = np.arange(n_train)
        start_epoch = 1

        for epoch_num in range(start_epoch, self._n_iter + 1):
            t1 = time()

            # set teacher model to evaluation mode and move it to the
            # corresponding device
            self._teacher_net.eval()
            self._teacher_net = self._teacher_net.to(self._device)

            # set student model to training mode and move it to the
            # corresponding device
            self._student_net.train()
            self._student_net = self._student_net.to(self._device)

            (users_np, sequences_np,
             targets_np), shuffle_indices = shuffle(users_np,
                                                    sequences_np,
                                                    targets_np,
                                                    indices=True)

            # keep indices for retrieving the teacher's top-K ranking
            # from the cache
            indices = indices[shuffle_indices]

            negatives_np = self._generate_negative_samples(
                users_np, train, n=self._neg_samples)
            dynamic_samples_np = self._generate_negative_samples(
                users_np, train, n=self._num_dynamic_samples)

            # convert numpy arrays to PyTorch tensors and move them to the
            # corresponding device
            users, sequences, targets, negatives = (
                torch.from_numpy(users_np).long(),
                torch.from_numpy(sequences_np).long(),
                torch.from_numpy(targets_np).long(),
                torch.from_numpy(negatives_np).long())
            users, sequences, targets, negatives = (users.to(self._device),
                                                    sequences.to(self._device),
                                                    targets.to(self._device),
                                                    negatives.to(self._device))
            dynamic_samples = torch.from_numpy(
                dynamic_samples_np).long().to(self._device)

            epoch_loss = 0.0
            epoch_regular_loss = 0.0

            for (minibatch_num,
                 (batch_indices, batch_users, batch_sequences, batch_targets,
                  batch_negatives, batch_dynamics)) in enumerate(
                      minibatch(indices, users, sequences, targets, negatives,
                                dynamic_samples,
                                batch_size=self._batch_size)):
                # retrieve teacher's top-K ranking given the indices
                batch_candidates = torch.from_numpy(
                    teacher_ranking[batch_indices, :]).long().to(self._device)

                # concatenate all variables to get predictions in one run
                items_to_predict = torch.cat(
                    (batch_targets, batch_negatives, batch_candidates,
                     batch_dynamics), 1)
                items_prediction = self._student_net(batch_sequences,
                                                     batch_users,
                                                     items_to_predict)
                (targets_prediction, negatives_prediction,
                 candidates_prediction, dynamics_prediction) = torch.split(
                     items_prediction,
                     [batch_targets.size(1),
                      batch_negatives.size(1),
                      batch_candidates.size(1),
                      batch_dynamics.size(1)], dim=1)

                self._optimizer.zero_grad()

                if epoch_num > self._dynamic_start_epoch:
                    # compute dynamic weight
                    dynamic_weights = list()
                    for col in range(self._K):
                        col_prediction = candidates_prediction[:, col].unsqueeze(1)
                        # estimate the student's rank of the teacher's
                        # col-th item from the dynamic samples
                        num_smaller_than = torch.sum(
                            col_prediction < dynamics_prediction,
                            dim=1).float()
                        relative_rank = (num_smaller_than /
                                         self._num_dynamic_samples)
                        predicted_rank = torch.floor(self._num_items *
                                                     relative_rank)
                        dynamic_weight = torch.tanh(self._mu *
                                                    (predicted_rank - col))
                        dynamic_weight = torch.clamp(dynamic_weight, min=0.0)
                        dynamic_weights.append(dynamic_weight)
                    weight_dynamic = torch.stack(dynamic_weights, 1)

                    # hybridize the two weights
                    weight = weight_dynamic * weight_static
                    if self._weight_renormalize:
                        weight = F.normalize(weight, p=1, dim=1)
                else:
                    weight = weight_static

                # detach the weight to stop gradients flowing into it
                weight = weight.detach()

                loss, regular_loss = weighted_sigmoid_log_loss(
                    targets_prediction, negatives_prediction,
                    candidates_prediction, weight, self._teach_alpha)

                epoch_loss += loss.item()
                epoch_regular_loss += regular_loss.item()

                loss.backward()
                self._optimizer.step()

            epoch_loss /= minibatch_num + 1
            epoch_regular_loss /= minibatch_num + 1

            t2 = time()
            if verbose and epoch_num % 10 == 0:
                precision, recall, ndcg, mean_aps = evaluate_ranking(
                    self, test, train, k=[3, 5, 10])
                str_precs = "precisions=%.4f,%.4f,%.4f" % tuple(
                    [np.mean(a) for a in precision])
                str_recalls = "recalls=%.4f,%.4f,%.4f" % tuple(
                    [np.mean(a) for a in recall])
                str_ndcgs = "ndcgs=%.4f,%.4f,%.4f" % tuple(
                    [np.mean(a) for a in ndcg])
                output_str = "Epoch %d [%.1f s]\tloss=%.4f, regular_loss=%.4f, " \
                             "map=%.4f, %s, %s, %s[%.1f s]" % (
                                 epoch_num, t2 - t1, epoch_loss,
                                 epoch_regular_loss, mean_aps, str_precs,
                                 str_recalls, str_ndcgs, time() - t2)
                print(output_str)
            else:
                output_str = "Epoch %d [%.1f s]\tloss=%.4f, regular_loss=%.4f[%.1f s]" % (
                    epoch_num, t2 - t1, epoch_loss, epoch_regular_loss,
                    time() - t2)
                print(output_str)

    def _get_teacher_topk(self, sequences, users, targets, k):
        """
        Pre-compute and cache the teacher's top-K ranking for each
        training instance. Doing this makes training with distillation
        much faster.

        Parameters
        ----------
        sequences: array of np.int64
            sequences of items
        users: array of np.int64
            users associated with each sequence
        targets: array of np.int64
            target items the user interacted with given the sequence
        k: int
            length of teacher's exemplary ranking
        """
        with_targets = False

        n_train = sequences.shape[0]
        indices = np.arange(n_train)
        users, sequences = (torch.from_numpy(users).long(),
                            torch.from_numpy(sequences).long())

        # teacher top-K results
        teacher_topk = np.zeros((n_train, k), dtype=np.int64)

        for (batch_indices, batch_users, batch_sequences,
             batch_targets) in minibatch(indices, users, sequences, targets,
                                         batch_size=16):
            cur_batch_size = batch_users.shape[0]
            all_items = torch.arange(start=0, end=self._num_items).repeat(
                cur_batch_size, 1).long()

            teacher_prediction = self._teacher_net(batch_sequences,
                                                   batch_users,
                                                   all_items).detach()
            # take the top 2k items per row, leaving room for de-duplication
            _, tops = teacher_prediction.topk(k * 2, dim=1)
            tops = tops.cpu().numpy()

            new_tops = np.concatenate((batch_targets, tops), axis=1)
            topks = np.zeros((cur_batch_size, k), dtype=np.int64)
            for i, row in enumerate(new_tops):
                _, idx = np.unique(row, return_index=True)
                # whether the teacher's top-K ranking considers target items
                if with_targets:
                    topk = row[np.sort(idx)][:k]
                else:
                    topk = row[np.sort(idx)][self.T:k + self.T]
                topks[i, :] = topk
            teacher_topk[batch_indices, :] = topks

        np.save('gowalla-teacher-dim=%d-top=%d.npy' %
                (self._teacher_model_args.d, k), teacher_topk)
        return teacher_topk

    def predict(self, user_id, item_ids=None, model=None):
        return super(DistilledRecommender, self).predict(
            user_id, item_ids, model=self._student_net)
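# A sketch of the weighted_sigmoid_log_loss called in fit() above,
# assuming the formulation of Tang & Wang (KDD '18): the usual binary
# cross-entropy ranking loss, plus a distillation term that treats the
# teacher's top-K items as extra positives weighted per position by the
# (static x dynamic) weight tensor, balanced by teach_alpha. The exact
# reduction details are an assumption, not confirmed by this file.
import torch


def weighted_sigmoid_log_loss(targets_prediction, negatives_prediction,
                              candidates_prediction, weight, teach_alpha):
    # standard ranking loss on observed targets vs. sampled negatives
    regular_loss = (
        -torch.mean(torch.log(torch.sigmoid(targets_prediction)))
        - torch.mean(torch.log(1 - torch.sigmoid(negatives_prediction))))
    # distillation loss: push the teacher's top-K candidates up,
    # position-weighted, averaged over the batch
    distill_loss = -torch.mean(torch.sum(
        weight * torch.log(torch.sigmoid(candidates_prediction)), dim=1))
    loss = regular_loss + teach_alpha * distill_loss
    return loss, regular_loss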
class Recommender(object):
    """
    Contains the attributes and methods needed to train a sequential
    recommendation model. Models are trained on many tuples of
    (users, sequences, targets, negatives), where the negatives come from
    negative sampling: for any known tuple of (user, sequence, targets),
    one or more items are randomly sampled to act as negatives.

    Parameters
    ----------
    args: args,
        Model-related arguments, like latent dimensions.
    """

    def __init__(self, args=None):
        # model related
        self._num_items = None
        self._num_users = None
        self._net = None
        self.args = args

        # learning related
        self._batch_size = self.args.batch_size
        self._n_iter = self.args.n_iter
        self._neg_samples = self.args.neg_samples

        # rank evaluation related
        self.test_sequence = None
        self._candidate = dict()

    @property
    def _initialized(self):
        return self._net is not None

    def _initialize(self, interactions):
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users
        self.test_sequence = interactions.test_sequences

        self._net = Caser(self._num_users, self._num_items, self.args)
        self._net.build_model()

        self.sess = tf.Session()
        init = tf.global_variables_initializer()
        self.sess.run(init)

    def fit(self, train, test, verbose=False):
        """
        The general training loop to fit the model.

        Parameters
        ----------
        train: :class:`spotlight.interactions.Interactions`
            training instances, also contains test sequences
        test: :class:`spotlight.interactions.Interactions`
            only contains targets for test sequences
        verbose: bool, optional
            print the logs
        """
        # convert to sequences, targets and users
        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids.reshape(-1, 1)

        L, T = train.sequences.L, train.sequences.T

        n_train = sequences_np.shape[0]
        output_str = 'total training instances: %d' % n_train
        print(output_str)

        if not self._initialized:
            self._initialize(train)

        start_epoch = 0

        for epoch_num in range(start_epoch, self._n_iter):
            t1 = time()
            users_np, sequences_np, targets_np = shuffle(users_np,
                                                         sequences_np,
                                                         targets_np)

            negatives_np = self._generate_negative_samples(
                users_np, train, n=self._neg_samples)

            epoch_loss = 0.0

            for (minibatch_num,
                 (batch_users, batch_sequences, batch_targets,
                  batch_negatives)) in enumerate(
                      minibatch(users_np, sequences_np, targets_np,
                                negatives_np,
                                batch_size=self._batch_size)):
                items_to_predict = np.concatenate((batch_targets,
                                                   batch_negatives), 1)
                loss = self._net.train(self.sess, batch_sequences,
                                       batch_users, items_to_predict)
                epoch_loss += loss

            epoch_loss /= minibatch_num + 1

            t2 = time()
            if verbose and (epoch_num + 1) % 10 == 0:
                precision, recall, mean_aps = evaluate_ranking(
                    self, test, train, k=[1, 5, 10])
                output_str = "Epoch %d [%.1f s]\tloss=%.4f, map=%.4f, " \
                             "prec@1=%.4f, prec@5=%.4f, prec@10=%.4f, " \
                             "recall@1=%.4f, recall@5=%.4f, recall@10=%.4f, [%.1f s]" % (
                                 epoch_num + 1, t2 - t1, epoch_loss, mean_aps,
                                 np.mean(precision[0]), np.mean(precision[1]),
                                 np.mean(precision[2]), np.mean(recall[0]),
                                 np.mean(recall[1]), np.mean(recall[2]),
                                 time() - t2)
                print(output_str)
            else:
                output_str = "Epoch %d [%.1f s]\tloss=%.4f [%.1f s]" % (
                    epoch_num + 1, t2 - t1, epoch_loss, time() - t2)
                print(output_str)

    def _generate_negative_samples(self, users, interactions, n):
        """
        Sample negatives from a candidate set for each user. The candidate
        set of a user is {All Items} minus {Items Rated by User}.

        Parameters
        ----------
        users: array of np.int64
            sequence users
        interactions: :class:`spotlight.interactions.Interactions`
            training instances, used to generate candidates
        n: int
            total number of negatives to sample for each sequence
        """
        users_ = users.squeeze()
        negative_samples = np.zeros((users_.shape[0], n), np.int64)
        if not self._candidate:
            all_items = np.arange(interactions.num_items - 1) + 1  # 0 for padding
            train = interactions.tocsr()
            for user, row in enumerate(train):
                self._candidate[user] = list(set(all_items) - set(row.indices))

        for i, u in enumerate(users_):
            for j in range(n):
                x = self._candidate[u]
                negative_samples[i, j] = x[np.random.randint(len(x))]

        return negative_samples

    def predict(self, user_id, item_ids=None):
        """
        Make predictions for evaluation: given a user id, first retrieve
        the test sequence associated with that user, then compute the
        recommendation scores for items.

        Parameters
        ----------
        user_id: int
            user id for which prediction scores are needed.
        item_ids: array, optional
            Array containing the item ids for which prediction scores
            are desired. If not supplied, predictions for all items
            will be computed.
        """
        sequences_np = self.test_sequence.sequences[user_id, :]
        sequences_np = np.atleast_2d(sequences_np)

        if item_ids is None:
            item_ids = np.arange(self._num_items).reshape(-1, 1)

        out = self._net.predict(self.sess, sequences_np, user_id, item_ids)
        return out
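# A hypothetical driver for this TensorFlow variant, assuming the same
# Interactions loader used by test() earlier in this file and an
# argparse-style `args` namespace with train_root/test_root, L and T;
# whether Interactions accepts user_map/item_map from the training
# split is an assumption:
train = Interactions(args.train_root)
train.to_sequence(args.L, args.T)
test = Interactions(args.test_root,
                    user_map=train.user_map,
                    item_map=train.item_map)

model = Recommender(args=args)
model.fit(train, test, verbose=True)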
class Recommender(object):
    """
    args: args,
        Model-related arguments, like latent dimensions.
    """

    def __init__(self, args=None):
        # model related
        self._num_items = None
        self._num_users = None
        self._net = None
        self.args = args

        # learning related
        self._batch_size = self.args.batch_size
        self._n_iter = self.args.n_iter
        self._neg_samples = self.args.neg_samples

        # rank evaluation related
        self.test_sequence = None
        self._candidate = dict()
        self._top_k = args.top_k

    @property
    def _initialized(self):
        return self._net is not None

    def _initialize(self, interactions):
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users
        self.test_sequence = interactions.test_sequences

        self._net = Caser(self._num_users, self._num_items, self.args)

        self.sess = tf.Session()
        init = tf.global_variables_initializer()
        self.sess.run(init)

    def fit(self, train, val, verbose=False):
        """
        The general training loop to fit the model.

        Parameters
        ----------
        train: :class:`spotlight.interactions.Interactions`
            training instances, also contains test sequences
        val: :class:`spotlight.interactions.Interactions`
            only contains targets for validation sequences
        verbose: bool, optional
            print the logs
        """
        # convert to sequences, targets and users
        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids.reshape(-1, 1)

        L, T = train.sequences.L, train.sequences.T

        n_train = sequences_np.shape[0]
        print('total training instances: %d' % n_train)

        if not self._initialized:
            self._initialize(train)

        start_epoch = 0
        best_HR = 0.0  # track the best validation HR across all epochs

        for epoch_num in range(start_epoch, self._n_iter):
            t1 = time()
            users_np, sequences_np, targets_np = shuffle(users_np,
                                                         sequences_np,
                                                         targets_np)

            negatives_np = self._generate_negative_samples(
                users_np, train, n=self._neg_samples)

            step_loss = 0.0

            for (minibatch_num,
                 (batch_users, batch_sequences, batch_targets,
                  batch_negatives)) in enumerate(
                      minibatch(users_np, sequences_np, targets_np,
                                negatives_np,
                                batch_size=self._batch_size)):
                items_to_predict = np.concatenate((batch_targets,
                                                   batch_negatives), 1)
                loss, global_step = self._net.train(self.sess,
                                                    batch_sequences,
                                                    batch_users,
                                                    items_to_predict)
                step_loss += loss
                # average the loss over the steps of the current epoch
                # (global_step counts steps across all epochs)
                avg_loss = step_loss / (minibatch_num + 1)

                if global_step % 1000 == 0:
                    print('epoch-{}\tstep-{}\tloss-{:.6f}'.format(
                        epoch_num + 1, global_step, avg_loss))

                if verbose and global_step % 10000 == 0:
                    t2 = time()
                    HR, NDCG, MRR = self.predict(val)
                    output_str = "Epoch %d step %d [%.1f s]\tloss=%.6f, HR@20=%.6f, " \
                                 "NDCG@20=%.6f, MRR@20=%.6f, [%.1f s]" % (
                                     epoch_num + 1, global_step, t2 - t1,
                                     avg_loss, HR, NDCG, MRR, time() - t2)
                    print(output_str)
                    # checkpoint whenever the validation HR improves
                    if HR > best_HR:
                        best_HR = HR
                        ckpt_path = self.args.check_dir + 'model.ckpt'
                        self._net.saver.save(self.sess, ckpt_path,
                                             global_step=global_step)
                        print("model saved to {}".format(ckpt_path))

    def _generate_negative_samples(self, users, interactions, n):
        """
        Sample negatives from a candidate set for each user. The candidate
        set of a user is {All Items} minus {Items Rated by User}.

        Parameters
        ----------
        users: array of np.int64
            sequence users
        interactions: :class:`spotlight.interactions.Interactions`
            training instances, used to generate candidates
        n: int
            total number of negatives to sample for each sequence
        """
        users_ = users.squeeze()
        negative_samples = np.zeros((users_.shape[0], n), np.int64)
        if not self._candidate:
            all_items = np.arange(interactions.num_items - 1) + 1  # 0 for padding
            train = interactions.tocsr()
            for user, row in enumerate(train):
                self._candidate[user] = list(set(all_items) - set(row.indices))

        for i, u in enumerate(users_):
            for j in range(n):
                x = self._candidate[u]
                negative_samples[i, j] = x[np.random.randint(len(x))]

        return negative_samples

    def predict(self, val, item_ids=None):
        """
        Evaluate the model on a validation set: batch the validation
        sequences, rank items for each of them, and return the average
        HR, NDCG and MRR at the configured top-k cutoff.

        Parameters
        ----------
        val: :class:`spotlight.interactions.Interactions`
            validation instances
        item_ids: array, optional
            Array containing the item ids for which prediction scores
            are desired. If not supplied, predictions for all items
            will be computed.
        """
        sequences_np = val.sequences.sequences
        targets_np = val.sequences.targets
        users_np = val.sequences.user_ids.reshape(-1, 1)

        n_val = sequences_np.shape[0]
        print('total validation instances: %d' % n_val)

        NDCG, HR, MRR = 0.0, 0.0, 0.0
        item_ids = np.zeros((self._batch_size, self._num_items))
        for i in range(self._batch_size):
            item_ids[i] = np.arange(self._num_items)

        valid_batches = n_val // self._batch_size
        for batch_num in range(valid_batches):
            sequences = sequences_np[batch_num * self._batch_size:
                                     (batch_num + 1) * self._batch_size]
            targets = targets_np[batch_num * self._batch_size:
                                 (batch_num + 1) * self._batch_size]
            users = users_np[batch_num * self._batch_size:
                             (batch_num + 1) * self._batch_size]
            _, top_k_index = self._net.predict(self.sess, sequences, users,
                                               item_ids)
            hr, ndcg, mrr = 0.0, 0.0, 0.0
            for i in range(self._batch_size):
                cur_top_k = top_k_index[i]
                for j in range(self._top_k):
                    if targets[i][0] == cur_top_k[j]:
                        hr += 1
                        mrr += 1 / (1 + j)
                        dcg = 1 / np.log2(1 + 1 + j)
                        idcg = 1 / np.log2(1 + 1)
                        ndcg += dcg / idcg
                        break
            HR += hr / self._batch_size
            NDCG += ndcg / self._batch_size
            MRR += mrr / self._batch_size

        return HR / valid_batches, NDCG / valid_batches, MRR / valid_batches
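# Sketches of the shuffle/minibatch helpers that every fit() in this
# file relies on, assuming the conventional implementations; the
# indices=True branch mirrors how DistilledRecommender.fit() unpacks
# its result:
import numpy as np


def shuffle(*arrays, **kwargs):
    """Shuffle arrays in unison; optionally return the permutation."""
    require_indices = kwargs.get('indices', False)
    shuffle_indices = np.random.permutation(len(arrays[0]))
    shuffled = tuple(x[shuffle_indices] for x in arrays)
    if require_indices:
        return shuffled, shuffle_indices
    return shuffled


def minibatch(*tensors, **kwargs):
    """Yield aligned slices of size batch_size from each tensor."""
    batch_size = kwargs.get('batch_size', 128)
    for i in range(0, len(tensors[0]), batch_size):
        yield tuple(x[i:i + batch_size] for x in tensors)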
class Recommender(object):
    """
    Contains the attributes and methods needed to train a sequential
    recommendation model. Models are trained on many tuples of
    (users, sequences, targets, negatives), where the negatives come from
    negative sampling: for any known tuple of (user, sequence, targets),
    one or more items are randomly sampled to act as negatives.

    Parameters
    ----------
    n_iter: int,
        Number of iterations to run.
    batch_size: int,
        Minibatch size.
    l2: float,
        L2 loss penalty, also known as the 'lambda' of L2 regularization.
    neg_samples: int,
        Number of negative samples to generate for each target.
        If targets=3 and neg_samples=3, then it will sample 9 negatives.
    learning_rate: float,
        Initial learning rate.
    use_cuda: boolean,
        Run the model on a GPU or CPU.
    model_args: args,
        Model-related arguments, like latent dimensions.
    """

    def __init__(self,
                 n_iter=None,
                 batch_size=None,
                 l2=None,
                 neg_samples=None,
                 learning_rate=None,
                 use_cuda=False,
                 model_args=None):
        # model related
        self._num_items = None
        self._num_users = None
        self._net = None
        self.model_args = model_args

        # learning related
        self._batch_size = batch_size
        self._n_iter = n_iter
        self._learning_rate = learning_rate
        self._l2 = l2
        self._neg_samples = neg_samples
        self._device = torch.device("cuda" if use_cuda else "cpu")

        # rank evaluation related
        self.test_sequence = None
        self._candidate = dict()

    @property
    def _initialized(self):
        return self._net is not None

    def _initialize(self, interactions):
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users
        self.test_sequence = interactions.test_sequences

        self._net = Caser(self._num_users,
                          self._num_items,
                          self.model_args).to(self._device)

        self._optimizer = optim.Adam(self._net.parameters(),
                                     weight_decay=self._l2,
                                     lr=self._learning_rate)

    def fit(self, train, test, verbose=False):
        """
        The general training loop to fit the model.

        Parameters
        ----------
        train: :class:`spotlight.interactions.Interactions`
            training instances, also contains test sequences
        test: :class:`spotlight.interactions.Interactions`
            only contains targets for test sequences
        verbose: bool, optional
            print the logs
        """
        # convert to sequences, targets and users
        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids.reshape(-1, 1)

        L, T = train.sequences.L, train.sequences.T

        n_train = sequences_np.shape[0]
        output_str = 'total training instances: %d' % n_train
        print(output_str)

        if not self._initialized:
            self._initialize(train)

        start_epoch = 0

        for epoch_num in range(start_epoch, self._n_iter):
            t1 = time()

            # set model to training mode
            self._net.train()

            users_np, sequences_np, targets_np = shuffle(users_np,
                                                         sequences_np,
                                                         targets_np)

            negatives_np = self._generate_negative_samples(
                users_np, train, n=self._neg_samples)

            # convert numpy arrays to PyTorch tensors and move them to the
            # corresponding device
            users, sequences, targets, negatives = (
                torch.from_numpy(users_np).long(),
                torch.from_numpy(sequences_np).long(),
                torch.from_numpy(targets_np).long(),
                torch.from_numpy(negatives_np).long())
            users, sequences, targets, negatives = (users.to(self._device),
                                                    sequences.to(self._device),
                                                    targets.to(self._device),
                                                    negatives.to(self._device))

            epoch_loss = 0.0

            for (minibatch_num,
                 (batch_users, batch_sequences, batch_targets,
                  batch_negatives)) in enumerate(
                      minibatch(users, sequences, targets, negatives,
                                batch_size=self._batch_size)):
                items_to_predict = torch.cat((batch_targets,
                                              batch_negatives), 1)
                items_prediction = self._net(batch_sequences, batch_users,
                                             items_to_predict)
                (targets_prediction, negatives_prediction) = torch.split(
                    items_prediction,
                    [batch_targets.size(1), batch_negatives.size(1)], dim=1)

                self._optimizer.zero_grad()

                # compute the binary cross-entropy loss
                positive_loss = -torch.mean(
                    torch.log(torch.sigmoid(targets_prediction)))
                negative_loss = -torch.mean(
                    torch.log(1 - torch.sigmoid(negatives_prediction)))
                loss = positive_loss + negative_loss

                epoch_loss += loss.item()

                loss.backward()
                self._optimizer.step()

            epoch_loss /= minibatch_num + 1

            t2 = time()
            if verbose and (epoch_num == 0 or (epoch_num + 1) % 10 == 0):
                output_str = "Epoch %d [%.1f s]\tloss=%.4f [%.1f s]" % (
                    epoch_num + 1, t2 - t1, epoch_loss, time() - t2)
                print(output_str)
                hits, ndcg = evaluate_hits_ndcg(self, train, test)
                print(f'hits@10: {hits}, ndcg@10: {ndcg}')
                """
                precision, recall, mean_aps, hits = evaluate_ranking(
                    self, test, train, k=[1, 5, 10])
                output_str = "Epoch %d [%.1f s]\tloss=%.4f, map=%.4f, " \
                             "prec@1=%.4f, prec@5=%.4f, prec@10=%.4f, " \
                             "recall@1=%.4f, recall@5=%.4f, recall@10=%.4f, [%.1f s]" % (
                                 epoch_num + 1, t2 - t1, epoch_loss, mean_aps,
                                 np.mean(precision[0]), np.mean(precision[1]),
                                 np.mean(precision[2]), np.mean(recall[0]),
                                 np.mean(recall[1]), np.mean(recall[2]),
                                 time() - t2)
                print(output_str)
                """
            else:
                output_str = "Epoch %d [%.1f s]\tloss=%.4f [%.1f s]" % (
                    epoch_num + 1, t2 - t1, epoch_loss, time() - t2)
                print(output_str)

            # periodically rebuild the per-user candidate sets so fresh
            # random subsamples are drawn (see _generate_negative_samples)
            if (epoch_num + 1) % 5 == 0:
                self._candidate = dict()

    def _generate_negative_samples(self, users, interactions, n):
        """
        Sample negatives from a candidate set for each user. The candidate
        set of a user is {All Items} minus {Items Rated by User}; for very
        large item sets, a random subsample of the items is used instead.

        Parameters
        ----------
        users: array of np.int64
            sequence users
        interactions: :class:`spotlight.interactions.Interactions`
            training instances, used to generate candidates
        n: int
            total number of negatives to sample for each sequence
        """
        users_ = users.squeeze()
        negative_samples = np.zeros((users_.shape[0], n), np.int64)
        sample_limit = 200
        if not self._candidate:
            all_items = np.arange(interactions.num_items - 1) + 1  # 0 for padding
            train = interactions.tocsr()
            for user, row in enumerate(train):
                self._candidate[user] = []
                # resample until enough candidates survive the set difference
                while len(self._candidate[user]) < n:
                    cur_items = all_items
                    if len(cur_items) > 5000:
                        cur_items = np.random.choice(all_items,
                                                     size=sample_limit,
                                                     replace=False)
                    self._candidate[user] = list(set(cur_items) -
                                                 set(row.indices))

        for i, u in enumerate(users_):
            for j in range(n):
                x = self._candidate[u]
                negative_samples[i, j] = x[np.random.randint(len(x))]

        return negative_samples

    def predict(self, user_id, item_ids=None):
        """
        Make predictions for evaluation: given a user id, first retrieve
        the test sequence associated with that user, then compute the
        recommendation scores for items.

        Parameters
        ----------
        user_id: int
            user id for which prediction scores are needed.
        item_ids: array, optional
            Array containing the item ids for which prediction scores
            are desired. If not supplied, predictions for all items
            will be computed.
        """
        if self.test_sequence is None:
            raise ValueError('Missing test sequences, cannot make predictions')

        # set model to evaluation mode
        self._net.eval()

        with torch.no_grad():
            sequences_np = self.test_sequence.sequences[user_id, :]
            sequences_np = np.atleast_2d(sequences_np)

            if item_ids is None:
                item_ids = np.arange(self._num_items).reshape(-1, 1)

            sequences = torch.from_numpy(sequences_np).long()
            item_ids = torch.from_numpy(item_ids).long()
            user_id = torch.from_numpy(np.array([[user_id]])).long()

            user, sequences, items = (user_id.to(self._device),
                                      sequences.to(self._device),
                                      item_ids.to(self._device))

            out = self._net(sequences, user, items, for_pred=True)

        return out.cpu().numpy().flatten()