Example #1
    def _initialize_teacher(self, interactions):
        # initialize teacher model
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users

        self._teacher_net = Caser(self._num_users, self._num_items,
                                  self._teacher_model_args)
        # load teacher model
        if os.path.isfile(self._teacher_model_path):
            output_str = ("loading teacher model from %s" %
                          self._teacher_model_path)
            print(output_str)

            checkpoint = torch.load(self._teacher_model_path,
                                    map_location='cpu')
            self._teacher_net.load_state_dict(checkpoint['state_dict'])
            output_str = "loaded model %s (epoch %d)" % (
                self._teacher_model_path, checkpoint['epoch_num'])
            print(output_str)
        else:
            output_str = "no model found at %s" % self._teacher_model_path
            print(output_str)

        # set teacher model to evaluation mode
        self._teacher_net.eval()
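
The loader above reads two keys from the checkpoint dict, 'state_dict' and 'epoch_num' (an 'optimizer' key is also written by the training loops further down this page). A minimal sketch of writing a compatible checkpoint; the helper name is hypothetical:

import torch

def save_compatible_checkpoint(net, optimizer, epoch_num, path):
    # keys mirror what _initialize_teacher reads back via torch.load
    torch.save({
        'epoch_num': epoch_num,
        'state_dict': net.state_dict(),
        'optimizer': optimizer.state_dict(),
    }, path)

Example #2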
    def _initialize(self, interactions):
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users

        self.test_sequence = interactions.test_sequences

        self._net = Caser(self._num_users, self._num_items, self.args)
        self.sess = tf.Session()
        init = tf.global_variables_initializer()
        self.sess.run(init)
Example #3
    def _initialize(self, interactions):
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users

        self.test_sequence = interactions.test_sequences

        self._net = Caser(self._num_users, self._num_items, self.model_args)

        self._optimizer = optim.Adam(self._net.parameters(),
                                     weight_decay=self._l2,
                                     lr=self._learning_rate)
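
Example #4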
def test(args):
    data = Interactions(args.test_root)
    data.to_sequence(args.L, args.T)
    sequences_np = data.sequences.sequences
    targets_np = data.sequences.targets
    users_np = data.sequences.user_ids.reshape(-1, 1)
    n_test = sequences_np.shape[0]
    print('total test instances: %d' % n_test)
    num_users = data.num_users
    num_items = data.num_items
    NDCG, HR, MRR = 0.0, 0.0, 0.0
    item_ids = np.zeros((args.batch_size, num_items))
    for i in range(args.batch_size):
        item_ids[i] = np.arange(num_items)
    test_batches = n_test // args.batch_size

    model = Caser(num_users, num_items, args)
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    with tf.Session(config=gpu_config) as sess:
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(args.check_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('Restore model from {} successfully!'.format(args.check_dir))
        else:
            print('Restore model from {} failed!'.format(args.check_dir))
            return
        for i in range(test_batches):
            sequences = sequences_np[i * args.batch_size: (i + 1) * args.batch_size]
            targets = targets_np[i * args.batch_size: (i + 1) * args.batch_size]
            users = users_np[i * args.batch_size: (i + 1) * args.batch_size]
            _, top_k_index = model.predict(sess, sequences, users, item_ids)
            hr, ndcg, mrr = 0.0, 0.0, 0.0
            for b in range(args.batch_size):  # avoid shadowing the batch index i
                cur_user = top_k_index[b]
                for j in range(args.top_k):
                    if targets[b][0] == cur_user[j]:
                        hr += 1
                        mrr += 1 / (1 + j)
                        dcg = 1 / np.log2(1 + 1 + j)
                        idcg = 1 / np.log2(1 + 1)
                        ndcg += dcg / idcg
                        break
            HR += hr / args.batch_size
            NDCG += ndcg / args.batch_size
            MRR += mrr / args.batch_size
    return HR / test_batches, NDCG / test_batches, MRR / test_batches
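
The inner loop above assumes exactly one relevant item per test instance, so the metrics reduce to closed forms. A standalone helper (hypothetical name) mirroring that logic:

import numpy as np

def single_target_metrics(target, ranked_items, top_k):
    """HR/NDCG/MRR for one instance with a single relevant item.

    With one relevant item at 0-based rank j: DCG = 1/log2(j + 2),
    IDCG = 1/log2(2) = 1, so NDCG = 1/log2(j + 2); MRR = 1/(j + 1).
    """
    for j in range(top_k):
        if ranked_items[j] == target:
            return 1.0, 1.0 / np.log2(j + 2), 1.0 / (j + 1)
    return 0.0, 0.0, 0.0

# e.g. target ranked third (j = 2): HR=1, NDCG=1/log2(4)=0.5, MRR=1/3
print(single_target_metrics(42, [7, 13, 42, 5], top_k=4))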
Example #5
    def _initialize_student(self, interactions):

        self._num_items = interactions.num_items
        self._num_users = interactions.num_users

        self.test_sequence = interactions.test_sequences

        self._net = gpu(
            Caser(self._num_users, self._num_items, self._student_model_args),
            self._use_cuda)
        self._optimizer = optim.Adam(self._net.parameters(),
                                     weight_decay=self._l2,
                                     lr=self._learning_rate)

        self._loss_func = weighted_sigmoid_log_loss
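
weighted_sigmoid_log_loss is referenced but not defined on this page. A hedged sketch of what it could look like, with the signature and (loss, regular_loss) return values taken from the call site in Example #8 and the form following the ranking-distillation objective of Tang & Wang (KDD '18); the actual implementation may differ:

import torch

def weighted_sigmoid_log_loss(targets_pred, negatives_pred,
                              candidates_pred, weight, alpha):
    # standard sigmoid log loss on positives and sampled negatives
    regular_loss = -torch.mean(torch.log(torch.sigmoid(targets_pred))) \
                   - torch.mean(torch.log(1 - torch.sigmoid(negatives_pred)))
    # distillation term: treat the teacher's top-K as weighted positives
    distill_loss = -torch.mean(
        torch.sum(weight * torch.log(torch.sigmoid(candidates_pred)), dim=1))
    return regular_loss + alpha * distill_loss, regular_loss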
Example #6
    def _initialize(self):
        print("train/test examination...")
        n_user, n_item, item_map = examination(self.train_path, self.test_path)

        self._num_items = len(item_map) + 1  # for 0 padding
        self._num_users = n_user
        self.item_map = item_map
        self.item_cumsum = self._get_item_cumsum()
        #items = [i[0] for i in  self.item_map.values()]
        #print(max(items),self._num_items)

        # get pre-trained embeddings
        print(self.pre_train_path)
        print(os.path.isfile(self.pre_train_path))
        if self.pre_train_path and os.path.isfile(self.pre_train_path):
            print("loading pre_train value")
            w2v = Word2Vec.load(self.pre_train_path)
            dims = w2v.trainables.layer1_size
            pre_train_array = list()
            sort_index = list()
            for k, v in item_map.items():
                sort_index.append(v[0])
                try:
                    pre_train_array.append(w2v.wv.get_vector(str(k)))
                except KeyError:
                    pre_train_array.append(np.random.randn(dims))
            # add 0 padding:
            if 0 not in sort_index:
                sort_index.append(0)
                pre_train_array.append(np.random.randn(dims))

            pre_train_array = np.array(pre_train_array)
            pre_train_array = pre_train_array[np.argsort(sort_index)]
        else:
            print("no pre_train value")
            pre_train_array = None

        self._net = gpu(
            Caser(self._num_users, self._num_items, self.model_args,
                  pre_train_array), self._use_cuda)

        par = filter(lambda p: p.requires_grad, self._net.parameters())
        #self._net.parameters()
        self._optimizer = optim.Adam(par,
                                     weight_decay=self._l2,
                                     lr=self._learning_rate)
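
The reordering above uses np.argsort to align dict-iteration order with internal item indices, so that row i of the final array holds the vector for item index i. A toy illustration with made-up values:

import numpy as np

sort_index = [2, 0, 1]                              # internal indices, in dict order
vectors = np.array([[2., 2.], [0., 0.], [1., 1.]])  # vector collected for each entry
aligned = vectors[np.argsort(sort_index)]
print(aligned)  # row 0 -> [0,0], row 1 -> [1,1], row 2 -> [2,2]

Example #7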
class Recommender(object):
    """
    Contains the attributes and methods needed to train a sequential
    recommendation model. Models are trained on many tuples of
    (users, sequences, targets, negatives), where the negatives come from
    negative sampling: for each known tuple of (user, sequence, targets),
    one or more items are randomly sampled to act as negatives.


    Parameters
    ----------

    n_iter: int,
        Number of iterations to run.
    batch_size: int,
        Minibatch size.
    l2: float,
        L2 loss penalty, also known as the 'lambda' of l2 regularization.
    neg_samples: int,
        Number of negative samples to generate for each target.
        If targets=3 and neg_samples=3, it will sample 9 negatives.
    learning_rate: float,
        Initial learning rate.
    use_cuda: boolean,
        Run the model on a GPU or CPU.
    model_args: args,
        Model-related arguments, like latent dimensions.
    """
    def __init__(self,
                 n_iter=None,
                 batch_size=None,
                 l2=None,
                 neg_samples=None,
                 learning_rate=None,
                 use_cuda=False,
                 checkpoint=None,
                 model_args=None):

        # model related
        self._num_items = None
        self._num_users = None
        self._net = None
        self.model_args = model_args

        # learning related
        self._batch_size = batch_size
        self._n_iter = n_iter
        self._learning_rate = learning_rate
        self._l2 = l2
        self._neg_samples = neg_samples
        self._device = torch.device("cuda" if use_cuda else "cpu")
        self.checkpoint = checkpoint

        # rank evaluation related
        self.test_sequence = None
        self._candidate = dict()

    @property
    def _initialized(self):
        return self._net is not None

    def _initialize(self, interactions):
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users

        self.test_sequence = interactions.test_sequences

        self._net = Caser(self._num_users, self._num_items, self.model_args)

        self._optimizer = optim.Adam(self._net.parameters(),
                                     weight_decay=self._l2,
                                     lr=self._learning_rate)

    def fit(self, train, test, verbose=False):
        """
        The general training loop to fit the model

        Parameters
        ----------

        train: :class:`interactions.Interactions`
            training instances, also contains test sequences
        test: :class:`interactions.Interactions`
            only contains targets for test sequences
        verbose: bool, optional
            print the logs
        """

        # convert sequences, targets and users to numpy arrays
        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids.reshape(-1, 1)

        L, T = train.sequences.L, train.sequences.T

        n_train = sequences_np.shape[0]

        print('total training instances: %d' % n_train)

        if not self._initialized:
            self._initialize(train)

        start_epoch = 1
        if self.checkpoint:
            print("loading checkpoint from %s" % self.checkpoint)
            checkpoint = torch.load(self.checkpoint)
            start_epoch = checkpoint['epoch_num']
            self._net.load_state_dict(checkpoint['state_dict'])
            self._optimizer.load_state_dict(checkpoint['optimizer'])
            print("loaded checkpoint %s (epoch %d)" %
                  (self.checkpoint, start_epoch))

        # compute number of parameters
        print("Number of params: %d" % compute_model_size(self._net))

        for epoch_num in range(start_epoch, self._n_iter + 1):

            t1 = time()

            # set model to training mode and move it to the corresponding device
            self._net.train()
            self._net = self._net.to(self._device)

            users_np, sequences_np, targets_np = shuffle(
                users_np, sequences_np, targets_np)

            negatives_np = self._generate_negative_samples(users_np,
                                                           train,
                                                           n=self._neg_samples)

            # convert numpy arrays to PyTorch tensors and move them to the corresponding device
            users, sequences, targets, negatives = (
                torch.from_numpy(users_np).long(),
                torch.from_numpy(sequences_np).long(),
                torch.from_numpy(targets_np).long(),
                torch.from_numpy(negatives_np).long())

            users, sequences, targets, negatives = (users.to(self._device),
                                                    sequences.to(self._device),
                                                    targets.to(self._device),
                                                    negatives.to(self._device))

            epoch_loss = 0.0

            for (minibatch_num, (batch_users, batch_sequences, batch_targets,
                                 batch_negatives)) in enumerate(
                                     minibatch(users,
                                               sequences,
                                               targets,
                                               negatives,
                                               batch_size=self._batch_size)):
                # concatenate all variables to get predictions in one run
                items_to_predict = torch.cat((batch_targets, batch_negatives),
                                             1)
                items_prediction = self._net(batch_sequences, batch_users,
                                             items_to_predict)

                (targets_prediction, negatives_prediction) = torch.split(
                    items_prediction,
                    [batch_targets.size(1),
                     batch_negatives.size(1)],
                    dim=1)

                self._optimizer.zero_grad()
                # compute the binary cross-entropy loss
                loss = sigmoid_log_loss(targets_prediction,
                                        negatives_prediction)

                epoch_loss += loss.item()

                loss.backward()
                self._optimizer.step()

            epoch_loss /= minibatch_num + 1

            t2 = time()
            if verbose and epoch_num % 10 == 0:
                precision, recall, ndcg, mean_aps = evaluate_ranking(
                    self, test, train, k=[3, 5, 10])
                str_precs = "precisions=%.4f,%.4f,%.4f" % tuple(
                    [np.mean(a) for a in precision])
                str_recalls = "recalls=%.4f,%.4f,%.4f" % tuple(
                    [np.mean(a) for a in recall])
                str_ndcgs = "ndcgs=%.4f,%.4f,%.4f" % tuple(
                    [np.mean(a) for a in ndcg])

                output_str = "Epoch %d [%.1f s]\tloss=%.4f, " \
                             "map=%.4f, %s, %s, %s[%.1f s]" % (epoch_num, t2 - t1,
                                                               epoch_loss,
                                                               mean_aps, str_precs, str_recalls, str_ndcgs,
                                                               time() - t2)
                print(output_str)
            else:
                output_str = "Epoch %d [%.1f s]\tloss=%.4f [%.1f s]" % (
                    epoch_num, t2 - t1, epoch_loss, time() - t2)
                print(output_str)

        _save_checkpoint(
            {
                'epoch_num': epoch_num,
                'state_dict': self._net.state_dict(),
                'optimizer': self._optimizer.state_dict(),
            }, 'checkpoints/gowalla-caser-dim=%d.pth.tar' % self.model_args.d)

    def _generate_negative_samples(self, users, interactions, n):
        """
        Sample negatives from each user's candidate set. The
        candidate set of a user is defined as:
        {All Items} \ {Items Rated by User}

        Parameters
        ----------

        users: array of np.int64
            user ids of the sequences
        interactions: :class:`interactions.Interactions`
            training instances, used for generating candidates
        n: int
            total number of negatives to sample for each sequence
        """

        users_ = users.squeeze()
        negative_samples = np.zeros((users_.shape[0], n), np.int64)
        if not self._candidate:
            all_items = np.arange(interactions.num_items - 1) + 1  # 0 for padding
            train = interactions.tocsr()
            for user, row in enumerate(train):
                self._candidate[user] = list(set(all_items) - set(row.indices))

        for i, u in enumerate(users_):
            for j in range(n):
                x = self._candidate[u]
                negative_samples[i, j] = x[np.random.randint(len(x))]

        return negative_samples

    def predict(self, user_id, item_ids=None, model=None):
        """
        Make predictions for evaluation: given a user id, it will
        first retrieve the test sequence associated with that user
        and compute the recommendation scores for items.

        Parameters
        ----------

        user_id: int
           user id for which prediction scores are needed.
        item_ids: array, optional
            Array containing the item ids for which prediction scores
            are desired. If not supplied, predictions for all items
            will be computed.
        model: optional
            network to use for prediction; defaults to self._net.
        """

        if self.test_sequence is None:
            raise ValueError('Missing test sequences, cannot make predictions')
        if model is None:
            model = self._net

        # set model to evaluation mode
        model.eval()
        with torch.no_grad():
            sequences_np = self.test_sequence.sequences[user_id, :]
            sequences_np = np.atleast_2d(sequences_np)

            if item_ids is None:
                item_ids = np.arange(self._num_items).reshape(-1, 1)

            sequences = torch.from_numpy(
                sequences_np.astype(np.int64).reshape(1, -1))
            item_ids = torch.from_numpy(item_ids.astype(np.int64))
            user_id = torch.from_numpy(np.array([[user_id]]).astype(np.int64))

            user, sequences, items = (user_id.to(self._device),
                                      sequences.to(self._device),
                                      item_ids.to(self._device))

            out = model(sequences, user, items, for_pred=True)

            return cpu(out.data).numpy().flatten()
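
A hedged usage sketch for the class above. File paths, hyperparameter values and the model_args fields are hypothetical (only d, the latent dimension, is referenced elsewhere on this page); Interactions and to_sequence follow the same calls used in the test() function of Example #4:

from argparse import Namespace

# hypothetical hyperparameters; the full set of Caser args depends on its implementation
model_args = Namespace(d=50)

train = Interactions('data/train.txt')
train.to_sequence(5, 3)          # L=5, T=3
test = Interactions('data/test.txt')

model = Recommender(n_iter=50, batch_size=512, l2=1e-6, neg_samples=3,
                    learning_rate=1e-3, use_cuda=False, model_args=model_args)
model.fit(train, test, verbose=True)
scores = model.predict(user_id=0)  # prediction scores for every item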
Example #8
class DistilledRecommender(Recommender):
    """
    Contains the attributes and methods needed to train a sequential
    recommendation model with ranking distillation [1]. Models are trained
    on many tuples of (users, sequences, targets, negatives), where the
    negatives come from negative sampling: for each known tuple of
    (user, sequence, targets), one or more items are randomly sampled to
    act as negatives.

    [1] Ranking Distillation: Learning Compact Ranking Models With High
        Performance for Recommender System, Jiaxi Tang and Ke Wang , KDD '18

    Parameters
    ----------

    n_iter: int,
        Number of iterations to run.
    batch_size: int,
        Minibatch size.
    l2: float,
        L2 loss penalty, also known as the 'lambda' of l2 regularization.
    neg_samples: int,
        Number of negative samples to generate for each target.
    learning_rate: float,
        Initial learning rate.
    use_cuda: boolean,
        Run the model on a GPU or CPU.
    teacher_model_path: string,
        Path to teacher's model checkpoint.
    teacher_topk_path: string,
        Path to teacher's top-K ranking cache for each training instance.
    lamda: float
        Hyperparameter for tuning the sharpness of position importance weight.
    mu: float
        Hyperparameter for tuning the sharpness of ranking discrepancy weight.
    num_dynamic_samples: int
        Number of samples used for estimating the student's rank.
    dynamic_start_epoch: int
        Epoch at which to start using the hybrid of the two weights.
    K: int
        Length of teacher's exemplary ranking.
    teach_alpha: float,
        Weight for balancing ranking loss and distillation loss.
    student_model_args: args,
        Student model related arguments, like latent dimensions.
    teacher_model_args: args,
        Teacher model related arguments, like latent dimensions.
    """
    def __init__(self,
                 n_iter=None,
                 batch_size=None,
                 l2=None,
                 neg_samples=None,
                 learning_rate=None,
                 use_cuda=False,
                 teacher_model_path=None,
                 teacher_topk_path=None,
                 lamda=None,
                 mu=None,
                 num_dynamic_samples=None,
                 dynamic_start_epoch=None,
                 K=None,
                 teach_alpha=None,
                 student_model_args=None,
                 teacher_model_args=None):

        # data related
        self.L = None
        self.T = None

        # model related
        self._num_items = None
        self._num_users = None
        self._teacher_net = None  # teacher model
        self._student_net = None  # student model
        self._student_model_args = student_model_args
        self._teacher_model_args = teacher_model_args

        # learning related
        self._batch_size = batch_size
        self._n_iter = n_iter
        self._learning_rate = learning_rate
        self._l2 = l2
        self._neg_samples = neg_samples
        self._device = torch.device("cuda" if use_cuda else "cpu")

        # ranking distillation related
        self._teach_alpha = teach_alpha
        self._lambda = lamda
        self._mu = mu
        self._num_dynamic_samples = num_dynamic_samples
        self._dynamic_start_epoch = dynamic_start_epoch
        self._K = K
        self._teacher_model_path = teacher_model_path
        self._teacher_topk_path = teacher_topk_path
        self._weight_renormalize = False

        # rank evaluation related
        self.test_sequence = None
        self._candidate = dict()

    @property
    def _teacher_initialized(self):
        return self._teacher_net is not None

    def _initialize_teacher(self, interactions):
        # initialize teacher model
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users

        self._teacher_net = Caser(self._num_users, self._num_items,
                                  self._teacher_model_args)
        # load teacher model
        if os.path.isfile(self._teacher_model_path):
            output_str = ("loading teacher model from %s" %
                          self._teacher_model_path)
            print(output_str)

            checkpoint = torch.load(self._teacher_model_path,
                                    map_location='cpu')
            self._teacher_net.load_state_dict(checkpoint['state_dict'])
            output_str = "loaded model %s (epoch %d)" % (
                self._teacher_model_path, checkpoint['epoch_num'])
            print(output_str)
        else:
            output_str = "no model found at %s" % self._teacher_model_path
            print(output_str)

        # set teacher model to evaluation mode
        self._teacher_net.eval()

    @property
    def _student_initialized(self):
        return self._student_net is not None

    def _initialize_student(self, interactions):
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users

        self.test_sequence = interactions.test_sequences

        self._student_net = Caser(self._num_users, self._num_items,
                                  self._student_model_args)

        self._optimizer = optim.Adam(self._student_net.parameters(),
                                     weight_decay=self._l2,
                                     lr=self._learning_rate)

    def fit(self, train, test, verbose=False):
        """
        The general training loop to fit the model

        Parameters
        ----------

        train: :class:`interactions.Interactions`
            training instances, also contains test sequences
        test: :class:`interactions.Interactions`
            only contains targets for test sequences
        verbose: bool, optional
            print the logs
        """

        # convert sequences, targets and users to numpy arrays
        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids.reshape(-1, 1)

        self.L, self.T = train.sequences.L, train.sequences.T

        n_train = sequences_np.shape[0]

        output_str = 'total training instances: %d' % n_train
        print(output_str)

        if not self._teacher_initialized:
            self._initialize_teacher(train)
        if not self._student_initialized:
            self._initialize_student(train)

        # pre-compute the teacher's top-K ranking for each training instance for
        # faster training; it must be computed on the fly instead if it is too
        # large to keep in memory
        if os.path.isfile(self._teacher_topk_path):
            print('found teacher topk file, loading..')
            teacher_ranking = np.load(self._teacher_topk_path)
        else:
            print('teacher topk file not found, generating.. ')
            teacher_ranking = self._get_teacher_topk(sequences_np,
                                                     users_np,
                                                     targets_np,
                                                     k=self._K)

        # initialize static weight (position importance weight)
        weight_static = np.array(range(1, self._K + 1), dtype=np.float32)
        weight_static = np.exp(-weight_static / self._lambda)
        weight_static = weight_static / np.sum(weight_static)

        weight_static = torch.from_numpy(weight_static).to(self._device)
        weight_static = weight_static.unsqueeze(0)

        # initialize dynamic weight (ranking discrepancy weight)
        weight_dynamic = None

        # count number of parameters
        print("Number of params in teacher model: %d" %
              compute_model_size(self._teacher_net))
        print("Number of params in student model: %d" %
              compute_model_size(self._student_net))

        indices = np.arange(n_train)
        start_epoch = 1

        for epoch_num in range(start_epoch, self._n_iter + 1):

            t1 = time()
            # set teacher model to evaluation mode and move it to the corresponding devices
            self._teacher_net.eval()
            self._teacher_net = self._teacher_net.to(self._device)
            # set student model to training mode and move it to the corresponding devices
            self._student_net.train()
            self._student_net = self._student_net.to(self._device)

            (users_np, sequences_np,
             targets_np), shuffle_indices = shuffle(users_np,
                                                    sequences_np,
                                                    targets_np,
                                                    indices=True)

            # keep indices for retrieving the teacher's top-K ranking from the cache
            indices = indices[shuffle_indices]

            negatives_np = self._generate_negative_samples(users_np,
                                                           train,
                                                           n=self._neg_samples)

            dynamic_samples_np = self._generate_negative_samples(
                users_np, train, n=self._num_dynamic_samples)

            # convert numpy arrays to PyTorch tensors and move them to the corresponding device
            users, sequences, targets, negatives = (
                torch.from_numpy(users_np).long(),
                torch.from_numpy(sequences_np).long(),
                torch.from_numpy(targets_np).long(),
                torch.from_numpy(negatives_np).long())

            users, sequences, targets, negatives = (users.to(self._device),
                                                    sequences.to(self._device),
                                                    targets.to(self._device),
                                                    negatives.to(self._device))

            dynamic_samples = torch.from_numpy(dynamic_samples_np).long().to(
                self._device)

            epoch_loss = 0.0
            epoch_regular_loss = 0.0

            for (minibatch_num,
                 (batch_indices, batch_users, batch_sequences, batch_targets,
                  batch_negatives, batch_dynamics)) in enumerate(
                      minibatch(indices,
                                users,
                                sequences,
                                targets,
                                negatives,
                                dynamic_samples,
                                batch_size=self._batch_size)):

                # retrieve the teacher's top-K ranking for the given indices
                batch_candidates = torch.from_numpy(
                    teacher_ranking[batch_indices, :]).long().to(self._device)
                # concatenate all variables to get predictions in one run
                items_to_predict = torch.cat(
                    (batch_targets, batch_negatives, batch_candidates,
                     batch_dynamics), 1)

                items_prediction = self._student_net(batch_sequences,
                                                     batch_users,
                                                     items_to_predict)

                (targets_prediction, negatives_prediction,
                 candidates_prediction, dynamics_prediction) = torch.split(
                     items_prediction,
                     [batch_targets.size(1),
                      batch_negatives.size(1),
                      batch_candidates.size(1),
                      batch_dynamics.size(1)],
                     dim=1)

                self._optimizer.zero_grad()

                if epoch_num > self._dynamic_start_epoch:
                    # compute dynamic weight
                    dynamic_weights = list()
                    for col in range(self._K):
                        col_prediction = candidates_prediction[:, col].unsqueeze(1)

                        num_smaller_than = torch.sum(
                            col_prediction < dynamics_prediction,
                            dim=1).float()
                        relative_rank = num_smaller_than / self._num_dynamic_samples
                        predicted_rank = torch.floor(self._num_items *
                                                     relative_rank)

                        dynamic_weight = torch.tanh(self._mu *
                                                    (predicted_rank - col))
                        dynamic_weight = torch.clamp(dynamic_weight, min=0.0)

                        dynamic_weights.append(dynamic_weight)
                    weight_dynamic = torch.stack(dynamic_weights, 1)

                    # hybrid two weights
                    weight = weight_dynamic * weight_static
                    if self._weight_renormalize:
                        weight = F.normalize(weight, p=1, dim=1)
                else:
                    weight = weight_static

                # detach the weight to stop the gradient flow to the weight
                weight = weight.detach()

                loss, regular_loss = weighted_sigmoid_log_loss(
                    targets_prediction, negatives_prediction,
                    candidates_prediction, weight, self._teach_alpha)

                epoch_loss += loss.item()
                epoch_regular_loss += regular_loss.item()

                loss.backward()

                self._optimizer.step()

            epoch_loss /= minibatch_num + 1
            epoch_regular_loss /= minibatch_num + 1

            t2 = time()

            if verbose and epoch_num % 10 == 0:
                precision, recall, ndcg, mean_aps = evaluate_ranking(
                    self, test, train, k=[3, 5, 10])

                str_precs = "precisions=%.4f,%.4f,%.4f" % tuple(
                    [np.mean(a) for a in precision])
                str_recalls = "recalls=%.4f,%.4f,%.4f" % tuple(
                    [np.mean(a) for a in recall])
                str_ndcgs = "ndcgs=%.4f,%.4f,%.4f" % tuple(
                    [np.mean(a) for a in ndcg])

                output_str = "Epoch %d [%.1f s]\tloss=%.4f, regular_loss=%.4f, " \
                             "map=%.4f, %s, %s, %s[%.1f s]" % (epoch_num, t2 - t1,
                                                               epoch_loss, epoch_regular_loss,
                                                               mean_aps, str_precs, str_recalls, str_ndcgs,
                                                               time() - t2)
                print(output_str)
            else:
                output_str = "Epoch %d [%.1f s]\tloss=%.4f, regular_loss=%.4f[%.1f s]" % (
                    epoch_num, t2 - t1, epoch_loss, epoch_regular_loss,
                    time() - t2)
                print(output_str)

    def _get_teacher_topk(self, sequences, users, targets, k):
        """
        Pre-compute and cache teacher's top-K ranking for each training instance.
        By doing this we can make training with distillation much faster.

        Parameters
        ----------

        sequences: array of np.int64
            sequences of items
        users: array of np.int64
            users associated with each sequence
        targets: array of np.int64
            target items the user interacted with, given the sequence
        k: int
            length of teacher's exemplary ranking
        """
        with_targets = False

        n_train = sequences.shape[0]
        indices = np.arange(n_train)

        users, sequences = torch.from_numpy(users).long(), torch.from_numpy(
            sequences).long()

        # teacher topk results
        teacher_topk = np.zeros((n_train, k), dtype=np.int64)

        for (batch_indices, batch_users, batch_sequences,
             batch_targets) in minibatch(indices,
                                         users,
                                         sequences,
                                         targets,
                                         batch_size=16):

            cur_batch_size = batch_users.shape[0]
            all_items = torch.arange(start=0, end=self._num_items).repeat(
                cur_batch_size, 1).long()

            teacher_prediction = self._teacher_net(batch_sequences,
                                                   batch_users,
                                                   all_items).detach()

            _, tops = teacher_prediction.topk(k * 2, dim=1)  # top-2k items per instance
            tops = tops.cpu().numpy()

            new_tops = np.concatenate((batch_targets, tops), axis=1)
            topks = np.zeros((cur_batch_size, k), dtype=np.int64)

            for i, row in enumerate(new_tops):
                _, idx = np.unique(row, return_index=True)
                # whether the teacher's top-k ranking considers target items
                if with_targets:
                    topk = row[np.sort(idx)][:k]
                else:
                    topk = row[np.sort(idx)][self.T:k + self.T]
                topks[i, :] = topk
            teacher_topk[batch_indices, :] = topks
        np.save(
            'gowalla-teacher-dim=%d-top=%d.npy' %
            (self._teacher_model_args.d, k), teacher_topk)
        return teacher_topk

    def predict(self, user_id, item_ids=None, model=None):
        return super(DistilledRecommender,
                     self).predict(user_id, item_ids, model=self._student_net)
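
For intuition on the two weights combined in the distillation loop above, a small numpy demo (all values below are illustrative only) of the static position-importance weight built in fit() and the dynamic ranking-discrepancy weight from the inner loop:

import numpy as np

# static weight for K=5, lambda=1: w_r proportional to exp(-r/lambda), normalized
K, lamda = 5, 1.0
r = np.arange(1, K + 1, dtype=np.float32)
w_static = np.exp(-r / lamda)
w_static /= w_static.sum()
print(w_static)  # monotonically decreasing over ranks 1..K

# dynamic weight: tanh(mu * (predicted_rank - col)) clamped at 0, so a candidate
# the student already ranks at or above the teacher's position gets zero weight
mu, col, predicted_rank = 0.1, 2, 40.0
w_dyn = max(0.0, np.tanh(mu * (predicted_rank - col)))
print(w_dyn)

Example #9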
class Recommender(object):
    """
    Contains the attributes and methods needed to train a sequential
    recommendation model. Models are trained on many tuples of
    (users, sequences, targets, negatives), where the negatives come from
    negative sampling: for each known tuple of (user, sequence, targets),
    one or more items are randomly sampled to act as negatives.


    Parameters
    ----------

    args: args,
        Model-related arguments, like latent dimensions.
    """
    def __init__(self, args=None):
        # model related
        self._num_items = None
        self._num_users = None
        self._net = None
        self.args = args

        # learning related
        self._batch_size = self.args.batch_size
        self._n_iter = self.args.n_iter
        self._neg_samples = self.args.neg_samples

        # rank evaluation related
        self.test_sequence = None
        self._candidate = dict()

    @property
    def _initialized(self):
        return self._net is not None

    def _initialize(self, interactions):
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users

        self.test_sequence = interactions.test_sequences

        self._net = Caser(self._num_users,
                          self._num_items,
                          self.args)

        self._net.build_model()
        
        self.sess = tf.Session()
        init = tf.global_variables_initializer()
        self.sess.run(init) 
        

    def fit(self, train, test, verbose=False):
        """
        The general training loop to fit the model

        Parameters
        ----------

        train: :class:`spotlight.interactions.Interactions`
            training instances, also contains test sequences
        test: :class:`spotlight.interactions.Interactions`
            only contains targets for test sequences
        verbose: bool, optional
            print the logs
        """

        # convert to sequences, targets and users
        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids.reshape(-1, 1)

        L, T = train.sequences.L, train.sequences.T

        n_train = sequences_np.shape[0]

        output_str = 'total training instances: %d' % n_train
        print(output_str)

        if not self._initialized:
            self._initialize(train)

            
        start_epoch = 0
        for epoch_num in range(start_epoch, self._n_iter):

            t1 = time()
            users_np, sequences_np, targets_np = shuffle(users_np,
                                                         sequences_np,
                                                         targets_np)

            negatives_np = self._generate_negative_samples(users_np, train, n=self._neg_samples)

            epoch_loss = 0.0
            for (minibatch_num,
                 (batch_users,
                  batch_sequences,
                  batch_targets,
                  batch_negatives)) in enumerate(minibatch(users_np,
                                                           sequences_np,
                                                           targets_np,
                                                           negatives_np,
                                             batch_size=self._batch_size)):
                
                
                items_to_predict = np.concatenate((batch_targets, batch_negatives), 1)
                loss = self._net.train(self.sess, 
                                             batch_sequences,
                                             batch_users,
                                             items_to_predict)
                epoch_loss += loss
            epoch_loss /= minibatch_num + 1

            t2 = time()
            if verbose and (epoch_num + 1) % 10 == 0:
                precision, recall, mean_aps = evaluate_ranking(self, test, train, k=[1, 5, 10])
                output_str = "Epoch %d [%.1f s]\tloss=%.4f, map=%.4f, " \
                             "prec@1=%.4f, prec@5=%.4f, prec@10=%.4f, " \
                             "recall@1=%.4f, recall@5=%.4f, recall@10=%.4f, [%.1f s]" % (epoch_num + 1,
                                                                                         t2 - t1,
                                                                                         epoch_loss,
                                                                                         mean_aps,
                                                                                         np.mean(precision[0]),
                                                                                         np.mean(precision[1]),
                                                                                         np.mean(precision[2]),
                                                                                         np.mean(recall[0]),
                                                                                         np.mean(recall[1]),
                                                                                         np.mean(recall[2]),
                                                                                         time() - t2)
                print(output_str)
            else:
                output_str = "Epoch %d [%.1f s]\tloss=%.4f [%.1f s]" % (epoch_num + 1,
                                                                        t2 - t1,
                                                                        epoch_loss,
                                                                        time() - t2)
                print(output_str)

                
    def _generate_negative_samples(self, users, interactions, n):
        """
        Sample negatives from each user's candidate set. The
        candidate set of a user is defined as:
        {All Items} \ {Items Rated by User}

        Parameters
        ----------

        users: array of np.int64
            sequence users
        interactions: :class:`spotlight.interactions.Interactions`
            training instances, used for generating candidates
        n: int
            total number of negatives to sample for each sequence
        """

        users_ = users.squeeze()
        negative_samples = np.zeros((users_.shape[0], n), np.int64)
        if not self._candidate:
            all_items = np.arange(interactions.num_items - 1) + 1  # 0 for padding
            train = interactions.tocsr()
            for user, row in enumerate(train):
                self._candidate[user] = list(set(all_items) - set(row.indices))

        for i, u in enumerate(users_):
            for j in range(n):
                x = self._candidate[u]
                negative_samples[i, j] = x[
                    np.random.randint(len(x))]

        return negative_samples

    
    def predict(self, user_id, item_ids=None):
        """
        Make predictions for evaluation: given a user id, it will
        first retrieve the test sequence associated with that user
        and compute the recommendation scores for items.

        Parameters
        ----------

        user_id: int
           user id for which prediction scores are needed.
        item_ids: array, optional
            Array containing the item ids for which prediction scores
            are desired. If not supplied, predictions for all items
            will be computed.
        """

        sequences_np = self.test_sequence.sequences[user_id, :]
        sequences_np = np.atleast_2d(sequences_np)

        if item_ids is None:
            item_ids = np.arange(self._num_items).reshape(-1, 1)

        out = self._net.predict(self.sess,
                            sequences_np,
                            user_id,
                            item_ids)
        
        return out
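
Example #10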
class Recommender(object):
    """
    args: args,Model-related arguments, like latent dimensions.
    """
    def __init__(self, args=None):
        # model related
        self._num_items = None
        self._num_users = None
        self._net = None
        self.args = args

        # learning related
        self._batch_size = self.args.batch_size
        self._n_iter = self.args.n_iter
        self._neg_samples = self.args.neg_samples

        # rank evaluation related
        self.test_sequence = None
        self._candidate = dict()
        self._top_k = args.top_k

    @property
    def _initialized(self):
        return self._net is not None

    def _initialize(self, interactions):
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users

        self.test_sequence = interactions.test_sequences

        self._net = Caser(self._num_users, self._num_items, self.args)
        self.sess = tf.Session()
        init = tf.global_variables_initializer()
        self.sess.run(init)

    def fit(self, train, val, verbose=False):
        """
        The general training loop to fit the model
        Parameters
        ----------
        train: :class:`spotlight.interactions.Interactions`
            training instances, also contains test sequences
        val: :class:`spotlight.interactions.Interactions`
            only contains targets for test sequences
        verbose: bool, optional
            print the logs
        """

        # convert to sequences, targets and users
        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids.reshape(-1, 1)

        L, T = train.sequences.L, train.sequences.T

        n_train = sequences_np.shape[0]

        print('total training instances: %d' % n_train)

        if not self._initialized:
            self._initialize(train)

        start_epoch = 0
        # track the cumulative loss and best validation HR across epochs
        # (global_step is cumulative, so the running mean must be too)
        step_loss = 0.0
        best_HR = 0.0
        for epoch_num in range(start_epoch, self._n_iter):

            t1 = time()
            users_np, sequences_np, targets_np = shuffle(
                users_np, sequences_np, targets_np)

            negatives_np = self._generate_negative_samples(users_np,
                                                           train,
                                                           n=self._neg_samples)

            for (minibatch_num,
                 (batch_users, batch_sequences, batch_targets,
                  batch_negatives)) in enumerate(
                      minibatch(users_np, sequences_np, targets_np,
                                negatives_np, batch_size=self._batch_size)):
                items_to_predict = np.concatenate(
                    (batch_targets, batch_negatives), 1)
                loss, global_step = self._net.train(self.sess, batch_sequences,
                                                    batch_users,
                                                    items_to_predict)
                step_loss += loss
                if global_step % 1000 == 0:
                    print('epoch-{}\tstep-{}\tloss-{:.6f}'.format(
                        epoch_num + 1, global_step, step_loss / global_step))
                if verbose and global_step % 10000 == 0:
                    t2 = time()
                    HR, NDCG, MRR = self.predict(val)
                    output_str = "Epoch %d step %d [%.1f s]\tloss=%.6f,HR@20=%.6f, " \
                                 "NDCG@20=%.6f, MRR@20=%.6f,[%.1f s] " % (epoch_num + 1,global_step,
                                                                          t2 - t1, step_loss/global_step,
                                                                          HR, NDCG, MRR, time() - t2)
                    print(output_str)
                    if HR > best_HR:
                        best_HR = HR
                        ckpt_path = self.args.check_dir + 'model.ckpt'
                        self._net.saver.save(self.sess,
                                             ckpt_path,
                                             global_step=global_step)
                        print("model saved to {}".format(ckpt_path))

    def _generate_negative_samples(self, users, interactions, n):
        """
        Sample negatives from each user's candidate set. The
        candidate set of a user is defined as:
        {All Items} \ {Items Rated by User}

        Parameters
        ----------

        users: array of np.int64
            sequence users
        interactions: :class:`spotlight.interactions.Interactions`
            training instances, used for generating candidates
        n: int
            total number of negatives to sample for each sequence
        """

        users_ = users.squeeze()
        negative_samples = np.zeros((users_.shape[0], n), np.int64)
        if not self._candidate:
            all_items = np.arange(interactions.num_items - 1) + 1  # 0 for padding
            train = interactions.tocsr()
            for user, row in enumerate(train):
                self._candidate[user] = list(set(all_items) - set(row.indices))

        for i, u in enumerate(users_):
            for j in range(n):
                x = self._candidate[u]
                negative_samples[i, j] = x[np.random.randint(len(x))]

        return negative_samples

    def predict(self, val, item_ids=None):
        """
        Make predictions for evaluation: given a user id, it will
        first retrieve the test sequence associated with that user
        and compute the recommendation scores for items.

        Parameters
        ----------

        user_id: int
           users id for which prediction scores needed.
        item_ids: array, optional
            Array containing the item ids for which prediction scores
            are desired. If not supplied, predictions for all items
            will be computed.
        """
        sequences_np = val.sequences.sequences
        targets_np = val.sequences.targets
        users_np = val.sequences.user_ids.reshape(-1, 1)
        n_val = sequences_np.shape[0]
        print('total validation instances: %d' % n_val)
        NDCG, HR, MRR = 0.0, 0.0, 0.0
        item_ids = np.zeros((self._batch_size, self._num_items))
        for i in range(self._batch_size):
            item_ids[i] = np.arange(self._num_items)
        valid_batches = n_val // self._batch_size
        for i in range(valid_batches):
            sequences = sequences_np[i * self._batch_size:(i + 1) *
                                     self._batch_size]
            targets = targets_np[i * self._batch_size:(i + 1) *
                                 self._batch_size]
            users = users_np[i * self._batch_size:(i + 1) * self._batch_size]
            _, top_k_index = self._net.predict(self.sess, sequences, users,
                                               item_ids)
            hr, ndcg, mrr = 0.0, 0.0, 0.0
            for b in range(self._batch_size):  # avoid shadowing the batch index i
                cur_user = top_k_index[b]
                for j in range(self._top_k):
                    if targets[b][0] == cur_user[j]:
                        hr += 1
                        mrr += 1 / (1 + j)
                        dcg = 1 / np.log2(1 + 1 + j)
                        idcg = 1 / np.log2(1 + 1)
                        ndcg += dcg / idcg
                        break
            HR += hr / self._batch_size
            NDCG += ndcg / self._batch_size
            MRR += mrr / self._batch_size
        return HR / valid_batches, NDCG / valid_batches, MRR / valid_batches
Example #11
class Recommender(object):
    """
    Contains the attributes and methods needed to train a sequential
    recommendation model. Models are trained on many tuples of
    (users, sequences, targets, negatives), where the negatives come from
    negative sampling: for each known tuple of (user, sequence, targets),
    one or more items are randomly sampled to act as negatives.


    Parameters
    ----------

    n_iter: int,
        Number of iterations to run.
    batch_size: int,
        Minibatch size.
    l2: float,
        L2 loss penalty, also known as the 'lambda' of l2 regularization.
    neg_samples: int,
        Number of negative samples to generate for each target.
        If targets=3 and neg_samples=3, it will sample 9 negatives.
    learning_rate: float,
        Initial learning rate.
    use_cuda: boolean,
        Run the model on a GPU or CPU.
    model_args: args,
        Model-related arguments, like latent dimensions.
    """

    def __init__(self,
                 n_iter=None,
                 batch_size=None,
                 l2=None,
                 neg_samples=None,
                 learning_rate=None,
                 use_cuda=False,
                 model_args=None):

        # model related
        self._num_items = None
        self._num_users = None
        self._net = None
        self.model_args = model_args

        # learning related
        self._batch_size = batch_size
        self._n_iter = n_iter
        self._learning_rate = learning_rate
        self._l2 = l2
        self._neg_samples = neg_samples
        self._device = torch.device("cuda" if use_cuda else "cpu")

        # rank evaluation related
        self.test_sequence = None
        self._candidate = dict()

    @property
    def _initialized(self):
        return self._net is not None

    def _initialize(self, interactions):
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users

        self.test_sequence = interactions.test_sequences

        self._net = Caser(self._num_users,
                          self._num_items,
                          self.model_args).to(self._device)

        self._optimizer = optim.Adam(self._net.parameters(),
                                     weight_decay=self._l2,
                                     lr=self._learning_rate)

    def fit(self, train, test, verbose=False):
        """
        The general training loop to fit the model

        Parameters
        ----------

        train: :class:`spotlight.interactions.Interactions`
            training instances, also contains test sequences
        test: :class:`spotlight.interactions.Interactions`
            only contains targets for test sequences
        verbose: bool, optional
            print the logs
        """

        # convert to sequences, targets and users
        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids.reshape(-1, 1)

        L, T = train.sequences.L, train.sequences.T

        n_train = sequences_np.shape[0]

        output_str = 'total training instances: %d' % n_train
        print(output_str)

        if not self._initialized:
            self._initialize(train)

        start_epoch = 0

        for epoch_num in range(start_epoch, self._n_iter):

            t1 = time()

            # set model to training mode
            self._net.train()

            users_np, sequences_np, targets_np = shuffle(users_np,
                                                         sequences_np,
                                                         targets_np)

            negatives_np = self._generate_negative_samples(users_np, train, n=self._neg_samples)

            # convert numpy arrays to PyTorch tensors and move them to the corresponding device
            users, sequences, targets, negatives = (torch.from_numpy(users_np).long(),
                                                    torch.from_numpy(sequences_np).long(),
                                                    torch.from_numpy(targets_np).long(),
                                                    torch.from_numpy(negatives_np).long())

            users, sequences, targets, negatives = (users.to(self._device),
                                                    sequences.to(self._device),
                                                    targets.to(self._device),
                                                    negatives.to(self._device))

            epoch_loss = 0.0

            for (minibatch_num,
                 (batch_users,
                  batch_sequences,
                  batch_targets,
                  batch_negatives)) in enumerate(minibatch(users,
                                                           sequences,
                                                           targets,
                                                           negatives,
                                                           batch_size=self._batch_size)):
                items_to_predict = torch.cat((batch_targets, batch_negatives), 1)
                items_prediction = self._net(batch_sequences,
                                             batch_users,
                                             items_to_predict)

                (targets_prediction,
                 negatives_prediction) = torch.split(items_prediction,
                                                     [batch_targets.size(1),
                                                      batch_negatives.size(1)], dim=1)

                self._optimizer.zero_grad()
                # compute the binary cross-entropy loss
                positive_loss = -torch.mean(
                    torch.log(torch.sigmoid(targets_prediction)))
                negative_loss = -torch.mean(
                    torch.log(1 - torch.sigmoid(negatives_prediction)))
                loss = positive_loss + negative_loss
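                # note: log(torch.sigmoid(x)) can underflow to -inf for very
                # negative logits; torch.nn.functional.logsigmoid(x) and
                # torch.nn.functional.logsigmoid(-x) are numerically stabler
                # equivalents of the two terms above.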

                epoch_loss += loss.item()

                loss.backward()
                self._optimizer.step()

            epoch_loss /= minibatch_num + 1

            t2 = time()
            if verbose and (epoch_num == 0 or (epoch_num + 1) % 10 == 0):
                output_str = "Epoch %d [%.1f s]\tloss=%.4f [%.1f s]" % (epoch_num + 1,
                                                                        t2 - t1,
                                                                        epoch_loss,
                                                                        time() - t2)
                print(output_str)
                hits, ndcg = evaluate_hits_ndcg(self, train, test)
                print(f'hits@10: {hits}, ndcg@10: {ndcg}')
                
                """
                precision, recall, mean_aps, hits = evaluate_ranking(self, test, train, k=[1, 5, 10])
                output_str = "Epoch %d [%.1f s]\tloss=%.4f, map=%.4f, " \
                             "prec@1=%.4f, prec@5=%.4f, prec@10=%.4f, " \
                             "recall@1=%.4f, recall@5=%.4f, recall@10=%.4f, [%.1f s]" % (epoch_num + 1,
                                                                                         t2 - t1,
                                                                                         epoch_loss,
                                                                                         mean_aps,
                                                                                         np.mean(precision[0]),
                                                                                         np.mean(precision[1]),
                                                                                         np.mean(precision[2]),
                                                                                         np.mean(recall[0]),
                                                                                         np.mean(recall[1]),
                                                                                         np.mean(recall[2]),
                                                                                         time() - t2)
                print(output_str)
                """
            else:
                output_str = "Epoch %d [%.1f s]\tloss=%.4f [%.1f s]" % (epoch_num + 1,
                                                                        t2 - t1,
                                                                        epoch_loss,
                                                                        time() - t2)
                print(output_str)
            if (epoch_num + 1) % 5 == 0:
                self._candidate = dict()
            

    def _generate_negative_samples(self, users, interactions, n):
        """
        Sample negatives from each user's candidate set. The
        candidate set of a user is defined as:
        {All Items} \ {Items Rated by User}

        Parameters
        ----------

        users: array of np.int64
            sequence users
        interactions: :class:`spotlight.interactions.Interactions`
            training instances, used for generating candidates
        n: int
            total number of negatives to sample for each sequence
        """

        users_ = users.squeeze()
        negative_samples = np.zeros((users_.shape[0], n), np.int64)
        
        sample_limit = 200
        
        if not self._candidate:
            all_items = np.arange(interactions.num_items - 1) + 1  # 0 for padding
            train = interactions.tocsr()
            for user, row in enumerate(train):
                rated = set(row.indices)
                if len(all_items) > 5000:
                    # for large catalogs, repeatedly sample a capped pool of
                    # items until enough unrated candidates are collected
                    self._candidate[user] = []
                    while len(self._candidate[user]) < n:
                        cur_items = np.random.choice(all_items,
                                                     size=sample_limit,
                                                     replace=False)
                        self._candidate[user] = list(set(cur_items) - rated)
                else:
                    # small catalogs: take the full complement directly, which
                    # also avoids looping forever when a user has fewer than
                    # n unrated items
                    self._candidate[user] = list(set(all_items) - rated)

        for i, u in enumerate(users_):
            for j in range(n):
                x = self._candidate[u]
                negative_samples[i, j] = x[
                    np.random.randint(len(x))]

        return negative_samples

    def predict(self, user_id, item_ids=None):
        """
        Make predictions for evaluation: given a user id, it will
        first retrieve the test sequence associated with that user
        and compute the recommendation scores for items.

        Parameters
        ----------

        user_id: int
           user id for which prediction scores are needed.
        item_ids: array, optional
            Array containing the item ids for which prediction scores
            are desired. If not supplied, predictions for all items
            will be computed.
        """

        if self.test_sequence is None:
            raise ValueError('Missing test sequences, cannot make predictions')

        # set model to evaluation mode
        self._net.eval()
        with torch.no_grad():
            sequences_np = self.test_sequence.sequences[user_id, :]
            sequences_np = np.atleast_2d(sequences_np)

            if item_ids is None:
                item_ids = np.arange(self._num_items).reshape(-1, 1)

            sequences = torch.from_numpy(sequences_np).long()
            item_ids = torch.from_numpy(item_ids).long()
            user_id = torch.from_numpy(np.array([[user_id]])).long()

            user, sequences, items = (user_id.to(self._device),
                                      sequences.to(self._device),
                                      item_ids.to(self._device))

            out = self._net(sequences,
                            user,
                            items,
                            for_pred=True)

        return out.cpu().numpy().flatten()