Ejemplo n.º 1
0
def test(args):
    """Evaluate a saved Caser checkpoint with top-k ranking metrics.

    Loads the interactions at ``args.test_root``, converts them with
    ``to_sequence(args.L, args.T)``, restores the latest checkpoint found in
    ``args.check_dir`` and scores the data in full batches of
    ``args.batch_size``.  Trailing instances that do not fill a whole batch
    are not evaluated.

    Parameters
    ----------

    args: args,
        Run configuration.  This function reads ``test_root``, ``L``, ``T``,
        ``batch_size``, ``check_dir`` and ``top_k``; the whole object is also
        handed to ``Caser``.

    Returns
    -------

    tuple of float or None
        ``(HR, NDCG, MRR)``, each averaged over the evaluated batches, or
        ``None`` when no checkpoint could be restored.

    Raises
    ------

    ValueError
        If a checkpoint was restored but there are fewer test instances than
        one full batch, so no batch can be evaluated.
    """
    data = Interactions(args.test_root)
    data.to_sequence(args.L, args.T)
    sequences_np = data.sequences.sequences
    targets_np = data.sequences.targets
    users_np = data.sequences.user_ids.reshape(-1, 1)
    n_test = sequences_np.shape[0]
    print('total test instances: %d' % n_test)
    num_users = data.num_users
    num_items = data.num_items
    batch_size = args.batch_size

    # Every row lists all item ids, one row per batch entry.  float64 keeps
    # the dtype of the zero-initialised array this replaces.
    item_ids = np.tile(np.arange(num_items, dtype=np.float64),
                       (batch_size, 1))
    test_batches = n_test // batch_size

    model = Caser(num_users, num_items, args)
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    with tf.Session(config=gpu_config) as sess:
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(args.check_dir)
        if not (ckpt and ckpt.model_checkpoint_path):
            print('Restore model from {} failed!'.format(args.check_dir))
            return
        saver.restore(sess, ckpt.model_checkpoint_path)
        print('Restore model from {} successfully!'.format(args.check_dir))

        if test_batches == 0:
            # Without this guard the final averages divide by zero.
            raise ValueError(
                'need at least one full batch of %d test instances, got %d'
                % (batch_size, n_test))

        # DCG of a single relevant item placed at the first rank.
        ideal_dcg = 1.0 / np.log2(2)
        HR, NDCG, MRR = 0.0, 0.0, 0.0
        for batch in range(test_batches):
            start = batch * batch_size
            stop = start + batch_size
            sequences = sequences_np[start:stop]
            targets = targets_np[start:stop]
            users = users_np[start:stop]
            # presumably the ranked top-k item ids per row -- confirm against
            # Caser.predict
            _, top_k_index = model.predict(sess, sequences, users, item_ids)

            hr, ndcg, mrr = 0.0, 0.0, 0.0
            for row in range(batch_size):
                ranked = top_k_index[row]
                # Only the first target of each instance is scored.
                target = targets[row][0]
                for rank in range(args.top_k):
                    if ranked[rank] == target:
                        hr += 1
                        # Explicit float division so the reciprocal rank
                        # cannot truncate under integer division.
                        mrr += 1.0 / (rank + 1)
                        ndcg += (1.0 / np.log2(rank + 2)) / ideal_dcg
                        break
            HR += hr / batch_size
            NDCG += ndcg / batch_size
            MRR += mrr / batch_size
    return HR / test_batches, NDCG / test_batches, MRR / test_batches
Ejemplo n.º 2
0
class Recommender(object):
    """
    Holds the state and methods needed to train a sequential
    recommendation model. Training consumes tuples of
    (users, sequences, targets, negatives); the negatives come from
    negative sampling: for every (user, sequence, targets) tuple, one or
    more items are drawn at random to act as negatives.


    Parameters
    ----------

    args: args,
        Model-related arguments, like latent dimensions. Must provide
        ``batch_size``, ``n_iter`` and ``neg_samples``; the whole object
        is also passed to ``Caser``.
    """
    def __init__(self, args=None):
        # model related
        self._num_items = None
        self._num_users = None
        self._net = None
        self.args = args

        # learning related
        self._batch_size = self.args.batch_size
        self._n_iter = self.args.n_iter
        self._neg_samples = self.args.neg_samples

        # rank evaluation related
        self.test_sequence = None
        # user index -> list of item ids that may be sampled as negatives
        self._candidate = dict()

    @property
    def _initialized(self):
        """Whether the network has been created."""
        return self._net is not None

    def _initialize(self, interactions):
        """
        Create the network and the TensorFlow session.

        Parameters
        ----------

        interactions: :class:`spotlight.interactions.Interactions`
            source of ``num_items``, ``num_users`` and ``test_sequences``
        """
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users

        self.test_sequence = interactions.test_sequences

        self._net = Caser(self._num_users,
                          self._num_items,
                          self.args)
        self._net.build_model()

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

    def fit(self, train, test, verbose=False):
        """
        The general training loop to fit the model

        Parameters
        ----------

        train: :class:`spotlight.interactions.Interactions`
            training instances, also contains test sequences
        test: :class:`spotlight.interactions.Interactions`
            only contains targets for test sequences
        verbose: bool, optional
            when true, ranking metrics are computed and printed on every
            10th epoch
        """

        # convert to sequences, targets and users
        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids.reshape(-1, 1)

        n_train = sequences_np.shape[0]
        print('total training instances: %d' % n_train)

        if not self._initialized:
            self._initialize(train)

        for epoch_num in range(self._n_iter):

            t1 = time()
            users_np, sequences_np, targets_np = shuffle(users_np,
                                                         sequences_np,
                                                         targets_np)

            # Negatives are re-drawn every epoch, after the shuffle, so
            # they stay aligned with the shuffled users.
            negatives_np = self._generate_negative_samples(
                users_np, train, n=self._neg_samples)

            epoch_loss = 0.0
            num_batches = 0
            batches = minibatch(users_np,
                                sequences_np,
                                targets_np,
                                negatives_np,
                                batch_size=self._batch_size)
            for num_batches, (batch_users,
                              batch_sequences,
                              batch_targets,
                              batch_negatives) in enumerate(batches, start=1):
                # Targets first, negatives after, along the item axis.
                items_to_predict = np.concatenate(
                    (batch_targets, batch_negatives), 1)
                loss = self._net.train(self.sess,
                                       batch_sequences,
                                       batch_users,
                                       items_to_predict)
                epoch_loss += loss
            if num_batches:
                # Guarded so an epoch that yields no batch reports a loss
                # of 0.0 instead of failing on an unbound counter.
                epoch_loss /= num_batches

            t2 = time()
            if verbose and (epoch_num + 1) % 10 == 0:
                precision, recall, mean_aps = evaluate_ranking(
                    self, test, train, k=[1, 5, 10])
                output_str = "Epoch %d [%.1f s]\tloss=%.4f, map=%.4f, " \
                             "prec@1=%.4f, prec@5=%.4f, prec@10=%.4f, " \
                             "recall@1=%.4f, recall@5=%.4f, recall@10=%.4f, [%.1f s]" % (
                                 epoch_num + 1,
                                 t2 - t1,
                                 epoch_loss,
                                 mean_aps,
                                 np.mean(precision[0]),
                                 np.mean(precision[1]),
                                 np.mean(precision[2]),
                                 np.mean(recall[0]),
                                 np.mean(recall[1]),
                                 np.mean(recall[2]),
                                 time() - t2)
                print(output_str)
            else:
                output_str = "Epoch %d [%.1f s]\tloss=%.4f [%.1f s]" % (
                    epoch_num + 1,
                    t2 - t1,
                    epoch_loss,
                    time() - t2)
                print(output_str)

    def _generate_negative_samples(self, users, interactions, n):
        """
        Sample negatives from a candidate set of each user. The
        candidate set of each user is every item id except 0 (reserved
        for padding) and except the items in that user's row of
        ``interactions.tocsr()``.

        The candidate sets are built on the first call and cached in
        ``self._candidate``; ``interactions`` is not read afterwards.

        Parameters
        ----------

        users: array of np.int64
            sequence users, one entry per training instance
        interactions: :class:`spotlight.interactions.Interactions`
            training instances, used for generate candidates
        n: int
            total number of negatives to sample for each sequence

        Returns
        -------

        np.ndarray of np.int64, shape (len(users), n)
            sampled negative item ids
        """

        # reshape(-1) rather than squeeze(): squeeze() turns a single-row
        # input into a 0-d array, which has no shape[0].
        users_ = np.asarray(users).reshape(-1)
        negative_samples = np.zeros((users_.shape[0], n), np.int64)
        if not self._candidate:
            # 0 for padding
            all_items = set(np.arange(interactions.num_items - 1) + 1)
            train = interactions.tocsr()
            for user, row in enumerate(train):
                self._candidate[user] = list(all_items - set(row.indices))

        for i, u in enumerate(users_):
            candidates = self._candidate[u]
            for j in range(n):
                negative_samples[i, j] = candidates[
                    np.random.randint(len(candidates))]

        return negative_samples

    def predict(self, user_id, item_ids=None):
        """
        Make predictions for evaluation: given a user id, it will
        first retrieve the test sequence associated with that user
        and compute the recommendation scores for items.

        Parameters
        ----------

        user_id: int
           users id for which prediction scores needed.
        item_ids: array, optional
            Array containing the item ids for which prediction scores
            are desired. If not supplied, predictions for all items
            will be computed.

        Returns
        -------

        Whatever the network's ``predict`` returns for this input.
        """

        sequences_np = self.test_sequence.sequences[user_id, :]
        sequences_np = np.atleast_2d(sequences_np)

        if item_ids is None:
            item_ids = np.arange(self._num_items).reshape(-1, 1)

        return self._net.predict(self.sess,
                                 sequences_np,
                                 user_id,
                                 item_ids)
class Recommender(object):
    """
    Trains a Caser network with negative sampling and validates it with
    top-k ranking metrics, saving a checkpoint whenever the validation
    hit ratio improves.

    Parameters
    ----------

    args: args,
        Model-related arguments, like latent dimensions. Must provide
        ``batch_size``, ``n_iter``, ``neg_samples``, ``top_k`` and
        ``check_dir``; the whole object is also passed to ``Caser``.
    """
    def __init__(self, args=None):
        # model related
        self._num_items = None
        self._num_users = None
        self._net = None
        self.args = args

        # learning related
        self._batch_size = self.args.batch_size
        self._n_iter = self.args.n_iter
        self._neg_samples = self.args.neg_samples

        # rank evaluation related
        self.test_sequence = None
        # user index -> list of item ids that may be sampled as negatives
        self._candidate = dict()
        self._top_k = self.args.top_k

    @property
    def _initialized(self):
        """Whether the network has been created."""
        return self._net is not None

    def _initialize(self, interactions):
        """
        Create the network and the TensorFlow session.

        Parameters
        ----------

        interactions: :class:`spotlight.interactions.Interactions`
            source of ``num_items``, ``num_users`` and ``test_sequences``
        """
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users

        self.test_sequence = interactions.test_sequences

        self._net = Caser(self._num_users, self._num_items, self.args)
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

    def fit(self, train, val, verbose=False):
        """
        The general training loop to fit the model

        Parameters
        ----------

        train: :class:`spotlight.interactions.Interactions`
            training instances, also contains test sequences
        val: :class:`spotlight.interactions.Interactions`
            validation instances passed to :meth:`predict`
        verbose: bool, optional
            when true, the model is validated every time the global step
            is a multiple of 10000 and checkpointed if the hit ratio
            improved
        """
        import os  # local import: only needed to build the checkpoint path

        # convert to sequences, targets and users
        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids.reshape(-1, 1)

        n_train = sequences_np.shape[0]
        print('total training instances: %d' % n_train)

        if not self._initialized:
            self._initialize(train)

        # Best validation hit ratio of the whole run.  It is initialised
        # once, outside the epoch loop, so a later epoch can only overwrite
        # the checkpoint with a genuinely better model.
        best_HR = 0.0
        for epoch_num in range(self._n_iter):

            t1 = time()
            users_np, sequences_np, targets_np = shuffle(
                users_np, sequences_np, targets_np)

            negatives_np = self._generate_negative_samples(
                users_np, train, n=self._neg_samples)

            step_loss = 0.0
            batches = minibatch(users_np,
                                sequences_np,
                                targets_np,
                                negatives_np,
                                batch_size=self._batch_size)
            for batches_done, (batch_users,
                               batch_sequences,
                               batch_targets,
                               batch_negatives) in enumerate(batches, start=1):
                # Targets first, negatives after, along the item axis.
                items_to_predict = np.concatenate(
                    (batch_targets, batch_negatives), 1)
                loss, global_step = self._net.train(self.sess,
                                                    batch_sequences,
                                                    batch_users,
                                                    items_to_predict)
                step_loss += loss
                # step_loss restarts every epoch, so it is averaged over the
                # batches of this epoch rather than over global_step.
                mean_loss = step_loss / batches_done

                if global_step % 1000 == 0:
                    print('epoch-{}\tstep-{}\tloss-{:.6f}'.format(
                        epoch_num + 1, global_step, mean_loss))

                if verbose and global_step % 10000 == 0:
                    t2 = time()
                    HR, NDCG, MRR = self.predict(val)
                    output_str = "Epoch %d step %d [%.1f s]\tloss=%.6f,HR@%d=%.6f, " \
                                 "NDCG@%d=%.6f, MRR@%d=%.6f,[%.1f s] " % (
                                     epoch_num + 1, global_step,
                                     t2 - t1, mean_loss,
                                     self._top_k, HR,
                                     self._top_k, NDCG,
                                     self._top_k, MRR,
                                     time() - t2)
                    print(output_str)
                    if HR > best_HR:
                        best_HR = HR
                        # join() inserts the separator when check_dir has no
                        # trailing one.
                        ckpt_path = os.path.join(self.args.check_dir,
                                                 'model.ckpt')
                        self._net.saver.save(self.sess,
                                             ckpt_path,
                                             global_step=global_step)
                        print("model saved to {}".format(ckpt_path))

    def _generate_negative_samples(self, users, interactions, n):
        """
        Sample negatives from a candidate set of each user. The
        candidate set of each user is every item id except 0 (reserved
        for padding) and except the items in that user's row of
        ``interactions.tocsr()``.

        The candidate sets are built on the first call and cached in
        ``self._candidate``; ``interactions`` is not read afterwards.

        Parameters
        ----------

        users: array of np.int64
            sequence users, one entry per training instance
        interactions: :class:`spotlight.interactions.Interactions`
            training instances, used for generate candidates
        n: int
            total number of negatives to sample for each sequence

        Returns
        -------

        np.ndarray of np.int64, shape (len(users), n)
            sampled negative item ids
        """

        # reshape(-1) rather than squeeze(): squeeze() turns a single-row
        # input into a 0-d array, which has no shape[0].
        users_ = np.asarray(users).reshape(-1)
        negative_samples = np.zeros((users_.shape[0], n), np.int64)
        if not self._candidate:
            # 0 for padding
            all_items = set(np.arange(interactions.num_items - 1) + 1)
            train = interactions.tocsr()
            for user, row in enumerate(train):
                self._candidate[user] = list(all_items - set(row.indices))

        for i, u in enumerate(users_):
            candidates = self._candidate[u]
            for j in range(n):
                negative_samples[i, j] = candidates[
                    np.random.randint(len(candidates))]

        return negative_samples

    @staticmethod
    def _batch_rank_metrics(top_k_index, targets, top_k):
        """
        Sum hit, NDCG and reciprocal-rank contributions over one batch.

        Parameters
        ----------

        top_k_index: indexable of indexables
            ranked item ids, one row per instance
        targets: indexable of indexables
            target items per instance; only column 0 is scored
        top_k: int
            number of leading ranks inspected per instance

        Returns
        -------

        tuple of float
            ``(hits, ndcg, reciprocal_rank)`` summed over the batch
        """
        # DCG of a single relevant item placed at the first rank.
        ideal_dcg = 1.0 / np.log2(2)
        hr, ndcg, mrr = 0.0, 0.0, 0.0
        for row in range(len(targets)):
            ranked = top_k_index[row]
            target = targets[row][0]
            for rank in range(top_k):
                if ranked[rank] == target:
                    hr += 1
                    # Explicit float division so the reciprocal rank cannot
                    # truncate under integer division.
                    mrr += 1.0 / (rank + 1)
                    ndcg += (1.0 / np.log2(rank + 2)) / ideal_dcg
                    break
        return hr, ndcg, mrr

    def predict(self, val, item_ids=None):
        """
        Score the validation instances in full batches and return the
        averaged top-k ranking metrics.

        Trailing instances that do not fill a whole batch of
        ``self._batch_size`` are not evaluated.

        Parameters
        ----------

        val: :class:`spotlight.interactions.Interactions`
            validation instances; ``val.sequences`` supplies the
            sequences, targets and user ids
        item_ids: array, optional
            Ignored. Every item id is always scored; the parameter is
            kept so existing callers keep working.

        Returns
        -------

        tuple of float
            ``(HR, NDCG, MRR)``, each averaged over the evaluated batches

        Raises
        ------

        ValueError
            If there are fewer validation instances than one full batch.
        """
        sequences_np = val.sequences.sequences
        targets_np = val.sequences.targets
        users_np = val.sequences.user_ids.reshape(-1, 1)
        n_val = sequences_np.shape[0]
        print('total validation instances: %d' % n_val)

        batch_size = self._batch_size
        valid_batches = n_val // batch_size
        if valid_batches == 0:
            # Without this guard the final averages divide by zero.
            raise ValueError(
                'need at least one full batch of %d validation instances, '
                'got %d' % (batch_size, n_val))

        # Every row lists all item ids, one row per batch entry.  float64
        # keeps the dtype of the zero-initialised array this replaces.
        item_ids = np.tile(np.arange(self._num_items, dtype=np.float64),
                           (batch_size, 1))

        NDCG, HR, MRR = 0.0, 0.0, 0.0
        for batch in range(valid_batches):
            start = batch * batch_size
            stop = start + batch_size
            sequences = sequences_np[start:stop]
            targets = targets_np[start:stop]
            users = users_np[start:stop]
            # presumably the ranked top-k item ids per row -- confirm
            # against Caser.predict
            _, top_k_index = self._net.predict(self.sess, sequences, users,
                                               item_ids)
            hr, ndcg, mrr = self._batch_rank_metrics(top_k_index, targets,
                                                     self._top_k)
            HR += hr / batch_size
            NDCG += ndcg / batch_size
            MRR += mrr / batch_size
        return HR / valid_batches, NDCG / valid_batches, MRR / valid_batches