Example #1
    def fit(self):
        # train; keep the best validation mAP and stop early once it degrades
        valid_aps = 0
        for e in range(self.args.n_epochs):
            t1 = time()
            avg_loss = self.train_one_epoch()
            t2 = time()
            if e % 5 == 0 or e == self.args.n_epochs - 1:
                precision, recall, mean_aps = evaluate_ranking(self,
                                                               self.test,
                                                               self.train,
                                                               k=[1, 5, 10])
                precs = [np.mean(p) for p in precision]
                recalls = [np.mean(r) for r in recall]
                output_str = f"Epoch {e+1} [{t2-t1:.1f}s]\tloss={avg_loss:.4f}, map={mean_aps:.4f}, " \
                             f"prec@1={precs[0]:.4f}, prec@5={precs[1]:.4f}, prec@10={precs[2]:.4f}, " \
                             f"recall@1={recalls[0]:.4f}, recall@5={recalls[1]:.4f}, recall@10={recalls[2]:.4f}, [{time()-t2:.1f}s]"

                if mean_aps >= valid_aps:
                    valid_aps = mean_aps
                else:
                    break

        print(output_str)
        return {
            'epochs': e,
            'loss': avg_loss,
            'mAP': mean_aps,
            'prec1': precs[0],
            'prec5': precs[1],
            'prec10': precs[2],
            'recall1': recalls[0],
            'recall5': recalls[1],
            'recall10': recalls[2]
        }
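
A note on the early-stopping check above: the best validation mAP is kept in valid_aps, and training stops as soon as an evaluation fails to improve on it. The same pattern in isolation, as a minimal sketch (train_one_epoch and evaluate are stand-ins for the pieces used above, and the sketch evaluates every epoch rather than every fifth):

def fit_with_early_stopping(model, n_epochs, evaluate):
    best_map = 0.0
    for epoch in range(n_epochs):
        model.train_one_epoch()
        current_map = evaluate(model)   # e.g. mean average precision
        if current_map >= best_map:
            best_map = current_map      # still improving: keep training
        else:
            break                       # mAP degraded: stop
    return best_map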
Example #2
    def fit(self, train, test, verbose=False):
        """
        The general training loop to fit the model

        Parameters
        ----------

        train: :class:`interactions.Interactions`
            training instances, also contains test sequences
        test: :class:`interactions.Interactions`
            only contains targets for test sequences
        verbose: bool, optional
            print the logs
        """

        # convert sequences, targets and users to numpy arrays
        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids.reshape(-1, 1)

        L, T = train.sequences.L, train.sequences.T

        n_train = sequences_np.shape[0]

        print('total training instances: %d' % n_train)

        if not self._initialized:
            self._initialize(train)

        start_epoch = 1
        if self.checkpoint:
            print("loading checkpoint from %s" % self.checkpoint)
            checkpoint = torch.load(self.checkpoint)
            start_epoch = checkpoint['epoch_num'] + 1  # resume after the saved epoch
            self._net.load_state_dict(checkpoint['state_dict'])
            self._optimizer.load_state_dict(checkpoint['optimizer'])
            print("loaded checkpoint %s (epoch %d)" %
                  (self.checkpoint, start_epoch))

        # compute number of parameters
        print("Number of params: %d" % compute_model_size(self._net))

        for epoch_num in range(start_epoch, self._n_iter + 1):

            t1 = time()

            # set the model to training mode and move it to the target device
            self._net.train()
            self._net = self._net.to(self._device)

            users_np, sequences_np, targets_np = shuffle(
                users_np, sequences_np, targets_np)

            negatives_np = self._generate_negative_samples(users_np,
                                                           train,
                                                           n=self._neg_samples)

            # convert numpy arrays to PyTorch tensors and move them to the target device
            users, sequences, targets, negatives = (
                torch.from_numpy(users_np).long(),
                torch.from_numpy(sequences_np).long(),
                torch.from_numpy(targets_np).long(),
                torch.from_numpy(negatives_np).long())

            users, sequences, targets, negatives = (users.to(self._device),
                                                    sequences.to(self._device),
                                                    targets.to(self._device),
                                                    negatives.to(self._device))

            epoch_loss = 0.0

            for (minibatch_num, (batch_users, batch_sequences, batch_targets,
                                 batch_negatives)) in enumerate(
                                     minibatch(users,
                                               sequences,
                                               targets,
                                               negatives,
                                               batch_size=self._batch_size)):
                # concatenate all variables to get predictions in one run
                items_to_predict = torch.cat((batch_targets, batch_negatives),
                                             1)
                items_prediction = self._net(batch_sequences, batch_users,
                                             items_to_predict)

                (targets_prediction, negatives_prediction) = torch.split(
                    items_prediction,
                    [batch_targets.size(1),
                     batch_negatives.size(1)],
                    dim=1)

                self._optimizer.zero_grad()
                # compute the binary cross-entropy loss
                loss = sigmoid_log_loss(targets_prediction,
                                        negatives_prediction)

                epoch_loss += loss.item()

                loss.backward()
                self._optimizer.step()

            epoch_loss /= minibatch_num + 1

            t2 = time()
            if verbose and epoch_num % 10 == 0:
                precision, recall, ndcg, mean_aps = evaluate_ranking(
                    self, test, train, k=[3, 5, 10])
                str_precs = "precisions=%.4f,%.4f,%.4f" % tuple(
                    [np.mean(a) for a in precision])
                str_recalls = "recalls=%.4f,%.4f,%.4f" % tuple(
                    [np.mean(a) for a in recall])
                str_ndcgs = "ndcgs=%.4f,%.4f,%.4f" % tuple(
                    [np.mean(a) for a in ndcg])

                output_str = "Epoch %d [%.1f s]\tloss=%.4f, " \
                             "map=%.4f, %s, %s, %s[%.1f s]" % (epoch_num, t2 - t1,
                                                               epoch_loss,
                                                               mean_aps, str_precs, str_recalls, str_ndcgs,
                                                               time() - t2)
                print(output_str)
            else:
                output_str = "Epoch %d [%.1f s]\tloss=%.4f [%.1f s]" % (
                    epoch_num, t2 - t1, epoch_loss, time() - t2)
                print(output_str)

        _save_checkpoint(
            {
                'epoch_num': epoch_num,
                'state_dict': self._net.state_dict(),
                'optimizer': self._optimizer.state_dict(),
            }, 'checkpoints/gowalla-caser-dim=%d.pth.tar' % self.model_args.d)
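
Examples #2 and #6 call sigmoid_log_loss without defining it. A plausible definition, consistent with the inline binary cross-entropy written out in Examples #3 and #5 (the logsigmoid form is an equivalent, numerically stabler rewrite; the exact implementation is an assumption):

import torch
import torch.nn.functional as F

def sigmoid_log_loss(positive_prediction, negative_prediction):
    # -log(sigmoid(pos)) for positives, -log(1 - sigmoid(neg)) for negatives;
    # log(1 - sigmoid(x)) == logsigmoid(-x), which avoids overflow for large scores
    positive_loss = -torch.mean(F.logsigmoid(positive_prediction))
    negative_loss = -torch.mean(F.logsigmoid(-negative_prediction))
    return positive_loss + negative_loss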
Example #3
    def fit(self, train, test, verbose=False):
        """
        The general training loop to fit the model

        Parameters
        ----------

        train: :class:`spotlight.interactions.Interactions`
            training instances, also contains test sequences
        test: :class:`spotlight.interactions.Interactions`
            only contains targets for test sequences
        verbose: bool, optional
            print the logs
        """

        # convert to sequences, targets and users
        sequences = train.sequences.sequences
        targets = train.sequences.targets
        users = train.sequences.user_ids.reshape(-1, 1)

        L, T = train.sequences.L, train.sequences.T

        n_train = sequences.shape[0]

        output_str = 'total training instances: %d' % n_train
        print(output_str)

        if not self._initialized:
            self._initialize(train)

        start_epoch = 0

        for epoch_num in range(start_epoch, self._n_iter):

            t1 = time()

            # set model to training mode
            self._net.train()

            users, sequences, targets = shuffle(users,
                                                sequences,
                                                targets)

            negative_samples = self._generate_negative_samples(users, train, n=self._neg_samples * T)

            sequences_tensor = gpu(torch.from_numpy(sequences),
                                   self._use_cuda)
            user_tensor = gpu(torch.from_numpy(users),
                              self._use_cuda)
            item_target_tensor = gpu(torch.from_numpy(targets),
                                     self._use_cuda)
            item_negative_tensor = gpu(torch.from_numpy(negative_samples),
                                       self._use_cuda)

            epoch_loss = 0.0

            for minibatch_num, \
                (batch_sequence,
                 batch_user,
                 batch_target,
                 batch_negative) in enumerate(minibatch(sequences_tensor,
                                                        user_tensor,
                                                        item_target_tensor,
                                                        item_negative_tensor,
                                                        batch_size=self._batch_size)):
                sequence_var = Variable(batch_sequence)
                user_var = Variable(batch_user)
                item_target_var = Variable(batch_target)
                item_negative_var = Variable(batch_negative)

                target_prediction = self._net(sequence_var,
                                              user_var,
                                              item_target_var)
                negative_prediction = self._net(sequence_var,
                                                user_var,
                                                item_negative_var,
                                                use_cache=True)

                self._optimizer.zero_grad()
                # compute the binary cross-entropy loss
                positive_loss = -torch.mean(torch.log(torch.sigmoid(target_prediction)))
                negative_loss = -torch.mean(torch.log(1 - torch.sigmoid(negative_prediction)))
                loss = positive_loss + negative_loss

                epoch_loss += loss.item()

                loss.backward()
                self._optimizer.step()

            epoch_loss /= minibatch_num + 1

            t2 = time()
            if verbose:  # evaluate every epoch
                precision, recall, mean_aps = evaluate_ranking(self, test, train, k=[1, 5, 10])
                output_str = "Epoch %d [%.1f s]\tloss=%.4f, map=%.4f, " \
                             "prec@1=%.4f, prec@5=%.4f, prec@10=%.4f, " \
                             "recall@1=%.4f, recall@5=%.4f, recall@10=%.4f, [%.1f s]" % (epoch_num + 1,
                                                                                         t2 - t1,
                                                                                         epoch_loss,
                                                                                         mean_aps,
                                                                                         np.mean(precision[0]),
                                                                                         np.mean(precision[1]),
                                                                                         np.mean(precision[2]),
                                                                                         np.mean(recall[0]),
                                                                                         np.mean(recall[1]),
                                                                                         np.mean(recall[2]),
                                                                                         time() - t2)
                print(output_str)
            else:
                output_str = "Epoch %d [%.1f s]\tloss=%.4f [%.1f s]" % (epoch_num + 1,
                                                                        t2 - t1,
                                                                        epoch_loss,
                                                                        time() - t2)
                print(output_str)
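
Examples #3, #6 and #9 move tensors through a gpu helper in the style of spotlight's torch_utils; a minimal sketch under that assumption:

def gpu(tensor, use_cuda=False):
    # move the tensor to the GPU only when CUDA was requested
    return tensor.cuda() if use_cuda else tensor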
Example #4
 def fit(self, train, test, verbose=False):
     sequences_np = train.sequences.sequences
     targets_np = train.sequences.targets
     users_np = train.sequences.user_ids.reshape(-1, 1)
     L, T = train.sequences.L, train.sequences.T
     n_train = sequences_np.shape[0]
     output_str = 'total training instances: %d' % n_train
     print(output_str)
     if not self._initialized:
         self._initialize(train)
     start_epoch = 0
     best_p1, best_p5, best_p10, best_r1, best_r5, best_r10, best_map, best_n5, best_h5, best_f5 \
         = [0 for _ in range(10)]
     for epoch_num in range(start_epoch, self._n_iter):
         t1 = time()
         self._net.train()
         users_np, sequences_np, targets_np = shuffle(
             users_np, sequences_np, targets_np)
         negatives_np = self._generate_negative_samples(users_np,
                                                        train,
                                                        n=self._neg_samples)
         users, sequences, targets, negatives = (
             torch.from_numpy(users_np).long(),
             torch.from_numpy(sequences_np).long(),
             torch.from_numpy(targets_np).long(),
             torch.from_numpy(negatives_np).long())
         users, sequences, targets, negatives = (users.to(self._device),
                                                 sequences.to(self._device),
                                                 targets.to(self._device),
                                                 negatives.to(self._device))
         epoch_loss = 0.0
         for (minibatch_num, (batch_users, batch_sequences, batch_targets,
                              batch_negatives)) in enumerate(
                                  minibatch(users,
                                            sequences,
                                            targets,
                                            negatives,
                                            batch_size=self._batch_size)):
             items_to_predict = torch.cat((batch_targets, batch_negatives),
                                          1)
             items_prediction = self._net(batch_sequences, batch_users,
                                          items_to_predict)
             (targets_prediction, negatives_prediction) = torch.split(
                 items_prediction,
                 [batch_targets.size(1),
                  batch_negatives.size(1)],
                 dim=1)
             self._optimizer.zero_grad()
             positive_loss = -torch.mean(
                 torch.log(torch.sigmoid(targets_prediction)))
             negative_loss = -torch.mean(
                 torch.log(1 - torch.sigmoid(negatives_prediction)))
             loss = positive_loss + negative_loss
             epoch_loss += loss.item()
             loss.backward()
             self._optimizer.step()
         epoch_loss /= minibatch_num + 1
         t2 = time()
         if verbose:  # evaluate every epoch
             precision, recall, mean_aps, ndcgs, hrs, f1s = evaluate_ranking(
                 self, test, train, k=[1, 5, 10])
             output_str = "Epoch %d [%.1f s]\tloss=%.5f, map=%.5f, " \
                          "NDCG@5=%.5f, HR@5=%.5f, f1@5=%.5f, "\
                          "prec@5=%.5f, recall@5=%.5f, [%.1f s]" % (epoch_num + 1,
                                                                      t2 - t1,
                                                                      epoch_loss,
                                                                      mean_aps,
                                                                      np.mean(ndcgs[1]),
                                                                      np.mean(hrs[1]),
                                                                      np.mean(f1s[1]),
                                                                      np.mean(precision[1]),
                                                                      np.mean(recall[1]),
                                                                      time() - t2)
             print(output_str)
             # track the best value of each metric seen so far
             best_p1 = max(best_p1, np.mean(precision[0]))
             best_p5 = max(best_p5, np.mean(precision[1]))
             best_p10 = max(best_p10, np.mean(precision[2]))
             best_r1 = max(best_r1, np.mean(recall[0]))
             best_r5 = max(best_r5, np.mean(recall[1]))
             best_r10 = max(best_r10, np.mean(recall[2]))
             best_map = max(best_map, mean_aps)
             best_n5 = max(best_n5, np.mean(ndcgs[1]))
             best_h5 = max(best_h5, np.mean(hrs[1]))
             best_f5 = max(best_f5, np.mean(f1s[1]))
         else:
             output_str = "Epoch %d [%.1f s]\tloss=%.5f [%.1f s]" % (
                 epoch_num + 1, t2 - t1, epoch_loss, time() - t2)
             print(output_str)
     best_str = "best_p5=%.5f, best_r5=%.5f, best_f5=%.5f, best_n5=%.5f, best_h5=%.5f" \
                % (best_p5, best_r5, best_f5, best_n5, best_h5)
     print(best_str)
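
Every example iterates with a minibatch helper that none of them defines. A sketch consistent with the call sites (aligned slices over equally sized arrays or tensors):

def minibatch(*tensors, batch_size=128):
    # yield aligned batch_size-row slices of all inputs; the final slice may be
    # shorter, which is why the loops above divide the accumulated loss by the
    # actual number of minibatches (minibatch_num + 1)
    for i in range(0, len(tensors[0]), batch_size):
        yield tuple(tensor[i:i + batch_size] for tensor in tensors)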
Example #5
    def fit(self, train, test, verbose=False):
        """
        The general training loop to fit the model

        Parameters
        ----------

        train: :class:`spotlight.interactions.Interactions`
            training instances, also contains test sequences
        test: :class:`spotlight.interactions.Interactions`
            only contains targets for test sequences
        verbose: bool, optional
            print the logs
        """

        # convert to sequences, targets and users
        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids.reshape(-1, 1)

        L, T = train.sequences.L, train.sequences.T

        n_train = sequences_np.shape[0]

        output_str = 'total training instances: %d' % n_train
        print(output_str)

        if not self._initialized:
            self._initialize(train)

        start_epoch = 0

        for epoch_num in range(start_epoch, self._n_iter):

            t1 = time()

            # set model to training mode
            self._net.train()

            users_np, sequences_np, targets_np = shuffle(users_np,
                                                         sequences_np,
                                                         targets_np)

            negatives_np = self._generate_negative_samples(users_np, train, n=self._neg_samples)

            # convert numpy arrays to PyTorch tensors and move them to the target device
            users, sequences, targets, negatives = (torch.from_numpy(users_np).long(),
                                                    torch.from_numpy(sequences_np).long(),
                                                    torch.from_numpy(targets_np).long(),
                                                    torch.from_numpy(negatives_np).long())

            users, sequences, targets, negatives = (users.to(self._device),
                                                    sequences.to(self._device),
                                                    targets.to(self._device),
                                                    negatives.to(self._device))

            epoch_loss = 0.0

            for (minibatch_num,
                 (batch_users,
                  batch_sequences,
                  batch_targets,
                  batch_negatives)) in enumerate(minibatch(users,
                                                           sequences,
                                                           targets,
                                                           negatives,
                                                           batch_size=self._batch_size)):
                items_to_predict = torch.cat((batch_targets, batch_negatives), 1)
                items_prediction = self._net(batch_sequences,
                                             batch_users,
                                             items_to_predict)

                (targets_prediction,
                 negatives_prediction) = torch.split(items_prediction,
                                                     [batch_targets.size(1),
                                                      batch_negatives.size(1)], dim=1)

                self._optimizer.zero_grad()
                # compute the binary cross-entropy loss
                positive_loss = -torch.mean(
                    torch.log(torch.sigmoid(targets_prediction)))
                negative_loss = -torch.mean(
                    torch.log(1 - torch.sigmoid(negatives_prediction)))
                loss = positive_loss + negative_loss

                epoch_loss += loss.item()

                loss.backward()
                self._optimizer.step()

            epoch_loss /= minibatch_num + 1

            t2 = time()
            if verbose and (epoch_num + 1) % 10 == 0:
                precision, recall, mean_aps = evaluate_ranking(self, test, train, k=[1, 5, 10])
                output_str = "Epoch %d [%.1f s]\tloss=%.4f, map=%.4f, " \
                             "prec@1=%.4f, prec@5=%.4f, prec@10=%.4f, " \
                             "recall@1=%.4f, recall@5=%.4f, recall@10=%.4f, [%.1f s]" % (epoch_num + 1,
                                                                                         t2 - t1,
                                                                                         epoch_loss,
                                                                                         mean_aps,
                                                                                         np.mean(precision[0]),
                                                                                         np.mean(precision[1]),
                                                                                         np.mean(precision[2]),
                                                                                         np.mean(recall[0]),
                                                                                         np.mean(recall[1]),
                                                                                         np.mean(recall[2]),
                                                                                         time() - t2)
                print(output_str)
            else:
                output_str = "Epoch %d [%.1f s]\tloss=%.4f [%.1f s]" % (epoch_num + 1,
                                                                        t2 - t1,
                                                                        epoch_loss,
                                                                        time() - t2)
                print(output_str)
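
The shuffle helper is also assumed. Its usage in Examples #2 and #7 implies it permutes all arrays with one shared order and can optionally return that order; a sketch under that assumption:

import numpy as np

def shuffle(*arrays, indices=False):
    # one random permutation applied to every array, so rows stay aligned
    order = np.random.permutation(len(arrays[0]))
    shuffled = tuple(array[order] for array in arrays)
    return (shuffled, order) if indices else shuffled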
Example #6
    def fit(self, train, test, verbose=False):
        """
        The general training loop to fit the model

        Parameters
        ----------

        train: :class:`spotlight.interactions.Interactions`
            training instances, also contains test sequences
        test: :class:`spotlight.interactions.Interactions`
            only contains targets for test sequences
        verbose: bool, optional
            print the logs
        """

        # convert to sequences, targets and users
        sequences = train.sequences.sequences
        targets = train.sequences.targets
        users = train.sequences.user_ids.reshape(-1, 1)

        L, T = train.sequences.L, train.sequences.T

        n_train = sequences.shape[0]

        output_str = 'total training instances: %d' % n_train
        print(output_str)

        if not self._initialized:
            self._initialize(train)

        start_epoch = 0

        for epoch_num in range(start_epoch, self._n_iter):

            t1 = time()

            # set model to training mode
            self._net.train()

            users, sequences, targets = shuffle(users,
                                                sequences,
                                                targets)

            negative_samples = self._generate_negative_samples(users, train, n=self._neg_samples * T)

            sequences_tensor = gpu(torch.from_numpy(sequences),
                                   self._use_cuda)
            user_tensor = gpu(torch.from_numpy(users),
                              self._use_cuda)
            item_target_tensor = gpu(torch.from_numpy(targets),
                                     self._use_cuda)
            item_negative_tensor = gpu(torch.from_numpy(negative_samples),
                                       self._use_cuda)

            epoch_loss = 0.0

            for minibatch_num, \
                (batch_sequence,
                 batch_user,
                 batch_target,
                 batch_negative) in enumerate(minibatch(sequences_tensor,
                                                        user_tensor,
                                                        item_target_tensor,
                                                        item_negative_tensor,
                                                        batch_size=self._batch_size)):
                sequence_var = Variable(batch_sequence)
                user_var = Variable(batch_user)
                item_target_var = Variable(batch_target)
                item_negative_var = Variable(batch_negative)

                # concatenate all variables to get predictions in one run
                items_var = torch.cat((item_target_var, item_negative_var), 1)

                items_prediction = self._net(sequence_var,
                                             user_var,
                                             items_var)
                (positive_prediction,
                 negative_prediction) = torch.split(items_prediction, [item_target_var.size(1),
                                                                       item_negative_var.size(1)], dim=1)

                self._optimizer.zero_grad()
                # compute the binary cross-entropy loss
                loss = sigmoid_log_loss(positive_prediction, negative_prediction)

                epoch_loss += loss.item()

                loss.backward()
                self._optimizer.step()

            epoch_loss /= minibatch_num + 1

            t2 = time()
            if verbose and (epoch_num + 1) % 10 == 0:
                precision, recall, ndcg, mean_aps = evaluate_ranking(self, test, train, k=[3, 5, 10])
                str_precs = "precisions=%.4f,%.4f,%.4f" % tuple([np.mean(a) for a in precision])
                str_recalls = "recalls=%.4f,%.4f,%.4f" % tuple([np.mean(a) for a in recall])
                str_ndcgs = "ndcgs=%.4f,%.4f,%.4f" % tuple([np.mean(a) for a in ndcg])

                output_str = "Epoch %d [%.1f s]\tloss=%.4f, " \
                             "map=%.4f, %s, %s, %s[%.1f s]" % (epoch_num + 1, t2 - t1,
                                                               epoch_loss,
                                                               mean_aps, str_precs, str_recalls, str_ndcgs,
                                                               time() - t2)
                print(output_str)
            else:
                output_str = "Epoch %d [%.1f s]\tloss=%.4f [%.1f s]" % (epoch_num + 1,
                                                                        t2 - t1,
                                                                        epoch_loss,
                                                                        time() - t2)
                print(output_str)

        self._save_checkpoint({
            'epoch_num' : epoch_num + 1,
            'state_dict': self._net.state_dict(),
            'optimizer' : self._optimizer.state_dict(),
        }, 'gcaser-%d-%d-L5T1.pth.tar' % (self.model_args.d, epoch_num + 1))
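
Examples #2 and #6 persist training state through a checkpoint helper (module-level in #2, a method in #6). A minimal sketch, matching the keys that Example #2 reads back when resuming:

import torch

def _save_checkpoint(state, filename):
    # state carries 'epoch_num', 'state_dict' and 'optimizer' -- the same
    # keys Example #2 loads to resume training from a checkpoint file
    torch.save(state, filename)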
Example #7
    def fit(self, train, test, verbose=False):
        """
        The general training loop to fit the model

        Parameters
        ----------

        train: :class:`interactions.Interactions`
            training instances, also contains test sequences
        test: :class:`interactions.Interactions`
            only contains targets for test sequences
        verbose: bool, optional
            print the logs
        """

        # convert sequences, targets and users to numpy arrays
        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids.reshape(-1, 1)

        self.L, self.T = train.sequences.L, train.sequences.T

        n_train = sequences_np.shape[0]

        output_str = 'total training instances: %d' % n_train
        print(output_str)

        if not self._teacher_initialized:
            self._initialize_teacher(train)
        if not self._student_initialized:
            self._initialize_student(train)

        # precompute the teacher's top-K ranking for every training instance for speed;
        # if the ranking is too large to keep in memory, it has to be computed on the fly
        if os.path.isfile(self._teacher_topk_path):
            print('found teacher topk file, loading..')
            teacher_ranking = np.load(self._teacher_topk_path)
        else:
            print('teacher topk file not found, generating.. ')
            teacher_ranking = self._get_teacher_topk(sequences_np,
                                                     users_np,
                                                     targets_np,
                                                     k=self._K)

        # initialize static weight (position importance weight)
        weight_static = np.array(range(1, self._K + 1), dtype=np.float32)
        weight_static = np.exp(-weight_static / self._lambda)
        weight_static = weight_static / np.sum(weight_static)

        weight_static = torch.from_numpy(weight_static).to(self._device)
        weight_static = weight_static.unsqueeze(0)

        # initialize dynamic weight (ranking discrepancy weight)
        weight_dynamic = None

        # count number of parameters
        print("Number of params in teacher model: %d" %
              compute_model_size(self._teacher_net))
        print("Number of params in student model: %d" %
              compute_model_size(self._student_net))

        indices = np.arange(n_train)
        start_epoch = 1

        for epoch_num in range(start_epoch, self._n_iter + 1):

            t1 = time()
            # set the teacher model to evaluation mode and move it to the target device
            self._teacher_net.eval()
            self._teacher_net = self._teacher_net.to(self._device)
            # set the student model to training mode and move it to the target device
            self._student_net.train()
            self._student_net = self._student_net.to(self._device)

            (users_np, sequences_np,
             targets_np), shuffle_indices = shuffle(users_np,
                                                    sequences_np,
                                                    targets_np,
                                                    indices=True)

            indices = indices[
                shuffle_indices]  # keep indices to retrieve the teacher's top-K ranking from the cache

            negatives_np = self._generate_negative_samples(users_np,
                                                           train,
                                                           n=self._neg_samples)

            dynamic_samples_np = self._generate_negative_samples(
                users_np, train, n=self._num_dynamic_samples)

            # convert numpy arrays to PyTorch tensors and move them to the target device
            users, sequences, targets, negatives = (
                torch.from_numpy(users_np).long(),
                torch.from_numpy(sequences_np).long(),
                torch.from_numpy(targets_np).long(),
                torch.from_numpy(negatives_np).long())

            users, sequences, targets, negatives = (users.to(self._device),
                                                    sequences.to(self._device),
                                                    targets.to(self._device),
                                                    negatives.to(self._device))

            dynamic_samples = torch.from_numpy(dynamic_samples_np).long().to(
                self._device)

            epoch_loss = 0.0
            epoch_regular_loss = 0.0

            for (minibatch_num,
                 (batch_indices, batch_users, batch_sequences, batch_targets,
                  batch_negatives, batch_dynamics)) in enumerate(
                      minibatch(indices,
                                users,
                                sequences,
                                targets,
                                negatives,
                                dynamic_samples,
                                batch_size=self._batch_size)):

                # retrieve the teacher's top-K ranking for the given indices
                batch_candidates = torch.from_numpy(
                    teacher_ranking[batch_indices, :]).long().to(self._device)
                # concatenate all variables to get predictions in one run
                items_to_predict = torch.cat(
                    (batch_targets, batch_negatives, batch_candidates,
                     batch_dynamics), 1)

                items_prediction = self._student_net(batch_sequences,
                                                     batch_users,
                                                     items_to_predict)

                (targets_prediction, negatives_prediction,
                 candidates_prediction,
                 dynamics_prediction) = torch.split(items_prediction, [
                     batch_targets.size(1),
                     batch_negatives.size(1),
                     batch_candidates.size(1),
                     batch_dynamics.size(1)
                 ],
                                                    dim=1)

                self._optimizer.zero_grad()

                if epoch_num > self._dynamic_start_epoch:
                    # compute dynamic weight
                    dynamic_weights = list()
                    for col in range(self._K):
                        col_prediction = candidates_prediction[:,
                                                               col].unsqueeze(
                                                                   1)

                        num_smaller_than = torch.sum(
                            col_prediction < dynamics_prediction,
                            dim=1).float()
                        relative_rank = num_smaller_than / self._num_dynamic_samples
                        predicted_rank = torch.floor(self._num_items *
                                                     relative_rank)

                        dynamic_weight = torch.tanh(self._mu *
                                                    (predicted_rank - col))
                        dynamic_weight = torch.clamp(dynamic_weight, min=0.0)

                        dynamic_weights.append(dynamic_weight)
                    weight_dynamic = torch.stack(dynamic_weights, 1)

                    # combine the two weights
                    weight = weight_dynamic * weight_static
                    if self._weight_renormalize:
                        weight = F.normalize(weight, p=1, dim=1)
                else:
                    weight = weight_static

                # detach the weight to stop the gradient flow to the weight
                weight = weight.detach()

                loss, regular_loss = weighted_sigmoid_log_loss(
                    targets_prediction, negatives_prediction,
                    candidates_prediction, weight, self._teach_alpha)

                epoch_loss += loss.item()
                epoch_regular_loss += regular_loss.item()

                loss.backward()

                self._optimizer.step()

            epoch_loss /= minibatch_num + 1
            epoch_regular_loss /= minibatch_num + 1

            t2 = time()

            if verbose and epoch_num % 10 == 0:
                precision, recall, ndcg, mean_aps = evaluate_ranking(
                    self, test, train, k=[3, 5, 10])

                str_precs = "precisions=%.4f,%.4f,%.4f" % tuple(
                    [np.mean(a) for a in precision])
                str_recalls = "recalls=%.4f,%.4f,%.4f" % tuple(
                    [np.mean(a) for a in recall])
                str_ndcgs = "ndcgs=%.4f,%.4f,%.4f" % tuple(
                    [np.mean(a) for a in ndcg])

                output_str = "Epoch %d [%.1f s]\tloss=%.4f, regular_loss=%.4f, " \
                             "map=%.4f, %s, %s, %s[%.1f s]" % (epoch_num, t2 - t1,
                                                               epoch_loss, epoch_regular_loss,
                                                               mean_aps, str_precs, str_recalls, str_ndcgs,
                                                               time() - t2)
                print(output_str)
            else:
                output_str = "Epoch %d [%.1f s]\tloss=%.4f, regular_loss=%.4f[%.1f s]" % (
                    epoch_num, t2 - t1, epoch_loss, epoch_regular_loss,
                    time() - t2)
                print(output_str)
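
Example #7 distils a teacher's top-K ranking into the student through weighted_sigmoid_log_loss, which is not shown. One plausible reading, consistent with the call site: the candidates are the teacher's top-K items treated as weighted soft positives, and teach_alpha scales the distillation term. The exact form is an assumption:

import torch

def weighted_sigmoid_log_loss(targets_prediction, negatives_prediction,
                              candidates_prediction, weight, teach_alpha):
    # ordinary binary cross-entropy on the true targets and sampled negatives
    regular_loss = -torch.mean(torch.log(torch.sigmoid(targets_prediction))) \
                   - torch.mean(torch.log(1 - torch.sigmoid(negatives_prediction)))
    # distillation term: push the teacher's top-K items up, each position
    # scaled by the hybrid static/dynamic weight computed in the loop above
    distill_loss = -torch.mean(
        torch.sum(weight * torch.log(torch.sigmoid(candidates_prediction)), dim=1))
    return regular_loss + teach_alpha * distill_loss, regular_loss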
Example #8
    def fit(self, train, test, verbose=False):
        """
        The general training loop to fit the model

        Parameters
        ----------

        train: :class:`spotlight.interactions.Interactions`
            training instances, also contains test sequences
        test: :class:`spotlight.interactions.Interactions`
            only contains targets for test sequences
        verbose: bool, optional
            print the logs
        """

        # convert to sequences, targets and users
        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids.reshape(-1, 1)

        L, T = train.sequences.L, train.sequences.T

        n_train = sequences_np.shape[0]

        output_str = 'total training instances: %d' % n_train
        print(output_str)

        if not self._initialized:
            self._initialize(train)

            
        start_epoch = 0
        for epoch_num in range(start_epoch, self._n_iter):

            t1 = time()
            users_np, sequences_np, targets_np = shuffle(users_np,
                                                         sequences_np,
                                                         targets_np)

            negatives_np = self._generate_negative_samples(users_np, train, n=self._neg_samples)

            epoch_loss = 0.0
            for (minibatch_num,
                 (batch_users,
                  batch_sequences,
                  batch_targets,
                  batch_negatives)) in enumerate(minibatch(users_np,
                                                           sequences_np,
                                                           targets_np,
                                                           negatives_np,
                                                           batch_size=self._batch_size)):

                items_to_predict = np.concatenate((batch_targets, batch_negatives), 1)
                loss = self._net.train(self.sess,
                                       batch_sequences,
                                       batch_users,
                                       items_to_predict)
                epoch_loss += loss
            epoch_loss /= minibatch_num + 1

            t2 = time()
            if verbose and (epoch_num + 1) % 10 == 0:
                precision, recall, mean_aps = evaluate_ranking(self, test, train, k=[1, 5, 10])
                output_str = "Epoch %d [%.1f s]\tloss=%.4f, map=%.4f, " \
                             "prec@1=%.4f, prec@5=%.4f, prec@10=%.4f, " \
                             "recall@1=%.4f, recall@5=%.4f, recall@10=%.4f, [%.1f s]" % (epoch_num + 1,
                                                                                         t2 - t1,
                                                                                         epoch_loss,
                                                                                         mean_aps,
                                                                                         np.mean(precision[0]),
                                                                                         np.mean(precision[1]),
                                                                                         np.mean(precision[2]),
                                                                                         np.mean(recall[0]),
                                                                                         np.mean(recall[1]),
                                                                                         np.mean(recall[2]),
                                                                                         time() - t2)
                print(output_str)
            else:
                output_str = "Epoch %d [%.1f s]\tloss=%.4f [%.1f s]" % (epoch_num + 1,
                                                                        t2 - t1,
                                                                        epoch_loss,
                                                                        time() - t2)
                print(output_str)
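
Unlike the PyTorch examples, Example #8 delegates the optimization step to a TensorFlow 1.x model object through self.sess. A hypothetical sketch of a net matching the call self._net.train(self.sess, batch_sequences, batch_users, items_to_predict); every placeholder and op name here is an assumption:

class Net:
    # assumed attributes, built elsewhere when the graph is constructed:
    #   self.seq_ph, self.user_ph, self.item_ph -- placeholder inputs
    #   self.loss                                -- scalar loss tensor
    #   self.train_op                            -- optimizer minimize op
    def train(self, sess, sequences, users, items):
        # run one optimization step and return the minibatch loss
        loss, _ = sess.run(
            [self.loss, self.train_op],
            feed_dict={self.seq_ph: sequences,
                       self.user_ph: users,
                       self.item_ph: items})
        return loss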
Example #9
    def fit(self, train, test, verbose=False):

        sequences = train.sequences.sequences
        targets = train.sequences.targets
        users = train.sequences.user_ids.reshape(-1, 1)

        self.L, self.T = train.sequences.L, train.sequences.T

        n_train = sequences.shape[0]

        output_str = 'total training instances: %d' % n_train
        print(output_str)

        if not self._teacher_initialized:
            self._initialize_teacher(train)
        if not self._student_initialized:
            self._initialize_student(train)

        # compute the teacher's top-K ranking
        candidates = self._get_teacher_topk(sequences, users, targets, self._K,
                                            self._teacher_topk_path)

        # initialize static weight
        weight_static = np.array(range(1, self._K + 1), dtype=np.float32)
        weight_static = np.exp(-weight_static / self._lambda)
        weight_static = weight_static / np.sum(weight_static)

        weight_static = Variable(
            gpu(torch.from_numpy(weight_static), self._use_cuda)).unsqueeze(0)

        # initialize dynamic weight (ranking discrepancy weight)
        weight_dynamic = None

        # count number of parameters
        num_params = 0
        for param in self._net.parameters():
            num_params += param.view(-1).size()[0]
        print("Number of params: %d" % num_params)

        indices = np.arange(n_train)
        start_epoch = 0

        for epoch_num in range(start_epoch, self._n_iter):

            t1 = time()

            # set model to training mode
            self._net.train()

            (users, sequences,
             targets), shuffle_indices = shuffle(users,
                                                 sequences,
                                                 targets,
                                                 indices=True)

            indices = indices[
                shuffle_indices]  # keep indices to retrieve the teacher's top-K ranking

            negative_samples = self._generate_negative_samples(
                users, train, n=self._neg_samples * self.T)

            dynamic_samples = self._generate_samples(users,
                                                     n=self._dynamic_samples)

            sequences_tensor = gpu(torch.from_numpy(sequences), self._use_cuda)
            user_tensor = gpu(torch.from_numpy(users), self._use_cuda)
            item_target_tensor = gpu(torch.from_numpy(targets), self._use_cuda)
            item_negative_tensor = gpu(torch.from_numpy(negative_samples),
                                       self._use_cuda)
            dynamic_sample_tensor = gpu(torch.from_numpy(dynamic_samples),
                                        self._use_cuda)

            epoch_loss = 0.0
            epoch_regular_loss = 0.0

            for minibatch_num, \
                (batch_indices,
                 batch_sequence,
                 batch_user,
                 batch_target,
                 batch_negative,
                 batch_dynamic) in enumerate(minibatch(indices,
                                                       sequences_tensor,
                                                       user_tensor,
                                                       item_target_tensor,
                                                       item_negative_tensor,
                                                       dynamic_sample_tensor,
                                                       batch_size=self._batch_size)):

                sequence_var = Variable(batch_sequence)
                user_var = Variable(batch_user)
                item_target_var = Variable(batch_target)

                item_negative_var = Variable(batch_negative)
                dynamic_sample_var = Variable(batch_dynamic)

                # retrieve the teacher's top-K ranking for the given indices
                teacher_topk_var = Variable(
                    gpu(torch.from_numpy(candidates[batch_indices, :]),
                        self._use_cuda))

                # concatenate all variables to get predictions in one run
                items_var = torch.cat((item_target_var, item_negative_var,
                                       teacher_topk_var, dynamic_sample_var),
                                      1)

                items_prediction = self._net(sequence_var, user_var, items_var)
                (positive_prediction, negative_prediction,
                 teacher_topk_prediction,
                 dynamic_sample_prediction) = torch.split(items_prediction, [
                     item_target_var.size(1),
                     item_negative_var.size(1),
                     teacher_topk_var.size(1),
                     dynamic_sample_var.size(1)
                 ],
                                                          dim=1)

                self._optimizer.zero_grad()

                # compute dynamic weight
                dynamic_weights = list()
                for col in range(self._K):
                    col_prediction = teacher_topk_prediction[:,
                                                             col].unsqueeze(1)

                    _dynamic_weight = torch.sum(
                        col_prediction < dynamic_sample_prediction,
                        dim=1).float() / self._dynamic_samples
                    _dynamic_weight = torch.floor(self._num_items *
                                                  _dynamic_weight)

                    dynamic_weight = torch.tanh(self._mu * (_dynamic_weight - col))
                    dynamic_weight = torch.clamp(dynamic_weight, min=0.0)

                    dynamic_weights.append(dynamic_weight)
                weight_dynamic = torch.stack(dynamic_weights, 1)

                if epoch_num + 1 >= self._dynamic_start_epoch:
                    weight = weight_dynamic * weight_static
                    weight = F.normalize(weight, p=1, dim=1)
                else:
                    weight = weight_dynamic
                weight = weight.detach()

                loss, regular_loss = self._loss_func(positive_prediction,
                                                     negative_prediction,
                                                     teacher_topk_prediction,
                                                     weight, self._teach_alpha)

                epoch_loss += loss.item()
                epoch_regular_loss += regular_loss.item()

                loss.backward()

                self._optimizer.step()

            epoch_loss /= minibatch_num + 1
            epoch_regular_loss /= minibatch_num + 1

            t2 = time()

            if verbose and (epoch_num + 1) % 10 == 0:
                precision, recall, ndcg, mean_aps = evaluate_ranking(
                    self, test, train, k=[3, 5, 10])

                str_precs = "precisions=%.4f,%.4f,%.4f" % tuple(
                    [np.mean(a) for a in precision])
                str_recalls = "recalls=%.4f,%.4f,%.4f" % tuple(
                    [np.mean(a) for a in recall])
                str_ndcgs = "ndcgs=%.4f,%.4f,%.4f" % tuple(
                    [np.mean(a) for a in ndcg])

                output_str = "Epoch %d [%.1f s]\tloss=%.4f, regular_loss=%.4f, " \
                             "map=%.4f, %s, %s, %s[%.1f s]" % (epoch_num + 1, t2 - t1,
                                                               epoch_loss, epoch_regular_loss,
                                                               mean_aps, str_precs, str_recalls, str_ndcgs,
                                                               time() - t2)
                print(output_str)
            else:
                output_str = "Epoch %d [%.1f s]\tloss=%.4f, regular_loss=%.4f[%.1f s]" % (
                    epoch_num + 1, t2 - t1, epoch_loss, epoch_regular_loss,
                    time() - t2)
                print(output_str)
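
All examples draw negatives through _generate_negative_samples. A simple rejection-sampling sketch: self._num_items appears in Examples #7 and #9, while interactions.tocsr() (a user-by-item sparse matrix) and the treatment of id 0 as padding are assumptions:

import numpy as np

def _generate_negative_samples(self, users, interactions, n):
    # for every training instance, sample n item ids the user has not seen
    users = users.squeeze()
    seen = interactions.tocsr()          # assumed user-by-item sparse matrix
    negatives = np.zeros((len(users), n), dtype=np.int64)
    for row, user in enumerate(users):
        user_items = set(seen[user].indices)
        for col in range(n):
            item = np.random.randint(1, self._num_items)  # 0 assumed padding
            while item in user_items:    # rejection sampling
                item = np.random.randint(1, self._num_items)
            negatives[row, col] = item
    return negatives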
Example #10
    def fit(self, train, test, verbose=False):
        """
        The general training loop to fit the model

        Parameters
        ----------

        train: :class:`spotlight.interactions.Interactions`
            training instances, also contains test sequences
        test: :class:`spotlight.interactions.Interactions`
            only contains targets for test sequences
        verbose: bool, optional
            print the logs
        """

        # convert to sequences, targets and users
        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids.reshape(-1, 1)

        L, T = train.sequences.L, train.sequences.T

        n_train = sequences_np.shape[0]

        output_str = 'total training instances: %d' % n_train
        print(output_str)

        if not self._initialized:
            self._initialize(train)

        start_epoch = 0 
        best_map = 0 

        # create the save directory if it does not exist
        save_dir = args.save_root + args.dataset + '/'
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        results = pd.DataFrame()
        for epoch_num in range(start_epoch, self._n_iter):

            t1 = time()

            # set model to training mode
            self._net.train()

            users_np, sequences_np, targets_np = shuffle(users_np,
                                                         sequences_np,
                                                         targets_np)

            negatives_np = self._generate_negative_samples(users_np, train, n=self._neg_samples)

            # convert numpy arrays to PyTorch tensors and move it to the corresponding devices
            users, sequences, targets, negatives = (torch.from_numpy(users_np).long(),
                                                    torch.from_numpy(sequences_np).long(),
                                                    torch.from_numpy(targets_np).long(),
                                                    torch.from_numpy(negatives_np).long())

            users, sequences, targets, negatives = (users.to(self._device),
                                                    sequences.to(self._device),
                                                    targets.to(self._device),
                                                    negatives.to(self._device))

            epoch_loss = 0.0

            for (minibatch_num,
                 (batch_users,
                  batch_sequences,
                  batch_targets,
                  batch_negatives)) in enumerate(minibatch(users,
                                                           sequences,
                                                           targets,
                                                           negatives,
                                                           batch_size=self._batch_size)):
                items_to_predict = torch.cat((batch_targets, batch_negatives), 1)
                items_prediction = self._net(batch_sequences,
                                             batch_users,
                                             items_to_predict)

                (targets_prediction,
                 negatives_prediction) = torch.split(items_prediction,
                                                     [batch_targets.size(1),
                                                      batch_negatives.size(1)], dim=1)

                self._optimizer.zero_grad()
                # compute the binary cross-entropy loss
                positive_loss = -torch.mean(
                    torch.log(torch.sigmoid(targets_prediction)))
                negative_loss = -torch.mean(
                    torch.log(1 - torch.sigmoid(negatives_prediction)))
                loss = positive_loss + negative_loss

                epoch_loss += loss.item()

                loss.backward()
                self._optimizer.step()

            epoch_loss /= minibatch_num + 1


            parameterset = {}
            t2 = time()
            if verbose:
                precision, recall, mean_aps = evaluate_ranking(self, test, train, k=[1, 5, 10])
                output_str = "Epoch %d [%.1f s]\tloss=%.4f, map=%.4f, " \
                             "prec@1=%.4f, prec@5=%.4f, prec@10=%.4f, " \
                             "recall@1=%.4f, recall@5=%.4f, recall@10=%.4f,"\
                             "f1_score@1=%.4f,f1_score@5=%.4f,f1_score@10=%.4f,[%.1f s]" % (epoch_num + 1,
                                                                                         t2 - t1,
                                                                                         epoch_loss,
                                                                                         mean_aps,
                                                                                         np.mean(precision[0]),
                                                                                         np.mean(precision[1]),
                                                                                         np.mean(precision[2]),
                                                                                         np.mean(recall[0]),
                                                                                         np.mean(recall[1]),
                                                                                         np.mean(recall[2]),
                                                                                         f1_score(np.mean(precision[0]),np.mean(recall[0])),
                                                                                         f1_score(np.mean(precision[1]),np.mean(recall[1])),
                                                                                         f1_score(np.mean(precision[2]),np.mean(recall[2])),
                                                                                         time() - t2)
                parameterset["Epoch"] = epoch_num + 1
                parameterset["time1"] = t2 - t1
                parameterset["loss"] = epoch_loss
                parameterset["map"] = mean_aps
                parameterset["prec@1"] = np.mean(precision[0])
                parameterset["prec@5"] = np.mean(precision[1])
                parameterset["prec@10"] = np.mean(precision[2])
                parameterset["recall@1"] = np.mean(recall[0])
                parameterset["recall@5"] = np.mean(recall[1])
                parameterset["recall@10"] = np.mean(recall[2])
                parameterset["f1_score@1"] = f1_score(np.mean(precision[0]),np.mean(recall[0]))
                parameterset["f1_score@5"] = f1_score(np.mean(precision[1]),np.mean(recall[1]))
                parameterset["f1_score@10"] = f1_score(np.mean(precision[2]),np.mean(recall[2]))
                parameterset["time2"] = time() - t2
                results = pd.concat([results, pd.DataFrame([parameterset])],
                                    ignore_index=True)

                print(output_str)
                if mean_aps > best_map:
                    best_map = mean_aps
                    checkpoint_name = "best_model.pth.tar"
                    save_checkpoint({
                    'epoch': epoch_num+1,
                    'state_dict': self._net.state_dict(),
                    'optimizer': self._optimizer.state_dict(),
                    }, checkpoint_name, save_dir)

        print('***** Best map: {0:.4f} *****'.format(best_map))
        results.to_csv("results/ml1m_hold", index=False)
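
Example #10 reports F1 through an f1_score(precision, recall) helper; a minimal definition consistent with that call:

def f1_score(precision, recall):
    # harmonic mean of precision and recall; guard the all-zero case
    if precision + recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)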