Esempio n. 1
0
def build_sequence_model(hyperparameters, train, random_state):

    h = hyperparameters

    set_seed(42, CUDA)

    if h['compression_ratio'] < 1.0:
        item_embeddings = BloomEmbedding(
            train.num_items,
            h['embedding_dim'],
            compression_ratio=h['compression_ratio'],
            num_hash_functions=4,
            padding_idx=0)
    else:
        item_embeddings = ScaledEmbedding(train.num_items,
                                          h['embedding_dim'],
                                          padding_idx=0)

    network = LSTMNet(train.num_items,
                      h['embedding_dim'],
                      item_embedding_layer=item_embeddings)

    model = ImplicitSequenceModel(loss=h['loss'],
                                  n_iter=h['n_iter'],
                                  batch_size=h['batch_size'],
                                  learning_rate=h['learning_rate'],
                                  embedding_dim=h['embedding_dim'],
                                  l2=h['l2'],
                                  representation=network,
                                  use_cuda=CUDA,
                                  random_state=np.random.RandomState(42))

    return model
Esempio n. 2
0
def test_bloom_lstm(compression_ratio, expected_mrr):

    random_state = np.random.RandomState(RANDOM_SEED)
    train, test = _get_synthetic_data(randomness=1e-03,
                                      num_interactions=20000,
                                      random_state=random_state)

    embedding = BloomEmbedding(train.num_items,
                               32,
                               compression_ratio=compression_ratio,
                               num_hash_functions=4)

    representation = LSTMNet(train.num_items,
                             embedding_dim=EMBEDDING_DIM,
                             item_embedding_layer=embedding)

    model = ImplicitSequenceModel(loss=LOSS,
                                  representation=representation,
                                  batch_size=BATCH_SIZE,
                                  learning_rate=1e-2,
                                  l2=1e-7,
                                  n_iter=NUM_EPOCHS * 5,
                                  random_state=random_state,
                                  use_cuda=CUDA)

    model.fit(train, verbose=VERBOSE)

    mrr = _evaluate(model, test)

    assert mrr.mean() > expected_mrr
Esempio n. 3
0
    def _initialize(self, interactions):

        self._num_items = interactions.num_items

        if self._representation == 'pooling':
            self._net = PoolNet(self._num_items, self._embedding_dim, sparse=self._sparse)
        elif self._representation == 'cnn':
            self._net = CNNNet(self._num_items, self._embedding_dim, sparse=self._sparse)
        elif self._representation == 'lstm':
            self._net = LSTMNet(self._num_items, self._embedding_dim, sparse=self._sparse)
        elif self._representation == 'mixture':
            self._net = MixtureLSTMNet(self._num_items, self._embedding_dim, sparse=self._sparse)
        else:
            self._net = self._representation

        self._net = gpu(self._net, self._use_cuda)

        if self._optimizer_func is None:
            self._optimizer = optim.Adam(self._net.parameters(), weight_decay=self._l2, lr=self._learning_rate)
        else:
            self._optimizer = self._optimizer_func(self._net.parameters())

        if self._loss == 'pointwise':
            self._loss_func = pointwise_loss
        elif self._loss == 'bpr':
            self._loss_func = bpr_loss
        elif self._loss == 'hinge':
            self._loss_func = hinge_loss
        else:
            self._loss_func = adaptive_hinge_loss
Esempio n. 4
0
    def objective(hyper):

        print(hyper)

        start = time.clock()

        if hyper['model']['type'] == 'lsh':
            num_hashes = int(hyper['model']['num_hash_functions'])
            num_layers = int(hyper['model']['num_layers'])
            nonlinearity = hyper['model']['nonlinearity']
            residual = hyper['model']['residual']
            embed = hyper['model']['embed']

            item_embeddings = LSHEmbedding(train.num_items,
                                           int(hyper['embedding_dim']),
                                           embed=embed,
                                           residual_connections=residual,
                                           nonlinearity=nonlinearity,
                                           num_layers=num_layers,
                                           num_hash_functions=num_hashes)
            item_embeddings.fit(train_nonsequence.tocsr().T)
        else:
            item_embeddings = ScaledEmbedding(train.num_items,
                                              int(hyper['embedding_dim']),
                                              padding_idx=0)

        network = LSTMNet(train.num_items,
                          int(hyper['embedding_dim']),
                          item_embedding_layer=item_embeddings)

        model = ImplicitSequenceModel(loss=hyper['loss'],
                                      n_iter=int(hyper['n_iter']),
                                      batch_size=int(hyper['batch_size']),
                                      learning_rate=hyper['learning_rate'],
                                      embedding_dim=int(
                                          hyper['embedding_dim']),
                                      l2=hyper['l2'],
                                      representation=network,
                                      use_cuda=CUDA,
                                      random_state=random_state)

        model.fit(train, verbose=True)

        elapsed = time.clock() - start

        print(model)

        validation_mrr = sequence_mrr_score(model, validation).mean()
        test_mrr = sequence_mrr_score(model, test).mean()

        print('MRR {} {}'.format(validation_mrr, test_mrr))

        return {
            'loss': -validation_mrr,
            'status': STATUS_OK,
            'validation_mrr': validation_mrr,
            'test_mrr': test_mrr,
            'elapsed': elapsed,
            'hyper': hyper
        }
Esempio n. 5
0
def sequence_model(num_embeddings, bloom):

    if bloom:
        item_embeddings = BloomEmbedding(num_embeddings, EMBEDDING_DIM, num_hash_functions=NUM_HASH_FUNCTIONS)
    else:
        item_embeddings = ScaledEmbedding(num_embeddings, EMBEDDING_DIM)

    network = LSTMNet(num_embeddings, EMBEDDING_DIM, item_embedding_layer=item_embeddings)

    model = ImplicitSequenceModel(
        loss='adaptive_hinge',
        n_iter=N_ITER,
        batch_size=512,
        learning_rate=1e-3,
        l2=1e-2,
        representation=network,
        use_cuda=CUDA)

    return model
Esempio n. 6
0
class ImplicitSequenceModel(object):
    """
    Model for sequential recommendations using implicit feedback.

    Parameters
    ----------

    loss: string, optional
        The loss function for approximating a softmax with negative sampling.
        One of 'pointwise', 'bpr', 'hinge', 'adaptive_hinge', corresponding
        to losses from :class:`spotlight.losses`.
    representation: string or instance of :class:`spotlight.sequence.representations`, optional
        Sequence representation to use. If string, it must be one
        of 'pooling', 'cnn', 'lstm'; otherwise must be one of the
        representations from :class:`spotlight.sequence.representations`
    embedding_dim: int, optional
        Number of embedding dimensions to use for representing items.
        Overridden if representation is an instance of a representation class.
    n_iter: int, optional
        Number of iterations to run.
    batch_size: int, optional
        Minibatch size.
    l2: float, optional
        L2 loss penalty.
    learning_rate: float, optional
        Initial learning rate.
    optimizer_func: function, optional
        Function that takes in module parameters as the first argument and
        returns an instance of a PyTorch optimizer. Overrides l2 and learning
        rate if supplied. If no optimizer supplied, then use ADAM by default.
    use_cuda: boolean, optional
        Run the model on a GPU.
    sparse: boolean, optional
        Use sparse gradients for embedding layers.
    random_state: instance of numpy.random.RandomState, optional
        Random state to use when fitting.
    num_negative_samples: int, optional
        Number of negative samples to generate for adaptive hinge loss.

    Notes
    -----

    During fitting, the model computes the loss for each timestep of the
    supplied sequence. For example, suppose the following sequences are
    passed to the ``fit`` function:

    .. code-block:: python

       [[1, 2, 3, 4, 5],
        [0, 0, 7, 1, 4]]


    In this case, the loss for the first example will be the mean loss
    of trying to predict ``2`` from ``[1]``, ``3`` from ``[1, 2]``,
    ``4`` from ``[1, 2, 3]`` and so on. This means that explicit padding
    of all subsequences is not necessary (although it is possible by using
    the ``step_size`` parameter of
    :func:`spotlight.interactions.Interactions.to_sequence`.
    """
    def __init__(self,
                 loss='pointwise',
                 representation='pooling',
                 embedding_dim=32,
                 n_iter=10,
                 batch_size=256,
                 l2=0.0,
                 learning_rate=1e-2,
                 optimizer_func=None,
                 use_cuda=False,
                 sparse=False,
                 random_state=None,
                 num_negative_samples=5):

        assert loss in ('pointwise', 'bpr', 'hinge', 'adaptive_hinge')

        if isinstance(representation, str):
            assert representation in ('pooling', 'cnn', 'lstm')

        self._loss = loss
        self._representation = representation
        self._embedding_dim = embedding_dim
        self._n_iter = n_iter
        self._learning_rate = learning_rate
        self._batch_size = batch_size
        self._l2 = l2
        self._use_cuda = use_cuda
        self._sparse = sparse
        self._optimizer_func = optimizer_func
        self._random_state = random_state or np.random.RandomState()
        self._num_negative_samples = num_negative_samples

        self._num_items = None
        self._net = None
        self._optimizer = None
        self._loss_func = None

        set_seed(self._random_state.randint(-10**8, 10**8),
                 cuda=self._use_cuda)

    def __repr__(self):

        return _repr_model(self)

    @property
    def _initialized(self):
        return self._net is not None

    def _initialize(self, interactions):

        self._num_items = interactions.num_items

        if self._representation == 'pooling':
            self._net = PoolNet(self._num_items,
                                self._embedding_dim,
                                sparse=self._sparse)
        elif self._representation == 'cnn':
            self._net = CNNNet(self._num_items,
                               self._embedding_dim,
                               sparse=self._sparse)
        elif self._representation == 'lstm':
            self._net = LSTMNet(self._num_items,
                                self._embedding_dim,
                                sparse=self._sparse)
        else:
            self._net = self._representation

        self._net = gpu(self._net, self._use_cuda)

        if self._optimizer_func is None:
            self._optimizer = optim.Adam(self._net.parameters(),
                                         weight_decay=self._l2,
                                         lr=self._learning_rate)
        else:
            self._optimizer = self._optimizer_func(self._net.parameters())

        if self._loss == 'pointwise':
            self._loss_func = pointwise_loss
        elif self._loss == 'bpr':
            self._loss_func = bpr_loss
        elif self._loss == 'hinge':
            self._loss_func = hinge_loss
        else:
            self._loss_func = adaptive_hinge_loss

    def _check_input(self, item_ids):

        if isinstance(item_ids, int):
            item_id_max = item_ids
        else:
            item_id_max = item_ids.max()

        if item_id_max >= self._num_items:
            raise ValueError('Maximum item id greater '
                             'than number of items in model.')

    def fit(self, interactions, verbose=False):
        """
        Fit the model.

        When called repeatedly, model fitting will resume from
        the point at which training stopped in the previous fit
        call.

        Parameters
        ----------

        interactions: :class:`spotlight.interactions.SequenceInteractions`
            The input sequence dataset.
        """

        sequences = interactions.sequences.astype(np.int64)

        if not self._initialized:
            self._initialize(interactions)

        self._check_input(sequences)

        for epoch_num in range(self._n_iter):

            sequences = shuffle(sequences, random_state=self._random_state)

            sequences_tensor = gpu(torch.from_numpy(sequences), self._use_cuda)

            epoch_loss = 0.0

            for minibatch_num, batch_sequence in enumerate(
                    minibatch(sequences_tensor, batch_size=self._batch_size)):

                sequence_var = Variable(batch_sequence)

                user_representation, _ = self._net.user_representation(
                    sequence_var)

                positive_prediction = self._net(user_representation,
                                                sequence_var)

                if self._loss == 'adaptive_hinge':
                    negative_prediction = self._get_multiple_negative_predictions(
                        sequence_var.size(),
                        user_representation,
                        n=self._num_negative_samples)
                else:
                    negative_prediction = self._get_negative_prediction(
                        sequence_var.size(), user_representation)

                self._optimizer.zero_grad()

                loss = self._loss_func(positive_prediction,
                                       negative_prediction,
                                       mask=(sequence_var != PADDING_IDX))
                epoch_loss += loss.data[0]

                loss.backward()

                self._optimizer.step()

            epoch_loss /= minibatch_num + 1

            if verbose:
                print('Epoch {}: loss {}'.format(epoch_num, epoch_loss))

            if np.isnan(epoch_loss) or epoch_loss == 0.0:
                raise ValueError(
                    'Degenerate epoch loss: {}'.format(epoch_loss))

    def _get_negative_prediction(self, shape, user_representation):

        negative_items = sample_items(self._num_items,
                                      shape,
                                      random_state=self._random_state)
        negative_var = Variable(
            gpu(torch.from_numpy(negative_items), self._use_cuda))
        negative_prediction = self._net(user_representation, negative_var)

        return negative_prediction

    def _get_multiple_negative_predictions(self,
                                           shape,
                                           user_representation,
                                           n=5):
        batch_size, sliding_window = shape
        size = (n, ) + (1, ) * (user_representation.dim() - 1)
        negative_prediction = self._get_negative_prediction(
            (n * batch_size, sliding_window),
            user_representation.repeat(*size))

        return negative_prediction.view(n, batch_size, sliding_window)

    def predict(self, sequences, item_ids=None):
        """
        Make predictions: given a sequence of interactions, predict
        the next item in the sequence.

        Parameters
        ----------

        sequences: array, (1 x max_sequence_length)
            Array containing the indices of the items in the sequence.
        item_ids: array (num_items x 1), optional
            Array containing the item ids for which prediction scores
            are desired. If not supplied, predictions for all items
            will be computed.

        Returns
        -------

        predictions: array
            Predicted scores for all items in item_ids.
        """

        self._net.train(False)

        sequences = np.atleast_2d(sequences)

        if item_ids is None:
            item_ids = np.arange(self._num_items).reshape(-1, 1)

        self._check_input(item_ids)
        self._check_input(sequences)

        sequences = torch.from_numpy(sequences.astype(np.int64).reshape(1, -1))
        item_ids = torch.from_numpy(item_ids.astype(np.int64))

        sequence_var = Variable(gpu(sequences, self._use_cuda))
        item_var = Variable(gpu(item_ids, self._use_cuda))

        _, sequence_representations = self._net.user_representation(
            sequence_var)
        size = (len(item_var), ) + sequence_representations.size()[1:]
        out = self._net(sequence_representations.expand(*size), item_var)

        return cpu(out.data).numpy().flatten()
Esempio n. 7
0
    def fit(self, interactions, verbose=False):
        """
        Fit the model.

        Parameters
        ----------

        interactions: :class:`spotlight.interactions.SequenceInteractions`
            The input sequence dataset.
        """

        sequences = interactions.sequences.astype(np.int64)

        self._num_items = interactions.num_items

        if self._representation == 'pooling':
            self._net = PoolNet(self._num_items,
                                self._embedding_dim,
                                sparse=self._sparse)
        elif self._representation == 'cnn':
            self._net = CNNNet(self._num_items,
                               self._embedding_dim,
                               sparse=self._sparse)
        elif self._representation == 'lstm':
            self._net = LSTMNet(self._num_items,
                                self._embedding_dim,
                                sparse=self._sparse)
        else:
            self._net = self._representation

        self._net = gpu(self._net, self._use_cuda)

        if self._optimizer is None:
            self._optimizer = optim.Adam(self._net.parameters(),
                                         weight_decay=self._l2,
                                         lr=self._learning_rate)

        if self._loss == 'pointwise':
            loss_fnc = pointwise_loss
        elif self._loss == 'bpr':
            loss_fnc = bpr_loss
        elif self._loss == 'hinge':
            loss_fnc = hinge_loss
        else:
            loss_fnc = adaptive_hinge_loss

        for epoch_num in range(self._n_iter):

            sequences = shuffle(sequences, random_state=self._random_state)

            sequences_tensor = gpu(torch.from_numpy(sequences), self._use_cuda)

            epoch_loss = 0.0

            for minibatch_num, batch_sequence in enumerate(
                    minibatch(sequences_tensor, batch_size=self._batch_size)):

                sequence_var = Variable(batch_sequence)

                user_representation, _ = self._net.user_representation(
                    sequence_var)

                positive_prediction = self._net(user_representation,
                                                sequence_var)

                if self._loss == 'adaptive_hinge':
                    negative_prediction = [
                        self._get_negative_prediction(sequence_var.size(),
                                                      user_representation)
                        for __ in range(5)
                    ]
                else:
                    negative_prediction = self._get_negative_prediction(
                        sequence_var.size(), user_representation)

                self._optimizer.zero_grad()

                loss = loss_fnc(positive_prediction,
                                negative_prediction,
                                mask=(sequence_var != PADDING_IDX))
                epoch_loss += loss.data[0]

                loss.backward()
                self._optimizer.step()

            epoch_loss /= minibatch_num + 1

            if verbose:
                print('Epoch {}: loss {}'.format(epoch_num, epoch_loss))
Esempio n. 8
0
min_sequence_length = 10
max_sequence_length = 200
step_size = 1

train = train.to_sequence(max_sequence_length=max_sequence_length,
                          min_sequence_length=min_sequence_length,
                          step_size=step_size)
test = test.to_sequence(max_sequence_length=max_sequence_length,
                        min_sequence_length=min_sequence_length,
                        step_size=step_size)
validation = validation.to_sequence(max_sequence_length=max_sequence_length,
                                    min_sequence_length=min_sequence_length,
                                    step_size=step_size)

net = LSTMNet(len(set(item2idx)),
              embedding_dim=32,
              item_embedding_layer=None,
              sparse=False)
model = ImplicitSequenceModel(loss='adaptive_hinge',
                              representation=net,
                              batch_size=32,
                              learning_rate=0.01,
                              l2=10e-6,
                              n_iter=10,
                              use_cuda=False,
                              random_state=random_state)
model.fit(train, verbose=True)

test_mrr = sequence_mrr_score(model, test)
val_mrr = sequence_mrr_score(model, validation)
train_mrr = sequence_mrr_score(model, train)
Esempio n. 9
0
class ImplicitSequenceModel(object):
    """
    Model for sequential recommendations using implicit feedback.

    Parameters
    ----------

    loss: string, optional
        The loss function for approximating a softmax with negative sampling.
        One of 'pointwise', 'bpr', 'hinge', 'adaptive_hinge', corresponding
        to losses from :class:`spotlight.losses`.
    representation: string or instance of :class:`spotlight.sequence.representations`, optional
        Sequence representation to use. If string, it must be one
        of 'pooling', 'cnn', 'lstm'; otherwise must be one of the
        representations from :class:`spotlight.sequence.representations`
    embedding_dim: int, optional
        Number of embedding dimensions to use for representing items.
        Overriden if representation is an instance of a representation class.
    n_iter: int, optional
        Number of iterations to run.
    batch_size: int, optional
        Minibatch size.
    l2: float, optional
        L2 loss penalty.
    learning_rate: float, optional
        Initial learning rate.
    optimizer_func: function, optional
        Function that takes in module parameters as the first argument and
        returns an instance of a Pytorch optimizer. Overrides l2 and learning
        rate if supplied. If no optimizer supplied, then use ADAM by default.
    use_cuda: boolean, optional
        Run the model on a GPU.
    sparse: boolean, optional
        Use sparse gradients for embedding layers.
    random_state: instance of numpy.random.RandomState, optional
        Random state to use when fitting.

    Notes
    -----

    During fitting, the model computes the loss for each timestep of the
    supplied sequence. For example, suppose the following sequences are
    passed to the ``fit`` function:

    .. code-block:: python

       [[1, 2, 3, 4, 5],
        [0, 0, 7, 1, 4]]


    In this case, the loss for the first example will be the mean loss
    of trying to predict ``2`` from ``[1]``, ``3`` from ``[1, 2]``,
    ``4`` from ``[1, 2, 3]`` and so on. This means that explicit padding
    of all subsequences is not necessary (although it is possible by using
    the ``step_size`` parameter of
    :func:`spotlight.interactions.Interactions.to_sequence`.
    """

    def __init__(self,
                 loss='pointwise',
                 representation='pooling',
                 embedding_dim=32,
                 n_iter=10,
                 batch_size=256,
                 l2=0.0,
                 learning_rate=1e-2,
                 optimizer_func=None,
                 use_cuda=False,
                 sparse=False,
                 random_state=None):

        assert loss in ('pointwise',
                        'bpr',
                        'hinge',
                        'adaptive_hinge')

        if isinstance(representation, str):
            assert representation in ('pooling',
                                      'cnn',
                                      'lstm')

        self._loss = loss
        self._representation = representation
        self._embedding_dim = embedding_dim
        self._n_iter = n_iter
        self._learning_rate = learning_rate
        self._batch_size = batch_size
        self._l2 = l2
        self._use_cuda = use_cuda
        self._sparse = sparse
        self._optimizer_func = optimizer_func
        self._random_state = random_state or np.random.RandomState()

        self._num_items = None
        self._net = None
        self._optimizer = None

        set_seed(self._random_state.randint(-10**8, 10**8),
                 cuda=self._use_cuda)

    def __repr__(self):

        return _repr_model(self)

    def fit(self, interactions, verbose=False):
        """
        Fit the model.

        Parameters
        ----------

        interactions: :class:`spotlight.interactions.SequenceInteractions`
            The input sequence dataset.
        """

        sequences = interactions.sequences.astype(np.int64)

        self._num_items = interactions.num_items

        if self._representation == 'pooling':
            self._net = PoolNet(self._num_items,
                                self._embedding_dim,
                                sparse=self._sparse)
        elif self._representation == 'cnn':
            self._net = CNNNet(self._num_items,
                               self._embedding_dim,
                               sparse=self._sparse)
        elif self._representation == 'lstm':
            self._net = LSTMNet(self._num_items,
                                self._embedding_dim,
                                sparse=self._sparse)
        else:
            self._net = self._representation

        self._net = gpu(self._net, self._use_cuda)

        if self._optimizer is None:
            self._optimizer = optim.Adam(
                self._net.parameters(),
                weight_decay=self._l2,
                lr=self._learning_rate
            )
        else:
            self._optimizer = self._optimizer_func(self._net.parameters())

        if self._loss == 'pointwise':
            loss_fnc = pointwise_loss
        elif self._loss == 'bpr':
            loss_fnc = bpr_loss
        elif self._loss == 'hinge':
            loss_fnc = hinge_loss
        else:
            loss_fnc = adaptive_hinge_loss

        for epoch_num in range(self._n_iter):

            sequences = shuffle(sequences,
                                random_state=self._random_state)

            sequences_tensor = gpu(torch.from_numpy(sequences),
                                   self._use_cuda)

            epoch_loss = 0.0

            for minibatch_num, batch_sequence in enumerate(minibatch(sequences_tensor,
                                                                     batch_size=self._batch_size)):

                sequence_var = Variable(batch_sequence)

                user_representation, _ = self._net.user_representation(
                    sequence_var
                )

                positive_prediction = self._net(user_representation,
                                                sequence_var)

                if self._loss == 'adaptive_hinge':
                    negative_prediction = [self._get_negative_prediction(sequence_var.size(),
                                                                         user_representation)
                                           for __ in range(5)]
                else:
                    negative_prediction = self._get_negative_prediction(sequence_var.size(),
                                                                        user_representation)

                self._optimizer.zero_grad()

                loss = loss_fnc(positive_prediction,
                                negative_prediction,
                                mask=(sequence_var != PADDING_IDX))
                epoch_loss += loss.data[0]

                loss.backward()
                self._optimizer.step()

            epoch_loss /= minibatch_num + 1

            if verbose:
                print('Epoch {}: loss {}'.format(epoch_num, epoch_loss))

    def _get_negative_prediction(self, shape, user_representation):

        negative_items = sample_items(
            self._num_items,
            shape,
            random_state=self._random_state)
        negative_var = Variable(
            gpu(torch.from_numpy(negative_items), self._use_cuda)
        )
        negative_prediction = self._net(user_representation, negative_var)

        return negative_prediction

    def predict(self, sequences, item_ids=None):
        """
        Make predictions: given a sequence of interactions, predict
        the next item in the sequence.

        Parameters
        ----------

        sequences: array, (1 x max_sequence_length)
            Array containing the indices of the items in the sequence.
        item_ids: array (num_items x 1), optional
            Array containing the item ids for which prediction scores
            are desired. If not supplied, predictions for all items
            will be computed.

        Returns
        -------

        predictions: array
            Predicted scores for all items in item_ids.
        """

        self._net.train(False)

        sequences = np.atleast_2d(sequences)

        if item_ids is None:
            item_ids = np.arange(self._num_items).reshape(-1, 1)

        sequences = torch.from_numpy(sequences.astype(np.int64).reshape(1, -1))
        item_ids = torch.from_numpy(item_ids.astype(np.int64))

        sequence_var = Variable(gpu(sequences, self._use_cuda))
        item_var = Variable(gpu(item_ids, self._use_cuda))

        _, sequence_representations = self._net.user_representation(sequence_var)
        out = self._net(sequence_representations.repeat(len(item_var), 1),
                        item_var)

        return cpu(out.data).numpy().flatten()
Esempio n. 10
0
    def fit(self, interactions, verbose=False):
        """
        Fit the model.

        Parameters
        ----------

        interactions: :class:`spotlight.interactions.SequenceInteractions`
            The input sequence dataset.
        """

        sequences = interactions.sequences.astype(np.int64)

        self._num_items = interactions.num_items

        if self._representation == 'pooling':
            self._net = PoolNet(self._num_items,
                                self._embedding_dim,
                                sparse=self._sparse)
        elif self._representation == 'cnn':
            self._net = CNNNet(self._num_items,
                               self._embedding_dim,
                               sparse=self._sparse)
        elif self._representation == 'lstm':
            self._net = LSTMNet(self._num_items,
                                self._embedding_dim,
                                sparse=self._sparse)
        else:
            self._net = self._representation

        self._net = gpu(self._net, self._use_cuda)

        if self._optimizer is None:
            self._optimizer = optim.Adam(
                self._net.parameters(),
                weight_decay=self._l2,
                lr=self._learning_rate
            )
        else:
            self._optimizer = self._optimizer_func(self._net.parameters())

        if self._loss == 'pointwise':
            loss_fnc = pointwise_loss
        elif self._loss == 'bpr':
            loss_fnc = bpr_loss
        elif self._loss == 'hinge':
            loss_fnc = hinge_loss
        else:
            loss_fnc = adaptive_hinge_loss

        for epoch_num in range(self._n_iter):

            sequences = shuffle(sequences,
                                random_state=self._random_state)

            sequences_tensor = gpu(torch.from_numpy(sequences),
                                   self._use_cuda)

            epoch_loss = 0.0

            for minibatch_num, batch_sequence in enumerate(minibatch(sequences_tensor,
                                                                     batch_size=self._batch_size)):

                sequence_var = Variable(batch_sequence)

                user_representation, _ = self._net.user_representation(
                    sequence_var
                )

                positive_prediction = self._net(user_representation,
                                                sequence_var)

                if self._loss == 'adaptive_hinge':
                    negative_prediction = [self._get_negative_prediction(sequence_var.size(),
                                                                         user_representation)
                                           for __ in range(5)]
                else:
                    negative_prediction = self._get_negative_prediction(sequence_var.size(),
                                                                        user_representation)

                self._optimizer.zero_grad()

                loss = loss_fnc(positive_prediction,
                                negative_prediction,
                                mask=(sequence_var != PADDING_IDX))
                epoch_loss += loss.data[0]

                loss.backward()
                self._optimizer.step()

            epoch_loss /= minibatch_num + 1

            if verbose:
                print('Epoch {}: loss {}'.format(epoch_num, epoch_loss))
Esempio n. 11
0
    def objective(hyper):

        print(hyper)

        start = time.clock()

        h = hyper['model']

        cls = ImplicitSequenceModel

        if h['type'] == 'pooling':
            representation = PoolNet(train.num_items,
                                     embedding_dim=int(h['embedding_dim']))
        elif h['type'] == 'lstm':
            representation = LSTMNet(train.num_items,
                                     embedding_dim=int(h['embedding_dim']))
        elif h['type'] == 'mixture':
            num_components = int(h['num_components'])
            embedding_dim = int(h['embedding_dim'])
            representation = MixtureLSTMNet(train.num_items,
                                            num_components=num_components,
                                            embedding_dim=embedding_dim)
        elif h['type'] == 'mixture2':
            num_components = int(h['num_components'])
            embedding_dim = int(h['embedding_dim'])
            representation = Mixture2LSTMNet(train.num_items,
                                             num_components=num_components,
                                             embedding_dim=embedding_dim)
        elif h['type'] == 'linear_mixture':
            num_components = int(h['num_components'])
            embedding_dim = int(h['embedding_dim'])
            representation = LinearMixtureLSTMNet(train.num_items,
                                                  num_components=num_components,
                                                  embedding_dim=embedding_dim)
        elif h['type'] == 'diversified_mixture_fixed':
            num_components = int(h['num_components'])
            embedding_dim = int(h['embedding_dim'])
            representation = DiversifiedMixtureLSTMNet(train.num_items,
                                                       num_components=num_components,
                                                       diversity_penalty=h['diversity_penalty'],
                                                       embedding_dim=embedding_dim)
            cls = DiversifiedImplicitSequenceModel
        else:
            raise ValueError('Unknown model type')

        model = cls(
            batch_size=int(h['batch_size']),
            loss=h['loss'],
            learning_rate=h['learning_rate'],
            l2=h['l2'],
            n_iter=int(h['n_iter']),
            representation=representation,
            use_cuda=CUDA,
            random_state=np.random.RandomState(42)
        )

        try:
            model.fit(train, verbose=True)
        except ValueError:
            elapsed = time.clock() - start
            return {'loss': 0.0,
                    'status': STATUS_FAIL,
                    'validation_mrr': 0.0,
                    'test_mrr': 0.0,
                    'elapsed': elapsed,
                    'hyper': h}

        elapsed = time.clock() - start

        print(model)

        validation_mrr = sequence_mrr_score(
            model,
            validation,
            exclude_preceding=True
        ).mean()
        test_mrr = sequence_mrr_score(
            model,
            test,
            exclude_preceding=True
        ).mean()

        print('MRR {} {}'.format(validation_mrr, test_mrr))

        if np.isnan(validation_mrr):
            status = STATUS_FAIL
        else:
            status = STATUS_OK

        return {'loss': -validation_mrr,
                'status': status,
                'validation_mrr': validation_mrr,
                'test_mrr': test_mrr,
                'elapsed': elapsed,
                'hyper': h}
Esempio n. 12
0
class ImplicitSequenceModel(object):
    """
    Model for sequential recommendations using implicit feedback.

    Parameters
    ----------

    loss: string, optional
        The loss function for approximating a softmax with negative sampling.
        One of 'pointwise', 'bpr', 'hinge', 'adaptive_hinge', corresponding
        to losses from :class:`spotlight.losses`.
    representation: string or instance of :class:`spotlight.sequence.representations`, optional
        Sequence representation to use. If string, it must be one
        of 'pooling', 'cnn', 'lstm'; otherwise must be one of the
        representations from :class:`spotlight.sequence.representations`
    embedding_dim: int, optional
        Number of embedding dimensions to use for representing items.
        Overriden if representation is an instance of a representation class.
    n_iter: int, optional
        Number of iterations to run.
    batch_size: int, optional
        Minibatch size.
    l2: float, optional
        L2 loss penalty.
    learning_rate: float, optional
        Initial learning rate.
    optimizer: instance of a PyTorch optimizer, optional
        Overrides l2 and learning rate if supplied.
    use_cuda: boolean, optional
        Run the model on a GPU.
    sparse: boolean, optional
        Use sparse gradients for embedding layers.
    random_state: instance of numpy.random.RandomState, optional
        Random state to use when fitting.
    """
    def __init__(self,
                 loss='pointwise',
                 representation='pooling',
                 embedding_dim=32,
                 n_iter=10,
                 batch_size=256,
                 l2=0.0,
                 learning_rate=1e-2,
                 optimizer=None,
                 use_cuda=False,
                 sparse=False,
                 random_state=None):

        assert loss in ('pointwise', 'bpr', 'hinge', 'adaptive_hinge')

        if isinstance(representation, str):
            assert representation in ('pooling', 'cnn', 'lstm')

        self._loss = loss
        self._representation = representation
        self._embedding_dim = embedding_dim
        self._n_iter = n_iter
        self._learning_rate = learning_rate
        self._batch_size = batch_size
        self._l2 = l2
        self._use_cuda = use_cuda
        self._sparse = sparse
        self._optimizer = None
        self._random_state = random_state or np.random.RandomState()

        self._num_items = None
        self._net = None

        set_seed(self._random_state.randint(-10**8, 10**8),
                 cuda=self._use_cuda)

    def fit(self, interactions, verbose=False):
        """
        Fit the model.

        Parameters
        ----------

        interactions: :class:`spotlight.interactions.SequenceInteractions`
            The input sequence dataset.
        """

        sequences = interactions.sequences.astype(np.int64)

        self._num_items = interactions.num_items

        if self._representation == 'pooling':
            self._net = PoolNet(self._num_items,
                                self._embedding_dim,
                                sparse=self._sparse)
        elif self._representation == 'cnn':
            self._net = CNNNet(self._num_items,
                               self._embedding_dim,
                               sparse=self._sparse)
        elif self._representation == 'lstm':
            self._net = LSTMNet(self._num_items,
                                self._embedding_dim,
                                sparse=self._sparse)
        else:
            self._net = self._representation

        if self._optimizer is None:
            self._optimizer = optim.Adam(self._net.parameters(),
                                         weight_decay=self._l2,
                                         lr=self._learning_rate)

        if self._loss == 'pointwise':
            loss_fnc = pointwise_loss
        elif self._loss == 'bpr':
            loss_fnc = bpr_loss
        else:
            loss_fnc = hinge_loss

        for epoch_num in range(self._n_iter):

            sequences = shuffle(sequences, random_state=self._random_state)

            sequences_tensor = gpu(torch.from_numpy(sequences), self._use_cuda)

            epoch_loss = 0.0

            for batch_sequence in minibatch(sequences_tensor,
                                            batch_size=self._batch_size):

                sequence_var = Variable(batch_sequence)

                user_representation, _ = self._net.user_representation(
                    sequence_var)

                positive_prediction = self._net(user_representation,
                                                sequence_var)

                if self._loss == 'adaptive_hinge':
                    raise NotImplementedError
                else:
                    negative_items = sample_items(
                        self._num_items,
                        batch_sequence.size(),
                        random_state=self._random_state)
                    negative_var = Variable(
                        gpu(torch.from_numpy(negative_items)))
                    negative_prediction = self._net(user_representation,
                                                    negative_var)

                self._optimizer.zero_grad()

                loss = loss_fnc(positive_prediction,
                                negative_prediction,
                                mask=(sequence_var != PADDING_IDX))
                epoch_loss += loss.data[0]

                loss.backward()
                self._optimizer.step()

            if verbose:
                print('Epoch {}: loss {}'.format(epoch_num, epoch_loss))

    # def _get_adaptive_negatives(self, user_ids, num_neg_candidates=5):

    #     negatives = Variable(
    #         gpu(
    #             torch.from_numpy(
    #                 self._random_state
    #                 .randint(0, self._num_items,
    #                          (len(user_ids), num_neg_candidates))),
    #             self._use_cuda)
    #     )
    #     negative_predictions = self._net(
    #         user_ids.repeat(num_neg_candidates, 1).transpose(0, 1),
    #         negatives
    #     ).view(-1, num_neg_candidates)

    #     best_negative_prediction, _ = negative_predictions.max(1)

    #     return best_negative_prediction

    def predict(self, sequences, item_ids=None):
        """
        Make predictions: given a sequence of interactions, predict
        the next item in the sequence.

        Parameters
        ----------

        sequences: array, (1 x max_sequence_length)
            Array containing the indices of the items in the sequence.
        item_ids: array (num_items x 1), optional
            Array containing the item ids for which prediction scores
            are desired. If not supplied, predictions for all items
            will be computed.

        Returns
        -------

        predictions: array
            Predicted scores for all items in item_ids.
        """

        sequences = np.atleast_2d(sequences)

        if item_ids is None:
            item_ids = np.arange(self._num_items).reshape(-1, 1)

        sequences = torch.from_numpy(sequences.astype(np.int64).reshape(1, -1))
        item_ids = torch.from_numpy(item_ids.astype(np.int64))

        sequence_var = Variable(gpu(sequences, self._use_cuda))
        item_var = Variable(gpu(item_ids, self._use_cuda))

        _, sequence_representations = self._net.user_representation(
            sequence_var)
        out = self._net(sequence_representations.repeat(len(item_var), 1),
                        item_var)

        return cpu(out.data).numpy().flatten()