def build_sequence_model(hyperparameters, train, random_state):

    h = hyperparameters

    set_seed(42, CUDA)

    if h['compression_ratio'] < 1.0:
        item_embeddings = BloomEmbedding(train.num_items, h['embedding_dim'],
                                         compression_ratio=h['compression_ratio'],
                                         num_hash_functions=4,
                                         padding_idx=0)
    else:
        item_embeddings = ScaledEmbedding(train.num_items, h['embedding_dim'],
                                          padding_idx=0)

    network = LSTMNet(train.num_items, h['embedding_dim'],
                      item_embedding_layer=item_embeddings)

    model = ImplicitSequenceModel(loss=h['loss'],
                                  n_iter=h['n_iter'],
                                  batch_size=h['batch_size'],
                                  learning_rate=h['learning_rate'],
                                  embedding_dim=h['embedding_dim'],
                                  l2=h['l2'],
                                  representation=network,
                                  use_cuda=CUDA,
                                  random_state=np.random.RandomState(42))

    return model
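# Usage sketch (hypothetical): build_sequence_model only needs a dict of
# hyperparameters and a sequence dataset. The values below are illustrative
# assumptions to show the expected keys, not tuned settings.
hyperparameters = {
    'compression_ratio': 0.5,   # < 1.0 selects the BloomEmbedding branch
    'embedding_dim': 32,
    'loss': 'adaptive_hinge',
    'n_iter': 10,
    'batch_size': 256,
    'learning_rate': 1e-3,
    'l2': 1e-6,
}

model = build_sequence_model(hyperparameters, train, np.random.RandomState(42))
model.fit(train, verbose=True)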
def test_bloom_lstm(compression_ratio, expected_mrr):

    random_state = np.random.RandomState(RANDOM_SEED)

    train, test = _get_synthetic_data(randomness=1e-03,
                                      num_interactions=20000,
                                      random_state=random_state)

    embedding = BloomEmbedding(train.num_items, 32,
                               compression_ratio=compression_ratio,
                               num_hash_functions=4)
    representation = LSTMNet(train.num_items,
                             embedding_dim=EMBEDDING_DIM,
                             item_embedding_layer=embedding)

    model = ImplicitSequenceModel(loss=LOSS,
                                  representation=representation,
                                  batch_size=BATCH_SIZE,
                                  learning_rate=1e-2,
                                  l2=1e-7,
                                  n_iter=NUM_EPOCHS * 5,
                                  random_state=random_state,
                                  use_cuda=CUDA)

    model.fit(train, verbose=VERBOSE)

    mrr = _evaluate(model, test)

    assert mrr.mean() > expected_mrr
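# This reads like a pytest-parametrized test. A plausible decorator is
# sketched below; the ratio/threshold pairs are placeholders illustrating
# the fixture shape, not the project's actual values.
import pytest

@pytest.mark.parametrize('compression_ratio, expected_mrr', [
    (0.2, 0.14),
    (0.5, 0.16),
    (1.0, 0.16),
])
def test_bloom_lstm(compression_ratio, expected_mrr):
    ...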
def _initialize(self, interactions):

    self._num_items = interactions.num_items

    if self._representation == 'pooling':
        self._net = PoolNet(self._num_items,
                            self._embedding_dim,
                            sparse=self._sparse)
    elif self._representation == 'cnn':
        self._net = CNNNet(self._num_items,
                           self._embedding_dim,
                           sparse=self._sparse)
    elif self._representation == 'lstm':
        self._net = LSTMNet(self._num_items,
                            self._embedding_dim,
                            sparse=self._sparse)
    elif self._representation == 'mixture':
        self._net = MixtureLSTMNet(self._num_items,
                                   self._embedding_dim,
                                   sparse=self._sparse)
    else:
        self._net = self._representation

    self._net = gpu(self._net, self._use_cuda)

    if self._optimizer_func is None:
        self._optimizer = optim.Adam(
            self._net.parameters(),
            weight_decay=self._l2,
            lr=self._learning_rate
        )
    else:
        self._optimizer = self._optimizer_func(self._net.parameters())

    if self._loss == 'pointwise':
        self._loss_func = pointwise_loss
    elif self._loss == 'bpr':
        self._loss_func = bpr_loss
    elif self._loss == 'hinge':
        self._loss_func = hinge_loss
    else:
        self._loss_func = adaptive_hinge_loss
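# The dispatch above accepts either a string naming a built-in
# representation or a pre-built module. A minimal sketch of both styles,
# assuming this variant of the model (whose dispatch includes 'mixture')
# and a `train` SequenceInteractions object as elsewhere in this section:
model = ImplicitSequenceModel(representation='mixture', embedding_dim=64)

# Any module exposing user_representation() and forward() falls through
# to the final `else` branch unchanged.
net = LSTMNet(train.num_items, 64)
model = ImplicitSequenceModel(representation=net)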
def objective(hyper):

    print(hyper)

    # time.clock() was removed in Python 3.8; perf_counter is the
    # equivalent monotonic timer.
    start = time.perf_counter()

    if hyper['model']['type'] == 'lsh':
        num_hashes = int(hyper['model']['num_hash_functions'])
        num_layers = int(hyper['model']['num_layers'])
        nonlinearity = hyper['model']['nonlinearity']
        residual = hyper['model']['residual']
        embed = hyper['model']['embed']

        item_embeddings = LSHEmbedding(train.num_items,
                                       int(hyper['embedding_dim']),
                                       embed=embed,
                                       residual_connections=residual,
                                       nonlinearity=nonlinearity,
                                       num_layers=num_layers,
                                       num_hash_functions=num_hashes)
        item_embeddings.fit(train_nonsequence.tocsr().T)
    else:
        item_embeddings = ScaledEmbedding(train.num_items,
                                          int(hyper['embedding_dim']),
                                          padding_idx=0)

    network = LSTMNet(train.num_items,
                      int(hyper['embedding_dim']),
                      item_embedding_layer=item_embeddings)

    model = ImplicitSequenceModel(loss=hyper['loss'],
                                  n_iter=int(hyper['n_iter']),
                                  batch_size=int(hyper['batch_size']),
                                  learning_rate=hyper['learning_rate'],
                                  embedding_dim=int(hyper['embedding_dim']),
                                  l2=hyper['l2'],
                                  representation=network,
                                  use_cuda=CUDA,
                                  random_state=random_state)

    model.fit(train, verbose=True)

    elapsed = time.perf_counter() - start

    print(model)

    validation_mrr = sequence_mrr_score(model, validation).mean()
    test_mrr = sequence_mrr_score(model, test).mean()

    print('MRR {} {}'.format(validation_mrr, test_mrr))

    return {'loss': -validation_mrr,
            'status': STATUS_OK,
            'validation_mrr': validation_mrr,
            'test_mrr': test_mrr,
            'elapsed': elapsed,
            'hyper': hyper}
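# A sketch of driving this objective with hyperopt's fmin. The search
# space is an illustrative assumption mirroring the keys the objective
# reads; the ranges are not the original experiment's.
from hyperopt import Trials, fmin, hp, tpe

space = {
    'model': hp.choice('model', [
        {'type': 'embedding'},
        {'type': 'lsh',
         'num_hash_functions': hp.quniform('num_hash_functions', 1, 4, 1),
         'num_layers': hp.quniform('num_layers', 1, 3, 1),
         'nonlinearity': hp.choice('nonlinearity', ['tanh', 'relu']),
         'residual': hp.choice('residual', [True, False]),
         'embed': hp.choice('embed', [True, False])},
    ]),
    'embedding_dim': hp.quniform('embedding_dim', 16, 128, 16),
    'loss': hp.choice('loss', ['bpr', 'adaptive_hinge']),
    'n_iter': hp.quniform('n_iter', 5, 20, 5),
    'batch_size': hp.quniform('batch_size', 128, 512, 128),
    'learning_rate': hp.loguniform('learning_rate', -8, -2),
    'l2': hp.loguniform('l2', -16, -6),
}

trials = Trials()
best = fmin(objective, space, algo=tpe.suggest, trials=trials, max_evals=100)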
def sequence_model(num_embeddings, bloom):

    if bloom:
        item_embeddings = BloomEmbedding(num_embeddings, EMBEDDING_DIM,
                                         num_hash_functions=NUM_HASH_FUNCTIONS)
    else:
        item_embeddings = ScaledEmbedding(num_embeddings, EMBEDDING_DIM)

    network = LSTMNet(num_embeddings, EMBEDDING_DIM,
                      item_embedding_layer=item_embeddings)

    model = ImplicitSequenceModel(loss='adaptive_hinge',
                                  n_iter=N_ITER,
                                  batch_size=512,
                                  learning_rate=1e-3,
                                  l2=1e-2,
                                  representation=network,
                                  use_cuda=CUDA)

    return model
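# Minimal usage sketch comparing the two branches, assuming module-level
# constants like those referenced above (the values here are placeholders):
EMBEDDING_DIM = 32
NUM_HASH_FUNCTIONS = 4
N_ITER = 10
CUDA = False

bloom_model = sequence_model(train.num_items, bloom=True)
baseline_model = sequence_model(train.num_items, bloom=False)

for model in (bloom_model, baseline_model):
    model.fit(train, verbose=True)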
class ImplicitSequenceModel(object):
    """
    Model for sequential recommendations using implicit feedback.

    Parameters
    ----------

    loss: string, optional
        The loss function for approximating a softmax with negative sampling.
        One of 'pointwise', 'bpr', 'hinge', 'adaptive_hinge', corresponding
        to losses from :class:`spotlight.losses`.
    representation: string or instance of :class:`spotlight.sequence.representations`, optional
        Sequence representation to use. If string, it must be one
        of 'pooling', 'cnn', 'lstm'; otherwise must be one of the
        representations from :class:`spotlight.sequence.representations`.
    embedding_dim: int, optional
        Number of embedding dimensions to use for representing items.
        Overridden if representation is an instance of a representation class.
    n_iter: int, optional
        Number of iterations to run.
    batch_size: int, optional
        Minibatch size.
    l2: float, optional
        L2 loss penalty.
    learning_rate: float, optional
        Initial learning rate.
    optimizer_func: function, optional
        Function that takes in module parameters as the first argument and
        returns an instance of a PyTorch optimizer. Overrides l2 and learning
        rate if supplied. If no optimizer supplied, then use ADAM by default.
    use_cuda: boolean, optional
        Run the model on a GPU.
    sparse: boolean, optional
        Use sparse gradients for embedding layers.
    random_state: instance of numpy.random.RandomState, optional
        Random state to use when fitting.
    num_negative_samples: int, optional
        Number of negative samples to generate for adaptive hinge loss.

    Notes
    -----

    During fitting, the model computes the loss for each timestep of the
    supplied sequence. For example, suppose the following sequences are
    passed to the ``fit`` function:

    .. code-block:: python

       [[1, 2, 3, 4, 5],
        [0, 0, 7, 1, 4]]

    In this case, the loss for the first example will be the mean loss
    of trying to predict ``2`` from ``[1]``, ``3`` from ``[1, 2]``,
    ``4`` from ``[1, 2, 3]`` and so on. This means that explicit padding
    of all subsequences is not necessary (although it is possible by using
    the ``step_size`` parameter of
    :func:`spotlight.interactions.Interactions.to_sequence`).
""" def __init__(self, loss='pointwise', representation='pooling', embedding_dim=32, n_iter=10, batch_size=256, l2=0.0, learning_rate=1e-2, optimizer_func=None, use_cuda=False, sparse=False, random_state=None, num_negative_samples=5): assert loss in ('pointwise', 'bpr', 'hinge', 'adaptive_hinge') if isinstance(representation, str): assert representation in ('pooling', 'cnn', 'lstm') self._loss = loss self._representation = representation self._embedding_dim = embedding_dim self._n_iter = n_iter self._learning_rate = learning_rate self._batch_size = batch_size self._l2 = l2 self._use_cuda = use_cuda self._sparse = sparse self._optimizer_func = optimizer_func self._random_state = random_state or np.random.RandomState() self._num_negative_samples = num_negative_samples self._num_items = None self._net = None self._optimizer = None self._loss_func = None set_seed(self._random_state.randint(-10**8, 10**8), cuda=self._use_cuda) def __repr__(self): return _repr_model(self) @property def _initialized(self): return self._net is not None def _initialize(self, interactions): self._num_items = interactions.num_items if self._representation == 'pooling': self._net = PoolNet(self._num_items, self._embedding_dim, sparse=self._sparse) elif self._representation == 'cnn': self._net = CNNNet(self._num_items, self._embedding_dim, sparse=self._sparse) elif self._representation == 'lstm': self._net = LSTMNet(self._num_items, self._embedding_dim, sparse=self._sparse) else: self._net = self._representation self._net = gpu(self._net, self._use_cuda) if self._optimizer_func is None: self._optimizer = optim.Adam(self._net.parameters(), weight_decay=self._l2, lr=self._learning_rate) else: self._optimizer = self._optimizer_func(self._net.parameters()) if self._loss == 'pointwise': self._loss_func = pointwise_loss elif self._loss == 'bpr': self._loss_func = bpr_loss elif self._loss == 'hinge': self._loss_func = hinge_loss else: self._loss_func = adaptive_hinge_loss def _check_input(self, item_ids): if isinstance(item_ids, int): item_id_max = item_ids else: item_id_max = item_ids.max() if item_id_max >= self._num_items: raise ValueError('Maximum item id greater ' 'than number of items in model.') def fit(self, interactions, verbose=False): """ Fit the model. When called repeatedly, model fitting will resume from the point at which training stopped in the previous fit call. Parameters ---------- interactions: :class:`spotlight.interactions.SequenceInteractions` The input sequence dataset. 
""" sequences = interactions.sequences.astype(np.int64) if not self._initialized: self._initialize(interactions) self._check_input(sequences) for epoch_num in range(self._n_iter): sequences = shuffle(sequences, random_state=self._random_state) sequences_tensor = gpu(torch.from_numpy(sequences), self._use_cuda) epoch_loss = 0.0 for minibatch_num, batch_sequence in enumerate( minibatch(sequences_tensor, batch_size=self._batch_size)): sequence_var = Variable(batch_sequence) user_representation, _ = self._net.user_representation( sequence_var) positive_prediction = self._net(user_representation, sequence_var) if self._loss == 'adaptive_hinge': negative_prediction = self._get_multiple_negative_predictions( sequence_var.size(), user_representation, n=self._num_negative_samples) else: negative_prediction = self._get_negative_prediction( sequence_var.size(), user_representation) self._optimizer.zero_grad() loss = self._loss_func(positive_prediction, negative_prediction, mask=(sequence_var != PADDING_IDX)) epoch_loss += loss.data[0] loss.backward() self._optimizer.step() epoch_loss /= minibatch_num + 1 if verbose: print('Epoch {}: loss {}'.format(epoch_num, epoch_loss)) if np.isnan(epoch_loss) or epoch_loss == 0.0: raise ValueError( 'Degenerate epoch loss: {}'.format(epoch_loss)) def _get_negative_prediction(self, shape, user_representation): negative_items = sample_items(self._num_items, shape, random_state=self._random_state) negative_var = Variable( gpu(torch.from_numpy(negative_items), self._use_cuda)) negative_prediction = self._net(user_representation, negative_var) return negative_prediction def _get_multiple_negative_predictions(self, shape, user_representation, n=5): batch_size, sliding_window = shape size = (n, ) + (1, ) * (user_representation.dim() - 1) negative_prediction = self._get_negative_prediction( (n * batch_size, sliding_window), user_representation.repeat(*size)) return negative_prediction.view(n, batch_size, sliding_window) def predict(self, sequences, item_ids=None): """ Make predictions: given a sequence of interactions, predict the next item in the sequence. Parameters ---------- sequences: array, (1 x max_sequence_length) Array containing the indices of the items in the sequence. item_ids: array (num_items x 1), optional Array containing the item ids for which prediction scores are desired. If not supplied, predictions for all items will be computed. Returns ------- predictions: array Predicted scores for all items in item_ids. """ self._net.train(False) sequences = np.atleast_2d(sequences) if item_ids is None: item_ids = np.arange(self._num_items).reshape(-1, 1) self._check_input(item_ids) self._check_input(sequences) sequences = torch.from_numpy(sequences.astype(np.int64).reshape(1, -1)) item_ids = torch.from_numpy(item_ids.astype(np.int64)) sequence_var = Variable(gpu(sequences, self._use_cuda)) item_var = Variable(gpu(item_ids, self._use_cuda)) _, sequence_representations = self._net.user_representation( sequence_var) size = (len(item_var), ) + sequence_representations.size()[1:] out = self._net(sequence_representations.expand(*size), item_var) return cpu(out.data).numpy().flatten()
def fit(self, interactions, verbose=False):
    """
    Fit the model.

    Parameters
    ----------

    interactions: :class:`spotlight.interactions.SequenceInteractions`
        The input sequence dataset.
    """

    sequences = interactions.sequences.astype(np.int64)

    self._num_items = interactions.num_items

    if self._representation == 'pooling':
        self._net = PoolNet(self._num_items,
                            self._embedding_dim,
                            sparse=self._sparse)
    elif self._representation == 'cnn':
        self._net = CNNNet(self._num_items,
                           self._embedding_dim,
                           sparse=self._sparse)
    elif self._representation == 'lstm':
        self._net = LSTMNet(self._num_items,
                            self._embedding_dim,
                            sparse=self._sparse)
    else:
        self._net = self._representation

    self._net = gpu(self._net, self._use_cuda)

    if self._optimizer is None:
        self._optimizer = optim.Adam(self._net.parameters(),
                                     weight_decay=self._l2,
                                     lr=self._learning_rate)

    if self._loss == 'pointwise':
        loss_fnc = pointwise_loss
    elif self._loss == 'bpr':
        loss_fnc = bpr_loss
    elif self._loss == 'hinge':
        loss_fnc = hinge_loss
    else:
        loss_fnc = adaptive_hinge_loss

    for epoch_num in range(self._n_iter):

        sequences = shuffle(sequences,
                            random_state=self._random_state)

        sequences_tensor = gpu(torch.from_numpy(sequences),
                               self._use_cuda)

        epoch_loss = 0.0

        for minibatch_num, batch_sequence in enumerate(
                minibatch(sequences_tensor,
                          batch_size=self._batch_size)):

            sequence_var = Variable(batch_sequence)

            user_representation, _ = self._net.user_representation(
                sequence_var)

            positive_prediction = self._net(user_representation,
                                            sequence_var)

            if self._loss == 'adaptive_hinge':
                negative_prediction = [
                    self._get_negative_prediction(sequence_var.size(),
                                                  user_representation)
                    for __ in range(5)
                ]
            else:
                negative_prediction = self._get_negative_prediction(
                    sequence_var.size(), user_representation)

            self._optimizer.zero_grad()

            loss = loss_fnc(positive_prediction,
                            negative_prediction,
                            mask=(sequence_var != PADDING_IDX))
            epoch_loss += loss.data[0]

            loss.backward()
            self._optimizer.step()

        epoch_loss /= minibatch_num + 1

        if verbose:
            print('Epoch {}: loss {}'.format(epoch_num, epoch_loss))
min_sequence_length = 10
max_sequence_length = 200
step_size = 1

train = train.to_sequence(max_sequence_length=max_sequence_length,
                          min_sequence_length=min_sequence_length,
                          step_size=step_size)
test = test.to_sequence(max_sequence_length=max_sequence_length,
                        min_sequence_length=min_sequence_length,
                        step_size=step_size)
validation = validation.to_sequence(max_sequence_length=max_sequence_length,
                                    min_sequence_length=min_sequence_length,
                                    step_size=step_size)

net = LSTMNet(len(set(item2idx)),
              embedding_dim=32,
              item_embedding_layer=None,
              sparse=False)

model = ImplicitSequenceModel(loss='adaptive_hinge',
                              representation=net,
                              batch_size=32,
                              learning_rate=0.01,
                              l2=10e-6,
                              n_iter=10,
                              use_cuda=False,
                              random_state=random_state)

model.fit(train, verbose=True)

test_mrr = sequence_mrr_score(model, test)
val_mrr = sequence_mrr_score(model, validation)
train_mrr = sequence_mrr_score(model, train)
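# sequence_mrr_score returns one reciprocal rank per evaluated sequence;
# taking the mean gives a single headline number for each split.
print('Train MRR: {:.4f}'.format(train_mrr.mean()))
print('Validation MRR: {:.4f}'.format(val_mrr.mean()))
print('Test MRR: {:.4f}'.format(test_mrr.mean()))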
class ImplicitSequenceModel(object):
    """
    Model for sequential recommendations using implicit feedback.

    Parameters
    ----------

    loss: string, optional
        The loss function for approximating a softmax with negative sampling.
        One of 'pointwise', 'bpr', 'hinge', 'adaptive_hinge', corresponding
        to losses from :class:`spotlight.losses`.
    representation: string or instance of :class:`spotlight.sequence.representations`, optional
        Sequence representation to use. If string, it must be one
        of 'pooling', 'cnn', 'lstm'; otherwise must be one of the
        representations from :class:`spotlight.sequence.representations`.
    embedding_dim: int, optional
        Number of embedding dimensions to use for representing items.
        Overridden if representation is an instance of a representation class.
    n_iter: int, optional
        Number of iterations to run.
    batch_size: int, optional
        Minibatch size.
    l2: float, optional
        L2 loss penalty.
    learning_rate: float, optional
        Initial learning rate.
    optimizer_func: function, optional
        Function that takes in module parameters as the first argument and
        returns an instance of a PyTorch optimizer. Overrides l2 and learning
        rate if supplied. If no optimizer supplied, then use ADAM by default.
    use_cuda: boolean, optional
        Run the model on a GPU.
    sparse: boolean, optional
        Use sparse gradients for embedding layers.
    random_state: instance of numpy.random.RandomState, optional
        Random state to use when fitting.

    Notes
    -----

    During fitting, the model computes the loss for each timestep of the
    supplied sequence. For example, suppose the following sequences are
    passed to the ``fit`` function:

    .. code-block:: python

       [[1, 2, 3, 4, 5],
        [0, 0, 7, 1, 4]]

    In this case, the loss for the first example will be the mean loss
    of trying to predict ``2`` from ``[1]``, ``3`` from ``[1, 2]``,
    ``4`` from ``[1, 2, 3]`` and so on. This means that explicit padding
    of all subsequences is not necessary (although it is possible by using
    the ``step_size`` parameter of
    :func:`spotlight.interactions.Interactions.to_sequence`).
    """

    def __init__(self,
                 loss='pointwise',
                 representation='pooling',
                 embedding_dim=32,
                 n_iter=10,
                 batch_size=256,
                 l2=0.0,
                 learning_rate=1e-2,
                 optimizer_func=None,
                 use_cuda=False,
                 sparse=False,
                 random_state=None):

        assert loss in ('pointwise',
                        'bpr',
                        'hinge',
                        'adaptive_hinge')

        if isinstance(representation, str):
            assert representation in ('pooling',
                                      'cnn',
                                      'lstm')

        self._loss = loss
        self._representation = representation
        self._embedding_dim = embedding_dim
        self._n_iter = n_iter
        self._learning_rate = learning_rate
        self._batch_size = batch_size
        self._l2 = l2
        self._use_cuda = use_cuda
        self._sparse = sparse
        self._optimizer_func = optimizer_func
        self._random_state = random_state or np.random.RandomState()

        self._num_items = None
        self._net = None
        self._optimizer = None

        set_seed(self._random_state.randint(-10**8, 10**8),
                 cuda=self._use_cuda)

    def __repr__(self):

        return _repr_model(self)

    def fit(self, interactions, verbose=False):
        """
        Fit the model.

        Parameters
        ----------

        interactions: :class:`spotlight.interactions.SequenceInteractions`
            The input sequence dataset.
""" sequences = interactions.sequences.astype(np.int64) self._num_items = interactions.num_items if self._representation == 'pooling': self._net = PoolNet(self._num_items, self._embedding_dim, sparse=self._sparse) elif self._representation == 'cnn': self._net = CNNNet(self._num_items, self._embedding_dim, sparse=self._sparse) elif self._representation == 'lstm': self._net = LSTMNet(self._num_items, self._embedding_dim, sparse=self._sparse) else: self._net = self._representation self._net = gpu(self._net, self._use_cuda) if self._optimizer is None: self._optimizer = optim.Adam( self._net.parameters(), weight_decay=self._l2, lr=self._learning_rate ) else: self._optimizer = self._optimizer_func(self._net.parameters()) if self._loss == 'pointwise': loss_fnc = pointwise_loss elif self._loss == 'bpr': loss_fnc = bpr_loss elif self._loss == 'hinge': loss_fnc = hinge_loss else: loss_fnc = adaptive_hinge_loss for epoch_num in range(self._n_iter): sequences = shuffle(sequences, random_state=self._random_state) sequences_tensor = gpu(torch.from_numpy(sequences), self._use_cuda) epoch_loss = 0.0 for minibatch_num, batch_sequence in enumerate(minibatch(sequences_tensor, batch_size=self._batch_size)): sequence_var = Variable(batch_sequence) user_representation, _ = self._net.user_representation( sequence_var ) positive_prediction = self._net(user_representation, sequence_var) if self._loss == 'adaptive_hinge': negative_prediction = [self._get_negative_prediction(sequence_var.size(), user_representation) for __ in range(5)] else: negative_prediction = self._get_negative_prediction(sequence_var.size(), user_representation) self._optimizer.zero_grad() loss = loss_fnc(positive_prediction, negative_prediction, mask=(sequence_var != PADDING_IDX)) epoch_loss += loss.data[0] loss.backward() self._optimizer.step() epoch_loss /= minibatch_num + 1 if verbose: print('Epoch {}: loss {}'.format(epoch_num, epoch_loss)) def _get_negative_prediction(self, shape, user_representation): negative_items = sample_items( self._num_items, shape, random_state=self._random_state) negative_var = Variable( gpu(torch.from_numpy(negative_items), self._use_cuda) ) negative_prediction = self._net(user_representation, negative_var) return negative_prediction def predict(self, sequences, item_ids=None): """ Make predictions: given a sequence of interactions, predict the next item in the sequence. Parameters ---------- sequences: array, (1 x max_sequence_length) Array containing the indices of the items in the sequence. item_ids: array (num_items x 1), optional Array containing the item ids for which prediction scores are desired. If not supplied, predictions for all items will be computed. Returns ------- predictions: array Predicted scores for all items in item_ids. """ self._net.train(False) sequences = np.atleast_2d(sequences) if item_ids is None: item_ids = np.arange(self._num_items).reshape(-1, 1) sequences = torch.from_numpy(sequences.astype(np.int64).reshape(1, -1)) item_ids = torch.from_numpy(item_ids.astype(np.int64)) sequence_var = Variable(gpu(sequences, self._use_cuda)) item_var = Variable(gpu(item_ids, self._use_cuda)) _, sequence_representations = self._net.user_representation(sequence_var) out = self._net(sequence_representations.repeat(len(item_var), 1), item_var) return cpu(out.data).numpy().flatten()
def objective(hyper):

    print(hyper)

    # time.clock() was removed in Python 3.8; perf_counter is the
    # equivalent monotonic timer.
    start = time.perf_counter()

    h = hyper['model']

    cls = ImplicitSequenceModel

    if h['type'] == 'pooling':
        representation = PoolNet(train.num_items,
                                 embedding_dim=int(h['embedding_dim']))
    elif h['type'] == 'lstm':
        representation = LSTMNet(train.num_items,
                                 embedding_dim=int(h['embedding_dim']))
    elif h['type'] == 'mixture':
        num_components = int(h['num_components'])
        embedding_dim = int(h['embedding_dim'])
        representation = MixtureLSTMNet(train.num_items,
                                        num_components=num_components,
                                        embedding_dim=embedding_dim)
    elif h['type'] == 'mixture2':
        num_components = int(h['num_components'])
        embedding_dim = int(h['embedding_dim'])
        representation = Mixture2LSTMNet(train.num_items,
                                         num_components=num_components,
                                         embedding_dim=embedding_dim)
    elif h['type'] == 'linear_mixture':
        num_components = int(h['num_components'])
        embedding_dim = int(h['embedding_dim'])
        representation = LinearMixtureLSTMNet(train.num_items,
                                              num_components=num_components,
                                              embedding_dim=embedding_dim)
    elif h['type'] == 'diversified_mixture_fixed':
        num_components = int(h['num_components'])
        embedding_dim = int(h['embedding_dim'])
        representation = DiversifiedMixtureLSTMNet(train.num_items,
                                                   num_components=num_components,
                                                   diversity_penalty=h['diversity_penalty'],
                                                   embedding_dim=embedding_dim)
        cls = DiversifiedImplicitSequenceModel
    else:
        raise ValueError('Unknown model type')

    model = cls(
        batch_size=int(h['batch_size']),
        loss=h['loss'],
        learning_rate=h['learning_rate'],
        l2=h['l2'],
        n_iter=int(h['n_iter']),
        representation=representation,
        use_cuda=CUDA,
        random_state=np.random.RandomState(42)
    )

    try:
        model.fit(train, verbose=True)
    except ValueError:
        elapsed = time.perf_counter() - start
        return {'loss': 0.0,
                'status': STATUS_FAIL,
                'validation_mrr': 0.0,
                'test_mrr': 0.0,
                'elapsed': elapsed,
                'hyper': h}

    elapsed = time.perf_counter() - start

    print(model)

    validation_mrr = sequence_mrr_score(
        model,
        validation,
        exclude_preceding=True
    ).mean()
    test_mrr = sequence_mrr_score(
        model,
        test,
        exclude_preceding=True
    ).mean()

    print('MRR {} {}'.format(validation_mrr, test_mrr))

    if np.isnan(validation_mrr):
        status = STATUS_FAIL
    else:
        status = STATUS_OK

    return {'loss': -validation_mrr,
            'status': status,
            'validation_mrr': validation_mrr,
            'test_mrr': test_mrr,
            'elapsed': elapsed,
            'hyper': h}
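# Sketch of reading results back after a hyperopt run over this objective;
# `space` is assumed to be an hp.choice over the model types handled above.
from hyperopt import Trials, fmin, tpe

trials = Trials()
best = fmin(objective, space, algo=tpe.suggest, trials=trials, max_evals=50)

# Every dict returned by objective() is kept in trials.results, so the
# best run's held-out metrics can be read back directly.
best_trial = min(trials.results, key=lambda x: x['loss'])
print(best_trial['validation_mrr'], best_trial['test_mrr'])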
class ImplicitSequenceModel(object):
    """
    Model for sequential recommendations using implicit feedback.

    Parameters
    ----------

    loss: string, optional
        The loss function for approximating a softmax with negative sampling.
        One of 'pointwise', 'bpr', 'hinge', 'adaptive_hinge', corresponding
        to losses from :class:`spotlight.losses`.
    representation: string or instance of :class:`spotlight.sequence.representations`, optional
        Sequence representation to use. If string, it must be one
        of 'pooling', 'cnn', 'lstm'; otherwise must be one of the
        representations from :class:`spotlight.sequence.representations`.
    embedding_dim: int, optional
        Number of embedding dimensions to use for representing items.
        Overridden if representation is an instance of a representation class.
    n_iter: int, optional
        Number of iterations to run.
    batch_size: int, optional
        Minibatch size.
    l2: float, optional
        L2 loss penalty.
    learning_rate: float, optional
        Initial learning rate.
    optimizer: instance of a PyTorch optimizer, optional
        Overrides l2 and learning rate if supplied.
    use_cuda: boolean, optional
        Run the model on a GPU.
    sparse: boolean, optional
        Use sparse gradients for embedding layers.
    random_state: instance of numpy.random.RandomState, optional
        Random state to use when fitting.
    """

    def __init__(self,
                 loss='pointwise',
                 representation='pooling',
                 embedding_dim=32,
                 n_iter=10,
                 batch_size=256,
                 l2=0.0,
                 learning_rate=1e-2,
                 optimizer=None,
                 use_cuda=False,
                 sparse=False,
                 random_state=None):

        assert loss in ('pointwise',
                        'bpr',
                        'hinge',
                        'adaptive_hinge')

        if isinstance(representation, str):
            assert representation in ('pooling',
                                      'cnn',
                                      'lstm')

        self._loss = loss
        self._representation = representation
        self._embedding_dim = embedding_dim
        self._n_iter = n_iter
        self._learning_rate = learning_rate
        self._batch_size = batch_size
        self._l2 = l2
        self._use_cuda = use_cuda
        self._sparse = sparse
        # Keep the user-supplied optimizer (if any); fit() falls back
        # to Adam when this is still None.
        self._optimizer = optimizer
        self._random_state = random_state or np.random.RandomState()

        self._num_items = None
        self._net = None

        set_seed(self._random_state.randint(-10**8, 10**8),
                 cuda=self._use_cuda)

    def fit(self, interactions, verbose=False):
        """
        Fit the model.

        Parameters
        ----------

        interactions: :class:`spotlight.interactions.SequenceInteractions`
            The input sequence dataset.
""" sequences = interactions.sequences.astype(np.int64) self._num_items = interactions.num_items if self._representation == 'pooling': self._net = PoolNet(self._num_items, self._embedding_dim, sparse=self._sparse) elif self._representation == 'cnn': self._net = CNNNet(self._num_items, self._embedding_dim, sparse=self._sparse) elif self._representation == 'lstm': self._net = LSTMNet(self._num_items, self._embedding_dim, sparse=self._sparse) else: self._net = self._representation if self._optimizer is None: self._optimizer = optim.Adam(self._net.parameters(), weight_decay=self._l2, lr=self._learning_rate) if self._loss == 'pointwise': loss_fnc = pointwise_loss elif self._loss == 'bpr': loss_fnc = bpr_loss else: loss_fnc = hinge_loss for epoch_num in range(self._n_iter): sequences = shuffle(sequences, random_state=self._random_state) sequences_tensor = gpu(torch.from_numpy(sequences), self._use_cuda) epoch_loss = 0.0 for batch_sequence in minibatch(sequences_tensor, batch_size=self._batch_size): sequence_var = Variable(batch_sequence) user_representation, _ = self._net.user_representation( sequence_var) positive_prediction = self._net(user_representation, sequence_var) if self._loss == 'adaptive_hinge': raise NotImplementedError else: negative_items = sample_items( self._num_items, batch_sequence.size(), random_state=self._random_state) negative_var = Variable( gpu(torch.from_numpy(negative_items))) negative_prediction = self._net(user_representation, negative_var) self._optimizer.zero_grad() loss = loss_fnc(positive_prediction, negative_prediction, mask=(sequence_var != PADDING_IDX)) epoch_loss += loss.data[0] loss.backward() self._optimizer.step() if verbose: print('Epoch {}: loss {}'.format(epoch_num, epoch_loss)) # def _get_adaptive_negatives(self, user_ids, num_neg_candidates=5): # negatives = Variable( # gpu( # torch.from_numpy( # self._random_state # .randint(0, self._num_items, # (len(user_ids), num_neg_candidates))), # self._use_cuda) # ) # negative_predictions = self._net( # user_ids.repeat(num_neg_candidates, 1).transpose(0, 1), # negatives # ).view(-1, num_neg_candidates) # best_negative_prediction, _ = negative_predictions.max(1) # return best_negative_prediction def predict(self, sequences, item_ids=None): """ Make predictions: given a sequence of interactions, predict the next item in the sequence. Parameters ---------- sequences: array, (1 x max_sequence_length) Array containing the indices of the items in the sequence. item_ids: array (num_items x 1), optional Array containing the item ids for which prediction scores are desired. If not supplied, predictions for all items will be computed. Returns ------- predictions: array Predicted scores for all items in item_ids. """ sequences = np.atleast_2d(sequences) if item_ids is None: item_ids = np.arange(self._num_items).reshape(-1, 1) sequences = torch.from_numpy(sequences.astype(np.int64).reshape(1, -1)) item_ids = torch.from_numpy(item_ids.astype(np.int64)) sequence_var = Variable(gpu(sequences, self._use_cuda)) item_var = Variable(gpu(item_ids, self._use_cuda)) _, sequence_representations = self._net.user_representation( sequence_var) out = self._net(sequence_representations.repeat(len(item_var), 1), item_var) return cpu(out.data).numpy().flatten()