Exemple #1
0
def test_bloom(compression_ratio, expected_rmse):
    """Check the test RMSE of an explicit model using Bloom embeddings.

    The MovieLens '100K' dataset is split into train and test parts, an
    ``ExplicitFactorizationModel`` with a ``BilinearNet`` representation
    backed by ``BloomEmbedding`` layers is fitted on the train part, and
    the RMSE measured on the test part is compared with the expectation.

    Parameters
    ----------

    compression_ratio:
        Forwarded unchanged to both ``BloomEmbedding`` layers.
    expected_rmse:
        Bound the measured RMSE, reduced by ``EPSILON``, must stay below.
    """

    dataset = movielens.get_movielens_dataset('100K')
    train, test = random_train_test_split(dataset,
                                          random_state=RANDOM_STATE)

    # Both embedding layers share the same hashing configuration.
    bloom_options = dict(compression_ratio=compression_ratio,
                         num_hash_functions=2)
    user_embeddings = BloomEmbedding(dataset.num_users, 32, **bloom_options)
    item_embeddings = BloomEmbedding(dataset.num_items, 32, **bloom_options)

    representation = BilinearNet(dataset.num_users,
                                 dataset.num_items,
                                 user_embedding_layer=user_embeddings,
                                 item_embedding_layer=item_embeddings)

    model = ExplicitFactorizationModel(
        loss='regression',
        n_iter=10,
        batch_size=1024,
        learning_rate=1e-2,
        l2=1e-5,
        representation=representation,
        use_cuda=CUDA,
    )
    model.fit(train)
    print(model)

    rmse = rmse_score(model, test)
    print(rmse)

    assert rmse - EPSILON < expected_rmse
Exemple #2
0
    def _initialize(self, interactions):
        """
        Prepare the network, the optimizer and the loss function.

        Parameters
        ----------

        interactions:
            Dataset whose ``num_users`` and ``num_items`` attributes size
            the model.

        Raises
        ------

        ValueError
            If ``self._loss`` is not one of 'regression', 'poisson' or
            'logistic'. The network and optimizer are already set on the
            instance when this is raised.
        """

        num_users, num_items = interactions.num_users, interactions.num_items
        self._num_users = num_users
        self._num_items = num_items

        # Use the caller-supplied representation when there is one,
        # otherwise fall back to a default bilinear network.
        network = self._representation
        if network is None:
            network = BilinearNet(self._num_users,
                                  self._num_items,
                                  self._embedding_dim,
                                  sparse=self._sparse)
        self._net = gpu(network, self._use_cuda)

        # A custom optimizer factory takes precedence over the Adam default.
        if self._optimizer_func is not None:
            self._optimizer = self._optimizer_func(self._net.parameters())
        else:
            self._optimizer = optim.Adam(self._net.parameters(),
                                         weight_decay=self._l2,
                                         lr=self._learning_rate)

        loss_name = self._loss
        if loss_name == 'regression':
            self._loss_func = regression_loss
        elif loss_name == 'poisson':
            self._loss_func = poisson_loss
        elif loss_name == 'logistic':
            self._loss_func = logistic_loss
        else:
            raise ValueError('Unknown loss: {}'.format(self._loss))
Exemple #3
0
    def _initialize(self, interactions):
        """
        Prepare the network, the optimizer and the loss function.

        Parameters
        ----------

        interactions:
            Dataset whose ``num_users`` and ``num_items`` attributes size
            the model.

        Raises
        ------

        ValueError
            If ``self._loss`` is not one of 'pointwise', 'bpr', 'hinge'
            or 'adaptive_hinge'.
        """

        (self._num_users, self._num_items) = (interactions.num_users,
                                              interactions.num_items)

        # Use the caller-supplied representation when there is one,
        # otherwise fall back to a default bilinear network.
        if self._representation is not None:
            self._net = gpu(self._representation, self._use_cuda)
        else:
            self._net = gpu(
                BilinearNet(self._num_users,
                            self._num_items,
                            self._embedding_dim,
                            sparse=self._sparse), self._use_cuda)

        # A custom optimizer factory takes precedence over the Adam default.
        if self._optimizer_func is None:
            self._optimizer = optim.Adam(self._net.parameters(),
                                         weight_decay=self._l2,
                                         lr=self._learning_rate)
        else:
            self._optimizer = self._optimizer_func(self._net.parameters())

        if self._loss == 'pointwise':
            self._loss_func = pointwise_loss
        elif self._loss == 'bpr':
            self._loss_func = bpr_loss
        elif self._loss == 'hinge':
            self._loss_func = hinge_loss
        elif self._loss == 'adaptive_hinge':
            self._loss_func = adaptive_hinge_loss
        else:
            # Fail loudly rather than silently training a misspelled loss
            # name with the adaptive hinge loss.
            raise ValueError('Unknown loss: {}'.format(self._loss))
Exemple #4
0
def test_bpr_bloom(compression_ratio, expected_mrr):
    """Check the test MRR of a BPR model using Bloom embeddings.

    The MovieLens '100K' dataset is split into train and test parts, an
    ``ImplicitFactorizationModel`` with a ``BilinearNet`` representation
    backed by ``BloomEmbedding`` layers is fitted on the train part, and
    the mean of the ``mrr_score`` values on the test part is compared
    with the expectation.

    Parameters
    ----------

    compression_ratio:
        Forwarded unchanged to both ``BloomEmbedding`` layers.
    expected_mrr:
        Value the mean MRR must strictly exceed.
    """

    dataset = movielens.get_movielens_dataset('100K')
    train, test = random_train_test_split(dataset,
                                          random_state=RANDOM_STATE)

    # Both embedding layers share the same hashing configuration.
    bloom_options = dict(compression_ratio=compression_ratio,
                         num_hash_functions=2)
    user_embeddings = BloomEmbedding(dataset.num_users, 32, **bloom_options)
    item_embeddings = BloomEmbedding(dataset.num_items, 32, **bloom_options)

    representation = BilinearNet(dataset.num_users,
                                 dataset.num_items,
                                 user_embedding_layer=user_embeddings,
                                 item_embedding_layer=item_embeddings)

    model = ImplicitFactorizationModel(
        loss='bpr',
        n_iter=10,
        batch_size=1024,
        learning_rate=1e-2,
        l2=1e-6,
        representation=representation,
        use_cuda=CUDA,
    )
    model.fit(train)
    print(model)

    # Training interactions are handed to the scorer via ``train=``.
    mrr = mrr_score(model, test, train=train).mean()

    assert mrr > expected_mrr
Exemple #5
0
def factorization_model(num_embeddings, bloom):
    """Build an unfitted implicit model with plain or Bloom embeddings.

    Parameters
    ----------

    num_embeddings:
        Used both as the number of users and as the number of items.
    bloom:
        When truthy the embedding layers are ``BloomEmbedding`` instances,
        otherwise ``ScaledEmbedding`` instances.

    Returns
    -------

    An ``ImplicitFactorizationModel`` configured with the
    'adaptive_hinge' loss.
    """

    def make_embedding():
        # One layer of the requested flavour.
        if bloom:
            return BloomEmbedding(num_embeddings, EMBEDDING_DIM,
                                  num_hash_functions=NUM_HASH_FUNCTIONS)
        return ScaledEmbedding(num_embeddings, EMBEDDING_DIM)

    # The user layer is created before the item layer.
    user_embeddings = make_embedding()
    item_embeddings = make_embedding()

    representation = BilinearNet(num_embeddings,
                                 num_embeddings,
                                 user_embedding_layer=user_embeddings,
                                 item_embedding_layer=item_embeddings)

    return ImplicitFactorizationModel(
        loss='adaptive_hinge',
        n_iter=N_ITER,
        embedding_dim=EMBEDDING_DIM,
        batch_size=2048,
        learning_rate=1e-2,
        l2=1e-6,
        representation=representation,
        use_cuda=CUDA,
    )
Exemple #6
0
def build_factorization_model(hyperparameters, train, random_state):
    """Build an unfitted implicit factorization model from hyperparameters.

    ``BloomEmbedding`` layers are used when the 'compression_ratio' entry
    is below 1.0, ``ScaledEmbedding`` layers otherwise.

    Parameters
    ----------

    hyperparameters:
        Mapping read for the keys 'compression_ratio', 'embedding_dim',
        'loss', 'n_iter', 'batch_size', 'learning_rate' and 'l2'.
    train:
        Dataset whose ``num_users`` and ``num_items`` size the layers.
    random_state:
        Not used by this function: seeding is done with the constant 42.
        NOTE(review): confirm that ignoring the argument is intended.

    Returns
    -------

    An ``ImplicitFactorizationModel`` wrapping the assembled network.
    """
    h = hyperparameters

    set_seed(42, CUDA)

    dim = h['embedding_dim']

    # The item layer is created before the user layer in both branches.
    if h['compression_ratio'] < 1.0:
        bloom_options = dict(compression_ratio=h['compression_ratio'],
                             num_hash_functions=4,
                             padding_idx=0)
        item_embeddings = BloomEmbedding(train.num_items, dim,
                                         **bloom_options)
        user_embeddings = BloomEmbedding(train.num_users, dim,
                                         **bloom_options)
    else:
        item_embeddings = ScaledEmbedding(train.num_items, dim,
                                          padding_idx=0)
        user_embeddings = ScaledEmbedding(train.num_users, dim,
                                          padding_idx=0)

    representation = BilinearNet(train.num_users,
                                 train.num_items,
                                 user_embedding_layer=user_embeddings,
                                 item_embedding_layer=item_embeddings)

    return ImplicitFactorizationModel(
        loss=h['loss'],
        n_iter=h['n_iter'],
        batch_size=h['batch_size'],
        learning_rate=h['learning_rate'],
        embedding_dim=h['embedding_dim'],
        l2=h['l2'],
        representation=representation,
        use_cuda=CUDA,
        random_state=np.random.RandomState(42),
    )
Exemple #7
0
def build_factorization_model(train, random_state):
    """Build an unfitted BPR model whose embeddings are ``LSHEmbedding``.

    Each embedding layer is fitted right after construction: the item
    layer on the transposed CSR form of ``train``, the user layer on the
    CSR form itself.

    Parameters
    ----------

    train:
        Dataset providing ``num_users``, ``num_items`` and ``tocsr()``.
    random_state:
        Not used by this function: the model receives a fresh
        ``RandomState`` seeded with 42.
        NOTE(review): confirm that ignoring the argument is intended.

    Returns
    -------

    An ``ImplicitFactorizationModel`` wrapping the assembled network.
    """

    embedding_dim = 32

    # Settings shared by the item and the user layer.
    lsh_options = dict(embed=True,
                       gated=True,
                       num_layers=2,
                       residual_connections=True,
                       num_hash_functions=1)

    # The item layer is created and fitted before the user layer.
    item_embeddings = LSHEmbedding(train.num_items, embedding_dim,
                                   **lsh_options)
    item_embeddings.fit(train.tocsr().T)

    user_embeddings = LSHEmbedding(train.num_users, embedding_dim,
                                   **lsh_options)
    user_embeddings.fit(train.tocsr())

    representation = BilinearNet(train.num_users,
                                 train.num_items,
                                 user_embedding_layer=user_embeddings,
                                 item_embedding_layer=item_embeddings)

    return ImplicitFactorizationModel(
        loss='bpr',
        n_iter=10,
        batch_size=1024,
        learning_rate=5 * 1e-2,
        embedding_dim=embedding_dim,
        l2=1e-6,
        representation=representation,
        use_cuda=CUDA,
        random_state=np.random.RandomState(42),
    )
Exemple #8
0
    def fit(self, interactions, verbose=False):
        """
        Train the model on rated interactions.

        A new ``BilinearNet`` is built on every call. An Adam optimizer is
        created only when ``self._optimizer`` is still ``None``.

        Parameters
        ----------

        interactions: :class:`spotlight.interactions.Interactions`
            The input dataset. Must have ratings.
        verbose: bool, optional
            When true, print the summed minibatch loss after each epoch.

        Raises
        ------

        ValueError
            If ``self._loss`` is neither 'regression' nor 'poisson'.
        """

        all_users = interactions.user_ids.astype(np.int64)
        all_items = interactions.item_ids.astype(np.int64)

        num_users, num_items = interactions.num_users, interactions.num_items
        self._num_users = num_users
        self._num_items = num_items

        network = BilinearNet(self._num_users,
                              self._num_items,
                              self._embedding_dim,
                              sparse=self._sparse)
        self._net = gpu(network, self._use_cuda)

        # NOTE(review): an optimizer left over from an earlier call is
        # reused as is, although the network above is new - confirm.
        if self._optimizer is None:
            self._optimizer = optim.Adam(self._net.parameters(),
                                         weight_decay=self._l2,
                                         lr=self._learning_rate)

        loss_name = self._loss
        if loss_name == 'regression':
            criterion = regression_loss
        elif loss_name == 'poisson':
            criterion = poisson_loss
        else:
            raise ValueError('Unknown loss: {}'.format(self._loss))

        # Poisson training exponentiates the raw network output.
        exponentiate = loss_name == 'poisson'

        for epoch_num in range(self._n_iter):

            # Reshuffle the three aligned arrays at the start of each epoch.
            shuffled_users, shuffled_items, shuffled_ratings = shuffle(
                all_users,
                all_items,
                interactions.ratings,
                random_state=self._random_state)

            user_tensor = gpu(torch.from_numpy(shuffled_users),
                              self._use_cuda)
            item_tensor = gpu(torch.from_numpy(shuffled_items),
                              self._use_cuda)
            rating_tensor = gpu(torch.from_numpy(shuffled_ratings),
                                self._use_cuda)

            epoch_loss = 0.0

            batches = minibatch(user_tensor,
                                item_tensor,
                                rating_tensor,
                                batch_size=self._batch_size)

            for batch_user, batch_item, batch_ratings in batches:

                targets = Variable(batch_ratings)
                predictions = self._net(Variable(batch_user),
                                        Variable(batch_item))

                if exponentiate:
                    predictions = torch.exp(predictions)

                self._optimizer.zero_grad()

                loss = criterion(targets, predictions)
                # The loss is accumulated as a sum, not averaged.
                epoch_loss += loss.data[0]

                loss.backward()
                self._optimizer.step()

            if verbose:
                print('Epoch {}: loss {}'.format(epoch_num, epoch_loss))
Exemple #9
0
    def fit(self, interactions, verbose=False):
        """
        Fit the model.

        A new ``BilinearNet`` and a new optimizer bound to its parameters
        are created on every call.

        Parameters
        ----------

        interactions: :class:`spotlight.interactions.Interactions`
            The input dataset.
        verbose: bool, optional
            When true, print the mean minibatch loss after each epoch.
        """

        user_ids = interactions.user_ids.astype(np.int64)
        item_ids = interactions.item_ids.astype(np.int64)

        (self._num_users, self._num_items) = (interactions.num_users,
                                              interactions.num_items)

        self._net = gpu(
            BilinearNet(self._num_users,
                        self._num_items,
                        self._embedding_dim,
                        sparse=self._sparse), self._use_cuda)

        # Branch on the optimizer *factory*, not on the optimizer instance.
        # Testing ``self._optimizer`` ignored a user-supplied factory on the
        # first call and called a ``None`` factory on a second call.
        if self._optimizer_func is None:
            self._optimizer = optim.Adam(self._net.parameters(),
                                         weight_decay=self._l2,
                                         lr=self._learning_rate)
        else:
            self._optimizer = self._optimizer_func(self._net.parameters())

        if self._loss == 'pointwise':
            loss_fnc = pointwise_loss
        elif self._loss == 'bpr':
            loss_fnc = bpr_loss
        elif self._loss == 'hinge':
            loss_fnc = hinge_loss
        else:
            loss_fnc = adaptive_hinge_loss

        for epoch_num in range(self._n_iter):

            # Reshuffle the aligned id arrays at the start of each epoch.
            users, items = shuffle(user_ids,
                                   item_ids,
                                   random_state=self._random_state)

            user_ids_tensor = gpu(torch.from_numpy(users), self._use_cuda)
            item_ids_tensor = gpu(torch.from_numpy(items), self._use_cuda)

            epoch_loss = 0.0
            num_batches = 0

            for (batch_user, batch_item) in minibatch(
                    user_ids_tensor,
                    item_ids_tensor,
                    batch_size=self._batch_size):

                user_var = Variable(batch_user)
                item_var = Variable(batch_item)
                positive_prediction = self._net(user_var, item_var)

                # The adaptive hinge loss is given five sets of negative
                # predictions, every other loss a single one.
                if self._loss == 'adaptive_hinge':
                    negative_prediction = [
                        self._get_negative_prediction(user_var)
                        for _ in range(5)
                    ]
                else:
                    negative_prediction = self._get_negative_prediction(
                        user_var)

                self._optimizer.zero_grad()

                loss = loss_fnc(positive_prediction, negative_prediction)
                epoch_loss += loss.data[0]

                loss.backward()
                self._optimizer.step()

                num_batches += 1

            # Average over the batches actually processed. An epoch without
            # any batch keeps a loss of 0.0 instead of failing on an
            # unbound loop variable.
            if num_batches:
                epoch_loss /= num_batches

            if verbose:
                print('Epoch {}: loss {}'.format(epoch_num, epoch_loss))
    def objective(hyper):
        """
        Train and evaluate one model for a set of hyperparameters.

        ``train``, ``validation``, ``test`` and ``random_state`` are not
        parameters; they are taken from a surrounding scope.

        Parameters
        ----------

        hyper:
            Mapping with the keys 'embedding_dim', 'loss', 'n_iter',
            'batch_size', 'learning_rate', 'l2' and 'model'. When
            ``hyper['model']['type']`` is 'lsh' the 'model' mapping must
            also hold 'num_hash_functions', 'num_layers', 'nonlinearity',
            'residual', 'embed' and 'gated'.

        Returns
        -------

        dict
            Keys 'loss' (negated validation MRR), 'status', 'validation_mrr',
            'test_mrr', 'elapsed' and 'hyper'.
        """

        print(hyper)

        # time.clock() was removed in Python 3.8. perf_counter() measures
        # elapsed wall-clock time, which also covers time spent waiting on
        # the GPU.
        start = time.perf_counter()

        if hyper['model']['type'] == 'lsh':
            num_hashes = int(hyper['model']['num_hash_functions'])
            num_layers = int(hyper['model']['num_layers'])
            nonlinearity = hyper['model']['nonlinearity']
            residual = hyper['model']['residual']
            embed = hyper['model']['embed']
            gated = hyper['model']['gated']

            item_embeddings = LSHEmbedding(train.num_items,
                                           int(hyper['embedding_dim']),
                                           embed=embed,
                                           gated=gated,
                                           residual_connections=residual,
                                           nonlinearity=nonlinearity,
                                           num_layers=num_layers,
                                           num_hash_functions=num_hashes)
            # The item layer is fitted on the transposed CSR matrix.
            item_embeddings.fit(train.tocsr().T)
            user_embeddings = LSHEmbedding(train.num_users,
                                           int(hyper['embedding_dim']),
                                           embed=embed,
                                           gated=gated,
                                           residual_connections=residual,
                                           nonlinearity=nonlinearity,
                                           num_layers=num_layers,
                                           num_hash_functions=num_hashes)
            user_embeddings.fit(train.tocsr())
        else:
            user_embeddings = ScaledEmbedding(train.num_users,
                                              int(hyper['embedding_dim']),
                                              padding_idx=0)
            item_embeddings = ScaledEmbedding(train.num_items,
                                              int(hyper['embedding_dim']),
                                              padding_idx=0)

        network = BilinearNet(train.num_users,
                              train.num_items,
                              user_embedding_layer=user_embeddings,
                              item_embedding_layer=item_embeddings)

        model = ImplicitFactorizationModel(
            loss=hyper['loss'],
            n_iter=int(hyper['n_iter']),
            batch_size=int(hyper['batch_size']),
            learning_rate=hyper['learning_rate'],
            embedding_dim=int(hyper['embedding_dim']),
            l2=hyper['l2'],
            representation=network,
            use_cuda=CUDA,
            random_state=random_state)

        model.fit(train, verbose=True)

        # Only construction and fitting are timed, not the evaluation.
        elapsed = time.perf_counter() - start

        print(model)

        validation_mrr = mrr_score(model, validation, train=train).mean()
        test_mrr = mrr_score(model,
                             test,
                             train=train.tocsr() + validation.tocsr()).mean()

        print('MRR {} {}'.format(validation_mrr, test_mrr))

        return {
            # Negated so that a higher MRR gives a lower 'loss' value.
            'loss': -validation_mrr,
            'status': STATUS_OK,
            'validation_mrr': validation_mrr,
            'test_mrr': test_mrr,
            'elapsed': elapsed,
            'hyper': hyper
        }
Exemple #11
0
    def objective(hyper):
        """
        Train and evaluate one model for a set of hyperparameters.

        ``train``, ``validation`` and ``test`` are not parameters; they are
        taken from a surrounding scope.

        Parameters
        ----------

        hyper:
            Mapping whose 'model' entry holds the settings actually used:
            'type', 'embedding_dim', 'batch_size', 'loss', 'learning_rate',
            'l2' and 'n_iter', plus 'num_components' for every type except
            'bilinear' and 'projection_scale' for 'mixture_init'.

        Returns
        -------

        dict
            Keys 'loss', 'status', 'validation_mrr', 'test_mrr', 'elapsed'
            and 'hyper' (the 'model' sub-mapping). The status is
            ``STATUS_FAIL`` when fitting raises ``ValueError`` or when the
            validation MRR is NaN.

        Raises
        ------

        ValueError
            If the model type is not one of the supported names.
        """

        print(hyper)

        # time.clock() was removed in Python 3.8. perf_counter() measures
        # elapsed wall-clock time, which also covers time spent waiting on
        # the GPU.
        start = time.perf_counter()

        h = hyper['model']

        cls = ImplicitFactorizationModel

        if h['type'] == 'bilinear':
            representation = BilinearNet(train.num_users,
                                         train.num_items,
                                         embedding_dim=int(h['embedding_dim']))
        elif h['type'] == 'mixture':
            representation = MixtureNet(train.num_users,
                                        train.num_items,
                                        num_components=int(
                                            h['num_components']),
                                        embedding_dim=int(h['embedding_dim']))
        elif h['type'] == 'mixture_init':
            representation = MixtureNet(train.num_users,
                                        train.num_items,
                                        projection_scale=h['projection_scale'],
                                        num_components=int(
                                            h['num_components']),
                                        embedding_dim=int(h['embedding_dim']))
        elif h['type'] == 'nonlinear_mixture':
            representation = NonlinearMixtureNet(
                train.num_users,
                train.num_items,
                num_components=int(h['num_components']),
                embedding_dim=int(h['embedding_dim']))
        elif h['type'] == 'embedding_mixture':
            representation = EmbeddingMixtureNet(
                train.num_users,
                train.num_items,
                num_components=int(h['num_components']),
                embedding_dim=int(h['embedding_dim']))
        else:
            raise ValueError('Unknown model type')

        model = cls(batch_size=int(h['batch_size']),
                    loss=h['loss'],
                    learning_rate=h['learning_rate'],
                    l2=h['l2'],
                    n_iter=int(h['n_iter']),
                    representation=representation,
                    use_cuda=CUDA,
                    random_state=np.random.RandomState(42))

        try:
            model.fit(train, verbose=True)
        except ValueError:
            # A ValueError during fitting is reported as a failed trial
            # instead of propagating to the caller.
            elapsed = time.perf_counter() - start
            return {
                'loss': 0.0,
                'status': STATUS_FAIL,
                'validation_mrr': 0.0,
                'test_mrr': 0.0,
                'elapsed': elapsed,
                'hyper': h
            }

        # Only construction and fitting are timed, not the evaluation.
        elapsed = time.perf_counter() - start

        print(model)

        # NOTE(review): the validation score is computed with the test
        # interactions passed in ``train=`` - confirm this is intended.
        validation_mrr = mrr_score(model,
                                   validation,
                                   train=(train.tocsr() +
                                          test.tocsr())).mean()
        test_mrr = mrr_score(model,
                             test,
                             train=(train.tocsr() +
                                    validation.tocsr())).mean()

        print('MRR {} {}'.format(validation_mrr, test_mrr))

        if np.isnan(validation_mrr):
            status = STATUS_FAIL
        else:
            status = STATUS_OK

        return {
            # Negated so that a higher MRR gives a lower 'loss' value.
            'loss': -validation_mrr,
            'status': status,
            'validation_mrr': validation_mrr,
            'test_mrr': test_mrr,
            'elapsed': elapsed,
            'hyper': h
        }