Beispiel #1
0
def test_bpr_bloom(compression_ratio, expected_mrr):
    """Train a BPR model with Bloom-compressed embeddings; check its MRR."""

    dataset = movielens.get_movielens_dataset('100K')
    train, test = random_train_test_split(dataset,
                                          random_state=RANDOM_STATE)

    # Hash users and items into compressed 32-dim embedding tables.
    user_layer = BloomEmbedding(dataset.num_users, 32,
                                compression_ratio=compression_ratio,
                                num_hash_functions=2)
    item_layer = BloomEmbedding(dataset.num_items, 32,
                                compression_ratio=compression_ratio,
                                num_hash_functions=2)
    representation = BilinearNet(dataset.num_users,
                                 dataset.num_items,
                                 user_embedding_layer=user_layer,
                                 item_embedding_layer=item_layer)

    model = ImplicitFactorizationModel(loss='bpr',
                                       n_iter=10,
                                       batch_size=1024,
                                       learning_rate=1e-2,
                                       l2=1e-6,
                                       representation=representation,
                                       use_cuda=CUDA)
    model.fit(train)
    print(model)

    mrr = mrr_score(model, test, train=train).mean()

    assert mrr > expected_mrr
Beispiel #2
0
def test_implicit_serialization(data):
    """Round-trip a fitted implicit model and verify its MRR is unchanged."""

    train, test = data

    model = ImplicitFactorizationModel(loss='bpr',
                                       n_iter=3,
                                       batch_size=1024,
                                       learning_rate=1e-2,
                                       l2=1e-6,
                                       use_cuda=CUDA)
    model.fit(train)

    # Scoring before and after serialization must give identical results.
    mrr_before = mrr_score(model, test, train=train).mean()
    mrr_after = mrr_score(_reload(model), test, train=train).mean()

    assert mrr_before == mrr_after
Beispiel #3
0
def evaluate_model(model, train, test, validation):
    """Fit *model* on *train*; return (test MRR, validation MRR, fit seconds)."""

    fit_start = time.time()
    model.fit(train, verbose=True)
    elapsed = time.time() - fit_start

    print('Elapsed {}'.format(elapsed))
    print(model)

    # Sequence datasets expose a `sequences` attribute and need the
    # sequence-aware scorer.
    if hasattr(test, 'sequences'):
        test_mrr = sequence_mrr_score(model, test)
        val_mrr = sequence_mrr_score(model, validation)
    else:
        test_mrr = mrr_score(model, test)
        # NOTE(review): val_mrr is scored on the combined test+validation
        # matrix here - confirm this is intentional.
        val_mrr = mrr_score(model, test.tocsr() + validation.tocsr())

    return test_mrr, val_mrr, elapsed
Beispiel #4
0
    def run(self, filtering, loss, k):
        """Train a Spotlight implicit-factorization model and log its metrics.

        NOTE(review): assumes self.df, self.logger, self.model_name and
        self.build_interactions_object are provided by the enclosing class.
        """
        # Drop all existing root handlers so self.logger can reconfigure
        # logging from scratch.
        for existing_handler in logging.root.handlers[:]:
            logging.root.removeHandler(existing_handler)

        self.filter = filtering
        self.loss = loss
        self.model_name = '_'.join((self.model_name, self.filter, self.loss))

        self.logger(self.model_name)
        logger = logging.getLogger()

        num_epochs = 5

        logger.info("Training Spotlight Model, Loss: {}".format(self.loss))
        df_interactions = self.df[['user_id', 'tag_id', 'count']]
        df_timestamps = self.df['timestamp']
        interactions = self.build_interactions_object(df_interactions,
                                                      df_timestamps)

        train, test = spotlight_random_train_test_split(interactions)
        logger.info(
            'The dataset has %s users and %s items with %s interactions in the test and %s interactions in the '
            'training set.' % (train.num_users, train.num_items,
                               test.tocoo().getnnz(), train.tocoo().getnnz()))
        model = ImplicitFactorizationModel(
            n_iter=num_epochs,
            loss=self.loss,
            random_state=RANDOM_STATE,
            use_cuda=True,
            embedding_dim=64,  # latent dimensionality
            batch_size=128,  # minibatch size
            l2=1e-9,  # strength of L2 regularization
            learning_rate=1e-3,
        )

        logger.info("Begin fitting {0} model for {1} epochs...".format(
            self.loss, num_epochs))
        model.fit(train, verbose=True)

        precision_per_user, recall_per_user = precision_recall_score(
            model=model,
            train=train,
            test=test,
            k=k)

        mrr = mrr_score(model=model, train=train, test=test).mean()

        precision = np.mean(precision_per_user)
        recall = np.mean(recall_per_user)
        fmeasure = 2 * ((precision * recall) / (precision + recall))
        logger.info("Precision@{0}: {1}".format(k, precision))
        logger.info("Recall@{0}: {1}".format(k, recall))
        logger.info("F-Measure: {}".format(fmeasure))
        logger.info("MRR: {}".format(mrr))
        self.model_name = 'spot'
    def obtener_metricas_gui(self):
        """Compute the metrics of the chosen model for the web interface.

        Only used by the web interface.

        Returns
        -------

        metricas_devueltas: dict
            dictionary with the model's metrics, formatted for display
        """

        global train, test, modelo

        def _fmt(value):
            # All metrics are displayed with four decimal places.
            return format(value, '.4f')

        # Compute the metrics for the selected model type; `metricas_devueltas`
        # feeds the web UI and `metricas_a_guardar` wraps each value in a list
        # for CSV persistence.
        if self.opcion_modelo == 1:
            rmse = rmse_score(modelo, test)
            mrr = mrr_score(modelo, test, train=train).mean()
            precision, recall = precision_recall_score(modelo, test, train=train, k=10)
            metricas_devueltas = {
                "RMSE": _fmt(rmse),
                "MRR": _fmt(mrr),
                "Precisión k": _fmt(precision.mean()),
                "Recall k": _fmt(recall.mean()),
            }
        elif self.opcion_modelo == 2:
            mrr = mrr_score(modelo, test, train=train).mean()
            precision, recall = precision_recall_score(modelo, test, train=train, k=10)
            metricas_devueltas = {
                "MRR": _fmt(mrr),
                "Precisión k": _fmt(precision.mean()),
                "Recall k": _fmt(recall.mean()),
            }
        else:
            mrr = sequence_mrr_score(modelo, test).mean()
            metricas_devueltas = {"MRR": _fmt(mrr)}

        metricas_a_guardar = {clave: [valor]
                              for clave, valor in metricas_devueltas.items()}

        # Persist the metrics to a .csv file.
        guardar_resultados(metricas_a_guardar)

        return metricas_devueltas
    def evaluation(self, model, interactions: tuple):
        """Evaluate a model on Precision@K, Recall@K, F1 and Mean Reciprocal Rank.

        Args:
            model (Arbitrary): A Spotlight model, can be of different types.
            interactions (tuple): (spotlight.interactions.Interactions,
                spotlight.interactions.Interactions), a (train data, test data)
                pair; only the test half is used for scoring.

        Returns:
            dict: A dictionary with all the evaluation metrics.

        """

        logger = logging.getLogger()
        train, test = interactions

        logger.info("Beginning model evaluation...")

        # Sequence models need the sequence-aware MRR variant.
        if self._models in ('S_POOL', 'S_CNN', 'S_LSTM'):
            mrr = sequence_mrr_score(model, test).mean()
        else:
            mrr = mrr_score(model, test).mean()
        logger.info('MRR {:.8f}'.format(
            mrr
        ))

        k = 3

        # NOTE(review): sequence_precision_recall_score is applied even when
        # the model is not a sequence model - confirm this is intentional.
        prec, rec = sequence_precision_recall_score(
            model=model,
            test=test,
            k=k,
        )
        logger.info('Precision@{k} {:.8f}'.format(
            prec.mean(),
            k=k
        ))
        logger.info('Recall@{k} {:.8f}'.format(
            rec.mean(),
            k=k
        ))
        # Hoist the means so F1 reuses the same values.
        precision = prec.mean()
        recall = rec.mean()
        return {
            'test': {
                'precision': precision,
                'recall': recall,
                'f1': 2 * ((precision * recall) / (precision + recall)),
                'mrr': mrr,
            },
        }
    def resultados_factorizacion_implicito(self):
        """Compute and print the metrics of the implicit factorization model.

        Only used by the text interface.
        """

        global train, test, modelo

        # Score the model on the held-out test set.
        mrr = mrr_score(modelo, test, train=train).mean()
        precision, recall = precision_recall_score(modelo, test, train=train, k=10)

        # Print the results.
        imprimir_resultados_dl(mrr, precision.mean(), recall.mean())
Beispiel #8
0
def test_adaptive_hinge():
    """Adaptive-hinge loss should exceed 0.07 MRR on MovieLens 100K."""

    dataset = movielens.get_movielens_dataset('100K')
    train, test = random_train_test_split(dataset,
                                          random_state=RANDOM_STATE)

    model = ImplicitFactorizationModel(loss='adaptive_hinge',
                                       n_iter=10,
                                       batch_size=1024,
                                       learning_rate=1e-2,
                                       l2=1e-6)
    model.fit(train)

    assert mrr_score(model, test, train=train).mean() > 0.07
def test_adaptive_hinge():
    """Adaptive-hinge smoke test (duplicate name; shadows the def above)."""

    interactions = movielens.get_movielens_dataset('100K')
    train, test = random_train_test_split(interactions,
                                          random_state=RANDOM_STATE)

    # Same hyperparameters as the other smoke tests, adaptive-hinge loss.
    model = ImplicitFactorizationModel(loss='adaptive_hinge',
                                       n_iter=10,
                                       batch_size=1024,
                                       learning_rate=1e-2,
                                       l2=1e-6)
    model.fit(train)

    mrr = mrr_score(model, test, train=train).mean()
    assert mrr > 0.07
Beispiel #10
0
def test_bpr():
    """BPR loss should score above 0.07 MRR (within EPSILON) on ML-100K."""

    dataset = movielens.get_movielens_dataset('100K')
    train, test = random_train_test_split(dataset,
                                          random_state=RANDOM_STATE)

    model = ImplicitFactorizationModel(loss='bpr',
                                       n_iter=10,
                                       batch_size=1024,
                                       learning_rate=1e-2,
                                       l2=1e-6,
                                       use_cuda=CUDA)
    model.fit(train)

    mrr = mrr_score(model, test, train=train).mean()

    # EPSILON absorbs tiny run-to-run numerical differences.
    assert mrr + EPSILON > 0.07
Beispiel #11
0
def test_bpr_custom_optimizer():
    """A user-supplied optimizer factory should still train to MRR > 0.06."""

    dataset = movielens.get_movielens_dataset('100K')
    train, test = random_train_test_split(dataset,
                                          random_state=RANDOM_STATE)

    def make_adagrad(model_params, lr=1e-2, weight_decay=1e-6):
        # Factory handed to the model instead of the default optimizer.
        return torch.optim.Adagrad(model_params,
                                   lr=lr,
                                   weight_decay=weight_decay)

    model = ImplicitFactorizationModel(loss='bpr',
                                       n_iter=10,
                                       batch_size=1024,
                                       optimizer_func=make_adagrad)
    model.fit(train)

    assert mrr_score(model, test, train=train).mean() > 0.06
Beispiel #12
0
def test_bpr_custom_optimizer():
    """Custom-optimizer smoke test (duplicate name; shadows the def above)."""

    interactions = movielens.get_movielens_dataset('100K')
    train, test = random_train_test_split(interactions,
                                          random_state=RANDOM_STATE)

    def build_optimizer(model_params, lr=1e-2, weight_decay=1e-6):
        # Adagrad with explicit L2 weight decay.
        return torch.optim.Adagrad(model_params, lr=lr,
                                   weight_decay=weight_decay)

    model = ImplicitFactorizationModel(loss='bpr',
                                       n_iter=10,
                                       batch_size=1024,
                                       optimizer_func=build_optimizer)
    model.fit(train)

    mrr = mrr_score(model, test, train=train).mean()
    assert mrr > 0.06
Beispiel #13
0
# fit models


model.fit(train.to_sequence(), verbose=True)
preserving_25_percent_model.fit(preserving_25_percent_train.to_sequence(), verbose=True)
preserving_50_percent_model.fit(preserving_50_percent_train.to_sequence(), verbose=True)
preserving_75_percent_model.fit(preserving_75_percent_train.to_sequence(), verbose=True)

import torch
torch.save(preserving_25_percent_model, './preserving_25_percent_model.model')
torch.save(preserving_50_percent_model, './preserving_50_percent_model.model')
torch.save(preserving_75_percent_model, './preserving_75_percent_model.model')
# result evaluation

from spotlight.evaluation import mrr_score
train_mrrs = mrr_score(model, train)
preserving_25_train_mrrs = mrr_score(preserving_25_percent_model, preserving_25_percent_train)
preserving_50_train_mrrs = mrr_score(preserving_50_percent_model, preserving_50_percent_train)
preserving_75_train_mrrs = mrr_score(preserving_75_percent_model, preserving_75_percent_train)

test_mrrs = mrr_score(model, test)
preserving_25_test_mrrs = mrr_score(preserving_25_percent_model, test)
preserving_50_test_mrrs = mrr_score(preserving_50_percent_model, test)
preserving_75_test_mrrs = mrr_score(preserving_75_percent_model, test)

print('For 100% preserving items')
print('Train MRRS {:.3f}, test MRRS {:.3f}'.format(train_mrrs.sum(), test_mrrs.sum()))
print('For 25% preserving items')
print('Train MRRS {:.3f}, test MRRS {:.3f}'.format(preserving_25_train_mrrs.sum(), preserving_25_test_mrrs.sum()))
print('For 50% preserving items')
print('Train MRRS {:.3f}, test MRRS {:.3f}'.format(preserving_50_train_mrrs.sum(), preserving_50_test_mrrs.sum()))
# Fix: the 75% scores were computed above but never reported.
print('For 75% preserving items')
print('Train MRRS {:.3f}, test MRRS {:.3f}'.format(preserving_75_train_mrrs.sum(), preserving_75_test_mrrs.sum()))
    def objective(hyper):
        """Hyperopt objective: train a model for *hyper*, return its loss dict.

        Builds either LSH-hashed or plain scaled embeddings depending on
        hyper['model']['type'], fits an implicit factorization model, and
        reports validation MRR (negated, as hyperopt minimizes) plus test MRR.
        """

        print(hyper)

        # time.clock() was removed in Python 3.8; perf_counter() is the
        # recommended replacement for timing code sections.
        start = time.perf_counter()

        if hyper['model']['type'] == 'lsh':
            num_hashes = int(hyper['model']['num_hash_functions'])
            num_layers = int(hyper['model']['num_layers'])
            nonlinearity = hyper['model']['nonlinearity']
            residual = hyper['model']['residual']
            embed = hyper['model']['embed']
            gated = hyper['model']['gated']

            item_embeddings = LSHEmbedding(train.num_items,
                                           int(hyper['embedding_dim']),
                                           embed=embed,
                                           gated=gated,
                                           residual_connections=residual,
                                           nonlinearity=nonlinearity,
                                           num_layers=num_layers,
                                           num_hash_functions=num_hashes)
            # Items are hashed from the transposed interaction matrix.
            item_embeddings.fit(train.tocsr().T)
            user_embeddings = LSHEmbedding(train.num_users,
                                           int(hyper['embedding_dim']),
                                           embed=embed,
                                           gated=gated,
                                           residual_connections=residual,
                                           nonlinearity=nonlinearity,
                                           num_layers=num_layers,
                                           num_hash_functions=num_hashes)
            user_embeddings.fit(train.tocsr())
        else:
            user_embeddings = ScaledEmbedding(train.num_users,
                                              int(hyper['embedding_dim']),
                                              padding_idx=0)
            item_embeddings = ScaledEmbedding(train.num_items,
                                              int(hyper['embedding_dim']),
                                              padding_idx=0)

        network = BilinearNet(train.num_users,
                              train.num_items,
                              user_embedding_layer=user_embeddings,
                              item_embedding_layer=item_embeddings)

        model = ImplicitFactorizationModel(
            loss=hyper['loss'],
            n_iter=int(hyper['n_iter']),
            batch_size=int(hyper['batch_size']),
            learning_rate=hyper['learning_rate'],
            embedding_dim=int(hyper['embedding_dim']),
            l2=hyper['l2'],
            representation=network,
            use_cuda=CUDA,
            random_state=random_state)

        model.fit(train, verbose=True)

        elapsed = time.perf_counter() - start

        print(model)

        validation_mrr = mrr_score(model, validation, train=train).mean()
        # Test MRR excludes everything the model could have seen.
        test_mrr = mrr_score(model,
                             test,
                             train=train.tocsr() + validation.tocsr()).mean()

        print('MRR {} {}'.format(validation_mrr, test_mrr))

        return {
            'loss': -validation_mrr,
            'status': STATUS_OK,
            'validation_mrr': validation_mrr,
            'test_mrr': test_mrr,
            'elapsed': elapsed,
            'hyper': hyper
        }
def _evaluate(model, test, train):
    """Return the mean test-set MRR, excluding known training interactions."""
    return mrr_score(model, test, train=train).mean()
Beispiel #16
0
    def objective(hyper):
        """Hyperopt objective: build the requested representation, train, score.

        Dispatches on hyper['model']['type'] to one of several network
        representations, fits an implicit factorization model, and returns
        the negated validation MRR (hyperopt minimizes) plus diagnostics.
        Training failures are reported with STATUS_FAIL instead of raising.
        """

        print(hyper)

        # time.clock() was removed in Python 3.8; perf_counter() is the
        # recommended replacement for timing code sections.
        start = time.perf_counter()

        h = hyper['model']

        cls = ImplicitFactorizationModel

        if h['type'] == 'bilinear':
            representation = BilinearNet(train.num_users,
                                         train.num_items,
                                         embedding_dim=int(h['embedding_dim']))
        elif h['type'] == 'mixture':
            representation = MixtureNet(train.num_users,
                                        train.num_items,
                                        num_components=int(
                                            h['num_components']),
                                        embedding_dim=int(h['embedding_dim']))
        elif h['type'] == 'mixture_init':
            representation = MixtureNet(train.num_users,
                                        train.num_items,
                                        projection_scale=h['projection_scale'],
                                        num_components=int(
                                            h['num_components']),
                                        embedding_dim=int(h['embedding_dim']))
        elif h['type'] == 'nonlinear_mixture':
            representation = NonlinearMixtureNet(
                train.num_users,
                train.num_items,
                num_components=int(h['num_components']),
                embedding_dim=int(h['embedding_dim']))
        elif h['type'] == 'embedding_mixture':
            representation = EmbeddingMixtureNet(
                train.num_users,
                train.num_items,
                num_components=int(h['num_components']),
                embedding_dim=int(h['embedding_dim']))
        else:
            raise ValueError('Unknown model type')

        model = cls(batch_size=int(h['batch_size']),
                    loss=h['loss'],
                    learning_rate=h['learning_rate'],
                    l2=h['l2'],
                    n_iter=int(h['n_iter']),
                    representation=representation,
                    use_cuda=CUDA,
                    random_state=np.random.RandomState(42))

        try:
            model.fit(train, verbose=True)
        except ValueError:
            # Training diverged; report a failed trial rather than aborting
            # the whole hyperparameter search.
            elapsed = time.perf_counter() - start
            return {
                'loss': 0.0,
                'status': STATUS_FAIL,
                'validation_mrr': 0.0,
                'test_mrr': 0.0,
                'elapsed': elapsed,
                'hyper': h
            }

        elapsed = time.perf_counter() - start

        print(model)

        # Each split is scored with the other held-out split folded into
        # the known (excluded) interactions.
        validation_mrr = mrr_score(model,
                                   validation,
                                   train=(train.tocsr() +
                                          test.tocsr())).mean()
        test_mrr = mrr_score(model,
                             test,
                             train=(train.tocsr() +
                                    validation.tocsr())).mean()

        print('MRR {} {}'.format(validation_mrr, test_mrr))

        if np.isnan(validation_mrr):
            status = STATUS_FAIL
        else:
            status = STATUS_OK

        return {
            'loss': -validation_mrr,
            'status': status,
            'validation_mrr': validation_mrr,
            'test_mrr': test_mrr,
            'elapsed': elapsed,
            'hyper': h
        }
Beispiel #17
0
                                       random_state=np.random.RandomState(42))

    return model


if __name__ == '__main__':

    random_state = np.random.RandomState(42)

    train, validation, test = load_data(random_state)

    # objective = get_objective(train, validation, test)
    # space = hyperparameter_space()

    # max_evals = 5

    # for iteration in range(1, max_evals):
    #     print('Iteration {}'.format(iteration))
    #     trials = optimize(objective,
    #                       space,
    #                       trials_fname='factorization_trials.pickle',
    #                       max_evals=iteration)

    # Train with the pre-tuned configuration and report held-out MRR.
    model = build_factorization_model(train, random_state)
    model.fit(train, verbose=True)
    print(model)

    test_mrr = mrr_score(model, test, train=train).mean()
    print('MRR {}'.format(test_mrr))
Beispiel #18
0
                           timestamps=timeStamps)

    if name == "test":
        dataset_test = dataset
    elif name == "train":
        dataset_train = dataset

# Instantiate the model selected by `model_mode`.
mode = model_mode.lower()
if mode == "ifm":
    model = ImplicitFactorizationModel(n_iter=n_iter)
if mode == "efm":
    model = ExplicitFactorizationModel(n_iter=n_iter)
if mode == "cnn":
    net = CNNNet(num_items=int(foods_items))
    model = ImplicitSequenceModel(n_iter=n_iter,
                                  use_cuda=torch.cuda.is_available(),
                                  representation=net)

model.fit(dataset_train)

# Persist the fitted model.
with open(save_file, 'wb') as f:
    pickle.dump(model, f, pickle.HIGHEST_PROTOCOL)

# Sequence models need the sequence-aware MRR variant.
if mode == "cnn":
    mrr = sequence_mrr_score(model, dataset_test)
else:
    mrr = mrr_score(model, dataset_test)

# NOTE(review): the first line prints the number of scored users under the
# label "mrr" - confirm that is the intended output.
mean_mrr = sum(mrr) / len(mrr)
print("mrr = ", len(mrr))
print("mean mrr = ", mean_mrr)
rank = 1 / mean_mrr
print("average rank = ", rank)