Example #1
def train_initial_model():
    dataset = get_movielens_dataset(variant='100K')

    train, test = random_train_test_split(dataset, random_state=np.random.RandomState(42))

    model = ImplicitFactorizationModel(loss='adaptive_hinge',
                                       embedding_dim=128,  # latent dimensionality
                                       n_iter=10,  # number of epochs of training
                                       batch_size=1024,  # minibatch size
                                       l2=1e-9,  # strength of L2 regularization
                                       learning_rate=1e-3,
                                       use_cuda=torch.cuda.is_available())

    print('Fitting the model')

    model.fit(train, verbose=True)
    print(type(model))

    with open('models/filmclub.model', 'wb') as model_file:
        pickle.dump(model, model_file)

    dataset.num_users = 1000000  # enlarge the user id space before saving, presumably so new users can be scored later

    with open('data/dataset.pkl', 'wb') as dataset_file:
        pickle.dump(dataset, dataset_file)

    train_rmse = rmse_score(model, train)
    test_rmse = rmse_score(model, test)

    print('Train RMSE {:.3f}, test RMSE {:.3f}'.format(train_rmse, test_rmse))
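The snippet above pickles the fitted model and the dataset; a minimal sketch of loading them back and scoring items for a user (the paths match the code above, and predict(user_ids) is Spotlight's standard call that scores all items when item_ids is omitted):

import pickle

import numpy as np

with open('models/filmclub.model', 'rb') as model_file:
    model = pickle.load(model_file)

with open('data/dataset.pkl', 'rb') as dataset_file:
    dataset = pickle.load(dataset_file)

# Score every item for user 0; higher scores mean stronger recommendations.
scores = model.predict(0)
top_items = np.argsort(-scores)[:10]
print(top_items)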
Example #2
def test_bpr_bloom(compression_ratio, expected_mrr):

    interactions = movielens.get_movielens_dataset('100K')

    train, test = random_train_test_split(interactions,
                                          random_state=RANDOM_STATE)

    user_embeddings = BloomEmbedding(interactions.num_users,
                                     32,
                                     compression_ratio=compression_ratio,
                                     num_hash_functions=2)
    item_embeddings = BloomEmbedding(interactions.num_items,
                                     32,
                                     compression_ratio=compression_ratio,
                                     num_hash_functions=2)
    network = BilinearNet(interactions.num_users,
                          interactions.num_items,
                          user_embedding_layer=user_embeddings,
                          item_embedding_layer=item_embeddings)

    model = ImplicitFactorizationModel(loss='bpr',
                                       n_iter=10,
                                       batch_size=1024,
                                       learning_rate=1e-2,
                                       l2=1e-6,
                                       representation=network,
                                       use_cuda=CUDA)

    model.fit(train)
    print(model)

    mrr = mrr_score(model, test, train=train).mean()

    assert mrr > expected_mrr
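Several snippets in this section reference module-level constants defined outside the excerpts (RANDOM_STATE, CUDA, and later EPSILON). A plausible setup block, assuming typical values; the originals may differ:

import numpy as np
import torch

RANDOM_STATE = np.random.RandomState(42)  # fixed seed for reproducible splits
CUDA = torch.cuda.is_available()          # train on GPU when one is present
EPSILON = 1e-6                            # slack for floating-point comparisons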
Example #3
    def run(self, filtering, loss, k):
        for handler in logging.root.handlers[:]:
            logging.root.removeHandler(handler)

        self.filter = filtering
        self.loss = loss
        self.model_name = str.join('_',
                                   (self.model_name, self.filter, self.loss))

        self.logger(self.model_name)
        logger = logging.getLogger()

        NUM_EPOCHS = 5

        logger.info("Training Spotlight Model, Loss: {}".format(self.loss))
        df_interactions, df_timestamps = self.df[[
            'user_id', 'tag_id', 'count'
        ]], self.df['timestamp']
        interactions = self.build_interactions_object(df_interactions,
                                                      df_timestamps)

        train, test = spotlight_random_train_test_split(interactions)
        logger.info(
            'The dataset has %s users and %s items with %s interactions in the test and %s interactions in the '
            'training set.' % (train.num_users, train.num_items,
                               test.tocoo().getnnz(), train.tocoo().getnnz()))
        model = ImplicitFactorizationModel(
            n_iter=NUM_EPOCHS,
            loss=self.loss,
            random_state=RANDOM_STATE,
            use_cuda=True,
            embedding_dim=64,  # latent dimensionality
            batch_size=128,  # minibatch size
            l2=1e-9,  # strength of L2 regularization
            learning_rate=1e-3,
        )

        logger.info("Begin fitting {0} model for {1} epochs...".format(
            self.loss, NUM_EPOCHS))
        model.fit(train, verbose=True)

        precrec = precision_recall_score(model=model,
                                         train=train,
                                         test=test,
                                         k=k)

        mrr = mrr_score(model=model, train=train, test=test).mean()

        precision = np.mean(precrec[0])
        recall = np.mean(precrec[1])
        fmeasure = 2 * ((precision * recall) / (precision + recall))
        logger.info("Precision@{0}: {1}".format(k, precision))
        logger.info("Recall@{0}: {1}".format(k, recall))
        logger.info("F-Measure: {}".format(fmeasure))
        logger.info("MRR: {}".format(mrr))
        self.model_name = 'spot'
Example #4
class EmbeddingFactorsRecommender(BaseFactorizationRecommender):

    default_model_params = dict(
        loss='adaptive_hinge',  # other options: 'pointwise', 'bpr', 'hinge'
        embedding_dim=32,
        n_iter=15,
        batch_size=1024,
        l2=0.0,
        learning_rate=1e-2,
        num_negative_samples=10)

    default_fit_params = dict(verbose=True)

    def _prep_for_fit(self, train_obs, **fit_params):
        # self.toggle_mkl_blas_1_thread(False)
        self._set_data(train_obs)
        self.set_params(**fit_params)
        self.model = ImplicitFactorizationModel(**self.model_params)
        self._set_spotlight_train_data(self.train_mat)

    def _set_spotlight_train_data(self, train_mat):
        self.spotlight_dataset = spotlight_interactions_from_sparse(train_mat)

    def fit(self, train_obs, **fit_params):
        self._prep_for_fit(train_obs, **fit_params)
        self.model.fit(self.spotlight_dataset,
                       verbose=self.fit_params.get('verbose', False))

    def fit_partial(self, train_obs, epochs=1):
        self._set_epochs(epochs)
        if self.model is None:
            self.fit(train_obs)
        else:
            self.model.fit(self.spotlight_dataset)
        return self

    def _set_epochs(self, epochs):
        self.set_params(n_iter=epochs)

    def _predict_on_inds(self, user_inds, item_inds):
        return self.model.predict(user_inds, item_inds)

    def _get_item_factors(self, mode=None):
        return self.model._net.item_biases.weight.data.numpy().ravel(), \
               self.model._net.item_embeddings.weight.data.numpy()

    def _get_user_factors(self, mode=None):
        return self.model._net.user_biases.weight.data.numpy().ravel(), \
               self.model._net.user_embeddings.weight.data.numpy()

    def _predict_rank(self, test_mat, train_mat=None):
        raise NotImplementedError()
Example #5
def factorization_model(num_embeddings, bloom):

    if bloom:
        user_embeddings = BloomEmbedding(num_embeddings, EMBEDDING_DIM,
                                         num_hash_functions=NUM_HASH_FUNCTIONS)
        item_embeddings = BloomEmbedding(num_embeddings, EMBEDDING_DIM,
                                         num_hash_functions=NUM_HASH_FUNCTIONS)
    else:
        user_embeddings = ScaledEmbedding(num_embeddings, EMBEDDING_DIM)
        item_embeddings = ScaledEmbedding(num_embeddings, EMBEDDING_DIM)

    network = BilinearNet(num_embeddings,
                          num_embeddings,
                          user_embedding_layer=user_embeddings,
                          item_embedding_layer=item_embeddings)

    model = ImplicitFactorizationModel(loss='adaptive_hinge',
                                       n_iter=N_ITER,
                                       embedding_dim=EMBEDDING_DIM,
                                       batch_size=2048,
                                       learning_rate=1e-2,
                                       l2=1e-6,
                                       representation=network,
                                       use_cuda=CUDA)

    return model
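The factory above relies on module-level constants defined elsewhere in its source file. Illustrative values, assumed rather than taken from the original:

import torch

EMBEDDING_DIM = 32       # width of the user/item embeddings
NUM_HASH_FUNCTIONS = 2   # hash functions per BloomEmbedding
N_ITER = 10              # training epochs
CUDA = torch.cuda.is_available()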
Example #6
    def obtener_modelo_gui(self, lista_param):
        """
        obtener_modelo_gui method. Builds the chosen model from the parameters passed in.

        This method is only used in the web interface.

        Parameters
        ----------

        lista_param: list
            List containing the parameters chosen by the user to build the model.
        """

        global modelo

        # Store the parameters in variables for readability
        loss = lista_param[0]
        embedding_dim = lista_param[1]
        n_iter = lista_param[2]
        batch_size = lista_param[3]
        l2 = lista_param[4]
        learning_rate = lista_param[5]
        representation = lista_param[6]

        # Instantiate the model according to the parameters above
        if self.opcion_modelo == 1:
            modelo = ExplicitFactorizationModel(loss=loss, embedding_dim=embedding_dim, n_iter=n_iter, batch_size=batch_size, 
                l2=l2, learning_rate=learning_rate, use_cuda=torch.cuda.is_available())
        elif self.opcion_modelo == 2:
            modelo = ImplicitFactorizationModel(loss=loss, embedding_dim=embedding_dim, n_iter=n_iter, batch_size=batch_size, 
                l2=l2, learning_rate=learning_rate, use_cuda=torch.cuda.is_available())
        else:
            modelo = ImplicitSequenceModel(loss=loss, representation=representation, embedding_dim=embedding_dim, n_iter=n_iter, batch_size=batch_size, 
                l2=l2, learning_rate=learning_rate, use_cuda=torch.cuda.is_available())
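The method above unpacks lista_param positionally, so the order matters. A hypothetical list illustrating that ordering (values are illustrative only; 'pooling' is a valid representation for ImplicitSequenceModel):

# [loss, embedding_dim, n_iter, batch_size, l2, learning_rate, representation]
lista_param = ['bpr', 32, 10, 256, 1e-6, 1e-3, 'pooling']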
Example #7
def test_implicit_serialization(data):

    train, test = data

    model = ImplicitFactorizationModel(loss='bpr',
                                       n_iter=3,
                                       batch_size=1024,
                                       learning_rate=1e-2,
                                       l2=1e-6,
                                       use_cuda=CUDA)
    model.fit(train)

    mrr_original = mrr_score(model, test, train=train).mean()
    mrr_recovered = mrr_score(_reload(model), test, train=train).mean()

    assert mrr_original == mrr_recovered
Example #8
def data():

    interactions = movielens.get_movielens_dataset('100K')

    train, test = random_train_test_split(interactions,
                                          random_state=RANDOM_STATE)

    model = ImplicitFactorizationModel(loss='bpr',
                                       n_iter=1,
                                       batch_size=1024,
                                       learning_rate=1e-2,
                                       l2=1e-6,
                                       random_state=RANDOM_STATE,
                                       use_cuda=CUDA)
    model.fit(train)

    return train, test, model
Example #9
def test_adaptive_hinge():

    interactions = movielens.get_movielens_dataset('100K')

    train, test = random_train_test_split(interactions,
                                          random_state=RANDOM_STATE)

    model = ImplicitFactorizationModel(loss='adaptive_hinge',
                                       n_iter=10,
                                       batch_size=1024,
                                       learning_rate=1e-2,
                                       l2=1e-6)
    model.fit(train)

    mrr = mrr_score(model, test, train=train).mean()

    assert mrr > 0.07
Example #10
def factorization(train,
                  test,
                  out_dir=None,
                  data_name="empty",
                  repeats=1,
                  verbose=False,
                  **kwargs):
    """
    Run experiment for dot product based models (Factorization Module)
    """

    precisions, recalls = [], []

    st = time.time()

    for _ in tqdm(range(repeats)):
        model = ImplicitFactorizationModel(**kwargs)
        model.fit(train, verbose=verbose)

        test_precision, test_recall = precision_recall_score(model,
                                                             test,
                                                             train,
                                                             k=50)

        precisions.append(np.mean(test_precision))
        recalls.append(np.mean(test_recall))

    ts = time.time()

    print "*=" * 40
    print "data: {} with {} repeats".format(data_name, repeats)
    print "Dot Product Model\n", kwargs
    print "Average training time: {:.4f}".format((ts - st) / repeats)
    print 'Test Precision@50 {:.4f}, Test Recall@50 {:.4f}'.format(
        np.mean(precisions), np.mean(recalls))

    if out_dir is not None:
        with open(out_dir, "a") as f:
            f.write("*=" * 40 + "\n")
            f.write("data: {} with {} repeats".format(data_name, repeats) +
                    "\n")
            f.write("Dot Product Model\n" + str(kwargs) + "\n")
            f.write("Average training time: {:.4f}".format((ts - st) /
                                                           repeats) + "\n")
            f.write('Test Precision@50 {:.4f}, Test Recall@50 {:.4f}'.format(
                np.mean(precisions), np.mean(recalls)) + "\n")
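A hypothetical invocation of factorization, assuming train and test are already-split spotlight Interactions; all extra keyword arguments are forwarded straight to ImplicitFactorizationModel:

factorization(train,
              test,
              out_dir='results.txt',  # hypothetical output path
              data_name='movielens-100K',
              repeats=3,
              verbose=True,
              loss='bpr',
              n_iter=10,
              batch_size=1024,
              learning_rate=1e-2,
              l2=1e-6)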
Example #12
def test_bpr():

    interactions = movielens.get_movielens_dataset('100K')

    train, test = random_train_test_split(interactions,
                                          random_state=RANDOM_STATE)

    model = ImplicitFactorizationModel(loss='bpr',
                                       n_iter=10,
                                       batch_size=1024,
                                       learning_rate=1e-2,
                                       l2=1e-6,
                                       use_cuda=CUDA)
    model.fit(train)

    mrr = mrr_score(model, test, train=train).mean()

    assert mrr + EPSILON > 0.07
Example #13
    def model_implicit_factorization(self, train: Interactions, random_state: np.random.RandomState, hyperparameters: dict = None) -> ImplicitFactorizationModel:
        """Trains a Spotlight implicit matrix factorization model.

        Args:
            train (spotlight.interactions.Interactions): Training set as an interactions matrix.
            random_state (np.random.RandomState): Random state to use when fitting.
            hyperparameters (dict, optional): A number of hyperparameters for the model, either sampled
                from sample_implicit_hyperparameters or default used by model. Defaults can be found
                in global variable DEFAULT_PARAMS.

        Returns:
            spotlight.factorization.implicit.ImplicitFactorizationModel: A Spotlight implicit matrix factorization model.

        """

        logger = logging.getLogger()
        if hyperparameters:
            logger.info("Beginning fitting implicit model... \n Hyperparameters: \n {0}".format(
                json.dumps({i:hyperparameters[i] for i in hyperparameters if i != 'use_cuda'})
            ))
            model = ImplicitFactorizationModel(
                loss=hyperparameters['loss'],
                learning_rate=hyperparameters['learning_rate'],
                batch_size=hyperparameters['batch_size'],
                embedding_dim=hyperparameters['embedding_dim'],
                n_iter=hyperparameters['n_iter'],
                l2=hyperparameters['l2'],
                use_cuda=True,
                random_state=random_state
            )
        else:
            logger.info("Beginning fitting implicit model with default hyperparameters...")
            model = ImplicitFactorizationModel(use_cuda=True)
        model.fit(train, verbose=True)
        return model
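A sketch of the hyperparameters dict this method accepts; the keys mirror the constructor arguments it reads, but the values here are illustrative rather than the DEFAULT_PARAMS the docstring mentions, and trainer stands in for a hypothetical instance of the enclosing class:

import numpy as np

hyperparameters = {
    'loss': 'adaptive_hinge',
    'learning_rate': 1e-3,
    'batch_size': 1024,
    'embedding_dim': 64,
    'n_iter': 10,
    'l2': 1e-9,
}
# `trainer` is a hypothetical instance of the enclosing class
model = trainer.model_implicit_factorization(train, np.random.RandomState(42),
                                             hyperparameters)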
Example #14
def test_bpr_custom_optimizer():

    interactions = movielens.get_movielens_dataset('100K')

    train, test = random_train_test_split(interactions,
                                          random_state=RANDOM_STATE)

    def adagrad_optimizer(model_params, lr=1e-2, weight_decay=1e-6):

        return torch.optim.Adagrad(model_params,
                                   lr=lr,
                                   weight_decay=weight_decay)

    model = ImplicitFactorizationModel(loss='bpr',
                                       n_iter=10,
                                       batch_size=1024,
                                       optimizer_func=adagrad_optimizer)
    model.fit(train)

    mrr = mrr_score(model, test, train=train).mean()

    assert mrr > 0.06
Example #16
def build_factorization_model(hyperparameters, train, random_state):
    h = hyperparameters

    set_seed(42, CUDA)

    if h['compression_ratio'] < 1.0:
        item_embeddings = BloomEmbedding(
            train.num_items,
            h['embedding_dim'],
            compression_ratio=h['compression_ratio'],
            num_hash_functions=4,
            padding_idx=0)
        user_embeddings = BloomEmbedding(
            train.num_users,
            h['embedding_dim'],
            compression_ratio=h['compression_ratio'],
            num_hash_functions=4,
            padding_idx=0)
    else:
        item_embeddings = ScaledEmbedding(train.num_items,
                                          h['embedding_dim'],
                                          padding_idx=0)
        user_embeddings = ScaledEmbedding(train.num_users,
                                          h['embedding_dim'],
                                          padding_idx=0)

    network = BilinearNet(train.num_users,
                          train.num_items,
                          user_embedding_layer=user_embeddings,
                          item_embedding_layer=item_embeddings)

    model = ImplicitFactorizationModel(loss=h['loss'],
                                       n_iter=h['n_iter'],
                                       batch_size=h['batch_size'],
                                       learning_rate=h['learning_rate'],
                                       embedding_dim=h['embedding_dim'],
                                       l2=h['l2'],
                                       representation=network,
                                       use_cuda=CUDA,
                                       random_state=np.random.RandomState(42))

    return model
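build_factorization_model switches between Bloom and scaled embeddings on h['compression_ratio']; a hypothetical hyperparameter dict that exercises the Bloom branch, assuming train is a spotlight Interactions training split:

hyperparameters = {
    'compression_ratio': 0.5,  # < 1.0 selects BloomEmbedding
    'embedding_dim': 32,
    'loss': 'bpr',
    'n_iter': 10,
    'batch_size': 1024,
    'learning_rate': 1e-2,
    'l2': 1e-6,
}
model = build_factorization_model(hyperparameters, train,
                                  np.random.RandomState(42))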
Example #17
def build_factorization_model(train, random_state):

    embedding_dim = 32
    residual = True
    num_layers = 2
    embed = True
    gated = True

    item_embeddings = LSHEmbedding(train.num_items,
                                   embedding_dim,
                                   embed=embed,
                                   gated=gated,
                                   num_layers=num_layers,
                                   residual_connections=residual,
                                   num_hash_functions=1)
    item_embeddings.fit(train.tocsr().T)
    user_embeddings = LSHEmbedding(train.num_users,
                                   embedding_dim,
                                   embed=embed,
                                   gated=gated,
                                   num_layers=num_layers,
                                   residual_connections=residual,
                                   num_hash_functions=1)
    user_embeddings.fit(train.tocsr())

    network = BilinearNet(train.num_users,
                          train.num_items,
                          user_embedding_layer=user_embeddings,
                          item_embedding_layer=item_embeddings)

    model = ImplicitFactorizationModel(loss='bpr',
                                       n_iter=10,
                                       batch_size=1024,
                                       learning_rate=5 * 1e-2,
                                       embedding_dim=embedding_dim,
                                       l2=1e-6,
                                       representation=network,
                                       use_cuda=CUDA,
                                       random_state=np.random.RandomState(42))

    return model
Example #18
    def obtener_modelos(self):
        """
        obtener_modelos method. Builds, trains and saves the chosen model.

        This method is only used in the text interface.
        """
        
        global train, modelo
        
        # Build the chosen model, train it with default parameters and save it
        if self.opcion_modelo == 1:
            modelo = ExplicitFactorizationModel(loss='logistic', use_cuda=torch.cuda.is_available())
            modelo.fit(train, verbose=True)
            guardar_modelos_dl(modelo, 'the explicit factorization model')
        elif self.opcion_modelo == 2:
            modelo = ImplicitFactorizationModel(loss='bpr', use_cuda=torch.cuda.is_available())
            modelo.fit(train, verbose=True)
            guardar_modelos_dl(modelo, 'the implicit factorization model')
        else:
            modelo = ImplicitSequenceModel(loss='bpr', representation='pooling', use_cuda=torch.cuda.is_available())
            modelo.fit(train, verbose=True)
            guardar_modelos_dl(modelo, 'the implicit sequence model')
Example #19
    def objective(hyper):

        print(hyper)

        start = time.perf_counter()  # time.clock() was removed in Python 3.8

        if hyper['model']['type'] == 'lsh':
            num_hashes = int(hyper['model']['num_hash_functions'])
            num_layers = int(hyper['model']['num_layers'])
            nonlinearity = hyper['model']['nonlinearity']
            residual = hyper['model']['residual']
            embed = hyper['model']['embed']
            gated = hyper['model']['gated']

            item_embeddings = LSHEmbedding(train.num_items,
                                           int(hyper['embedding_dim']),
                                           embed=embed,
                                           gated=gated,
                                           residual_connections=residual,
                                           nonlinearity=nonlinearity,
                                           num_layers=num_layers,
                                           num_hash_functions=num_hashes)
            item_embeddings.fit(train.tocsr().T)
            user_embeddings = LSHEmbedding(train.num_users,
                                           int(hyper['embedding_dim']),
                                           embed=embed,
                                           gated=gated,
                                           residual_connections=residual,
                                           nonlinearity=nonlinearity,
                                           num_layers=num_layers,
                                           num_hash_functions=num_hashes)
            user_embeddings.fit(train.tocsr())
        else:
            user_embeddings = ScaledEmbedding(train.num_users,
                                              int(hyper['embedding_dim']),
                                              padding_idx=0)
            item_embeddings = ScaledEmbedding(train.num_items,
                                              int(hyper['embedding_dim']),
                                              padding_idx=0)

        network = BilinearNet(train.num_users,
                              train.num_items,
                              user_embedding_layer=user_embeddings,
                              item_embedding_layer=item_embeddings)

        model = ImplicitFactorizationModel(
            loss=hyper['loss'],
            n_iter=int(hyper['n_iter']),
            batch_size=int(hyper['batch_size']),
            learning_rate=hyper['learning_rate'],
            embedding_dim=int(hyper['embedding_dim']),
            l2=hyper['l2'],
            representation=network,
            use_cuda=CUDA,
            random_state=random_state)

        model.fit(train, verbose=True)

        elapsed = time.perf_counter() - start

        print(model)

        validation_mrr = mrr_score(model, validation, train=train).mean()
        test_mrr = mrr_score(model,
                             test,
                             train=train.tocsr() + validation.tocsr()).mean()

        print('MRR {} {}'.format(validation_mrr, test_mrr))

        return {
            'loss': -validation_mrr,
            'status': STATUS_OK,
            'validation_mrr': validation_mrr,
            'test_mrr': test_mrr,
            'elapsed': elapsed,
            'hyper': hyper
        }
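The objective above returns a hyperopt-style result dict (STATUS_OK, negated validation MRR as the loss), so it can be driven by hyperopt's fmin. A minimal sketch; the search space is illustrative, not the one the original experiment used:

from hyperopt import Trials, fmin, hp, tpe

space = {
    'loss': hp.choice('loss', ['bpr', 'adaptive_hinge']),
    'n_iter': hp.quniform('n_iter', 5, 50, 5),
    'batch_size': hp.quniform('batch_size', 256, 4096, 256),
    'learning_rate': hp.loguniform('learning_rate', -8, -2),
    'embedding_dim': hp.quniform('embedding_dim', 16, 128, 16),
    'l2': hp.loguniform('l2', -16, -4),
    'model': hp.choice('model', [{'type': 'embedding'}]),  # skip the 'lsh' branch
}

trials = Trials()
best = fmin(objective, space, algo=tpe.suggest, max_evals=50, trials=trials)
print(best)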
Example #21
from spotlight.factorization.explicit import ExplicitFactorizationModel
from spotlight.factorization.implicit import ImplicitFactorizationModel

# ExplicitFactorizationModel
emodel = ExplicitFactorizationModel(n_iter=10,
                                    embedding_dim=32, 
                                    use_cuda=False)
emodel.fit(exp_train, verbose=True)
score_emodel = scoreAll(emodel)
print(calc_reciprank(exp_validation, score_emodel, train=exp_train).mean())


# ImplicitFactorizationModel
imodel = ImplicitFactorizationModel(n_iter=10,
                                    loss='bpr',
                                    embedding_dim=32, 
                                    use_cuda=False)
imodel.fit(exp_train, verbose=True)
score_imodel_32_on_exp = scoreAll(imodel)
print(calc_reciprank(exp_validation, score_imodel_32_on_exp, train=exp_train).mean())

# ImplicitFactorizationModel is more effective;
# tune the number of latent factors
imodel_64 = ImplicitFactorizationModel(n_iter=10,
                                       loss='bpr',
                                       embedding_dim=64,
                                       use_cuda=False)
imodel_64.fit(exp_train, verbose=True)
print(calc_reciprank(exp_validation, scoreAll(imodel_64), train=exp_train).mean())

imodel_128 = ImplicitFactorizationModel(n_iter=10,
                                        loss='bpr',
                                        embedding_dim=128,
                                        use_cuda=False)
imodel_128.fit(exp_train, verbose=True)
print(calc_reciprank(exp_validation, scoreAll(imodel_128), train=exp_train).mean())
Example #22
    num_layers = int(args[1])
    factor_size = int(args[0])
    config["layers"] = [4 * factor_size] + [
        factor_size * (2**i) for i in range(num_layers - 1, -1, -1)
    ]
    config["latent_dim"] = 2 * factor_size
    writer.add_text('config', str(config), 0)

    rep = MLP(config)
else:
    rep = None

model = ImplicitFactorizationModel(
    n_iter=n_iters,
    loss=loss,
    notify_loss_completion=notify_loss_completion,
    notify_batch_eval_completion=notify_batch_eval_completion,
    notify_epoch_completion=notify_epoch_completion,
    log_loss_interval=log_loss_interval,
    log_eval_interval=log_eval_interval,
    betas=betas,
    learning_rate=lr,
    batch_size=batch_size,
    random_state=np.random.RandomState(2),
    num_negative_samples=num_negative_samples,
    l2=l2,
    use_cuda=use_cuda,
    representation=rep)
logger.info("Model is initialized, now fitting..")
model.fit(interactions)
Example #23
    print('[ %04ds ] Mapping created' % (time.time() - start_time))

    training_interactions = Interactions(
        np.array(training_user_ids), np.array(training_business_ids),
        np.array(training_ratings, dtype=np.float32))

    no_components = 30
    loss = 'pointwise'
    batch_size = 64
    learning_rate = 0.1
    l2 = 1e-7
    epochs = 8

    model = ImplicitFactorizationModel(loss=loss,
                                       embedding_dim=no_components,
                                       learning_rate=learning_rate,
                                       batch_size=batch_size,
                                       n_iter=epochs,
                                       l2=l2)

    model.fit(training_interactions, verbose=True)
    print('[ %04ds ] Model fitted' % (time.time() - start_time))

    testing_set: List[Review] = Review.load_from_file(testing_set_file)
    seen_testing_set, unseen_testing_set = Review.extract_seen_reviews(
        testing_set, training_set)
    print(len(seen_testing_set), len(unseen_testing_set))
    normalized_seen_testing_set = Review.normalize_by_user(
        seen_testing_set, user_avg)
    seen_pairs, ground_truth = Review.extract_sparse_testing_matrix_and_ground_truth(
        normalized_seen_testing_set)
    testing_user_ids = []
Example #24
from spotlight.evaluation import sequence_mrr_score
from spotlight.factorization.explicit import ExplicitFactorizationModel
from spotlight.factorization.implicit import ImplicitFactorizationModel
from spotlight.sequence.implicit import ImplicitSequenceModel

dataset = get_movielens_dataset(variant='100K')
train, test = random_train_test_split(dataset)


def train_and_test(model, train, test, score):
    print('Train and test {}'.format(model))
    model.fit(train, verbose=True)

    _score = score(model, test)
    print('score({}): {}'.format(score, _score))


explicit_model = ExplicitFactorizationModel(n_iter=1)
train_and_test(explicit_model, train, test, rmse_score)

implicit_model = ImplicitFactorizationModel(n_iter=3, loss='bpr')
train_and_test(implicit_model, train, test, rmse_score)

train = train.to_sequence()
test = test.to_sequence()

implicit_cnn_model = ImplicitSequenceModel(n_iter=3,
                                           representation='cnn',
                                           loss='bpr')
train_and_test(implicit_cnn_model, train, test, sequence_mrr_score)
Example #25
        'learning_rate': 0.0048015875347904155,
        'loss': 'adaptive_hinge',
        'n_iter': 100.0,
        'num_components': 3.0,
        'type': 'mixture'
    }

    train, validation, test = load_data('100K', random_state)

    representation = EmbeddingMixtureNet(
        train.num_users,
        train.num_items,
        num_components=int(hyper['num_components']),
        embedding_dim=int(hyper['embedding_dim']))
    # representation = BilinearNet(train.num_users,
    #                              train.num_items,
    #                              embedding_dim=int(hyper['embedding_dim']))
    model = ImplicitFactorizationModel(loss=hyper['loss'],
                                       batch_size=int(hyper['batch_size']),
                                       representation=representation,
                                       learning_rate=hyper['learning_rate'],
                                       n_iter=int(hyper['n_iter']),
                                       l2=hyper['l2'],
                                       use_cuda=CUDA,
                                       random_state=np.random.RandomState(42))
    model.fit(train, verbose=True)
    model._net.train(False)
    test_mrr = _evaluate(model, test, train.tocsr() + validation.tocsr())
    print('Test MRR {}'.format(test_mrr))
    print(model)
Example #26
data = pd.read_csv(train_file,
                   na_filter=False,
                   converters={"rating": np.float32})
# vdf = pd.read_csv(test_file, na_filter=False, converters={"rating": np.float32})

infos = read_info_file(train_file.parent)
num_items = int(infos.get("num_items")) + 1
# data = pd.concat([tdf, vdf], axis=0)
interactions = Interactions(data.userId.values,
                            data.movieId.values,
                            timestamps=data.timestamp.values,
                            num_items=num_items)
model = ImplicitFactorizationModel(embedding_dim=n_dimensions,
                                   n_iter=num_minor_iterations,
                                   loss='bpr',
                                   use_cuda=torch.cuda.is_available(),
                                   batch_size=batch_size,
                                   learning_rate=1e-3,
                                   l2=1e-5)

test_user_ids = data.userId.unique()  # keeps order of appearance

for i in tqdm(range(num_major_iterations)):
    print("doing it number {}".format(i))
    save_dir = sim_dir / str(i)
    if not save_dir.exists():
        save_dir.mkdir()
    model.fit(interactions, verbose=True)
    torch.save(model._net.state_dict(), save_dir / "model.pkl")

    with torch.no_grad():
Example #27
                exclude.getrow(user).toarray()[0] == 0)
            user_rankings = (user_rankings * exclude_interactions)
        out[user] = user_rankings.argsort()[-k:][::-1]
    return out


if __name__ == '__main__':
    """
    Train a benchmark to compare to my model
    """
    df, train, test, train_sparse = get_data()

    model = ImplicitFactorizationModel(loss='bpr',
                                       embedding_dim=32,
                                       n_iter=10,
                                       batch_size=256,
                                       l2=0.0,
                                       learning_rate=0.01,
                                       num_negative_samples=1)
    model.fit(train, verbose=True)

    rankings = rank(model, df.user.unique(), k=10, exclude=None)
    print("Train Rankings = {}".format(rankings))

    precisions, recalls = precision_recall_score(model,
                                                 train,
                                                 train=None,
                                                 k=10)
    print("Model Train precision at 10={}".format(
        sum(precisions) / len(precisions)))
Example #28
    foods = np.array(list(map(np.int32, data["menu_id"])))
    ratings = np.array(list(map(np.float32, data["rating"])))
    dataset = Interactions(user_ids=ids,
                           item_ids=foods,
                           ratings=ratings,
                           num_users=int(num_user),
                           num_items=int(foods_items),
                           timestamps=timeStamps)

    if name == "test":
        dataset_test = dataset
    elif name == "train":
        dataset_train = dataset

if model_mode.lower() == "ifm":
    model = ImplicitFactorizationModel(n_iter=n_iter)
if model_mode.lower() == "efm":
    model = ExplicitFactorizationModel(n_iter=n_iter)
if model_mode.lower() == "cnn":
    net = CNNNet(num_items=int(foods_items))
    model = ImplicitSequenceModel(n_iter=n_iter,
                                  use_cuda=torch.cuda.is_available(),
                                  representation=net)

model.fit(dataset_train)

with open(save_file, 'wb') as f:
    pickle.dump(model, f, pickle.HIGHEST_PROTOCOL)

if model_mode.lower() == "cnn":
    mrr = sequence_mrr_score(model, dataset_test)