Ejemplo n.º 1
0
def implicit_model(train_val_implicit_data, gpu_count):
    """Fit a small ``MatrixFactorizationModel`` and return it after calling ``freeze()``.

    Parameters
    ----------
    train_val_implicit_data: pair
        Unpacked as ``(train, val)`` and handed to the model as its ``train`` and ``val`` data
    gpu_count:
        Forwarded unchanged to ``CollieTrainer`` as ``gpus``

    Returns
    -------
    model: MatrixFactorizationModel
        The fitted model, with ``freeze()`` already called on it

    NOTE(review): the argument names suggest this is a pytest fixture, but no decorator is
    visible from here - confirm against the surrounding file.

    """
    train_interactions, val_interactions = train_val_implicit_data

    mf_model = MatrixFactorizationModel(
        train=train_interactions,
        val=val_interactions,
        embedding_dim=10,
        lr=1e-1,
    )

    # training is capped at 10 epochs, with logging and checkpointing switched off
    trainer_options = {
        'gpus': gpu_count,
        'max_epochs': 10,
        'deterministic': True,
        'logger': False,
        'checkpoint_callback': False,
    }
    trainer = CollieTrainer(model=mf_model, **trainer_options)
    trainer.fit(mf_model)

    mf_model.freeze()

    return mf_model
Ejemplo n.º 2
0
def models_trained_for_one_step(request,
                                train_val_implicit_data,
                                movielens_metadata_df,
                                movielens_implicit_df,
                                train_val_implicit_pandas_data,
                                gpu_count):
    """Build the model variant named by ``request.param``, fit it for one step, and return it.

    Parameters
    ----------
    request:
        Object whose ``param`` attribute names the model variant to build. Each supported value
        corresponds to one branch below
    train_val_implicit_data: pair
        Unpacked as ``(train, val)`` and used as the data for most variants
    movielens_metadata_df:
        Item metadata passed to ``HybridPretrainedModel``. Its columns whose name contains
        ``'genre'`` are also used to build the ``metadata_for_loss`` tensor
    movielens_implicit_df:
        Not used in this function's body
    train_val_implicit_pandas_data: pair
        Unpacked as ``(train_df, val_df)``. Only used by the ``'mf_hdf5'`` variant, which writes
        both to temporary HDF5 files
    gpu_count:
        Forwarded unchanged to every ``CollieTrainer`` as ``gpus``

    Returns
    -------
    model:
        The fitted model, with ``freeze()`` already called on it

    Raises
    ------
    ValueError
        If ``request.param`` does not name a supported variant

    """
    train, val = train_val_implicit_data
    model_name = request.param

    if model_name == 'mf_hdf5':
        # create, fit, and return the model all at once so we can close the HDF5 file
        train_pandas_df, val_pandas_df = train_val_implicit_pandas_data

        with tempfile.TemporaryDirectory() as temp_dir:
            train_path = os.path.join(temp_dir, 'train.h5')
            val_path = os.path.join(temp_dir, 'val.h5')

            pandas_df_to_hdf5(df=train_pandas_df, out_path=train_path, key='interactions')
            pandas_df_to_hdf5(df=val_pandas_df, out_path=val_path, key='interactions')

            train_loader = HDF5InteractionsDataLoader(hdf5_path=train_path,
                                                      user_col='user_id',
                                                      item_col='item_id',
                                                      num_users=train.num_users,
                                                      num_items=train.num_items,
                                                      batch_size=1024,
                                                      shuffle=True)
            val_loader = HDF5InteractionsDataLoader(hdf5_path=val_path,
                                                    user_col='user_id',
                                                    item_col='item_id',
                                                    num_users=val.num_users,
                                                    num_items=val.num_items,
                                                    batch_size=1024,
                                                    shuffle=False)

            model = MatrixFactorizationModel(train=train_loader,
                                             val=val_loader,
                                             embedding_dim=15,
                                             dropout_p=0.1,
                                             lr=1e-1,
                                             bias_lr=1e-2,
                                             optimizer='adam',
                                             bias_optimizer='sgd',
                                             weight_decay=1e-7,
                                             loss='bpr',
                                             sparse=False)

            model_trainer = _one_step_trainer(model, gpu_count)
            model_trainer.fit(model)
            model.freeze()

            return model

    elif model_name == 'sparse_mf':
        model = MatrixFactorizationModel(train=train,
                                         val=val,
                                         embedding_dim=15,
                                         dropout_p=0.1,
                                         lr=1e-1,
                                         bias_lr=1e-2,
                                         optimizer='sparse_adam',
                                         bias_optimizer='sgd',
                                         weight_decay=0,
                                         loss='hinge',
                                         sparse=True)
    elif model_name == 'mf_no_val':
        model = MatrixFactorizationModel(train=train, val=None)
    elif model_name in ('mf_non_approximate', 'mf_approximate'):
        # the two variants differ only in which data loader class wraps the interactions
        if model_name == 'mf_non_approximate':
            loader_class = InteractionsDataLoader
        else:
            loader_class = ApproximateNegativeSamplingInteractionsDataLoader

        train_loader = loader_class(interactions=train, batch_size=1024, shuffle=True)
        val_loader = loader_class(interactions=val, batch_size=1024, shuffle=False)

        model = MatrixFactorizationModel(train=train_loader,
                                         val=val_loader,
                                         embedding_dim=15,
                                         dropout_p=0.1,
                                         lr=1e-1,
                                         bias_lr=1e-2,
                                         optimizer='adam',
                                         bias_optimizer='sgd',
                                         weight_decay=1e-7,
                                         loss='bpr',
                                         sparse=False)
    elif model_name == 'mf_with_y_range':
        model = MatrixFactorizationModel(train=train,
                                         val=val,
                                         y_range=(0, 4))
    elif model_name == 'nonlinear_mf':
        model = NonlinearMatrixFactorizationModel(train=train,
                                                  val=val,
                                                  user_embedding_dim=15,
                                                  item_embedding_dim=15,
                                                  user_dense_layers_dims=[15, 10],
                                                  item_dense_layers_dims=[15, 10],
                                                  embedding_dropout_p=0.05,
                                                  dense_dropout_p=0.1,
                                                  lr=1e-1,
                                                  bias_lr=1e-2,
                                                  optimizer='adam',
                                                  bias_optimizer='sgd',
                                                  weight_decay=1e-7,
                                                  loss='bpr')
    elif model_name == 'nonlinear_mf_with_y_range':
        model = NonlinearMatrixFactorizationModel(train=train,
                                                  val=val,
                                                  y_range=(0, 4))
    elif model_name == 'neucf':
        model = NeuralCollaborativeFiltering(train=train,
                                             val=val,
                                             embedding_dim=10,
                                             num_layers=1,
                                             dropout_p=0.1,
                                             lr=1e-3,
                                             weight_decay=0.,
                                             optimizer='adam',
                                             loss='adaptive')
    elif model_name == 'neucf_sigmoid':
        model = NeuralCollaborativeFiltering(train=train,
                                             val=val,
                                             final_layer='sigmoid')
    elif model_name == 'neucf_relu':
        model = NeuralCollaborativeFiltering(train=train,
                                             val=val,
                                             final_layer='relu')
    # NOTE(review): 'rulu' looks like a typo for 'relu', but the string must match the param
    # list this function is driven by, which is defined outside this block - keep in sync
    elif model_name == 'neucf_leaky_rulu':
        model = NeuralCollaborativeFiltering(train=train,
                                             val=val,
                                             final_layer='leaky_relu')
    elif model_name == 'neucf_custom':
        model = NeuralCollaborativeFiltering(train=train,
                                             val=val,
                                             final_layer=torch.tanh)
    elif model_name in ('hybrid_pretrained', 'hybrid_pretrained_metadata_layers'):
        # the hybrid model needs an already-fitted matrix factorization model to start from
        implicit_model = MatrixFactorizationModel(train=train,
                                                  val=val,
                                                  embedding_dim=10,
                                                  lr=1e-1,
                                                  optimizer='adam')
        implicit_model_trainer = _one_step_trainer(implicit_model, gpu_count)
        implicit_model_trainer.fit(implicit_model)
        implicit_model.freeze()

        # for each row, the position of the highest-valued column among the ``genre`` columns
        genres = (
            torch.tensor(movielens_metadata_df[
                [c for c in movielens_metadata_df.columns if 'genre' in c]
            ].values)
            .topk(1)
            .indices
            .view(-1)
        )

        if model_name == 'hybrid_pretrained_metadata_layers':
            metadata_layers_dims = [16, 12]
        else:
            metadata_layers_dims = None

        # first fit a hybrid model with ``freeze_embeddings=True``...
        model_frozen = HybridPretrainedModel(train=train,
                                             val=val,
                                             item_metadata=movielens_metadata_df,
                                             trained_model=implicit_model,
                                             metadata_layers_dims=metadata_layers_dims,
                                             freeze_embeddings=True,
                                             dropout_p=0.15,
                                             loss='warp',
                                             lr=.01,
                                             optimizer=torch.optim.Adam,
                                             metadata_for_loss={'genre': genres},
                                             metadata_for_loss_weights={'genre': .4},
                                             weight_decay=0.0)
        model_frozen_trainer = _one_step_trainer(model_frozen, gpu_count)
        model_frozen_trainer.fit(model_frozen)

        # ... then load it into a second hybrid model built with ``freeze_embeddings=False``,
        # which is the one that gets fit and returned below
        model = HybridPretrainedModel(train=train,
                                      val=val,
                                      item_metadata=movielens_metadata_df,
                                      trained_model=implicit_model,
                                      metadata_layers_dims=metadata_layers_dims,
                                      freeze_embeddings=False,
                                      dropout_p=0.15,
                                      loss='bpr',
                                      lr=1e-4,
                                      optimizer=torch.optim.Adam,
                                      metadata_for_loss={'genre': genres},
                                      metadata_for_loss_weights={'genre': .4},
                                      weight_decay=0.0)
        model.load_from_hybrid_model(model_frozen)
    else:
        # without this, an unrecognized param would leave ``model`` unbound and fail below with
        # an error that does not mention the offending value
        raise ValueError(f'Unknown model param for ``models_trained_for_one_step``: {model_name!r}')

    # the trainer is deliberately built outside of ``pytest.warns`` so only warnings raised
    # while fitting count towards the check below
    model_trainer = _one_step_trainer(model, gpu_count)

    if model_name == 'mf_no_val':
        # fitting the model built with ``val=None`` is expected to emit a ``UserWarning``
        with pytest.warns(UserWarning):
            model_trainer.fit(model)
    else:
        model_trainer.fit(model)

    model.freeze()

    return model


def _one_step_trainer(model, gpu_count):
    """Return a ``CollieTrainer`` for ``model`` with ``max_steps=1`` and no logger/checkpoints."""
    return CollieTrainer(model=model,
                         gpus=gpu_count,
                         max_steps=1,
                         deterministic=True,
                         logger=False,
                         checkpoint_callback=False)