Example #1
def main_surprise(hyper_params, gpu_id=None):
    import time

    from data import load_data
    from utils import load_user_item_counts, log_end_epoch
    from surprise_models import Model

    # User and item (train-set) counts for making `user_count_mse_map`, `item_count_mse_map`
    user_count, item_count = load_user_item_counts(hyper_params)

    train_reader, test_reader, val_reader, hyper_params = load_data(
        hyper_params)
    rating_matrix = train_reader.get_surprise_format_data()
    model = Model(hyper_params, user_count, item_count)

    # Fit the model on the train ratings and compute metrics on the test set
    start_time = time.time()
    metrics, user_count_mse_map, item_count_mse_map = model(
        rating_matrix, test_reader)
    log_end_epoch(hyper_params, metrics, 'final', (time.time() - start_time))

    return metrics, user_count_mse_map, item_count_mse_map
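
Everything above is driven by the `hyper_params` dictionary. A minimal sketch of a call site, with illustrative key names only (`dataset`, `model_type`, `log_file` are assumptions; the real key set is whatever `load_data`, `load_user_item_counts`, and `log_end_epoch` read in this project):

if __name__ == '__main__':
    # Hypothetical configuration for illustration only; the actual keys
    # are defined by the project's data/utils modules.
    hyper_params = {
        'dataset': 'ml-100k',
        'model_type': 'baseline',
        'log_file': 'logs/surprise_run.txt',
    }
    metrics, user_count_mse_map, item_count_mse_map = main_surprise(hyper_params)
    print(metrics)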
Example #2
def train_complete(hyper_params,
                   Model,
                   train_reader,
                   val_reader,
                   user_count,
                   item_count,
                   model,
                   review=True):
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from torch.autograd import Variable

    from loss import MSELoss
    from eval import evaluate, eval_ranking
    from utils import file_write, is_cuda_available, load_obj, log_end_epoch, init_transnet_optim

    # file_write(hyper_params['log_file'], "\n\nSimulation run on: " + str(dt.datetime.now()) + "\n\n")
    # file_write(hyper_params['log_file'], "Data reading complete!")
    # file_write(hyper_params['log_file'], "Number of train batches: {:4d}".format(len(train_reader)))
    # file_write(hyper_params['log_file'], "Number of validation batches: {:4d}".format(len(val_reader)))

    # criterion = MSELoss(hyper_params)

    # if hyper_params['model_type'] in [ 'transnet', 'transnet++' ]:
    #     optimizer = init_transnet_optim(hyper_params, model)
    #
    # else:
    #     optimizer = torch.optim.Adam(
    #         model.parameters(), lr=hyper_params['lr'], weight_decay=hyper_params['weight_decay']
    #     )

    # file_write(hyper_params['log_file'], str(model))
    # file_write(hyper_params['log_file'], "\nModel Built!\nStarting Training...\n")

    # try:
    #     best_MSE = float('inf')

    #     for epoch in range(1, hyper_params['epochs'] + 1):
    #         epoch_start_time = time.time()
    #
    #         # Training for one epoch
    #         metrics = train(
    #             model, criterion, optimizer, train_reader, hyper_params
    #         )
    #         metrics['dataset'] = hyper_params['dataset']
    #         # log_end_epoch(hyper_params, metrics, epoch, time.time() - epoch_start_time, metrics_on = '(TRAIN)')
    #
    #         # Calculating the metrics on the validation set
    #         metrics, _, _ = evaluate(
    #             model, criterion, val_reader, hyper_params,
    #             user_count, item_count, review = review
    #         )
    #         metrics['dataset'] = hyper_params['dataset']
    #         log_end_epoch(hyper_params, metrics, epoch, time.time() - epoch_start_time, metrics_on = '(VAL)')
    #
    #         # Save best model on validation set
    #         if metrics['MSE'] < best_MSE:
    #             print("Saving model...")
    #             torch.save(model.state_dict(), hyper_params['model_path'])
    #             best_MSE = metrics['MSE']
    #
    # except KeyboardInterrupt: print('Exiting from training early')

    # Load the best model (as saved on validation MSE) and return it for test-set evaluation
    model = Model(hyper_params)
    if is_cuda_available:
        model = model.cuda()
    model.load_state_dict(torch.load(hyper_params['model_path']))
    model.eval()

    return model
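
The commented-out loop above calls a `train(...)` helper that this listing does not include. A minimal sketch of what one training epoch could look like, assuming the reader yields `(data, ratings)` batches and the criterion is MSE-like; both are assumptions, since the project's actual batch format is not shown here:

def train(model, criterion, optimizer, reader, hyper_params):
    # Hypothetical per-epoch loop matching the commented-out call above.
    model.train()
    total_loss, num_batches = 0.0, 0
    for data, ratings in reader:  # assumed iteration protocol
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, ratings)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1
    return {'MSE': total_loss / max(num_batches, 1)}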
Example #3
def main_pytorch(hyper_params, gpu_id=None):
    import time

    from data import load_data
    from eval import evaluate, eval_ranking
    from utils import load_obj, is_cuda_available
    from utils import load_user_item_counts, xavier_init, log_end_epoch
    from loss import MSELoss

    if hyper_params['model_type'] in ['deepconn', 'deepconn++']:
        from pytorch_models.DeepCoNN import DeepCoNN as Model
    elif hyper_params['model_type'] in ['transnet', 'transnet++']:
        from pytorch_models.TransNet import TransNet as Model
    elif hyper_params['model_type'] in ['NARRE']:
        from pytorch_models.NARRE_modify import NARRE as Model
    elif hyper_params['model_type'] in ['bias_only', 'MF', 'MF_dot']:
        from pytorch_models.MF import MF as Model
    else:
        raise ValueError("Unknown model_type: " + hyper_params['model_type'])

    import torch

    # Load the data readers
    user_count, item_count = load_user_item_counts(hyper_params)
    if hyper_params['model_type'] not in [
            'bias_only', 'MF', 'MF_dot', 'NeuMF'
    ]:
        review_based_model = True
        try:
            from data_fast import load_data_fast
            train_reader, test_reader, val_reader, hyper_params = load_data_fast(
                hyper_params)
            print(
                "Loaded preprocessed epoch files. Should be faster training..."
            )
        except Exception as e:
            print("Tried loading preprocessed epoch files, but failed: {}".format(e))
            print(
                "Please consider running `prep_all_data.sh` to prepare fast data for DeepCoNN/TransNet/NARRE."
            )
            print("This will save a large amount of run time.")
            print("Loading standard (slower) data...")
            train_reader, test_reader, val_reader, hyper_params = load_data(
                hyper_params)
    else:
        review_based_model = False
        train_reader, test_reader, val_reader, hyper_params = load_data(
            hyper_params)

    # Initialize the model
    model = Model(hyper_params)
    if is_cuda_available:
        model = model.cuda()
    xavier_init(model)

    # Train the model
    start_time = time.time()
    model = train_complete(hyper_params,
                           Model,
                           train_reader,
                           val_reader,
                           user_count,
                           item_count,
                           model,
                           review=review_based_model)

    # Calculating MSE on test-set
    print("Calculating MSE on test-set")
    criterion = MSELoss(hyper_params)
    metrics, user_count_mse_map, item_count_mse_map = evaluate(
        model,
        criterion,
        test_reader,
        hyper_params,
        user_count,
        item_count,
        review=review_based_model)
    print("Calculating HR@1 on test-set")
    # Calculating HR@1 on test-set
    _, test_reader2, _, _ = load_data(
        hyper_params)  # Needs default slow reader
    metrics.update(
        eval_ranking(model,
                     test_reader2,
                     hyper_params,
                     review=review_based_model))

    log_end_epoch(hyper_params,
                  metrics,
                  'final',
                  time.time() - start_time,
                  metrics_on='(TEST)')

    return metrics, user_count_mse_map, item_count_mse_map
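
Note that neither `main_surprise` nor `main_pytorch` actually reads its `gpu_id` argument, so device selection presumably happens in the caller. A hedged sketch of such a driver, pinning the process to one GPU via the standard `CUDA_VISIBLE_DEVICES` mechanism before `torch` is imported inside the worker; the dispatch rule is an assumption based on the `model_type` values seen above:

import os

def main(hyper_params, gpu_id=None):
    # Setting CUDA_VISIBLE_DEVICES before torch initializes CUDA makes
    # only the chosen device visible to this process.
    if gpu_id is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)

    pytorch_models = ['bias_only', 'MF', 'MF_dot', 'deepconn', 'deepconn++',
                      'transnet', 'transnet++', 'NARRE']
    if hyper_params['model_type'] in pytorch_models:
        return main_pytorch(hyper_params, gpu_id=gpu_id)
    # Fall back to the scikit-surprise path for everything else.
    return main_surprise(hyper_params, gpu_id=gpu_id)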