def main_surprise(hyper_params, gpu_id=None):
    from data import load_data
    from utils import load_user_item_counts, log_end_epoch
    from surprise_models import Model

    # User and item (train-set) counts for making `user_count_mse_map`, `item_count_mse_map`
    user_count, item_count = load_user_item_counts(hyper_params)

    train_reader, test_reader, val_reader, hyper_params = load_data(hyper_params)
    rating_matrix = train_reader.get_surprise_format_data()

    model = Model(hyper_params, user_count, item_count)

    start_time = time.time()
    metrics, user_count_mse_map, item_count_mse_map = model(rating_matrix, test_reader)
    log_end_epoch(hyper_params, metrics, 'final', time.time() - start_time)

    return metrics, user_count_mse_map, item_count_mse_map
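# For reference, a Surprise-format train set is usually built from a (user, item, rating)
# DataFrame roughly as sketched below. This is only an assumption about what
# `train_reader.get_surprise_format_data()` returns, not the repo's actual implementation;
# the column names and rating scale are hypothetical.
def _surprise_trainset_sketch(ratings_df, rating_scale=(1, 5)):
    from surprise import Dataset, Reader

    reader = Reader(rating_scale=rating_scale)
    # `load_from_df` expects columns in (user, item, rating) order
    data = Dataset.load_from_df(ratings_df[['user', 'item', 'rating']], reader)
    return data.build_full_trainset()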
def train_complete(hyper_params, Model, train_reader, val_reader, user_count, item_count, model, review=True):
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from torch.autograd import Variable

    from loss import MSELoss
    from eval import evaluate, eval_ranking
    from utils import file_write, is_cuda_available, load_obj, log_end_epoch, init_transnet_optim

    # The logging, optimizer setup, and per-epoch training loop below are commented out;
    # as written, this function only reloads the best checkpoint from hyper_params['model_path'].

    # file_write(hyper_params['log_file'], "\n\nSimulation run on: " + str(dt.datetime.now()) + "\n\n")
    # file_write(hyper_params['log_file'], "Data reading complete!")
    # file_write(hyper_params['log_file'], "Number of train batches: {:4d}".format(len(train_reader)))
    # file_write(hyper_params['log_file'], "Number of validation batches: {:4d}".format(len(val_reader)))
    #
    # criterion = MSELoss(hyper_params)
    #
    # if hyper_params['model_type'] in ['transnet', 'transnet++']:
    #     optimizer = init_transnet_optim(hyper_params, model)
    # else:
    #     optimizer = torch.optim.Adam(
    #         model.parameters(), lr=hyper_params['lr'], weight_decay=hyper_params['weight_decay']
    #     )
    #
    # file_write(hyper_params['log_file'], str(model))
    # file_write(hyper_params['log_file'], "\nModel Built!\nStarting Training...\n")
    #
    # try:
    #     best_MSE = float(INF)
    #
    #     for epoch in range(1, hyper_params['epochs'] + 1):
    #         epoch_start_time = time.time()
    #
    #         # Training for one epoch
    #         metrics = train(
    #             model, criterion, optimizer, train_reader, hyper_params
    #         )
    #         metrics['dataset'] = hyper_params['dataset']
    #         log_end_epoch(hyper_params, metrics, epoch, time.time() - epoch_start_time, metrics_on='(TRAIN)')
    #
    #         # Calculating the metrics on the validation set
    #         metrics, _, _ = evaluate(
    #             model, criterion, val_reader, hyper_params,
    #             user_count, item_count, review=review
    #         )
    #         metrics['dataset'] = hyper_params['dataset']
    #         log_end_epoch(hyper_params, metrics, epoch, time.time() - epoch_start_time, metrics_on='(VAL)')
    #
    #         # Save best model on validation set
    #         if metrics['MSE'] < best_MSE:
    #             print("Saving model...")
    #             torch.save(model.state_dict(), hyper_params['model_path'])
    #             best_MSE = metrics['MSE']
    #
    # except KeyboardInterrupt:
    #     print('Exiting from training early')

    # Load best model and return it for evaluation on test-set
    model = Model(hyper_params)
    if is_cuda_available:
        model = model.cuda()
    model.load_state_dict(torch.load(hyper_params['model_path']))
    model.eval()

    return model
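# The (commented-out) epoch loop above calls a `train(...)` helper defined elsewhere in
# this repo. The function below is only an illustrative sketch of such a single-epoch
# loop, assuming the reader yields (data, ratings) batches and the criterion returns a
# scalar loss; it is not the repo's actual implementation.
def _train_one_epoch_sketch(model, criterion, optimizer, reader, hyper_params):
    import torch

    model.train()
    total_loss, num_batches = 0.0, 0

    for data, ratings in reader.iter():      # assumed reader interface
        optimizer.zero_grad()
        output = model(data)                 # predicted ratings
        loss = criterion(output, ratings)    # e.g. MSE between prediction and ground truth
        loss.backward()
        optimizer.step()

        total_loss += float(loss.item())
        num_batches += 1

    # Report the average training loss for the epoch
    return {'MSE': total_loss / max(num_batches, 1)}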
def main_pytorch(hyper_params, gpu_id=None):
    from data import load_data
    from eval import evaluate, eval_ranking
    from utils import load_obj, is_cuda_available
    from utils import load_user_item_counts, xavier_init, log_end_epoch
    from loss import MSELoss

    if hyper_params['model_type'] in ['deepconn', 'deepconn++']:
        from pytorch_models.DeepCoNN import DeepCoNN as Model
    elif hyper_params['model_type'] in ['transnet', 'transnet++']:
        from pytorch_models.TransNet import TransNet as Model
    elif hyper_params['model_type'] in ['NARRE']:
        from pytorch_models.NARRE_modify import NARRE as Model
    elif hyper_params['model_type'] in ['bias_only', 'MF', 'MF_dot']:
        from pytorch_models.MF import MF as Model

    import torch

    # Load the data readers
    user_count, item_count = load_user_item_counts(hyper_params)

    if hyper_params['model_type'] not in ['bias_only', 'MF', 'MF_dot', 'NeuMF']:
        review_based_model = True
        try:
            from data_fast import load_data_fast
            train_reader, test_reader, val_reader, hyper_params = load_data_fast(hyper_params)
            print("Loaded preprocessed epoch files. Should be faster training...")
        except Exception as e:
            print("Tried loading preprocessed epoch files, but failed.")
            print("Please consider running `prep_all_data.sh` to make quick data for DeepCoNN/TransNet/NARRE.")
            print("This will save large amounts of run time.")
            print("Loading standard (slower) data...")
            train_reader, test_reader, val_reader, hyper_params = load_data(hyper_params)
    else:
        review_based_model = False
        train_reader, test_reader, val_reader, hyper_params = load_data(hyper_params)

    # Initialize the model
    model = Model(hyper_params)
    if is_cuda_available:
        model = model.cuda()
    xavier_init(model)

    # Train the model
    start_time = time.time()
    model = train_complete(
        hyper_params, Model, train_reader, val_reader,
        user_count, item_count, model, review=review_based_model
    )

    # Calculating MSE on test-set
    print("Calculating MSE on test-set")
    criterion = MSELoss(hyper_params)
    metrics, user_count_mse_map, item_count_mse_map = evaluate(
        model, criterion, test_reader, hyper_params,
        user_count, item_count, review=review_based_model
    )

    # Calculating HR@1 on test-set
    print("Calculating HR@1 on test-set")
    _, test_reader2, _, _ = load_data(hyper_params)  # Needs default slow reader
    metrics.update(eval_ranking(model, test_reader2, hyper_params, review=review_based_model))

    log_end_epoch(hyper_params, metrics, 'final', time.time() - start_time, metrics_on='(TEST)')

    return metrics, user_count_mse_map, item_count_mse_map
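# Illustrative driver (an assumption, not the repo's actual entry point): the real
# hyper-parameter dict is built elsewhere in the repo, so the keys and values below are
# hypothetical and only mirror the ones referenced in this file. `time` is assumed to be
# imported at module level in the original file.
if __name__ == '__main__':
    example_hyper_params = {
        'dataset': 'example_dataset',       # hypothetical dataset name
        'model_type': 'MF',                 # one of the types handled by main_pytorch()
        'log_file': 'logs/example.log',     # hypothetical log path
        'model_path': 'models/example.pt',  # hypothetical checkpoint path
        'lr': 0.002,
        'weight_decay': 1e-6,
        'epochs': 50,
    }

    # PyTorch models go through main_pytorch(); anything else is assumed to be a
    # Surprise-based baseline handled by main_surprise().
    pytorch_model_types = [
        'bias_only', 'MF', 'MF_dot', 'NeuMF',
        'deepconn', 'deepconn++', 'transnet', 'transnet++', 'NARRE',
    ]
    if example_hyper_params['model_type'] in pytorch_model_types:
        metrics, _, _ = main_pytorch(example_hyper_params)
    else:
        metrics, _, _ = main_surprise(example_hyper_params)

    print(metrics)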