def main_NeuMF(hyper_params, gpu_id=None):
    """Pre-train GMF and MLP, fuse them into NeuMF, then report test metrics.

    Follows the standard NeuMF recipe: each sub-model is trained on its own
    checkpoint path (`<model_path>_gmf`, `<model_path>_mlp`), and the fused
    NeuMF model is warm-started from both before its own training run.

    Returns (metrics, user_count_mse_map, item_count_mse_map) from the
    test-set evaluation.
    """
    from pytorch_models.NeuMF import GMF, MLP, NeuMF
    from data import load_data
    from eval import evaluate, eval_ranking
    from utils import load_user_item_counts, is_cuda_available
    from utils import xavier_init, log_end_epoch
    from loss import MSELoss
    import torch

    user_count, item_count = load_user_item_counts(hyper_params)
    train_reader, test_reader, val_reader, hyper_params = load_data(
        hyper_params)
    t_start = time.time()

    # Remember the base checkpoint path; it is temporarily re-pointed for
    # each pre-training stage below and restored for the final model.
    base_path = hyper_params['model_path']

    # Stage 1: pre-train the GMF branch.
    hyper_params['model_path'] = base_path + "_gmf"
    gmf = GMF(hyper_params)
    if is_cuda_available:
        gmf = gmf.cuda()
    xavier_init(gmf)
    gmf = train_complete(hyper_params, GMF, train_reader, val_reader,
                         user_count, item_count, gmf)

    # Stage 2: pre-train the MLP branch.
    hyper_params['model_path'] = base_path + "_mlp"
    mlp = MLP(hyper_params)
    if is_cuda_available:
        mlp = mlp.cuda()
    xavier_init(mlp)
    mlp = train_complete(hyper_params, MLP, train_reader, val_reader,
                         user_count, item_count, mlp)

    # Stage 3: build NeuMF, warm-start it from both branches, and train.
    # Note: NeuMF is deliberately NOT xavier-initialized — its weights come
    # from the two pre-trained sub-models via `init`.
    hyper_params['model_path'] = base_path
    neumf = NeuMF(hyper_params)
    if is_cuda_available:
        neumf = neumf.cuda()
    neumf.init(gmf, mlp)
    neumf = train_complete(hyper_params, NeuMF, train_reader, val_reader,
                           user_count, item_count, neumf)

    # Test-set MSE (per-user-count / per-item-count breakdowns included).
    mse_criterion = MSELoss(hyper_params)
    metrics, user_count_mse_map, item_count_mse_map = evaluate(
        neumf, mse_criterion, test_reader, hyper_params,
        user_count, item_count, review=False)

    # Test-set ranking quality (HR@1) merged into the same metrics dict.
    metrics.update(eval_ranking(neumf, test_reader, hyper_params,
                                review=False))

    log_end_epoch(hyper_params, metrics, 'final', (time.time() - t_start),
                  metrics_on='(TEST)')

    return metrics, user_count_mse_map, item_count_mse_map
def main_surprise(hyper_params, gpu_id=None):
    """Train and evaluate a `surprise`-library baseline model.

    The model object is callable: invoking it on the train-set rating matrix
    and the test reader performs fitting plus evaluation in one step.

    Returns (metrics, user_count_mse_map, item_count_mse_map).
    """
    from data import load_data
    from utils import load_user_item_counts, log_end_epoch
    from surprise_models import Model

    # Train-set user/item frequencies, used to bucket per-count MSE maps.
    user_count, item_count = load_user_item_counts(hyper_params)
    train_reader, test_reader, val_reader, hyper_params = load_data(
        hyper_params)

    # `surprise` consumes its own matrix format rather than our readers.
    ratings = train_reader.get_surprise_format_data()
    baseline = Model(hyper_params, user_count, item_count)

    began_at = time.time()
    metrics, user_count_mse_map, item_count_mse_map = baseline(
        ratings, test_reader)
    log_end_epoch(hyper_params, metrics, 'final', (time.time() - began_at))

    return metrics, user_count_mse_map, item_count_mse_map
def main_pytorch(hyper_params, gpu_id=None):
    """Train and evaluate one of the PyTorch models selected by
    `hyper_params['model_type']` (DeepCoNN/TransNet/NARRE families or the
    MF-style baselines).

    Returns (metrics, user_count_mse_map, item_count_mse_map) where metrics
    contains both the test-set MSE numbers and the HR@1 ranking results.
    """
    from data import load_data
    from eval import evaluate, eval_ranking
    from utils import load_obj, is_cuda_available
    from utils import load_user_item_counts, xavier_init, log_end_epoch
    from loss import MSELoss

    # Dispatch the model class on `model_type`; note no final `else` —
    # an unknown model_type would leave `Model` unbound and raise later.
    if hyper_params['model_type'] in ['deepconn', 'deepconn++']:
        from pytorch_models.DeepCoNN import DeepCoNN as Model
    elif hyper_params['model_type'] in ['transnet', 'transnet++']:
        from pytorch_models.TransNet import TransNet as Model
    elif hyper_params['model_type'] in ['NARRE']:
        from pytorch_models.NARRE_modify import NARRE as Model
    elif hyper_params['model_type'] in ['bias_only', 'MF', 'MF_dot']:
        from pytorch_models.MF import MF as Model
    import torch

    # Load the data readers.
    # User and item (train-set) counts feed the per-count MSE breakdowns.
    user_count, item_count = load_user_item_counts(hyper_params)
    if hyper_params['model_type'] not in [
            'bias_only', 'MF', 'MF_dot', 'NeuMF'
    ]:
        # Review-based models: prefer the preprocessed fast reader and fall
        # back to the standard loader if the prepped files are missing.
        review_based_model = True
        try:
            from data_fast import load_data_fast
            train_reader, test_reader, val_reader, hyper_params = load_data_fast(
                hyper_params)
            print(
                "Loaded preprocessed epoch files. Should be faster training..."
            )
        except Exception as e:
            # Deliberate best-effort: any failure (missing module or files)
            # degrades to the slower generic loader rather than aborting.
            print("Tried loading preprocessed epoch files, but failed.")
            print(
                "Please consider running `prep_all_data.sh` to make quick data for DeepCoNN/TransNet/NARRE."
            )
            print("This will save large amounts of run time.")
            print("Loading standard (slower) data..")
            train_reader, test_reader, val_reader, hyper_params = load_data(
                hyper_params)
    else:
        # Rating-only baselines use the standard loader directly.
        review_based_model = False
        train_reader, test_reader, val_reader, hyper_params = load_data(
            hyper_params)

    # Initialize the model.
    model = Model(hyper_params)
    if is_cuda_available:
        model = model.cuda()
    xavier_init(model)

    # Train the model.
    start_time = time.time()
    model = train_complete(hyper_params,
                           Model,
                           train_reader,
                           val_reader,
                           user_count,
                           item_count,
                           model,
                           review=review_based_model)

    # Calculating MSE on test-set.
    print("Calculating MSE on test-set")
    criterion = MSELoss(hyper_params)
    metrics, user_count_mse_map, item_count_mse_map = evaluate(
        model,
        criterion,
        test_reader,
        hyper_params,
        user_count,
        item_count,
        review=review_based_model)

    print("Calculating HR@1 on test-set")
    # Calculating HR@1 on test-set: the fast reader lacks what ranking
    # evaluation needs, so reload the test split with the default reader.
    _, test_reader2, _, _ = load_data(
        hyper_params)  # Needs default slow reader
    metrics.update(
        eval_ranking(model,
                     test_reader2,
                     hyper_params,
                     review=review_based_model))

    log_end_epoch(hyper_params,
                  metrics,
                  'final',
                  time.time() - start_time,
                  metrics_on='(TEST)')

    return metrics, user_count_mse_map, item_count_mse_map
def main_MPCN(hyper_params, gpu_id=None):
    """Run the external MPCN baseline (Python-2 subprocess) and parse its logs.

    MPCN is trained via `run_MPCN_in_p2.sh`; this function then scrapes the
    best test MSE and its paired HR@1 from the run's log file, and rebuilds
    the per-user-count / per-item-count MSE maps from the saved test
    predictions.

    Returns (metrics, user_count_mse_map, item_count_mse_map).
    """
    from utils import log_end_epoch, load_obj, load_user_item_counts

    # Resolve a GPU id for the subprocess: explicit arg > env var > 0.
    if gpu_id is None:
        if "CUDA_VISIBLE_DEVICES" in os.environ:
            gpu_id = os.environ["CUDA_VISIBLE_DEVICES"]
        else:
            gpu_id = 0

    # Run MPCN (needs a python 2 environment).
    # NOTE(review): the command is assembled by string concatenation and run
    # through a shell — fine for trusted hyper_params, but not for untrusted
    # input (consider subprocess.run with a list).
    start_time = time.time()
    command = "bash run_MPCN_in_p2.sh " + hyper_params['data_dir']
    command += " " + str(gpu_id) + " " + str(hyper_params['latent_size'])
    os.system(command)

    # This is where MPCN training and evaluation log file would be.
    log_path = "logs/" + hyper_params['data_dir']
    log_path += "RAW_MSE_MPCN_FN_FM/log/logs.txt"

    # Read the log file to extract the best MSE and its paired HR@1.
    with open(log_path, 'r') as f:
        lines = f.readlines()

    # INF is a module-level constant defined elsewhere in this file —
    # presumably a large sentinel; verify it behaves like +infinity.
    best_mse, hr = float(INF), None
    for line_num, line in enumerate(lines):
        if line.startswith("[Test] MSE"):
            mse = float(line.strip().split("=")[-1])
            if mse < best_mse:
                best_mse = mse
                # HR@1 is logged on the line immediately after the MSE line.
                hr = float(lines[line_num + 1][5:].strip())

    metrics = {
        'MSE': round(best_mse, 4),
        'HR@1': hr,
        'dataset': hyper_params['dataset'],
    }
    log_end_epoch(hyper_params, metrics, 'FINAL', (time.time() - start_time))

    # Load test data and (train-set) user/item frequency counts.
    test_data = load_obj(hyper_params['data_dir'] + 'test')
    user_count, item_count = load_user_item_counts(hyper_params)

    # Load saved per-example test predictions, one float per line.
    with open("logs/" + hyper_params['data_dir'] + "test_preds.txt",
              "r") as f:
        test_results = [float(line.strip()) for line in f]

    # Sanity check: one prediction per test example.
    assert len(test_results) == len(test_data)

    # Bucket each example's squared error by its user's and item's
    # train-set interaction count (0 for cold-start users/items).
    user_count_mse_map, item_count_mse_map = {}, {}
    for example, pred in zip(test_data, test_results):
        user, item = int(example[0]), int(example[1])
        rating = float(example[2])
        user_c = user_count.get(user, 0)
        item_c = item_count.get(item, 0)
        mse = (rating - float(pred)) ** 2
        user_count_mse_map.setdefault(user_c, []).append(mse)
        item_count_mse_map.setdefault(item_c, []).append(mse)

    return metrics, user_count_mse_map, item_count_mse_map