# Compile with the selected optimizer
if learner.lower() == "adagrad":
    model.compile(optimizer=Adagrad(lr=learning_rate), loss='binary_crossentropy')
elif learner.lower() == "rmsprop":
    model.compile(optimizer=RMSprop(lr=learning_rate), loss='binary_crossentropy')
elif learner.lower() == "adam":
    model.compile(optimizer=Adam(lr=learning_rate), loss='binary_crossentropy')
else:
    model.compile(optimizer=SGD(lr=learning_rate), loss='binary_crossentropy')

# Load pretrained GMF and MLP weights into NeuMF, if provided
if mf_pretrain != '' and mlp_pretrain != '':
    gmf_model = GMF.get_model(num_users, num_items, mf_dim)
    gmf_model.load_weights(mf_pretrain)
    mlp_model = MLP.get_model(num_users, num_items, layers, reg_layers)
    mlp_model.load_weights(mlp_pretrain)
    model = load_pretrain_model(model, gmf_model, mlp_model, len(layers))
    print("Load pretrained GMF (%s) and MLP (%s) models done." % (mf_pretrain, mlp_pretrain))

# Initial performance before training
(hits, ndcgs) = evaluate_model(model, testRatings, testNegatives, topK, evaluation_threads)
hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
print('Init: HR = %.4f, NDCG = %.4f' % (hr, ndcg))
best_hr, best_ndcg, best_iter = hr, ndcg, -1
if args.out > 0:
    model.save_weights(model_out_file, overwrite=True)
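# For reference, a minimal sketch of what load_pretrain_model does in the
# original NeuMF implementation: copy the pretrained GMF/MLP embeddings and
# MLP hidden layers into NeuMF, then initialize the fused prediction layer
# as a 0.5/0.5 blend of the two pretrained prediction layers. The layer
# names below are assumptions and must match those used in get_model.
import numpy as np

def load_pretrain_model_sketch(model, gmf_model, mlp_model, num_layers):
    # MF embeddings
    model.get_layer('mf_embedding_user').set_weights(
        gmf_model.get_layer('user_embedding').get_weights())
    model.get_layer('mf_embedding_item').set_weights(
        gmf_model.get_layer('item_embedding').get_weights())
    # MLP embeddings
    model.get_layer('mlp_embedding_user').set_weights(
        mlp_model.get_layer('user_embedding').get_weights())
    model.get_layer('mlp_embedding_item').set_weights(
        mlp_model.get_layer('item_embedding').get_weights())
    # MLP hidden layers
    for i in range(1, num_layers):
        model.get_layer('layer%d' % i).set_weights(
            mlp_model.get_layer('layer%d' % i).get_weights())
    # Blend the two pretrained prediction layers with equal weight
    gmf_w, gmf_b = gmf_model.get_layer('prediction').get_weights()
    mlp_w, mlp_b = mlp_model.get_layer('prediction').get_weights()
    model.get_layer('prediction').set_weights(
        [0.5 * np.concatenate((gmf_w, mlp_w), axis=0), 0.5 * (gmf_b + mlp_b)])
    return model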
print "number of training epochs for pretrain and full model is " + str(num_pretrain_epochs) num_final_epochs = num_pretrain_epochs data_management.load_data() interaction_mx = np.load('input/int_mat.npy') inputs, labels = data_management.training_data_generation('input/training_data.npy', 'input/int_mat.npy') labels = keras.utils.to_categorical(labels, 6) # pretrain MLP MLP.train_mlp(num_predictive_factors=num_predictive_factors, batch_size=batch_size, epochs=num_pretrain_epochs, interaction_mx=interaction_mx, inputs=inputs, labels=labels) # pretrain GMF GMF.train_gmf(num_predictive_factors=num_predictive_factors, batch_size=batch_size, epochs=num_pretrain_epochs, interaction_mx=interaction_mx, inputs=inputs, labels=labels) # check out the shared vision guide at https://keras.io/getting-started/functional-api-guide/ user_input = Input(shape=(1,), name='user_input') item_input = Input(shape=(1,), name='item_input') # ----- MLP Model ----- mlp = MLP.create_model(num_users=interaction_mx.shape[0], num_items=interaction_mx.shape[1], num_predictive_factors=num_predictive_factors, pretrain=False) mlp_output = mlp([user_input, item_input]) # ----- GMF Model -----
num_books = r.book_id.max()
num_users = r.user_id.max()

reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(r[['user_id', 'book_id', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=0.2, random_state=8)

algoStats = {}

######## MLP results ##########
model2 = MLP.get_model(num_users, num_books, layers=[64, 32, 16, 8], reg_layers=[0, 0, 0, 0])
modelFile = "Pretrain/gb-10k_MLP_[64,32,16,8]_trainset.h5"
model2.load_weights(modelFile)
algoStats["MLP"] = rankInTopK(model2, testset, num_books, k=1000)

######## GMF results ##########
model = GMF.get_model(num_users, num_books, 8)
modelFile = "Pretrain/gb-10k_GMF_8_trainset.h5"
model.load_weights(modelFile)
algoStats["GMF"] = rankInTopK(model, testset, num_books, k=1000)

######## NeuMF results ##########
model3 = NeuMF.get_model(num_users, num_books, mf_dim=8, layers=[64, 32, 16, 8],
                         reg_layers=[0, 0, 0, 0], reg_mf=0)
modelFile = "Pretrain/gb-10k_NeuMF_8_[64,32,16,8]_trainset.h5"
model3.load_weights(modelFile)
algoStats["NeuMF"] = rankInTopK(model3, testset, num_books, k=1000)

with open("NeuCFstats.json", 'w') as inp:
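# rankInTopK is this repo's own evaluation helper and is not shown here.
# A plausible minimal sketch under the assumption that it reports the
# fraction of held-out interactions whose true item lands in the user's
# top-k predictions (the name and signature mirror the calls above):
import numpy as np

def rank_in_top_k_sketch(model, testset, num_books, k=1000):
    hits, total = 0, 0
    for user_id, book_id, _rating in testset:  # Surprise testset: (uid, iid, rating)
        users = np.full(num_books, user_id, dtype='int32')
        items = np.arange(num_books, dtype='int32')
        scores = model.predict([users, items], batch_size=4096, verbose=0).flatten()
        top_k = np.argsort(scores)[::-1][:k]   # item indices with the k highest scores
        hits += int(book_id in top_k)
        total += 1
    return hits / total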
dimensions = np.load(dimensions_file)
inputs, labels = data_management_yelp.training_data_generation(
    'input/training_data.npy', 'input/training_reviews.npy')
print(labels[0:100])

# Pretrain MLP
MLP.train_mlp(num_predictive_factors=num_predictive_factors,
              batch_size=batch_size,
              epochs=num_pretrain_epochs,
              dimensions=dimensions,
              inputs=inputs,
              labels=labels)

# Pretrain GMF
GMF.train_gmf(num_predictive_factors=num_predictive_factors,
              batch_size=batch_size,
              epochs=num_pretrain_epochs,
              dimensions=dimensions,
              inputs=inputs,
              labels=labels)

# See the shared-vision guide at https://keras.io/getting-started/functional-api-guide/
user_input = Input(shape=(1,), name='user_input')
item_input = Input(shape=(1,), name='item_input')
review_input = Input(shape=(100,), name='review_input')

# ----- MLP Model -----
mlp = MLP.create_model(num_users=dimensions[0],
                       num_items=dimensions[1],
                       num_predictive_factors=num_predictive_factors,
                       pretrain=False)
mlp_output = mlp([user_input, item_input])
def train_GMF(self, outpath, data):
    print('pretraining GMF model ...')  # was mislabeled "MLP" in the original
    self.gmf = GMF.main(outpath, data=data)
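# For reference, a minimal GMF tower as described in the NeuMF paper:
# user and item embeddings combined by element-wise product, followed by a
# single sigmoid unit. This is a sketch of the standard architecture, not
# necessarily what GMF.main builds in this repo.
from keras.layers import Input, Embedding, Flatten, Multiply, Dense
from keras.models import Model

def gmf_sketch(num_users, num_items, latent_dim=8):
    user_input = Input(shape=(1,), dtype='int32')
    item_input = Input(shape=(1,), dtype='int32')
    user_latent = Flatten()(Embedding(num_users, latent_dim)(user_input))
    item_latent = Flatten()(Embedding(num_items, latent_dim)(item_input))
    pred_vector = Multiply()([user_latent, item_latent])  # element-wise product
    prediction = Dense(1, activation='sigmoid', name='prediction')(pred_vector)
    return Model([user_input, item_input], prediction)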
def fit(name_data='100k', batch_size=2048):
    # args = parse_args()
    args = Args()
    num_epochs = args.epochs
    # batch_size = args.batch_size
    mf_dim = args.num_factors
    layers = eval(args.layers)
    reg_mf = eval(args.reg_mf)
    reg_layers = eval(args.reg_layers)
    num_negatives = args.num_neg
    learning_rate = args.lr
    learner = args.learner
    verbose = args.verbose
    mf_pretrain = args.mf_pretrain
    mlp_pretrain = args.mlp_pretrain
    num_tasks = args.num_tasks

    # Override args
    args.dataset = name_data
    args.batch_size = batch_size

    topK = 10
    evaluation_threads = 1  # mp.cpu_count()
    print("NeuMF arguments: %s " % (args))
    model_out_file = 'Pretrain/%s_MNeuMF_%d_%s_%d.h5' % (args.dataset, mf_dim, args.layers, time())
    result_out_file = 'outputs/%s_MNeuMF_%d_%s_%d.csv' % (args.dataset, mf_dim, args.layers, time())

    # Loading data
    t1 = time()
    if args.dataset == '1m':
        num_users = 6040
        num_items = 3706
    elif args.dataset == '100k':
        num_users = 671
        num_items = 9125
    else:
        raise Exception('wrong dataset size!!!')
    dataset = Dataset(args.path, args.dataset)
    train, testRatings, testNegatives, genreList = (dataset.train_ratings, dataset.test_ratings,
                                                    dataset.negatives, dataset.genre)
    print("Load data done [%.1f s]. #user=%d, #item=%d, #train=%d, #test=%d"
          % (time()-t1, num_users, num_items, train.shape[0], testRatings.shape[0]))

    # Build model
    model = get_model(num_users, num_items, num_tasks, mf_dim, layers, reg_layers, reg_mf)
    if learner.lower() == "adagrad":
        model.compile(optimizer=Adagrad(lr=learning_rate), loss='binary_crossentropy')
    elif learner.lower() == "rmsprop":
        model.compile(optimizer=RMSprop(lr=learning_rate), loss='binary_crossentropy')
    elif learner.lower() == "adam":
        model.compile(optimizer=Adam(lr=learning_rate), loss='binary_crossentropy')
    else:
        model.compile(optimizer=SGD(lr=learning_rate), loss='binary_crossentropy')

    # Load pretrained GMF and MLP weights, if provided
    if mf_pretrain != '' and mlp_pretrain != '':
        gmf_model = GMF.get_model(num_users, num_items, mf_dim)
        gmf_model.load_weights(mf_pretrain)
        mlp_model = MLP.get_model(num_users, num_items, layers, reg_layers)
        mlp_model.load_weights(mlp_pretrain)
        model = load_pretrain_model(model, gmf_model, mlp_model, len(layers))
        print("Load pretrained GMF (%s) and MLP (%s) models done." % (mf_pretrain, mlp_pretrain))
" %(mf_pretrain, mlp_pretrain)) # Init performance (hits, ndcgs) = evaluate_model(model, testRatings, testNegatives, genreList, topK, evaluation_threads) hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean() print('Init: HR = %.4f, NDCG = %.4f' % (hr, ndcg)) best_hr, best_ndcg, best_iter = hr, ndcg, -1 if args.out > 0: model.save_weights(model_out_file, overwrite=True) # save Hit ratio and ndcg, loss output = pd.DataFrame(columns=['hr', 'ndcg']) output.loc[0] = [hr, ndcg] # Generate training instances user_input, item_input, labels = get_train_instances(train, num_negatives) genre_input = item_to_onehot_genre(item_input, genreList) # Training model for epoch in range(int(num_epochs)): t1 = time() # Training hist = model.fit([np.array(user_input), np.array(item_input), genre_input], #input np.array(labels), # labels batch_size=batch_size, epochs=1, verbose=verbose, shuffle=True) t2 = time() # Evaluation if epoch %1 == 0: (hits, ndcgs) = evaluate_model(model, testRatings, testNegatives, genreList, topK, evaluation_threads) hr, ndcg, loss = np.array(hits).mean(), np.array(ndcgs).mean(), hist.history['loss'][0] print('Iteration %d [%.1f s]: HR = %.4f, NDCG = %.4f, loss = %.4f [%.1f s]' % (epoch, t2-t1, hr, ndcg, loss, time()-t2)) output.loc[epoch+1] = [hr, ndcg] if hr > best_hr: best_hr, best_ndcg, best_iter = hr, ndcg, epoch if args.out > 0: model.save_weights(model_out_file, overwrite=True) print("End. Best Iteration %d: HR = %.4f, NDCG = %.4f. " %(best_iter, best_hr, best_ndcg)) if args.out > 0: print("The best NeuMF model is saved to %s" %(model_out_file)) output.to_csv(result_out_file) return([best_iter, best_hr, best_ndcg])
if __name__=="__main__": data_file = os.path.join(args.data_path, args.data_set) train_data, test_data, user_num, item_num, train_mat = data_util.load_all(data_file) train_dataset = data_util.NCFData(train_data, item_num, train_mat, args.num_ng, True) test_dataset = data_util.NCFData(test_data, item_num, train_mat, 0, False) train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=4) test_loader = data.DataLoader(test_dataset, batch_size=args.test_num_ng + 1, shuffle=False, num_workers=0) GMF_model_path = os.path.join(args.model_path, 'GMF.pth') MLP_model_path = os.path.join(args.model_path, 'MLP.pth') if args.use_pretrained: assert os.path.exists(GMF_model_path), 'lack of GMF model' assert os.path.exists(MLP_model_path), 'lack of MLP model' GMF_model = GMF.GMF(user_num, item_num, args.embedding_dim_GMF, args.dropout) GMF_model.load_state_dict(torch.load(GMF_model_path)) MLP_model = MLP.MLP(user_num, item_num, args.embedding_dim_MLP, args.hidden_layer_MLP, args.dropout) MLP_model.load_state_dict(torch.load(MLP_model_path)) else: GMF_model = None MLP_model = None model = NeuMF(user_num, item_num, args.embedding_dim_GMF, args.embedding_dim_MLP, args.hidden_layer_MLP, args.dropout, GMF_model, MLP_model) model.to(device=args.device) loss_function = nn.BCEWithLogitsLoss() if args.use_pretrained: optimizer = optim.SGD(model.parameters(), lr=args.lr) else:
print(("Load data done [%.1f s]. #user=%d, #item=%d, #train=%d, #test=%d"
       % (time()-t1, num_users, num_items, train.nnz, len(testRatings))))

# Build model
model = get_model(num_users, num_items, num_factors, layers, reg_layers, reg_mf)
if learner.lower() == "adagrad":
    model.compile(optimizer=Adagrad(lr=learning_rate), loss='binary_crossentropy')
elif learner.lower() == "rmsprop":
    model.compile(optimizer=RMSprop(lr=learning_rate), loss='binary_crossentropy')
elif learner.lower() == "adam":
    model.compile(optimizer=Adam(lr=learning_rate), loss='binary_crossentropy')
else:
    model.compile(optimizer=SGD(lr=learning_rate), loss='binary_crossentropy')

# Load pretrained GMF and MLP weights, if provided
if mf_pretrain != '' and mlp_pretrain != '':
    gmf_model = GMF.get_model(num_users, num_items, num_factors)
    gmf_model.load_weights(mf_pretrain)
    mlp_model = MLP.get_model(num_users, num_items, num_factors, layers, reg_layers)
    mlp_model.load_weights(mlp_pretrain)
    model = load_pretrain_model(model, gmf_model, mlp_model, len(layers))
    print(("Load pretrained GMF (%s) and MLP (%s) models done." % (mf_pretrain, mlp_pretrain)))

# Init performance
(hits, ndcgs) = evaluate_model(model, testRatings, testNegatives, topK, evaluation_threads)
hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
print(('Init: HR = %.4f, NDCG = %.4f' % (hr, ndcg)))
best_hr, best_ndcg, best_iter = hr, ndcg, -1
if args.out > 0:
    model.save_weights(model_out_file, overwrite=True)

# Training model
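# evaluate_model, used throughout these scripts, follows the leave-one-out
# protocol of the reference NeuMF code: rank each test user's held-out item
# among its sampled negatives, then average HR@K and NDCG@K over users.
# A single-test-case sketch under that assumption:
import math
import heapq
import numpy as np

def eval_one_rating_sketch(model, user, gt_item, negatives, top_k=10):
    items = negatives + [gt_item]
    users = np.full(len(items), user, dtype='int32')
    scores = model.predict([users, np.array(items)], batch_size=128, verbose=0).flatten()
    item_scores = dict(zip(items, scores))
    ranklist = heapq.nlargest(top_k, item_scores, key=item_scores.get)
    hr = 1.0 if gt_item in ranklist else 0.0  # HR@K: held-out item ranked at all?
    # NDCG@K: log-discounted gain by rank position (0 if not ranked)
    ndcg = math.log(2) / math.log(ranklist.index(gt_item) + 2) if gt_item in ranklist else 0.0
    return hr, ndcg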