Example 1
    if learner.lower() == "adagrad":
        model.compile(optimizer=Adagrad(lr=learning_rate),
                      loss='binary_crossentropy')
    elif learner.lower() == "rmsprop":
        model.compile(optimizer=RMSprop(lr=learning_rate),
                      loss='binary_crossentropy')
    elif learner.lower() == "adam":
        model.compile(optimizer=Adam(lr=learning_rate),
                      loss='binary_crossentropy')
    else:
        model.compile(optimizer=SGD(lr=learning_rate),
                      loss='binary_crossentropy')

    # Load pretrained models
    if mf_pretrain != '' and mlp_pretrain != '':
        gmf_model = GMF.get_model(num_users, num_items, mf_dim)
        gmf_model.load_weights(mf_pretrain)
        mlp_model = MLP.get_model(num_users, num_items, layers, reg_layers)
        mlp_model.load_weights(mlp_pretrain)
        model = load_pretrain_model(model, gmf_model, mlp_model, len(layers))
        print("Load pretrained GMF (%s) and MLP (%s) models done. " %
              (mf_pretrain, mlp_pretrain))

    # Init performance
    (hits, ndcgs) = evaluate_model(model, testRatings, testNegatives, topK,
                                   evaluation_threads)
    hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
    print('Init: HR = %.4f, NDCG = %.4f' % (hr, ndcg))
    best_hr, best_ndcg, best_iter = hr, ndcg, -1
    if args.out > 0:
        model.save_weights(model_out_file, overwrite=True)
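load_pretrain_model is defined elsewhere in the reference NeuMF code. Its key step is fusing the two pretrained prediction layers with an equal 0.5/0.5 trade-off, as in the NeuMF paper; a minimal sketch of that step (the layer name 'prediction' follows the reference code, the rest is an assumption):

import numpy as np

def load_pretrain_model(model, gmf_model, mlp_model, num_layers):
    # ... GMF/MLP embedding and hidden-layer weights are copied over by name ...
    # fuse the two pretrained prediction layers with an equal 0.5 trade-off
    gmf_w, gmf_b = gmf_model.get_layer('prediction').get_weights()
    mlp_w, mlp_b = mlp_model.get_layer('prediction').get_weights()
    new_w = np.concatenate((gmf_w, mlp_w), axis=0)
    model.get_layer('prediction').set_weights([0.5 * new_w, 0.5 * (gmf_b + mlp_b)])
    return model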
Example 2
        print "number of training epochs for pretrain and full model is " + str(num_pretrain_epochs)


num_final_epochs = num_pretrain_epochs



data_management.load_data()
interaction_mx = np.load('input/int_mat.npy')
inputs, labels = data_management.training_data_generation('input/training_data.npy', 'input/int_mat.npy')
labels = keras.utils.to_categorical(labels, 6)
# pretrain MLP
MLP.train_mlp(num_predictive_factors=num_predictive_factors, batch_size=batch_size, epochs=num_pretrain_epochs,
              interaction_mx=interaction_mx, inputs=inputs, labels=labels)
# pretrain GMF
GMF.train_gmf(num_predictive_factors=num_predictive_factors, batch_size=batch_size, epochs=num_pretrain_epochs,
              interaction_mx=interaction_mx, inputs=inputs, labels=labels)

# check out the shared vision guide at https://keras.io/getting-started/functional-api-guide/
user_input = Input(shape=(1,), name='user_input')
item_input = Input(shape=(1,), name='item_input')


# ----- MLP Model -----
mlp = MLP.create_model(num_users=interaction_mx.shape[0],
                       num_items=interaction_mx.shape[1],
                       num_predictive_factors=num_predictive_factors,
                       pretrain=False)
mlp_output = mlp([user_input, item_input])


# ----- GMF Model -----
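The excerpt stops at the GMF section header. A minimal sketch of how the two branches are usually fused into NeuMF, assuming GMF.create_model mirrors the MLP.create_model signature above (hypothetical) and using a 6-way softmax head to match the to_categorical(labels, 6) targets:

from keras.layers import Dense, concatenate  # assumed imports
from keras.models import Model

gmf = GMF.create_model(num_users=interaction_mx.shape[0],
                       num_items=interaction_mx.shape[1],
                       num_predictive_factors=num_predictive_factors,
                       pretrain=False)
gmf_output = gmf([user_input, item_input])

# concatenate both branches; the classic NeuMF head is a single sigmoid unit,
# but this script's 6-class labels suggest a softmax head instead
merged = concatenate([gmf_output, mlp_output])
prediction = Dense(6, activation='softmax', name='prediction')(merged)
model = Model(inputs=[user_input, item_input], outputs=prediction)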
Example 3
    num_books = r.book_id.max()
    num_users = r.user_id.max()
    reader = Reader(rating_scale=(1, 5))
    data = Dataset.load_from_df(r[['user_id', 'book_id', 'rating']], reader)
    trainset, testset = train_test_split(data, test_size=0.2, random_state=8)
    algoStats = {}
    ######## MLP results ##########
    model2 = MLP.get_model(num_users,
                           num_books,
                           layers=[64, 32, 16, 8],
                           reg_layers=[0, 0, 0, 0])
    modelFile = "Pretrain/gb-10k_MLP_[64,32,16,8]_trainset.h5"
    model2.load_weights(modelFile)
    algoStats["MLP"] = rankInTopK(model2, testset, num_books, k=1000)
    ######## GMF results ##########
    model = GMF.get_model(num_users, num_books, 8)
    modelFile = "Pretrain/gb-10k_GMF_8_trainset.h5"
    model.load_weights(modelFile)
    algoStats["GMF"] = rankInTopK(model, testset, num_books, k=1000)
    ######## NeuMF results ##########
    model3 = NeuMF.get_model(num_users,
                             num_books,
                             mf_dim=8,
                             layers=[64, 32, 16, 8],
                             reg_layers=[0, 0, 0, 0],
                             reg_mf=0)
    modelFile = "Pretrain/gb-10k_NeuMF_8_[64,32,16,8]_trainset.h5"
    model3.load_weights(modelFile)
    algoStats["NeuMF"] = rankInTopK(model3, testset, num_books, k=1000)

    with open("NeuCFstats.json", 'w') as inp:
Example 4
dimensions = np.load(dimensions_file)
inputs, labels = data_management_yelp.training_data_generation(
    'input/training_data.npy', 'input/training_reviews.npy')
print(labels[0:100])

# pretrain MLP
MLP.train_mlp(num_predictive_factors=num_predictive_factors,
              batch_size=batch_size,
              epochs=num_pretrain_epochs,
              dimensions=dimensions,
              inputs=inputs,
              labels=labels)
# pretrain GMF
GMF.train_gmf(num_predictive_factors=num_predictive_factors,
              batch_size=batch_size,
              epochs=num_pretrain_epochs,
              dimensions=dimensions,
              inputs=inputs,
              labels=labels)

# check out the shared vision guide at https://keras.io/getting-started/functional-api-guide/

user_input = Input(shape=(1, ), name='user_input')
item_input = Input(shape=(1, ), name='item_input')
review_input = Input(shape=(100, ), name='review_input')

# ----- MLP Model -----
mlp = MLP.create_model(num_users=dimensions[0],
                       num_items=dimensions[1],
                       num_predictive_factors=num_predictive_factors,
                       pretrain=False)
mlp_output = mlp([user_input, item_input])
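review_input is declared above but the excerpt ends before it is used; one plausible way to wire it in, assuming the 100-dim review vector is a dense feature (the review_dense layer is hypothetical):

from keras.layers import Dense, concatenate  # assumed imports

# hypothetical: project the review representation and concatenate it with the
# MLP branch before the final prediction layer
review_features = Dense(num_predictive_factors, activation='relu',
                        name='review_dense')(review_input)
mlp_plus_reviews = concatenate([mlp_output, review_features])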
Example 5
    def train_GMF(self, outpath, data):
        print('pretraining GMF model ...')
        self.gmf = GMF.main(outpath, data=data)
Example 6
def fit(name_data='100k', batch_size=2048):
    # args = parse_args()
    args = Args()
    num_epochs = args.epochs
    #batch_size = args.batch_size
    mf_dim = args.num_factors
    layers = eval(args.layers)          # e.g. "[64,32,16,8]"; ast.literal_eval would be safer
    reg_mf = eval(args.reg_mf)
    reg_layers = eval(args.reg_layers)
    num_negatives = args.num_neg
    learning_rate = args.lr
    learner = args.learner
    verbose = args.verbose
    mf_pretrain = args.mf_pretrain
    mlp_pretrain = args.mlp_pretrain
    num_tasks = args.num_tasks

    # Override args
    args.dataset = name_data
    args.batch_size = batch_size
            
    topK = 10
    evaluation_threads = 1  # mp.cpu_count()
    print("NeuMF arguments: %s " % (args))
    model_out_file = 'Pretrain/%s_MNeuMF_%d_%s_%d.h5' % (args.dataset, mf_dim, args.layers, time())
    result_out_file = 'outputs/%s_MNeuMF_%d_%s_%d.csv' % (args.dataset, mf_dim, args.layers, time())

    # Loading data
    t1 = time()
    if args.dataset == '1m':
        num_users = 6040
        num_items = 3706
    elif args.dataset == '100k':
        num_users = 671
        num_items = 9125
    else:
        raise ValueError('unknown dataset: %s' % args.dataset)

    dataset = Dataset(args.path, args.dataset)
    train, testRatings, testNegatives, genreList = dataset.train_ratings, dataset.test_ratings, dataset.negatives, dataset.genre

    print("Load data done [%.1f s]. #user=%d, #item=%d, #train=%d, #test=%d" 
        %(time()-t1, num_users, num_items, train.shape[0], testRatings.shape[0]))
    
    # Build model
    model = get_model(num_users, num_items, num_tasks, mf_dim, layers, reg_layers, reg_mf)
    if learner.lower() == "adagrad": 
        model.compile(optimizer=Adagrad(lr=learning_rate), loss='binary_crossentropy')
    elif learner.lower() == "rmsprop":
        model.compile(optimizer=RMSprop(lr=learning_rate), loss='binary_crossentropy')
    elif learner.lower() == "adam":
        model.compile(optimizer=Adam(lr=learning_rate), loss='binary_crossentropy')
    else:
        model.compile(optimizer=SGD(lr=learning_rate), loss='binary_crossentropy')
    
    # Load pretrained models
    if mf_pretrain != '' and mlp_pretrain != '':
        gmf_model = GMF.get_model(num_users,num_items,mf_dim)
        gmf_model.load_weights(mf_pretrain)
        mlp_model = MLP.get_model(num_users,num_items, layers, reg_layers)
        mlp_model.load_weights(mlp_pretrain)
        model = load_pretrain_model(model, gmf_model, mlp_model, len(layers))
        print("Load pretrained GMF (%s) and MLP (%s) models done. " %(mf_pretrain, mlp_pretrain))
        
    # Init performance
    (hits, ndcgs) = evaluate_model(model, testRatings, testNegatives, genreList, topK, evaluation_threads)
    hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
    print('Init: HR = %.4f, NDCG = %.4f' % (hr, ndcg))
    best_hr, best_ndcg, best_iter = hr, ndcg, -1
    if args.out > 0:
        model.save_weights(model_out_file, overwrite=True) 

    # save Hit ratio and ndcg, loss
    output = pd.DataFrame(columns=['hr', 'ndcg'])
    output.loc[0] = [hr, ndcg]

    # Generate training instances
    user_input, item_input, labels = get_train_instances(train, num_negatives) 
    genre_input = item_to_onehot_genre(item_input, genreList)


    # Training model
    for epoch in range(int(num_epochs)):
        t1 = time()
        
        # Training
        hist = model.fit([np.array(user_input), np.array(item_input), genre_input], #input
                         np.array(labels), # labels 
                         batch_size=batch_size, epochs=1, verbose=verbose, shuffle=True)
        t2 = time()
        
        # Evaluation
        if epoch % 1 == 0:  # evaluate every epoch
            (hits, ndcgs) = evaluate_model(model, testRatings, testNegatives, genreList, topK, evaluation_threads)
            hr, ndcg, loss = np.array(hits).mean(), np.array(ndcgs).mean(), hist.history['loss'][0]
            print('Iteration %d [%.1f s]: HR = %.4f, NDCG = %.4f, loss = %.4f [%.1f s]' 
                  % (epoch,  t2-t1, hr, ndcg, loss, time()-t2))
            output.loc[epoch+1] = [hr, ndcg]
            if hr > best_hr:
                best_hr, best_ndcg, best_iter = hr, ndcg, epoch
                if args.out > 0:
                    model.save_weights(model_out_file, overwrite=True)
    
    print("End. Best Iteration %d:  HR = %.4f, NDCG = %.4f. " %(best_iter, best_hr, best_ndcg))
    if args.out > 0:
        print("The best NeuMF model is saved to %s" %(model_out_file))

    output.to_csv(result_out_file)
    return [best_iter, best_hr, best_ndcg]
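Given the Args class, the Dataset loader, and the Pretrain/ and outputs/ directories on disk, a run reduces to a single call; a hedged usage sketch:

best_iter, best_hr, best_ndcg = fit(name_data='100k', batch_size=2048)
print('Best epoch %d: HR = %.4f, NDCG = %.4f' % (best_iter, best_hr, best_ndcg))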
Example 7
if __name__ == "__main__":
    data_file = os.path.join(args.data_path, args.data_set)
    train_data, test_data, user_num, item_num, train_mat = data_util.load_all(data_file)

    train_dataset = data_util.NCFData(train_data, item_num, train_mat, args.num_ng, True)
    test_dataset = data_util.NCFData(test_data, item_num, train_mat, 0, False)
    train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=4)
    test_loader = data.DataLoader(test_dataset, batch_size=args.test_num_ng + 1, shuffle=False, num_workers=0)

    GMF_model_path = os.path.join(args.model_path, 'GMF.pth')
    MLP_model_path = os.path.join(args.model_path, 'MLP.pth')
    if args.use_pretrained:
        assert os.path.exists(GMF_model_path), 'lack of GMF model'
        assert os.path.exists(MLP_model_path), 'lack of MLP model'
        GMF_model = GMF.GMF(user_num, item_num, args.embedding_dim_GMF, args.dropout)
        GMF_model.load_state_dict(torch.load(GMF_model_path))
        
        MLP_model = MLP.MLP(user_num, item_num, args.embedding_dim_MLP, args.hidden_layer_MLP, args.dropout)
        MLP_model.load_state_dict(torch.load(MLP_model_path))
    else:
        GMF_model = None
        MLP_model = None
    model = NeuMF(user_num, item_num, args.embedding_dim_GMF, args.embedding_dim_MLP,
                  args.hidden_layer_MLP, args.dropout, GMF_model, MLP_model)
    model.to(device=args.device)
    loss_function = nn.BCEWithLogitsLoss()

    if args.use_pretrained:
        optimizer = optim.SGD(model.parameters(), lr=args.lr)
    else:
        # the source is cut off here; following the reference NCF implementation,
        # Adam is used when training from scratch (the SGD branch above is
        # reserved for fine-tuning the pretrained GMF/MLP weights)
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
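The excerpt stops right after the optimizer choice. A minimal sketch of the training loop these pieces feed into, assuming the usual PyTorch pattern and that NCFData exposes an ng_sample() method to redraw negative samples each epoch (both assumptions, not shown above):

    for epoch in range(args.epochs):
        model.train()
        train_loader.dataset.ng_sample()  # assumed NCFData API: resample negatives
        for user, item, label in train_loader:
            user = user.to(args.device)
            item = item.to(args.device)
            label = label.float().to(args.device)

            optimizer.zero_grad()
            prediction = model(user, item)          # raw logits
            loss = loss_function(prediction, label) # BCEWithLogitsLoss from above
            loss.backward()
            optimizer.step()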