Example no. 1
def run(args):
    print(1)

    data_loader = DataHelper(args, args.data_path, args.save_dir)
    data_loader.choose_data()

    data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype={'ItemId': np.int64})
    valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64})
    args.n_items = len(data['ItemId'].unique())

    args.dropout_p_hidden = 1.0 if args.is_training == 0 else args.dropout

    print(args.n_epochs)

    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    with tf.Session(config=gpu_config) as sess:
        gru = train.GRU4Rec(sess, args)
        if args.is_training==1:
            gru.fit(data)

        else:
            res = evaluation.evaluate_sessions_batch(gru, data, valid)
            print('Recall@20: {}\tMRR@20: {}'.format(res[0], res[1]))
Example no. 2
    def main():
        command_line = parseArgs()
        data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype={'ItemId': np.int64})
        valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64})
        # parameters
        args = Args()
        args.n_items = len(data['ItemId'].unique())
        args.layers = command_line.layer
        args.rnn_size = command_line.size
        args.n_epochs = command_line.epoch
        args.learning_rate = command_line.lr
        args.is_training = command_line.train
        args.test_model = command_line.test
        args.hidden_act = command_line.hidden_act
        args.final_act = command_line.final_act
        args.loss = command_line.loss
        args.dropout_p_hidden = 1.0 if args.is_training == 0 else command_line.dropout
        print(args.dropout_p_hidden)

        # print data stat
        data_stat = data_utils.DataUtils(data, valid, args)
        data_stat.session_stat()

        if not os.path.exists(args.checkpoint_dir):
            os.mkdir(args.checkpoint_dir)
        gpu_config = tf.ConfigProto()
        gpu_config.gpu_options.allow_growth = True
        with tf.Session(config=gpu_config) as sess:
            gru = model.GRU4Rec(sess, args)
            if args.is_training:
                gru.fit(data)
            else:
                res = evaluation.evaluate_sessions_batch(gru, data, valid)
                print('Recall@20: {}\tMRR@20: {}'.format(res[0], res[1]))
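Several of these examples (nos. 2, 4, 7, 11, 12, 15) rely on a parseArgs() helper and an Args container that are not shown. The sketch below is only an illustration of what such a helper could look like, built with argparse: the flag names are inferred from the attributes read above (command_line.layer, command_line.size, ...), while the defaults are guesses rather than the original values.

import argparse

def parseArgs():
    # Illustrative sketch only: flag names inferred from usage above, defaults are guesses.
    parser = argparse.ArgumentParser(description='GRU4Rec training / evaluation options')
    parser.add_argument('--layer', type=int, default=1)              # -> args.layers
    parser.add_argument('--size', type=int, default=100)             # -> args.rnn_size
    parser.add_argument('--epoch', type=int, default=10)             # -> args.n_epochs
    parser.add_argument('--lr', type=float, default=0.001)           # -> args.learning_rate
    parser.add_argument('--train', type=int, default=1)              # -> args.is_training (1 = train, 0 = test)
    parser.add_argument('--test', type=int, default=2)               # -> args.test_model (checkpoint index)
    parser.add_argument('--hidden_act', type=str, default='tanh')    # -> args.hidden_act
    parser.add_argument('--final_act', type=str, default='softmax')  # -> args.final_act
    parser.add_argument('--loss', type=str, default='cross-entropy') # -> args.loss
    parser.add_argument('--dropout', type=float, default=0.5)        # -> args.dropout_p_hidden
    return parser.parse_args()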
Example no. 3
    gru.fit(data)
    
    res = evaluation.evaluate_sessions_batch(gru, valid, None)
    print('Recall@20: {}'.format(res[0]))
    print('MRR@20: {}'.format(res[1]))
    '''

    #Reproducing results from "Recurrent Neural Networks with Top-k Gains for Session-based Recommendations" on RSC15 (http://arxiv.org/abs/1706.03847)

    print('Training GRU4Rec with 256 hidden units')

    #gru = gru4rec.GRU4Rec(loss='bpr', final_act='linear', hidden_act='tanh', layers=[100], batch_size=32, dropout_p_hidden=0.0, learning_rate=0.2, n_sample=0, sample_alpha=0, time_sort=True, n_epochs=10) # useful result
    gru = gru4rec.GRU4Rec(loss='bpr',
                          final_act='linear',
                          hidden_act='tanh',
                          layers=[256],
                          batch_size=32,
                          dropout_p_hidden=0.0,
                          learning_rate=0.2,
                          n_sample=0,
                          sample_alpha=0,
                          time_sort=True,
                          n_epochs=10,
                          embedding=256)
    #gru = gru4rec.GRU4Rec(loss='bpr-max-0.5', final_act='linear', hidden_act='tanh', layers=[256], batch_size=200, dropout_p_hidden=0.0, learning_rate=0.2, momentum=0.5, n_sample=2048, sample_alpha=0, time_sort=True)
    gru.fit(data)
    gru.save_ItemEmbedding(data)
    res = evaluation.evaluate_sessions_batch(gru, valid, None)
    #print('Recall@20: {}'.format(res[0]))
    #print('MRR@20: {}'.format(res[1]))
Example no. 4

if __name__ == '__main__':
    command_line = parseArgs()
    data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype={'ItemId': np.int64})
    valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64})
    args = Args()
    args.n_items = len(data['ItemId'].unique())
    args.layers = command_line.layer
    args.rnn_size = command_line.size
    args.n_epochs = command_line.epoch
    args.learning_rate = command_line.lr
    args.is_training = command_line.train
    args.test_model = command_line.test
    args.hidden_act = command_line.hidden_act
    args.final_act = command_line.final_act
    args.loss = command_line.loss
    args.dropout_p_hidden = 1.0 if args.is_training == 0 else command_line.dropout
    print(args.dropout_p_hidden)
    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    with tf.Session(config=gpu_config) as sess:
        gru = model.GRU4Rec(sess, args)
        if args.is_training:
            gru.fit(data)
        else:
            res = evaluation.evaluate_sessions_batch(gru, data, valid)
            print('Recall@20: {}\tMRR@20: {}'.format(res[0], res[1]))
Example no. 5
# -*- coding: utf-8 -*-
"""
Created on Wed Apr  6 18:14:46 2016

@author: Balázs Hidasi
"""

import sys
sys.path.append('../..')

import numpy as np
import pandas as pd
import gru4rec
import evaluation

PATH_TO_TRAIN = '/path/to/rsc15_train_full.txt'
PATH_TO_TEST = '/path/to/rsc15_test.txt'

if __name__ == '__main__':
    data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype={'ItemId':np.int64})
    valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId':np.int64})
    
    print('Training GRU4Rec with 100 hidden units')    
    
    gru = gru4rec.GRU4Rec(layers=[100], loss='top1', batch_size=50, dropout_p_hidden=0.5, learning_rate=0.01, momentum=0.0)
    gru.fit(data)
    
    res = evaluation.evaluate_sessions_batch(gru, valid, None)
    print('Recall@20: {}'.format(res[0]))
    print('MRR@20: {}'.format(res[1]))
    
Example no. 6
    os.mkdir(ARGS.checkpoint_dir)
GPU_CONFIG = tf.ConfigProto()
#GPU_CONFIG.gpu_options.allow_growth = True
with tf.Session(config=GPU_CONFIG) as sess:
    GRU = model.GRU4Rec(sess, ARGS)
    START_TIME = time.time()
    if ARGS.is_training:
        OUTPUT = open('train_results.txt', 'w')
        GRU.fit(DATA)
        OUTPUT.close()
        TRAINING_TIME = time.time()
        print("Training time =", TRAINING_TIME - START_TIME, "seconds")
    else:
        TEST_OUTPUT = open('test_results.txt', 'w')
        print("\n\nEvaluating Model....\n", file=TEST_OUTPUT)
        RES = evaluation.evaluate_sessions_batch(GRU, DATA, DATA)
        print('Recall@1: {}'.format(RES[0][0]), file=TEST_OUTPUT)
        print('MRR@1: {}'.format(RES[1][0]), file=TEST_OUTPUT)
        print('Recall@2: {}'.format(RES[0][1]), file=TEST_OUTPUT)
        print('MRR@2: {}'.format(RES[1][1]), file=TEST_OUTPUT)
        print('Recall@5: {}'.format(RES[0][2]), file=TEST_OUTPUT)
        print('MRR@5: {}'.format(RES[1][2]), file=TEST_OUTPUT)
        #RES = evaluation.evaluate_sessions_batch(GRU, DATA, DATA, 10)
        print('Recall@10: {}'.format(RES[0][3]), file=TEST_OUTPUT)
        print('MRR@10: {}'.format(RES[1][3]), file=TEST_OUTPUT)
        #RES = evaluation.evaluate_sessions_batch(GRU, DATA, DATA, 20)
        print('Recall@20: {}'.format(RES[0][4]), file=TEST_OUTPUT)
        print('MRR@20: {}'.format(RES[1][4]), file=TEST_OUTPUT)
        #RES = evaluation.evaluate_sessions_batch(GRU, DATA, DATA, 50)
        print('Recall@50: {}'.format(RES[0][5]), file=TEST_OUTPUT)
        print('MRR@50: {}'.format(RES[1][5]), file=TEST_OUTPUT)
Example no. 7
    args = Args()
    args.n_items = len(data['product_id'].unique())
    args.layers = command_line.layer
    args.rnn_size = command_line.size
    args.n_epochs = command_line.epoch
    args.learning_rate = command_line.lr
    args.is_training = command_line.train
    args.test_model = command_line.test
    args.hidden_act = command_line.hidden_act
    args.final_act = command_line.final_act
    args.loss = command_line.loss
    args.dropout_p_hidden = 1.0 if args.is_training == 0 else command_line.dropout
    print(args.dropout_p_hidden)
    print(args.loss)
    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    with tf.Session(config=gpu_config) as sess:
        gru = model.GRU4Rec(sess, args)
        if args.is_training:
            gru.fit(data, args.loss)
        else:
            print("Testing")
            rec, mrr, topN = evaluation.evaluate_sessions_batch(
                gru, data, valid,
                session_key='cookie_id', item_key='product_id', time_key='timestamp')
            print('Recall@20: {}\tMRR@20: {}'.format(rec, mrr))
            print(topN)
            topN = pd.DataFrame(topN)
            # store Top20 products for every user_session in a csv file
            topN.to_csv('/home/nick/Desktop/thesis/datasets/pharmacy-data/api-data/rnn-data/top1-top20_2.csv')
Example no. 8
            feed_dict[self.state[i]] = self.predict_state[i]
        preds, self.predict_state = self.sess.run(fetches, feed_dict)
        preds = np.asarray(preds).T
        return pd.DataFrame(data=preds, index=itemidmap.index)


if __name__ == '__main__':
    defaults = Defaults()

    data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype={'ItemId': np.int64})
    valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64})

    defaults.n_items = len(data['ItemId'].unique())
    defaults.dropout_p_hidden = 1.0 if defaults.is_training == 0 else 0.5

    if not os.path.exists(defaults.checkpoint_dir):
        os.mkdir(defaults.checkpoint_dir)

    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True

    with tf.Session(config=gpu_config) as session:
        predictor = Session4RecPredictor(defaults, session)

        if defaults.is_training:
            print('Start session4rec training...')
            predictor.train(data)
        else:
            res = evaluation.evaluate_sessions_batch(predictor, data, valid)
            print('Recall@20: {}\tMRR@20: {}'.format(res[0], res[1]))
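The predict method at the top of this example returns a DataFrame of scores with one row per ItemId and one column per session in the batch. Purely as an illustration of how such a frame maps to the reported metrics (the actual evaluation.evaluate_sessions_batch may differ in detail), Recall@20 and MRR@20 for a single prediction step could be derived like this:

import numpy as np
import pandas as pd

def step_metrics(preds, next_items, cut_off=20):
    # preds: DataFrame indexed by ItemId, one column per active session
    # next_items: array-like with the true next ItemId of each session
    rows = preds.index.get_indexer(next_items)                     # row position of each true item
    true_scores = preds.values[rows, np.arange(len(next_items))]   # score of the true item in its own column
    ranks = (preds.values > true_scores).sum(axis=0) + 1           # 1-based rank of the true item per session
    recall = (ranks <= cut_off).mean()                             # fraction of sessions hit within the cut-off
    mrr = ((1.0 / ranks) * (ranks <= cut_off)).mean()              # reciprocal rank, zero beyond the cut-off
    return recall, mrr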
Example no. 9
    gru = MN4rec.GRU4Rec(loss='bpr',
                         final_act='linear',
                         hidden_act=args.activation,
                         layers=[256],
                         batch_size=args.batch_size,
                         embedding=length,
                         KBembedding=KBlength,
                         dropout_p_hidden=args.dropout,
                         n_sample=args.num_neg,
                         learning_rate=args.lr,
                         momentum=0.1,
                         sample_alpha=0,
                         time_sort=True,
                         n_epochs=args.epochs,
                         train_random_order=True,
                         out_dim=args.out_dim,
                         MN_nfactors=r_matrix.shape[0],
                         MN_dims=r_matrix.shape[1])
    gru.fit(data, ItemEmbedding, KBItemEmbedding, r_matrix)
    print("Training time is")
    print(time.time() - start_time)
    ItemFile = 'item_embedding'
    gru.save_ItemEmbedding(data, ItemFile)  #'item.embedding')

    UserFile = 'user.embedding'
    evaluation.evaluate_sessions_batch(
        gru, valid, None, SaveUserFile=UserFile)  #'user.embedding')
    end_time = time.time()
    print(start_time, end_time)
    print(end_time - start_time)
Example no. 10
                       momentum=0.0,
                       time_sort=False)
gru.fit(data)
#cf=baselines.ItemKNN()
#cf.fit(data)
#mf=baselines.BPR()
#mf.fit(data)
#acc_gru=[]
#acc_cf=[]
#acc_mf=[]
mrr_gru = []
#mrr_cf=[]
#mrr_mf=[]
for i in top:
    res_gru = evaluation.evaluate_sessions_batch(gru, valid, None, cut_off=i + 1)
    #res_cf = evaluation.evaluate_sessions(cf, valid, data, None,cut_off=i+1)
    #   res_mf = evaluation.evaluate_sessions(mf, valid, data, None,cut_off=i+1)
    mrr_gru.append(res_gru[1])
print(mrr_gru)
#    mrr_gru.append(res_gru[1])
#    acc_cf.append(res_cf[0])
#    mrr_cf.append(res_cf[1])
#    acc_mf.append(res_mf[0])
#    mrr_mf.append(res_mf[1])
#bar_width=0.6
#X=top
#Y_gru=acc_gru
#Y_cf=acc_cf
#Y_mf=acc_mf
Example no. 11
    args.latent_size = command_line.latent_size
    args.optimizer = command_line.optimizer
    args.dropout_p_hidden = 1.0 if args.is_training == 0 else command_line.dropout
    print(args.dropout_p_hidden)
    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)
    gpu_config = tf.ConfigProto()
    #gpu_config=tf.compat.v1.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    with tf.Session(config=gpu_config) as sess:
        gru = model.GRU4Rec(sess, args)
        if args.is_training:
            gru.fit(data)
        else:
            print(args.test_model)
            res = evaluation.evaluate_sessions_batch(
                gru, data, valid, cut_off=5, batch_size=args.batch_size)
            print('Recall@5: {}'.format(res[0]))
            print('MRR@5: {}'.format(res[1]))
            print('NDCG@5: {}'.format(res[2]))

            res = evaluation.evaluate_sessions_batch(
                gru, data, valid, cut_off=10, batch_size=args.batch_size)
            print('Recall@10: {}'.format(res[0]))
            print('MRR@10: {}'.format(res[1]))
            print('NDCG@10: {}'.format(res[2]))

            res = evaluation.evaluate_sessions_batch(
                gru, data, valid, cut_off=20, batch_size=args.batch_size)
            print('Recall@20: {}'.format(res[0]))
            print('MRR@20: {}'.format(res[1]))
            print('NDCG@20: {}'.format(res[2]))
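The three evaluation blocks above differ only in the cut-off. As a side note, not part of the original script, they can be folded into a single loop with the same calls:

            for cut_off in (5, 10, 20):
                res = evaluation.evaluate_sessions_batch(
                    gru, data, valid, cut_off=cut_off, batch_size=args.batch_size)
                print('Recall@{}: {}'.format(cut_off, res[0]))
                print('MRR@{}: {}'.format(cut_off, res[1]))
                print('NDCG@{}: {}'.format(cut_off, res[2]))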
Example no. 12
if __name__ == '__main__':
    command_line = parseArgs()
    # train_data / test_data are assumed here to be CSV path constants defined elsewhere in the module
    train_data = pd.read_csv(train_data, index_col=0)
    test_data = pd.read_csv(test_data, index_col=0)

    args = Args()
    args.n_items = len(train_data['ItemId'].unique())
    args.layers = command_line.layer
    args.rnn_size = command_line.size
    args.n_epochs = command_line.epoch
    args.learning_rate = command_line.lr
    args.is_training = command_line.train
    args.test_model = command_line.test
    args.hidden_act = command_line.hidden_act
    args.final_act = command_line.final_act
    args.loss = command_line.loss
    args.dropout_p_hidden = 1.0 if args.is_training == 0 else command_line.dropout

    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    with tf.Session(config=gpu_config) as sess:
        gru = model.GRU4Rec(sess, args)
        if args.is_training:
            gru.fit(train_data)
        else:
            res = evaluation.evaluate_sessions_batch(
                gru, train_data, test_data, batch_size=args.batch_size)
            print('Recall@20: {}\tMRR@20: {}'.format(res[0], res[1]))
Example no. 13
                          layers=[100],
                          adapt='adagrad',
                          n_epochs=10,
                          batch_size=32,
                          dropout_p_embed=0,
                          dropout_p_hidden=0,
                          learning_rate=0.2,
                          momentum=0.3,
                          n_sample=128,
                          sample_alpha=0,
                          bpreg=1,
                          constrained_embedding=False,
                          dwell_time=100)
    gru.fit(data)
    res = evaluation.evaluate_sessions_batch(gru,
                                             valid,
                                             cut_off=5,
                                             dwell_time=100)
    result1[0, 0] = res[0]
    result1[0, 1] = res[1]
    result1[0, 2] = res[2]
    print('bpr-max-sample45')
    print('HR@5: {}'.format(res[0]))
    print('MRR@5: {}'.format(res[1]))
    print('NDCG@5: {}'.format(res[2]))

    res = evaluation.evaluate_sessions_batch(gru,
                                             valid,
                                             cut_off=10,
                                             dwell_time=100)
    result1[1, 0] = res[0]
    result1[1, 1] = res[1]
Example no. 14
        gru = model_RBP.GRU4Rec(sess, args)
        if args.is_training:
            gru.fit(data)
        else:
            #res = evaluation.evaluate_sessions_batch(gru, data, valid)
            #print('Recall@20: {}\tMRR@20: {}'.format(res[0], res[1]))            
            for c in [3, 15, 20]:
                res = evaluation.evaluate_sessions_batch(gru, data, valid, cut_off=c)
                print('Recall@{}: {}\tMRR@{}: {}'.format(c, res[0], c, res[1]))
            ### Export ratings
            # preds = res[2]
            # preds.to_csv(PATH_TO_PROCESSED_DATA + 'eikon_pred_test.csv', sep=',', index=False)
            #ids = res[3]
            #ids.to_csv(PATH_TO_PROCESSED_DATA + 'eikon_pred_test_ids.csv', sep=',', index=False)
Example no. 15

if __name__ == '__main__':
    command_line = parseArgs()
    # train_data / test_data are assumed here to be CSV path constants defined elsewhere in the module
    train_data = pd.read_csv(train_data)
    test_data = pd.read_csv(test_data)

    args = Args()
    args.n_items = len(train_data['ItemId'].unique())
    args.layers = command_line.layer
    args.rnn_size = command_line.size
    args.n_epochs = command_line.epoch
    args.learning_rate = command_line.lr
    args.is_training = command_line.train
    args.test_model = command_line.test
    args.hidden_act = command_line.hidden_act
    args.final_act = command_line.final_act
    args.loss = command_line.loss
    args.dropout_p_hidden = 1.0 if args.is_training == 0 else command_line.dropout

    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    with tf.Session(config=gpu_config) as sess:
        gru = model.GRU4Rec(sess, args)
        if args.is_training:
            gru.fit(train_data)
        else:
            res = evaluation.evaluate_sessions_batch(gru, train_data, test_data)
            print('Recall@20: {}\tMRR@20: {}'.format(res[0], res[1]))
Example no. 16
                          final_act='softmax',
                          hidden_act='tanh',
                          layers=[100],
                          adapt='adagrad',
                          n_epochs=10,
                          batch_size=32,
                          dropout_p_embed=0,
                          dropout_p_hidden=0.3,
                          learning_rate=0.1,
                          momentum=0.7,
                          n_sample=2048,
                          sample_alpha=0,
                          bpreg=0,
                          constrained_embedding=False)
    gru.fit(data)
    res = evaluation.evaluate_sessions_batch(gru, valid, cut_off=5)
    print('cross-entropy-sample0')
    print('HR@5: {}'.format(res[0]))
    print('MRR@5: {}'.format(res[1]))
    print('MAP@5: {}'.format(res[2]))
    print('NDCG@5: {}'.format(res[3]))
    print('PRECISION@5: {}'.format(res[4]))
    print('F1-SCORE@5: {}'.format(res[5]))

    res = evaluation.evaluate_sessions_batch(gru, valid, cut_off=10)
    print('HR@10: {}'.format(res[0]))
    print('MRR@10: {}'.format(res[1]))
    print('MAP@10: {}'.format(res[2]))
    print('NDCG@10: {}'.format(res[3]))
    print('PRECISION@10: {}'.format(res[4]))
    print('F1-SCORE@10: {}'.format(res[5]))