def runGRU4Rec(self):
    """Grid-search GRU4Rec over layer size, momentum and dropout.

    Loads the session log via the data manager, makes a chronological
    train/test split, then trains one GRU4Rec model per hyperparameter
    combination and prints recall at cut-off 2 for each.
    """
    import gru4rec

    session_key = "Sid"  # alternative grouping key: "Aid"
    time_key = "TimeStamp"
    item_key = "QueryName"
    data = self.dataManager.loadData([time_key, item_key, session_key, "Aid", "Sid"], removeFirstK=5)
    train, test = self.dataManager.splitData(data, isRandom=False)
    print('Training GRU4Rec')
    batch_size = 200
    for layers in [100, 1000]:
        for loss_type in ['top1']:
            for momentum in [3, 4, 5]:      # tenths: 0.3 .. 0.5
                for dropOut in [3, 4, 5]:   # tenths: 0.3 .. 0.5
                    try:
                        print('Batch Size: ' + str(batch_size) + ' Dropout: ' + str(float(dropOut) / 10.0) + ' Momentum: ' + str(float(momentum) / 10.0))
                        gru = gru4rec.GRU4Rec(layers=[layers], loss=loss_type, batch_size=batch_size,
                                              dropout_p_hidden=float(dropOut) / 10.0, learning_rate=0.05,
                                              momentum=float(momentum) / 10.0, n_epochs=3,
                                              hidden_act='tanh', final_act='tanh',
                                              session_key=session_key, item_key=item_key, time_key=time_key)
                        gru.fit(train)
                        res = gru.evaluate_sessions_batch(test, cut_off=2, batch_size=batch_size,
                                                          session_key=session_key, item_key=item_key,
                                                          time_key=time_key)
                        print('Recall : {}'.format(res[0]))
                    except Exception as exc:
                        # Was a bare `except:`, which also swallowed SystemExit/
                        # KeyboardInterrupt and hid the failure reason. Keep the
                        # sweep best-effort but report what went wrong.
                        print("Unexpected error: {}".format(exc))
def runGRU4RecForSpecificAid(self, numOfAids=10):
    """Train a separate GRU4Rec model per agent (Aid) and probe one test session.

    Picks the `numOfAids` agents with the most distinct sessions (skipping the
    top two), trains a model on each agent's data, and predicts next items for
    the chronologically first session of that agent's test split.

    NOTE(review): `correct` is printed at the end but never assigned anywhere
    in this method, so the final prints raise NameError; `preds` is computed
    but never used. The accuracy bookkeeping appears to have been lost —
    restore it before relying on this method.
    """
    import gru4rec
    session_key = "Sid"  # "Aid"  # Or Sid
    time_key = "TimeStamp"
    item_key = "QueryName"
    data = self.dataManager.loadData([time_key, item_key, "Aid", "Sid"], removeFirstK=5, onlyFirstFile=True)
    # Distinct-session count per agent, sorted descending (most active first).
    result = data.groupby('Aid').apply(lambda group: (group.Sid.nunique()))
    result.sort_values(inplace=True, ascending=False)
    # Skip the two busiest agents, then take the next `numOfAids`.
    for selectedAid in result.keys()[2:(numOfAids + 2)]:
        # Recover the original (pre-label-encoding) agent id for display.
        idx = self.dataManager.fields.index("Aid");
        ret = self.dataManager.mcle.all_encoders_[idx].inverse_transform(selectedAid);
        print(str(ret))
        aidData = data.loc[data['Aid'] == selectedAid]
        train, test = self.dataManager.splitData(aidData, isRandom=False)
        print('Training GRU4Rec')
        batch_size = 5
        momentum = 0.4
        dropOut = 0.5
        print('Batch Size: ' + str(batch_size) + ' Dropout: ' + str(float(dropOut)) + ' Momentum: ' + str(float(momentum)))
        gru = gru4rec.GRU4Rec(layers=[1000], loss='top1', batch_size=batch_size,
                              dropout_p_hidden=float(dropOut), learning_rate=0.05,
                              momentum=float(momentum), n_epochs=10,
                              hidden_act='tanh', final_act='tanh',
                              session_key=session_key, item_key=item_key, time_key=time_key)
        gru.fit(train)
        # Legacy pandas flag — presumably set to silence SettingWithCopy
        # warnings on the sliced frame; verify against the pandas version used.
        test.is_copy = False
        test.sort_values([time_key, session_key], inplace=True)  # Sort by time_key first and then by session_key
        # The chronologically first session of the test split.
        specificSession = test[test[session_key] == test[session_key].values[0]]
        preds = gru.predict_next_batch(specificSession[session_key], specificSession[item_key], None, len(specificSession))
        # NOTE(review): `correct` is undefined here — see docstring.
        print('Correct: {}'.format(correct))
        if (len(test) - batch_size - 1 > 0):
            print('Accuracy: {}'.format(float(correct) / float(len(test) - batch_size - 1)))
def learnFromExperiencedGRU(self):
    """Train on experienced agents; test transfer to inexperienced ones.

    Splits agents by activity (at most 3 distinct sessions vs. more), trains
    GRU4Rec on the active ("above") group, and reports recall at cut-off 2 on
    both that group's held-out sessions and an equally sized slice of the
    inactive ("below") group's data.
    """
    import gru4rec

    session_key = "Sid"  # alternative grouping key: "Aid"
    time_key = "TimeStamp"
    item_key = "QueryName"
    data = self.dataManager.loadData([time_key, item_key, session_key, "Aid", "Sid"], removeFirstK=5)
    # True for agents with at most 3 distinct sessions ("inexperienced").
    result = data.groupby('Aid').apply(lambda group: (group.Sid.nunique() <= 3))
    resultBelow = result[result == True]
    resultAbove = result[result == False]
    dataBelow = data[data.Aid.isin(resultBelow.index)]
    dataAbove = data[data.Aid.isin(resultAbove.index)]
    train, test = self.dataManager.splitData(dataAbove, isRandom=False)
    testBelow = dataBelow[:len(test)]  # match the size of the "above" test set
    print('Training GRU4Rec')
    batch_size = 200
    for layers in [1000]:
        for loss_type in ['top1']:
            for momentum in [3, 4, 5]:  # tenths: 0.3 .. 0.5
                for dropOut in [3]:     # tenths: 0.3
                    try:
                        print('Batch Size: ' + str(batch_size) + ' Dropout: ' + str(float(dropOut) / 10.0) + ' Momentum: ' + str(float(momentum) / 10.0))
                        # Fixed: the loop variable `layers` was ignored and
                        # layers=[1000] was hard-coded; identical behavior for
                        # the current grid, but now the sweep actually applies.
                        gru = gru4rec.GRU4Rec(layers=[layers], loss=loss_type, batch_size=batch_size,
                                              dropout_p_hidden=float(dropOut) / 10.0, learning_rate=0.05,
                                              momentum=float(momentum) / 10.0, n_epochs=3,
                                              hidden_act='tanh', final_act='tanh',
                                              session_key=session_key, item_key=item_key, time_key=time_key)
                        gru.fit(train)
                        res = gru.evaluate_sessions_batch(test, cut_off=2, batch_size=batch_size,
                                                          session_key=session_key, item_key=item_key,
                                                          time_key=time_key)
                        print('Above Test Recall : {}'.format(res[0]))
                        res = gru.evaluate_sessions_batch(testBelow, cut_off=2, batch_size=batch_size,
                                                          session_key=session_key, item_key=item_key,
                                                          time_key=time_key)
                        print('Below Recall : {}'.format(res[0]))
                    except Exception as exc:
                        # Was a bare `except:`; report the failure instead of hiding it.
                        print("Unexpected error: {}".format(exc))
# NOTE(review): this chunk starts mid-script — `data`, PATH_TO_TEST, pd, np,
# time, gru4rec and evaluation are presumably defined/imported earlier.
valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64})
print(data.head())
print(valid.head())

# OUTDATED!!!
# Reproducing results from the original paper "Session-based Recommendations with Recurrent Neural Networks" on RSC15 (http://arxiv.org/abs/1511.06939)
print('Training GRU4Rec with 100 hidden units')
start = time.time()  # wall-clock timing of training + evaluation
gru = gru4rec.GRU4Rec(loss='top1', final_act='tanh', hidden_act='tanh', layers=[100],
                      batch_size=50, dropout_p_hidden=0.5, learning_rate=0.01, momentum=0.0,
                      time_sort=False, session_key='SessionId', item_key='ItemId', time_key='Timestamp')
gru.fit(data)
res = evaluation.evaluate_gpu(gru, valid, session_key='SessionId', item_key='ItemId', time_key='Timestamp')
print('Recall@20: {}'.format(res[0]))
print('MRR@20: {}'.format(res[1]))
print('-------------------------------------------------------')
end = time.time()
# RSC15 train/test splits produced by the preprocessing step.
# NOTE(review): pd, np, gru4rec and evaluation are imported elsewhere in the file.
PATH_TO_TRAIN = '../data/rsc15_train_full.txt'
PATH_TO_TEST = '../data/rsc15_test.txt'

if __name__ == '__main__':
    data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype={'ItemId': np.int64})
    valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64})

    # State-of-the-art results on RSC15 from "Recurrent Neural Networks with Top-k Gains for Session-based Recommendations" on RSC15 (http://arxiv.org/abs/1706.03847)
    # BPR-max, no embedding (R@20 = 0.7197, M@20 = 0.3157)
    gru = gru4rec.GRU4Rec(loss='bpr-max', final_act='elu-0.5', hidden_act='tanh', layers=[100],
                          adapt='adagrad', n_epochs=10, batch_size=32, dropout_p_embed=0,
                          dropout_p_hidden=0, learning_rate=0.2, momentum=0.1, n_sample=2048,
                          sample_alpha=0, bpreg=0.5, constrained_embedding=True)
    gru.fit(data)
    # Writes per-session predictions to CSV in addition to returning metrics.
    res = evaluation.evaluate_gpu(gru, valid, output_path='predictions_GRU4REC.csv', mode='standard')
    print('Recall@20: {}'.format(res[0]))
    print('MRR@20: {}'.format(res[1]))
valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64}) #Reproducing results from "Session-based Recommendations with Recurrent Neural Networks" on RSC15 (http://arxiv.org/abs/1511.06939) #adapt : None, 'adagrad', 'rmsprop', 'adam', 'adadelta' logger.info( 'rnn_size:%3d batch_size:%s hidden_act:%s dropout_p_hidden:%s final_act:%s optimizer:%s learning_rate:%s', FLAGS.rnn_size, FLAGS.batch_size, FLAGS.hidden_act, FLAGS.dropout_p_hidden, FLAGS.final_act, FLAGS.optimizer, FLAGS.learning_rate) gru = gru4rec.GRU4Rec(loss=FLAGS.loss, final_act=FLAGS.final_act, hidden_act=FLAGS.hidden_act, layers=[FLAGS.rnn_size], batch_size=FLAGS.batch_size, dropout_p_hidden=FLAGS.dropout_p_hidden, learning_rate=FLAGS.learning_rate, momentum=0.0, time_sort=False, n_epochs=FLAGS.n_epochs, adapt=FLAGS.optimizer) gru.fit(data, valid) #Reproducing results from "Recurrent Neural Networks with Top-k Gains for Session-based Recommendations" on RSC15 (http://arxiv.org/abs/1706.03847) #print('Training GRU4Rec with 100 hidden units') #gru = gru4rec.GRU4Rec(loss='bpr-max-0.5', final_act='linear', hidden_act='tanh', layers=[100], batch_size=32, dropout_p_hidden=0.0, learning_rate=0.2, momentum=0.5, n_sample=2048, sample_alpha=0, time_sort=True) #gru.fit(data) # #res = evaluation.evaluate_sessions_batch(gru, valid, None)
# NOTE(review): chunk starts mid-script — PATH_TO_TRAIN, pd, np and gru4rec
# (and the commented-out baselines module) are defined/imported earlier.
PATH_TO_TEST = '../report_labels_test_openoffice'

if __name__ == '__main__':
    data = pd.read_csv(PATH_TO_TRAIN, sep='\t')
    valid = pd.read_csv(PATH_TO_TEST, sep='\t')

    # Reproducing results from "Session-based Recommendations with Recurrent Neural Networks" on RSC15 (http://arxiv.org/abs/1511.06939)
    #print('Training GRU4Rec with 100 hidden units')
    # Evaluation cut-offs used later in the script.
    top = np.array([1, 5, 10, 15, 20])
    #top=11
    gru = gru4rec.GRU4Rec(n_epochs=10, loss='top1', final_act='tanh', hidden_act='relu',
                          layers=[512], batch_size=32, dropout_p_hidden=0.5,
                          learning_rate=0.01, momentum=0.0, time_sort=False)
    gru.fit(data)
    #cf=baselines.ItemKNN()
    #cf.fit(data)
    #mf=baselines.BPR()
    #mf.fit(data)
    #acc_gru=[]
    #acc_cf=[]
    #acc_mf=[]
    # Accumulator for MRR results; presumably filled by code past this excerpt.
    mrr_gru = []
    #mrr_cf=[]
    #mrr_mf=[]
# NOTE(review): chunk starts mid-script — PATH_TO_TRAIN, PATH_TO_TEST, LAYERS,
# pd, np, gru4rec and evaluation are defined/imported earlier.
EPOCHS = 1
BATCH_SIZE = 50

if __name__ == '__main__':
    data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype={'ItemId': np.int64})
    valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64})
    print('Training GRU4Rec with ' + str(LAYERS) + ' hidden units and ' + str(EPOCHS) + ' epochs')

    # Reproducing results from "Session-based Recommendations with Recurrent Neural Networks" on RSC15 (http://arxiv.org/abs/1511.06939)
    gru = gru4rec.GRU4Rec(loss='top1', final_act='tanh', hidden_act='tanh', layers=[LAYERS],
                          batch_size=BATCH_SIZE, dropout_p_hidden=0.5, learning_rate=0.01,
                          momentum=0.0, n_epochs=EPOCHS, time_sort=False)
    gru.fit(data)
    res = evaluation.evaluate_sessions_batch(gru, valid, None)
    print('Recall@20: {}'.format(res[0]))
    print('MRR@20: {}'.format(res[1]))

    # Reproducing results from "Recurrent Neural Networks with Top-k Gains for Session-based Recommendations" on RSC15 (http://arxiv.org/abs/1706.03847)
    # gru = gru4rec.GRU4Rec(loss='bpr-max-0.5',
    #                       final_act='linear',
    #                       hidden_act='tanh',
    #                       layers=[LAYERS],
""" import sys sys.path.append('../..') import numpy as np import pandas as pd import gru4rec import evaluation PATH_TO_TRAIN = '/path/to/rsc15_train_full.txt' PATH_TO_TEST = '/path/to/rsc15_test.txt' if __name__ == '__main__': data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype={'ItemId': np.int64}) valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64}) print('Training GRU4Rec with 100 hidden units') gru = gru4rec.GRU4Rec(layers=[100], loss='top1', batch_size=50, dropout_p_hidden=0.5, learning_rate=0.01, momentum=0.0) gru.fit(data) res = evaluation.evaluate_sessions_batch(gru, valid, None) print('Recall@20: {}'.format(res[0])) print('MRR@20: {}'.format(res[1]))
# NOTE(review): chunk starts mid-script (PATH_TO_TRAIN, PATH_TO_TEST, LAYERS,
# pd, np, gru4rec, evaluation defined earlier) and the final constructor call
# continues past the end of this excerpt.
EPOCHS = 1
BATCH_SIZE = 50

if __name__ == '__main__':
    data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype={'ItemId': np.int64})
    valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64})
    print('Training GRU4Rec with ' + str(LAYERS) + ' hidden units and ' + str(EPOCHS) + ' epochs')

    # Reproducing results from "Session-based Recommendations with Recurrent Neural Networks" on RSC15 (http://arxiv.org/abs/1511.06939)
    gru = gru4rec.GRU4Rec(loss='top1', final_act='tanh', hidden_act='tanh', layers=[LAYERS],
                          batch_size=BATCH_SIZE, dropout_p_hidden=0.5, learning_rate=0.01,
                          momentum=0.0, n_epochs=EPOCHS, time_sort=False)
    gru.fit(data)
    res = evaluation.evaluate_sessions_batch(gru, valid, None)
    print('Recall@20: {}'.format(res[0]))
    print('MRR@20: {}'.format(res[1]))

    # Reproducing results from "Recurrent Neural Networks with Top-k Gains for Session-based Recommendations" on RSC15 (http://arxiv.org/abs/1706.03847)
    gru = gru4rec.GRU4Rec(loss='bpr-max-0.5',
                          final_act='linear',
                          hidden_act='tanh',
                          layers=[LAYERS],
# NOTE(review): chunk starts mid-script — pd, np, tqdm and gru4rec are
# imported earlier in the file.
PATH_TO_TRAIN = 'reddit_train_full.txt'
PATH_TO_TEST = 'reddit_test.txt'

if __name__ == '__main__':
    data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype={'ItemId': np.int64})
    # NOTE(review): `valid` is loaded but never used in this excerpt.
    valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64})

    # Reproducing results from "Session-based Recommendations with Recurrent Neural Networks" on RSC15 (http://arxiv.org/abs/1511.06939)
    # NOTE(review): 90 x 49 = 4410 full training runs with no evaluation in
    # between — extremely expensive; confirm this grid is intentional.
    for x in tqdm(range(10, 100)):
        print('Training GRU4Rec with ' + str(x) + ' hidden units')
        for y in range(1, 50):
            gru = gru4rec.GRU4Rec(loss='top1', final_act='tanh', hidden_act='tanh',
                                  layers=[x], batch_size=y, dropout_p_hidden=0.5,
                                  learning_rate=0.01, momentum=0.0, time_sort=False)
            gru.fit(data)
            # Echo the configuration that was just trained.
            print('---------------------')
            print()
            print('Training params')
            print('Layers: ' + str(x))
            print('Batch size: ' + str(y))
            print('Final act: tanh')
            print('Hidden act: tanh')
            print('Learning rate: 0.01')
            print('dropout p: 0.5')
            print('Time sort: False')
# Old TOP1 reproduction kept disabled (triple-quoted string used as a block comment).
'''
print('Training GRU4Rec with 100 hidden units')
gru = gru4rec.GRU4Rec(loss='top1', final_act='tanh', hidden_act='tanh', layers=[100], batch_size=50, dropout_p_hidden=0.5, learning_rate=0.01, momentum=0.0, time_sort=False)
gru.fit(data)
res = evaluation.evaluate_sessions_batch(gru, valid, None)
print('Recall@20: {}'.format(res[0]))
print('MRR@20: {}'.format(res[1]))
'''

# Reproducing results from "Recurrent Neural Networks with Top-k Gains for Session-based Recommendations" on RSC15 (http://arxiv.org/abs/1706.03847)
print('Training GRU4Rec with 100 hidden units')
start_time = time.time()
# `length` and `ItemEmbedding` are presumably defined earlier in the file.
gru = gru4rec.GRU4Rec(loss='bpr', final_act='linear', hidden_act='tanh', layers=[256],
                      batch_size=200, embedding=length, dropout_p_hidden=0.2, n_sample=10,
                      learning_rate=0.001, momentum=0.1, sample_alpha=0, time_sort=True,
                      n_epochs=10, train_random_order=True)
gru.fit(data, ItemEmbedding)
# Output paths for the learned embeddings; optionally overridden from argv.
ItemFile = 'item_embedding'
if len(sys.argv) > 2:
    ItemFile = sys.argv[2]
gru.save_ItemEmbedding(data, ItemFile)  # 'item.embedding'
UserFile = 'user.embedding'
if len(sys.argv) > 3:
    UserFile = sys.argv[3]
evaluation.evaluate_sessions_batch(gru, valid, None, SaveUserFile=UserFile)  # 'user.embedding'
end_time = time.time()
# Fixed: these were Python-2 print statements (inconsistent with the print()
# calls above), and the elapsed time was computed as start - end, which always
# printed a negative duration.
print(start_time, end_time)
print(end_time - start_time)
#print('Recall@20: {}'.format(res[0]))
#print('MRR@20: {}'.format(res[1]))
# Placeholder dataset locations — fill in before running.
PATH_TO_TRAIN = ''
PATH_TO_TEST = ''

if __name__ == '__main__':
    # Both splits share the same TSV layout with an integer ItemId column.
    csv_kwargs = dict(sep='\t', dtype={'ItemId': np.int64})
    train_df = pd.read_csv(PATH_TO_TRAIN, **csv_kwargs)
    test_df = pd.read_csv(PATH_TO_TEST, **csv_kwargs)

    ##cross-entropy
    # Cross-entropy loss with softmax output and 2048 sampled negatives.
    model = gru4rec.GRU4Rec(loss='cross-entropy', final_act='softmax', hidden_act='tanh',
                            layers=[100], adapt='adagrad', n_epochs=10, batch_size=32,
                            dropout_p_embed=0, dropout_p_hidden=0.3, learning_rate=0.1,
                            momentum=0.7, n_sample=2048, sample_alpha=0, bpreg=0,
                            constrained_embedding=False)
    model.fit(train_df)

    # Rank-based metrics at cut-off 5.
    metrics = evaluation.evaluate_sessions_batch(model, test_df, cut_off=5)
    print('cross-entropy-sample0')
    print('HR@5: {}'.format(metrics[0]))
    print('MRR@5: {}'.format(metrics[1]))
    print('MAP@5: {}'.format(metrics[2]))
    print('NDCG@5: {}'.format(metrics[3]))
    print('PRECISION@5: {}'.format(metrics[4]))