def test(config_path):
    """Evaluate the saved model on the held-out MQ2007 split of the configured fold.

    The configuration is read with ``load_params(config_path)``. The split
    held out for testing is derived from ``param_dict["fold"]``
    (fold 1 -> S5, 2 -> S1, 3 -> S2, 4 -> S3, 5 -> S4). Its
    ``testdata0.pkl`` is loaded, an ``Attention`` model is built on the GPU,
    its weights are restored from
    ``<model_base_path>/<model_name_str>/saves/model_file``, and the MAP and
    NDCG values returned by ``evaluate`` are printed.

    Args:
        config_path: Path handed to ``load_params`` and ``evaluate``.

    Raises:
        ValueError: If the configured fold is not one of 1..5.
        IndexError: If the test data file of the selected split is missing.
    """
    # Load the parameters
    param_dict, rep_param_dict = load_params(config_path)

    fold = param_dict["fold"]
    model_base_path = param_dict['model_base_path']
    model_name_str = param_dict['model_name_str']

    # Each fold is tested on exactly one split; only that split is touched.
    held_out_split = {1: 'S5', 2: 'S1', 3: 'S2', 4: 'S3', 5: 'S4'}
    if fold not in held_out_split:
        raise ValueError("wrong fold num {}".format(fold))
    split = held_out_split[fold]

    # load data
    test_dir = '{}/MQ2007/{}/'.format(param_dict["data_base_path"], split)
    pattern = "{}/testdata0.pkl".format(test_dir)
    matches = glob.glob(pattern)
    if not matches:
        # Same exception type an empty glob result raised before, now with
        # the offending path in the message.
        raise IndexError("no test data file found at {}".format(pattern))
    test_file = matches[0]  # glob returns a list; only the path string is needed
    rel_path = '{}/{}/tmp/test/{}.qrels'.format(
        model_base_path, model_name_str, split)

    test_data = load_dataset(test_file)

    model = Attention(
        emb_size=param_dict['emb_size'],
        query_length=param_dict['q_len'],
        doc_length=param_dict['d_len'],
        num_heads=param_dict['num_heads'],
        kernel_size=rep_param_dict['kernel_size'],
        filter_size=rep_param_dict['filt_size'],
        vocab_size=param_dict['vocab_size'],
        dropout=0.0,
        qrep_dim=rep_param_dict['output_dim'],
        hidden_size=param_dict['hidden_size'],
        batch_size=param_dict['batch_size'],
        preemb=param_dict['preemb'],
        emb_path=param_dict['emb_path'],
    ).cuda()

    # Test
    # load model from file
    model_file = '{}/{}/saves/model_file'.format(model_base_path,
                                                 model_name_str)
    model.load_state_dict(torch.load(model_file))
    print("loaded model, and perform test now")

    MAP, NDCGs = evaluate(config_path, model, test_data, rel_path,
                          mode="test")
    print(MAP, NDCGs)
def train(config_path, resume=True):
    """Train the ``Attention`` model on one MQ2007 fold, validating every 20 batches.

    The configuration is read with ``load_params(config_path)``. The
    configured fold selects three training splits and one validation split:

        fold 1: train S1 S2 S3, validate S4
        fold 2: train S2 S3 S4, validate S5
        fold 3: train S3 S4 S5, validate S1
        fold 4: train S4 S5 S1, validate S2
        fold 5: train S5 S1 S2, validate S3

    In every epoch, each training file is loaded, turned into
    (query, positive doc, negative doc) pairs by ``pair_data_generator``,
    shuffled by ``list_shuffle`` and consumed in full batches of
    ``batch_size`` (a trailing partial batch is dropped). After every 20th
    training batch, counted across files and epochs, the model is scored
    with ``evaluate``. Whenever MAP improves, the best MAP and the model
    weights are written under ``<model_base_path>/<model_name_str>/saves/``.

    Args:
        config_path: Path handed to ``load_params`` and ``evaluate``.
        resume: When true, the log files are appended to and the saved
            weights and best MAP are restored before training. When false,
            the log files are truncated and training starts from scratch.

    Raises:
        ValueError: If the configured fold is not one of 1..5.
        IndexError: If the validation data file of the selected split is
            missing.
    """
    # Load the parameters
    param_dict, rep_param_dict = load_params(config_path)
    # use cuda flag
    use_cuda = True

    data_base_path = param_dict["data_base_path"]
    fold = param_dict["fold"]
    model_base_path = param_dict['model_base_path']
    model_name_str = param_dict['model_name_str']
    q_len = param_dict["q_len"]
    d_len = param_dict["d_len"]
    batch_size = param_dict['batch_size']

    # fold -> (training splits, validation split)
    fold_splits = {
        1: (('S1', 'S2', 'S3'), 'S4'),
        2: (('S2', 'S3', 'S4'), 'S5'),
        3: (('S3', 'S4', 'S5'), 'S1'),
        4: (('S4', 'S5', 'S1'), 'S2'),
        5: (('S5', 'S1', 'S2'), 'S3'),
    }
    if fold not in fold_splits:
        raise ValueError("wrong fold num {}".format(fold))
    train_splits, valid_split = fold_splits[fold]

    # load data
    # A training split whose data0.pkl is missing contributes nothing here,
    # because glob simply returns an empty list for it.
    train_files = []
    for split in train_splits:
        split_dir = "{}/MQ2007/{}/".format(data_base_path, split)
        train_files.extend(glob.glob("{}/data0.pkl".format(split_dir)))

    valid_dir = '{}/MQ2007/{}/'.format(data_base_path, valid_split)
    valid_pattern = "{}/testdata0.pkl".format(valid_dir)
    valid_matches = glob.glob(valid_pattern)
    if not valid_matches:
        # Same exception type an empty glob result raised before, now with
        # the offending path in the message.
        raise IndexError(
            "no validation data file found at {}".format(valid_pattern))
    valid_file = valid_matches[0]  # glob returns a list; take the path string
    rel_path = '{}/{}/tmp/test/{}.qrels'.format(
        model_base_path, model_name_str, valid_split)

    # Build the model
    model = Attention(
        emb_size=param_dict['emb_size'],
        query_length=q_len,
        doc_length=d_len,
        num_heads=param_dict['num_heads'],
        kernel_size=rep_param_dict['kernel_size'],
        filter_size=rep_param_dict['filt_size'],
        vocab_size=param_dict['vocab_size'],
        dropout=0.0,
        qrep_dim=rep_param_dict['output_dim'],
        hidden_size=param_dict['hidden_size'],
        batch_size=batch_size,
        preemb=param_dict['preemb'],
        emb_path=param_dict['emb_path'],
    )
    if use_cuda:
        model.cuda()

    # optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=param_dict['learning_rate'],
                           betas=(param_dict['beta1'], param_dict['beta2']),
                           weight_decay=param_dict['alpha'])

    def to_input(batch, max_len):
        # Pad every id sequence to max_len with id 0 and wrap it for the model.
        padded = pad_batch_list(batch, max_len=max_len, padding_id=0)
        tensor = Variable(torch.LongTensor(padded), requires_grad=False)
        return tensor.cuda() if use_cuda else tensor

    # experiment
    print("Experiment")
    run_dir = '{}/{}'.format(model_base_path, model_name_str)
    train_log_path = '{}/logs/training_log.txt'.format(run_dir)
    valid_log_path = '{}/logs/valid_log.txt'.format(run_dir)
    model_file = '{}/saves/model_file'.format(run_dir)
    best_map_file = '{}/saves/best_MAP.pkl'.format(run_dir)

    # define the parameters
    n_epoch = param_dict['n_epoch']
    # init best validation MAP value
    best_MAP = 0.0
    batch_count_tr = 0

    # Append when resuming, truncate otherwise. Both logs are line-buffered
    # and are closed even when training aborts with an exception.
    log_mode = 'a+' if resume else 'w+'
    with open(train_log_path, log_mode, 1) as f_log, \
            open(valid_log_path, log_mode, 1) as valid_log:
        # restore saved parameters when resuming
        if resume:
            model.load_state_dict(torch.load(model_file))
            # best_MAP.pkl is written by this function itself (see below).
            with open(best_map_file, 'rb') as f_MAP:
                best_MAP = pickle.load(f_MAP)
            print("loaded model, and resume training now")

        # The validation set does not depend on the epoch or training file,
        # so it is loaded once instead of once per training file.
        valid_data = load_dataset(valid_file)

        for epoch in range(1, n_epoch + 1):
            for f in train_files:
                # load data
                data = load_dataset(f)
                print("loaded {}".format(f))
                # prepare and shuffle the training pairs
                Q, D_pos, D_neg, L = pair_data_generator(data, q_len)
                train_data = list_shuffle(Q, D_pos, D_neg, L)

                num_batch = len(train_data[0]) // batch_size
                for batch_count in range(num_batch):
                    start = batch_size * batch_count
                    stop = start + batch_size
                    Q = to_input(train_data[0][start:stop], q_len)
                    D_pos = to_input(train_data[1][start:stop], d_len)
                    D_neg = to_input(train_data[2][start:stop], d_len)

                    # run on this batch
                    optimizer.zero_grad()
                    t1 = time.time()
                    q_mask, d_pos_mask, d_neg_mask = model.generate_mask(
                        Q, D_pos, D_neg)
                    S_pos, S_neg = model(Q, D_pos, D_neg,
                                         q_mask, d_pos_mask, d_neg_mask)
                    # NOTE(review): the third argument (presumably the
                    # margin) is fixed at 1.0; param_dict['hinge_margin'] is
                    # not applied here - confirm whether that is intended.
                    Loss = hinge_loss(S_pos, S_neg, 1.0)
                    Loss.backward()
                    optimizer.step()
                    t2 = time.time()
                    batch_count_tr += 1

                    # PyTorch >= 0.4 exposes scalar losses through .item();
                    # older releases only support .data[0].
                    if hasattr(Loss, "item"):
                        loss_value = Loss.item()
                    else:
                        loss_value = Loss.data[0]
                    print("epoch {} batch {} training cost: {} using {}s"
                          .format(epoch, batch_count + 1, loss_value, t2 - t1))
                    f_log.write(
                        "epoch {} batch {} training cost: {}, using {}s".
                        format(epoch, batch_count + 1, loss_value, t2 - t1)
                        + '\n')

                    # evaluate part
                    if batch_count_tr % 20 != 0 or valid_data is None:
                        continue
                    MAP, NDCGs = evaluate(config_path, model, valid_data,
                                          rel_path, mode="valid")
                    print(MAP, NDCGs)
                    # epoch is already 1-based, matching the training log.
                    valid_log.write(
                        "epoch {}, batch {}, MAP: {}, NDCGs: {} {} {} {}".
                        format(epoch, batch_count + 1, MAP,
                               NDCGs[1][0], NDCGs[1][1],
                               NDCGs[1][2], NDCGs[1][3]))
                    if MAP > best_MAP:
                        # save this best model
                        best_MAP = MAP
                        with open(best_map_file, 'wb') as f_MAP:
                            pickle.dump(best_MAP, f_MAP)
                        torch.save(model.state_dict(), model_file)
                        print("successfully saved model to the path {}".
                              format(model_file))
                    # NOTE(review): this repeats the NDCG and MAP values on
                    # the same log line with no separator; the layout is
                    # kept as-is for anything that parses existing logs.
                    valid_log.write("{} {} {} {}".format(
                        NDCGs[1][0], NDCGs[1][1], NDCGs[1][2], NDCGs[1][3]))
                    valid_log.write(" MAP: {}".format(MAP))
                    valid_log.write('\n')