def load_model_decode(model_dir, data, name, gpu, seg=True):
    """Restore a ``SeqModel`` from ``model_dir`` and evaluate it on one split.

    Args:
        model_dir: Path of the saved ``state_dict`` passed to ``torch.load``.
        data: Configuration/data object; its ``HP_gpu`` is overwritten with
            ``gpu`` before the model is built.
        name: Split name forwarded to ``evaluate`` and echoed in the output.
        gpu: Stored in ``data.HP_gpu``; when falsy the checkpoint is mapped
            onto the CPU while loading.
        seg: When true, precision/recall/F are printed next to accuracy;
            otherwise only accuracy is printed.

    Returns:
        The prediction results produced by ``evaluate``.
    """
    data.HP_gpu = gpu
    # %-formatting works as a call on Python 2 and 3; the doubled space keeps
    # the exact text the former print statement produced.
    print("Load Model from file:  %s" % model_dir)
    model = SeqModel(data)
    # A checkpoint saved from GPU tensors cannot be deserialized on a CPU-only
    # machine unless its storages are remapped, so remap when not using GPU.
    map_location = None if gpu else (lambda storage, loc: storage)
    model.load_state_dict(torch.load(model_dir, map_location=map_location))

    print("Decode %s data ..." % (name))
    start_time = time.time()
    speed, acc, p, r, f, pred_results = evaluate(data, model, name)
    end_time = time.time()
    time_cost = end_time - start_time
    if seg:
        print(
            "%s: time:%.2fs, speed:%.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (name, time_cost, speed, acc, p, r, f))
    else:
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f"
              % (name, time_cost, speed, acc))
    return pred_results
def train(data, save_model_dir, seg=True):
    """Run SGD over ``data.index_data`` one instance at a time.

    For ``data.HP_iteration`` epochs every instance is batchified, its
    negative log-likelihood loss is back-propagated and an optimizer step is
    taken immediately. ``save_model_dir`` and ``seg`` are accepted but not
    used by this variant; nothing is returned or saved.
    """
    model = SeqModel(data)
    # print "finished built model."
    trainable = filter(lambda param: param.requires_grad, model.parameters())
    optimizer = optim.SGD(trainable, lr=data.HP_lr, momentum=data.HP_momentum)

    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = temp_start = time.time()
        print("Epoch: %s/%s" %(idx,data.HP_iteration))
        # random.shuffle(data.index_data)
        ## set model in train model
        model.train()
        model.zero_grad()
        ## current only support batch size = 1 to compulate and accumulate
        ## to data.HP_batch_size update weights
        batch_size = 1
        train_num = len(data.index_data)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = min(start + batch_size, train_num)
            instance = data.index_data[start:end]
            if not instance:
                # The "+ 1" in total_batch yields one empty trailing slice.
                continue
            batch = batchify_with_label(instance, data.HP_gpu)
            gaz_list, batch_word, batch_entity, batch_gloss, batch_label, mask = batch
            loss, predict = model.neg_log_likelihood_loss(
                gaz_list, batch_word, batch_entity, batch_gloss,
                batch_label, mask)
            # print(predict)
            print("Batch_Id:",batch_id," Loss:",loss)
            loss.backward()
            optimizer.step()
            model.zero_grad()
            predict_check(predict, batch_label, mask)
            evaluate_result(predict, batch_label, mask)
            print("\n")
def load_model(model_dir, data, gpu):
    """Build a ``SeqModel`` for ``data`` and load its weights from ``model_dir``.

    Args:
        model_dir: Path of the saved ``state_dict`` passed to ``torch.load``.
        data: Configuration/data object; its ``HP_gpu`` is overwritten with
            ``gpu`` before the model is built.
        gpu: When falsy, the checkpoint storages are remapped onto the CPU so
            that a model trained on GPU can still be loaded.

    Returns:
        The ``SeqModel`` instance with the loaded parameters.
    """
    data.HP_gpu = gpu
    # %-formatting works as a call on Python 2 and 3; the doubled space keeps
    # the exact text the former print statement produced.
    print("Load Model from file:  %s" % model_dir)
    model = SeqModel(data)
    ## load model need consider if the model trained in GPU and load in CPU,
    ## or vice versa. ``map_location=None`` is torch.load's default behavior.
    map_location = None if gpu else (lambda storage, loc: storage)
    model.load_state_dict(torch.load(model_dir, map_location=map_location))
    return model
class Tagger():
    """Hold a trained ``SeqModel`` and its data settings for inference.

    The constructor reads the data settings with ``load_data_setting`` and
    loads the model weights from ``model_dir``.
    """

    def __init__(self, model_dir, dset_dir, gpu, seg):
        """Load settings from ``dset_dir`` and weights from ``model_dir``.

        Args:
            model_dir: Path of the saved ``state_dict``.
            dset_dir: Path handed to ``load_data_setting``.
            gpu: Stored in ``data.HP_gpu``; when falsy the checkpoint is
                remapped onto the CPU while loading.
            seg: Accepted for interface compatibility; not used here.
        """
        self.model_dir = model_dir
        self.dset_dir = dset_dir
        self.data = load_data_setting(dset_dir)
        self.data.HP_gpu = gpu
        self.model = SeqModel(self.data)
        # Without remapping, a checkpoint saved from GPU tensors fails to
        # deserialize on a CPU-only machine.
        map_location = None if gpu else (lambda storage, loc: storage)
        self.model.load_state_dict(
            torch.load(self.model_dir, map_location=map_location))

    def change_inlines(self, text):
        """Feed ``text`` to ``data.inference_single_with_gaz``; returns None."""
        self.data.inference_single_with_gaz(text)

    def load_model_inference(self, seg=True):
        """Run ``inference`` with the held data and model and return its result.

        ``seg`` is accepted but not used.
        """
        #self.model = SeqModel(self.data)
        return inference(self.data, self.model)
def load_model_decode(model_dir, data, name, gpu, seg=True):
    """Rebuild the model named by ``data.model_name``, load it and decode ``name``.

    ``data.HP_gpu`` is set to ``gpu``. The weights come from ``model_dir`` and
    the split ``name`` is scored with ``evaluate``, whose prediction results
    are returned. An unknown ``data.model_name`` trips the assertion.
    """
    data.HP_gpu = gpu
    print("Load Model from file: ", model_dir)

    # Lazy factories: only the selected model class is ever looked up.
    factories = {
        'WC-LSTM_model': lambda: CW_NER(data),
        'CNN_model': lambda: CNNmodel(data),
        'LSTM_model': lambda: BiLSTM_CRF(data),
    }
    build = factories.get(data.model_name)
    model = build() if build is not None else None
    assert (model is not None)
    model.load_state_dict(torch.load(model_dir))

    print("Decode %s data ..." % name)
    began = time.time()
    speed, acc, p, r, f, pred_results = evaluate(data, model, name)
    time_cost = time.time() - began
    # seg: boolean.
    # Tasks scored by token accuracy (e.g. POS, CCG) use False;
    # tasks scored by F-value (e.g. Word Segmentation, NER, Chunking) use True.
    if not seg:
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f" %
              (name, time_cost, speed, acc))
        return pred_results
    print(
        "%s: time:%.2fs, speed:%.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
        % (name, time_cost, speed, acc, p, r, f))
    return pred_results
def __init__(self, model_dir, dset_dir, gpu, seg):
    """Load data settings from ``dset_dir`` and model weights from ``model_dir``.

    Args:
        model_dir: Path of the saved ``state_dict``.
        dset_dir: Path handed to ``load_data_setting``.
        gpu: Stored in ``data.HP_gpu``; when falsy the checkpoint is remapped
            onto the CPU while loading.
        seg: Accepted for interface compatibility; not used here.
    """
    self.model_dir = model_dir
    self.dset_dir = dset_dir
    self.data = load_data_setting(dset_dir)
    self.data.HP_gpu = gpu
    self.model = SeqModel(self.data)
    # Without remapping, a checkpoint saved from GPU tensors fails to
    # deserialize on a CPU-only machine.
    map_location = None if gpu else (lambda storage, loc: storage)
    self.model.load_state_dict(
        torch.load(self.model_dir, map_location=map_location))
def load_model_decode(model_dir, data, name, gpu, seg=True):
    """Load a ``SeqModel`` checkpoint and evaluate it on the split ``name``.

    Args:
        model_dir: Path of the saved ``state_dict`` passed to ``torch.load``.
        data: Configuration/data object; its ``HP_gpu`` is overwritten with
            ``gpu`` before the model is built.
        name: Split name forwarded to ``evaluate`` and echoed in the output.
        gpu: Stored in ``data.HP_gpu``; when falsy the checkpoint is mapped
            onto the CPU while loading.
        seg: When true, precision/recall/F are printed next to accuracy;
            otherwise only accuracy is printed.

    Returns:
        The prediction results produced by ``evaluate``.
    """
    data.HP_gpu = gpu
    print("Load Model from file: ", model_dir)
    model = SeqModel(data)
    ## load model need consider if the model trained in GPU and load in CPU,
    ## or vice versa: remap storages to the CPU when not running on GPU.
    map_location = None if gpu else (lambda storage, loc: storage)
    model.load_state_dict(torch.load(model_dir, map_location=map_location))

    print("Decode %s data ..."%(name))
    start_time = time.time()
    speed, acc, p, r, f, pred_results = evaluate(data, model, name)
    end_time = time.time()
    time_cost = end_time - start_time
    if seg:
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"%(name, time_cost, speed, acc, p, r, f))
    else:
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f"%(name, time_cost, speed, acc))
    return pred_results
def train(data, save_model_dir, dset_dir, seg=True):
    """Train the model chosen by ``data.model_name`` and checkpoint on dev gains.

    Args:
        data: Configuration/data object providing the hyper-parameters
            (``HP_*``), ``model_name`` and ``train_Ids``.
        save_model_dir: Prefix of the checkpoint file name; the epoch index
            and the rounded dev score are appended to it.
        dset_dir: Destination passed to ``save_data_setting``.
        seg: When true the dev F-score selects the best model, otherwise the
            dev accuracy does.

    Each epoch shuffles ``data.train_Ids`` in place, trains, evaluates on
    "dev", saves the ``state_dict`` when the score improves, then evaluates
    on "test". Nothing is returned.
    """
    print("Training model...")
    data.show_data_summary()
    save_data_setting(data, dset_dir)

    model = None
    if data.model_name == 'WC-LSTM_model':
        model = CW_NER(data, type=2)
    elif data.model_name == 'CNN_model':
        model = CNNmodel(data)
    elif data.model_name == 'LSTM_model':
        model = BiLSTM_CRF(data)
    assert (model is not None)
    print("finished built model.")

    # loss_function = nn.NLLLoss()
    # Hand only the parameters flagged with requires_grad to the optimizer.
    parameters = filter(lambda p: p.requires_grad, model.parameters())

    # Author's notes on the optimizer choice:
    # SGD (stochastic gradient descent) updates the parameters from the
    # gradient of the cost function as soon as each datum is read. It
    # converges quickly, supports online updates and may escape poor local
    # optima, but it also tends to settle in local optima or saddle points,
    # and its update direction depends entirely on the current batch, which
    # makes it unstable.
    # SGD + momentum keeps part of the previous update direction and uses the
    # current batch gradient to adjust it, which adds stability, speeds up
    # learning and helps to leave local optima.
    # optim: SGD/Adagrad/AdaDelta/RMSprop/Adam. optimizer selection.
    # optimizer = optim.SGD(parameters, lr=data.HP_lr, momentum=data.HP_momentum)
    optimizer = optim.Adagrad(parameters, lr=data.HP_lr)

    best_dev = -1
    # training
    for idx in tqdm(range(data.HP_iteration)):
        epoch_start = time.time()
        print("\nEpoch: %s/%s" % (idx, data.HP_iteration))
        optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        batch_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1

        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue

            tag_seq, batch_label, mask, loss = None, None, None, None
            if data.model_name == 'WC-LSTM_model':
                gaz_list, reverse_gaz_list, batch_char, batch_bichar, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label_3(
                    instance, data.HP_gpu, data.HP_num_layer)
                instance_count += 1
                loss, tag_seq = model.neg_log_likelihood_loss(
                    gaz_list, reverse_gaz_list, batch_char, batch_charlen,
                    batch_label, mask)
            elif data.model_name == 'CNN_model':
                gaz_list, batch_char, batch_bichar, batch_charlen, batch_label, layer_gaz, gaz_mask, mask = batchify_with_label_2(
                    instance, data.HP_gpu, data.HP_num_layer)
                instance_count += 1
                loss, tag_seq = model.neg_log_likelihood_loss(
                    gaz_list, batch_char, batch_bichar, batch_charlen,
                    layer_gaz, gaz_mask, mask, batch_label)
            elif data.model_name == 'LSTM_model':
                gaz_list, batch_char, batch_bichar, batch_charlen, batch_wordrecover, batch_label, mask = batchify_with_label(
                    instance, data.HP_gpu, data.HP_num_layer)
                instance_count += 1
                loss, tag_seq = model.neg_log_likelihood_loss(
                    gaz_list, batch_char, batch_bichar, batch_charlen,
                    batch_label, mask)

            # The former ``assert (loss.size != torch.Size([]))`` compared the
            # bound method ``loss.size`` with a Size and so could never fail;
            # it was removed instead of guessing the intended check.
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            total_loss += loss.item()
            batch_loss += loss

            # NOTE(review): the update interval is keyed on data.HP_clip;
            # confirm this is intended rather than data.HP_batch_size.
            if end % data.HP_clip == 0:
                batch_loss.backward()
                optimizer.step()
                model.zero_grad()
                batch_loss = 0

        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))

        speed, acc, p, r, f, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        current_score = f if seg else acc
        if seg:
            print(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, p, r, f))
        else:
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                  (dev_cost, speed, acc))

        if current_score > best_dev:
            if seg:
                print(
                    "Exceed previous best f score: %.4f, \033[35mnew best f: %.4f\033[0m"
                    % (best_dev, current_score))
            else:
                print(
                    "Exceed previous best acc score:%.4f, \033[35mnew best acc: %.4f\033[0m"
                    % (best_dev, current_score))
            save_model_name = save_model_dir + '-' + str(idx) + '-' + str(
                round(current_score * 100, 1)) + ".model"
            torch.save(model.state_dict(), save_model_name)
            best_dev = current_score

        speed, acc, p, r, f, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if seg:
            print(
                "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (test_cost, speed, acc, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" %
                  (test_cost, speed, acc))
        gc.collect()
def train(data, save_model_dir,save_data_set, seg=True):
    """Train a ``SeqModel`` with SGD and save it whenever the dev score improves.

    Args:
        data: Configuration/data object providing the hyper-parameters
            (``HP_*``) and ``train_Ids``. ``data.HP_iteration`` is overwritten
            with 100 and ``data.train_Ids`` is shuffled in place.
        save_model_dir: Prefix of the checkpoint path; ``.<epoch>.model`` is
            appended.
        save_data_set: Accepted but unused (the code saving the data settings
            is commented out).
        seg: When true the dev F-score selects the best model, otherwise the
            dev accuracy does.

    Losses are accumulated and back-propagated whenever the running instance
    count ``end`` is a multiple of ``data.HP_batch_size``. Nothing is returned.
    """
    print ("Training model...")
    data.show_data_summary()
    #save_data_name = save_data_set
    #save_data_setting(data, save_data_name)
    model = SeqModel(data)
    print ("finished built model.")
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.SGD(parameters, lr=data.HP_lr, momentum=data.HP_momentum)
    best_dev = -1
    data.HP_iteration = 100
    ## start training data.HP_iteration
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" %(idx,data.HP_iteration))
        optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        sample_loss = 0
        batch_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        ## set model in train model
        model.train()
        model.zero_grad()
        # NOTE(review): the original comment said only batch size 1 is
        # supported, yet 10 is used here - confirm the model handles it.
        batch_size = 10
        train_num = len(data.train_Ids)
        total_batch = train_num//batch_size+1
        for batch_id in range(total_batch):
            start = batch_id*batch_size
            end = (batch_id+1)*batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            gaz_list, batch_word, batch_biword, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(instance, data.HP_gpu)
            # print "gaz_list:",gaz_list
            # exit(0)
            loss, tag_seq = model.neg_log_likelihood_loss(gaz_list, batch_word, batch_biword, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            # ``Tensor.item()`` is the supported accessor on PyTorch >= 0.4,
            # where indexing a 0-dim loss with ``.data[0]`` fails; older
            # releases only offer the indexing form.
            loss_value = loss.item() if hasattr(loss, "item") else loss.data[0]
            sample_loss += loss_value
            total_loss += loss_value
            batch_loss += loss

            if end%500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print("Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"%(end, temp_cost, sample_loss, right_token, whole_token,(right_token+0.)/whole_token))
                sys.stdout.flush()
                sample_loss = 0
            if end%data.HP_batch_size == 0:
                batch_loss.backward()
                optimizer.step()
                model.zero_grad()
                batch_loss = 0
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"%(end, temp_cost, sample_loss, right_token, whole_token,(right_token+0.)/whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"%(idx, epoch_cost, train_num/epoch_cost, total_loss))
        # exit(0)
        # continue
        speed, acc, p, r, f, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        if seg:
            current_score = f
            print("Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"%(dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f"%(dev_cost, speed, acc))

        if current_score > best_dev:
            if seg:
                print ("Exceed previous best f score:", best_dev)
            else:
                print ("Exceed previous best acc score:", best_dev)
            model_name = save_model_dir +'.'+ str(idx) + ".model"
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        # ## decode test
        speed, acc, p, r, f, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if seg:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"%(test_cost, speed, acc, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f"%(test_cost, speed, acc))
        gc.collect()
def train(data, save_model_dir, seg=True):
    """Train a ``SeqModel`` with SGD and save it whenever the dev score improves.

    Args:
        data: Configuration/data object providing the hyper-parameters
            (``HP_*``) and ``train_Ids``. ``data.HP_iteration`` is overwritten
            with 100 and ``data.train_Ids`` is shuffled in place.
        save_model_dir: Prefix used for the saved data settings
            (``<prefix>.dset``) and checkpoints (``<prefix>.<epoch>.model``).
        seg: When true the dev F-score selects the best model, otherwise the
            dev accuracy does.

    Instances are processed one at a time; their losses are accumulated and
    back-propagated every ``data.HP_batch_size`` instances. Nothing is
    returned.
    """
    # Former print statements are written as %-formatted calls so the block
    # parses on Python 3 and prints the same text on Python 2.
    print("Training model...")
    data.show_data_summary()
    save_data_name = save_model_dir + ".dset"
    save_data_setting(data, save_data_name)
    model = SeqModel(data)
    print("finished built model.")
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.SGD(parameters,
                          lr=data.HP_lr,
                          momentum=data.HP_momentum)
    best_dev = -1
    data.HP_iteration = 100
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        sample_loss = 0
        batch_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        ## set model in train model
        model.train()
        model.zero_grad()
        ## current only support batch size = 1 to compulate and accumulate
        ## to data.HP_batch_size update weights
        batch_size = 1
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            gaz_list, batch_word, batch_biword, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.HP_gpu)
            # print "gaz_list:",gaz_list
            # exit(0)
            loss, tag_seq = model.neg_log_likelihood_loss(
                gaz_list, batch_word, batch_biword, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            # ``Tensor.item()`` is the supported accessor on PyTorch >= 0.4,
            # where indexing a 0-dim loss with ``.data[0]`` fails; older
            # releases only offer the indexing form.
            loss_value = loss.item() if hasattr(loss, "item") else loss.data[0]
            sample_loss += loss_value
            total_loss += loss_value
            batch_loss += loss

            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(
                    " Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (end, temp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.) / whole_token))
                sys.stdout.flush()
                sample_loss = 0
            if end % data.HP_batch_size == 0:
                batch_loss.backward()
                optimizer.step()
                model.zero_grad()
                batch_loss = 0
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
              (end, temp_cost, sample_loss, right_token, whole_token,
               (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        # exit(0)
        # continue
        speed, acc, p, r, f, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        if seg:
            current_score = f
            print(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                  (dev_cost, speed, acc))

        if current_score > best_dev:
            if seg:
                print("Exceed previous best f score: %s" % best_dev)
            else:
                print("Exceed previous best acc score: %s" % best_dev)
            model_name = save_model_dir + '.' + str(idx) + ".model"
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        # ## decode test
        speed, acc, p, r, f, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if seg:
            print(
                "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (test_cost, speed, acc, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" %
                  (test_cost, speed, acc))
        gc.collect()
def load_model_inference(model_dir, data, name, gpu, seg=True):
    """Load ``SeqModel`` weights from ``model_dir`` and return ``inference``'s result.

    ``data.HP_gpu`` is overwritten with ``gpu`` before the model is built.
    ``name`` and ``seg`` are accepted but not used.
    """
    data.HP_gpu = gpu
    tagger = SeqModel(data)
    weights = torch.load(model_dir)
    tagger.load_state_dict(weights)
    return inference(data, tagger)
def train(data, save_model_dir, seg=True):
    """Train a ``SeqModel`` with per-batch SGD updates and visdom plots.

    Args:
        data: Configuration/data object providing the hyper-parameters
            (``HP_*``) and ``train_Ids``. ``data.HP_iteration`` is overwritten
            with 100 and ``data.train_Ids`` is shuffled in place.
        save_model_dir: Prefix for the saved data settings
            (``<prefix>.dset``) and directory-style prefix for the
            ``/model<epoch>`` checkpoints and ``/eval<epoch>.txt`` reports.
        seg: When true the F-score is the selection metric, otherwise the
            accuracy is.

    After every epoch the model is evaluated on "train", "dev" and "test".
    NOTE(review): the checkpoint is selected on the *test* score
    (``best_test``), as in the original code. Nothing is returned.
    """
    # Written as a call so the block parses on Python 3 as well.
    print("Training model...")
    data.show_data_summary()
    save_data_name = save_model_dir + ".dset"
    save_data_setting(data, save_data_name)
    model = SeqModel(data)
    # model = torch.nn.DataParallel(model, device_ids=[1, 2, 3, 0])
    ## catner
    loss_function = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=data.HP_lr,
                          momentum=data.HP_momentum)
    # optimizer = optim.Adam(model.parameters(), lr = data.HP_lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
    # best_dev = -1
    best_test = -1
    data.HP_iteration = 100
    vis = visdom.Visdom()
    losses = []
    all_F = [[0, 0, 0]]
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" %(idx, data.HP_iteration))
        ### catner update lr
        optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        sample_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        ## set model in train model
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        # batch_id = 0
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size+1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id+1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start: end]
            if not instance:
                continue
            batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(instance, data.HP_gpu)
            instance_count += 1
            loss, tag_seq = model.neg_log_likelihood_loss(batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            # ``Tensor.item()`` is the supported accessor on PyTorch >= 0.4,
            # where indexing a 0-dim loss with ``.data[0]`` fails; older
            # releases only offer the indexing form.
            loss_value = loss.item() if hasattr(loss, "item") else loss.data[0]
            sample_loss += loss_value
            total_loss += loss_value

            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"%(end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token))
                sys.stdout.flush()
                losses.append(sample_loss / 50.0)
                vis.line(np.array(losses),
                         X=np.arange(len(losses)),
                         win='loss',
                         opts={'title': 'loss', 'legend': ['loss']})
                sample_loss = 0
            loss.backward()
            if data.HP_clip:
                torch.nn.utils.clip_grad_norm(model.parameters(), 50.0)
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"%(end, temp_cost, sample_loss, right_token, whole_token,(right_token+0.)/whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"%(idx, epoch_cost, train_num/epoch_cost, total_loss))
        # continue
        speed, acc, p, r, f_train, _ = evaluate(data, model, "train")
        speed, acc, p, r, f_dev, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        # dev_cost also contains the time of the "train" evaluation above.
        dev_cost = dev_finish - epoch_finish

        if seg:
            print("Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"%(dev_cost, speed, acc, p, r, f_dev))
        else:
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f"%(dev_cost, speed, acc))

        # ## decode test
        speed, acc, p, r, f_test, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if seg:
            current_score = f_test
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"%(test_cost, speed, acc, p, r, f_test))
        else:
            current_score = acc
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f"%(test_cost, speed, acc))

        if current_score > best_test:
            if seg:
                print("Exceed previous best f score:", best_test)
            else:
                print ("Exceed previous best acc score:", best_test)
            model_name = save_model_dir +'/model'+ str(idx)
            torch.save(model.state_dict(), model_name)
            best_test = current_score
            # Text mode: the report is written as str, which a file opened
            # with 'wb' rejects on Python 3.
            with open(save_model_dir + '/eval' + str(idx) + ".txt", 'w') as eval_file:
                if seg:
                    eval_file.write("acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (acc, p, r, f_test))
                else:
                    eval_file.write("acc: %.4f" % acc)

        if seg:
            print("Current best f score:", best_test)
        else:
            print("Current best acc score:", best_test)

        all_F.append([f_train*100.0, f_dev*100.0, f_test*100.0])
        Fwin = 'F-score of {train, dev, test}'
        vis.line(np.array(all_F),
                 win=Fwin,
                 X=np.arange(len(all_F)),
                 opts={'title': Fwin, 'legend': ['train', 'dev', 'test']})
        gc.collect()
def train(data, save_model_dir, seg=True, ori_model_dir=None, use_attn=False, use_w2c=False):
    """Train a ``SeqModel`` with SGD, logging progress through ``logger``.

    Args:
        data: Configuration/data object providing the hyper-parameters
            (``HP_*``) and ``train_Ids``. ``data.HP_iteration`` is overwritten
            with 100 and ``data.train_Ids`` is shuffled in place.
        save_model_dir: Prefix for checkpoints (``<prefix>.<epoch>.model``)
            and for the data settings saved next to them.
        seg: When true the dev F-score selects the best model, otherwise the
            dev accuracy does.
        ori_model_dir: Unused (the code loading it is commented out).
        use_attn: Forwarded to ``SeqModel``.
        use_w2c: Forwarded to ``SeqModel``.

    A checkpoint is written when the dev score improves and also after the
    final epoch. Nothing is returned.
    """
    logger.info("Training model...")
    data.show_data_summary()
    save_data_name = save_model_dir + ".dset"
    # save_data_setting(data, save_data_name)
    model = SeqModel(data, use_attn=use_attn, use_w2c=use_w2c)
    # i added
    if data.HP_gpu:
        model = model.cuda()
    logger.info("finished built model.")
    # model.load_state_dict(torch.load(ori_model_dir))
    parameters = [p for p in model.parameters() if p.requires_grad]
    sgd_optimizer = optim.SGD(parameters,
                              lr=data.HP_lr,
                              momentum=data.HP_momentum)
    # optimizer = optim.Adam(parameters)
    best_dev = -1
    data.HP_iteration = 100
    ## start training
    for idx in range(data.HP_iteration):
        # idx = idx + 6
        epoch_start = time.time()
        temp_start = epoch_start
        logger.info("Epoch: %s/%s", idx, data.HP_iteration)
        # if (idx < 5):
        #     optimizer = optim.Adam(parameters)
        # else:
        #     optimizer = lr_decay(sgd_optimizer, idx + 5, data.HP_lr_decay, data.HP_lr)
        optimizer = lr_decay(sgd_optimizer, idx, data.HP_lr_decay, data.HP_lr)
        sample_loss = 0
        batch_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        ## set model in train model
        model.train()
        model.zero_grad()
        ## current only support batch size = 1 to compulate and accumulate
        ## to data.HP_batch_size update weights
        batch_size = 1
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            gaz_list, batch_word, batch_biword, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.HP_gpu)
            loss, tag_seq = model.neg_log_likelihood_loss(
                gaz_list, batch_word, batch_biword, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            # ``Tensor.item()`` is the supported accessor on PyTorch >= 0.4,
            # where indexing a 0-dim loss with ``.data[0]`` fails; older
            # releases only offer the indexing form.
            loss_value = loss.item() if hasattr(loss, "item") else loss.data[0]
            sample_loss += loss_value
            total_loss += loss_value
            batch_loss += loss

            # originally 500
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                logger.info(
                    " Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f",
                    end, temp_cost, sample_loss, right_token, whole_token,
                    (right_token + 0.) / whole_token)
                sys.stdout.flush()
                sample_loss = 0
            if end % data.HP_batch_size == 0:
                batch_loss.backward()
                optimizer.step()
                model.zero_grad()
                batch_loss = 0
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        logger.info(
            " Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f",
            end, temp_cost, sample_loss, right_token, whole_token,
            (right_token + 0.) / whole_token)
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        logger.info(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s",
            idx, epoch_cost, train_num / epoch_cost, total_loss)
        # exit(0)
        # continue
        speed, acc, p, r, f, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        if seg:
            current_score = f
            logger.info(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f",
                dev_cost, speed, acc, p, r, f)
        else:
            current_score = acc
            logger.info("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f",
                        dev_cost, speed, acc)

        improved = current_score > best_dev
        if improved:
            # The message used to be passed as a tuple, which logging renders
            # as the tuple's repr instead of a sentence.
            if seg:
                logger.info("Exceed previous best f score: %s", best_dev)
            else:
                logger.info("Exceed previous best acc score: %s", best_dev)

        # Always keep the weights of the final epoch, improved or not.
        if improved or idx == data.HP_iteration - 1:
            model_name = save_model_dir + '.' + str(idx) + ".model"
            torch.save(model.state_dict(), model_name)
            logger.info("model name: " + model_name)
            if idx > 3:
                save_data_setting(data,
                                  save_data_name + '.' + str(idx) + '.dset')
            # max() keeps the forced final-epoch save from lowering the
            # recorded best score.
            best_dev = max(best_dev, current_score)
        # ## decode test
        # speed, acc, p, r, f, _ = evaluate(data, model, "test")
        # test_finish = time.time()
        # test_cost = test_finish - dev_finish
        # if seg:
        #     logger.info((("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"%(test_cost, speed, acc, p, r, f))))
        # else:
        #     logger.info((("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f"%(test_cost, speed, acc))))
        gc.collect()
# -*- coding: utf-8 -*-
# train(data, save_model_dir, seg=True) -- reinforcement / active-learning variant.
#
# What the visible code establishes:
#   * Only `model.examiner` is optimized: the SGD optimizer is built from
#     `model.examiner.parameters()`, and `model.examiner.zero_grad()` is the
#     call that clears gradients after each `optimizer.step()`.
#   * `data.HP_iteration` is overwritten with 5 and `data.HP_lr` with 0.1, but
#     the outer loop is `for idx in range(1)`, i.e. a single pass over the
#     batches of `data.train_Ids`.
#   * Instances are registered with `model.add_instance(...)` /
#     `model.readd_instance(...)`; batch id 15 triggers repeated
#     `model.sample_train` / `model.crf_loss` / `model.reinforment_supervised`
#     updates.
#   * The policy loss is `-selection_score` multiplied by a reward built from
#     the "dev" accuracy returned by `evaluate` minus the running average of
#     earlier accuracies; the reward tensor is moved with `.cuda()`.
#   * `acc_list`, `p_list`, `r_list` collect "test" evaluation results while
#     `f_list` collects `sum_click` (not the F-score); the loops stop once
#     `len(p_list) >= 100`.
#   * No checkpoint is written (the `torch.save` call is commented out); the
#     collected lists are pickled to "exp_list.pkl".
#
# NOTE(review): statement nesting was lost in this copy (the function is
#   collapsed onto a few physical lines, so each inline `#` now swallows the
#   rest of its line); restore the layout from the upstream file before
#   running or editing -- TODO confirm which branch each statement belongs to.
# NOTE(review): uses Python 2 print statements next to print() calls, and
#   `.data[0]` indexing, which presumably targets an old PyTorch -- confirm
#   the intended interpreter and library versions.
# NOTE(review): "exp_list.pkl" is opened with mode "w"; pickle on Python 3
#   would need "wb" -- confirm.
# NOTE(review): `np.random.rand() > -1` is always true, so the `else` branch
#   paired with it looks unreachable -- confirm this is intended.
def train(data, save_model_dir, seg=True): print "Training model..." data.show_data_summary() save_data_name = save_model_dir + ".dset" save_data_setting(data, save_data_name) loss_function = nn.NLLLoss() model = SeqModel(data) #model=copy.deepcopy(premodel) optimizer = optim.SGD(model.examiner.parameters(), lr=data.HP_lr, momentum=data.HP_momentum) best_dev = -1 data.HP_iteration = 5 USE_CRF = True ## start training acc_list = [] p_list = [] r_list = [] f_list = [] map_list = [] #random.seed(2) print("total", ) data.HP_lr = 0.1 for idx in range(1): epoch_start = time.time() temp_start = epoch_start print("Epoch: %s/%s" % (idx, data.HP_iteration)) optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr) instance_count = 0 sample_id = 0 sample_loss = 0 total_loss = 0 total_rl_loss = 0 total_ml_loss = 0 total_num = 0.0 total_reward = 0.0 right_token_reform = 0 whole_token_reform = 0 #random.seed(2) #random.shuffle(data.train_Ids) #random.seed(seed_num) ## set model in train model model.examiner.train() model.examiner.zero_grad() model.topk = 5 model.examiner.topk = 5 batch_size = data.HP_batch_size batch_id = 0 train_num = len(data.train_Ids) total_batch = train_num // batch_size + 1 gamma = 0 cnt = 0 click = 0 sum_click = 0 sum_p_at_5 = 0.0 sum_p = 1.0 #if idx==0: # selected_data=[batch_id for batch_id in range(0,total_batch//1000)] tag_mask = None batch_ids = [i for i in range(total_batch)] for batch_idx in range(0, total_batch): # if end%500 == 0: # temp_time = time.time() # temp_cost = temp_time - temp_start # temp_start = temp_time # print(" Instance: %s; Time: %.2fs; loss: %.4f;"%(end, temp_cost, sample_loss)) # sys.stdout.flush() # sample_loss = 0 #updating the crf by selected position batch_id = batch_ids[batch_idx] start = batch_id * batch_size end = (batch_id + 1) * batch_size if end > train_num: end = train_num instance = data.train_Ids[start:end] if not instance: continue update_once = False start_time = time.time() 
#selected_data.append(batch_id) if batch_id == 15: for j in range(0, 10): __tot = 0.0 for i in range(5, 15): model.sample_train(0, i) batch_id_temp = batch_ids[i] start = batch_id_temp * batch_size end = (batch_id_temp + 1) * batch_size instance = data.train_Ids[start:end] batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label( instance, data.HP_gpu) real_batch_label = batch_label batch_label, tag_seq, tag_prob, tag_mask, score, indices, scores_ref = model.crf_loss( batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask) #_pred_label, _gold_label = recover_label(Variable(tag_seq.cuda()), real_batch_label.cuda(),mask.cuda(), data.label_alphabet, batch_wordrecover) _tag_mask = tag_mask pos_mask, score = model.reinforment_supervised( batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, real_batch_label, tag_seq, tag_prob, mask) __tot += score.sum() score.sum().backward() optimizer.step() model.examiner.zero_grad() __tot = 0.0 for i in range(10, -1, -1): print(i) model.sample_train(i + 1, 15) batch_id_temp = batch_ids[i] start = batch_id_temp * batch_size end = (batch_id_temp + 1) * batch_size instance = data.train_Ids[start:end] batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label( instance, data.HP_gpu) real_batch_label = batch_label batch_label, tag_seq, tag_prob, tag_mask, score, indices, scores_ref = model.crf_loss( batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask) #_pred_label, _gold_label = recover_label(Variable(tag_seq.cuda()), real_batch_label.cuda(),mask.cuda(), data.label_alphabet, batch_wordrecover) _tag_mask = tag_mask pos_mask, score = model.reinforment_supervised( batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, real_batch_label, tag_seq, tag_prob, mask) __tot += score.sum() 
score.sum().backward() optimizer.step() model.examiner.zero_grad() print("score", __tot / 14) model.train() if batch_id >= 15: t = np.random.randint(0, len(model.X_train)) if np.random.rand() > -1 or model.tag_mask_list[t].sum( ).data[0] <= 5: t = np.random.randint(len(model.X_train), total_batch) #This is for seq choosing #if batch_id>total_batch//100+100: # batch_id=batch_ids[batch_idx] # tmin=-1 # for i in range(len(model.X_train),total_batch): # batch_id=batch_ids[i] # start = batch_id*batch_size # end = (batch_id+1)*batch_size # if end >train_num: # end = train_num # instance = data.train_Ids[start:end] # if len(instance)==0: # continue # batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(instance, data.HP_gpu) # batch_label,tag_seq,tag_mask,score,indices,scores_ref=model.crf_loss(batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask) # if tmin==-1 or (scores_ref.cpu().data[0]》=tmin): # tmin=scores_ref.cpu().data[0] # t=i # temp=batch_ids[batch_idx] # batch_ids[batch_idx]=batch_ids[t] # batch_ids[t]=temp batch_id = batch_ids[batch_idx] start = batch_id * batch_size end = (batch_id + 1) * batch_size if end > train_num: end = train_num instance = data.train_Ids[start:end] batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label( instance, data.HP_gpu) real_batch_label = batch_label batch_label, tag_seq, tag_prob, tag_mask, score, indices, scores_ref = model.crf_loss( batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask) model.add_instance(batch_word, batch_label, tag_mask, instance, scores_ref.data[0]) #pred_label, gold_label = recover_label(Variable(tag_seq.cuda()), real_batch_label.cuda(),mask.cuda(), data.label_alphabet, batch_wordrecover) # u=False # for x in pred_label: # if not gold_label==pred_label: # u=True # break # #if 
u==True: # print "mask", tag_mask # print "gold", gold_label # print "pred", pred_label else: # tmin=model.scores_refs[t] # for i in range(len(model.X_train)): # if model.scores_refs[i]<=tmin: # tmin=model.scores_refs[i] # t=i instance = model.instances[t] batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label( instance, data.HP_gpu) real_batch_label = batch_label batch_label, tag_seq, tag_prob, tag_mask, score, indices, scores_ref = model.crf_loss( batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask, t=t) model.readd_instance(batch_label, mask, tag_mask, t, scores_ref.data[0]) print("score", score) #sum_p_at_5+=score sum_p += 1.0 end_time = time.time() if click + 5 >= 10: print("time", end_time - start_time) else: batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label( instance, data.HP_gpu) model.add_instance(batch_word, batch_label, tag_mask, instance, -100000.0) #print("Y_train",model.Y_train[-1]) # if batch_id>=total_batch//100+15: # for i in range(15): # model.train() # model.reevaluate_instance(mask) #print("loss",loss) #print(batch_wordlen) if batch_id < 15: if batch_id == 14: model.train() #print("Y_train",model.Y_train) print(batch_ids) speed, acc, p, r, f, _ = evaluate(data, model, "test") print(len(model.Y_train)) print("after", acc) print("Check", f) acc_list.append(acc) p_list.append(p) r_list.append(r) f_list.append(sum_click) sum_p_at_5 = 0.0 sum_p = 1.0 continue if batch_id == 15: model.train() #print("Y_train",model.Y_train) print(batch_ids) speed, acc, p, r, f, _ = evaluate(data, model, "test") print(len(model.Y_train)) print("after", acc) print("Check", f) acc_list.append(acc) p_list.append(p) r_list.append(r) f_list.append(sum_click) sum_p_at_5 = 0.0 sum_p = 1.0 click += model.topk sum_click += model.topk #click+=batch_wordlen[0] 
#sum_click+=batch_wordlen[0] if click >= 10: model.train() speed, acc, p, r, f, _ = evaluate(data, model, "test") print("Step:", len(model.Y_train)) print("after", acc) acc_list.append(acc) p_list.append(p) r_list.append(r) f_list.append(sum_click) sum_p_at_5 = 0.0 sum_p = 1.0 click -= 10 instance_count += 1 pos_mask, selection_score, select_reward = model.reinforment_reward( batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, real_batch_label, tag_seq, tag_prob, mask) if USE_CRF == True: start_time = time.time() t = np.random.randint(1, 10) #print("size",total_batch) speed, acc, p, r, f, _ = evaluate(data, model, "dev") end_time = time.time() if total_num != 0: ave_scores = total_reward / total_num else: ave_scores = 0.0 total_reward += acc total_num += 1 # print(batch_label) sample_scores = torch.from_numpy(np.asarray([acc])).float() ave_scores = torch.from_numpy(np.asarray([ave_scores])).float() if idx >= 0: reward_diff = Variable(sample_scores - ave_scores, requires_grad=False) else: reward_diff = select_reward reward_diff = reward_diff.cuda() rl_loss = -selection_score # B #if idx>=10: #print("rl_loss",rl_loss) print("reward", reward_diff) rl_loss = torch.mul(rl_loss, reward_diff.expand_as(rl_loss)) #b_size #print("reward",reward_diff) #rl_loss = rl_loss.sum() rl_loss.backward() optimizer.step() model.examiner.zero_grad() if len(p_list) >= 100: break if len(p_list) >= 100: break temp_time = time.time() temp_cost = temp_time - temp_start print("rl_loss", total_rl_loss) print("ml_loss", total_ml_loss) #print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"%(end, temp_cost, sample_loss, right_token, whole_token,(right_token+0.)/whole_token)) epoch_finish = time.time() epoch_cost = epoch_finish - epoch_start print( "Epoch: %s training finished. 
Time: %.2fs, speed: %.2fst/s, total loss: %s" % (idx, epoch_cost, train_num / epoch_cost, total_loss)) # continue speed, acc, p, r, f, _ = evaluate(data, model, "test") dev_finish = time.time() dev_cost = dev_finish - epoch_finish if seg: current_score = f print( "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (dev_cost, speed, acc, p, r, f)) else: current_score = acc print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" % (dev_cost, speed, acc)) if current_score > best_dev: if seg: print "Exceed previous best f score:", best_dev else: print "Exceed previous best acc score:", best_dev model_name = save_model_dir + '.' + str(idx) + ".model" #torch.save(model.state_dict(), model_name) best_dev = current_score ## decode test speed, acc, p, r, f, _ = evaluate(data, model, "test") test_finish = time.time() test_cost = test_finish - dev_finish if best_dev == current_score: best_ = test_cost, speed, acc, p, r, f if seg: print( "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (test_cost, speed, acc, p, r, f)) else: print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" % (test_cost, speed, acc)) gc.collect() file_dump = open("exp_list.pkl", "w") pickle.dump([acc_list, p_list, r_list, f_list, map_list], file_dump) file_dump.close()