def load_model_decode(data, name):
    """Load a trained SeqModel from ``data.load_model_dir`` and decode the
    dataset identified by *name*.

    Parameters
    ----------
    data : Data object carrying model architecture, paths and decode options.
    name : dataset split to decode (e.g. "test", "raw").

    Returns
    -------
    (pred_results, pred_scores) as produced by ``evaluate``.
    """
    print("Load Model from file: ", data.model_dir)
    model = SeqModel(data)
    # A model trained on GPU cannot be loaded directly on a CPU-only host:
    # remap the stored tensors unless a GPU is available (fixes the issue
    # the previous commented-out code hinted at but never handled).
    if getattr(data, "HP_gpu", False):
        model.load_state_dict(torch.load(data.load_model_dir))
    else:
        model.load_state_dict(
            torch.load(data.load_model_dir,
                       map_location=lambda storage, loc: storage))
    print("Decode %s data, nbest: %s ..." % (name, data.nbest))
    start_time = time.time()
    speed, acc, p, r, f, pred_results, pred_scores = evaluate(
        data, model, name, data.nbest)
    time_cost = time.time() - start_time
    if data.seg:
        # Segmentation-style tasks (NER, chunking, word seg) report P/R/F.
        print(
            "%s: time:%.2fs, speed:%.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (name, time_cost, speed, acc, p, r, f))
    else:
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f" %
              (name, time_cost, speed, acc))
    return pred_results, pred_scores
def load_model_decode(data, name):
    """Restore the trained model and decode dataset *name*.

    Returns the predicted labels, their n-best scores, the label
    probabilities, and the instance/speed bookkeeping from ``evaluate``.
    """
    print("Load Model from file: ", data.model_dir)
    model = SeqModel(data)
    # GPU-saved tensors must be remapped onto the CPU when no GPU is present.
    if not data.HP_gpu:
        state = torch.load(data.load_model_dir,
                           map_location=lambda storage, loc: storage.cpu())
    else:
        state = torch.load(data.load_model_dir)
    model.load_state_dict(state)
    (speed, acc, p, r, f, pred_results, pred_scores,
     probs, acc_instances, acc_speed) = evaluate(data, model, name, data.nbest)
    return pred_results, pred_scores, probs, acc_instances, acc_speed
def load_model_decode(data, name, label_flag=True):
    """Load the saved model and decode dataset *name*.

    ``pred_scores`` comes back empty because ``evaluate`` is invoked with
    ``nbest=None`` (scores are only produced for n-best decoding).
    """
    print("Load Model from file", data.model_dir)
    model = SeqModel(data)
    # CPU-only hosts need stored tensors remapped; with a GPU let torch
    # use its default placement.
    map_location = None if data.HP_gpu else (lambda storage, loc: storage)
    model.load_state_dict(
        torch.load(data.load_model_dir, map_location=map_location))

    start_time = time.time()
    speed, acc, p, r, f, pred_results, pred_scores = evaluate(
        data, model, name, nbest=None, label_flag=label_flag)
    time_cost = time.time() - start_time

    # Segmentation tasks (word seg, NER, chunking) need F1; pure tagging
    # tasks (POS, CCG) are scored by accuracy alone.
    if data.seg:
        print(
            "{}: time{:.2f}s, speed: {:.2f}st/s; acc: {:.4f}, p: {:.4f}, r: {:.4f}, f: {:.4f}"
            .format(name, time_cost, speed, acc, p, r, f))
    else:
        print("{}: time{:.2f}s, speed: {:.2f}st/s; acc: {:.4f}".format(
            name, time_cost, speed, acc))
    return pred_results, pred_scores
def load_model_decode(data, name): print "Load Model from file: ", data.model_dir model = SeqModel(data) model.load_state_dict(torch.load(data.load_model_dir)) print("Decode %s data, nbest: %s ..." % (name, data.nbest)) start_time = time.time() summary = evaluate(data, model, name, True, data.nbest) pred_results_tasks = [] pred_scores_tasks = [] range_tasks = len(data.index_of_main_tasks) for idtask in xrange(range_tasks): speed, acc, p, r, f, pred_results, pred_scores = summary[idtask] pred_results_tasks.append(pred_results) pred_scores_tasks.append(pred_scores) end_time = time.time() time_cost = end_time - start_time if data: print( "%s: time:%.2fs, speed:%.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (name, time_cost, speed, acc, p, r, f)) else: print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f" % (name, time_cost, speed, acc)) return pred_results_tasks, pred_scores_tasks
def load_model_decode(data):
    """Restore the best checkpoint from ``data.model_dir`` and run the
    evaluator over the raw dataset (all reporting happens in ``evaluate``)."""
    print("Load Model from dir: ", data.model_dir)
    checkpoint_path = data.model_dir + "/best_model.ckpt"
    model = SeqModel(data)
    model.load_state_dict(torch.load(checkpoint_path))
    evaluate(data, model, "raw")
def load_model_decode(data, name):
    """Load the trained two-task model, report metrics for both tasks,
    and return the predictions/scores of the second task (index 1)."""
    print("Load Model from file: ", data.model_dir)
    model = SeqModel(data)
    model.load_state_dict(torch.load(data.load_model_dir))
    print("Decode %s data, nbest: %s ..." % (name, data.nbest))
    start_time = time.time()
    speed, acc, p, r, f, pred_results, pred_scores = evaluate(
        data, model, name, data.nbest)
    time_cost = time.time() - start_time
    # One report line per task; the metric containers are indexed by task id.
    for task_id in (0, 1):
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (
            name, time_cost, speed,
            acc[task_id], p[task_id], r[task_id], f[task_id]))
    return pred_results[1], pred_scores[1]
def build_model(data):
    """Deployment helper: instantiate a SeqModel following the
    architecture described by *data* and load its trained weights
    from ``data.load_model_dir``.

    Returns the model ready for inference.
    """
    print("Load Model weights from file", data.load_model_dir)
    start_time = time.time()
    model = SeqModel(data)
    # On GPU, torch's default tensor placement is fine; on a CPU-only
    # host the stored tensors must be remapped.
    if data.HP_gpu:
        map_location = None
    else:
        map_location = lambda storage, loc: storage
    model.load_state_dict(
        torch.load(data.load_model_dir, map_location=map_location))
    time_cost = time.time() - start_time
    return model
def load_model_decode(data, name):
    """Load a trained SeqModel and decode the *name* split.

    Parameters
    ----------
    data : Data object with paths, ``nbest`` and ``seg`` options.
    name : split identifier passed through to ``evaluate``.

    Returns
    -------
    (pred_results, pred_scores) from ``evaluate``.
    """
    print("Load Model from file: ", data.model_dir)
    model = SeqModel(data)
    # FIX: the block's own dead comments noted that GPU-trained weights
    # cannot be loaded on a CPU-only host, but the code never handled it.
    # Remap stored tensors to CPU when no GPU is available.
    if getattr(data, "HP_gpu", False):
        model.load_state_dict(torch.load(data.load_model_dir))
    else:
        model.load_state_dict(
            torch.load(data.load_model_dir,
                       map_location=lambda storage, loc: storage))
    print("Decode %s data, nbest: %s ..."%(name, data.nbest))
    start_time = time.time()
    speed, acc, p, r, f, pred_results, pred_scores = evaluate(data, model, name, data.nbest)
    end_time = time.time()
    time_cost = end_time - start_time
    if data.seg:
        # Segmentation-style tasks also report precision/recall/F1.
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"%(name, time_cost, speed, acc, p, r, f))
    else:
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f"%(name, time_cost, speed, acc))
    return pred_results, pred_scores
def train(data):
    """Train the multi-task SeqModel described by *data* (Python 2 dialect).

    Saves the Data object next to the model, optionally warm-starts from a
    pretrained checkpoint, then runs epochs of mini-batch training. After
    each epoch the model is scored on dev; when the task metric is LAS the
    sequence-labeling output is converted to dependencies and scored
    externally. The best-scoring model is written to ``data.model_dir + ".model"``.
    """
    print "Training model..."
    data.show_data_summary()
    # Persist the Data object so decoding can rebuild the same alphabets.
    save_data_name = data.model_dir +".dset"
    data.save(save_data_name)
    model = SeqModel(data)
    if data.pretrained_model is not None:
        model_dict = model.state_dict()
        #We load the weights for the layers that we have pretrained (e.g. for language modeling)
        pretrained_dict = torch.load(data.pretrained_model)
        # Keep either all pretrained weights, or only the LSTM layers
        # (everything except the per-task output heads "hidden2tagList").
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if data.pretrained_part == data.PRETRAINED_ALL or (data.pretrained_part == data.PRETRAINED_LSTMS and "hidden2tagList" not in k)}
        # We overwrite entries in the existing state dict
        model_dict.update(pretrained_dict)
        # We load the new state dict
        model.load_state_dict(model_dict)
    # NOTE(review): loss_function is never used below -- the model computes
    # its own loss in neg_log_likelihood_loss; confirm before removing.
    loss_function = nn.NLLLoss()
    if data.optimizer.lower() == "sgd":
        #optimizer = optim.SGD(model.parameters(), lr=data.HP_lr, momentum=data.HP_momentum)
        optimizer = optim.SGD(model.parameters(), lr=data.HP_lr, momentum=data.HP_momentum,weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adagrad":
        optimizer = optim.Adagrad(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adadelta":
        optimizer = optim.Adadelta(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adam":
        optimizer = optim.Adam(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s"%(data.optimizer))
        exit(0)
    best_dev = -10
    # Default: every task is trained on every batch; overridden per-batch
    # in disjoint mode below.
    range_valid_tasks = range(data.HP_tasks)
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" %(idx,data.HP_iteration))
        if data.optimizer == "SGD":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        sample_id = 0
        sample_loss = 0
        total_loss = 0
        # Per-task running loss / token accuracy accumulators.
        sample_loss = {idtask: 0 for idtask in range(data.HP_tasks)}
        right_token = {idtask: 0 for idtask in range(data.HP_tasks)}
        whole_token = {idtask: 0 for idtask in range(data.HP_tasks)}
        random.shuffle(data.train_Ids)
        #We get the indexes where are the samples of each (shuffled) treebank
        if data.disjoint:
            treebank_indexes = {}
            for idxsample, sample in enumerate(data.train_Ids):
                # sample[-1] is assumed to carry the treebank id -- TODO confirm.
                if sample[-1] not in treebank_indexes:
                    treebank_indexes[sample[-1]] = []
                treebank_indexes[sample[-1]].append(idxsample)
        ## set model in train model
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        batch_id = 0
        train_num = len(data.train_Ids)
        total_batch = train_num//batch_size+1
        if data.disjoint:
            # Per-treebank sliding (start, end) cursor over its sample indexes.
            tb_idxs = {tb:(0,batch_size) for tb in treebank_indexes}
        for batch_id in range(total_batch):
            start = batch_id*batch_size
            end = (batch_id+1)*batch_size
            if end >train_num:
                end = train_num
            if data.disjoint:
                # Treebanks still holding unseen samples and not past their
                # cutoff epoch; pick one at random for this batch.
                eligible_treebanks = [t for t in treebank_indexes if tb_idxs[t][0] < len(treebank_indexes[t]) and idx < data.ignore_after_epoch[t] ]
                if eligible_treebanks == []:
                    break
                tb = random.choice(eligible_treebanks)
                range_valid_tasks = data.dataset_ids[tb]
                idx_init, idx_end = tb_idxs[tb]
                train_idxs = treebank_indexes[tb][idx_init:idx_end]
                instance = [data.train_Ids[idx_ins] for idx_ins in train_idxs] #data.train_Ids[train_idxs]
                tb_idxs[tb] = (idx_end, idx_end+batch_size)
            else:
                instance = data.train_Ids[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(instance, data.HP_gpu, inference=False)
            instance_count += 1
            loss, losses, tag_seq = model.neg_log_likelihood_loss(batch_word,batch_features, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask, range_valid_tasks, inference=False)
            log=True
            for idtask in range_valid_tasks:
                right, whole = predict_check(tag_seq[idtask], batch_label[idtask], mask)
                sample_loss[idtask]+= losses[idtask].data[0]
                right_token[idtask]+=right
                whole_token[idtask]+=whole
            # Periodic progress report roughly every 500 instances.
            if end%500 == 0 and log:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                log = False
                # NOTE(review): idtask here is the leftover loop variable from
                # the loop above (last valid task only) -- confirm intended.
                if sample_loss[idtask] > 1e8 or str(sample_loss) == "nan":
                    print "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                    exit(0)
                sys.stdout.flush()
                for aux_idtask in range(data.HP_tasks):
                    if whole_token[aux_idtask] == 0:
                        print ("Task %d (no samples found)"%(aux_idtask))
                    else:
                        # NOTE(review): eligible_treebanks only exists when
                        # data.disjoint is true -- this branch would raise
                        # NameError otherwise; verify against callers.
                        if data.inv_dataset_ids[aux_idtask] in eligible_treebanks:
                            print("Task %d %s Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"%(aux_idtask,data.inv_dataset_ids[aux_idtask],end, temp_cost, sample_loss[aux_idtask], right_token[aux_idtask], whole_token[aux_idtask],(right_token[aux_idtask]+0.)/whole_token[aux_idtask]))
                        else:
                            print("Task %d %s does not contain more samples; loss: %4f"%(aux_idtask,data.inv_dataset_ids[aux_idtask], losses[aux_idtask].data[0]))
                        sample_loss[aux_idtask] = 0
            total_loss += loss.data[0]
            loss.backward()
            optimizer.step()
            model.zero_grad()
        # End-of-epoch per-task report.
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        for aux_idtask in range(data.HP_tasks):
            if whole_token[aux_idtask] == 0:
                print ("Task %d (no samples found)"%(aux_idtask))
            else:
                name_tb = data.inv_dataset_ids[aux_idtask]
                print("Task %d %s Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"%(aux_idtask,name_tb,len(treebank_indexes[name_tb]), temp_cost, sample_loss[aux_idtask], right_token[aux_idtask], whole_token[aux_idtask],(right_token[aux_idtask]+0.)/whole_token[aux_idtask]))
                sample_loss[aux_idtask] = 0
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"%(idx,epoch_cost, train_num/epoch_cost, total_loss))
        if total_loss > 1e8 or str(total_loss) == "nan":
            print "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
            exit(0)
        # ---- dev evaluation ----
        summary = evaluate(data,model, "dev", False, False)
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        current_scores = []
        for idtask in xrange(0, data.HP_tasks):
            speed,acc,p,r,f,pred_labels,_,valid_indexes = summary[idtask]
            if data.seg:
                current_score = f
                current_scores.append(f)
                print("Task %d Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"%(idtask, dev_cost, speed, acc, p, r, f))
            else:
                current_score = acc
                current_scores.append(acc)
                print("Task %d Dev: time: %.2fs speed: %.2fst/s; acc: %.4f"%(idtask, dev_cost, speed, acc))
        # Collect predictions; the LAS-metric tasks are decoded to
        # dependency trees and scored externally below.
        pred_results_tasks = []
        pred_scores_tasks = []
        pred_las_tasks = []
        valid_indexes = None
        for idtask in xrange(data.HP_tasks):
            speed, acc, p, r, f, pred_results, pred_scores, pred_indexes = summary[idtask]
            pred_results_tasks.append(pred_results)
            # NOTE(review): this appends the list to itself; presumably it
            # should append pred_scores -- confirm before relying on it.
            pred_scores_tasks.append(pred_scores_tasks)
            if idtask in data.task_metric and data.task_metric[idtask] == "LAS":
                pred_las_tasks.append(pred_results)
                valid_indexes = pred_indexes
        with tempfile.NamedTemporaryFile() as f_decode_mt:
            with tempfile.NamedTemporaryFile() as f_decode_st:
                # If we are learning multiple task we move it as a sequence labeling
                if data.HP_main_tasks > 1:
                    data.decode_dir = f_decode_mt.name
                    decoded_st_dir = f_decode_st.name
                    data.write_decoded_results(pred_las_tasks, 'dev', indexes=valid_indexes)
                    split_char = "{}"
                else:
                    if data.decode_dir is None:
                        data.decode_dir = f_decode_st.name
                    decoded_st_dir = f_decode_st.name
                    data.write_decoded_results(pred_las_tasks, 'dev', indexes=valid_indexes)
                    split_char = "@"
                output_nn = open(data.decode_dir)
                tmp = tempfile.NamedTemporaryFile().name
                if data.offset:
                    decode_dependencies.decode_combined_tasks(output_nn, tmp, split_char)
                else:
                    print("decoding single task")
                    decode_dependencies.decode(output_nn, tmp, split_char)
                # External LAS scorer replaces the per-task dev score.
                current_score = decode_dependencies.evaluate_dependencies(data.gold_dev_dep, tmp)
        print "Current Score (from LAS)", current_score, "Previous best dev (from LAS)", best_dev
        if current_score > best_dev:
            if data.seg:
                print "Exceed previous best f score:", best_dev
            else:
                print "Exceed previous best acc score:", best_dev
            model_name = data.model_dir +".model"
            print "Overwritting model to", model_name
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        else:
            print("sofar the best "+repr(best_dev))
        # Linearly decay the per-task loss weights (floored at 0), if configured.
        if data.HP_tasks_weight_decays:
            print "Updating the weights using linear weight decay. ",
            print "The old weights were", data.HP_tasks_weights,
            data.HP_tasks_weights =[max(weight-decay,0) for weight,decay in zip(data.HP_tasks_weights, data.HP_tasks_weight_decays)]
            print ". The new weights are", data.HP_tasks_weights
            model.set_tasks_weights(data.HP_tasks_weights)
        gc.collect()
def load_model_decode(data):
    """Restore the trained weights and score the model on the indexed
    NER test split (reporting is handled inside ``evaluate``)."""
    print("Load Model from file: ", data.model_dir)
    model = SeqModel(data)
    trained_state = torch.load(data.load_model_dir)
    model.load_state_dict(trained_state)
    evaluate(data.ner_2_test_idx, data, model)
config_dic.get("glove_path"), emb_dim=config_dic.get("word_emb_dim")) pretrain_embeddings = build_pretrain_embeddings( word2vec, word_dic, emb_dim=config_dic.get("word_emb_dim")) else: pretrain_embeddings = None # initialize Model seq_model = SeqModel( config_dic, len(word_dic.token2id), len(char_dic.token2id), [len(sw_dic.token2id) for sw_dic in sw_dicts.values()], len(label_dic.token2id), pretrain_embeddings) # load Model print(f"Load model {args.model} !") seq_model.load_state_dict(torch.load(args.model)) word_dic.id2token = {v: k for k, v in word_dic.token2id.items()} char_dic.id2token = {v: k for k, v in char_dic.token2id.items()} for sp_key, sp in sps.items(): sw_dicts[sp_key].id2token = { v: k for k, v in sw_dicts[sp_key].token2id.items() } ################ test predict check ##################### print("============== Predict Check==========") true_seqs, pred_seqs, word_seqs, char_seqs = [], [], [], [] right_token, total_token = 0, 0 batch_size = config_dic.get("ner_batch_size") batch_steps = len(test_word_documents) // batch_size + 1
data.build_pretrain_emb() train(data) elif opt.whattodo == 3: # step 3, evaluate on test data and output results in bioc format, one doc one file data = Data() data.read_config(opt.config) status = data.status.lower() data.HP_gpu = torch.cuda.is_available() data.load(data.dset_dir) data.read_config(opt.config) data.show_data_summary() data.fix_alphabet() model = SeqModel(data) model.load_state_dict(torch.load(data.load_model_dir)) ner_output_dir = os.path.join(opt.testdata, "ner") if os.path.exists(ner_output_dir): shutil.rmtree(ner_output_dir) os.makedirs(ner_output_dir) else: os.makedirs(ner_output_dir) test_token, test_entity, _, test_name = preprocess.loadPreprocessData( opt.testdata) for i in tqdm(range(len(test_name))): doc_name = test_name[i] doc_token = test_token[i] doc_entity = test_entity[i]
def error_pipeline(data, opt):
    """Run the NER + relation-extraction pipeline over the test set and
    write per-document error reports (entity/relation false positives and
    false negatives) into an ``error/`` directory, one ``.txt`` per doc.

    Parameters
    ----------
    data : Data object with alphabets, dirs and RE feature maps.
    opt  : options holding ``data_file``, ``ner_dir`` and ``re_dir``
           (directories with the saved model/wordseq weights).
    """
    test_token, test_entity, test_relation, test_name = preprocess.loadPreprocessData(
        data.test_dir)
    # evaluate on test data and output results in bioc format, one doc one file
    data.load(opt.data_file)
    # -1 disables sentence-length truncation at test time.
    data.MAX_SENTENCE_LENGTH = -1
    data.show_data_summary()
    data.fix_alphabet()
    # ---- load the NER model and its word-sequence encoder ----
    seq_model = SeqModel(data)
    seq_model.load_state_dict(
        torch.load(os.path.join(opt.ner_dir, 'model.pkl')))
    seq_wordseq = WordSequence(data, False, True, True, data.use_char)
    seq_wordseq.load_state_dict(
        torch.load(os.path.join(opt.ner_dir, 'wordseq.pkl')))
    # ---- load the relation-classification model and its encoder ----
    classify_model = ClassifyModel(data)
    if torch.cuda.is_available():
        classify_model = classify_model.cuda(data.HP_gpu)
    classify_model.load_state_dict(
        torch.load(os.path.join(opt.re_dir, 'model.pkl')))
    classify_wordseq = WordSequence(data, True, False, True, False)
    classify_wordseq.load_state_dict(
        torch.load(os.path.join(opt.re_dir, 'wordseq.pkl')))
    error_dir = "error"
    if not os.path.exists(error_dir):
        os.makedirs(error_dir)
    for i in tqdm(range(len(test_name))):
        doc_name = test_name[i]
        doc_token = test_token[i]
        doc_entity = test_entity[i]
        doc_relation = test_relation[i]
        listEntityFP = []
        listEntityFN = []
        # Decode entities for this document with the NER model.
        ncrf_data = ner.generateDataForOneDoc(doc_token, doc_entity)
        data.raw_texts, data.raw_Ids = ner.read_instanceFromBuffer(
            ncrf_data, data.word_alphabet, data.char_alphabet,
            data.feature_alphabets, data.label_alphabet,
            data.number_normalized, data.MAX_SENTENCE_LENGTH)
        decode_results = ner.evaluateWhenTest(data, seq_wordseq, seq_model)
        entities = ner.translateNCRFPPintoEntities(doc_token, decode_results,
                                                   doc_name)
        # entity fn: gold entities with no exact (type, start, end) match
        # among predictions; render the sentence with [ ] around the span.
        for _, gold in doc_entity.iterrows():
            find = False
            for predict in entities:
                if gold['type'] == predict.type and gold[
                        'start'] == predict.start and gold[
                        'end'] == predict.end:
                    find = True
                    break
            if not find:
                context_token = doc_token[(
                    doc_token['sent_idx'] == gold['sent_idx'])]
                sequence = ""
                for _, token in context_token.iterrows():
                    if token['start'] == gold['start']:
                        sequence += "["
                    sequence += token['text']
                    if token['end'] == gold['end']:
                        sequence += "]"
                    sequence += " "
                listEntityFN.append("{} | {}\n{}\n".format(
                    gold['text'], gold['type'], sequence))
        # entity fp: predicted entities with no exact gold match.
        for predict in entities:
            find = False
            for _, gold in doc_entity.iterrows():
                if gold['type'] == predict.type and gold[
                        'start'] == predict.start and gold[
                        'end'] == predict.end:
                    find = True
                    break
            if not find:
                context_token = doc_token[(
                    doc_token['sent_idx'] == predict.sent_idx)]
                sequence = ""
                for _, token in context_token.iterrows():
                    if token['start'] == predict.start:
                        sequence += "["
                    sequence += token['text']
                    if token['end'] == predict.end:
                        sequence += "]"
                    sequence += " "
                listEntityFP.append("{} | {}\n{}\n".format(
                    predict.text, predict.type, sequence))
        # Relation extraction over the *predicted* entities.
        test_X, test_other = relation_extraction.getRelationInstanceForOneDoc(
            doc_token, entities, doc_name, data)
        relations = relation_extraction.evaluateWhenTest(
            classify_wordseq, classify_model, test_X, data, test_other,
            data.re_feature_alphabets[data.re_feature_name2id['[RELATION]']])
        listRelationFP = []
        listRelationFN = []
        # relation fn: gold relations unmatched by any prediction; both
        # argument orders are accepted as a match.
        for _, gold in doc_relation.iterrows():
            find = False
            gold_entity1 = doc_entity[(
                doc_entity['id'] == gold['entity1_id'])].iloc[0]
            gold_entity2 = doc_entity[(
                doc_entity['id'] == gold['entity2_id'])].iloc[0]
            for predict in relations:
                predict_entity1 = predict.node1
                predict_entity2 = predict.node2
                if gold['type'] == predict.type \
                        and gold_entity1['type']==predict_entity1.type and gold_entity1['start']==predict_entity1.start and gold_entity1['end']==predict_entity1.end \
                        and gold_entity2['type']==predict_entity2.type and gold_entity2['start']==predict_entity2.start and gold_entity2['end']==predict_entity2.end:
                    find = True
                    break
                elif gold['type'] == predict.type \
                        and gold_entity1['type']==predict_entity2.type and gold_entity1['start']==predict_entity2.start and gold_entity1['end']==predict_entity2.end \
                        and gold_entity2['type']==predict_entity1.type and gold_entity2['start']==predict_entity1.start and gold_entity2['end']==predict_entity1.end:
                    find = True
                    break
            if not find:
                # Render the span from the first argument's sentence through
                # the second's, bracketing both entity mentions.
                former = gold_entity1 if gold_entity1['start'] < gold_entity2[
                    'start'] else gold_entity2
                latter = gold_entity2 if gold_entity1['start'] < gold_entity2[
                    'start'] else gold_entity1
                context_token = doc_token[
                    (doc_token['sent_idx'] >= former['sent_idx'])
                    & (doc_token['sent_idx'] <= latter['sent_idx'])]
                # print("{}: {} | {}: {}".format(former['id'], former['text'], latter['id'], latter['text']))
                sequence = ""
                for _, token in context_token.iterrows():
                    if token['start'] == former['start'] or token[
                            'start'] == latter['start']:
                        sequence += "["
                    sequence += token['text']
                    if token['end'] == former['end'] or token['end'] == latter[
                            'end']:
                        sequence += "]"
                    sequence += " "
                listRelationFN.append("{} | {} | {}\n{}\n".format(
                    former['text'], latter['text'], gold['type'], sequence))
        # relation fp: predicted relations unmatched by any gold relation.
        for predict in relations:
            predict_entity1 = predict.node1
            predict_entity2 = predict.node2
            find = False
            for _, gold in doc_relation.iterrows():
                gold_entity1 = doc_entity[(
                    doc_entity['id'] == gold['entity1_id'])].iloc[0]
                gold_entity2 = doc_entity[(
                    doc_entity['id'] == gold['entity2_id'])].iloc[0]
                if gold['type'] == predict.type \
                        and gold_entity1['type']==predict_entity1.type and gold_entity1['start']==predict_entity1.start and gold_entity1['end']==predict_entity1.end \
                        and gold_entity2['type']==predict_entity2.type and gold_entity2['start']==predict_entity2.start and gold_entity2['end']==predict_entity2.end:
                    find = True
                    break
                elif gold['type'] == predict.type \
                        and gold_entity1['type']==predict_entity2.type and gold_entity1['start']==predict_entity2.start and gold_entity1['end']==predict_entity2.end \
                        and gold_entity2['type']==predict_entity1.type and gold_entity2['start']==predict_entity1.start and gold_entity2['end']==predict_entity1.end:
                    find = True
                    break
            if not find:
                former = predict_entity1 if predict_entity1.start < predict_entity2.start else predict_entity2
                latter = predict_entity2 if predict_entity1.start < predict_entity2.start else predict_entity1
                context_token = doc_token[
                    (doc_token['sent_idx'] >= former.sent_idx)
                    & (doc_token['sent_idx'] <= latter.sent_idx)]
                sequence = ""
                for _, token in context_token.iterrows():
                    if token['start'] == former.start or token[
                            'start'] == latter.start:
                        sequence += "["
                    sequence += token['text']
                    if token['end'] == former.end or token['end'] == latter.end:
                        sequence += "]"
                    sequence += " "
                listRelationFP.append("{} | {} | {}\n{}\n".format(
                    former.text, latter.text, predict.type, sequence))
        # One report file per document with the four error sections.
        with open(os.path.join(error_dir, doc_name + ".txt"), 'w') as fp:
            fp.write("######## ENTITY FN ERROR ##########\n\n")
            for item in listEntityFN:
                fp.write(item)
                fp.write('\n')
            fp.write("######## ENTITY FP ERROR ##########\n\n")
            for item in listEntityFP:
                fp.write(item)
                fp.write('\n')
            fp.write("######## RELATION FN ERROR ##########\n\n")
            for item in listRelationFN:
                fp.write(item)
                fp.write('\n')
            fp.write("######## RELATION FP ERROR ##########\n\n")
            for item in listRelationFP:
                fp.write(item)
                fp.write('\n')
def train(data):
    """Train the (LAN-style) SeqModel described by *data*.

    Optionally warm-starts from a snapshot in ``model_snapshot/``, trains
    with the configured optimizer, evaluates on dev and test each epoch,
    keeps the best dev-scoring checkpoint, and early-stops after 10 epochs
    without improvement.
    """
    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    # Persist the Data object alongside the model.
    data.save(save_data_name)
    model = SeqModel(data)
    # check to load pretrained model
    if data.use_crf:
        pretrain_model_path = os.path.join('model_snapshot', 'lan_crf.model')
    else:
        pretrain_model_path = os.path.join('model_snapshot', 'lan.model')
    if data.use_pre_trained_model and os.path.exists(pretrain_model_path):
        model.load_state_dict(torch.load(pretrain_model_path))
        print("load pretrained model success:%s" % pretrain_model_path)
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print("--------pytorch total params--------")
    print(pytorch_total_params)
    optimizer = None
    if data.optimizer.lower() == "sgd":
        # Only trainable parameters are handed to SGD.
        optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                     model.parameters()),
                              lr=data.HP_lr,
                              momentum=data.HP_momentum,
                              weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adagrad":
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adadelta":
        optimizer = optim.Adadelta(model.parameters(),
                                   lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adam":
        optimizer = optim.Adam(model.parameters(),
                               lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)
    best_dev = -10
    best_test = -10
    no_imprv_epoch = 0
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        # print (self.train_Ids)
        # every 5 epoch decay learning rate
        if idx % 5 == 0:
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        total_loss = 0
        ## set model in train model
        model.train()
        model.zero_grad()
        # Pre-compute the (start, end) slice boundaries of every batch,
        # including a final partial batch if one remains.
        start = 0
        end = start + data.HP_batch_size
        train_epochs = []
        while end <= len(data.train_Ids):
            train_epochs.append((start, end))
            start = end
            end = end + data.HP_batch_size
        if end > len(data.train_Ids) > start:
            train_epochs.append((start, len(data.train_Ids)))
        for sample_id, (start, end) in enumerate(train_epochs):
            instance = data.train_Ids[start: end]
            sample_loss = 0
            batch_word, batch_word_len, _, batch_word_recover, batch_label, mask, input_label_seq_tensor = batchify_with_label(
                instance, data.HP_gpu, data)
            instance_count += 1
            loss, tag_seq = model.neg_log_likelihood_loss(
                batch_word, batch_word_len, batch_label, mask,
                input_label_seq_tensor)
            sample_loss += loss.item()
            total_loss += loss.item()
            print("Epoch:%s,no_imprv_epoch:%s,Instance: %s" % (
                idx, no_imprv_epoch, sample_id))
            right, whole = predict_check(tag_seq, batch_label, mask,
                                         data.use_crf)
            print(" loss: %.4f, acc: %s/%s=%.4f" % (
                loss.item(), right, whole, (right + 0.) / whole * 100))
            if sample_loss > 1e8 or str(sample_loss) == "nan":
                print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
                exit(1)
            sys.stdout.flush()
            loss.backward()
            # Optional gradient clipping before the optimizer step.
            if data.whether_clip_grad:
                nn.utils.clip_grad_norm_(model.parameters(), data.clip_grad)
            optimizer.step()
            model.zero_grad()
            # break
        epoch_finish = time.time()
        if total_loss > 1e8 or str(total_loss) == "nan":
            print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
            exit(1)
        # ---- dev evaluation; current_score drives model selection ----
        speed, acc, report, f_value, \
            ner_acc, ner_p, ner_r, ner_f = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        if data.seg:
            current_score = f_value
            # current_score = sent_f1
            print("Dev: time: %.2fs, speed: %.2fst/s;\n"
                  "acc: %.4f, f_value: %.4f\n"
                  "ner_acc: %.4f, ner_p: %.4f, ner_r: %.4f, ner_f: %.4f\n"
                  "current f1:%.4f" % (
                      dev_cost, speed, acc, f_value,
                      ner_acc, ner_p, ner_r, ner_f,
                      current_score
                  ))
        else:
            current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" % (
                dev_cost, speed, acc))
        # ## decode test
        speed, acc, report, f_value, \
            ner_acc, ner_p, ner_r, ner_f = evaluate(data, model, "test")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        if data.seg:
            print("Test: time: %.2fs, speed: %.2fst/s;\n"
                  "acc: %.4f, f_value: %.4f\n"
                  "ner_acc: %.4f, ner_p: %.4f, ner_r: %.4f, ner_f: %.4f\n"
                  "current f1:%.4f" % (
                      dev_cost, speed, acc, f_value,
                      ner_acc, ner_p, ner_r, ner_f,
                      current_score
                  ))
        else:
            print("Test: time: %.2fs speed: %.2fst/s; acc: %.4f" % (
                dev_cost, speed, acc))
        if current_score > best_dev:
            # New best dev score: record the matching test score and
            # checkpoint the model (CRF and non-CRF use distinct names).
            if data.seg:
                best_test = f_value
                # best_test = sent_f1
                print("Exceed previous best avg f score:", best_dev)
            else:
                best_test = acc
                print("Exceed previous best acc score:", best_dev)
            if data.use_crf:
                result_file = "result_crf.txt"
                model_name = data.model_dir + "_crf.model"
            else:
                result_file = "result.txt"
                model_name = data.model_dir + ".model"
            with open(result_file, 'w', encoding='utf-8') as w:
                w.write(
                    "Save current best model in file:%s, iteration:%s/%s, best_test_f_score:%.5f\n"
                    "ner:\n"
                    " precision:%.5f, recall:%.5f, f1_score:%.5f\n"
                    "%s\n\n" % (
                        model_name, idx, data.HP_iteration, best_test,
                        ner_p, ner_r, ner_f, report))
            print("Save current best model in file:", model_name)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
            no_imprv_epoch = 0
        else:
            # early stop
            no_imprv_epoch += 1
            if no_imprv_epoch >= 10:
                print("early stop")
                print("Current best f score in dev", best_dev)
                print("Current best f score in test", best_test)
                break
        if data.seg:
            print("Current best f score in dev", best_dev)
            print("Current best f score in test", best_test)
        else:
            print("Current best acc score in dev", best_dev)
            print("Current best acc score in test", best_test)
        gc.collect()