def initialize_model_and_optimizer(self):
    """Construct the model and its optimizer from ``self.data``.

    The model class follows ``data.sentence_classification``; the optimizer
    is picked by name from ``data.optimizer`` (case-insensitive).  An
    unrecognized optimizer name aborts the process with exit code 1.
    """
    if self.data.sentence_classification:
        self.model = SentClassifier(self.data)
    else:
        self.model = SeqLabel(self.data)

    chosen = self.data.optimizer.lower()
    shared_kwargs = {"lr": self.data.HP_lr, "weight_decay": self.data.HP_l2}
    if chosen == "sgd":
        # SGD is the only optimizer that also takes momentum.
        self.optimizer = optim.SGD(self.model.parameters(),
                                   momentum=self.data.HP_momentum,
                                   **shared_kwargs)
        return
    factories = {
        "adagrad": optim.Adagrad,
        "adadelta": optim.Adadelta,
        "rmsprop": optim.RMSprop,
        "adam": optim.Adam,
    }
    if chosen in factories:
        self.optimizer = factories[chosen](self.model.parameters(),
                                           **shared_kwargs)
    else:
        print("Optimizer illegal: %s" % (self.data.optimizer))
        exit(1)
def load_model_decode(data, name):
    """Load a trained multi-task SeqLabel model and decode the *name* split.

    Returns two parallel lists (one entry per main task): predicted label
    sequences and their nbest scores.
    """
    print("Load Model from file: ", data.model_dir)
    model = SeqLabel(data)
    model.load_state_dict(torch.load(data.load_model_dir))
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print("Number_parameters:", pytorch_total_params)
    print("Decode %s data, nbest: %s ..." % (name, data.nbest))
    start_time = time.time()
    summary = evaluate(data, model, name, True, data.nbest)
    pred_results_tasks = []
    pred_scores_tasks = []
    range_tasks = len(data.index_of_main_tasks)
    for idtask in range(range_tasks):
        # NOTE: speed/acc/p/r/f keep only the LAST task's values for the
        # summary print below, matching the original behavior.
        speed, acc, p, r, f, pred_results, pred_scores = summary[idtask]
        pred_results_tasks.append(pred_results)
        pred_scores_tasks.append(pred_scores)
    end_time = time.time()
    time_cost = end_time - start_time
    # Fixed: the original tested `if data:` (always truthy, so the else
    # branch was dead).  Every other variant of this function keys the
    # verbose P/R/F print on `data.seg`.
    if data.seg:
        print(
            "%s: time:%.2fs, speed:%.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (name, time_cost, speed, acc, p, r, f))
    else:
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f" %
              (name, time_cost, speed, acc))
    return pred_results_tasks, pred_scores_tasks
def load_model_decode(data, name):
    """Restore a trained SeqLabel model and decode the *name* split,
    returning the predicted label sequences and their nbest scores."""
    print("Load Model from file: ", data.model_dir)
    model = SeqLabel(data)
    # NOTE(review): loading assumes the checkpoint's device matches the
    # current one; pass map_location to torch.load if a GPU-trained model
    # may be loaded on CPU (or vice versa) — TODO confirm.
    model.load_state_dict(torch.load(data.load_model_dir))
    print("Decode %s data, nbest: %s ..." % (name, data.nbest))
    decode_start = time.time()
    speed, acc, p, r, f, pred_results, pred_scores = evaluate(
        data, model, name, data.nbest)
    time_cost = time.time() - decode_start
    if data.seg:
        print(
            "%s: time:%.2fs, speed:%.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (name, time_cost, speed, acc, p, r, f))
    else:
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f" %
              (name, time_cost, speed, acc))
    return pred_results, pred_scores
def load_model_test(data, name):
    """Load a trained SeqLabel model and evaluate it on the *name* split.

    Returns the predicted label sequences and their scores.  Note that
    this variant's ``evaluate`` yields no accuracy figure — only P/R/F.
    """
    print("Load Model from file: ", data.dset_dir)
    model = SeqLabel(data)
    model.load_state_dict(torch.load(data.load_model_dir))
    start_time = time.time()
    speed, p, r, f, pred_results, pred_scores = evaluate(data, model, name)
    end_time = time.time()
    time_cost = end_time - start_time
    if data.seg:
        print("Test: time: %.2fs, speed: %.2fst/s; [p: %.4f, r: %.4f, f: %.4f]"
              % (time_cost, speed, p, r, f))
    else:
        # Fixed: the original printed an undefined `acc` here (NameError,
        # since evaluate() returns no accuracy in this variant); report the
        # f-score instead.
        print("%s: time:%.2fs, speed:%.2fst/s; f: %.4f"
              % (name, time_cost, speed, f))
    return pred_results, pred_scores
def load_model_decode(data, name):
    """Restore the target-domain model, report its parameter counts, and
    decode the *name* split (returns labels, scores, entities, probs)."""
    print("Load Model from file: ", data.model_dir)
    use_cuda = torch.cuda.is_available() and data.HP_gpu
    device = torch.device('cuda' if use_cuda else 'cpu')
    model_cls = SentClassifier if data.sentence_classification else SeqLabel
    model = model_cls(data).to(device)
    # Parameter accounting: total vs. the word + char embedding tables.
    n_all_param = sum(p.nelement() for p in model.parameters())
    emb_weights = (
        model.word_hidden.wordrep.word_embedding.weight,
        model.word_hidden.wordrep.char_feature.char_embeddings.weight,
    )
    n_emb_param = sum(w.nelement() for w in emb_weights)
    print("all parameters=%s, emb parameters=%s, other parameters=%s" %
          (n_all_param, n_emb_param, n_all_param - n_emb_param))
    model.load_state_dict(torch.load(data.load_model_dir))
    print("Decode %s data, nbest: %s ..." % (name, data.nbest))
    started = time.time()
    (speed, acc, p, r, f, pred_results, pred_scores,
     pred_entity_results, pred_prob_results) = evaluate(
        "Target", data, model, name, data.nbest)
    time_cost = time.time() - started
    if data.seg:
        print(
            "%s: time:%.2fs, speed:%.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (name, time_cost, speed, acc, p, r, f))
    else:
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f" %
              (name, time_cost, speed, acc))
    return pred_results, pred_scores, pred_entity_results, pred_prob_results
def load_model_decode(data, name):
    """Load the saved model and decode the *name* split, returning the
    predicted label sequences and their nbest scores."""
    print("Load Model from file: ", data.model_dir)
    model = SentClassifier(data) if data.sentence_classification else SeqLabel(data)
    model.load_state_dict(torch.load(data.load_model_dir))
    print("Decode %s data, nbest: %s ..." % (name, data.nbest))
    tic = time.time()
    speed, acc, p, r, f, pred_results, pred_scores = evaluate(
        data, model, name, data.nbest)
    time_cost = time.time() - tic
    if data.seg:
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
              % (name, time_cost, speed, acc, p, r, f))
    else:
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f"
              % (name, time_cost, speed, acc))
    return pred_results, pred_scores
def train(data):
    """Train the multi-task model, evaluating on dev after every epoch.

    The best model (by mean tagging score, or by EVALB/discodop F-score on
    decoded trees when ``data.optimize_with_evalb`` is set) is written to
    ``data.model_dir + ".model"``.  Per-epoch scores are appended to
    ``data.log_file`` when it is set.
    """
    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)
    print(model)
    optim_name = data.optimizer.lower()
    if optim_name == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=data.HP_lr,
                              momentum=data.HP_momentum,
                              weight_decay=data.HP_l2)
    elif optim_name == "adagrad":
        optimizer = optim.Adagrad(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif optim_name == "adadelta":
        optimizer = optim.Adadelta(model.parameters(), lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif optim_name == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif optim_name == "adam":
        optimizer = optim.Adam(model.parameters(), lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)
    best_dev = -sys.maxsize - 1
    best_dev_only_disco = -sys.maxsize - 1
    current_score_disco = -sys.maxsize - 1
    ## start training
    if data.log_file is not None:
        f_log = open(data.log_file, "w")
        f_log.write("\t".join(["Epoch", "F-Score", "F-Score-disco"]) + "\n")
        f_log_last_output = open(data.log_file + ".last_output", "w")
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        if data.optimizer == "SGD":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        total_loss = 0
        # Per-task running statistics; losses reset every 500 instances.
        sample_loss = {idtask: 0 for idtask in range(data.HP_tasks)}
        right_token = {idtask: 0 for idtask in range(data.HP_tasks)}
        whole_token = {idtask: 0 for idtask in range(data.HP_tasks)}
        random.shuffle(data.train_Ids)
        ## set model in train mode
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = min((batch_id + 1) * batch_size, train_num)
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.HP_gpu, False, False)
            instance_count += 1
            loss, losses, tag_seq = model.calculate_loss(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask,
                inference=False)
            for idtask in range(data.HP_tasks):
                right, whole = predict_check(tag_seq[idtask],
                                             batch_label[idtask], mask)
                sample_loss[idtask] += losses[idtask].item()
                right_token[idtask] += right
                whole_token[idtask] += whole
                if end % 500 == 0:
                    temp_time = time.time()
                    temp_cost = temp_time - temp_start
                    temp_start = temp_time
                    print(
                        "     Instance: %s; Task %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                        % (end, idtask, temp_cost, sample_loss[idtask],
                           right_token[idtask], whole_token[idtask],
                           (right_token[idtask] + 0.) / whole_token[idtask]))
                    # Fixed: check this task's loss value (the original
                    # stringified the whole dict, so the "nan" test never
                    # fired) and use a Python 3 print call (the original
                    # used a Python 2 print statement — a SyntaxError).
                    if sample_loss[idtask] > 1e8 or str(sample_loss[idtask]) == "nan":
                        print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
                        exit(0)
                    sys.stdout.flush()
                    sample_loss[idtask] = 0
            if end % 500 == 0:
                print("--------------------------------------------------------------------------")
            total_loss += loss.item()
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        for idtask in range(data.HP_tasks):
            print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                  % (end, temp_cost, sample_loss[idtask], right_token[idtask],
                     whole_token[idtask],
                     (right_token[idtask] + 0.) / whole_token[idtask]))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"
              % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        print("totalloss:", total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
            exit(1)
        init_eval_time = time.time()
        summary = evaluate(data, model, "dev", False, False)
        print("Evaluation time {}".format(time.time() - init_eval_time))
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        # One pass over the per-task summaries: print dev scores and gather
        # predictions for decoding.
        current_scores = []
        pred_results_tasks = []
        pred_scores_tasks = []
        for idtask in range(data.HP_tasks):
            speed, acc, p, r, f, pred_results, pred_scores = summary[idtask]
            if data.seg:
                current_scores.append(f)
                print("Task %d Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                      % (idtask, dev_cost, speed, acc, p, r, f))
            else:
                current_scores.append(acc)
                print("Task %d Dev: time: %.2fs speed: %.2fst/s; acc: %.4f"
                      % (idtask, dev_cost, speed, acc))
            pred_results_tasks.append(pred_results)
            # Fixed: the original appended pred_scores_tasks to itself
            # instead of this task's pred_scores.
            pred_scores_tasks.append(pred_scores)
        data.decode_dir = tempfile.NamedTemporaryFile().name
        data.write_decoded_results(pred_results_tasks, 'dev')
        if data.log_file is not None:
            copyfile(data.decode_dir, f_log_last_output.name)
        if data.optimize_with_evalb:
            # Turn the decoded label file into trees, then score with
            # discodop (discontinuous) or EVALB (continuous).
            tmp_trees_file = tempfile.NamedTemporaryFile()
            command = [
                "python", data.disco_decode_script,
                "--input", data.decode_dir,
                "--output", tmp_trees_file.name,
                "--disc" if data.disco_encoder is not None else "",
                "--split_char", data.label_split_char,
                "--os" if data.dummy_os else "",
                "--disco_encoder " + data.disco_encoder if data.disco_encoder is not None else "",
                "" if not data.add_leaf_unary_column else "--add_leaf_unary_column",
                "--path_reduced_tagset " + data.path_reduced_tagset if data.path_reduced_tagset is not None else ""
            ]
            p = subprocess.Popen(" ".join(command), stdout=subprocess.PIPE,
                                 shell=True)
            out, err = p.communicate()
            out = out.decode("utf-8")
            if data.disco_encoder is not None:
                command = ["discodop", "eval", data.gold_dev_trees,
                           tmp_trees_file.name, data.evalb_param_file,
                           "--fmt", "discbracket"]
                p = subprocess.Popen(" ".join(command),
                                     stdout=subprocess.PIPE, shell=True)
                out, err = p.communicate()
                out = out.decode("utf-8")
                current_score = float([
                    l for l in out.split("\n")
                    if l.startswith("labeled f-measure:")][0].rsplit(" ", 1)[1])
                # Computing the score for discontinuous trees only
                command = ["discodop", "eval", data.gold_dev_trees,
                           tmp_trees_file.name, data.evalb_param_file,
                           "--fmt", "discbracket", "--disconly"]
                p = subprocess.Popen(" ".join(command),
                                     stdout=subprocess.PIPE, shell=True)
                out, err = p.communicate()
                out = out.decode("utf-8")
                current_score_disco = float([
                    l for l in out.split("\n")
                    if l.startswith("labeled f-measure:")][0].rsplit(" ", 1)[1])
            else:
                command = [data.evalb, tmp_trees_file.name, data.gold_dev_trees]
                # For legacy with how previous models were trained
                if data.evalb_param_file is not None:
                    command.extend(["-p", data.evalb_param_file])
                p = subprocess.Popen(" ".join(command),
                                     stdout=subprocess.PIPE, shell=True)
                out, err = p.communicate()
                out = out.decode("utf-8")
                current_score = float([
                    l for l in out.split("\n")
                    if l.startswith("Bracketing FMeasure")][0].split("=")[1])
        os.remove(data.decode_dir)
        tagging_score = sum(current_scores) / len(current_scores)
        print("The tagging accuracy is:", tagging_score)
        if not data.optimize_with_evalb:
            current_score = tagging_score
        print("The overall dev score for this epoch is: {} ".format(current_score))
        print("The overall previous best dev score was: {} ".format(best_dev))
        if data.disco_encoder is not None:
            print("The dev score for this continuous trees in this epoch is: {}".format(current_score_disco))
            print("The previous discontinuous score of the best model is: {} ".format(best_dev_only_disco))
        if current_score > best_dev:
            model_name = data.model_dir + ".model"
            print("Overwriting model in", model_name)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
            best_dev_only_disco = current_score_disco
        summary = evaluate(data, model, "test", False)
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        for idtask in range(0, data.HP_tasks):
            speed, acc, p, r, f, _, _ = summary[idtask]
            if data.seg:
                print("Task %d Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                      % (idtask, test_cost, speed, acc, p, r, f))
            else:
                print("Task %d Test: time: %.2fs speed: %.2fst/s; acc: %.4f"
                      % (idtask, test_cost, speed, acc))
        if data.log_file is not None:
            f_log.write("{}\t{}\t{}\n".format(idx, current_score,
                                              current_score_disco))
            f_log.flush()
        gc.collect()
    # Fixed: close the log handles (they were leaked before).
    if data.log_file is not None:
        f_log.close()
        f_log_last_output.close()
def train(data):
    """Train a cross-domain model by alternating Source/Target batches.

    Even steps draw a Target batch, odd steps a Source batch.  When the
    Target dataset wraps around (``target_end``) an "epoch" ends: dev is
    decoded, the best model is checkpointed, and test is decoded.  A Source
    wrap-around only triggers a Source test decode.
    """
    print("Training model...")
    device = torch.device(
        'cuda' if torch.cuda.is_available() and data.HP_gpu else 'cpu')
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data).to(device)
    else:
        model = SeqLabel(data).to(device)
    # for name, param in model.named_parameters():
    #     if param.requires_grad:
    #         print(name)
    ## compute model parameter num (total vs. word+char embedding tables)
    n_all_param = sum([p.nelement() for p in model.parameters()])
    n_emb_param = sum([
        p.nelement() for p in (
            model.word_hidden.wordrep.word_embedding.weight,
            model.word_hidden.wordrep.char_feature.char_embeddings.weight)
    ])
    print("all parameters=%s, emb parameters=%s, other parameters=%s" %
          (n_all_param, n_emb_param, n_all_param - n_emb_param))
    if data.optimizer.lower() == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=data.HP_lr,
                              momentum=data.HP_momentum,
                              weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adagrad":
        optimizer = optim.Adagrad(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adadelta":
        optimizer = optim.Adadelta(model.parameters(), lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adam":
        optimizer = optim.Adam(model.parameters(), lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)
    best_dev = -10
    test_f = []   # per-epoch Target test score (f or acc)
    dev_f = []    # per-epoch Target dev score (f or acc)
    best_epoch = 0
    train_dataset_S = Multi_Task_Dataset(data.train_Ids_S, data.HP_batch_size)
    train_dataset_T = Multi_Task_Dataset(data.train_Ids_T, data.HP_batch_size)
    total_step = 0
    target_end, source_end = False, False
    epoch_idx = 0
    epoch_start = True  # this step is the start of an epoch
    ## start training
    while epoch_idx < data.HP_iteration:
        if epoch_start:
            epoch_start = False
            epoch_loss = 0
            epoch_start_time = time.time()
            print("Epoch: %s/%s" % (epoch_idx, data.HP_iteration))
            if data.optimizer == "SGD":
                optimizer = lr_decay(optimizer, epoch_idx,
                                     data.HP_lr_decay, data.HP_lr)
        model.train()
        model.zero_grad()
        # Alternate domains: even steps use Target data, odd steps Source.
        if total_step % 2 == 0:
            domain_tag = 'Target'
            batch_instance, target_end = train_dataset_T.next_batch()
        else:
            domain_tag = 'Source'
            batch_instance, source_end = train_dataset_S.next_batch()
        # NOTE(review): an empty batch skips the step counter AND the
        # end-of-epoch handling below — confirm next_batch() cannot return
        # an empty batch together with target_end/source_end == True.
        if len(batch_instance) == 0:
            continue
        original_words_batch, batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, batch_entity, mask = \
            batchify_with_label(batch_instance, data.HP_gpu, True,
                                data.sentence_classification)
        loss, entity_loss, atten_probs_loss = model.calculate_loss(
            original_words_batch, domain_tag, batch_word, batch_features,
            batch_wordlen, batch_char, batch_charlen, batch_charrecover,
            batch_label, batch_entity, mask)
        # Target batches are up-weighted by HP_target_loss_rate.
        # 2:1 for twitter 1.6:1 for bionlp 1.5:1 for broad twitter
        rate = data.HP_target_loss_rate if domain_tag == "Target" else 1.0
        loss_ = rate * loss + entity_loss + atten_probs_loss
        epoch_loss += loss_.item()
        loss_.backward()
        optimizer.step()
        model.zero_grad()
        total_step += 1
        ## evaluation
        if target_end:
            epoch_finish_time = time.time()
            epoch_cost = epoch_finish_time - epoch_start_time
            print("Epoch: %s training finished. Time: %.2fs" %
                  (epoch_idx, epoch_cost))
            print("totalloss:", epoch_loss)
            if epoch_loss > 1e8 or str(epoch_loss) == "nan":
                print(
                    "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                )
                exit(1)
                continue  # NOTE(review): unreachable after exit(1)
            ## decode Target dev
            speed, acc, p, r, f, _, _ = evaluate("Target", data, model, "dev")
            dev_finish_time = time.time()
            dev_cost = dev_finish_time - epoch_finish_time
            if data.seg:
                current_score = f
                print(
                    "Dev (Target): time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                    % (dev_cost, speed, acc, p, r, f))
            else:
                current_score = acc
                print("Dev (Target): time: %.2fs speed: %.2fst/s; acc: %.4f" %
                      (dev_cost, speed, acc))
            dev_f.append(current_score)
            if current_score > best_dev:
                best_epoch = epoch_idx
                if data.seg:
                    print("Exceed previous best f score:", best_dev)
                else:
                    print("Exceed previous best acc score:", best_dev)
                model_name = data.model_dir + ".model"
                print("Save current best model in file:", model_name)
                torch.save(model.state_dict(), model_name)
                best_dev = current_score
            ## decode Target test
            # NOTE(review): test is decoded every epoch so test_f stays
            # index-aligned with dev_f for the best_epoch report below.
            speed, acc, p, r, f, _, _ = evaluate("Target", data, model, "test")
            test_finish_time = time.time()
            test_cost = test_finish_time - dev_finish_time
            if data.seg:
                print(
                    "Test (Target): time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                    % (test_cost, speed, acc, p, r, f))
                test_f.append(f)
            else:
                print(
                    "Test (Target): time: %.2fs, speed: %.2fst/s; acc: %.4f"
                    % (test_cost, speed, acc))
                test_f.append(acc)
            gc.collect()
            print("The best f in epoch%s, dev:%.4f, test:%.4f" %
                  (best_epoch, dev_f[best_epoch], test_f[best_epoch]))
            ## epoch end set
            epoch_start = True
            target_end = False
            epoch_idx += 1
        if source_end:
            epoch_finish_time = time.time()
            ## decode test Source
            speed, acc, p, r, f, _, _ = evaluate("Source", data, model, "test")
            test_finish = time.time()
            test_cost = test_finish - epoch_finish_time
            if data.seg:
                print(
                    "Test (Source): time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                    % (test_cost, speed, acc, p, r, f))
            else:
                print(
                    "Test (Source): time: %.2fs, speed: %.2fst/s; acc: %.4f"
                    % (test_cost, speed, acc))
            source_end = False
def train(data):
    """Fine-tune a pre-trained SeqLabel model and checkpoint per epoch.

    Saves a checkpoint whenever the dev f-score improves and additionally
    every 50 epochs; evaluates on dev, test and a "raw" split each epoch.
    """
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    model = SeqLabel(data)
    # Load pre-trained weights.
    # NOTE(review): `model_path` and `map_location` are not parameters —
    # presumably module-level globals; confirm they are defined elsewhere.
    print('loading model %s' % model_path)
    model.load_state_dict(torch.load(model_path, map_location=map_location))
    print('data.seg:', data.seg)
    optimizer = ''  # NOTE(review): stays a str unless the optimizer is SGD
    if data.optimizer.lower() == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=data.HP_lr,
                              momentum=data.HP_momentum,
                              weight_decay=data.HP_l2)
    best_dev = -10
    print('data.HP_gpu:', data.HP_gpu)
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        if data.optimizer == "SGD":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        sample_id = 0
        sample_loss = 0  # reset every 500 instances
        total_loss = 0   # full loss over the epoch
        right_token = 0
        whole_token = 0
        # print("Before Shuffle: first input word list:", data.train_Ids[0][0])
        random.shuffle(data.train_Ids)
        print("Shuffle: first input word list:", data.train_Ids[0][0])
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        # batch_id = 0
        train_num = len(data.train_Ids)
        print('train_num:', train_num)  # number of training samples
        total_batch = train_num // batch_size + 1
        print('total_batch:', total_batch)
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, \
                batch_label, mask = batchify_with_label(
                    instance, data.HP_gpu, True, data.sentence_classification)
            instance_count += 1
            loss, tag_seq = model.calculate_loss(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask)
            # Compare predictions against gold labels.
            right, whole = predict_check(
                tag_seq, batch_label, mask, data.sentence_classification)
            right_token += right
            whole_token += whole
            sample_loss += loss.item()
            total_loss += loss.item()
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                      % (end, temp_cost, sample_loss, right_token, whole_token,
                         (right_token + 0.) / whole_token))
                if sample_loss > 1e8 or str(sample_loss) == "nan":
                    print(
                        "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                    )
                    exit(1)
                sys.stdout.flush()
                sample_loss = 0
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
              % (end, temp_cost, sample_loss, right_token, whole_token,
                 (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        print("total_loss:", total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            print(
                "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
            )
            exit(1)
        speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        current_score = f
        print(
            "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (dev_cost, speed, acc, p, r, f))
        if current_score > best_dev:
            print("Exceed previous best f score:", best_dev)
            model_name = data.model_dir + '.' + str(idx) + ".model"
            print("Save current best torch_model in file:", model_name)
            # Save the model at the end of this (best-so-far) epoch.
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        # Also save a checkpoint every 50 epochs.
        if idx % 50 == 0:
            model_name = data.model_dir + '.' + str(idx) + ".model"
            print('Save every 50 epoch in file: %s' % model_name)
            torch.save(model.state_dict(), model_name)
        speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        print(
            "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (test_cost, speed, acc, p, r, f))
        # Also evaluate on the self-added ("raw") samples.
        speed, acc, p, r, f, _, _ = evaluate(data, model, "raw")
        raw_finish = time.time()
        raw_cost = raw_finish - test_finish
        print(
            "Raw: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (raw_cost, speed, acc, p, r, f))
        gc.collect()
# NOTE(review): the statements below duplicate the tail of train() above and
# run at module import time; `data`, `model`, `idx`, `dev_finish`, `evaluate`
# and `map_location` must be defined elsewhere in the file — confirm this
# fragment is intentional and not a stray paste.
model_name = data.model_dir + '.' + str(idx) + ".model"
print('Save every 50 epoch in file: %s' % model_name)
torch.save(model.state_dict(), model_name)
speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
test_finish = time.time()
test_cost = test_finish - dev_finish
print(
    "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
    % (test_cost, speed, acc, p, r, f))
# Evaluate on the self-added ("raw") samples:
speed, acc, p, r, f, _, _ = evaluate(data, model, "raw")
raw_finish = time.time()
raw_cost = raw_finish - test_finish
print(
    "Raw: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
    % (raw_cost, speed, acc, p, r, f))
gc.collect()

if __name__ == '__main__':
    # train(data)
    # Evaluate a saved transfer checkpoint on the "raw" split instead of
    # training from scratch.
    model = SeqLabel(data)
    model.load_state_dict(
        torch.load('transfer_model/transfer.1.model',
                   map_location=map_location))
    speed, acc, p, r, f, _, _ = evaluate(data, model, "raw")
    raw_finish = time.time()
def train(data):
    """Train a labeler whose dev selection criterion is configurable.

    The epoch's "current_score" is either an external EVALB bracketing
    F-score (``data.optimize_with_evalb``), an external LAS-style score
    (``data.optimize_with_las``), or the tagger's own dev f/acc.  The best
    model is written to ``data.model_dir + ".model"``.
    """
    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)
    print(model)
    # loss_function = nn.NLLLoss()
    if data.optimizer.lower() == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=data.HP_lr,
                              momentum=data.HP_momentum,
                              weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adagrad":
        optimizer = optim.Adagrad(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adadelta":
        optimizer = optim.Adadelta(model.parameters(), lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adam":
        optimizer = optim.Adam(model.parameters(), lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)
    best_dev = -10
    # data.HP_iteration = 1
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        if data.optimizer == "SGD":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        sample_id = 0
        sample_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        # Shuffle ids and texts together so they stay aligned.
        train_data = list(zip(data.train_Ids, data.train_texts))
        random.shuffle(train_data)
        data.train_Ids, data.train_texts = zip(*train_data)
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        batch_id = 0
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            instance_texts = data.train_texts[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask, batch_word_text = batchify_with_label(
                instance, instance_texts, data.HP_gpu, True,
                data.sentence_classification)
            instance_count += 1
            loss, tag_seq = model.neg_log_likelihood_loss(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask,
                batch_word_text)
            right, whole = predict_check(tag_seq, batch_label, mask,
                                         data.sentence_classification)
            right_token += right
            whole_token += whole
            # print("loss:",loss.item())
            sample_loss += loss.item()
            total_loss += loss.item()
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                      % (end, temp_cost, sample_loss, right_token, whole_token,
                         (right_token + 0.) / whole_token))
                if sample_loss > 1e8 or str(sample_loss) == "nan":
                    # NOTE(review): exit deliberately disabled — training
                    # continues after a loss explosion here.
                    print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
                    # exit(1)
                sys.stdout.flush()
                sample_loss = 0
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
              % (end, temp_cost, sample_loss, right_token, whole_token,
                 (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"
              % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        print("totalloss:", total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            # NOTE(review): exit deliberately disabled here too.
            print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
            #exit(1)
            # continue
        speed, acc, p, r, f, pred_results, pred_scores = evaluate(
            data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        if data.optimize_with_evalb:
            # Score the decoded dev trees with an external EVALB binary and
            # use its bracketing F-measure as the selection criterion.
            with tempfile.NamedTemporaryFile("w", delete=False) as f_decode:
                if data.decode_dir is None:
                    data.decode_dir = f_decode.name
                decoded_st_dir = f_decode.name
                data.write_decoded_results(pred_results, 'dev')
                command = ["PYTHONPATH=" + data.tree2labels, "python",
                           data.evaluate, " --input ", decoded_st_dir,
                           " --gold ", data.gold_dev_trees, " --evalb ",
                           data.evalb, ">", f_decode.name + ".out"]
                os.system(" ".join(command))
                f_decode = open(f_decode.name + ".out", "r")
                current_score = float([
                    l for l in f_decode.read().split("\n")
                    if l.startswith("Bracketing FMeasure")][0].split("=")[1])
                print("Current Score (from EVALB)", current_score,
                      "Previous best dev (from EVALB)", best_dev)
        elif data.optimize_with_las:
            # Convert the decoded labels to CoNLL and score with conll_ud.
            with tempfile.NamedTemporaryFile("w", delete=False) as f_decode:
                if data.decode_dir is None:
                    data.decode_dir = f_decode.name
                decoded_st_dir = f_decode.name
                data.write_decoded_results(pred_results, 'dev')
                # Transforming the output file into a CoNLL file
                command = [  # "PYTHONPATH="+abspath(join(dirname(__file__), data.dep2labels)),
                    "python",
                    data.dep2labels + os.sep + "decode_output_file.py",
                    "--input", decoded_st_dir,
                    "--output", f_decode.name + ".out"]
                p = Popen(" ".join(command), stdout=subprocess.PIPE,
                          shell=True)
                out, err = p.communicate()
                command = ["python", data.conll_ud, f_decode.name + ".out",
                           data.gold_dev_trees]  # ,">",f_decode.name+".out"]
                p = Popen(" ".join(command), stdout=subprocess.PIPE,
                          shell=True)
                out, err = p.communicate()
                out = out.decode("utf-8")
                current_score = float(out.strip().split(":")[1])
                print("Current Score (from conll_ud)", current_score,
                      "Previous best dev (from conll_ud)", best_dev)
        else:
            if data.seg:
                current_score = f
                print("Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                      % (dev_cost, speed, acc, p, r, f))
            else:
                current_score = acc
                print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f"
                      % (dev_cost, speed, acc))
        if current_score > best_dev:
            if data.seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score:", best_dev)
            model_name = data.model_dir + ".model"
            #model_name = data.model_dir +'.'+ str(idx) + ".model"
            print("Save current best model in file:", model_name)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        # ## decode test
        speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if data.seg:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                  % (test_cost, speed, acc, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f"
                  % (test_cost, speed, acc))
        gc.collect()
def train(data):
    """Train a SeqLabel/SentClassifier model with the joint flat+graph (GCN) loss.

    Saves a dataset snapshot to ``data.model_dir + ".dset"``, runs
    ``data.HP_iteration`` epochs, evaluates on dev after each epoch, snapshots
    the model whenever the dev score improves, then reports test scores with
    the current weights.

    Args:
        data: project configuration/data holder (hyper-parameters, train_Ids,
            model_dir, ...).
    """
    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)
    # Project Optimizer wrapper: separate learning rate for the GCN part.
    optimizer = Optimizer('sgd', 'adam', model, 'gcn', lr=data.HP_lr,
                          lr_gcn=data.HP_lr_gcn, momentum=data.HP_momentum,
                          lr_decay=data.HP_lr_decay)
    best_dev = -10
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        instance_count = 0
        sample_loss = 0
        sample_loss_flat = 0
        sample_loss_graph = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        ## set model in train mode
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask, ans_matrix, wgt_matrix = batchify_with_label(data, instance, data.HP_gpu, True, data.sentence_classification)
            instance_count += 1
            loss_flat, loss_graph, loss, tag_seq = model.calculate_loss(idx, batch_word, batch_features, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask, ans_matrix, wgt_matrix)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            sample_loss += loss.item()
            sample_loss_flat += loss_flat.item()
            sample_loss_graph += loss_graph.item()
            total_loss += loss.item()
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(" Instance: %s; Time: %.2fs; loss_flat: %.4f; loss_graph: %.4f; loss: %.4f; acc: %.4f" % (end, temp_cost, sample_loss_flat, sample_loss_graph, sample_loss, (right_token + 0.) / whole_token))
                if sample_loss > 1e8 or str(sample_loss) == "nan":
                    print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
                    exit(1)
                sys.stdout.flush()
                sample_loss = 0
                sample_loss_flat = 0
                sample_loss_graph = 0
            loss.backward()
            if data.HP_clip is not None:
                torch.nn.utils.clip_grad_norm_(model.parameters(), data.HP_clip)
            optimizer.step()
            model.zero_grad()
            optimizer.update(idx + 1, batch_id + 1, total_batch)
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print(" Instance: %s; Time: %.2fs; loss_flat: %.4f; loss_graph: %.4f; loss: %.4f; acc: %.4f" % (end, temp_cost, sample_loss_flat, sample_loss_graph, sample_loss, (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s" % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        print("totalloss:", total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
            exit(1)
        # continue
        speed, p, r, f, _, _ = evaluate(data, model, "dev", idx)
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        # FIX: current_score was only assigned inside `if data.seg:`; with
        # data.seg False the comparison below raised NameError. This evaluate()
        # returns no separate accuracy, so f is the score in both cases.
        current_score = f
        if data.seg:
            # FIX: this log line previously said "Test:" although it reports dev.
            print("Dev: time: %.2fs, speed: %.2fst/s; [p: %.4f, r: %.4f, f: %.4f]" % (dev_cost, speed, p, r, f))
        if current_score > best_dev:
            if data.seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score:", best_dev)
            model_name = data.model_dir + '.' + str(idx) + ".model"
            print("Save current best model in file:", model_name)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        # ## decode test
        speed, p, r, f, _, _ = evaluate(data, model, "test", idx)
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if data.seg:
            print("Test: time: %.2fs, speed: %.2fst/s; [p: %.4f, r: %.4f, f: %.4f]" % (test_cost, speed, p, r, f))
        else:
            # FIX: this branch printed undefined `acc` (NameError); report f instead.
            print("Test: time: %.2fs, speed: %.2fst/s; f: %.4f" % (test_cost, speed, f))
        gc.collect()
def train():
    """Train a SeqLabel model using module-level ``data``, ``config``, ``gpu``
    and ``logger``; evaluates on dev after each epoch, saves the model when the
    dev f-score improves, and always reports test metrics afterwards.
    """
    total_batch = 0
    # model = CnnLstmCrf(config)
    model = SeqLabel(data)
    optimizer = optim.SGD(model.parameters(), lr=config.lr, momentum=config.momentum, weight_decay=config.l2)
    if gpu:
        model = model.cuda()
    best_dev = -10  # best dev f-score seen so far
    for idx in range(config.epoch):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, config.epoch))
        # SGD learning-rate decay is reapplied from the base lr each epoch.
        optimizer = lr_decay(optimizer, idx, config.lr_decay, config.lr)
        instance_count = 0
        sample_id = 0
        sample_loss = 0  # cleared every 500 batches (translated; NOTE(review): logging threshold below is 6400 — confirm)
        total_loss = 0  # full loss for one epoch (translated)
        right_token = 0  # number of correctly predicted tokens in one epoch (translated)
        whole_token = 0
        random.shuffle(data.train_ids)
        print("Shuffle: first input word list:", data.train_ids[0][1])
        model.train()
        model.zero_grad()
        batch_size = config.batch_size
        train_num = len(data.train_ids)
        print('batch_size:', batch_size, 'train_num:', train_num)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_ids[start:end]  # [char,word,feat,label]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, \
                batch_label, mask = batchify_sequence_labeling_with_label(instance, gpu, if_train=True)
            # loss, tag_seq = model(batch_char, batch_word, batch_features, mask, batch_charrecover, batch_wordlen, batch_label)
            loss, tag_seq = model.calculate_loss(batch_word, batch_features, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            # print('right_token/whole_token:', right_token/whole_token)
            sample_loss += loss.item()
            total_loss += loss.item()
            if end % 6400 == 0:
                # Periodic progress report; sample_loss is reset afterwards.
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (
                    end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token))
                if sample_loss > 1e8 or str(sample_loss) == "nan":
                    print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
                    exit(1)
                sample_loss = 0
            loss.backward()
            optimizer.step()
            model.zero_grad()
        # Final progress line for the tail of the epoch.
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (
            end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s" % (
            idx, epoch_cost, train_num / epoch_cost, total_loss))
        if total_loss > 1e8 or str(total_loss) == "nan":
            print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
            exit(1)
        logger.info("Epoch: %s, Total loss: %s" % (idx, total_loss))
        # Dev evaluation; f-score drives model selection.
        speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        current_score = f
        print("Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (dev_cost, speed, acc, p, r, f))
        logger.info(
            "Epoch: %s, Loss: %s, Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (
                idx, total_loss, dev_cost, speed, acc, p, r, f))
        if current_score > best_dev:
            model_name = config.model_path + '.' + str(idx) + '.model'
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
            # logger.info("data:dev, epoch:%s, f1:%s, precision:%s, recall:%s" % (idx, current_score, p, r))
        # Test evaluation is logged every epoch regardless of dev improvement.
        speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        logger.info("Epoch: %s, Loss: %s, Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (
            idx, total_loss, test_cost, speed, acc, p, r, f))
class NCRF:
    """High-level wrapper around the NCRF data/model/optimizer triple.

    Owns a ``Data`` configuration object, a model (``SeqLabel`` or
    ``SentClassifier``) and a torch optimizer, and exposes configuration,
    training, save/load and decoding entry points.
    """

    def __init__(self):
        # print("Python Version: %s.%s"%(sys.version_info[0],sys.version_info[1]))
        # print("PyTorch Version:%s"%(torch.__version__))
        # print("Process ID: ", os.getpid())
        self.data = Data()
        self.data.HP_gpu = torch.cuda.is_available()
        if self.data.HP_gpu:
            self.data.device = 'cuda'
        # print("GPU:", self.data.HP_gpu, "; device:", self.data.device)
        self.optimizer = None
        self.model = None

    def read_data_config_file(self, config_dir):
        """Load configuration from the config file at ``config_dir``."""
        self.data.read_config(config_dir)

    def manual_data_setting(self, setting_dict):
        ## set data through manual dict, all value should be in string format.
        self.data.manual_config(setting_dict)

    def initialize_model_and_optimizer(self):
        """Build the model for the configured task and the optimizer named by
        ``data.optimizer`` (sgd/adagrad/adadelta/rmsprop/adam); exits on an
        unknown optimizer name."""
        if self.data.sentence_classification:
            self.model = SentClassifier(self.data)
        else:
            self.model = SeqLabel(self.data)
        if self.data.optimizer.lower() == "sgd":
            self.optimizer = optim.SGD(self.model.parameters(), lr=self.data.HP_lr, momentum=self.data.HP_momentum, weight_decay=self.data.HP_l2)
        elif self.data.optimizer.lower() == "adagrad":
            self.optimizer = optim.Adagrad(self.model.parameters(), lr=self.data.HP_lr, weight_decay=self.data.HP_l2)
        elif self.data.optimizer.lower() == "adadelta":
            self.optimizer = optim.Adadelta(self.model.parameters(), lr=self.data.HP_lr, weight_decay=self.data.HP_l2)
        elif self.data.optimizer.lower() == "rmsprop":
            self.optimizer = optim.RMSprop(self.model.parameters(), lr=self.data.HP_lr, weight_decay=self.data.HP_l2)
        elif self.data.optimizer.lower() == "adam":
            self.optimizer = optim.Adam(self.model.parameters(), lr=self.data.HP_lr, weight_decay=self.data.HP_l2)
        else:
            print("Optimizer illegal: %s" % (self.data.optimizer))
            exit(1)

    def initialize_data(self, input_list=None):
        """Build alphabets (and pretrained embeddings when word embeddings are
        in use) from ``input_list``."""
        self.data.initial_alphabets(input_list)
        if self.data.use_word_emb and self.data.use_word_seq:
            self.data.build_pretrain_emb()

    def initialization(self, input_list=None):
        ## must initialize data before initialize model and optimizer, as alphabet size and pretrain emb matters
        # NOTE(review): the string below is assigned to self.num_ instead of
        # being this method's docstring — looks like a mangled edit; confirm.
        self.num_ = '''
            input_list: [train_list, dev_list, test_list]
            train_list/dev_list/test_list: [sent_list, label_list, feature_list]
            sent_list: list of list [[word1, word2,...],...,[wordx, wordy]...]
            label_list: if sentence_classification:
                            list of labels [label1, label2,...labelx, labely,...]
                        else:
                            list of list [[label1, label2,...],...,[labelx, labely,...]]
            feature_list: if sentence_classification:
                            list of labels [[feat1, feat2,..],...,[feat1, feat2,..]], len(feature_list)= sentence_num
                        else:
                            list of list [[[feat1, feat2,..],...,[feat1, feat2,..]],...,[[feat1, feat2,..],...,[feat1, feat2,..]]], , len(feature_list)= sentence_num
        '''
        self.initialize_data(input_list)
        # NOTE(review): initialize_model_and_optimizer() returns None, so
        # self.and_optimizer is always None — presumably a mangled statement;
        # verify against upstream history.
        self.and_optimizer = self.initialize_model_and_optimizer()

    def self_generate_instances(self):
        """Generate train/dev/test instances from the data already configured."""
        self.data.generate_instance('train')
        self.data.generate_instance('dev')
        self.data.generate_instance('test')

    def generate_instances_from_list(self, input_list, name):
        """Generate instances for split ``name`` from an in-memory ``input_list``."""
        return self.data.generate_instance_from_list(input_list, name)

    def save(self, model_dir="ncrf.model"):
        """Serialize data config, model weights and optimizer state to ``model_dir``."""
        # print("Save model to file: ", model_dir)
        the_dict = {
            'data': self.data,
            'state_dict': self.model.state_dict(),
            'optimizer': self.optimizer.state_dict()
        }
        torch.save(the_dict, model_dir)

    def load(self, model_dir="ncrf.model"):
        """Restore data config, model weights and optimizer state from ``model_dir``."""
        the_dict = torch.load(model_dir)
        self.data = the_dict['data']
        self.data.silence = True
        ## initialize the model and optimizer befor load state dict
        self.initialize_model_and_optimizer()
        self.model.load_state_dict(the_dict['state_dict'])
        self.optimizer.load_state_dict(the_dict['optimizer'])
        # print("Model loaded from file: ", model_dir)

    def train(self, train_Ids=None, save_model_dir=None):
        '''
        train_Ids: list of words, chars and labels, various length. [[words, features, chars, labels],[words, features, chars,labels],...]
            words: word ids for one sentence. (batch_size, sent_len)
            features: features ids for one sentence. (batch_size, sent_len, feature_num)
            chars: char ids for on sentences, various length. (batch_size, sent_len, each_word_length)
            labels: label ids for one sentence. (batch_size, sent_len)
        save_model_dir: model name to be saved
        '''
        if train_Ids:
            self.data.train_Ids = train_Ids
        # print(self.data.train_Ids[0])
        print('-----begin train------')
        # exit(0)
        best_dev = -10
        best_model = None
        for idx in range(self.data.HP_iteration):
            epoch_start = time.time()
            temp_start = epoch_start
            # print("Epoch: %s/%s" %(idx,self.data.HP_iteration))
            if self.data.optimizer == "SGD":
                self.optimizer = lr_decay(self.optimizer, idx, self.data.HP_lr_decay, self.data.HP_lr)
            instance_count = 0
            sample_id = 0
            sample_loss = 0
            total_loss = 0
            right_token = 0
            whole_token = 0
            random.shuffle(self.data.train_Ids)
            # NOTE(review): first_list is only used by the commented print below.
            first_list = ", ".join([
                self.data.word_alphabet.get_instance(a)
                for a in self.data.train_Ids[0][0]
            ])
            # print("Shuffle: first input: [%s]" %(first_list))
            ## set model in train model
            self.model.train()
            batch_size = self.data.HP_batch_size
            batch_id = 0
            train_num = len(self.data.train_Ids)
            total_batch = train_num // batch_size + 1
            for batch_id in range(total_batch):
                self.optimizer.zero_grad()
                start = batch_id * batch_size
                end = (batch_id + 1) * batch_size
                if end > train_num:
                    end = train_num
                instance = self.data.train_Ids[start:end]
                if not instance:
                    continue
                batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_word_text, batch_label, mask = batchify_with_label(
                    instance, self.data.HP_gpu, True, self.data.sentence_classification)
                instance_count += 1
                loss, tag_seq = self.model.calculate_loss(
                    batch_word, batch_features, batch_wordlen, batch_char,
                    batch_charlen, batch_charrecover, batch_word_text,
                    batch_label, mask)
                right, whole = predict_check(tag_seq, batch_label, mask, self.data.sentence_classification)
                right_token += right
                whole_token += whole
                # print("loss:",loss.item())
                sample_loss += loss.item()
                total_loss += loss.item()
                if end % 300000 == 0:
                    # Periodic progress + NaN/explosion guard; sample_loss resets here.
                    temp_time = time.time()
                    temp_cost = temp_time - temp_start
                    temp_start = temp_time
                    print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
                          (end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token))
                    if sample_loss > 1e8 or str(sample_loss) == "nan":
                        print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
                        exit(1)
                    sys.stdout.flush()
                    sample_loss = 0
                loss.backward()
                self.optimizer.step()
            temp_time = time.time()
            temp_cost = temp_time - temp_start
            # print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"%(end, temp_cost, sample_loss, right_token, whole_token,(right_token+0.)/whole_token))
            epoch_finish = time.time()
            epoch_cost = epoch_finish - epoch_start
            print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s" %
                  (idx, epoch_cost, train_num / epoch_cost, total_loss))
            # print("totalloss:", total_loss)
            if total_loss > 1e8 or str(total_loss) == "nan":
                print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
                exit(1)
            # continue
            speed, f = evaluate(self.data, self.model, "dev")
            dev_finish = time.time()
            dev_cost = dev_finish - epoch_finish
            current_score = f
            print("Dev: time: %.2fs, speed: %.2fst/s; f: %.4f" % (dev_cost, speed, f))
            if current_score > best_dev:
                # if self.data.seg:
                print("Exceed previous best f score:", best_dev)
                # Test is only evaluated when dev improved; model saving is disabled.
                _, f = evaluate(self.data, self.model, "test")
                print("Test: f: %.4f" % (f))
                # _ , f = evaluate(self.data, self.model, "test")
                # if self.data.seg:
                #     print("Test: f: %.4f"%(f))
                # else:
                #     print("Exceed previous best f score:", best_dev)
                # if save_model_dir == None:
                #     model_name = self.data.model_dir + ".model"
                # else:
                #     model_name = save_model_dir + ".model"
                # self.save(model_name)
                # torch.save(model.state_dict(), model_name)
                best_dev = current_score
                # best_model = model_name
            ## decode test
            # else:
            #     print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f"%(test_cost, speed, acc))
            gc.collect()
        # if best_model != None:
        #     self.load(best_model)
        #     _ , f = evaluate(self.data, self.model, "test")
        #     print("Test: f: %.4f"%(f))

    # def evaluate(self):

    def decode(self, raw_Ids):
        '''
        raw_Ids: list of words, chars and labels, various length. [[words, features, chars, labels],[words, features, chars,labels],...]
            words: word ids for one sentence. (batch_size, sent_len)
            features: features ids for one sentence. (batch_size, sent_len, feature_num)
            chars: char ids for on sentences, various length. (batch_size, sent_len, each_word_length)
            labels: label ids for one sentence. (batch_size, sent_len)
        ## label should be padded in raw input
        '''
        instances = raw_Ids
        ## set model in eval model
        self.model.eval()
        batch_size = self.data.HP_batch_size
        instance_num = len(instances)
        total_batch = instance_num // batch_size + 1
        decode_label = []
        for batch_id in tqdm(range(total_batch)):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > instance_num:
                end = instance_num
            instance = instances[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_word_text, batch_label, mask = batchify_with_label(
                instance, self.data.HP_gpu, False, self.data.sentence_classification)
            tag_seq = self.model(batch_word, batch_features, batch_wordlen,
                                 batch_char, batch_charlen, batch_charrecover,
                                 batch_word_text, None, mask)
            # Restore original (pre-sort) sentence order before collecting.
            tag_seq = tag_seq[batch_wordrecover.cpu()]
            decode_label += tag_seq.cpu().data.numpy().tolist()
        return decode_label

    def decode_prob(self, raw_Ids):
        '''
        raw_Ids: list of words, chars and labels, various length. [[words, features, chars, labels],[words, features, chars,labels],...]
            words: word ids for one sentence. (batch_size, sent_len)
            features: features ids for one sentence. (batch_size, sent_len, feature_num)
            chars: char ids for on sentences, various length. (batch_size, sent_len, each_word_length)
            labels: label ids for one sentence. (batch_size, sent_len)
        ## label should be padded in raw input
        '''
        if not self.data.sentence_classification:
            print("decode probability is only valid in sentence classification task. Exit.")
            exit(0)
        instances = raw_Ids
        target_probability_list = []
        target_result_list = []
        ## set model in eval model
        self.model.eval()
        batch_size = self.data.HP_batch_size
        instance_num = len(instances)
        total_batch = instance_num // batch_size + 1
        for batch_id in tqdm(range(total_batch)):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > instance_num:
                end = instance_num
            instance = instances[start:end]
            if start % 10000 == 0:
                print("Decode: ", start)
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_word_text, batch_label, mask = batchify_with_label(
                instance, self.data.HP_gpu, False, self.data.sentence_classification)
            target_probability, _ = self.model.get_target_probability(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_word_text, None, mask)
            # Restore original sentence order before accumulating.
            target_probability = target_probability[batch_wordrecover.cpu()]
            target_probability_list.append(target_probability)
        target_probabilities = np.concatenate(target_probability_list, axis=0)
        return target_probabilities

    def decode_prob_and_attention_weights(self, raw_Ids):
        '''
        raw_Ids: list of words, chars and labels, various length. [[words, features, chars, labels],[words, features, chars,labels],...]
            words: word ids for one sentence. (batch_size, sent_len)
            features: features ids for one sentence. (batch_size, sent_len, feature_num)
            chars: char ids for on sentences, various length. (batch_size, sent_len, each_word_length)
            labels: label ids for one sentence. (batch_size, sent_len)
        ## label should be padded in raw input
        '''
        if not self.data.sentence_classification:
            print("decode probability is only valid in sentence classification task. Exit.")
            exit(0)
        if self.data.words2sent_representation.upper() != "ATTENTION" and self.data.words2sent_representation.upper() != "ATT":
            print("attention weights are only valid in attention model. Current: %s, Exit." % (self.data.words2sent_representation))
            exit(0)
        instances = raw_Ids
        target_probability_list = []
        sequence_attention_weight_list = []
        ## set model in eval model
        self.model.eval()
        batch_size = self.data.HP_batch_size
        instance_num = len(instances)
        total_batch = instance_num // batch_size + 1
        for batch_id in tqdm(range(total_batch)):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > instance_num:
                end = instance_num
            instance = instances[start:end]
            if start % 10000 == 0:
                print("Decode: ", start)
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_word_text, batch_label, mask = batchify_with_label(
                instance, self.data.HP_gpu, False, self.data.sentence_classification)
            target_probability, weights = self.model.get_target_probability(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_word_text, None, mask)
            ## target_probability, weights are both numpy
            target_probability = target_probability[batch_wordrecover.cpu()]
            weights = weights[batch_wordrecover.cpu()]
            target_probability_list.append(target_probability)
            sequence_attention_weight_list += weights.tolist()
        target_probabilities = np.concatenate(target_probability_list, axis=0)
        print(len(sequence_attention_weight_list))
        ## sequence_attention_weight_list: list with different batch size and many padded 0
        return target_probabilities, sequence_attention_weight_list
def train(data):
    """Train a multi-task sequence-labeling model and select on dependency LAS.

    Runs ``data.HP_iteration`` epochs over ``data.train_Ids``; after each epoch
    the dev predictions are decoded to dependency trees and scored with
    ``decode_dependencies.evaluate_dependencies``; the model is overwritten at
    ``data.model_dir + ".model"`` whenever LAS improves, and per-task test
    metrics are reported.

    Args:
        data: project configuration/data holder (hyper-parameters, train_Ids,
            HP_tasks, gold_dev_dep, ...).
    """
    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)
    # loss_function = nn.NLLLoss()  # unused; kept for reference like the sibling train()
    if data.optimizer.lower() == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=data.HP_lr, momentum=data.HP_momentum, weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adagrad":
        optimizer = optim.Adagrad(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adadelta":
        optimizer = optim.Adadelta(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adam":
        optimizer = optim.Adam(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)
    best_dev = -10
    best_dev_uas = -10
    # data.HP_iteration = 1
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        if data.optimizer == "SGD":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        total_loss = 0
        # Per-task running statistics (the earlier scalar inits were dead code).
        sample_loss = {idtask: 0 for idtask in range(data.HP_tasks)}
        right_token = {idtask: 0 for idtask in range(data.HP_tasks)}
        whole_token = {idtask: 0 for idtask in range(data.HP_tasks)}
        random.shuffle(data.train_Ids)
        # print("Shuffle: first input word list:", data.train_Ids[0][0])
        ## set model in train mode
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.HP_gpu, False, False)
            instance_count += 1
            loss, losses, tag_seq = model.calculate_loss(batch_word, batch_features, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask, inference=False)
            for idtask in range(data.HP_tasks):
                right, whole = predict_check(tag_seq[idtask], batch_label[idtask], mask)
                sample_loss[idtask] += losses[idtask].item()
                right_token[idtask] += right
                whole_token[idtask] += whole
                if end % 500 == 0:
                    temp_time = time.time()
                    temp_cost = temp_time - temp_start
                    temp_start = temp_time
                    print(" Instance: %s; Task %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
                          (end, idtask, temp_cost, sample_loss[idtask], right_token[idtask], whole_token[idtask],
                           (right_token[idtask] + 0.) / whole_token[idtask]))
                    # FIX: was a Python-2 `print "..."` statement (syntax error on
                    # Python 3), tested `str(sample_loss)` (a dict, never "nan")
                    # and exited with status 0 on a fatal error.
                    if sample_loss[idtask] > 1e8 or str(sample_loss[idtask]) == "nan":
                        print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
                        exit(1)
                    sys.stdout.flush()
                    sample_loss[idtask] = 0
            if end % 500 == 0:
                print("--------------------------------------------------------------------------")
            total_loss += loss.item()
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        for idtask in range(data.HP_tasks):
            print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
                  (end, temp_cost, sample_loss[idtask], right_token[idtask], whole_token[idtask],
                   (right_token[idtask] + 0.) / whole_token[idtask]))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s" %
              (idx, epoch_cost, train_num / epoch_cost, total_loss))
        print("totalloss:", total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
            exit(1)
        summary = evaluate(data, model, "dev", False, False)
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        current_scores = []
        for idtask in range(0, data.HP_tasks):
            speed, acc, p, r, f, pred_labels, _ = summary[idtask]
            if data.seg:
                current_scores.append(f)
                print("Task %d Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" %
                      (idtask, dev_cost, speed, acc, p, r, f))
            else:
                current_scores.append(acc)
                print("Task %d Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" % (idtask, dev_cost, speed, acc))
        pred_results_tasks = []
        pred_scores_tasks = []
        for idtask in range(data.HP_tasks):
            speed, acc, p, r, f, pred_results, pred_scores = summary[idtask]
            pred_results_tasks.append(pred_results)
            # FIX: was `pred_scores_tasks.append(pred_scores_tasks)` — appended
            # the accumulator list to itself instead of the task's scores.
            pred_scores_tasks.append(pred_scores)
        with tempfile.NamedTemporaryFile() as f_decode_mt:
            with tempfile.NamedTemporaryFile() as f_decode_st:
                # If we are learning multiple tasks we dump them as one
                # multi-task sequence-labeling file.
                if len(data.index_of_main_tasks) > 1:
                    data.decode_dir = f_decode_mt.name
                    data.write_decoded_results(pred_results_tasks, 'dev')
                else:
                    if data.decode_dir is None:
                        data.decode_dir = f_decode_st.name
                    data.write_decoded_results(pred_results_tasks, 'dev')
                tmp = tempfile.NamedTemporaryFile().name
                # FIX: the decoded-results handle was never closed (leak).
                with open(data.decode_dir, encoding='utf-8') as output_nn:
                    decode_dependencies.decode(output_nn, tmp)
                current_score, current_uas = decode_dependencies.evaluate_dependencies(data.gold_dev_dep, tmp)
        print("Current Score (from LAS)", current_score)
        print("Current Score (from UAS)", current_uas)
        if current_score > best_dev:
            if data.seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score (from LAS):", best_dev)
            model_name = data.model_dir + ".model"
            # print ("Overwritting model to", model_name)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        else:
            print("sofar the best (from LAS)" + repr(best_dev))
        if current_uas > best_dev_uas:
            if data.seg:
                print("Exceed previous best f score:", best_dev_uas)
            else:
                print("Exceed previous best acc score (from UAS):", best_dev_uas)
            best_dev_uas = current_uas
        else:
            print("sofar the best (from UAS)" + repr(best_dev_uas))
        summary = evaluate(data, model, "test", False)
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        for idtask in range(0, data.HP_tasks):
            speed, acc, p, r, f, _, _ = summary[idtask]
            if data.seg:
                current_score = f
                print("Task %d Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" %
                      (idtask, test_cost, speed, acc, p, r, f))
            else:
                current_score = acc
                print("Task %d Test: time: %.2fs speed: %.2fst/s; acc: %.4f" % (idtask, test_cost, speed, acc))
        gc.collect()
def train(data):
    """Standard single-task training loop (NCRF++-style).

    Saves a dataset snapshot, trains for ``data.HP_iteration`` epochs with the
    optimizer named in ``data.optimizer``, evaluates on dev after each epoch,
    saves a per-epoch checkpoint when the dev score (f if ``data.seg`` else
    acc) improves, and always reports test metrics with the current weights.
    """
    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)
    # loss_function = nn.NLLLoss()
    if data.optimizer.lower() == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=data.HP_lr, momentum=data.HP_momentum, weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adagrad":
        optimizer = optim.Adagrad(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adadelta":
        optimizer = optim.Adadelta(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adam":
        optimizer = optim.Adam(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)
    best_dev = -10
    # data.HP_iteration = 1
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        # Learning-rate decay only applies to plain SGD (case-sensitive check).
        if data.optimizer == "SGD":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        sample_id = 0
        sample_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        print("Shuffle: first input word list:", data.train_Ids[0][0])
        ## set model in train model
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        batch_id = 0
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.HP_gpu, True, data.sentence_classification)
            instance_count += 1
            loss, tag_seq = model.neg_log_likelihood_loss(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask, data.sentence_classification)
            right_token += right
            whole_token += whole
            # print("loss:",loss.item())
            sample_loss += loss.item()
            total_loss += loss.item()
            if end % 500 == 0:
                # Periodic progress + NaN/explosion guard; sample_loss resets here.
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
                      (end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token))
                if sample_loss > 1e8 or str(sample_loss) == "nan":
                    print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
                    exit(1)
                sys.stdout.flush()
                sample_loss = 0
            loss.backward()
            optimizer.step()
            model.zero_grad()
        # Final progress line for the epoch's tail batch.
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
              (end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s" %
              (idx, epoch_cost, train_num / epoch_cost, total_loss))
        print("totalloss:", total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
            exit(1)
        # continue
        speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        # Model selection: f-score when segmentation metrics apply, else accuracy.
        if data.seg:
            current_score = f
            print("Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" %
                  (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" % (dev_cost, speed, acc))
        if current_score > best_dev:
            if data.seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score:", best_dev)
            model_name = data.model_dir + '.' + str(idx) + ".model"
            print("Save current best model in file:", model_name)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        # ## decode test
        speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if data.seg:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" %
                  (test_cost, speed, acc, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" % (test_cost, speed, acc))
        gc.collect()
def load_ncrf_model(data):
    """Instantiate a SeqLabel network for *data* and restore its saved weights.

    The checkpoint at ``data.load_model_dir`` is mapped onto the CPU so the
    model can be loaded on machines without a GPU.

    Args:
        data: project configuration holder providing ``load_model_dir``.

    Returns:
        The SeqLabel model with the checkpoint's parameters loaded.
    """
    net = SeqLabel(data)
    print('loading model:', data.load_model_dir)
    state = torch.load(data.load_model_dir, map_location=torch.device('cpu'))
    net.load_state_dict(state)
    return net