def train(data):
    """Train a SeqLabel / SentClassifier model and track the best dev checkpoint.

    Trains for ``data.HP_iteration`` epochs over ``data.train_Ids``, keeping an
    in-memory deep copy of the best-by-dev state dict, which is saved to disk and
    reloaded at the end before the final test evaluations.

    Returns:
        (best_dev, acc, p, r, f, acc_a, p_a, r_a, f_a, acc_h, p_h, r_h, f_h, dev_info)
        — best dev score, final-model metrics on 'test', 'test_augment' and
        'test_harder', and per-epoch dev metric history.
    """
    print("Training model...")
    data.show_data_summary()
    # Persist the processed data/alphabets alongside the model for later decoding.
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)
    # loss_function = nn.NLLLoss()
    # Select the optimizer by (case-insensitive) name; unknown names are fatal.
    if data.optimizer.lower() == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=data.HP_lr,
                              momentum=data.HP_momentum, weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adagrad":
        optimizer = optim.Adagrad(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adadelta":
        optimizer = optim.Adadelta(model.parameters(), lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adam":
        optimizer = optim.Adam(model.parameters(), lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)
    best_dev = -10  # sentinel: any real acc/f-score (>= 0) beats it on epoch 0
    best_model = None
    # Per-epoch dev/test metric history (test_info is collected but unused here).
    dev_info = {'acc': [], 'p': [], 'f': [], 'r': []}
    test_info = {'acc': [], 'p': [], 'f': [], 'r': []}
    # data.HP_iteration = 1
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        # Learning-rate decay is applied only for (exact, case-sensitive) "SGD".
        if data.optimizer == "SGD":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        sample_id = 0
        sample_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        print("Shuffle: first input word list:", data.train_Ids[0][0])
        ## set model in train model
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        batch_id = 0
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.HP_gpu, True, data.sentence_classification)
            instance_count += 1
            loss, tag_seq = model.neg_log_likelihood_loss(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask,
                                         data.sentence_classification)
            right_token += right
            whole_token += whole
            # print("loss:",loss.item())
            sample_loss += loss.item()
            total_loss += loss.item()
            # Progress report every 500 instances; sample_loss is reset here so it
            # tracks the loss of the last reporting window only.
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(
                    "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (end, temp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.) / whole_token))
                # NaN compares unequal to everything, so the string check catches it.
                if sample_loss > 1e8 or str(sample_loss) == "nan":
                    print(
                        "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                    )
                    exit(1)
                sys.stdout.flush()
                sample_loss = 0
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
              (end, temp_cost, sample_loss, right_token, whole_token,
               (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        print("totalloss:", total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            print(
                "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
            )
            exit(1)
        # continue
        speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
        dev_info['acc'].append(acc)
        dev_info['p'].append(p)
        dev_info['r'].append(r)
        dev_info['f'].append(f)
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        # data.seg: segmentation-style task -> select on F; otherwise on accuracy.
        if data.seg:
            current_score = f
            print(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                  (dev_cost, speed, acc))
        if current_score > best_dev:
            if data.seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score:", best_dev)
            # Keep the best weights in memory; disk write is deferred to the end.
            model_name = data.model_dir + '.' + str(idx) + ".model"
            best_model_dict = deepcopy(model.state_dict())
            best_dev = current_score
        # ## decode test
        # Periodic (every 10 epochs) progress evaluation on the test splits.
        if idx % 10 == 0:
            evaluate_and_print(data, model, 'test')
            evaluate_and_print(data, model, 'test_augment')
            evaluate_and_print(data, model, 'test_harder')
        gc.collect()
    # add test for best model
    # NOTE(review): model_name/best_model_dict are only bound inside the
    # improvement branch; with best_dev = -10 the first epoch always binds them.
    print("======BEST MODEL TEST======")
    print("Save current best model in file:", model_name)
    torch.save(best_model_dict, model_name)
    model.load_state_dict(best_model_dict)
    acc, p, r, f = evaluate_and_print_return(data, model, 'test')
    acc_a, p_a, r_a, f_a = evaluate_and_print_return(data, model, 'test_augment')
    acc_h, p_h, r_h, f_h = evaluate_and_print_return(data, model, 'test_harder')
    print("======BEST DEV=======: {}".format(best_dev))
    return best_dev, acc, p, r, f, acc_a, p_a, r_a, f_a, acc_h, p_h, r_h, f_h, dev_info
def train(data):
    """Train a SeqLabel / SentClassifier model, logging per-epoch metrics to TSV.

    Trains for ``data.HP_iteration`` epochs, evaluating on dev and test each
    epoch. The best-by-dev model is saved to ``<model_dir>.best.model``. If
    ``data.output_tsv_path`` is set, one Dev row and one Test row per epoch
    (columns per TSV_HEADER) are appended to that file.
    """
    print("Training model...")
    data.show_data_summary()
    # Persist the processed data/alphabets alongside the model for later decoding.
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.output_tsv_path:
        # Use line buffering
        output_tsv = open(data.output_tsv_path, "w", buffering=1)
        print("\t".join(TSV_HEADER), file=output_tsv)
    else:
        output_tsv = None
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)
    # Select the optimizer by (case-insensitive) name; unknown names are fatal.
    if data.optimizer.lower() == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=data.HP_lr,
                              momentum=data.HP_momentum, weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adagrad":
        optimizer = optim.Adagrad(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adadelta":
        optimizer = optim.Adadelta(model.parameters(), lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adam":
        optimizer = optim.Adam(model.parameters(), lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)
    best_dev = -10  # sentinel: any real score beats it on epoch 0
    # data.HP_iteration = 1
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        # LR decay only applies for (exact, case-sensitive) "SGD".
        if data.optimizer == "SGD":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        sample_id = 0
        sample_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        print("Shuffle: first input word list:", data.train_Ids[0][0])
        ## set model in train model
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        batch_id = 0
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.HP_gpu, True, data.sentence_classification)
            instance_count += 1
            loss, tag_seq = model.calculate_loss(batch_word, batch_features,
                                                 batch_wordlen, batch_char,
                                                 batch_charlen, batch_charrecover,
                                                 batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask,
                                         data.sentence_classification)
            right_token += right
            whole_token += whole
            # print("loss:",loss.item())
            sample_loss += loss.item()
            total_loss += loss.item()
            # Progress report every 500 instances; sample_loss resets per window.
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(
                    "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (end, temp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.) / whole_token))
                if sample_loss > 1e8 or str(sample_loss) == "nan":
                    print(
                        "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                    )
                    exit(1)
                sys.stdout.flush()
                sample_loss = 0
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
              (end, temp_cost, sample_loss, right_token, whole_token,
               (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        print("totalloss:", total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            print(
                "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
            )
            exit(1)
        # continue
        # Dev evaluation; this evaluate() variant returns overall and "internal"
        # (presumably nested-entity) metric tuples — confirm against evaluate().
        speed, (acc, p, r, f), (internal_acc, internal_p, internal_r, internal_f), _, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        # data.seg: segmentation-style task -> select on F; otherwise on accuracy.
        if data.seg:
            current_score = f
            print(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                  (dev_cost, speed, acc))
        if current_score > best_dev:
            if data.seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score:", best_dev)
            # Single rolling checkpoint: each improvement overwrites .best.model.
            model_name = data.model_dir + ".best.model"
            print("Save current best model in file:", model_name)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        if output_tsv:
            print("\t".join(
                str(item) for item in [
                    data.tagScheme, data.random_seed, idx + 1, "Dev",
                    total_loss, acc, p, r, f, internal_acc, internal_p,
                    internal_r, internal_f
                ]),
                  file=output_tsv)
        # ## decode test
        speed, (acc, p, r, f), (internal_acc, internal_p, internal_r, internal_f), _, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if data.seg:
            print(
                "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (test_cost, speed, acc, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" %
                  (test_cost, speed, acc))
        if output_tsv:
            print("\t".join(
                str(item) for item in [
                    data.tagScheme, data.random_seed, idx + 1, "Test",
                    total_loss, acc, p, r, f, internal_acc, internal_p,
                    internal_r, internal_f
                ]),
                  file=output_tsv)
        gc.collect()
    if output_tsv:
        output_tsv.close()
def train(data):
    """Train a multi-task SeqLabel / SentClassifier model with evalb/discodop
    model selection.

    Trains for ``data.HP_iteration`` epochs; each epoch evaluates all
    ``data.HP_tasks`` tasks on dev. Model selection uses either the mean
    per-task tagging score or, when ``data.optimize_with_evalb`` is set, a
    bracketing F-measure obtained by decoding the dev predictions to trees and
    scoring them with ``discodop eval`` (discontinuous trees) or EVALB
    (continuous trees). The best model overwrites ``<model_dir>.model`` and is
    then evaluated on test. Per-epoch scores are appended to ``data.log_file``
    when set.

    Fixes applied in this revision (behavior-affecting):
      * Python 2 ``print`` statements converted to ``print()`` calls — they were
        a SyntaxError under Python 3, which the rest of this code targets.
      * NaN check used ``str(sample_loss)`` on the *dict* (never "nan");
        now checks ``sample_loss[idtask]``.
      * ``pred_scores_tasks.append(pred_scores_tasks)`` appended the list to
        itself; now appends ``pred_scores``.
      * Fatal loss-explosion exit used ``exit(0)`` (success); now ``exit(1)``
        consistent with the other error paths.
      * Log file handles are closed after training (previously leaked).
      * Removed dead scalar initializers shadowed by the per-task dicts and the
        unused ``loss_function``.
    """
    print("Training model...")
    data.show_data_summary()
    # Persist the processed data/alphabets alongside the model for later decoding.
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)
    print(model)
    # Select the optimizer by (case-insensitive) name; unknown names are fatal.
    if data.optimizer.lower() == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=data.HP_lr,
                              momentum=data.HP_momentum, weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adagrad":
        optimizer = optim.Adagrad(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adadelta":
        optimizer = optim.Adadelta(model.parameters(), lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adam":
        optimizer = optim.Adam(model.parameters(), lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)
    best_dev = -sys.maxsize - 1
    best_dev_only_disco = -sys.maxsize - 1
    current_score_disco = -sys.maxsize - 1
    # data.HP_iteration = 1
    ## start training
    if data.log_file is not None:
        f_log = open(data.log_file, "w")
        f_log.write("\t".join(["Epoch", "F-Score", "F-Score-disco"]) + "\n")
        # Holds a copy of the most recent decoded dev output for inspection.
        f_log_last_output = open(data.log_file + ".last_output", "w")
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        # LR decay only applies for (exact, case-sensitive) "SGD".
        if data.optimizer == "SGD":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        sample_id = 0
        total_loss = 0
        # Per-task running statistics.
        sample_loss = {idtask: 0 for idtask in range(data.HP_tasks)}
        right_token = {idtask: 0 for idtask in range(data.HP_tasks)}
        whole_token = {idtask: 0 for idtask in range(data.HP_tasks)}
        random.shuffle(data.train_Ids)
        ## set model in train model
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        batch_id = 0
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.HP_gpu, False, False)
            instance_count += 1
            # Multi-task loss: `loss` is the joint objective, `losses`/`tag_seq`
            # are per-task.
            loss, losses, tag_seq = model.calculate_loss(batch_word,
                                                         batch_features,
                                                         batch_wordlen,
                                                         batch_char,
                                                         batch_charlen,
                                                         batch_charrecover,
                                                         batch_label,
                                                         mask,
                                                         inference=False)
            for idtask in range(data.HP_tasks):
                right, whole = predict_check(tag_seq[idtask],
                                             batch_label[idtask], mask)
                sample_loss[idtask] += losses[idtask].item()
                right_token[idtask] += right
                whole_token[idtask] += whole
                # Progress report every 500 instances; per-task window loss resets.
                if end % 500 == 0:
                    temp_time = time.time()
                    temp_cost = temp_time - temp_start
                    temp_start = temp_time
                    print(
                        "     Instance: %s; Task %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                        % (end, idtask, temp_cost, sample_loss[idtask],
                           right_token[idtask], whole_token[idtask],
                           (right_token[idtask] + 0.) / whole_token[idtask]))
                    # FIX: was `str(sample_loss)` (the dict) — NaN was never caught.
                    if sample_loss[idtask] > 1e8 or str(sample_loss[idtask]) == "nan":
                        # FIX: was a Python 2 print statement and exit(0).
                        print(
                            "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                        )
                        exit(1)
                    sys.stdout.flush()
                    sample_loss[idtask] = 0
            if end % 500 == 0:
                print(
                    "--------------------------------------------------------------------------"
                )
            total_loss += loss.item()
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        for idtask in range(data.HP_tasks):
            print(
                "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
                (end, temp_cost, sample_loss[idtask], right_token[idtask],
                 whole_token[idtask],
                 (right_token[idtask] + 0.) / whole_token[idtask]))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        print("totalloss:", total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            print(
                "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
            )
            exit(1)
        init_eval_time = time.time()
        summary = evaluate(data, model, "dev", False, False)
        print("Evaluation time {}".format(time.time() - init_eval_time))
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        # Per-task dev score (F if data.seg else accuracy).
        current_scores = []
        for idtask in range(0, data.HP_tasks):
            speed, acc, p, r, f, pred_labels, _ = summary[idtask]
            if data.seg:
                current_scores.append(f)
                print(
                    "Task %d Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                    % (idtask, dev_cost, speed, acc, p, r, f))
            else:
                current_scores.append(acc)
                print("Task %d Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                      (idtask, dev_cost, speed, acc))
        # Collect per-task predictions so they can be decoded to trees.
        pred_results_tasks = []
        pred_scores_tasks = []
        for idtask in range(data.HP_tasks):
            speed, acc, p, r, f, pred_results, pred_scores = summary[idtask]
            pred_results_tasks.append(pred_results)
            # FIX: was `append(pred_scores_tasks)` — appended the list to itself.
            pred_scores_tasks.append(pred_scores)
        # NOTE(review): NamedTemporaryFile() is immediately discarded, so only
        # its (now free) name survives — racy but preserved from the original.
        data.decode_dir = tempfile.NamedTemporaryFile().name
        data.write_decoded_results(pred_results_tasks, 'dev')
        if data.log_file is not None:
            copyfile(data.decode_dir, f_log_last_output.name)
        if data.optimize_with_evalb:
            tmp_trees_file = tempfile.NamedTemporaryFile()
            # NOTE(review): commands are joined and run through the shell;
            # paths containing spaces/metacharacters would break or be unsafe.
            command = [
                "python",
                data.disco_decode_script,  #"decode.py ",
                "--input",
                data.decode_dir,
                "--output",
                tmp_trees_file.name,
                "--disc" if data.disco_encoder is not None else "",
                "--split_char",
                data.label_split_char,
                "--os" if data.dummy_os else "",
                "--disco_encoder " + data.disco_encoder
                if data.disco_encoder is not None else "",
                "" if not data.add_leaf_unary_column else "--add_leaf_unary_column",
                "--path_reduced_tagset " + data.path_reduced_tagset
                if data.path_reduced_tagset is not None else ""
            ]
            p = subprocess.Popen(" ".join(command),
                                 stdout=subprocess.PIPE,
                                 shell=True)
            out, err = p.communicate()
            out = out.decode("utf-8")
            if data.disco_encoder is not None:
                # Score all trees with discodop.
                command = [
                    "discodop", "eval", data.gold_dev_trees,
                    tmp_trees_file.name, data.evalb_param_file, "--fmt",
                    "discbracket"
                ]
                p = subprocess.Popen(" ".join(command),
                                     stdout=subprocess.PIPE,
                                     shell=True)
                out, err = p.communicate()
                out = out.decode("utf-8")
                current_score = float([
                    l for l in out.split("\n")
                    if l.startswith("labeled f-measure:")
                ][0].rsplit(" ", 1)[1])
                # Computing the score for discontinuous trees only
                command = [
                    "discodop", "eval", data.gold_dev_trees,
                    tmp_trees_file.name, data.evalb_param_file, "--fmt",
                    "discbracket", "--disconly"
                ]
                p = subprocess.Popen(" ".join(command),
                                     stdout=subprocess.PIPE,
                                     shell=True)
                out, err = p.communicate()
                out = out.decode("utf-8")
                current_score_disco = float([
                    l for l in out.split("\n")
                    if l.startswith("labeled f-measure:")
                ][0].rsplit(" ", 1)[1])
            else:
                command = [data.evalb, tmp_trees_file.name, data.gold_dev_trees]
                # For legacy with how previous models were trained
                if data.evalb_param_file is not None:
                    command.extend(["-p", data.evalb_param_file])
                p = subprocess.Popen(" ".join(command),
                                     stdout=subprocess.PIPE,
                                     shell=True)
                out, err = p.communicate()
                out = out.decode("utf-8")
                current_score = float([
                    l for l in out.split("\n")
                    if l.startswith("Bracketing FMeasure")
                ][0].split("=")[1])
        os.remove(data.decode_dir)
        tagging_score = sum(current_scores) / len(current_scores)
        print("The tagging accuracy is:", tagging_score)
        if not data.optimize_with_evalb:
            current_score = tagging_score
        print("The overall dev score for this epoch is: {} ".format(
            current_score))
        print("The overall previous best dev score was: {} ".format(best_dev))
        if data.disco_encoder is not None:
            print(
                "The dev score for this continuous trees in this epoch is: {}".
                format(current_score_disco))
            print("The previous discontinuous score of the best model is: {} ".
                  format(best_dev_only_disco))
        if current_score > best_dev:
            model_name = data.model_dir + ".model"
            print("Overwriting model in", model_name)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
            best_dev_only_disco = current_score_disco
            # Test evaluation is run only when the model improves on dev.
            # NOTE(review): dev call passes two extra flags, test only one —
            # relies on evaluate()'s default for the last parameter; confirm.
            summary = evaluate(data, model, "test", False)
            test_finish = time.time()
            test_cost = test_finish - dev_finish
            for idtask in range(0, data.HP_tasks):
                speed, acc, p, r, f, _, _ = summary[idtask]
                if data.seg:
                    print(
                        "Task %d Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                        % (idtask, test_cost, speed, acc, p, r, f))
                else:
                    print(
                        "Task %d Test: time: %.2fs speed: %.2fst/s; acc: %.4f"
                        % (idtask, test_cost, speed, acc))
        if data.log_file is not None:
            f_log.write("{}\t{}\t{}\n".format(idx, current_score,
                                              current_score_disco))
            f_log.flush()
        gc.collect()
    # FIX: close log handles (previously leaked for the life of the process).
    if data.log_file is not None:
        f_log.close()
        f_log_last_output.close()
def train(data):
    """Train a SeqLabel / SentClassifier model, reporting through ``logging``.

    Same structure as the print-based variants: per-epoch training over
    ``data.train_Ids``, dev-based model selection (checkpoint saved as
    ``<model_dir>.<epoch>.model``), and a test evaluation every epoch.
    """
    logging.info("Training model...")
    data.show_data_summary()
    # Persist the processed data/alphabets alongside the model for later decoding.
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)
    # loss_function = nn.NLLLoss()
    # Select the optimizer by (case-insensitive) name; unknown names are fatal.
    if data.optimizer.lower() == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=data.HP_lr,
                              momentum=data.HP_momentum, weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adagrad":
        optimizer = optim.Adagrad(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adadelta":
        optimizer = optim.Adadelta(model.parameters(), lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adam":
        optimizer = optim.Adam(model.parameters(), lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        logging.info("Optimizer illegal: %s" % (data.optimizer))
        exit(1)
    best_dev = -10  # sentinel: any real score beats it on epoch 0
    # start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        logging.info("Epoch: %s/%s" % (idx, data.HP_iteration))
        # LR decay only applies for (exact, case-sensitive) "SGD".
        if data.optimizer == "SGD":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        sample_id = 0
        sample_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        logging.info("Shuffle: first input word list:%s" % data.train_Ids[0][0])
        # set model in train model
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        batch_id = 0
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            # input for one batch
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, \
                batch_charrecover, batch_label, mask = batchify_with_label(instance, data.HP_gpu, True, data.sentence_classification)
            instance_count += 1
            loss, tag_seq = model.neg_log_likelihood_loss(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask,
                                         data.sentence_classification)
            right_token += right
            whole_token += whole
            # logging.info("loss:",loss.item())
            sample_loss += loss.item()
            total_loss += loss.item()
            # Progress report every 50000 instances (sparser than the 500 used
            # in the sibling variants); sample_loss resets per window.
            if end % 50000 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                logging.info(
                    "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (end, temp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.) / whole_token))
                if sample_loss > 1e8 or str(sample_loss) == "nan":
                    logging.info(
                        "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                    )
                    exit(1)
                sys.stdout.flush()
                sample_loss = 0
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        logging.info(
            "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
            (end, temp_cost, sample_loss, right_token, whole_token,
             (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        logging.info(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        logging.info("totalloss: %s" % total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            logging.info(
                "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
            )
            exit(1)
        # continue
        speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        # data.seg: segmentation-style task -> select on F; otherwise on accuracy.
        if data.seg:
            current_score = f
            logging.info(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            logging.info("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                         (dev_cost, speed, acc))
        if current_score > best_dev:
            if data.seg:
                logging.info("Exceed previous best f score: %s" % best_dev)
            else:
                logging.info("Exceed previous best acc score:%s" % best_dev)
            # One checkpoint per improving epoch (suffix = epoch index).
            model_name = data.model_dir + '.' + str(idx) + ".model"
            logging.info("Save current best model in file: %s" % model_name)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        # decode test
        speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if data.seg:
            logging.info(
                "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (test_cost, speed, acc, p, r, f))
        else:
            logging.info("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" %
                         (test_cost, speed, acc))
        gc.collect()
def train(data):
    """Train a SeqLabel / SentClassifier model with optional frozen word
    embeddings, ELMo/adjacency batch inputs, and patience-based early stopping.

    Shuffles ``train_Ids`` and ``train_texts`` in lockstep each epoch. Saves the
    best-by-dev model to ``<model_dir>.dev.model`` and the best-by-test model to
    ``<model_dir>.test.model``; training stops early after ``data.patience``
    epochs without test improvement.
    """
    print("Training model...")
    data.show_data_summary()
    # Persist the processed data/alphabets alongside the model for later decoding.
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)
    # Select the optimizer by (case-insensitive) name; unknown names are fatal.
    if data.optimizer.lower() == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=data.HP_lr,
                              momentum=data.HP_momentum, weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adagrad":
        optimizer = optim.Adagrad(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adadelta":
        optimizer = optim.Adadelta(model.parameters(), lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adam":
        optimizer = optim.Adam(model.parameters(), lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)

    def freeze_net(model):
        # Disable gradients for the pretrained word-embedding table only.
        for p in model.word_hidden.wordrep.word_embedding.parameters():
            p.requires_grad = False

    if data.tune_wordemb == False:
        freeze_net(model)
    best_dev = -10  # sentinel: any real score beats it on epoch 0
    best_test = -10
    bad_counter = 0  # epochs since the last test-score improvement
    # data.HP_iteration = 1
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        # LR decay only applies for (exact, case-sensitive) "SGD".
        if data.optimizer == "SGD":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        sample_id = 0
        sample_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        # Shuffle ids and raw texts together so they stay aligned.
        cc = list(zip(data.train_Ids, data.train_texts))
        random.shuffle(cc)
        data.train_Ids[:], data.train_texts[:] = zip(*cc)
        print("Shuffle: first input word list:", data.train_Ids[0][0])
        ## set model in train model
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        batch_id = 0
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            instance_text = data.train_texts[start:end]
            if not instance:
                continue
            # This batchify variant also returns ELMo character inputs and an
            # adjacency tensor (batch_adj) consumed by calculate_loss.
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask, batch_elmo_char, batch_adj = batchify_with_label(
                instance, instance_text, data.HP_gpu, True,
                data.sentence_classification)
            instance_count += 1
            loss, tag_seq = model.calculate_loss(batch_word, batch_features,
                                                 batch_wordlen, batch_char,
                                                 batch_charlen,
                                                 batch_charrecover,
                                                 batch_label, mask,
                                                 batch_elmo_char, batch_adj)
            right, whole = predict_check(tag_seq, batch_label, mask,
                                         data.sentence_classification)
            right_token += right
            whole_token += whole
            # print("loss:",loss.item())
            sample_loss += loss.item()
            total_loss += loss.item()
            # Progress report every 500 instances; sample_loss resets per window.
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(
                    "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (end, temp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.) / whole_token))
                if sample_loss > 1e8 or str(sample_loss) == "nan":
                    print(
                        "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                    )
                    exit(1)
                sys.stdout.flush()
                sample_loss = 0
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
              (end, temp_cost, sample_loss, right_token, whole_token,
               (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        print("totalloss:", total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            print(
                "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
            )
            exit(1)
        # continue
        speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        # data.seg: segmentation-style task -> select on F; otherwise on accuracy.
        if data.seg:
            current_score = f
            print(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                  (dev_cost, speed, acc))
        if current_score > best_dev:
            if data.seg:
                print("Exceed previous best dev f score:", best_dev)
            else:
                print("Exceed previous best dev acc score:", best_dev)
            # model_name = data.model_dir +'.'+ str(idx) + ".model"
            model_name = data.model_dir + ".dev.model"
            # print("Save current best model in file:", model_name)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
            # bad_counter = 0
        # else:
        #     bad_counter += 1
        # ## decode test
        speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if data.seg:
            current_score = f
            print(
                "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (test_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" %
                  (test_cost, speed, acc))
        # NOTE(review): the patience counter is driven by the *test* score (the
        # dev-based reset above is commented out) — early stopping therefore
        # peeks at the test set; confirm this is intentional.
        if current_score > best_test:
            if data.seg:
                print("Exceed previous best test f score:", best_test)
            else:
                print("Exceed previous best test acc score:", best_test)
            model_name = data.model_dir + ".test.model"
            torch.save(model.state_dict(), model_name)
            best_test = current_score
            bad_counter = 0
        else:
            bad_counter += 1
        gc.collect()
        if bad_counter >= data.patience:
            print('Early Stop!')
            break
def train(data):
    """Train a cross-domain (Source/Target) model with alternating batches.

    Uses a step-driven ``while`` loop instead of a per-epoch ``for``: even steps
    draw a Target-domain batch, odd steps a Source-domain batch. An "epoch" ends
    when the Target dataset is exhausted (``target_end``), at which point the
    model is evaluated on Target dev/test and the best-by-dev checkpoint is
    saved to ``<model_dir>.model``. Exhausting the Source dataset only triggers
    a Source test evaluation.
    """
    print("Training model...")
    device = torch.device(
        'cuda' if torch.cuda.is_available() and data.HP_gpu else 'cpu')
    data.show_data_summary()
    # Persist the processed data/alphabets alongside the model for later decoding.
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data).to(device)
    else:
        model = SeqLabel(data).to(device)
    # for name, param in model.named_parameters():
    #     if param.requires_grad:
    #         print(name)
    ## compute model parameter num
    n_all_param = sum([p.nelement() for p in model.parameters()])
    # Embedding parameters = word embedding table + char embedding table.
    n_emb_param = sum([
        p.nelement() for p in (
            model.word_hidden.wordrep.word_embedding.weight,
            model.word_hidden.wordrep.char_feature.char_embeddings.weight)
    ])
    print("all parameters=%s, emb parameters=%s, other parameters=%s" %
          (n_all_param, n_emb_param, n_all_param - n_emb_param))
    # Select the optimizer by (case-insensitive) name; unknown names are fatal.
    if data.optimizer.lower() == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=data.HP_lr,
                              momentum=data.HP_momentum, weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adagrad":
        optimizer = optim.Adagrad(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adadelta":
        optimizer = optim.Adadelta(model.parameters(), lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adam":
        optimizer = optim.Adam(model.parameters(), lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)
    best_dev = -10  # sentinel: any real score beats it on epoch 0
    test_f = []  # per-epoch Target test score (f or acc)
    dev_f = []   # per-epoch Target dev score (f or acc)
    best_epoch = 0
    train_dataset_S = Multi_Task_Dataset(data.train_Ids_S, data.HP_batch_size)
    train_dataset_T = Multi_Task_Dataset(data.train_Ids_T, data.HP_batch_size)
    total_step = 0
    target_end, source_end = False, False
    epoch_idx = 0
    epoch_start = True  # this step is the start of an epoch
    ## start training
    while epoch_idx < data.HP_iteration:
        if epoch_start:
            epoch_start = False
            epoch_loss = 0
            epoch_start_time = time.time()
            print("Epoch: %s/%s" % (epoch_idx, data.HP_iteration))
            # LR decay only applies for (exact, case-sensitive) "SGD".
            if data.optimizer == "SGD":
                optimizer = lr_decay(optimizer, epoch_idx, data.HP_lr_decay,
                                     data.HP_lr)
        model.train()
        model.zero_grad()
        # Alternate domains: even steps Target, odd steps Source. next_batch()
        # also reports whether its dataset has been exhausted.
        if total_step % 2 == 0:
            domain_tag = 'Target'
            batch_instance, target_end = train_dataset_T.next_batch()
        else:
            domain_tag = 'Source'
            batch_instance, source_end = train_dataset_S.next_batch()
        if len(batch_instance) == 0:
            continue
        original_words_batch, batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, batch_entity, mask = \
            batchify_with_label(batch_instance, data.HP_gpu, True, data.sentence_classification)
        loss, entity_loss, atten_probs_loss = model.calculate_loss(
            original_words_batch, domain_tag, batch_word, batch_features,
            batch_wordlen, batch_char, batch_charlen, batch_charrecover,
            batch_label, batch_entity, mask)
        # Up-weight the Target-domain tagging loss; auxiliary losses are added as-is.
        rate = data.HP_target_loss_rate if domain_tag == "Target" else 1.0
        # 2:1 for twitter 1.6:1 for bionlp 1.5:1 for broad twitter
        loss_ = rate * loss + entity_loss + atten_probs_loss
        epoch_loss += loss_.item()
        loss_.backward()
        optimizer.step()
        model.zero_grad()
        total_step += 1
        ## evaluation
        if target_end:
            # Target data exhausted -> treat this as the end of an epoch.
            epoch_finish_time = time.time()
            epoch_cost = epoch_finish_time - epoch_start_time
            print("Epoch: %s training finished. Time: %.2fs" %
                  (epoch_idx, epoch_cost))
            print("totalloss:", epoch_loss)
            if epoch_loss > 1e8 or str(epoch_loss) == "nan":
                print(
                    "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                )
                exit(1)
                continue  # NOTE(review): unreachable after exit(1); kept as-is
            ## decode Target dev
            speed, acc, p, r, f, _, _ = evaluate("Target", data, model, "dev")
            dev_finish_time = time.time()
            dev_cost = dev_finish_time - epoch_finish_time
            # data.seg: segmentation-style task -> select on F; else on accuracy.
            if data.seg:
                current_score = f
                print(
                    "Dev (Target): time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                    % (dev_cost, speed, acc, p, r, f))
            else:
                current_score = acc
                print("Dev (Target): time: %.2fs speed: %.2fst/s; acc: %.4f" %
                      (dev_cost, speed, acc))
            dev_f.append(current_score)
            if current_score > best_dev:
                best_epoch = epoch_idx
                if data.seg:
                    print("Exceed previous best f score:", best_dev)
                else:
                    print("Exceed previous best acc score:", best_dev)
                model_name = data.model_dir + ".model"
                print("Save current best model in file:", model_name)
                torch.save(model.state_dict(), model_name)
                best_dev = current_score
            ## decode Target test
            speed, acc, p, r, f, _, _ = evaluate("Target", data, model, "test")
            test_finish_time = time.time()
            test_cost = test_finish_time - dev_finish_time
            if data.seg:
                print(
                    "Test (Target): time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                    % (test_cost, speed, acc, p, r, f))
                test_f.append(f)
            else:
                print(
                    "Test (Target): time: %.2fs, speed: %.2fst/s; acc: %.4f"
                    % (test_cost, speed, acc))
                test_f.append(acc)
            gc.collect()
            # dev_f/test_f get exactly one entry per epoch, so indexing by
            # best_epoch is valid here.
            print("The best f in epoch%s, dev:%.4f, test:%.4f" %
                  (best_epoch, dev_f[best_epoch], test_f[best_epoch]))
            ## epoch end set
            epoch_start = True
            target_end = False
            epoch_idx += 1
        if source_end:
            epoch_finish_time = time.time()
            ## decode test Source
            speed, acc, p, r, f, _, _ = evaluate("Source", data, model, "test")
            test_finish = time.time()
            test_cost = test_finish - epoch_finish_time
            if data.seg:
                print(
                    "Test (Source): time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                    % (test_cost, speed, acc, p, r, f))
            else:
                print(
                    "Test (Source): time: %.2fs, speed: %.2fst/s; acc: %.4f"
                    % (test_cost, speed, acc))
            source_end = False
def train(data):
    """Train a multi-task sequence-labeling model, selecting on dev LAS.

    For each of ``data.HP_iteration`` epochs: shuffle the training ids, run
    mini-batch training with a joint loss over ``data.HP_tasks`` tasks,
    evaluate on dev, decode the dev predictions into dependency trees and
    score them with ``decode_dependencies`` (LAS / UAS), checkpoint the model
    to ``data.model_dir + ".model"`` whenever the dev LAS improves (UAS is
    tracked for reporting only), then print per-task test scores.

    Args:
        data: project Data/config object supplying hyper-parameters
            (``HP_*``), datasets (``train_Ids``), paths (``model_dir``,
            ``decode_dir``, ``gold_dev_dep``) and evaluation flags.
    """
    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)
    # Build the optimizer requested in the configuration.
    if data.optimizer.lower() == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=data.HP_lr,
                              momentum=data.HP_momentum, weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adagrad":
        optimizer = optim.Adagrad(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adadelta":
        optimizer = optim.Adadelta(model.parameters(), lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adam":
        optimizer = optim.Adam(model.parameters(), lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)
    best_dev = -10      # best dev LAS so far (checkpoint criterion)
    best_dev_uas = -10  # best dev UAS so far (reported only)
    # data.HP_iteration = 1
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        if data.optimizer == "SGD":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        total_loss = 0
        # Per-task running statistics for the periodic progress report.
        sample_loss = {idtask: 0 for idtask in range(data.HP_tasks)}
        right_token = {idtask: 0 for idtask in range(data.HP_tasks)}
        whole_token = {idtask: 0 for idtask in range(data.HP_tasks)}
        random.shuffle(data.train_Ids)
        # print("Shuffle: first input word list:", data.train_Ids[0][0])
        ## set model in train model
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, \
                batch_char, batch_charlen, batch_charrecover, batch_label, \
                mask = batchify_with_label(instance, data.HP_gpu, False, False)
            instance_count += 1
            loss, losses, tag_seq = model.calculate_loss(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask,
                inference=False)
            for idtask in range(data.HP_tasks):
                right, whole = predict_check(tag_seq[idtask],
                                             batch_label[idtask], mask)
                sample_loss[idtask] += losses[idtask].item()
                right_token[idtask] += right
                whole_token[idtask] += whole
                if end % 500 == 0:
                    temp_time = time.time()
                    temp_cost = temp_time - temp_start
                    temp_start = temp_time
                    print("     Instance: %s; Task %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                          % (end, idtask, temp_cost, sample_loss[idtask],
                             right_token[idtask], whole_token[idtask],
                             (right_token[idtask] + 0.) / whole_token[idtask]))
                    # BUGFIX: the guard compared str(sample_loss) — the repr of
                    # the whole dict, never "nan" — and used a Python 2 print
                    # statement with exit(0) (success code) on a fatal error.
                    if sample_loss[idtask] > 1e8 or str(sample_loss[idtask]) == "nan":
                        print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
                        exit(1)
                    sys.stdout.flush()
                    sample_loss[idtask] = 0
            if end % 500 == 0:
                print("--------------------------------------------------------------------------")
            total_loss += loss.item()
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        for idtask in range(data.HP_tasks):
            print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                  % (end, temp_cost, sample_loss[idtask], right_token[idtask],
                     whole_token[idtask],
                     (right_token[idtask] + 0.) / whole_token[idtask]))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"
              % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        print("totalloss:", total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
            exit(1)
        ## evaluate on dev and report each task separately
        summary = evaluate(data, model, "dev", False, False)
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        current_scores = []
        for idtask in range(0, data.HP_tasks):
            speed, acc, p, r, f, pred_labels, _ = summary[idtask]
            if data.seg:
                current_scores.append(f)
                print("Task %d Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                      % (idtask, dev_cost, speed, acc, p, r, f))
            else:
                current_scores.append(acc)
                print("Task %d Dev: time: %.2fs speed: %.2fst/s; acc: %.4f"
                      % (idtask, dev_cost, speed, acc))
        # Collect per-task predictions so they can be decoded jointly.
        pred_results_tasks = []
        pred_scores_tasks = []
        for idtask in range(data.HP_tasks):
            speed, acc, p, r, f, pred_results, pred_scores = summary[idtask]
            pred_results_tasks.append(pred_results)
            # BUGFIX: the original appended pred_scores_tasks to itself
            # (a self-referential list) instead of the task's scores.
            pred_scores_tasks.append(pred_scores)
        with tempfile.NamedTemporaryFile() as f_decode_mt:
            with tempfile.NamedTemporaryFile() as f_decode_st:
                # If we are learning multiple task we move it as a sequence
                # labeling
                if len(data.index_of_main_tasks) > 1:
                    data.decode_dir = f_decode_mt.name
                    data.write_decoded_results(pred_results_tasks, 'dev')
                else:
                    if data.decode_dir is None:
                        data.decode_dir = f_decode_st.name
                    data.write_decoded_results(pred_results_tasks, 'dev')
                # NOTE(review): only the .name is kept; the temp file object is
                # unreferenced and may be deleted before decode() writes to the
                # path — presumably decode() recreates it. TODO confirm.
                tmp = tempfile.NamedTemporaryFile().name
                # BUGFIX: close the decoded-output handle deterministically
                # instead of leaking it.
                with open(data.decode_dir, encoding='utf-8') as output_nn:
                    decode_dependencies.decode(output_nn, tmp)
                current_score, current_uas = decode_dependencies.evaluate_dependencies(
                    data.gold_dev_dep, tmp)
                print("Current Score (from LAS)", current_score)
                print("Current Score (from UAS)", current_uas)
        # Checkpoint on LAS improvement; UAS only updates its own best.
        if current_score > best_dev:
            if data.seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score (from LAS):", best_dev)
            model_name = data.model_dir + ".model"
            # print ("Overwritting model to", model_name)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        else:
            print("sofar the best (from LAS)" + repr(best_dev))
        if current_uas > best_dev_uas:
            if data.seg:
                print("Exceed previous best f score:", best_dev_uas)
            else:
                print("Exceed previous best acc score (from UAS):", best_dev_uas)
            best_dev_uas = current_uas
        else:
            print("sofar the best (from UAS)" + repr(best_dev_uas))
        ## decode test (reported every epoch; does not affect selection)
        summary = evaluate(data, model, "test", False)
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        for idtask in range(0, data.HP_tasks):
            speed, acc, p, r, f, _, _ = summary[idtask]
            if data.seg:
                current_score = f
                print("Task %d Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                      % (idtask, test_cost, speed, acc, p, r, f))
            else:
                current_score = acc
                print("Task %d Test: time: %.2fs speed: %.2fst/s; acc: %.4f"
                      % (idtask, test_cost, speed, acc))
        gc.collect()
def train(data):
    """Train a model with per-epoch JSON logging and dev-f early stopping.

    Trains for up to ``data.HP_iteration`` epochs; after each epoch evaluates
    on dev and test, checkpoints to ``data.model_dir + '.' + idx + '.model'``
    whenever the dev score improves, appends a JSON line per epoch to
    ``data.log_dir``, and stops early once the dev f-measure has changed by
    less than ``data.stopping_criterion`` for ``data.iters_without_change``
    consecutive epochs.

    Args:
        data: project Data/config object supplying hyper-parameters
            (``HP_*``), datasets (``train_Ids``), paths (``model_dir``,
            ``log_dir``) and the early-stopping settings.
    """
    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)
    # Build the optimizer requested in the configuration.
    if data.optimizer.lower() == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=data.HP_lr,
                              momentum=data.HP_momentum, weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adagrad":
        optimizer = optim.Adagrad(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adadelta":
        optimizer = optim.Adadelta(model.parameters(), lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adam":
        optimizer = optim.Adam(model.parameters(), lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)
    best_dev = -10
    # data.HP_iteration = 1
    ## start training
    iters_without_change = 0  # consecutive epochs with ~unchanged dev f
    previous_f = 0
    with open(data.log_dir, "w") as log_file:
        for idx in range(data.HP_iteration):
            epoch_start = time.time()
            temp_start = epoch_start
            print("Epoch: %s/%s" % (idx, data.HP_iteration))
            if data.optimizer == "SGD":
                optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
            instance_count = 0
            sample_loss = 0
            total_loss = 0
            right_token = 0
            whole_token = 0
            random.shuffle(data.train_Ids)
            print("Shuffle: first input word list:", data.train_Ids[0][0])
            ## set model in train model
            model.train()
            model.zero_grad()
            batch_size = data.HP_batch_size
            train_num = len(data.train_Ids)
            total_batch = train_num // batch_size + 1
            for batch_id in range(total_batch):
                start = batch_id * batch_size
                end = (batch_id + 1) * batch_size
                if end > train_num:
                    end = train_num
                instance = data.train_Ids[start:end]
                if not instance:
                    continue
                batch_word, batch_features, batch_wordlen, batch_wordrecover, \
                    batch_char, batch_charlen, batch_charrecover, batch_label, \
                    mask = batchify_with_label(instance, data.HP_gpu, True,
                                               data.sentence_classification)
                instance_count += 1
                loss, tag_seq = model.calculate_loss(
                    batch_word, batch_features, batch_wordlen, batch_char,
                    batch_charlen, batch_charrecover, batch_label, mask)
                right, whole = predict_check(tag_seq, batch_label, mask,
                                             data.sentence_classification)
                right_token += right
                whole_token += whole
                # print("loss:",loss.item())
                sample_loss += loss.item()
                total_loss += loss.item()
                if end % 500 == 0:
                    temp_time = time.time()
                    temp_cost = temp_time - temp_start
                    temp_start = temp_time
                    print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                          % (end, temp_cost, sample_loss, right_token,
                             whole_token, (right_token + 0.) / whole_token))
                    if sample_loss > 1e8 or str(sample_loss) == "nan":
                        print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
                        exit(1)
                    sys.stdout.flush()
                    sample_loss = 0
                loss.backward()
                optimizer.step()
                model.zero_grad()
            temp_time = time.time()
            temp_cost = temp_time - temp_start
            print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                  % (end, temp_cost, sample_loss, right_token, whole_token,
                     (right_token + 0.) / whole_token))
            epoch_finish = time.time()
            epoch_cost = epoch_finish - epoch_start
            print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"
                  % (idx, epoch_cost, train_num / epoch_cost, total_loss))
            print("totalloss:", total_loss)
            # BUGFIX: `total_loss is np.nan` was an identity comparison that is
            # always False for a float produced by summation; NaN must be
            # detected with np.isnan.
            if total_loss > 1e8 or np.isnan(total_loss):
                print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
                exit(1)
            # continue
            speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
            dev_finish = time.time()
            dev_cost = dev_finish - epoch_finish
            if data.seg:
                current_score = f
                print("Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                      % (dev_cost, speed, acc, p, r, f))
            else:
                current_score = acc
                print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f"
                      % (dev_cost, speed, acc))
            if current_score > best_dev:
                if data.seg:
                    print("Exceed previous best f score:", best_dev)
                else:
                    print("Exceed previous best acc score:", best_dev)
                # One checkpoint file per improving epoch (epoch idx in name).
                model_name = data.model_dir + '.' + str(idx) + ".model"
                print("Save current best model in file:", model_name)
                torch.save(model.state_dict(), model_name)
                best_dev = current_score
            # ## decode test
            speed, acc, p, r, f_test, _, _ = evaluate(data, model, "test")
            test_finish = time.time()
            test_cost = test_finish - dev_finish
            if data.seg:
                print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                      % (test_cost, speed, acc, p, r, f_test))
            else:
                print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f"
                      % (test_cost, speed, acc))
            # NOTE(review): `f` is the dev f-score; the key name "train_f" is
            # kept as-is for log-format compatibility — confirm downstream use.
            log_entry = {"iteration": idx, "train_f": f}
            log_file.write(json.dumps(log_entry) + "\n")
            if abs(f - previous_f) < data.stopping_criterion:
                iters_without_change += 1
            else:
                iters_without_change = 0
            if iters_without_change == data.iters_without_change:
                print(
                    f"Model f-measure has not changed in {iters_without_change} iterations. Stopping."
                )
                # BUGFIX: the message announced stopping but the loop kept
                # running; actually terminate training here.
                break
            gc.collect()
            previous_f = f
def train(data):
    """Train a model, optionally selecting on external EVALB or LAS scoring.

    Runs ``data.HP_iteration`` epochs of mini-batch training. After each
    epoch the dev set is evaluated; the model-selection score is either
    (a) EVALB bracketing f-measure computed by shelling out to an external
    evaluation script (``data.optimize_with_evalb``), (b) a CoNLL-UD score
    computed by decoding the output to CoNLL and running ``data.conll_ud``
    (``data.optimize_with_las``), or (c) the internal f/acc from
    ``evaluate``. The best model is saved to ``data.model_dir + ".model"``
    and test scores are printed every epoch.

    Args:
        data: project Data/config object supplying hyper-parameters
            (``HP_*``), datasets (``train_Ids``, ``train_texts``), paths and
            the external-evaluation settings used below.
    """
    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)
    print(model)
    # loss_function = nn.NLLLoss()
    # Select the optimizer requested in the configuration.
    if data.optimizer.lower() == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=data.HP_lr,
                              momentum=data.HP_momentum, weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adagrad":
        optimizer = optim.Adagrad(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adadelta":
        optimizer = optim.Adadelta(model.parameters(), lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(), lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adam":
        optimizer = optim.Adam(model.parameters(), lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)
    best_dev = -10  # best dev selection score seen so far
    # data.HP_iteration = 1
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        if data.optimizer == "SGD":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        sample_id = 0
        sample_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        # Shuffle ids and raw texts together so they stay aligned.
        train_data = list(zip(data.train_Ids, data.train_texts))
        random.shuffle(train_data)
        data.train_Ids, data.train_texts = zip(*train_data)
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        batch_id = 0
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            instance_texts = data.train_texts[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, \
                batch_char, batch_charlen, batch_charrecover, batch_label, \
                mask, batch_word_text = batchify_with_label(
                    instance, instance_texts, data.HP_gpu, True,
                    data.sentence_classification)
            instance_count += 1
            loss, tag_seq = model.neg_log_likelihood_loss(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask,
                batch_word_text)
            right, whole = predict_check(tag_seq, batch_label, mask,
                                         data.sentence_classification)
            right_token += right
            whole_token += whole
            # print("loss:",loss.item())
            sample_loss += loss.item()
            total_loss += loss.item()
            if end % 500 == 0:
                # Periodic progress report; sample_loss resets afterwards.
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                      % (end, temp_cost, sample_loss, right_token, whole_token,
                         (right_token + 0.) / whole_token))
                # NOTE(review): this variant only warns on loss explosion —
                # the exit is deliberately commented out.
                if sample_loss > 1e8 or str(sample_loss) == "nan":
                    print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
                    # exit(1)
                sys.stdout.flush()
                sample_loss = 0
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
              % (end, temp_cost, sample_loss, right_token, whole_token,
                 (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"
              % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        print("totalloss:", total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
            #exit(1)
        # continue
        speed, acc, p, r, f, pred_results, pred_scores = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        if data.optimize_with_evalb:
            # Dev selection via the external EVALB bracketing evaluator:
            # dump decoded trees to a temp file, run the evaluation script
            # through the shell, then parse its "Bracketing FMeasure" line.
            with tempfile.NamedTemporaryFile("w", delete=False) as f_decode:
                if data.decode_dir is None:
                    data.decode_dir = f_decode.name
                decoded_st_dir = f_decode.name
                data.write_decoded_results(pred_results, 'dev')
                command = ["PYTHONPATH=" + data.tree2labels, "python",
                           data.evaluate, " --input ", decoded_st_dir,
                           " --gold ", data.gold_dev_trees,
                           " --evalb ", data.evalb, ">", f_decode.name + ".out"]
                os.system(" ".join(command))
                # NOTE(review): rebinding f_decode to the evaluator's output
                # file leaves that handle unclosed; the with-statement still
                # closes the original temp file it holds internally.
                f_decode = open(f_decode.name + ".out", "r")
                current_score = float(
                    [l for l in f_decode.read().split("\n")
                     if l.startswith("Bracketing FMeasure")][0].split("=")[1])
                print("Current Score (from EVALB)", current_score,
                      "Previous best dev (from EVALB)", best_dev)
        elif data.optimize_with_las:
            # Dev selection via CoNLL-UD scoring: decode the output into a
            # CoNLL file with dep2labels, then score it with data.conll_ud.
            with tempfile.NamedTemporaryFile("w", delete=False) as f_decode:
                if data.decode_dir is None:
                    data.decode_dir = f_decode.name
                decoded_st_dir = f_decode.name
                data.write_decoded_results(pred_results, 'dev')
                #Transforming the output file into a CoNLL file
                command = [  # "PYTHONPATH="+abspath(join(dirname(__file__), data.dep2labels)),
                    "python",
                    data.dep2labels + os.sep + "decode_output_file.py",
                    "--input", decoded_st_dir,
                    "--output", f_decode.name + ".out"]
                # NOTE(review): shell=True with a string-joined command —
                # paths come from the trusted config, but quoting is fragile.
                p = Popen(" ".join(command), stdout=subprocess.PIPE, shell=True)
                out, err = p.communicate()
                command = ["python", data.conll_ud, f_decode.name + ".out",
                           data.gold_dev_trees]  # ,">",f_decode.name+".out"]
                # NOTE(review): rebinding `p` here clobbers the dev precision
                # unpacked from evaluate() above.
                p = Popen(" ".join(command), stdout=subprocess.PIPE, shell=True)
                out, err = p.communicate()
                out = out.decode("utf-8")
                # conll_ud prints "label: score"; take the numeric part.
                current_score = float(out.strip().split(":")[1])
                print("Current Score (from conll_ud)", current_score,
                      "Previous best dev (from conll_ud)", best_dev)
        else:
            # Internal metric: f-measure when segmentation, else accuracy.
            if data.seg:
                current_score = f
                print("Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                      % (dev_cost, speed, acc, p, r, f))
            else:
                current_score = acc
                print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f"
                      % (dev_cost, speed, acc))
        if current_score > best_dev:
            if data.seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score:", best_dev)
            # Single overwritten checkpoint (per-epoch naming kept disabled).
            model_name = data.model_dir + ".model"
            #model_name = data.model_dir +'.'+ str(idx) + ".model"
            print("Save current best model in file:", model_name)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        # ## decode test
        speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if data.seg:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                  % (test_cost, speed, acc, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f"
                  % (test_cost, speed, acc))
        gc.collect()