def train(data):
    """Continue training a ``SeqLabel`` model from a saved checkpoint with SGD.

    Steps, in order:

    1. Persist ``data`` to ``<data.model_dir>.dset`` via ``data.save``.
    2. Build ``SeqLabel(data)`` and load weights from the module-level
       ``model_path`` (using the module-level ``map_location``).
    3. For each of ``data.HP_iteration`` epochs: apply ``lr_decay``, shuffle
       ``data.train_Ids`` in place, run one optimisation step per mini-batch,
       then evaluate on the "dev", "test" and "raw" splits.
    4. Save the model's ``state_dict`` whenever the dev F-score beats the best
       seen so far, and additionally whenever ``idx % 50 == 0`` (this
       includes epoch 0).

    NOTE(review): this file appears to define ``train`` more than once; if so,
    the last definition is the one bound to the name at import time.

    Args:
        data: Project configuration/data object. Attributes read here:
            ``model_dir``, ``seg``, ``optimizer``, ``HP_lr``, ``HP_momentum``,
            ``HP_l2``, ``HP_gpu``, ``HP_iteration``, ``HP_lr_decay``,
            ``HP_batch_size``, ``train_Ids`` and ``sentence_classification``.

    Raises:
        ValueError: If ``data.optimizer`` is not "sgd" (case-insensitive).
            Only SGD is constructed by this routine.
        SystemExit: With status 1 when the accumulated loss exceeds 1e8 or
            becomes NaN.
    """
    # Function-scope import: keeps this block independent of the file's
    # top-level import list.
    import math

    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    model = SeqLabel(data)

    # Load the pretrained weights.
    print('loading model %s' % model_path)
    model.load_state_dict(torch.load(model_path, map_location=map_location))
    print('data.seg:', data.seg)

    # Fail fast on an unsupported optimizer instead of carrying a placeholder
    # value that would only blow up later at ``optimizer.step()``.
    if data.optimizer.lower() != "sgd":
        raise ValueError(
            "Unsupported optimizer %r: only SGD is implemented here"
            % (data.optimizer,))
    optimizer = optim.SGD(model.parameters(),
                          lr=data.HP_lr,
                          momentum=data.HP_momentum,
                          weight_decay=data.HP_l2)

    best_dev = -10
    print('data.HP_gpu:', data.HP_gpu)
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))

        # The optimizer is known to be SGD here, so the decay is applied no
        # matter how the name was capitalised in the configuration.
        optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)

        sample_loss = 0  # reset every time a progress line is printed
        total_loss = 0  # loss accumulated over the whole epoch
        right_token = 0
        whole_token = 0

        random.shuffle(data.train_Ids)
        print("Shuffle: first input word list:", data.train_Ids[0][0])

        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        train_num = len(data.train_Ids)
        print('train_num:', train_num)  # number of training samples
        # "+ 1" covers a trailing partial batch; when train_num is an exact
        # multiple the extra slice is empty and skipped below.
        total_batch = train_num // batch_size + 1
        print('total_batch:', total_batch)

        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = min((batch_id + 1) * batch_size, train_num)
            instance = data.train_Ids[start:end]
            if not instance:
                continue

            (batch_word, batch_features, batch_wordlen, batch_wordrecover,
             batch_char, batch_charlen, batch_charrecover, batch_label,
             mask) = batchify_with_label(instance, data.HP_gpu, True,
                                         data.sentence_classification)
            loss, tag_seq = model.calculate_loss(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask)

            # Compare predictions against the gold labels.
            right, whole = predict_check(tag_seq, batch_label, mask,
                                         data.sentence_classification)
            right_token += right
            whole_token += whole

            # Read the scalar once and reuse it for both accumulators.
            loss_value = loss.item()
            sample_loss += loss_value
            total_loss += loss_value

            # Progress is reported only when the running instance count lands
            # on a multiple of 500.
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(
                    " Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (end, temp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.) / whole_token))
                if sample_loss > 1e8 or math.isnan(sample_loss):
                    print(
                        "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                    )
                    sys.exit(1)
                sys.stdout.flush()
                sample_loss = 0

            loss.backward()
            optimizer.step()
            model.zero_grad()

        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
              (end, temp_cost, sample_loss, right_token, whole_token,
               (right_token + 0.) / whole_token))

        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        print("total_loss:", total_loss)
        if total_loss > 1e8 or math.isnan(total_loss):
            print(
                "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
            )
            sys.exit(1)

        speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        current_score = f
        print(
            "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (dev_cost, speed, acc, p, r, f))

        if current_score > best_dev:
            print("Exceed previous best f score:", best_dev)
            model_name = data.model_dir + '.' + str(idx) + ".model"
            print("Save current best torch_model in file:", model_name)
            # Save the model as it stands at the end of this epoch.
            torch.save(model.state_dict(), model_name)
            best_dev = current_score

        # Also save a snapshot every 50 epochs.
        if idx % 50 == 0:
            model_name = data.model_dir + '.' + str(idx) + ".model"
            print('Save every 50 epoch in file: %s' % model_name)
            torch.save(model.state_dict(), model_name)

        speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        print(
            "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (test_cost, speed, acc, p, r, f))

        # Also evaluate on the additionally supplied "raw" samples.
        speed, acc, p, r, f, _, _ = evaluate(data, model, "raw")
        raw_finish = time.time()
        raw_cost = raw_finish - test_finish
        print(
            "Raw: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (raw_cost, speed, acc, p, r, f))
        gc.collect()
def train(data):
    """Train a ``SeqLabel`` model from scratch with a configurable optimizer.

    Persists ``data`` to ``<data.model_dir>.dset``, builds ``SeqLabel(data)``,
    selects the optimizer named by ``data.optimizer`` (case-insensitive; one
    of sgd, adagrad, adadelta, rmsprop, adam) and runs ``data.HP_iteration``
    epochs. ``data.train_Ids`` is shuffled in place at the start of each
    epoch. After every epoch the model is evaluated on the "dev" and "test"
    splits; the ``state_dict`` is saved whenever the dev score improves. The
    score is the F-value when ``data.seg`` is truthy, otherwise the accuracy.

    NOTE(review): this file appears to define ``train`` more than once; if so,
    the last definition is the one bound to the name at import time.

    Args:
        data: Project configuration/data object. Attributes read here:
            ``model_dir``, ``optimizer``, ``seg``, ``HP_lr``, ``HP_momentum``,
            ``HP_l2``, ``HP_gpu``, ``HP_iteration``, ``HP_lr_decay``,
            ``HP_batch_size`` and ``train_Ids``.

    Raises:
        SystemExit: With status 1 when ``data.optimizer`` names an
            unsupported optimizer.
    """
    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    model = SeqLabel(data)

    # Normalise once so construction and the per-epoch decay check agree.
    optimizer_name = data.optimizer.lower()
    if optimizer_name == "sgd":
        optimizer = optim.SGD(model.parameters(),
                              lr=data.HP_lr,
                              momentum=data.HP_momentum,
                              weight_decay=data.HP_l2)
    elif optimizer_name == "adagrad":
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif optimizer_name == "adadelta":
        optimizer = optim.Adadelta(model.parameters(),
                                   lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif optimizer_name == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif optimizer_name == "adam":
        optimizer = optim.Adam(model.parameters(),
                               lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        # A bad configuration is a failure, so report a non-zero status.
        sys.exit(1)

    best_dev = -10
    # Start training.
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        # Learning-rate decay is only applied to SGD.
        if optimizer_name == "sgd":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)

        sample_loss = 0  # reset every time a progress line is printed
        total_loss = 0  # loss accumulated over the whole epoch
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)

        # Put the model in training mode.
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        train_num = len(data.train_Ids)
        # "+ 1" covers a trailing partial batch; when train_num is an exact
        # multiple the extra slice is empty and skipped below.
        total_batch = train_num // batch_size + 1

        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = min((batch_id + 1) * batch_size, train_num)
            instance = data.train_Ids[start:end]
            if not instance:
                continue

            (batch_word, batch_features, batch_wordlen, batch_wordrecover,
             batch_char, batch_charlen, batch_charrecover, batch_label,
             mask) = batchify_with_label(instance, data.HP_gpu)
            loss, tag_seq = model.neg_log_likelihood_loss(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole

            # ``loss.item()`` replaces ``loss.data[0]``: indexing a 0-dim
            # tensor is rejected by current PyTorch releases.
            loss_value = loss.item()
            sample_loss += loss_value
            total_loss += loss_value

            # Progress is reported only when the running instance count lands
            # on a multiple of 500.
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(
                    " Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (end, temp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.) / whole_token))
                sys.stdout.flush()
                sample_loss = 0

            loss.backward()
            optimizer.step()
            model.zero_grad()

        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
              (end, temp_cost, sample_loss, right_token, whole_token,
               (right_token + 0.) / whole_token))

        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))

        speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        if data.seg:
            current_score = f
            print(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                  (dev_cost, speed, acc))

        if current_score > best_dev:
            if data.seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score:", best_dev)
            model_name = data.model_dir + '.' + str(idx) + ".model"
            print("Save current best model in file:", model_name)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score

        # Decode the test split.
        speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if data.seg:
            print(
                "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (test_cost, speed, acc, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" %
                  (test_cost, speed, acc))
        gc.collect()
def train():
    """Train a ``SeqLabel`` model with SGD using module-level configuration.

    Takes no arguments; it reads the module-level names ``data``, ``config``,
    ``gpu`` and ``logger``. For each of ``config.epoch`` epochs it applies
    ``lr_decay``, shuffles ``data.train_ids`` in place, performs one
    optimisation step per mini-batch, then evaluates on the "dev" and "test"
    splits. The model's ``state_dict`` is written to
    ``<config.model_path>.<epoch>.model`` whenever the dev F-score exceeds
    the best seen so far.

    Raises:
        SystemExit: With status 1 when the accumulated loss exceeds 1e8 or
            becomes NaN.
    """
    # Function-scope imports: keep this block independent of the file's
    # top-level import list.
    import math
    import sys

    model = SeqLabel(data)
    # Move the model to the GPU *before* building the optimizer so that the
    # optimizer is constructed over the parameters that will be trained.
    if gpu:
        model = model.cuda()
    optimizer = optim.SGD(model.parameters(),
                          lr=config.lr,
                          momentum=config.momentum,
                          weight_decay=config.l2)

    best_dev = -10
    for idx in range(config.epoch):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, config.epoch))
        optimizer = lr_decay(optimizer, idx, config.lr_decay, config.lr)

        sample_loss = 0  # reset every time a progress line is printed
        total_loss = 0  # loss accumulated over the whole epoch
        right_token = 0  # correctly predicted tokens in this epoch
        whole_token = 0

        random.shuffle(data.train_ids)
        print("Shuffle: first input word list:", data.train_ids[0][1])

        model.train()
        model.zero_grad()
        batch_size = config.batch_size
        train_num = len(data.train_ids)
        print('batch_size:', batch_size, 'train_num:', train_num)
        # "+ 1" covers a trailing partial batch; when train_num is an exact
        # multiple the extra slice is empty and skipped below.
        total_batch = train_num // batch_size + 1

        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = min((batch_id + 1) * batch_size, train_num)
            # Per the original author's note each instance is laid out as
            # [char, word, feat, label] -- TODO confirm against the loader.
            instance = data.train_ids[start:end]
            if not instance:
                continue

            (batch_word, batch_features, batch_wordlen, batch_wordrecover,
             batch_char, batch_charlen, batch_charrecover, batch_label,
             mask) = batchify_sequence_labeling_with_label(
                 instance, gpu, if_train=True)
            loss, tag_seq = model.calculate_loss(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole

            # Read the scalar once and reuse it for both accumulators.
            loss_value = loss.item()
            sample_loss += loss_value
            total_loss += loss_value

            # Progress is reported only when the running instance count lands
            # on a multiple of 6400.
            if end % 6400 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (
                    end, temp_cost, sample_loss, right_token, whole_token,
                    (right_token + 0.) / whole_token))
                if sample_loss > 1e8 or math.isnan(sample_loss):
                    print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
                    sys.exit(1)
                sample_loss = 0

            loss.backward()
            optimizer.step()
            model.zero_grad()

        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (
            end, temp_cost, sample_loss, right_token, whole_token,
            (right_token + 0.) / whole_token))

        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s" % (
            idx, epoch_cost, train_num / epoch_cost, total_loss))
        if total_loss > 1e8 or math.isnan(total_loss):
            print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
            sys.exit(1)
        logger.info("Epoch: %s, Total loss: %s" % (idx, total_loss))

        speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        current_score = f
        print("Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (
            dev_cost, speed, acc, p, r, f))
        logger.info(
            "Epoch: %s, Loss: %s, Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (
                idx, total_loss, dev_cost, speed, acc, p, r, f))

        # Keep a checkpoint for every epoch that improves the dev F-score.
        if current_score > best_dev:
            model_name = config.model_path + '.' + str(idx) + '.model'
            torch.save(model.state_dict(), model_name)
            best_dev = current_score

        speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        logger.info("Epoch: %s, Loss: %s, Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (
            idx, total_loss, test_cost, speed, acc, p, r, f))