def load_model_decode(model_dir, data, name, gpu, seg=True):
    """Load a trained SeqModel checkpoint and decode one dataset split.

    Args:
        model_dir: path to the saved ``state_dict`` file.
        data: project data/config object; ``HP_gpu`` is overwritten with ``gpu``.
        name: split to decode (e.g. "dev" or "test"), forwarded to ``evaluate``.
        gpu: run on GPU when truthy; when falsy the checkpoint is remapped to CPU.
        seg: when True report acc/p/r/f (segmentation-style metrics),
             otherwise accuracy only.

    Returns:
        ``pred_results`` — the predicted label sequences from ``evaluate``.
    """
    data.HP_gpu = gpu
    print("Load Model from file: ", model_dir)
    model = SeqModel(data)
    # Fix: without map_location, a checkpoint saved on GPU cannot be restored
    # on a CPU-only machine even though gpu=False is explicitly supported here.
    model.load_state_dict(
        torch.load(model_dir, map_location=None if gpu else 'cpu'))
    print("Decode %s data ..." % (name))
    start_time = time.time()
    speed, acc, p, r, f, pred_results, gazs = evaluate(data, model, name)
    end_time = time.time()
    time_cost = end_time - start_time
    if seg:
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (name, time_cost, speed, acc, p, r, f))
    else:
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f" % (name, time_cost, speed, acc))
    return pred_results
def train(data, save_model_dir, seg=True):
    """Train a SeqModel on data.train_Ids, tracking best dev/test scores.

    For each epoch: decays the learning rate, shuffles the training ids,
    accumulates loss over HP_batch_size instances before each optimizer
    step, then evaluates on dev and test. The model is checkpointed to
    `save_model_dir` whenever the dev score improves; final best scores are
    appended to data.result_file.

    Args:
        data: project data/config object (hyper-parameters, datasets,
              result_file path).
        save_model_dir: path where the best model state_dict is saved.
        seg: when True the selection metric is F1, otherwise accuracy.
    """
    print("Training with {} model.".format(data.model_type))
    #data.show_data_summary()
    model = SeqModel(data)
    print("finish building model.")
    # Only optimize parameters that require gradients (frozen embeddings etc.
    # are skipped).
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adamax(parameters, lr=data.HP_lr)
    best_dev = -1
    best_dev_p = -1
    best_dev_r = -1
    best_test = -1
    best_test_p = -1
    best_test_r = -1
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        sample_loss = 0
        batch_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        ## set model in train mode
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            gaz_list, batch_word, batch_biword, batch_wordlen, batch_label, layer_gaz, gaz_count, gaz_chars, gaz_mask, gazchar_mask, mask, batch_bert, bert_mask = batchify_with_label(
                instance, data.HP_gpu, data.HP_num_layer)
            loss, tag_seq = model.neg_log_likelihood_loss(
                gaz_list, batch_word, batch_biword, batch_wordlen, layer_gaz,
                gaz_count, gaz_chars, gaz_mask, gazchar_mask, mask,
                batch_label, batch_bert, bert_mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            sample_loss += loss.data
            total_loss += loss.data
            # Accumulate the graph-carrying loss so several instances share
            # one backward/step below.
            batch_loss += loss
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token))
                sys.stdout.flush()
                sample_loss = 0
            if end % data.HP_batch_size == 0:
                batch_loss.backward()
                optimizer.step()
                model.zero_grad()
                batch_loss = 0
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s" % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        speed, acc, p, r, f, pred_labels, gazs = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        if seg:
            current_score = f
            print("Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" % (dev_cost, speed, acc))
        if current_score > best_dev:
            if seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score:", best_dev)
            model_name = save_model_dir
            torch.save(model.state_dict(), model_name)
            # NOTE: best_dev is deliberately NOT updated here so that the
            # second `current_score > best_dev` test below (after the test-set
            # evaluation) also fires and records the matching test scores.
            #best_dev = current_score
            best_dev_p = p
            best_dev_r = r
        # ## decode test
        speed, acc, p, r, f, pred_labels, gazs = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if seg:
            current_test_score = f
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (test_cost, speed, acc, p, r, f))
        else:
            current_test_score = acc
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" % (test_cost, speed, acc))
        if current_score > best_dev:
            best_dev = current_score
            best_test = current_test_score
            best_test_p = p
            best_test_r = r
        print("Best dev score: p:{}, r:{}, f:{}".format(best_dev_p, best_dev_r, best_dev))
        print("Test score: p:{}, r:{}, f:{}".format(best_test_p, best_test_r, best_test))
        gc.collect()
    # Append the final best scores to the result file. The handle is named
    # `rf` (not `f`) so it does not shadow the f-score variable above, and
    # the with-statement closes it — no explicit close() needed.
    with open(data.result_file, "a") as rf:
        rf.write(save_model_dir + '\n')
        rf.write("Best dev score: p:{}, r:{}, f:{}\n".format(best_dev_p, best_dev_r, best_dev))
        rf.write("Test score: p:{}, r:{}, f:{}\n\n".format(best_test_p, best_test_r, best_test))
def load_model(self, model_dir):
    """Construct a SeqModel from self.data and restore weights from model_dir."""
    restored = SeqModel(self.data)
    state = torch.load(model_dir)
    restored.load_state_dict(state)
    return restored