def train(data): print("Training model...") data.show_data_summary() save_data_name = data.model_dir + ".dset" data.save(save_data_name) model = SeqModel(data) for name, param in model.named_parameters(): if param.requires_grad: print(name) if data.optimizer.lower() == "sgd": optimizer = optim.SGD(model.parameters(), lr=data.HP_lr, momentum=data.HP_momentum, weight_decay=data.HP_l2) elif data.optimizer.lower() == "adagrad": optimizer = optim.Adagrad(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2) elif data.optimizer.lower() == "adadelta": optimizer = optim.Adadelta(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2) elif data.optimizer.lower() == "rmsprop": optimizer = optim.RMSprop(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2) elif data.optimizer.lower() == "adam": optimizer = optim.Adam(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2) else: print("Optimizer illegal: %s" % (data.optimizer)) exit(1) best_dev_f1 = -1 test_f1 = [] best_epoch = 0 # start training for idx in range(data.HP_iteration): epoch_start = time.time() print("Epoch: %s/%s" % (idx, data.HP_iteration)) if data.optimizer == "SGD": optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr) instance_count = 0 total_perplexity_1 = 0 total_perplexity_2 = 0 total_loss_1 = 0 total_loss_2 = 0 total_loss_3 = 0 total_loss_4 = 0 random.shuffle(data.source_train_idx) random.shuffle(data.target_train_idx) random.shuffle(data.source_lm_idx) random.shuffle(data.target_lm_idx) model.train() model.zero_grad() batch_size_1 = data.HP_batch_size train_num_1 = len(data.source_train_idx) train_num_2 = len(data.target_train_idx) train_num_3 = len(data.source_lm_idx) train_num_4 = len(data.target_lm_idx) batch_num = train_num_1 // batch_size_1 + 1 batch_size_2 = train_num_2 // batch_num batch_size_3 = train_num_3 // batch_num batch_size_4 = train_num_4 // batch_num for batch_id in range(batch_num): instance_1 = data.source_train_idx[batch_id * batch_size_1: (batch_id + 1) * batch_size_1 if ((batch_id + 1) * batch_size_1) < train_num_1 else train_num_1] instance_2 = data.target_train_idx[batch_id * batch_size_2: (batch_id + 1) * batch_size_2 if ((batch_id + 1) * batch_size_2) < train_num_2 else train_num_2] instance_3 = data.source_lm_idx[batch_id * batch_size_3: (batch_id + 1) * batch_size_3 if ((batch_id + 1) * batch_size_3) < train_num_3 else train_num_3] instance_4 = data.target_lm_idx[batch_id * batch_size_4: (batch_id + 1) * batch_size_4 if ((batch_id + 1) * batch_size_4) < train_num_4 else train_num_4] if not instance_1 or not instance_2: continue # NER batch_word_1, batch_wordlen_1, batch_wordrecover_1, batch_char_1, batch_charlen_1, \ batch_charrecover_1, batch_label_1, lm_seq_tensor_1, mask_1 = batchify_with_label(instance_1, data.HP_gpu) batch_word_2, batch_wordlen_2, batch_wordrecover_2, batch_char_2, batch_charlen_2, \ batch_charrecover_2, batch_label_2, lm_seq_tensor_2, mask_2 = batchify_with_label(instance_2, data.HP_gpu) # LM batch_word_3, batch_wordlen_3, batch_wordrecover_3, batch_char_3, batch_charlen_3, \ batch_charrecover_3, batch_label_3, lm_seq_tensor_3, mask_3 = batchify_with_label(instance_3 + instance_1, data.HP_gpu) batch_word_4, batch_wordlen_4, batch_wordrecover_4, batch_char_4, batch_charlen_4, \ batch_charrecover_4, batch_label_4, lm_seq_tensor_4, mask_4 = batchify_with_label(instance_4 + instance_2, data.HP_gpu) batch_word = [batch_word_1, batch_word_2, batch_word_3, batch_word_4] batch_wordlen = [batch_wordlen_1, batch_wordlen_2, batch_wordlen_3, 
batch_wordlen_4] batch_char = [batch_char_1, batch_char_2, batch_char_3, batch_char_4] batch_charlen = [batch_charlen_1, batch_charlen_2, batch_charlen_3, batch_charlen_4] batch_charrecover = [batch_charrecover_1, batch_charrecover_2, batch_charrecover_3, batch_charrecover_4] batch_label = [batch_label_1, batch_label_2, batch_label_3, batch_label_4] lm_seq_tensor = [lm_seq_tensor_1, lm_seq_tensor_2, lm_seq_tensor_3, lm_seq_tensor_4] mask = [mask_1, mask_2, mask_3, mask_4] instance_count += 1 loss_ = [] perplexity_ = [] # source language model loss, perplexity, tag_seq = model.loss('model1', batch_word[2], batch_wordlen[2], batch_char[2], batch_charlen[2], batch_charrecover[2], batch_label[2], lm_seq_tensor[2], mask[2]) loss_.append(loss) perplexity_.append(perplexity) # source NER loss, perplexity, tag_seq = model.loss('model2', batch_word[0], batch_wordlen[0], batch_char[0], batch_charlen[0], batch_charrecover[0], batch_label[0], lm_seq_tensor[0], mask[0]) loss_.append(loss) # target language model loss, perplexity, tag_seq = model.loss('model3', batch_word[3], batch_wordlen[3], batch_char[3], batch_charlen[3], batch_charrecover[3], batch_label[3], lm_seq_tensor[3], mask[3]) loss_.append(loss) perplexity_.append(perplexity) loss = 0 model_num = len(loss_) for loss_id in range(model_num): loss += loss_[loss_id] loss.backward() optimizer.step() model.zero_grad() total_loss_1 += loss_[0].data[0] total_loss_2 += loss_[1].data[0] total_loss_3 += loss_[2].data[0] total_perplexity_1 += perplexity_[0].data[0] total_perplexity_2 += perplexity_[1].data[0] epoch_finish = time.time() epoch_cost = epoch_finish - epoch_start source_lm_perplexity = math.exp(total_perplexity_1 / batch_num) target_lm_perplexity = math.exp(total_perplexity_2 / batch_num) print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s" % ( idx, epoch_cost, train_num_1 / epoch_cost, total_loss_2)) print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total perplexity: %.4f" % ( idx, epoch_cost, train_num_3 / epoch_cost, source_lm_perplexity)) print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s" % ( idx, epoch_cost, train_num_2 / epoch_cost, total_loss_4)) print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total perplexity: %.4f" % ( idx, epoch_cost, train_num_4 / epoch_cost, target_lm_perplexity)) if total_loss_1 > 1e8 or str(total_loss_1) == "nan" or total_loss_2 > 1e8 or str( total_loss_2) == "nan" or total_loss_3 > 1e8 or str(total_loss_3) == "nan" or total_loss_4 > 1e8 or str( total_loss_4) == "nan": print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! 
EXIT....") exit(1) # dev-test speed, acc, p, r, f, _, _ = evaluate(data, model, "dev-test") test_f1.append(f[1]) dev_finish = time.time() dev_cost = dev_finish - epoch_finish current_score = f[0] print("Dev-Source: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % ( dev_cost, speed, acc[0], p[0], r[0], f[0])) print("Test-Target: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % ( dev_cost, speed, acc[1], p[1], r[1], f[1])) if current_score > best_dev_f1: best_epoch = idx print("Exceed previous best f score:", best_dev_f1) model_name = data.model_dir + ".model" print("Save current best model in file:", model_name) torch.save(model.state_dict(), model_name) best_dev_f1 = current_score if current_score > 0.72: print("change optim sgd:") optimizer = optim.SGD(model.parameters(), lr=0.015, momentum=data.HP_momentum, weight_decay=data.HP_l2) print("The best Source-domain dev f-score: %.4f, Target-domain f-score: %.4f" % (best_dev_f1, test_f1[best_epoch]))
def train(train_data): print("Training model...") train_data.show_data_summary() save_data_name = train_data.init_dir + ".init" train_data.save(save_data_name) model = SeqModel(train_data) for name, param in model.named_parameters(): if param.requires_grad: print(name) if train_data.optimizer.lower() == "sgd": optimizer = optim.SGD(model.parameters(), lr=train_data.HP_lr, momentum=train_data.HP_momentum, weight_decay=train_data.HP_l2) elif train_data.optimizer.lower() == "adagrad": optimizer = optim.Adagrad(model.parameters(), lr=train_data.HP_lr, weight_decay=train_data.HP_l2) elif train_data.optimizer.lower() == "adadelta": optimizer = optim.Adadelta(model.parameters(), lr=train_data.HP_lr, weight_decay=train_data.HP_l2) elif train_data.optimizer.lower() == "rmsprop": optimizer = optim.RMSprop(model.parameters(), lr=train_data.HP_lr, weight_decay=train_data.HP_l2) elif train_data.optimizer.lower() == "adam": optimizer = optim.Adam(model.parameters(), lr=train_data.HP_lr, weight_decay=train_data.HP_l2) else: print("Optimizer illegal: %s" % train_data.optimizer) exit(1) best_dev = -10 dev_f = [] test_f = [] best_epoch = 0 for idx in range(train_data.HP_iteration): epoch_start = time.time() print("Epoch: %s/%s" % (idx, train_data.HP_iteration)) if train_data.optimizer.lower() == "sgd": optimizer = lr_decay(optimizer, idx, train_data.HP_lr_decay, train_data.HP_lr) random.shuffle(train_data.ner_1_train_idx) random.shuffle(train_data.ner_2_train_idx) random.shuffle(train_data.lm_1_idx) random.shuffle(train_data.lm_2_idx) model.train() model.zero_grad() ner_1_loss = 0 ner_2_loss = 0 lm_1_perplexity = 0 lm_2_perplexity = 0 ner_1_batch_size = train_data.HP_batch_size batch_nums = len(train_data.ner_1_train_idx) // ner_1_batch_size + 1 ner_2_batch_size = len(train_data.ner_2_train_idx) // batch_nums lm_1_batch_size = len(train_data.lm_1_idx) // batch_nums lm_2_batch_size = len(train_data.lm_2_idx) // batch_nums print("batch size: ", ner_1_batch_size, ner_2_batch_size, lm_1_batch_size, lm_2_batch_size) for batch_id in range(batch_nums): ner_1_data = train_data.ner_1_train_idx[batch_id * ner_1_batch_size: (batch_id + 1) * ner_1_batch_size if\ (batch_id + 1) * ner_1_batch_size < len(train_data.ner_1_train_idx) else len(train_data.ner_1_train_idx)] ner_2_data = train_data.ner_2_train_idx[batch_id * ner_2_batch_size: (batch_id + 1) * ner_2_batch_size if\ (batch_id + 1) * ner_2_batch_size < len(train_data.ner_2_train_idx) else len(train_data.ner_2_train_idx)] lm_1_data = train_data.lm_1_idx[batch_id * lm_1_batch_size: (batch_id + 1) * lm_1_batch_size if \ (batch_id + 1) * lm_1_batch_size < len(train_data.lm_1_idx) else len(train_data.lm_1_idx)] lm_2_data = train_data.lm_2_idx[batch_id * lm_2_batch_size: (batch_id + 1) * lm_2_batch_size if \ (batch_id + 1) * lm_2_batch_size < len(train_data.lm_2_idx) else len(train_data.lm_2_idx)] ner_1_batch_data = batchify_with_label(ner_1_data, train_data.HP_gpu) if train_data.mode == 'supervised': ner_2_batch_data = batchify_with_label(ner_2_data, train_data.HP_gpu) lm_1_batch_data = batchify_with_label(lm_1_data, train_data.HP_gpu) lm_2_batch_data = batchify_with_label(lm_2_data, train_data.HP_gpu) losses = [] perplexities = [] # word_seq_tensor, word_seq_lengths, word_seq_recover, char_seq_tensor, char_seq_lengths, # char_seq_recover, label_seq_tensor, lm_seq_tensor, mask loss, perplexity, tag_seq_forward, tag_seq_backward, tag_seq = \ model.loss('ner1', ner_1_batch_data[0], ner_1_batch_data[1], ner_1_batch_data[3], ner_1_batch_data[4], ner_1_batch_data[5], 
ner_1_batch_data[6], ner_1_batch_data[7], ner_1_batch_data[8]) losses.append(loss) if train_data.mode == 'supervised': loss, perplexity, tag_seq_forward, tag_seq_backward, tag_seq = \ model.loss('ner2', ner_2_batch_data[0], ner_2_batch_data[1], ner_2_batch_data[3], ner_2_batch_data[4], ner_2_batch_data[5], ner_2_batch_data[6], ner_2_batch_data[7], ner_2_batch_data[8]) losses.append(loss) loss, perplexity, tag_seq_forward, tag_seq_backward, tag_seq = \ model.loss('lm1', lm_1_batch_data[0], lm_1_batch_data[1], lm_1_batch_data[3], lm_1_batch_data[4], lm_1_batch_data[5], lm_1_batch_data[6], lm_1_batch_data[7], lm_1_batch_data[8]) losses.append(loss) perplexities.append(perplexity) loss, perplexity, tag_seq_forward, tag_seq_backward, tag_seq = \ model.loss('lm2', lm_2_batch_data[0], lm_2_batch_data[1], lm_2_batch_data[3], lm_2_batch_data[4], lm_2_batch_data[5], lm_2_batch_data[6], lm_2_batch_data[7], lm_2_batch_data[8]) losses.append(loss) perplexities.append(perplexity) model_loss = 0 loss_rate = [0.8, 1, 0.5, 0.5 ] if train_data.mode == 'supervised' else [1, 1, 1] for loss_id in range(len(losses)): model_loss += losses[loss_id] * loss_rate[loss_id] model_loss.backward() optimizer.step() model.zero_grad() ner_1_loss += losses[0].data[0] ner_2_loss += losses[1].data[0] lm_1_perplexity += perplexities[0].data[0] lm_2_perplexity += perplexities[1].data[0] epoch_finish = time.time() epoch_cost = epoch_finish - epoch_start print("Epoch: %s training finished. Time: %.2fs." % (idx, epoch_cost)) print("ner 1 total loss: %s" % ner_1_loss) if train_data.mode == 'supervised': print("ner 2 total loss: %s" % ner_2_loss) print("lm 1 perplexity: %.4f" % math.exp(lm_1_perplexity / batch_nums)) print("lm 2 perplexity: %.4f" % math.exp(lm_2_perplexity / batch_nums)) if ner_1_loss > 1e8 or str( ner_1_loss) == "nan" or ner_2_loss > 1e8 or str( ner_2_loss) == "nan": print( "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...." ) exit(1) evaluate('ner1', train_data.ner_1_dev_idx, train_data.label_alphabet_ner_1, train_data, model) if train_data.mode == 'supervised': p, r, f = evaluate('ner2', train_data.ner_2_dev_idx, train_data.label_alphabet_ner_2, train_data, model) else: p, r, f = evaluate('ner2', train_data.ner_2_dev_idx, train_data.label_alphabet_ner_1, train_data, model) dev_f.append(f) if f > best_dev: best_epoch = idx print("Exceed previous best f score:", best_dev) model_name = train_data.model_dir + ".model" print("Save current best model in file:", model_name) torch.save(model.state_dict(), model_name) best_dev = f evaluate('ner1', train_data.ner_1_test_idx, train_data.label_alphabet_ner_1, train_data, model) if train_data.mode == 'supervised': p, r, f = evaluate('ner2', train_data.ner_2_test_idx, train_data.label_alphabet_ner_2, train_data, model) else: p, r, f = evaluate('ner2', train_data.ner_2_test_idx, train_data.label_alphabet_ner_1, train_data, model) test_f.append(f) print("the best dev score is in epoch %s, dev:%.4f, test:%.4f" % (best_epoch, dev_f[best_epoch], test_f[best_epoch]))
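# The trainers above all size their batches so that several datasets of
# different lengths are consumed in the same number of steps per epoch. A
# self-contained sketch of that slicing pattern (proportional_slices and the
# toy dataset sizes below are illustrative, not from the repo):
def proportional_slices(datasets, main_batch_size):
    # the first dataset fixes the step count; each other dataset contributes
    # len(d) // steps instances per step so all of them run out together
    steps = len(datasets[0]) // main_batch_size + 1
    sizes = [main_batch_size] + [len(d) // steps for d in datasets[1:]]
    for step in range(steps):
        yield [d[step * s: min((step + 1) * s, len(d))]
               for d, s in zip(datasets, sizes)]

# e.g. four toy datasets with a main batch size of 10:
for batch in proportional_slices([list(range(95)), list(range(40)),
                                  list(range(200)), list(range(31))], 10):
    pass  # batch[i] is this step's slice of dataset i
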
def train(data): print("Training model...") data.show_data_summary() save_data_name = data.model_dir + ".dset" data.save(save_data_name) model = SeqModel(data) for name, param in model.named_parameters(): if param.requires_grad: print(name) loss_function = nn.NLLLoss() if data.optimizer.lower() == "sgd": optimizer = optim.SGD(model.parameters(), lr=data.HP_lr, momentum=data.HP_momentum, weight_decay=data.HP_l2) elif data.optimizer.lower() == "adagrad": optimizer = optim.Adagrad(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2) elif data.optimizer.lower() == "adadelta": optimizer = optim.Adadelta(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2) elif data.optimizer.lower() == "rmsprop": optimizer = optim.RMSprop(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2) elif data.optimizer.lower() == "adam": optimizer = optim.Adam(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2) else: print("Optimizer illegal: %s" % (data.optimizer)) exit(1) best_dev = -10 dev_f = [] test_f = [] perplexity_1 = [] perplexity_2 = [] best_epoch = 0 # data.HP_iteration = 1 LM_data = data.train_Ids_2 ## start training for idx in range(data.HP_iteration): epoch_start = time.time() temp_start = epoch_start print("Epoch: %s/%s" % (idx, data.HP_iteration)) if data.optimizer == "SGD": optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr) instance_count = 0 total_perplexity_1 = 0 total_perplexity_2 = 0 total_loss_1 = 0 total_loss_2 = 0 total_loss_3 = 0 total_loss_4 = 0 random.shuffle(data.train_Ids_1) random.shuffle(data.train_Ids_2) ## set model in train model model.train() model.zero_grad() batch_size = data.HP_batch_size batch_id = 0 ###co-train for 4 models train_num_1 = len(data.train_Ids_1) train_num_2 = len(data.train_Ids_2) train_num_3 = len(LM_data) total_batch_1 = train_num_1 // batch_size + 1 batch_size_2 = train_num_2 // total_batch_1 l_batch_num_2 = train_num_2 - total_batch_1 * batch_size_2 start_2 = end_2 = 0 for batch_id in range(total_batch_1): start = batch_id * batch_size end = (batch_id + 1) * batch_size start_2 = end_2 if batch_id < l_batch_num_2: end_2 = start_2 + (batch_size_2 + 1) else: end_2 = start_2 + batch_size_2 if end > train_num_1: end = train_num_1 if end_2 > train_num_2: end_2 = train_num_2 instance_1 = data.train_Ids_1[start:end] instance_2 = data.train_Ids_2[start_2:end_2] if not instance_1 or not instance_2: continue #seq label batch_word_1, batch_features_1, batch_wordlen_1, batch_wordrecover_1, batch_char_1, batch_charlen_1, batch_charrecover_1, batch_label_1, lm_seq_tensor_1, mask_1 = batchify_with_label( instance_1, data.HP_gpu) batch_word_2, batch_features_2, batch_wordlen_2, batch_wordrecover_2, batch_char_2, batch_charlen_2, batch_charrecover_2, batch_label_2, lm_seq_tensor_2, mask_2 = batchify_with_label( instance_2, data.HP_gpu) batch_word = [batch_word_1, batch_word_2] batch_features = [batch_features_1, batch_features_2] batch_wordlen = [batch_wordlen_1, batch_wordlen_2] batch_char = [batch_char_1, batch_char_2] batch_charlen = [batch_charlen_1, batch_charlen_2] batch_charrecover = [batch_charrecover_1, batch_charrecover_2] batch_label = [batch_label_1, batch_label_2] lm_seq_tensor = [lm_seq_tensor_1, lm_seq_tensor_2] mask = [mask_1, mask_2] instance_count += 1 loss_ = [] perplexity_ = [] # LM 1 loss, perplexity, tag_seq_forward, tag_seq_backward, tag_seq = model.loss( 'model1', batch_word[0], batch_features[0], batch_wordlen[0], batch_char[0], batch_charlen[0], batch_charrecover[0], batch_label[0], lm_seq_tensor[0], mask[0]) 
loss_.append(loss) perplexity_.append(perplexity) #seq label 1 loss, perplexity, tag_seq_forward, tag_seq_backward, tag_seq = model.loss( 'model2', batch_word[0], batch_features[0], batch_wordlen[0], batch_char[0], batch_charlen[0], batch_charrecover[0], batch_label[0], lm_seq_tensor[0], mask[0]) loss_.append(loss) # LM 2 loss, perplexity, tag_seq_forward, tag_seq_backward, tag_seq = model.loss( 'model3', batch_word[1], batch_features[1], batch_wordlen[1], batch_char[1], batch_charlen[1], batch_charrecover[1], batch_label[1], lm_seq_tensor[1], mask[1]) loss_.append(loss) perplexity_.append(perplexity) #seq label 2 loss, perplexity, tag_seq_forward, tag_seq_backward, tag_seq = model.loss( 'model4', batch_word[1], batch_features[1], batch_wordlen[1], batch_char[1], batch_charlen[1], batch_charrecover[1], batch_label[1], lm_seq_tensor[1], mask[1]) loss_.append(loss) loss_rate = [1.0, 1.0, 1.0, 2.0] loss = 0 model_num = len(loss_) for loss_id in range(model_num): loss += loss_[loss_id] * loss_rate[loss_id] loss.backward() optimizer.step() model.zero_grad() total_loss_1 += loss_[0].data[0] total_loss_2 += loss_[1].data[0] total_loss_3 += loss_[2].data[0] total_loss_4 += loss_[3].data[0] total_perplexity_1 += perplexity_[0].data[0] total_perplexity_2 += perplexity_[1].data[0] epoch_finish = time.time() epoch_cost = epoch_finish - epoch_start LM_perplex_1 = math.exp(total_perplexity_1 / total_batch_1) LM_perplex_2 = math.exp(total_perplexity_2 / total_batch_1) perplexity_1.append(LM_perplex_1) perplexity_2.append(LM_perplex_2) print("Epoch: %s training finished. Time: %.2fs" % (idx, epoch_cost)) print("Epoch: %s training finished. Time: %.2fs, total loss: %s" % (idx, epoch_cost, total_loss_2)) print("totalloss:", total_loss_2) print( "Epoch: %s training finished. Time: %.2fs, total perplexity: %.4f" % (idx, epoch_cost, LM_perplex_1)) print("Epoch: %s training finished. Time: %.2fs, total loss: %s" % (idx, epoch_cost, total_loss_4)) print("totalloss:", total_loss_4) print( "Epoch: %s training finished. 
Time: %.2fs, total perplexity: %.4f" % (idx, epoch_cost, LM_perplex_2)) speed, acc, p, r, f, _, _ = evaluate(data, model, "dev") dev_f.append(f[1]) dev_finish = time.time() dev_cost = dev_finish - epoch_finish if data.seg: current_score = f[1] print( "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (dev_cost, speed, acc[0], p[0], r[0], f[0])) print( "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (dev_cost, speed, acc[1], p[1], r[1], f[1])) else: current_score = acc[1] print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" % (dev_cost, speed, acc[0])) print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" % (dev_cost, speed, acc[1])) if current_score > best_dev: best_epoch = idx if data.seg: print("Exceed previous best f score:", best_dev) else: print("Exceed previous best acc score:", best_dev) # model_name = data.model_dir +'.'+ str(idx) + ".model" model_name = data.model_dir + ".model" print("Save current best model in file:", model_name) torch.save(model.state_dict(), model_name) best_dev = current_score # ## decode test speed, acc, p, r, f, _, _ = evaluate(data, model, "test") test_f.append(f[1]) test_finish = time.time() test_cost = test_finish - dev_finish if data.seg: print( "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (test_cost, speed, acc[0], p[0], r[0], f[0])) print( "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (test_cost, speed, acc[1], p[1], r[1], f[1])) else: print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" % (test_cost, speed, acc[0])) print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" % (test_cost, speed, acc[1])) gc.collect() print("the best dev score is in epoch %s, dev:%.4f, test:%.4f" % (best_epoch, dev_f[best_epoch], test_f[best_epoch])) with open('data/fscore_13PC.txt', 'w') as ft: ft.write('dev f scores:\n') for t in dev_f: ft.write(str(round(t, 6))) ft.write(' ') ft.write('\n') ft.write('test f scores:\n') for t in test_f: ft.write(str(round(t, 6))) ft.write(' ') ft.write('\n') ft.write('LM 1 perplexity:\n') for t in perplexity_1: ft.write(str(round(t, 6))) ft.write(' ') ft.write('\n') ft.write('LM 2 perplexity:\n') for t in perplexity_2: ft.write(str(round(t, 6))) ft.write(' ') if data.task_emb_save_dir is not None: with open('data/task_emb.txt', 'w') as ft: for task, i in data.task_alphabet.iteritems(): ft.write(task) ft.write(' ') for t in model.word_hidden.LSTM_param_generator.task_emb_vocab.weight.data[ i]: ft.write(str(round(t, 6))) ft.write(' ') ft.write('\n') if data.domain_emb_save_dir is not None: with open('data/domain_emb.txt', 'w') as fd: for domain, i in data.domain_alphabet.iteritems(): fd.write(domain) fd.write(' ') for t in model.word_hidden.LSTM_param_generator.domain_emb_vocab.weight.data[ i]: fd.write(str(round(t, 6))) fd.write(' ') fd.write('\n')
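
# Each trainer sums per-task losses (optionally weighted by loss_rate) and runs
# a single backward pass through the shared parameters. A minimal standalone
# sketch of that pattern; the shared encoder and the two linear "task heads"
# below are toy stand-ins, not the repo's SeqModel:
import torch
import torch.nn as nn
import torch.nn.functional as F

shared = nn.Linear(8, 8)                                   # shared encoder
heads = nn.ModuleList([nn.Linear(8, 3), nn.Linear(8, 5)])  # one head per task
loss_rate = [1.0, 2.0]                                     # per-task weights
opt = torch.optim.SGD(list(shared.parameters()) + list(heads.parameters()), lr=0.1)

x = torch.randn(4, 8)
targets = [torch.randint(0, 3, (4,)), torch.randint(0, 5, (4,))]
losses = [F.cross_entropy(head(shared(x)), t) for head, t in zip(heads, targets)]
total = sum(w * l for w, l in zip(loss_rate, losses))  # weighted joint loss
total.backward()                                       # one backward pass updates the shared encoder from all tasks
opt.step()
opt.zero_grad()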