def ner_evaluate(data, wordrep, hiddenlist, model, name):
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        print("Error: wrong evaluate name,", name)

    wordrep.eval()
    for hidden in hiddenlist:
        hidden.eval()
    model.eval()

    batch_size = data.HP_batch_size
    correct = 0
    total = 0
    train_num = len(instances)
    total_batch = train_num // batch_size + 1

    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue

        with torch.no_grad():
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, \
                batch_charrecover, batch_label, mask, _ = batchify_with_label(instance, data.HP_gpu, True)

            ner_word_rep = wordrep.forward(batch_word, batch_features, batch_wordlen, batch_char,
                                           batch_charlen, batch_charrecover, None, None)

            # run the word representation through the stacked hidden layers, without attention at eval time
            ner_hidden = ner_word_rep
            for i in range(opt.hidden_num):
                ner_lstm_out, ner_att_out = hiddenlist[i].forward(ner_hidden, batch_wordlen, False)
                ner_hidden = ner_lstm_out

            tag_seq = model(ner_hidden, mask)

            # token-level accuracy over non-padding positions
            for idx in range(mask.shape[0]):
                for idy in range(mask.shape[1]):
                    if mask[idx][idy] != 0:
                        total += 1
                        if tag_seq[idx][idy] == batch_label[idx][idy]:
                            correct += 1

    acc = 1.0 * correct / total
    return acc
def ner_evaluateWhenTest(data, wordrep, hiddenlist, model):
    instances = data.raw_Ids
    nbest_pred_results = []

    wordrep.eval()
    for hidden in hiddenlist:
        hidden.eval()
    model.eval()

    batch_size = data.HP_batch_size
    train_num = len(instances)
    total_batch = train_num // batch_size + 1

    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue

        batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, \
            batch_charrecover, batch_label, mask, _ = batchify_with_label(instance, data.HP_gpu, True)

        ner_word_rep = wordrep.forward(batch_word, batch_features, batch_wordlen, batch_char,
                                       batch_charlen, batch_charrecover, None, None)

        ner_hidden = ner_word_rep
        for i in range(opt.hidden_num):
            ner_lstm_out, ner_att_out = hiddenlist[i].forward(ner_hidden, batch_wordlen, False)
            ner_hidden = ner_lstm_out

        scores, nbest_tag_seq = model.decode_nbest(ner_hidden, mask, data.nbest)
        nbest_pred_result = ner.recover_nbest_label(nbest_tag_seq, mask, data.label_alphabet, batch_wordrecover)
        nbest_pred_results += nbest_pred_result

    return nbest_pred_results
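# Usage sketch (illustrative, not part of the original script): the two helpers above expect the
# modules built in train() below -- a WordRep, a list of opt.hidden_num HiddenLayer modules, and a
# SeqModel. ner_evaluate returns token-level accuracy on a chosen split, while ner_evaluateWhenTest
# runs n-best CRF decoding over data.raw_Ids and returns label sequences.
#
# dev_acc = ner_evaluate(data, wordrep, hiddenlist, seq_model, "dev")        # token accuracy
# nbest_labels = ner_evaluateWhenTest(data, wordrep, hiddenlist, seq_model)  # n-best label strings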
def joint_train(data, old_data, opt):
    if not os.path.exists(opt.output):
        os.makedirs(opt.output)

    if opt.pretrained_model_dir != 'None':
        # warm-start the NER and RE modules from a pretrained model directory
        seq_model = SeqModel(data)
        if opt.test_in_cpu:
            seq_model.load_state_dict(
                torch.load(os.path.join(opt.pretrained_model_dir, 'ner_model.pkl'), map_location='cpu'))
        else:
            seq_model.load_state_dict(
                torch.load(os.path.join(opt.pretrained_model_dir, 'ner_model.pkl')))

        if (data.label_alphabet_size != seq_model.crf.tagset_size) \
                or (data.HP_hidden_dim != seq_model.hidden2tag.weight.size(1)):
            raise RuntimeError("ner_model not compatible")

        seq_wordseq = WordSequence(data, False, True, True, True)
        if ((data.word_emb_dim != seq_wordseq.wordrep.word_embedding.embedding_dim)
                or (data.char_emb_dim != seq_wordseq.wordrep.char_feature.char_embeddings.embedding_dim)
                or (data.feature_emb_dims[0] != seq_wordseq.wordrep.feature_embedding_dims[0])
                or (data.feature_emb_dims[1] != seq_wordseq.wordrep.feature_embedding_dims[1])):
            raise RuntimeError("ner_wordseq not compatible")

        old_seq_wordseq = WordSequence(old_data, False, True, True, True)
        if opt.test_in_cpu:
            old_seq_wordseq.load_state_dict(
                torch.load(os.path.join(opt.pretrained_model_dir, 'ner_wordseq.pkl'), map_location='cpu'))
        else:
            old_seq_wordseq.load_state_dict(
                torch.load(os.path.join(opt.pretrained_model_dir, 'ner_wordseq.pkl')))

        # sd = old_seq_wordseq.lstm.state_dict()
        for old, new in zip(old_seq_wordseq.lstm.parameters(), seq_wordseq.lstm.parameters()):
            new.data.copy_(old)

        # copy the overlapping rows of the old embedding tables into the new (possibly larger) tables
        vocab_size = old_seq_wordseq.wordrep.word_embedding.num_embeddings
        seq_wordseq.wordrep.word_embedding.weight.data[0:vocab_size, :] = \
            old_seq_wordseq.wordrep.word_embedding.weight.data[0:vocab_size, :]

        vocab_size = old_seq_wordseq.wordrep.char_feature.char_embeddings.num_embeddings
        seq_wordseq.wordrep.char_feature.char_embeddings.weight.data[0:vocab_size, :] = \
            old_seq_wordseq.wordrep.char_feature.char_embeddings.weight.data[0:vocab_size, :]

        for i, feature_embedding in enumerate(old_seq_wordseq.wordrep.feature_embeddings):
            vocab_size = feature_embedding.num_embeddings
            seq_wordseq.wordrep.feature_embeddings[i].weight.data[0:vocab_size, :] = \
                feature_embedding.weight.data[0:vocab_size, :]

        # for word in data.word_alphabet.iteritems():
        #     # old_seq_wordseq.wordrep.word_embedding.weight.data  data.word_alphabet.get_index(word)

        classify_wordseq = WordSequence(data, True, False, True, False)
        if ((data.word_emb_dim != classify_wordseq.wordrep.word_embedding.embedding_dim)
                or (data.re_feature_emb_dims[data.re_feature_name2id['[POSITION]']] != classify_wordseq.wordrep.position1_emb.embedding_dim)
                or (data.feature_emb_dims[1] != classify_wordseq.wordrep.feature_embedding_dims[0])):
            raise RuntimeError("re_wordseq not compatible")

        old_classify_wordseq = WordSequence(old_data, True, False, True, False)
        if opt.test_in_cpu:
            old_classify_wordseq.load_state_dict(
                torch.load(os.path.join(opt.pretrained_model_dir, 're_wordseq.pkl'), map_location='cpu'))
        else:
            old_classify_wordseq.load_state_dict(
                torch.load(os.path.join(opt.pretrained_model_dir, 're_wordseq.pkl')))

        for old, new in zip(old_classify_wordseq.lstm.parameters(), classify_wordseq.lstm.parameters()):
            new.data.copy_(old)

        vocab_size = old_classify_wordseq.wordrep.word_embedding.num_embeddings
        classify_wordseq.wordrep.word_embedding.weight.data[0:vocab_size, :] = \
            old_classify_wordseq.wordrep.word_embedding.weight.data[0:vocab_size, :]

        vocab_size = old_classify_wordseq.wordrep.position1_emb.num_embeddings
        classify_wordseq.wordrep.position1_emb.weight.data[0:vocab_size, :] = \
            old_classify_wordseq.wordrep.position1_emb.weight.data[0:vocab_size, :]

        vocab_size = old_classify_wordseq.wordrep.position2_emb.num_embeddings
        classify_wordseq.wordrep.position2_emb.weight.data[0:vocab_size, :] = \
            old_classify_wordseq.wordrep.position2_emb.weight.data[0:vocab_size, :]

        vocab_size = old_classify_wordseq.wordrep.feature_embeddings[0].num_embeddings
        classify_wordseq.wordrep.feature_embeddings[0].weight.data[0:vocab_size, :] = \
            old_classify_wordseq.wordrep.feature_embeddings[0].weight.data[0:vocab_size, :]

        classify_model = ClassifyModel(data)
        old_classify_model = ClassifyModel(old_data)
        if opt.test_in_cpu:
            old_classify_model.load_state_dict(
                torch.load(os.path.join(opt.pretrained_model_dir, 're_model.pkl'), map_location='cpu'))
        else:
            old_classify_model.load_state_dict(
                torch.load(os.path.join(opt.pretrained_model_dir, 're_model.pkl')))

        if ((data.re_feature_alphabet_sizes[data.re_feature_name2id['[RELATION]']] != old_classify_model.linear.weight.size(0))
                or (data.re_feature_emb_dims[data.re_feature_name2id['[ENTITY_TYPE]']] != old_classify_model.entity_type_emb.embedding_dim)
                or (data.re_feature_emb_dims[data.re_feature_name2id['[ENTITY]']] != old_classify_model.entity_emb.embedding_dim)
                or (data.re_feature_emb_dims[data.re_feature_name2id['[TOKEN_NUM]']] != old_classify_model.tok_num_betw_emb.embedding_dim)
                or (data.re_feature_emb_dims[data.re_feature_name2id['[ENTITY_NUM]']] != old_classify_model.et_num_emb.embedding_dim)):
            raise RuntimeError("re_model not compatible")

        vocab_size = old_classify_model.entity_type_emb.num_embeddings
        classify_model.entity_type_emb.weight.data[0:vocab_size, :] = \
            old_classify_model.entity_type_emb.weight.data[0:vocab_size, :]

        vocab_size = old_classify_model.entity_emb.num_embeddings
        classify_model.entity_emb.weight.data[0:vocab_size, :] = \
            old_classify_model.entity_emb.weight.data[0:vocab_size, :]

        vocab_size = old_classify_model.tok_num_betw_emb.num_embeddings
        classify_model.tok_num_betw_emb.weight.data[0:vocab_size, :] = \
            old_classify_model.tok_num_betw_emb.weight.data[0:vocab_size, :]

        vocab_size = old_classify_model.et_num_emb.num_embeddings
        classify_model.et_num_emb.weight.data[0:vocab_size, :] = \
            old_classify_model.et_num_emb.weight.data[0:vocab_size, :]
    else:
        seq_model = SeqModel(data)
        seq_wordseq = WordSequence(data, False, True, True, True)
        classify_wordseq = WordSequence(data, True, False, True, False)
        classify_model = ClassifyModel(data)

    iter_parameter = itertools.chain(*map(list, [seq_wordseq.parameters(), seq_model.parameters()]))
    seq_optimizer = optim.Adam(iter_parameter, lr=data.HP_lr, weight_decay=data.HP_l2)
    iter_parameter = itertools.chain(*map(list, [classify_wordseq.parameters(), classify_model.parameters()]))
    classify_optimizer = optim.Adam(iter_parameter, lr=data.HP_lr, weight_decay=data.HP_l2)

    if not data.tune_wordemb:
        my_utils.freeze_net(seq_wordseq.wordrep.word_embedding)
        my_utils.freeze_net(classify_wordseq.wordrep.word_embedding)

    # split RE training instances into positive relations and negative ("</unk>") ones
    re_X_positive = []
    re_Y_positive = []
    re_X_negative = []
    re_Y_negative = []
    relation_vocab = data.re_feature_alphabets[data.re_feature_name2id['[RELATION]']]
    my_collate = my_utils.sorted_collate1
    for i in range(len(data.re_train_X)):
        x = data.re_train_X[i]
        y = data.re_train_Y[i]
        if y != relation_vocab.get_index("</unk>"):
            re_X_positive.append(x)
            re_Y_positive.append(y)
        else:
            re_X_negative.append(x)
            re_Y_negative.append(y)

    re_dev_loader = DataLoader(my_utils.RelationDataset(data.re_dev_X, data.re_dev_Y),
                               data.HP_batch_size, shuffle=False, collate_fn=my_collate)
    re_test_loader = DataLoader(my_utils.RelationDataset(data.re_test_X, data.re_test_Y),
                                data.HP_batch_size, shuffle=False, collate_fn=my_collate)

    best_ner_score = -1
    best_re_score = -1
    count_performance_not_grow = 0

    for idx in range(data.HP_iteration):
        epoch_start = time.time()

        seq_wordseq.train()
        seq_wordseq.zero_grad()
        seq_model.train()
        seq_model.zero_grad()
        classify_wordseq.train()
        classify_wordseq.zero_grad()
        classify_model.train()
        classify_model.zero_grad()

        batch_size = data.HP_batch_size

        random.shuffle(data.train_Ids)
        ner_train_num = len(data.train_Ids)
        ner_total_batch = ner_train_num // batch_size + 1

        re_train_loader, re_train_iter = makeRelationDataset(
            re_X_positive, re_Y_positive, re_X_negative, re_Y_negative,
            data.unk_ratio, True, my_collate, data.HP_batch_size)
        re_total_batch = len(re_train_loader)

        total_batch = max(ner_total_batch, re_total_batch)
        min_batch = min(ner_total_batch, re_total_batch)

        # NER and RE mini-batches are interleaved within one epoch
        for batch_id in range(total_batch):
            if batch_id < ner_total_batch:
                start = batch_id * batch_size
                end = (batch_id + 1) * batch_size
                if end > ner_train_num:
                    end = ner_train_num
                instance = data.train_Ids[start:end]
                batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, \
                    batch_charrecover, batch_label, mask, batch_permute_label = batchify_with_label(instance, data.HP_gpu)

                hidden = seq_wordseq.forward(batch_word, batch_features, batch_wordlen, batch_char,
                                             batch_charlen, batch_charrecover, None, None)
                hidden_adv = None
                loss, tag_seq = seq_model.neg_log_likelihood_loss(hidden, hidden_adv, batch_label, mask)
                loss.backward()
                seq_optimizer.step()
                seq_wordseq.zero_grad()
                seq_model.zero_grad()

            if batch_id < re_total_batch:
                [batch_word, batch_features, batch_wordlen, batch_wordrecover,
                 batch_char, batch_charlen, batch_charrecover,
                 position1_seq_tensor, position2_seq_tensor, e1_token, e1_length, e2_token, e2_length,
                 e1_type, e2_type, tok_num_betw, et_num], [targets, targets_permute] = \
                    my_utils.endless_get_next_batch_without_rebatch1(re_train_loader, re_train_iter)

                hidden = classify_wordseq.forward(batch_word, batch_features, batch_wordlen, batch_char,
                                                  batch_charlen, batch_charrecover,
                                                  position1_seq_tensor, position2_seq_tensor)
                hidden_adv = None
                loss, pred = classify_model.neg_log_likelihood_loss(
                    hidden, hidden_adv, batch_wordlen, e1_token, e1_length, e2_token, e2_length,
                    e1_type, e2_type, tok_num_betw, et_num, targets)
                loss.backward()
                classify_optimizer.step()
                classify_wordseq.zero_grad()
                classify_model.zero_grad()

        epoch_finish = time.time()
        logging.info("epoch: %s training finished. Time: %.2fs" % (idx, epoch_finish - epoch_start))

        _, _, _, _, ner_score, _, _ = ner.evaluate(data, seq_wordseq, seq_model, "dev")
        logging.info("ner evaluate: f: %.4f" % (ner_score))
        if ner_score > best_ner_score:
            logging.info("new best score: ner: %.4f" % (ner_score))
            best_ner_score = ner_score
            torch.save(seq_wordseq.state_dict(), os.path.join(opt.output, 'ner_wordseq.pkl'))
            torch.save(seq_model.state_dict(), os.path.join(opt.output, 'ner_model.pkl'))
            count_performance_not_grow = 0
            # _, _, _, _, test_ner_score, _, _ = ner.evaluate(data, seq_wordseq, seq_model, "test")
            # logging.info("ner evaluate on test: f: %.4f" % (test_ner_score))
        else:
            count_performance_not_grow += 1

        re_score = relation_extraction.evaluate(classify_wordseq, classify_model, re_dev_loader)
        logging.info("re evaluate: f: %.4f" % (re_score))
        if re_score > best_re_score:
            logging.info("new best score: re: %.4f" % (re_score))
            best_re_score = re_score
            torch.save(classify_wordseq.state_dict(), os.path.join(opt.output, 're_wordseq.pkl'))
            torch.save(classify_model.state_dict(), os.path.join(opt.output, 're_model.pkl'))
            count_performance_not_grow = 0
            # test_re_score = relation_extraction.evaluate(classify_wordseq, classify_model, re_test_loader)
            # logging.info("re evaluate on test: f: %.4f" % (test_re_score))
        else:
            count_performance_not_grow += 1

        # early stopping when neither task improves for too long
        if count_performance_not_grow > 2 * data.patience:
            logging.info("early stop")
            break

    logging.info("train finished")
def train(data, ner_dir, re_dir):
    # alpha mixes each task's own LSTM output with the other task's attention output;
    # it starts at 1/task_num at the first layer and grows linearly to 1 at the last layer
    task_num = 2
    init_alpha = 1.0 / task_num
    if opt.hidden_num <= 1:
        step_alpha = 0
    else:
        step_alpha = (1.0 - init_alpha) / (opt.hidden_num - 1)

    ner_wordrep = WordRep(data, False, True, True, data.use_char)
    ner_hiddenlist = []
    for i in range(opt.hidden_num):
        if i == 0:
            input_size = data.word_emb_dim + data.HP_char_hidden_dim + \
                         data.feature_emb_dims[data.feature_name2id['[Cap]']] + \
                         data.feature_emb_dims[data.feature_name2id['[POS]']]
            output_size = data.HP_hidden_dim
        else:
            input_size = data.HP_hidden_dim
            output_size = data.HP_hidden_dim
        temp = HiddenLayer(data, input_size, output_size)
        ner_hiddenlist.append(temp)

    seq_model = SeqModel(data)

    re_wordrep = WordRep(data, True, False, True, False)
    re_hiddenlist = []
    for i in range(opt.hidden_num):
        if i == 0:
            input_size = data.word_emb_dim + data.feature_emb_dims[data.feature_name2id['[POS]']] + \
                         2 * data.re_feature_emb_dims[data.re_feature_name2id['[POSITION]']]
            output_size = data.HP_hidden_dim
        else:
            input_size = data.HP_hidden_dim
            output_size = data.HP_hidden_dim
        temp = HiddenLayer(data, input_size, output_size)
        re_hiddenlist.append(temp)

    classify_model = ClassifyModel(data)

    iter_parameter = itertools.chain(*map(list, [ner_wordrep.parameters(), seq_model.parameters()] +
                                          [f.parameters() for f in ner_hiddenlist]))
    ner_optimizer = optim.Adam(iter_parameter, lr=data.HP_lr, weight_decay=data.HP_l2)
    iter_parameter = itertools.chain(*map(list, [re_wordrep.parameters(), classify_model.parameters()] +
                                          [f.parameters() for f in re_hiddenlist]))
    re_optimizer = optim.Adam(iter_parameter, lr=data.HP_lr, weight_decay=data.HP_l2)

    if not data.tune_wordemb:
        my_utils.freeze_net(ner_wordrep.word_embedding)
        my_utils.freeze_net(re_wordrep.word_embedding)

    # split RE training instances into positive relations and negative ("</unk>") ones
    re_X_positive = []
    re_Y_positive = []
    re_X_negative = []
    re_Y_negative = []
    relation_vocab = data.re_feature_alphabets[data.re_feature_name2id['[RELATION]']]
    my_collate = my_utils.sorted_collate1
    for i in range(len(data.re_train_X)):
        x = data.re_train_X[i]
        y = data.re_train_Y[i]
        if y != relation_vocab.get_index("</unk>"):
            re_X_positive.append(x)
            re_Y_positive.append(y)
        else:
            re_X_negative.append(x)
            re_Y_negative.append(y)

    re_test_loader = DataLoader(my_utils.RelationDataset(data.re_test_X, data.re_test_Y),
                                data.HP_batch_size, shuffle=False, collate_fn=my_collate)

    best_ner_score = -1
    best_re_score = -1

    for idx in range(data.HP_iteration):
        epoch_start = time.time()

        ner_wordrep.train()
        ner_wordrep.zero_grad()
        for hidden_layer in ner_hiddenlist:
            hidden_layer.train()
            hidden_layer.zero_grad()
        seq_model.train()
        seq_model.zero_grad()

        re_wordrep.train()
        re_wordrep.zero_grad()
        for hidden_layer in re_hiddenlist:
            hidden_layer.train()
            hidden_layer.zero_grad()
        classify_model.train()
        classify_model.zero_grad()

        batch_size = data.HP_batch_size

        random.shuffle(data.train_Ids)
        ner_train_num = len(data.train_Ids)
        ner_total_batch = ner_train_num // batch_size + 1

        re_train_loader, re_train_iter = makeRelationDataset(
            re_X_positive, re_Y_positive, re_X_negative, re_Y_negative,
            data.unk_ratio, True, my_collate, data.HP_batch_size)
        re_total_batch = len(re_train_loader)

        total_batch = max(ner_total_batch, re_total_batch)
        min_batch = min(ner_total_batch, re_total_batch)

        for batch_id in range(total_batch):
            if batch_id < min_batch:
                # while both tasks still have batches, train them jointly with cross-task attention
                start = batch_id * batch_size
                end = (batch_id + 1) * batch_size
                if end > ner_train_num:
                    end = ner_train_num
                instance = data.train_Ids[start:end]
                ner_batch_word, ner_batch_features, ner_batch_wordlen, ner_batch_wordrecover, ner_batch_char, \
                    ner_batch_charlen, ner_batch_charrecover, ner_batch_label, ner_mask, ner_batch_permute_label \
                    = batchify_with_label(instance, data.HP_gpu)

                [re_batch_word, re_batch_features, re_batch_wordlen, re_batch_wordrecover, re_batch_char,
                 re_batch_charlen, re_batch_charrecover, re_position1_seq_tensor, re_position2_seq_tensor,
                 re_e1_token, re_e1_length, re_e2_token, re_e2_length, re_e1_type, re_e2_type,
                 re_tok_num_betw, re_et_num], [re_targets, re_targets_permute] = \
                    my_utils.endless_get_next_batch_without_rebatch1(re_train_loader, re_train_iter)

                if ner_batch_word.size(0) != re_batch_word.size(0):
                    continue  # if the NER and RE batch sizes differ, skip this batch

                ner_word_rep = ner_wordrep.forward(ner_batch_word, ner_batch_features, ner_batch_wordlen,
                                                   ner_batch_char, ner_batch_charlen, ner_batch_charrecover,
                                                   None, None)

                re_word_rep = re_wordrep.forward(re_batch_word, re_batch_features, re_batch_wordlen,
                                                 re_batch_char, re_batch_charlen, re_batch_charrecover,
                                                 re_position1_seq_tensor, re_position2_seq_tensor)

                alpha = init_alpha
                ner_hidden = ner_word_rep
                re_hidden = re_word_rep
                for i in range(opt.hidden_num):
                    if alpha > 0.99:
                        use_attn = False
                        ner_lstm_out, ner_att_out = ner_hiddenlist[i].forward(ner_hidden, ner_batch_wordlen, use_attn)
                        re_lstm_out, re_att_out = re_hiddenlist[i].forward(re_hidden, re_batch_wordlen, use_attn)
                        ner_hidden = ner_lstm_out
                        re_hidden = re_lstm_out
                    else:
                        use_attn = True
                        ner_lstm_out, ner_att_out = ner_hiddenlist[i].forward(ner_hidden, ner_batch_wordlen, use_attn)
                        re_lstm_out, re_att_out = re_hiddenlist[i].forward(re_hidden, re_batch_wordlen, use_attn)
                        ner_hidden = alpha * ner_lstm_out + (1 - alpha) * re_att_out.unsqueeze(1)
                        re_hidden = alpha * re_lstm_out + (1 - alpha) * ner_att_out.unsqueeze(1)
                    alpha += step_alpha

                ner_loss, ner_tag_seq = seq_model.neg_log_likelihood_loss(ner_hidden, ner_batch_label, ner_mask)
                re_loss, re_pred = classify_model.neg_log_likelihood_loss(
                    re_hidden, re_batch_wordlen, re_e1_token, re_e1_length, re_e2_token, re_e2_length,
                    re_e1_type, re_e2_type, re_tok_num_betw, re_et_num, re_targets)

                ner_loss.backward(retain_graph=True)
                re_loss.backward()
                ner_optimizer.step()
                re_optimizer.step()

                ner_wordrep.zero_grad()
                for hidden_layer in ner_hiddenlist:
                    hidden_layer.zero_grad()
                seq_model.zero_grad()

                re_wordrep.zero_grad()
                for hidden_layer in re_hiddenlist:
                    hidden_layer.zero_grad()
                classify_model.zero_grad()

            else:
                # the remaining batches of the longer task are trained on their own, without interaction
                if batch_id < ner_total_batch:
                    start = batch_id * batch_size
                    end = (batch_id + 1) * batch_size
                    if end > ner_train_num:
                        end = ner_train_num
                    instance = data.train_Ids[start:end]
                    batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, \
                        batch_charrecover, batch_label, mask, batch_permute_label = batchify_with_label(instance, data.HP_gpu)

                    ner_word_rep = ner_wordrep.forward(batch_word, batch_features, batch_wordlen, batch_char,
                                                       batch_charlen, batch_charrecover, None, None)

                    ner_hidden = ner_word_rep
                    for i in range(opt.hidden_num):
                        ner_lstm_out, ner_att_out = ner_hiddenlist[i].forward(ner_hidden, batch_wordlen, False)
                        ner_hidden = ner_lstm_out

                    loss, tag_seq = seq_model.neg_log_likelihood_loss(ner_hidden, batch_label, mask)
                    loss.backward()
                    ner_optimizer.step()
                    ner_wordrep.zero_grad()
                    for hidden_layer in ner_hiddenlist:
                        hidden_layer.zero_grad()
                    seq_model.zero_grad()

                if batch_id < re_total_batch:
                    [batch_word, batch_features, batch_wordlen, batch_wordrecover,
                     batch_char, batch_charlen, batch_charrecover,
                     position1_seq_tensor, position2_seq_tensor, e1_token, e1_length, e2_token, e2_length,
                     e1_type, e2_type, tok_num_betw, et_num], [targets, targets_permute] = \
                        my_utils.endless_get_next_batch_without_rebatch1(re_train_loader, re_train_iter)

                    re_word_rep = re_wordrep.forward(batch_word, batch_features, batch_wordlen, batch_char,
                                                     batch_charlen, batch_charrecover,
                                                     position1_seq_tensor, position2_seq_tensor)

                    re_hidden = re_word_rep
                    for i in range(opt.hidden_num):
                        re_lstm_out, re_att_out = re_hiddenlist[i].forward(re_hidden, batch_wordlen, False)
                        re_hidden = re_lstm_out

                    loss, pred = classify_model.neg_log_likelihood_loss(
                        re_hidden, batch_wordlen, e1_token, e1_length, e2_token, e2_length,
                        e1_type, e2_type, tok_num_betw, et_num, targets)
                    loss.backward()
                    re_optimizer.step()
                    re_wordrep.zero_grad()
                    for hidden_layer in re_hiddenlist:
                        hidden_layer.zero_grad()
                    classify_model.zero_grad()

        epoch_finish = time.time()
        print("epoch: %s training finished. Time: %.2fs" % (idx, epoch_finish - epoch_start))

        ner_score = ner_evaluate(data, ner_wordrep, ner_hiddenlist, seq_model, "test")
        print("ner evaluate: f: %.4f" % (ner_score))

        re_score = re_evaluate(re_wordrep, re_hiddenlist, classify_model, re_test_loader)
        print("re evaluate: f: %.4f" % (re_score))

        if ner_score + re_score > best_ner_score + best_re_score:
            print("new best score: ner: %.4f , re: %.4f" % (ner_score, re_score))
            best_ner_score = ner_score
            best_re_score = re_score

            torch.save(ner_wordrep.state_dict(), os.path.join(ner_dir, 'wordrep.pkl'))
            for i, hidden_layer in enumerate(ner_hiddenlist):
                torch.save(hidden_layer.state_dict(), os.path.join(ner_dir, 'hidden_{}.pkl'.format(i)))
            torch.save(seq_model.state_dict(), os.path.join(ner_dir, 'model.pkl'))

            torch.save(re_wordrep.state_dict(), os.path.join(re_dir, 'wordrep.pkl'))
            for i, hidden_layer in enumerate(re_hiddenlist):
                torch.save(hidden_layer.state_dict(), os.path.join(re_dir, 'hidden_{}.pkl'.format(i)))
            torch.save(classify_model.state_dict(), os.path.join(re_dir, 'model.pkl'))
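# Sketch (illustrative helper, not part of the original code): rebuild the NER stack from the
# checkpoints that train() writes into `ner_dir`, so ner_evaluate / ner_evaluateWhenTest can be run
# after training. The layer sizing mirrors the rule used in train(); loading with map_location='cpu'
# is an assumed convenience for CPU-only evaluation, and module-level imports (torch, os) are
# assumed as in the rest of this file.
def load_ner_stack(data, ner_dir):
    wordrep = WordRep(data, False, True, True, data.use_char)
    wordrep.load_state_dict(torch.load(os.path.join(ner_dir, 'wordrep.pkl'), map_location='cpu'))

    hiddenlist = []
    for i in range(opt.hidden_num):
        if i == 0:
            input_size = data.word_emb_dim + data.HP_char_hidden_dim + \
                         data.feature_emb_dims[data.feature_name2id['[Cap]']] + \
                         data.feature_emb_dims[data.feature_name2id['[POS]']]
        else:
            input_size = data.HP_hidden_dim
        hidden_layer = HiddenLayer(data, input_size, data.HP_hidden_dim)
        hidden_layer.load_state_dict(
            torch.load(os.path.join(ner_dir, 'hidden_{}.pkl'.format(i)), map_location='cpu'))
        hiddenlist.append(hidden_layer)

    seq_model = SeqModel(data)
    seq_model.load_state_dict(torch.load(os.path.join(ner_dir, 'model.pkl'), map_location='cpu'))
    return wordrep, hiddenlist, seq_model

# Example use of the helper above:
# wordrep, hiddenlist, seq_model = load_ner_stack(data, ner_dir)
# print(ner_evaluate(data, wordrep, hiddenlist, seq_model, "test"))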
def pipeline(data, ner_dir, re_dir):
    seq_model = SeqModel(data)
    seq_wordseq = WordSequence(data, False, True, True, data.use_char)

    classify_wordseq = WordSequence(data, True, False, True, False)
    classify_model = ClassifyModel(data)
    if torch.cuda.is_available():
        classify_model = classify_model.cuda(data.HP_gpu)

    iter_parameter = itertools.chain(*map(list, [seq_wordseq.parameters(), seq_model.parameters()]))
    seq_optimizer = optim.Adam(iter_parameter, lr=opt.ner_lr, weight_decay=data.HP_l2)
    iter_parameter = itertools.chain(*map(list, [classify_wordseq.parameters(), classify_model.parameters()]))
    classify_optimizer = optim.Adam(iter_parameter, lr=opt.re_lr, weight_decay=data.HP_l2)

    if not data.tune_wordemb:
        my_utils.freeze_net(seq_wordseq.wordrep.word_embedding)
        my_utils.freeze_net(classify_wordseq.wordrep.word_embedding)

    re_X_positive = []
    re_Y_positive = []
    re_X_negative = []
    re_Y_negative = []
    relation_vocab = data.re_feature_alphabets[data.re_feature_name2id['[RELATION]']]
    my_collate = my_utils.sorted_collate1
    for i in range(len(data.re_train_X)):
        x = data.re_train_X[i]
        y = data.re_train_Y[i]
        if y != relation_vocab.get_index("</unk>"):
            re_X_positive.append(x)
            re_Y_positive.append(y)
        else:
            re_X_negative.append(x)
            re_Y_negative.append(y)

    re_test_loader = DataLoader(my_utils.RelationDataset(data.re_test_X, data.re_test_Y),
                                data.HP_batch_size, shuffle=False, collate_fn=my_collate)

    best_ner_score = -1
    best_re_score = -1

    for idx in range(data.HP_iteration):
        epoch_start = time.time()

        seq_wordseq.train()
        seq_wordseq.zero_grad()
        seq_model.train()
        seq_model.zero_grad()
        classify_wordseq.train()
        classify_wordseq.zero_grad()
        classify_model.train()
        classify_model.zero_grad()

        batch_size = data.HP_batch_size

        random.shuffle(data.train_Ids)
        ner_train_num = len(data.train_Ids)
        ner_total_batch = ner_train_num // batch_size + 1

        re_train_loader, re_train_iter = makeRelationDataset(
            re_X_positive, re_Y_positive, re_X_negative, re_Y_negative,
            data.unk_ratio, True, my_collate, data.HP_batch_size)
        re_total_batch = len(re_train_loader)

        total_batch = max(ner_total_batch, re_total_batch)
        min_batch = min(ner_total_batch, re_total_batch)

        for batch_id in range(total_batch):
            if batch_id < ner_total_batch:
                start = batch_id * batch_size
                end = (batch_id + 1) * batch_size
                if end > ner_train_num:
                    end = ner_train_num
                instance = data.train_Ids[start:end]
                batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, \
                    batch_charrecover, batch_label, mask, batch_permute_label = batchify_with_label(instance, data.HP_gpu)

                hidden = seq_wordseq.forward(batch_word, batch_features, batch_wordlen, batch_char,
                                             batch_charlen, batch_charrecover, None, None)
                hidden_adv = None
                loss, tag_seq = seq_model.neg_log_likelihood_loss(hidden, hidden_adv, batch_label, mask)
                loss.backward()
                seq_optimizer.step()
                seq_wordseq.zero_grad()
                seq_model.zero_grad()

            if batch_id < re_total_batch:
                [batch_word, batch_features, batch_wordlen, batch_wordrecover,
                 batch_char, batch_charlen, batch_charrecover,
                 position1_seq_tensor, position2_seq_tensor, e1_token, e1_length, e2_token, e2_length,
                 e1_type, e2_type, tok_num_betw, et_num], [targets, targets_permute] = \
                    my_utils.endless_get_next_batch_without_rebatch1(re_train_loader, re_train_iter)

                hidden = classify_wordseq.forward(batch_word, batch_features, batch_wordlen, batch_char,
                                                  batch_charlen, batch_charrecover,
                                                  position1_seq_tensor, position2_seq_tensor)
                hidden_adv = None
                loss, pred = classify_model.neg_log_likelihood_loss(
                    hidden, hidden_adv, batch_wordlen, e1_token, e1_length, e2_token, e2_length,
                    e1_type, e2_type, tok_num_betw, et_num, targets)
                loss.backward()
                classify_optimizer.step()
                classify_wordseq.zero_grad()
                classify_model.zero_grad()

        epoch_finish = time.time()
        print("epoch: %s training finished. Time: %.2fs" % (idx, epoch_finish - epoch_start))

        # _, _, _, _, f, _, _ = ner.evaluate(data, seq_wordseq, seq_model, "test")
        ner_score = ner.evaluate1(data, seq_wordseq, seq_model, "test")
        print("ner evaluate: f: %.4f" % (ner_score))

        re_score = relation_extraction.evaluate(classify_wordseq, classify_model, re_test_loader)
        print("re evaluate: f: %.4f" % (re_score))

        if ner_score + re_score > best_ner_score + best_re_score:
            print("new best score: ner: %.4f , re: %.4f" % (ner_score, re_score))
            best_ner_score = ner_score
            best_re_score = re_score

            torch.save(seq_wordseq.state_dict(), os.path.join(ner_dir, 'wordseq.pkl'))
            torch.save(seq_model.state_dict(), os.path.join(ner_dir, 'model.pkl'))
            torch.save(classify_wordseq.state_dict(), os.path.join(re_dir, 'wordseq.pkl'))
            torch.save(classify_model.state_dict(), os.path.join(re_dir, 'model.pkl'))