def test_model(data, decoder, sess, show_rate, is_visualize, simple=True):
    sents_id = []
    predicts = []
    gold = []
    for batch_i in range(data.batch_number):
        batch_data = data.next_batch(is_random=False)
        predict_answer = decoder.predict(batch_data, sess)
        gold_answer = batch_data.all_triples
        predicts.extend(predict_answer)
        gold.extend(gold_answer)
        sents_id.extend(batch_data.sentence_fw)
    if len(predicts) != len(gold):
        logger.info('Error: number of predictions (%d) does not equal number of gold answers (%d)'
                    % (len(predicts), len(gold)))
        exit()
    f1, precision, recall = evaluation.compare(predicts, gold, config, show_rate, simple=simple)
    if not simple:
        evaluation.error_analyse(predicts, gold, config, entity_or_relation='entity')
        evaluation.error_analyse(predicts, gold, config, entity_or_relation='relation')
    if is_visualize:
        visualize_normal_file = os.path.join(config.runner_path, 'visualize_normal_instance.txt')
        visualize_multi_file = os.path.join(config.runner_path, 'visualize_multi_instance.txt')
        visualize_overlap_file = os.path.join(config.runner_path, 'visualize_overlap_instance.txt')
        print(visualize_normal_file)
        print(visualize_multi_file)
        print(visualize_overlap_file)
        evaluation.visualize(sents_id, gold, predicts,
                             [visualize_normal_file, visualize_multi_file, visualize_overlap_file],
                             config)
    return f1, precision, recall
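# A minimal usage sketch for test_model (hedged: `dev_data`, `decoder`, and the
# session object are hypothetical stand-ins for whatever the runner constructs):
#
#   with tf.Session() as sess:
#       f1, precision, recall = test_model(dev_data, decoder, sess,
#                                          show_rate=None, is_visualize=False)
#       logger.info('dev F1=%.4f P=%.4f R=%.4f' % (f1, precision, recall))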
def experiment_data_autocorrelation():
    """Test how the hidden labels in the data correlate among themselves."""
    testset = generate_dataset(N_train)
    ls = testset.get_layers()[1:]
    # Compare the (hidden) labels to themselves.
    metrics = compare(ls, ls)
    # Return everything in scope for later inspection.
    return locals()
def rel_test(
        self, seq2seq
) -> Tuple[float, Tuple[float, float, float], Tuple[float, float, float],
           Tuple[float, float, float]]:
    predicts = []
    gold = []
    loss = 0.0
    data = prepare.load_data(self.mode)
    if self.mode == 'test':
        data = prepare.test_process(data)
    else:
        data = prepare.process(data)
    data = data_prepare.Data(data, self.config.batch_size, self.config)
    for batch_i in tqdm(range(data.batch_number)):
        batch_data = data.next_batch(is_random=False)
        pred_action_list, pred_logits_list = self.test_step(batch_data, seq2seq)
        predicts.extend(pred_action_list)
        gold.extend(batch_data.all_triples)
        mean_loss = 0.0
        if self.config.losstype == 1:  # 1. original loss: NLL summed over decode steps
            targets = torch.from_numpy(batch_data.standard_outputs).to(
                self.device).to(torch.long)
            for t in range(seq2seq.decoder.decodelen):
                mean_loss = mean_loss + F.nll_loss(pred_logits_list[t], targets[:, t])
            mean_loss /= pred_logits_list[0].shape[0]  # average over the batch
        if batch_i < 1000:  # accumulate loss over at most the first 1000 batches
            loss += mean_loss
    loss /= 1000
    f1, precision, recall = evaluation.compare(predicts, gold, self.config,
                                               show_rate=None, simple=True)
    (r_f1, r_precision, r_recall), (e_f1, e_precision, e_recall) = \
        evaluation.rel_entity_compare(predicts, gold, self.config)
    return loss.item(), (f1, precision, recall), (r_f1, r_precision, r_recall), \
        (e_f1, e_precision, e_recall)
def test(self) -> Tuple[float, float, float]:
    predicts = []
    gold = []
    for batch_i in range(self.data.batch_number):
        batch_data = self.data.next_batch(is_random=False)
        pred_action_list, pred_logits_list = self.test_step(batch_data)
        pred_action_list = pred_action_list.cpu().numpy()
        predicts.extend(pred_action_list)
        gold.extend(batch_data.all_triples)
    f1, precision, recall = evaluation.compare(predicts, gold, self.config,
                                               show_rate=None, simple=True)
    self.data.reset()
    return f1, precision, recall
def train_step(
        self, batch: data_prepare.InputData
) -> Tuple[torch.Tensor, Tuple[float, float, float], Tuple[float, float, float],
           Tuple[float, float, float]]:
    self.optimizer.zero_grad()
    sentence = batch.sentence_fw
    sentence_eos = batch.input_sentence_append_eos
    all_events = torch.from_numpy(batch.standard_outputs).to(self.device).to(torch.long)
    sentence = torch.from_numpy(sentence).to(self.device)
    sentence_eos = torch.from_numpy(sentence_eos).to(self.device)
    lengths = torch.Tensor(batch.input_sentence_length).int().tolist()
    pred_action_list, pred_logits_list = self.seq2seq(sentence, sentence_eos, lengths)
    # Only losstype == 1 (NLL summed over decode steps) is handled here.
    if self.config.losstype == 1:  # 1. original loss
        loss = 0
        for t in range(self.seq2seq.decoder.decodelen):
            loss = loss + self.loss(pred_logits_list[t], all_events[:, t])
    (r_f1, r_precision, r_recall), (e_f1, e_precision, e_recall) = \
        evaluation.rel_entity_compare(pred_action_list, batch.all_triples, self.config)
    f1, precision, recall = evaluation.compare(pred_action_list, batch.all_triples,
                                               self.config, show_rate=None, simple=True)
    loss.backward()
    self.optimizer.step()
    return loss, (f1, precision, recall), (r_f1, r_precision, r_recall), \
        (e_f1, e_precision, e_recall)
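# A minimal sketch of an epoch loop driving train_step (hedged: `trainer` and
# `epoch_number` are hypothetical stand-ins, not names confirmed by this code):
#
#   for epoch in range(trainer.config.epoch_number):
#       for _ in range(trainer.data.batch_number):
#           batch = trainer.data.next_batch(is_random=True)
#           loss, (f1, p, r), _, _ = trainer.train_step(batch)
#       trainer.data.reset()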
def test(self) -> Tuple[float, float, float]:
    predicts = []
    gold = []
    data = prepare.load_data(self.mode)
    if self.mode == 'test':
        data = prepare.test_process(data)
    else:
        data = prepare.process(data)
    data = data_prepare.Data(data, self.config.batch_size, self.config)
    for batch_i in range(data.batch_number):
        batch_data = data.next_batch(is_random=False)
        pred_action_list, pred_logits_list = self.test_step(batch_data)
        pred_action_list = pred_action_list.cpu().numpy()
        predicts.extend(pred_action_list)
        gold.extend(batch_data.all_triples)
    f1, precision, recall = evaluation.compare(predicts, gold, self.config,
                                               show_rate=None, simple=True)
    data.reset()
    return f1, precision, recall
def event_test(self, seq2seq):
    predicts = []
    gold = []
    data = prepare.load_data(self.mode)
    if self.mode == 'test':
        data = prepare.test_process(data)
    else:
        data = prepare.process(data)
    data = data_prepare.Data(data, self.config.batch_size, self.config)
    loss = 0.0
    for batch_i in tqdm(range(data.batch_number)):
        batch_data = data.next_batch(is_random=False)
        pred_action_list, pred_logits_list = self.test_step(batch_data, seq2seq)
        pred_action_list = pred_action_list.cpu().numpy()
        gold.extend(batch_data.standard_outputs)
        # Collect one predicted sequence per example (column-wise).
        predicts.extend([
            pred_action_list[:, i] for i in range(pred_action_list.shape[1])
        ])
        if self.config.decoder_type == 'onecrf':
            loss += pred_logits_list  # the CRF decoder returns the loss directly
        else:
            if self.config.losstype == 1:  # 1. original loss: NLL summed over decode steps
                targets = torch.from_numpy(batch_data.standard_outputs).to(
                    self.device).to(torch.long)
                for t in range(seq2seq.decoder.decodelen):
                    loss = loss + F.nll_loss(pred_logits_list[t], targets[:, t])
            elif self.config.losstype == 2:
                # 2. loss2, permutation matching: score every gold triple slot
                # against every predicted slot, so the loss does not depend on
                # the order in which the decoder emits the triples.
                all_events = torch.from_numpy(batch_data.standard_outputs).to(
                    self.device).to(torch.long)
                all_triples = batch_data.all_triples
                lengths = batch_data.input_sentence_length
                slot = self.max_sentence_length + 1  # length of one decoded triple slot
                for i in range(all_events.shape[0]):
                    now_loss = 0.
                    triple_num = min(len(all_triples[i]) // (lengths[i] + 1),
                                     self.config.triple_number)
                    for j in range(triple_num):
                        glob = all_events[i, j * slot:(j + 1) * slot]
                        for k in range(triple_num):
                            now_loss += F.nll_loss(
                                pred_logits_list[k * slot:(k + 1) * slot, i], glob)
                    if triple_num != 0:
                        now_loss /= (triple_num * triple_num)
                    # Also supervise the single stop slot after the last triple.
                    loss += now_loss + F.nll_loss(
                        pred_logits_list[triple_num * slot:triple_num * slot + 1, i],
                        all_events[i, triple_num * slot:triple_num * slot + 1])
    loss /= data.batch_number  # average the accumulated loss over the batches
    require_f1, require_precision, require_recall = \
        evaluation.event_entity_yaoqiu_compare(predicts, gold, self.config)
    f1, precision, recall = evaluation.compare(predicts, gold, self.config,
                                               show_rate=None, simple=True)
    (r_f1, r_precision, r_recall), (e_f1, e_precision, e_recall) = \
        evaluation.rel_entity_compare(predicts, gold, self.config)
    (event_f1, event_precision, event_recall), \
        (entity_f1, entity_precision, entity_recall) = \
        evaluation.event_entity_compare(predicts, gold, self.config)
    data.reset()
    return (loss.item(),
            (require_f1, require_precision, require_recall),
            (f1, precision, recall),
            (r_f1, r_precision, r_recall),
            (e_f1, e_precision, e_recall),
            (event_f1, event_precision, event_recall),
            (entity_f1, entity_precision, entity_recall))
def main():
    emb = gensim.models.KeyedVectors.load_word2vec_format(embeddings_path, binary=False)
    featurizer = W2V_POS_DEPREL_Featurizer(emb, device)
    input_dim = featurizer.input_dim
    criterion = nn.CrossEntropyLoss()
    model = BiLSTMParser(input_dim, 100, 3, device)
    optimizer = torch.optim.Adam(model.parameters())

    train_sentences = load_conllu(TRAIN_PATH)
    dev_sentences = load_conllu(DEV_PATH)
    test_sentences = load_conllu(TEST_PATH)

    top_f1 = -float("inf")
    for e in range(epochs):
        model.train()
        total_loss = 0
        train_indices = list(range(len(train_sentences)))
        random.shuffle(train_indices)
        for i in tqdm(train_indices):
            s = train_sentences[i]
            total_loss += train_sent(model, s, featurizer, criterion, optimizer)
        print("Train loss: ", total_loss)

        # Dev-set evaluation.
        model.eval()
        corr = 0
        total = 0
        for sent in dev_sentences[-10:]:
            print_example(model, sent, featurizer)
        TP, FP, FN, SENTS = (0, 0, 0, 0)
        no_sents = 0
        dev_loss = 0
        for s in tqdm(dev_sentences):
            ncorr, ntotal, iob_out, loss = test_sent(model, s, featurizer, criterion)
            dev_loss += loss
            corr += ncorr
            total += ntotal
            fixed_iob = fix(iob_out)
            iob_ann = "".join([tok["misc"]["iob"] for tok in s])
            comparison = compare(iob_ann, fixed_iob)
            TP += comparison[0]
            FP += comparison[1]
            FN += comparison[2]
            SENTS += int(fixed_iob == iob_ann)
            no_sents += 1
        precision = TP / (TP + FP)
        recall = TP / (TP + FN)
        f1 = 2 * (precision * recall) / (precision + recall)
        sent_acc = SENTS / no_sents
        print("precision: ", precision)
        print("recall: ", recall)
        print("f1: ", f1)
        print("sents_acc: ", sent_acc)
        print("Dev loss: ", dev_loss)
        print(corr / total)
        if f1 > top_f1:
            top_f1 = f1
            torch.save(model.state_dict(), type(model).__name__ + ".model")

    # Test-set evaluation with the best checkpoint.
    state_dict = torch.load(type(model).__name__ + ".model")
    model.load_state_dict(state_dict)
    model.eval()
    corr = 0
    total = 0
    TP, FP, FN, SENTS = (0, 0, 0, 0)
    no_sents = 0
    for s in tqdm(test_sentences):
        ncorr, ntotal, iob_out, loss = test_sent(model, s, featurizer, criterion)
        corr += ncorr
        total += ntotal
        fixed_iob = fix(iob_out)
        iob_ann = "".join([tok["misc"]["iob"] for tok in s])
        comparison = compare(iob_ann, fixed_iob)
        TP += comparison[0]
        FP += comparison[1]
        FN += comparison[2]
        SENTS += int(fixed_iob == iob_ann)
        no_sents += 1
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    f1 = 2 * (precision * recall) / (precision + recall)
    sent_acc = SENTS / no_sents
    print("\n\n TEST SET\n\n")
    print("precision: ", precision)
    print("recall: ", recall)
    print("f1: ", f1)
    print("sents_acc: ", sent_acc)
    print(corr / total)
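# Standard entry-point guard (an addition as a sketch; the original snippet
# ends at main() and does not show how it is invoked):
if __name__ == "__main__":
    main()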