def evaluate(self, label_list, seq_len_list, data, epoch=None):
    def get_f1(pred, truth):
        # Sentence-level counters: "positive" sentences contain at least one
        # split point (sum(truth) > 0), "negative" sentences contain none.
        p_hit = 0
        p_not = 0
        n_hit = 0
        n_not = 0
        acc1 = 0
        acc2 = 0
        for it in zip(pred, truth):
            if it[0] == it[1] and sum(it[1]):
                p_hit += 1
            if it[0] != it[1] and sum(it[1]):
                p_not += 1
            if it[0] != it[1] and not sum(it[1]):
                n_not += 1
            if it[0] == it[1] and not sum(it[1]):
                n_hit += 1
            if it[0] == it[1]:
                acc1 += 1
            # Token-level accuracy for this sentence.
            cnt = 0
            for i in range(len(it[0])):
                if it[0][i] == it[1][i]:
                    cnt += 1
            acc2 += cnt / len(it[0])
        # Small epsilon guards against empty classes, mirroring the
        # smoothing already applied to F1 below.
        P = p_hit / (p_hit + p_not + 0.00001)
        N = n_hit / (n_hit + n_not + 0.00001)
        F1 = P * N * 2 / (P + N + 0.00001)
        ACC1 = acc1 / len(pred)   # exact-match (whole-sentence) accuracy
        ACC2 = acc2 / len(pred)   # mean per-token accuracy
        return P, N, F1, ACC1, ACC2

    true_label = []
    pred_label = []
    sent_list = []
    for label_, (sent, tag) in zip(label_list, data):
        p_ind = []
        t_ind = []
        if sum(label_) == 0 and sum(tag) == 0:
            pass
        else:
            for i, p in enumerate(label_):
                if p:
                    p_ind.append(i)
            for i, p in enumerate(tag):
                if p:
                    t_ind.append(i)
            sent_list.append(id2sentence(sent, self.vocab) + '\n'
                             + str(p_ind) + 'VS' + str(t_ind))
        true_label.append(tag)
        pred_label.append(label_)

    # Dump per-sentence "predicted indices VS gold indices" for inspection.
    import codecs
    with codecs.open('result.txt', 'w', encoding='utf-8') as f:
        f.write('\n'.join(sent_list))
    print(get_f1(pred_label, true_label))
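# --- Hypothetical sanity check, not part of the original code ---------------
# Illustrates the label format get_f1 expects: each item of pred/truth is a
# 0/1 vector marking split positions.  With one exact match on a "positive"
# sentence and one miss on a "negative" sentence, the definitions above give
# P ≈ 1.0, N ≈ 0.0, F1 ≈ 0.0, ACC1 = 0.5 and ACC2 = (1 + 2/3) / 2 ≈ 0.83.
_toy_pred = [[0, 1, 0], [0, 1, 0]]
_toy_truth = [[0, 1, 0], [0, 0, 0]]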
def split_sent(self, label_list, seq_len_list, data):
    # Re-inserts predicted sentence boundaries; relies on `import copy` and
    # the id2sentence helper at module level.
    sent_list = []
    for label_, (sent, tag) in zip(label_list, data):
        # Indices the model predicts as split positions.
        split_ind = []
        for i, p in enumerate(label_):
            if p:
                split_ind.append(i)
        resent = id2sentence(sent, self.vocab).split()
        out_sent = copy.deepcopy(resent)
        for ind in range(len(resent)):
            if ind in split_ind:
                # Append the Chinese full stop after each predicted split token.
                out_sent[ind] = resent[ind] + '。'
            else:
                out_sent[ind] = resent[ind]
        sent_list.append(''.join(out_sent))
    return ' '.join(sent_list)
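# --- Hypothetical example; tokens and labels invented for illustration ------
# split_sent mirrors this inner loop: with a predicted split after the token
# at index 3, the tokens are re-joined with '。' inserted at that position.
_toy_tokens = '今天 天气 很 好 我们 出去 玩'.split()
_toy_labels = [0, 0, 0, 1, 0, 0, 0]
_toy_split = ''.join(tok + '。' if flag else tok
                     for tok, flag in zip(_toy_tokens, _toy_labels))
# _toy_split == '今天天气很好。我们出去玩'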
    # Inside the per-epoch / per-batch training loop (outer loop headers not
    # shown in this fragment).
    for target_sentence in target_batch:
        target_sentence = data_helper.sentence2id(
            target_sentence,
            vocab=decoder_vocab,
            max_sentence_length=decoder_sentence_length,
            is_target=True)
        target_token_ids.append(target_sentence)

    # One training step on the current batch.
    batch_preds, batch_loss, _ = sess.run(
        [predictions, loss, train_op],
        feed_dict={
            encoder_inputs: input_token_ids,
            encoder_sequence_length: input_sentence_lengths,
            decoder_inputs: target_token_ids,
        })
    epoch_loss += batch_loss
    all_preds.append(batch_preds)

# Periodically report the epoch loss and the decoded predictions.
if epoch % 400 == 0:
    print('Epoch={}'.format(epoch))
    print('Epoch loss={}'.format(epoch_loss))
    for input_batch, target_batch, batch_preds in zip(
            input_batches, target_batches, all_preds):
        for input_sentence, target_sentence, pred in zip(
                input_batch, target_batch, batch_preds):
            print('\t' + input_sentence)
            print('\t => ' + data_helper.id2sentence(
                pred, reverse_vocab=decoder_reverse_vocab))
            print('\tCorrect answer:' + target_sentence)
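# --- Assumed behaviour of data_helper.sentence2id (the helper itself is not
# shown above).  A minimal stand-in sketch under that assumption: look each
# token up in `vocab`, append an end-of-sequence id for decoder targets, then
# pad/truncate to a fixed length.  The name, pad_id and eos_id are placeholders.
def _sentence2id_sketch(sentence, vocab, max_sentence_length,
                        is_target=False, pad_id=0, eos_id=1):
    ids = [vocab.get(tok, pad_id) for tok in sentence.split()]
    if is_target:
        ids.append(eos_id)            # decoder targets end with an EOS marker
    ids = ids[:max_sentence_length]   # truncate overly long sentences
    ids += [pad_id] * (max_sentence_length - len(ids))  # right-pad short ones
    return ids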