Ejemplo n.º 1
0
 def evaluate(self, label_list, seq_len_list, data, epoch=None):
     """Score predicted label sequences against gold tags and dump samples.

     Every sample that has at least one truthy predicted label or at least
     one truthy gold tag is written to ``result.txt`` (UTF-8, overwritten on
     each call) as the rendered sentence followed by the predicted and gold
     positive indices.  The metric tuple ``(P, N, F1, ACC1, ACC2)`` is then
     printed to stdout.

     Args:
         label_list: predicted label sequences, one per sample.
         seq_len_list: unused; kept for interface compatibility.
         data: iterable of ``(sent, tag)`` pairs parallel to ``label_list``;
             ``sent`` is passed to ``id2sentence`` together with
             ``self.vocab`` and ``tag`` is the gold label sequence.
         epoch: unused; kept for interface compatibility.

     Returns:
         None.  The metrics are only printed.
     """
     def safe_div(num, den):
         # An empty group (e.g. no sample with a positive gold tag) scores
         # 0.0 instead of raising ZeroDivisionError.
         return num / den if den else 0.0

     def get_f1(pred, truth):
         """Return (P, N, F1, ACC1, ACC2) for parallel lists of sequences.

         P    -- share of samples with a positive gold tag predicted exactly.
         N    -- share of samples with an all-zero gold tag predicted exactly.
         F1   -- harmonic mean of P and N (epsilon-smoothed denominator).
         ACC1 -- share of samples whose whole sequence is predicted exactly.
         ACC2 -- mean per-sample fraction of element-wise matching labels.
         """
         p_hit = p_not = n_hit = n_not = 0
         acc2 = 0.0
         for p, t in zip(pred, truth):
             exact = p == t
             if sum(t):
                 if exact:
                     p_hit += 1
                 else:
                     p_not += 1
             elif exact:
                 n_hit += 1
             else:
                 n_not += 1
             if len(p):
                 # zip() stops at the shorter sequence, so a prediction that
                 # is longer than its gold tags cannot raise IndexError.
                 matched = sum(1 for a, b in zip(p, t) if a == b)
                 acc2 += matched / len(p)
             else:
                 # An empty prediction only "matches" an empty gold sequence.
                 acc2 += float(exact)
         P = safe_div(p_hit, p_hit + p_not)
         N = safe_div(n_hit, n_hit + n_not)
         F1 = P * N * 2 / (P + N + 0.00001)
         # Exact matches are precisely the positive hits plus negative hits.
         ACC1 = safe_div(p_hit + n_hit, len(pred))
         ACC2 = safe_div(acc2, len(pred))
         return P, N, F1, ACC1, ACC2

     true_label = []
     pred_label = []
     sent_list = []
     for label_, (sent, tag) in zip(label_list, data):
         # Samples where neither prediction nor gold has a positive label
         # are scored but not written to the dump file.
         if sum(label_) != 0 or sum(tag) != 0:
             p_ind = [i for i, p in enumerate(label_) if p]
             t_ind = [i for i, p in enumerate(tag) if p]
             sent_list.append(id2sentence(sent, self.vocab) + '\n' + str(p_ind) + 'VS' + str(t_ind))
         true_label.append(tag)
         pred_label.append(label_)
     import codecs
     with codecs.open('result.txt', 'w', encoding='utf-8') as f:
         f.write('\n'.join(sent_list))
     print(get_f1(pred_label, true_label))
Ejemplo n.º 2
0
 def split_sent(self, label_list, seq_len_list, data):
     """Insert a '。' after every token whose predicted label is truthy.

     Each ``sent`` is rendered with ``id2sentence(sent, self.vocab)`` and
     split on whitespace (presumably yielding one token per label -- confirm
     against ``id2sentence``).  Tokens are concatenated without separators,
     with '。' appended to each token whose index carries a truthy label.
     Label indices beyond the number of tokens are ignored.

     Args:
         label_list: predicted label sequences, one per sample.
         seq_len_list: unused; kept for interface compatibility.
         data: iterable of ``(sent, tag)`` pairs parallel to ``label_list``;
             ``tag`` is not used here.

     Returns:
         The processed sentences joined by single spaces ('' if there are
         no samples).
     """
     sent_list = []
     for label_, (sent, tag) in zip(label_list, data):
         # A set gives O(1) membership tests while walking the tokens.
         split_ind = {i for i, p in enumerate(label_) if p}
         resent = id2sentence(sent, self.vocab).split()
         sent_list.append(''.join(
             word + '。' if ind in split_ind else word
             for ind, word in enumerate(resent)))
     return ' '.join(sent_list)
Ejemplo n.º 3
0
            for target_sentence in target_batch:
                target_sentence = data_helper.sentence2id(
                    target_sentence,
                    vocab=decoder_vocab,
                    max_sentence_length=decoder_sentence_length,
                    is_target=True)
                target_token_ids.append(target_sentence)

            batch_preds, batch_loss, _ = sess.run(
                [predictions, loss, train_op],
                feed_dict={
                    encoder_inputs: input_token_ids,
                    encoder_sequence_length: input_sentence_lengths,
                    decoder_inputs: target_token_ids,
                })

            epoch_loss += batch_loss
            all_preds.append(batch_preds)

        if epoch % 400 == 0:
            print 'Epoch={}'.format(epoch)
            print 'Epoch loss={}'.format(epoch_loss)
            for input_batch, target_batch, batch_preds in zip(
                    input_batches, target_batches, all_preds):
                for input_sentence, target_sentence, pred in zip(
                        input_batch, target_batch, batch_preds):
                    print '\t' + input_sentence
                    print '\t => ' + data_helper.id2sentence(
                        pred, reverse_vocab=decoder_reverse_vocab)
                    print '\tCorrect answer:' + target_sentence