def visualize(sents_id, gold, predict, files_name, config, is_relation_first=True):
    """Dump gold and predicted triples for every sentence into three text files.

    Samples are paired positionally via ``zip(sents_id, gold, predict)`` and
    routed by their gold entry:

    * ``files_name[0]`` when ``data_prepare.is_normal_triple`` accepts it,
    * ``files_name[1]`` otherwise, when ``data_prepare.is_multi_label``
      accepts it,
    * ``files_name[2]`` for everything else.

    Each sample is written as the sentence string, a ``Gold:`` line, a
    ``Predict:`` line and a trailing blank line.

    :param sents_id: per-sentence id sequences, rendered with
        ``sent_id2sent_str``.
    :param gold: per-sentence gold triple lists.
    :param predict: per-sentence predicted triple lists.
    :param files_name: indexable holding the three output paths; the files
        are opened in ``'w'`` mode, so existing content is overwritten.
    :param config: must expose ``words2id_filename`` and
        ``relations2id_filename`` (paths to JSON files); it is also forwarded
        to the triple helpers.
    :param is_relation_first: forwarded unchanged to the ``data_prepare``
        predicates and to ``triples2triples_str``.
    """
    # Parenthesised single-argument print behaves the same under the Python 2
    # print statement and the Python 3 print function.
    print('Visualizing ...')
    print(config.words2id_filename)
    print(config.relations2id_filename)

    # Context managers close the vocabulary files as soon as they are parsed.
    with open(config.words2id_filename, 'r') as words_file:
        words2id = json.load(words_file)
    with open(config.relations2id_filename, 'r') as relations_file:
        relations2id = json.load(relations_file)
    id2words = _reverse_dict_(words2id)
    id2relations = _reverse_dict_(relations2id)

    # All three outputs are closed even if a helper raises part-way through.
    with open(files_name[0], 'w') as f1, \
            open(files_name[1], 'w') as f2, \
            open(files_name[2], 'w') as f3:
        for d, g, p in zip(sents_id, gold, predict):
            if data_prepare.is_normal_triple(g, is_relation_first):
                f = f1
            elif data_prepare.is_multi_label(g, is_relation_first):
                f = f2
            else:
                f = f3

            f.write(sent_id2sent_str(d, id2words))
            f.write('\n')
            g_triples = _triplelist2triples_(g, config)
            p_triples = _triplelist2triples_(p, config)
            g_triples_string = triples2triples_str(
                g_triples, d, id2words, id2relations, is_relation_first, config)
            p_triples_string = triples2triples_str(
                p_triples, d, id2words, id2relations, is_relation_first, config)
            f.write('Gold:   \t' + g_triples_string)
            f.write('\n')
            f.write('Predict:\t' + p_triples_string)
            f.write('\n\n')
# Example #2
# 0
def compare(predict, gold, config, show_rate=None, simple=True):
    """Score predictions against gold, overall and for several sample subsets.

    Samples are paired positionally via ``zip(predict, gold)``. The overall
    score is ``compare_(predict, gold, 'ALL', config, show_rate)``.

    When ``simple`` is false, ``compare_`` is additionally called (results
    discarded) for:

    * the samples whose gold entry satisfies each of
      ``data_prepare.is_normal_triple``, ``is_multi_label`` and
      ``is_over_lapping`` (checked independently, so a sample may be counted
      in more than one subset);
    * the samples grouped by the number of distinct triples in the gold
      entry, where a triple is a run of three consecutive items.

    :param predict: per-sample predictions, aligned with ``gold``.
    :param gold: per-sample gold entries; each is sliced in steps of three.
    :param config: forwarded unchanged to ``compare_``.
    :param show_rate: forwarded unchanged to ``compare_``.
    :param simple: if true, return right after the overall comparison.
    :return: ``(f1, precision, recall)`` from the overall comparison when
        ``simple`` is true, otherwise ``(None, None, None)``.
    """
    is_relation_first = True

    # Every group is a (predictions, golds) pair of parallel lists.
    normal_group = ([], [])
    multi_label_group = ([], [])
    overlapping_group = ([], [])
    for pred_item, gold_item in zip(predict, gold):
        if data_prepare.is_normal_triple(gold_item, is_relation_first):
            normal_group[0].append(pred_item)
            normal_group[1].append(gold_item)
        if data_prepare.is_multi_label(gold_item, is_relation_first):
            multi_label_group[0].append(pred_item)
            multi_label_group[1].append(gold_item)
        if data_prepare.is_over_lapping(gold_item, is_relation_first):
            overlapping_group[0].append(pred_item)
            overlapping_group[1].append(gold_item)

    f1, precision, recall = compare_(predict, gold, 'ALL', config, show_rate)
    if simple:
        return f1, precision, recall

    category_groups = (
        ('Normal-Triples', normal_group),
        ('Multi-Label', multi_label_group),
        ('Over-Lapping', overlapping_group),
    )
    for label, (group_predict, group_gold) in category_groups:
        compare_(group_predict, group_gold, label, config, show_rate)

    # Sentences containing exactly 1, 2, 3 or 4 distinct gold triples; bucket 5
    # collects every other count (five or more, and also zero).
    # NOTE(review): the labels for buckets 3 and 4 carry extra text that looks
    # like a mass-rename artifact; they are kept byte-for-byte here - confirm
    # the intended names before changing them.
    size_labels = (
        (1, 'Sentence-1-Triple'),
        (2, 'Sentence-2-Triple'),
        (3, 'Sentence-3_实体识别-Triple'),
        (4, 'Sentence-4_实体识别_1_3的O-Triple'),
        (5, 'Sentence-5-Triple'),
    )
    size_groups = dict((size, ([], [])) for size, _ in size_labels)
    for pred_item, gold_item in zip(predict, gold):
        distinct_triples = {
            tuple(gold_item[start:start + 3])
            for start in range(0, len(gold_item), 3)
        }
        count = len(distinct_triples)
        bucket_predict, bucket_gold = size_groups[count if count in (1, 2, 3, 4) else 5]
        bucket_predict.append(pred_item)
        bucket_gold.append(gold_item)

    for size, label in size_labels:
        bucket_predict, bucket_gold = size_groups[size]
        compare_(bucket_predict, bucket_gold, label, config, show_rate)
    return None, None, None