def visualize(sents_id, gold, predict, files_name, config, is_relation_first=True):
    """Dump gold and predicted triples for each sentence into three text files.

    Every sentence is routed by its gold triple list: to ``files_name[0]``
    when ``data_prepare.is_normal_triple`` accepts it, otherwise to
    ``files_name[1]`` when ``data_prepare.is_multi_label`` accepts it, and to
    ``files_name[2]`` in every remaining case.  For each sentence the file
    receives the sentence text, a ``Gold:`` line, a ``Predict:`` line and a
    blank separator line.

    Args:
        sents_id: Per-sentence id sequences, rendered via ``sent_id2sent_str``.
        gold: Per-sentence gold triple lists, iterated in step with
            ``sents_id``.
        predict: Per-sentence predicted triple lists, iterated in step with
            ``sents_id``.
        files_name: Indexable holding the three output paths at positions
            0, 1 and 2.  Each file is opened in ``'w'`` mode (truncated).
        config: Object exposing ``words2id_filename`` and
            ``relations2id_filename``; also forwarded to the triple helpers.
        is_relation_first: Forwarded unchanged to the ``data_prepare``
            predicates and to ``triples2triples_str``.

    Returns:
        None.
    """
    # Parenthesised single-argument prints emit the same text as the bare
    # Python 2 print statement and remain valid syntax on Python 3.
    print('Visualizing ...')
    print(config.words2id_filename)
    print(config.relations2id_filename)

    # Context managers make sure the vocabulary files are closed right after
    # parsing instead of being left to the garbage collector.
    with open(config.words2id_filename, 'r') as words_file:
        words2id = json.load(words_file)
    with open(config.relations2id_filename, 'r') as relations_file:
        relations2id = json.load(relations_file)
    id2words = _reverse_dict_(words2id)
    id2relations = _reverse_dict_(relations2id)

    # All three outputs are closed (and therefore flushed) even when one of
    # the helpers raises part-way through the loop.
    with open(files_name[0], 'w') as f1, \
            open(files_name[1], 'w') as f2, \
            open(files_name[2], 'w') as f3:
        # zip stops at the shortest of the three inputs.
        for d, g, p in zip(sents_id, gold, predict):
            if data_prepare.is_normal_triple(g, is_relation_first):
                f = f1
            elif data_prepare.is_multi_label(g, is_relation_first):
                f = f2
            else:
                f = f3
            f.write(sent_id2sent_str(d, id2words))
            f.write('\n')
            g_triples = _triplelist2triples_(g, config)
            p_triples = _triplelist2triples_(p, config)
            g_triples_string = triples2triples_str(
                g_triples, d, id2words, id2relations, is_relation_first, config)
            p_triples_string = triples2triples_str(
                p_triples, d, id2words, id2relations, is_relation_first, config)
            f.write('Gold: \t' + g_triples_string)
            f.write('\n')
            f.write('Predict:\t' + p_triples_string)
            f.write('\n\n')
def compare(predict, gold, config, show_rate=None, simple=True):
    """Evaluate predictions against gold, optionally with per-subset reports.

    ``compare_`` is always run on the complete data under the name ``'ALL'``.
    In simple mode its three results are returned directly.  Otherwise
    ``compare_`` is additionally run on subsets selected by the gold triple
    list of each sentence: first the three ``data_prepare`` categories
    (a sentence may fall into several of them, or none), then five groups
    keyed by the number of distinct gold triples.

    Args:
        predict: Per-sentence predicted triple lists.
        gold: Per-sentence gold triple lists, paired with ``predict`` via
            ``zip``.  Each one is sliced in steps of three to count triples.
        config: Forwarded unchanged to ``compare_``.
        show_rate: Forwarded unchanged to ``compare_``.
        simple: When true, skip every per-subset evaluation.

    Returns:
        The ``(f1, precision, recall)`` tuple from the ``'ALL'`` evaluation
        when ``simple`` is true, otherwise ``(None, None, None)``.
    """
    f1, precision, recall = compare_(predict, gold, 'ALL', config, show_rate)
    if simple:
        # Nothing below is needed in simple mode, so the subset
        # classification is not performed at all.
        return f1, precision, recall

    is_relation_first = True

    # (name passed to compare_, membership predicate), in reporting order.
    category_checks = (
        ('Normal-Triples', data_prepare.is_normal_triple),
        ('Multi-Label', data_prepare.is_multi_label),
        ('Over-Lapping', data_prepare.is_over_lapping),
    )
    category_buckets = [([], []) for _ in category_checks]
    for p, g in zip(predict, gold):
        # The checks are independent "if"s, not an elif chain: one sentence
        # can be appended to more than one category.
        for (_, belongs), (bucket_predict, bucket_gold) in zip(category_checks,
                                                               category_buckets):
            if belongs(g, is_relation_first):
                bucket_predict.append(p)
                bucket_gold.append(g)
    for (name, _), (bucket_predict, bucket_gold) in zip(category_checks,
                                                        category_buckets):
        compare_(bucket_predict, bucket_gold, name, config, show_rate)

    # Sentences grouped by their number of distinct gold triples: exactly
    # 1, 2, 3 or 4, plus a final catch-all group for every other count
    # (5 or more, and also 0).
    # NOTE(review): the third and fourth names contain non-ASCII text that
    # does not follow the pattern of the other names; they are kept
    # byte-for-byte because they are handed to compare_ at runtime. Confirm
    # whether they are intentional.
    size_names = (
        'Sentence-1-Triple',
        'Sentence-2-Triple',
        'Sentence-3_实体识别-Triple',
        'Sentence-4_实体识别_1_3的O-Triple',
        'Sentence-5-Triple',
    )
    catch_all = len(size_names) - 1
    size_buckets = [([], []) for _ in size_names]
    for p, g in zip(predict, gold):
        # Consecutive groups of three items form one triple; the set drops
        # duplicates so repeated triples are counted once.
        distinct = set(tuple(g[i:i + 3]) for i in range(0, len(g), 3))
        count = len(distinct)
        slot = count - 1 if 1 <= count <= 4 else catch_all
        size_buckets[slot][0].append(p)
        size_buckets[slot][1].append(g)
    for name, (bucket_predict, bucket_gold) in zip(size_names, size_buckets):
        compare_(bucket_predict, bucket_gold, name, config, show_rate)

    return None, None, None