def read_logmap_mapping(filename):
    """Read a '|'-separated mapping file into a ``{left: [right, ...]}`` dict.

    Every line is stripped and split on ``'|'``; the first two fields are each
    passed through ``uri_prefix``. The first field becomes the dictionary key
    and the second is appended to that key's list, so right-hand entries keep
    file order and repeated pairs are kept as duplicates.

    :param filename: path of the mapping file to read.
    :return: dict from processed left entity to the list of processed right
        entities it is paired with.
    """
    grouped = {}
    with open(filename) as handle:
        for raw in handle:
            fields = raw.strip().split('|')
            source_cls = uri_prefix(fields[0])
            target_cls = uri_prefix(fields[1])
            grouped.setdefault(source_cls, []).append(target_cls)
    return grouped
def append_super_class(c, p):
    """Append ``c`` and its chain of first super classes to the path ``p``.

    ``uri_prefix(uri=c.iri)`` is appended to ``p``; then the function recurses
    into the first element returned by ``super_classes(c=c)``. The climb stops
    when ``super_classes`` yields ``None``, an empty collection, or a
    collection that contains ``owl.Thing``.

    :param c: class object exposing an ``iri`` attribute.
    :param p: list that is extended in place with the visited classes.
    :return: the same list ``p`` that was passed in.
    """
    p.append(uri_prefix(uri=c.iri))
    supclasses = super_classes(c=c)
    # The None test has to come first: the membership and len() checks would
    # raise TypeError on None before a trailing None guard could be reached.
    if supclasses is None or len(supclasses) == 0 or owl.Thing in supclasses:
        return p
    return append_super_class(c=supclasses[0], p=p)
def get_class_name(o):
    """Build a lookup from each class of ``o`` to ``[name, first label]``.

    For every class returned by ``o.classes()`` the candidate labels are
    ``c.label.en`` followed by ``c.label``; the first candidate is used, or
    ``None`` when there is no label at all. Keys are
    ``uri_prefix(uri=<class iri>)``.

    :param o: object exposing ``classes()``.
    :return: dict mapping the processed class IRI to a two-item list
        ``[class name, first label or None]``.
    """
    names_by_class = {}
    for cls in o.classes():
        label_candidates = cls.label.en + cls.label
        if len(label_candidates) > 0:
            first_label = label_candidates[0]
        else:
            first_label = None
        names_by_class[uri_prefix(uri=cls.iri)] = [cls.name, first_label]
    return names_by_class
def read_oaei_mappings(file_name):
    """Parse an alignment XML file into '|'-joined, lower-cased mapping strings.

    The tree is walked three levels below the root: root children, then their
    children whose tag contains ``'map'``, then the children of those (the
    "cells"). Within a cell, the first attribute value of each of the first
    two children is passed through ``uri_prefix``, lower-cased and joined with
    ``'|'``. The text of the cell's fourth child is read as a marker
    (presumably the OAEI relation element - confirm against the input
    format); a cell whose marker is ``'?'`` is left out of the first result.

    :param file_name: path of the XML file to parse.
    :return: tuple ``(mappings_str, all_mappings_str)`` where
        ``all_mappings_str`` holds every cell and ``mappings_str`` only the
        cells whose marker is not ``'?'``.
    """
    tree = ET.parse(file_name)
    mappings_str = []
    all_mappings_str = []
    # Elements are iterated directly: Element.getchildren() was removed from
    # xml.etree.ElementTree in Python 3.9, and iteration yields the same
    # direct children in the same order.
    for section in tree.getroot():
        for m in section:
            if 'map' not in m.tag:
                continue
            for cell in m:
                mapping = []
                mv = '?'
                for i, v in enumerate(cell):
                    if i < 2:
                        # Only the first attribute value of the child is used.
                        for value in v.attrib.values():
                            mapping.append(uri_prefix(value).lower())
                            break
                    if i == 3:
                        mv = v.text
                mapping_str = '|'.join(mapping)
                all_mappings_str.append(mapping_str)
                if mv != '?':
                    mappings_str.append(mapping_str)
    return mappings_str, all_mappings_str
# Load the class paths (one comma-separated path per line) and the class-name
# tables (JSON) for the left and right side. Every file is opened in a `with`
# block so its handle is closed as soon as it has been read.
with open(FLAGS.left_path_file) as f:
    left_paths = [line.strip().split(',') for line in f]
with open(FLAGS.right_path_file) as f:
    right_paths = [line.strip().split(',') for line in f]
with open(FLAGS.left_class_name_file) as f:
    left_names = json.load(f)
with open(FLAGS.right_class_name_file) as f:
    right_names = json.load(f)

mappings, mappings_n = list(), list()
with open(FLAGS.candidate_file) as f:
    for i, line in enumerate(f):
        # A line is either "<something>, <left>|<right>" or just
        # "<left>|<right>"; in the first form only the part after ', ' is used.
        m = line.strip().split(', ')[1] if ', ' in line else line.strip()
        m_split = m.split('|')
        c1 = uri_prefix(uri=m_split[0])
        c2 = uri_prefix(uri=m_split[1])
        n1 = get_label(cls=c1, paths=left_paths, names=left_names,
                       label_type='path',
                       keep_uri=(FLAGS.keep_uri == 'yes'))
        n2 = get_label(cls=c2, paths=right_paths, names=right_names,
                       label_type='path',
                       keep_uri=(FLAGS.keep_uri == 'yes'))
        # The origin string carries the 1-based line number of the candidate.
        origin = 'i=%d|%s|%s' % (i + 1, c1, c2)
        # NOTE(review): `name` (and `mappings_n`) are not consumed inside this
        # span; presumably they are used further down - kept unchanged.
        name = '%s|%s' % (n1, n2)
        mappings.append(origin)
all_mappings_str.append('|'.join(mapping)) if not mv == '?': mappings_str.append('|'.join(mapping)) return mappings_str, all_mappings_str if __name__ == "__main__": ref_mappings_str, ref_all_mappings_str = read_oaei_mappings(file_name=FLAGS.oaei_GS_file) ref_excluded_mappings_str = set(ref_all_mappings_str) - set(ref_mappings_str) anchor_mappings_str = list() with open(FLAGS.anchor_mapping_file) as f: for line in f.readlines(): tmp = line.strip().split('|') anchor_mappings_str.append('%s|%s' % (uri_prefix(tmp[0]).lower(), uri_prefix(tmp[1]).lower())) pred_mappings_str = list() with open(FLAGS.prediction_out_file) as f: lines = f.readlines() for j in range(0, len(lines), 3): tmp = lines[j].split('|') if float(tmp[3]) >= FLAGS.threshold: pred_mappings_str.append('%s|%s' % (tmp[1].lower(), tmp[2].lower())) for a in anchor_mappings_str: if a not in pred_mappings_str: pred_mappings_str.append(a) recall_num = 0 for s in ref_mappings_str:
# Count the sampled mappings that fall outside the gold standard (s_n) and,
# among them, the ones annotated as 'true' (sv_n). `samples`, `GS` and the
# initial value of `sv_n` come from code before this span.
# NOTE(review): the 'true' check is nested under the not-in-GS test so that
# sv_n / s_n is a precision over one and the same sample set - confirm this
# nesting against the original layout.
s_n = 0
for sample in samples:
    tmp = sample.split('|')
    if '%s|%s' % (tmp[0], tmp[1]) not in GS:
        s_n += 1
        if len(tmp) >= 4 and tmp[3] == 'true':
            sv_n += 1
print(
    'All three systems: sampled mappings not in GS: %d, correct samples: %d, sampled precision: %f'
    % (s_n, sv_n, sv_n / s_n))

from lib.Label import uri_prefix

# Union of the mappings produced by the three systems. LogMap lines are
# re-keyed through uri_prefix; the other two files are taken line by line.
mappings = set()
with open(LogMap_output_mapping_file) as mapping_file:
    for line in mapping_file:
        tmp = line.strip().split('|')
        c1 = uri_prefix(uri=tmp[0])
        c2 = uri_prefix(uri=tmp[1])
        # Key on (left, right): pairing c2 with itself dropped the left entity
        # and produced keys that never match the gold standard.
        mappings.add('%s|%s' % (c1, c2))
with open(LogMap_ML_output_mapping_file) as mapping_file:
    for line in mapping_file:
        mappings.add(line.strip())
with open(AML_output_mapping_file) as mapping_file:
    for line in mapping_file:
        mappings.add(line.strip())

# Estimate the full gold-standard size: the known GS plus the share of the
# out-of-GS mappings expected to be correct according to the sampled precision.
G_M_n = len(mappings - GS)
print('mappings out of GS: %d' % G_M_n)
num = len(GS) + sv_n / s_n * G_M_n
print('approximate GS size: %d' % num)
print('approximate recall: %.3f' % (app_TP / num))
left_paths = [ line.strip().split(',') for line in open(FLAGS.left_path_file).readlines() ] right_paths = [ line.strip().split(',') for line in open(FLAGS.right_path_file).readlines() ] mappings = list() rule_violated_mappings = list() with open(FLAGS.anchor_mapping_file) as f: for i, line in enumerate(f.readlines()): tmp = line.strip().split(', ')[1] if ', ' in line else line.strip() tmp2 = tmp.split('|') c1 = uri_prefix(uri=tmp2[0]) c2 = uri_prefix(uri=tmp2[1]) n1 = get_label(cls=c1, paths=left_paths, names=left_names, label_type='path', keep_uri=(FLAGS.keep_uri == 'yes')) n2 = get_label(cls=c2, paths=right_paths, names=right_names, label_type='path', keep_uri=(FLAGS.keep_uri == 'yes')) if not n1 == '""' and not n2 == '""': if violate_rules(p1_str=n1, p2_str=n2,