Ejemplo n.º 1
0
def main(path_to_serialized_model):
    """Find and print all matchings against the cached "dbpedia" ontology.

    ``path_to_serialized_model`` is handed to the network deserializer and
    is also concatenated directly with the index file names, so a directory
    path presumably needs its trailing separator.

    Returns the ``SSAPI`` object that was built, with the ontology loaded.
    """
    # Rebuild the serialized network and create the store client
    field_network = fieldnetwork.deserialize_network(path_to_serialized_model)
    store = StoreHandler()

    # Language model used by the matchers
    print("Loading language model...")
    glove_api.load_model("../glove/glove.6B.100d.txt")
    print("Loading language model...OK")

    # Similarity indexes stored next to the model
    schema_index = io.deserialize_object(
        path_to_serialized_model + 'schema_sim_index.pkl')
    content_index = io.deserialize_object(
        path_to_serialized_model + 'content_sim_index.pkl')

    om = SSAPI(field_network, store, schema_index, content_index)

    # The ontology is registered from its already-parsed pickle
    dbpedia_entry = ("dbpedia", "cache_onto/dbpedia.pkl")
    om.add_krs([dbpedia_entry], parsed=True)

    found = om.find_matchings()

    total = len(found)
    print("Found: " + str(total))
    for matching in found:
        print(matching)

    return om
Ejemplo n.º 2
0
def test_find_links(path_to_serialized_model, matchings):
    """Print every link that ``find_links`` derives from ``matchings``.

    Builds an ``SSAPI`` object from the serialized model at
    ``path_to_serialized_model`` (used as a string prefix for the index
    files), registers the cached "efo", "clo" and "bao" ontologies, and
    prints each link returned for the given matchings. Returns nothing.
    """
    # Rebuild the serialized network and create the store client
    field_network = fieldnetwork.deserialize_network(path_to_serialized_model)
    store = StoreHandler()

    # Language model used by the matchers
    print("Loading language model...")
    glove_api.load_model("../glove/glove.6B.100d.txt")
    print("Loading language model...OK")

    # Similarity indexes stored next to the model
    schema_index = io.deserialize_object(
        path_to_serialized_model + 'schema_sim_index.pkl')
    content_index = io.deserialize_object(
        path_to_serialized_model + 'content_sim_index.pkl')

    om = SSAPI(field_network, store, schema_index, content_index)

    # Register the parsed ontologies one call at a time, in this order
    cached_ontologies = (
        ("efo", "cache_onto/efo.pkl"),
        ("clo", "cache_onto/clo.pkl"),
        ("bao", "cache_onto/bao.pkl"),
    )
    for onto_entry in cached_ontologies:
        om.add_krs([onto_entry], parsed=True)

    for found_link in om.find_links(matchings):
        print(found_link)
Ejemplo n.º 3
0
def generate_matchings(input_model_path, input_ontology_name_path, output_file):
    """Compute matchings for the given ontologies and write them to a file.

    Args:
        input_model_path: passed to the network deserializer and used as a
            string prefix for the two similarity-index file names.
        input_ontology_name_path: iterable of ``(name, parsed_path)`` pairs,
            one per ontology to register.
        output_file: path of the text file to (over)write; it receives one
            ``str(matching)`` per line.
    """
    # Rebuild the serialized network and create the store client
    field_network = fieldnetwork.deserialize_network(input_model_path)
    store = StoreHandler()

    # Language model used by the matchers
    print("Loading language model...")
    glove_api.load_model("../glove/glove.6B.100d.txt")
    print("Loading language model...OK")

    # Similarity indexes stored next to the model
    schema_index = io.deserialize_object(
        input_model_path + 'schema_sim_index.pkl')
    content_index = io.deserialize_object(
        input_model_path + 'content_sim_index.pkl')

    # Ontomatch API with every requested (already parsed) ontology loaded
    om = SSAPI(field_network, store, schema_index, content_index)
    for name, parsed_path in input_ontology_name_path:
        om.add_krs([(name, parsed_path)], parsed=True)

    found = om.find_matchings()

    with open(output_file, 'w') as out:
        out.writelines(str(matching) + '\n' for matching in found)

    print("Done!")
Ejemplo n.º 4
0
def test(path_to_serialized_model):
    """Time ``find_matchings`` over the cached "efo", "clo" and "bao" ontologies.

    Args:
        path_to_serialized_model: passed to the network deserializer and
            used as a string prefix for the two similarity-index file names.

    Returns:
        The ``SSAPI`` object that was built, with the ontologies loaded.
    """
    # Deserialize model
    network = fieldnetwork.deserialize_network(path_to_serialized_model)
    # Create client
    store_client = StoreHandler()

    # Load glove model
    print("Loading language model...")
    path_to_glove_model = "../glove/glove.6B.100d.txt"
    glove_api.load_model(path_to_glove_model)
    print("Loading language model...OK")

    # Retrieve indexes
    schema_sim_index = io.deserialize_object(path_to_serialized_model + 'schema_sim_index.pkl')
    content_sim_index = io.deserialize_object(path_to_serialized_model + 'content_sim_index.pkl')

    # Create ontomatch api
    om = SSAPI(network, store_client, schema_sim_index, content_sim_index)
    # Load parsed ontologies
    om.add_krs([("efo", "cache_onto/efo.pkl")], parsed=True)
    om.add_krs([("clo", "cache_onto/clo.pkl")], parsed=True)
    om.add_krs([("bao", "cache_onto/bao.pkl")], parsed=True)
    # NOTE: the "go" ontology (cache_onto/go.pkl) is intentionally not
    # loaded here; it was marked as needing to be parsed again.

    print("Finding matchings...")
    # perf_counter is monotonic, so the reported duration cannot be skewed
    # by wall-clock adjustments the way a time.time() difference can.
    st = time.perf_counter()
    matchings = om.find_matchings()
    et = time.perf_counter()
    print("Finding matchings...OK")
    print("Took: " + str(et - st))

    # Each matching is unpacked as a pair; only its second element is shown.
    for _, v in matchings:
        print(v)

    return om
Ejemplo n.º 5
0
 def add_data_model(self, path_to_serialized_model):
     """Load the network and both similarity indexes onto this object.

     ``path_to_serialized_model`` is passed to the network deserializer and
     is concatenated directly with each index file name. The results are
     stored in ``self.network``, ``self.schema_sim_index`` and
     ``self.content_sim_index``, in that order.
     """
     print('Loading data model ... ')
     prefix = path_to_serialized_model
     self.network = fieldnetwork.deserialize_network(prefix)
     # Attribute name -> pickle file holding that index
     index_files = (
         ('schema_sim_index', 'schema_sim_index.pkl'),
         ('content_sim_index', 'content_sim_index.pkl'),
     )
     for attr_name, file_name in index_files:
         setattr(self, attr_name, io.deserialize_object(prefix + file_name))
Ejemplo n.º 6
0
def test_fuzzy(path_to_serialized_model):
    """Print the matchings from ``find_hierarchy_content_fuzzy`` for "efo".

    Builds an ``SSAPI`` object from the serialized model at
    ``path_to_serialized_model`` (used as a string prefix for the index
    files), registers the cached "efo" ontology, and prints each matching
    the fuzzy matcher returns. No language model is loaded here. Returns
    nothing.
    """
    # Rebuild the serialized network and create the store client
    field_network = fieldnetwork.deserialize_network(path_to_serialized_model)
    store = StoreHandler()

    # Similarity indexes stored next to the model
    schema_index = io.deserialize_object(
        path_to_serialized_model + 'schema_sim_index.pkl')
    content_index = io.deserialize_object(
        path_to_serialized_model + 'content_sim_index.pkl')

    # Ontomatch API with the parsed ontology registered
    om = SSAPI(field_network, store, schema_index, content_index)
    efo_entry = ("efo", "cache_onto/efo.pkl")
    om.add_krs([efo_entry], parsed=True)

    fuzzy_matchings = matcherlib.find_hierarchy_content_fuzzy(
        om.kr_handlers, store)

    for matching in fuzzy_matchings:
        print(matching)
Ejemplo n.º 7
0
    path_to_sem_model = argv[4]
    path_to_results = argv[5]
    path_to_gold_standard = argv[6]

    # Deserialize model
    network = fieldnetwork.deserialize_network(path_to_serialized_model)
    # Create client
    store_client = StoreHandler()

    # Load glove model
    print("Loading language model...")
    glove_api.load_model(path_to_sem_model)
    print("Loading language model...OK")

    # Retrieve indexes
    schema_sim_index = io.deserialize_object(path_to_serialized_model +
                                             'schema_sim_index.pkl')
    content_sim_index = io.deserialize_object(path_to_serialized_model +
                                              'content_sim_index.pkl')

    # Create ontomatch api
    om = SSAPI(network, store_client, schema_sim_index, content_sim_index)
    # Load parsed ontology
    om.add_krs([(onto_name, path_to_ontology)], parsed=True)

    # # Build content sim
    om.priv_build_content_sim(0.6)

    print("Benchmarking matchers and linkers")
    generate_matchings(network, store_client, om, path_to_results)
    precision, recall = combine_and_report_results(om, path_to_results,
                                                   path_to_gold_standard)
Ejemplo n.º 8
0
def test_4_n_42(path_to_serialized_model):
    # Deserialize model
    network = fieldnetwork.deserialize_network(path_to_serialized_model)
    # Create client
    store_client = StoreHandler()

    # Load glove model
    print("Loading language model...")
    path_to_glove_model = "../glove/glove.6B.100d.txt"
    glove_api.load_model(path_to_glove_model)
    print("Loading language model...OK")

    # Retrieve indexes
    schema_sim_index = io.deserialize_object(path_to_serialized_model + 'schema_sim_index.pkl')
    content_sim_index = io.deserialize_object(path_to_serialized_model + 'content_sim_index.pkl')

    # Create ontomatch api
    om = SSAPI(network, store_client, schema_sim_index, content_sim_index)
    # Load parsed ontology
    #om.add_krs([("efo", "cache_onto/efo.pkl")], parsed=True)
    #om.add_krs([("clo", "cache_onto/clo.pkl")], parsed=True)
    #om.add_krs([("bao", "cache_onto/bao.pkl")], parsed=True)
    om.add_krs([("dbpedia", "cache_onto/dbpedia.pkl")], parsed=True)  # parse again

    # L6: [Relations] -> [Class names] (semantic groups)

    print("Finding L6 matchings...")
    st = time.time()
    l6_matchings, sem_coh_groups = matcherlib.find_sem_coh_matchings(om.network, om.kr_handlers)
    print("Finding L6 matchings...OK, " + str(len(l6_matchings)) + " found")
    et = time.time()
    print("Took: " + str(et - st))

    for m in l6_matchings:
        print(m)

    for k, v in sem_coh_groups.items():
        print(str(k) + " -> " + str(v))

    exit()

    print("Finding matchings...")
    st = time.time()
    # L4: [Relation names] -> [Class names] (syntax)
    print("Finding L4 matchings...")
    st = time.time()
    l4_matchings = matcherlib.find_relation_class_name_matchings(om.network, om.kr_handlers)
    print("Finding L4 matchings...OK, " + str(len(l4_matchings)) + " found")
    et = time.time()
    print("Took: " + str(et - st))

    print("computing fanout")
    fanout = defaultdict(int)
    for m in l4_matchings:
        sch, cla = m
        fanout[sch] += 1
    ordered = sorted(fanout.items(), key=operator.itemgetter(1), reverse=True)
    for o in ordered:
        print(o)

    # for match in l4_matchings:
    #    print(match)

    # L4.2: [Relation names] -> [Class names] (semantic)
    print("Finding L42 matchings...")
    st = time.time()
    l42_matchings = matcherlib.find_relation_class_name_sem_matchings(om.network, om.kr_handlers)
    print("Finding L42 matchings...OK, " + str(len(l42_matchings)) + " found")
    et = time.time()
    print("Took: " + str(et - st))
    et = time.time()
    print("Finding matchings...OK")
    print("Took: " + str(et - st))

    print("are l4 subsumed by l42?")
    not_in_l42 = 0
    not_subsumed = []
    for m in l4_matchings:
        if m not in l42_matchings:
            not_in_l42 += 1
            not_subsumed.append(m)
    print("NOT-subsumed: " + str(not_in_l42))

    """
    # L5: [Attribute names] -> [Class names] (syntax)
    print("Finding L5 matchings...")
    st = time.time()
    l5_matchings = matcherlib.find_relation_class_attr_name_matching(om.network, om.kr_handlers)
    print("Finding L5 matchings...OK, " + str(len(l5_matchings)) + " found")
    et = time.time()
    print("Took: " + str(et - st))

    # for match in l5_matchings:
    #    print(match)

    # l52_matchings = []

    # L52: [Attribute names] -> [Class names] (semantic)
    print("Finding L52 matchings...")
    st = time.time()
    l52_matchings = matcherlib.find_relation_class_attr_name_sem_matchings(om.network, om.kr_handlers)
    print("Finding L52 matchings...OK, " + str(len(l52_matchings)) + " found")
    et = time.time()
    print("Took: " + str(et - st))

    """

    with open('OUTPUT_442_only', 'w') as f:
        f.write("L4" + '\n')
        for m in l4_matchings:
            f.write(str(m) + '\n')
        f.write("L42" + '\n')
        for m in l42_matchings:
            f.write(str(m) + '\n')
        f.write("L5" + '\n')