Example #1
def test_find_links(path_to_serialized_model, matchings):
    # Deserialize model
    network = fieldnetwork.deserialize_network(path_to_serialized_model)
    # Create client
    store_client = StoreHandler()

    # Load glove model
    print("Loading language model...")
    path_to_glove_model = "../glove/glove.6B.100d.txt"
    glove_api.load_model(path_to_glove_model)
    print("Loading language model...OK")

    # Retrieve indexes
    schema_sim_index = io.deserialize_object(path_to_serialized_model + 'schema_sim_index.pkl')
    content_sim_index = io.deserialize_object(path_to_serialized_model + 'content_sim_index.pkl')

    om = SSAPI(network, store_client, schema_sim_index, content_sim_index)

    om.add_krs([("efo", "cache_onto/efo.pkl")], parsed=True)
    om.add_krs([("clo", "cache_onto/clo.pkl")], parsed=True)
    om.add_krs([("bao", "cache_onto/bao.pkl")], parsed=True)

    links = om.find_links(matchings)
    for link in links:
        print(link)
Example #2
    def test_ranking_certainty_chem(self):
        path = '../models/chemical/'
        network = deserialize_network(path)
        api = API(network)
        api.init_store()

        table = 'activities'
        table_drs = api.drs_from_table(table)

        sim_tables = api.similar_content_to(table_drs)

        sim_tables.rank_certainty()

        print("All columns CERTAINTY: ")
        sim_tables.pretty_print_columns_with_scores()
        print("")
        print("All tables CERTAINTY: ")
        sim_tables.print_tables_with_scores()
        print("")

        sim_tables.rank_coverage()

        print("All columns COVERAGE: ")
        sim_tables.pretty_print_columns_with_scores()
        print("")
        print("All tables COVERAGE: ")
        sim_tables.print_tables_with_scores()
        print("")
Example #3
def main(path_to_serialized_model):
    print('Loading: ' + str(path_to_serialized_model))
    network = fieldnetwork.deserialize_network(path_to_serialized_model)
    store_client = StoreHandler()
    api = API(network, store_client)
    ip_shell = InteractiveShellEmbed(banner1=init_banner, exit_msg=exit_banner)
    ip_shell()
Example #4
def generate_matchings(input_model_path, input_ontology_name_path, output_file):
    # Deserialize model
    network = fieldnetwork.deserialize_network(input_model_path)
    # Create client
    store_client = StoreHandler()

    # Load glove model
    print("Loading language model...")
    path_to_glove_model = "../glove/glove.6B.100d.txt"
    glove_api.load_model(path_to_glove_model)
    print("Loading language model...OK")

    # Retrieve indexes
    schema_sim_index = io.deserialize_object(input_model_path + 'schema_sim_index.pkl')
    content_sim_index = io.deserialize_object(input_model_path + 'content_sim_index.pkl')

    # Create ontomatch api
    om = SSAPI(network, store_client, schema_sim_index, content_sim_index)
    for onto_name, onto_parsed_path in input_ontology_name_path:
        # Load parsed ontology
        om.add_krs([(onto_name, onto_parsed_path)], parsed=True)

    matchings = om.find_matchings()

    with open(output_file, 'w') as f:
        for m in matchings:
            f.write(str(m) + '\n')

    print("Done!")
Example #5
def main(path_to_serialized_model):
    print('Loading: ' + str(path_to_serialized_model))
    network = fieldnetwork.deserialize_network(path_to_serialized_model)
    api = API(network)
    api.init_store()
    ip_shell = InteractiveShellEmbed(banner1=init_banner, exit_msg=exit_banner)
    ip_shell()
Example #6
def main(path_to_serialized_model):
    # Deserialize model
    network = fieldnetwork.deserialize_network(path_to_serialized_model)
    # Create client
    store_client = StoreHandler()

    # Load glove model
    print("Loading language model...")
    path_to_glove_model = "../glove/glove.6B.100d.txt"
    glove_api.load_model(path_to_glove_model)
    print("Loading language model...OK")

    # Retrieve indexes
    schema_sim_index = io.deserialize_object(path_to_serialized_model + 'schema_sim_index.pkl')
    content_sim_index = io.deserialize_object(path_to_serialized_model + 'content_sim_index.pkl')

    om = SSAPI(network, store_client, schema_sim_index, content_sim_index)

    om.add_krs([("dbpedia", "cache_onto/dbpedia.pkl")], parsed=True)

    matchings = om.find_matchings()

    print("Found: " + str(len(matchings)))
    for m in matchings:
        print(m)

    return om
Example #7
def test(path_to_serialized_model):
    # Deserialize model
    network = fieldnetwork.deserialize_network(path_to_serialized_model)
    # Create client
    store_client = StoreHandler()

    # Load glove model
    print("Loading language model...")
    path_to_glove_model = "../glove/glove.6B.100d.txt"
    glove_api.load_model(path_to_glove_model)
    print("Loading language model...OK")

    # Retrieve indexes
    schema_sim_index = io.deserialize_object(path_to_serialized_model + 'schema_sim_index.pkl')
    content_sim_index = io.deserialize_object(path_to_serialized_model + 'content_sim_index.pkl')

    # Create ontomatch api
    om = SSAPI(network, store_client, schema_sim_index, content_sim_index)
    # Load parsed ontology
    om.add_krs([("efo", "cache_onto/efo.pkl")], parsed=True)
    om.add_krs([("clo", "cache_onto/clo.pkl")], parsed=True)
    om.add_krs([("bao", "cache_onto/bao.pkl")], parsed=True)
    #om.add_krs([("go", "cache_onto/go.pkl")], parsed=True)  # parse again

    print("Finding matchings...")
    st = time.time()
    matchings = om.find_matchings()
    et = time.time()
    print("Finding matchings...OK")
    print("Took: " + str(et-st))

    for k, v in matchings:
        print(v)

    return om
Example #8
    def add_data_model(self, path_to_serialized_model):
        print('Loading data model ... ')
        self.network = fieldnetwork.deserialize_network(
            path_to_serialized_model)
        self.schema_sim_index = io.deserialize_object(
            path_to_serialized_model + 'schema_sim_index.pkl')
        self.content_sim_index = io.deserialize_object(
            path_to_serialized_model + 'content_sim_index.pkl')
Example #9
def init_system(path_to_serialized_model, create_reporting=False):
    print_md('Loading: *' + str(path_to_serialized_model) + "*")
    sl = time.time()
    network = fieldnetwork.deserialize_network(path_to_serialized_model)
    store_client = StoreHandler()
    api = API(network=network, store_client=store_client)
    # Bind 'reporting' even when reporting is disabled; it is returned below
    reporting = Report(network) if create_reporting else None
    api.helper.help()
    el = time.time()
    print("Took " + str(el - sl) + " to load model")
    return api, reporting
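
# Hedged usage sketch (the model path is an assumption, taken from another
# example on this page): 'reporting' is None unless create_reporting=True.
api, reporting = init_system('models/dwh/', create_reporting=True)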
Example #10
def __init_system(path_to_serialized_model, create_reporting=True):
    print_md('Loading: *' + str(path_to_serialized_model) + "*")
    sl = time.time()
    network = fieldnetwork.deserialize_network(path_to_serialized_model)
    api = oldAPI(network)
    # Bind 'reporting' even when reporting is disabled; it is returned below
    reporting = Report(network) if create_reporting else None
    api.init_store()
    api.help()
    el = time.time()
    print("Took " + str(el - sl) + " to load all data")
    return api, reporting
Example #11
def read_table_columns(path_to_serialized_model, network=False):
    # If the network is not provided, then we use the path to deserialize from disk
    if not network:
        network = fieldnetwork.deserialize_network(path_to_serialized_model)
    source_ids = network._get_underlying_repr_table_to_ids()
    col_info = network._get_underlying_repr_id_to_field_info()
    # Build a fresh column list per table: yielding one shared list and
    # clearing it afterwards would mutate lists already handed to the consumer
    for table_name, field_ids in source_ids.items():
        db_name = None
        cols = []
        for field_id in field_ids:
            (db_name, sn_name, fn_name, data_type) = col_info[field_id]
            cols.append(fn_name)
        yield (db_name, table_name, cols)
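
# Hedged usage sketch: read_table_columns is a generator, so tables can be
# consumed lazily; the model path is borrowed from another example on this page.
for db_name, table_name, columns in read_table_columns('../models/chemical/'):
    print(db_name, table_name, columns)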
Example #12
def test_fuzzy(path_to_serialized_model):
    # Deserialize model
    network = fieldnetwork.deserialize_network(path_to_serialized_model)
    # Create client
    store_client = StoreHandler()

    # Retrieve indexes
    schema_sim_index = io.deserialize_object(path_to_serialized_model + 'schema_sim_index.pkl')
    content_sim_index = io.deserialize_object(path_to_serialized_model + 'content_sim_index.pkl')

    # Create ontomatch api
    om = SSAPI(network, store_client, schema_sim_index, content_sim_index)
    # Load parsed ontology
    om.add_krs([("efo", "cache_onto/efo.pkl")], parsed=True)

    matchings = matcherlib.find_hierarchy_content_fuzzy(om.kr_handlers, store_client)

    for m in matchings:
        print(m)
Example #13
    def export(self, path_to_model):
        field_network = fieldnetwork.deserialize_network(path_to_model)

        # Create index to speed up MATCHes
        with self._driver.session() as session:
            session.run("CREATE INDEX ON :Node(nid)")

        for relation_label in Relation:

            # relation_hits is a generator. We could consume it to a list and then iterate over it,
            # but this would probably consume too much memory in most scenarios
            relation_hits = field_network.enumerate_relation(relation_label,
                                                             as_str=False)
            for a, b in tqdm(
                    relation_hits,
                    desc=f'Storing {relation_label} relations to Neo4j',
                    unit='relation'):
                with self._driver.session() as session:
                    # Step 1: add nodes
                    session.run(
                        "CREATE (n:Node {nid:$nid,db_name:$db_name,source:$source,field:$field,score:$score}) RETURN id(n)",
                        nid=a.nid,
                        db_name=a.db_name,
                        source=a.source_name,
                        field=a.field_name,
                        score=a.score)
                    session.run(
                        "CREATE (n:Node {nid:$nid,db_name:$db_name,source:$source,field:$field,score:$score}) RETURN id(n)",
                        nid=b.nid,
                        db_name=b.db_name,
                        source=b.source_name,
                        field=b.field_name,
                        score=b.score)

                    # Step 2: add the edge between the two nodes just created
                    session.run(
                        "MATCH (a:Node),(b:Node) "
                        "WHERE a.nid=$nid_a AND b.nid=$nid_b "
                        "CREATE (a)-[r:{relation_label}]->(b) RETURN type(r)".format(
                            relation_label=str(relation_label).replace('Relation.', '')),
                        nid_a=a.nid,
                        nid_b=b.nid)  # .single().value()
Example #14
class TestReporting(unittest.TestCase):
    # create store handler
    store_client = StoreHandler()
    # read graph
    path = '../test/test4/'
    network = deserialize_network(path)
    api = API(network)
    api.init_store()

    def test_compute_statistics(self):
        r = Report(self.network)
        ncols = r.num_columns
        ntables = r.num_tables
        ncontent = r.num_content_sim_relations
        nschema = r.num_schema_sim_relations
        npkfk = r.num_pkfk_relations
        print("Num cols: " + str(ncols))
        print("Num tables: " + str(ntables))
        print("Num content sim relations: " + str(ncontent))
        print("Num schema sim relations: " + str(nschema))
        print("Num PKFK relations: " + str(npkfk))
Example #15
def main(args):
    model_path = args.model_path
    separator = args.separator

    store_client = StoreHandler()
    network = fieldnetwork.deserialize_network(model_path)
    dod = DoD(network=network,
              store_client=store_client,
              csv_separator=separator)

    attrs = args.list_attributes.split(";")
    values = args.list_values.split(";")
    print(attrs)
    print(values)
    assert len(attrs) == len(values)

    i = 0
    for mjp, attrs_project, metadata in dod.virtual_schema_iterative_search(
            attrs, values, debug_enumerate_all_jps=False):
        print("JP: " + str(i))
        proj_view = dpu.project(mjp, attrs_project)
        print(str(proj_view.head(10)))
        print("Metadata")
        print(metadata)
        if args.output_path:
            if args.full_view:
                mjp.to_csv(args.output_path + "/raw_view_" + str(i),
                           encoding='latin1',
                           index=False)
            proj_view.to_csv(args.output_path + "/view_" + str(i),
                             encoding='latin1',
                             index=False)  # always store this
        i += 1
        if args.interactive == "True":
            print("")
            input("Press any key to continue...")
Example #16
class TestProvenance(unittest.TestCase):
    # create store handler
    store_client = StoreHandler()
    # read graph
    path = '../test/test4/'
    network = deserialize_network(path)
    api = API(network)
    api.init_store()

    def test_keyword_provenance(self):
        print(self._testMethodName)

        res = self.api.keyword_search("Madden", max_results=10)

        print(res.get_provenance().prov_graph().nodes())
        print(res.get_provenance().prov_graph().edges())

        el_interest = [x for x in res][0]

        info = res.why(el_interest)
        print("WHY " + str(el_interest) + "? " + str(info))

        explanation = res.how(el_interest)
        print("HOW " + str(el_interest) + "? " + str(explanation))

        self.assertTrue(True)

    def test_content_sim_provenance(self):
        print(self._testMethodName)

        table = 'Buildings.csv'
        res = self.api.similar_content_to_table(table)

        print(res.get_provenance().prov_graph().nodes())
        print(res.get_provenance().prov_graph().edges())

        el_interest = [x for x in res][0]

        info = res.why(el_interest)
        print("WHY " + str(el_interest) + "? " + str(info))

        explanation = res.how(el_interest)
        print("HOW " + str(el_interest) + "? " + str(explanation))

        self.assertTrue(True)

    def test_intersection_provenance(self):
        print(self._testMethodName)

        res1 = self.api.keyword_search("Madden", max_results=10)
        res2 = self.api.keyword_search("Stonebraker", max_results=10)

        res = res1.intersection(res2)

        print(res.get_provenance().prov_graph().nodes())
        print(res.get_provenance().prov_graph().edges())

        el_interest = [x for x in res][0]

        info = res.why(el_interest)
        print("WHY " + str(el_interest) + "? " + str(info))

        explanation = res.how(el_interest)
        print("HOW " + str(el_interest) + "? " + str(explanation))

        self.assertTrue(True)

    def test_tc_table_mode_provenance(self):
        print(self._testMethodName)

        field1 = ('dwhsmall', 'All_olap2_uentity_desc_uses.csv',
                  'Entity Owner')
        field2 = ('dwhsmall', 'All_olap_entity_desc_uses.csv', 'Entity Owner')

        drs1 = self.api.drs_from_raw_field(field1)
        drs2 = self.api.drs_from_raw_field(field2)

        drs1.set_table_mode()
        drs2.set_table_mode()

        res = self.api.paths_between(drs1, drs2, Relation.PKFK)

        print(res.get_provenance().prov_graph().nodes())
        print(res.get_provenance().prov_graph().edges())

        el_interest = [x for x in res][0]

        info = res.why(el_interest)
        print("WHY " + str(el_interest) + "? " + str(info))

        explanation = res.how(el_interest)
        print("HOW " + str(el_interest) + "? " + str(explanation))

        self.assertTrue(True)
Example #17
def test_4_n_42(path_to_serialized_model):
    # Deserialize model
    network = fieldnetwork.deserialize_network(path_to_serialized_model)
    # Create client
    store_client = StoreHandler()

    # Load glove model
    print("Loading language model...")
    path_to_glove_model = "../glove/glove.6B.100d.txt"
    glove_api.load_model(path_to_glove_model)
    print("Loading language model...OK")

    # Retrieve indexes
    schema_sim_index = io.deserialize_object(path_to_serialized_model + 'schema_sim_index.pkl')
    content_sim_index = io.deserialize_object(path_to_serialized_model + 'content_sim_index.pkl')

    # Create ontomatch api
    om = SSAPI(network, store_client, schema_sim_index, content_sim_index)
    # Load parsed ontology
    #om.add_krs([("efo", "cache_onto/efo.pkl")], parsed=True)
    #om.add_krs([("clo", "cache_onto/clo.pkl")], parsed=True)
    #om.add_krs([("bao", "cache_onto/bao.pkl")], parsed=True)
    om.add_krs([("dbpedia", "cache_onto/dbpedia.pkl")], parsed=True)  # parse again

    # L6: [Relations] -> [Class names] (semantic groups)

    print("Finding L6 matchings...")
    st = time.time()
    l6_matchings, sem_coh_groups = matcherlib.find_sem_coh_matchings(om.network, om.kr_handlers)
    print("Finding L6 matchings...OK, " + str(len(l6_matchings)) + " found")
    et = time.time()
    print("Took: " + str(et - st))

    for m in l6_matchings:
        print(m)

    for k, v in sem_coh_groups.items():
        print(str(k) + " -> " + str(v))

    exit()  # NOTE: early exit; everything below this line is unreachable

    print("Finding matchings...")
    st = time.time()
    # L4: [Relation names] -> [Class names] (syntax)
    print("Finding L4 matchings...")
    st = time.time()
    l4_matchings = matcherlib.find_relation_class_name_matchings(om.network, om.kr_handlers)
    print("Finding L4 matchings...OK, " + str(len(l4_matchings)) + " found")
    et = time.time()
    print("Took: " + str(et - st))

    print("computing fanout")
    fanout = defaultdict(int)
    for m in l4_matchings:
        sch, cla = m
        fanout[sch] += 1
    ordered = sorted(fanout.items(), key=operator.itemgetter(1), reverse=True)
    for o in ordered:
        print(o)

    # for match in l4_matchings:
    #    print(match)

    # L4.2: [Relation names] -> [Class names] (semantic)
    print("Finding L42 matchings...")
    st = time.time()
    l42_matchings = matcherlib.find_relation_class_name_sem_matchings(om.network, om.kr_handlers)
    print("Finding L42 matchings...OK, " + str(len(l42_matchings)) + " found")
    et = time.time()
    print("Took: " + str(et - st))
    et = time.time()
    print("Finding matchings...OK")
    print("Took: " + str(et - st))

    print("are l4 subsumed by l42?")
    not_in_l42 = 0
    not_subsumed = []
    for m in l4_matchings:
        if m not in l42_matchings:
            not_in_l42 += 1
            not_subsumed.append(m)
    print("NOT-subsumed: " + str(not_in_l42))

    """
    # L5: [Attribute names] -> [Class names] (syntax)
    print("Finding L5 matchings...")
    st = time.time()
    l5_matchings = matcherlib.find_relation_class_attr_name_matching(om.network, om.kr_handlers)
    print("Finding L5 matchings...OK, " + str(len(l5_matchings)) + " found")
    et = time.time()
    print("Took: " + str(et - st))

    # for match in l5_matchings:
    #    print(match)

    # l52_matchings = []

    # L52: [Attribute names] -> [Class names] (semantic)
    print("Finding L52 matchings...")
    st = time.time()
    l52_matchings = matcherlib.find_relation_class_attr_name_sem_matchings(om.network, om.kr_handlers)
    print("Finding L52 matchings...OK, " + str(len(l52_matchings)) + " found")
    et = time.time()
    print("Took: " + str(et - st))

    """

    with open('OUTPUT_442_only', 'w') as f:
        f.write("L4" + '\n')
        for m in l4_matchings:
            f.write(str(m) + '\n')
        f.write("L42" + '\n')
        for m in l42_matchings:
            f.write(str(m) + '\n')
        f.write("L5" + '\n')
Example #18
class TestDDApiPathQueries(unittest.TestCase):

    # create store handler
    store_client = StoreHandler()
    # read graph
    path = 'models/chemical/'
    network = deserialize_network(path)
    api = API(network)
    api.init_store()
    """
    TC primitive API
    """
    def test_paths_between_field_mode(self):
        print(self._testMethodName)

        field1 = ('chembl_21', 'drug_indication', 'record_id')
        field2 = ('chembl_21', 'compound_records', 'record_id')

        drs1 = self.api.drs_from_raw_field(field1)
        drs2 = self.api.drs_from_raw_field(field2)

        res = self.api.paths_between(drs1, drs2, Relation.PKFK)

        data = [x for x in res]
        print("Total results: " + str(len(data)))
        for el in data:
            print(str(el))

    def test_paths_between_table_mode(self):
        print(self._testMethodName)

        field1 = ('chembl_21', 'drug_indication', 'record_id')
        field2 = ('chembl_21', 'compound_records', 'record_id')

        drs1 = self.api.drs_from_raw_field(field1)
        drs2 = self.api.drs_from_raw_field(field2)

        drs1.set_table_mode()
        drs2.set_table_mode()

        res = self.api.paths_between(drs1, drs2, Relation.PKFK)

        data = [x for x in res]
        print("Total results: " + str(len(data)))
        for el in data:
            print(str(el))

        print("Paths: ")
        res.visualize_provenance()
        res.debug_print()
        paths = res.paths()
        for p in paths:
            print(str(p))

    def test_paths_between_from_tables(self):
        print(self._testMethodName)

        table1_name = "drug_indication"
        table2_name = "compound_records"
        table1 = self.api.drs_from_table(table1_name)
        table2 = self.api.drs_from_table(table2_name)
        table1.set_table_mode()
        table2.set_table_mode()
        res = self.api.paths_between(table1, table2, Relation.PKFK)

        data = [x for x in res]
        print("Total results: " + str(len(data)))
        for el in data:
            print(str(el))

        print("Paths: ")
        paths = res.paths()
        for p in paths:
            print(str(p))

    def test_paths(self):
        print(self._testMethodName)

        return

    def test_traverse(self):
        print(self._testMethodName)

        field1 = ('chembl_21', 'drug_indication', 'record_id')
        drs_field = self.api.drs_from_raw_field(field1)
        res = self.api.traverse(drs_field, Relation.SCHEMA_SIM, 1)

        data = [x for x in res]
        print("Total results: " + str(len(data)))
        for el in data:
            print(str(el))

        return
Example #19
# Ignore in-table results of neighbor searches
# Exclude certain tables
# keyword_search and neighbor_search, but on multiple contexts

import networkx as nx
from api.apiutils import Relation

from modelstore.elasticstore import StoreHandler, KWType
from knowledgerepr import fieldnetwork
from algebra import API

path_to_serialized_model = "/Users/arcarter/code/datadiscovery/test/testmodel/"
network = fieldnetwork.deserialize_network(path_to_serialized_model)
store_client = StoreHandler()

api = API(network, store_client)

# short variables for Scope
# These are used in keyword searches
# To specify what parts of a file will be searched
source = KWType.KW_TABLE  # table/file/source name
field = KWType.KW_SCHEMA  # column names/fields
content = KWType.KW_TEXT  # content of the columns

# Short variables for Relation
# These represent edge types in the graph
# and are used for neighbor searches
# schema = Relation.SCHEMA  # similar schemas
schema_sim = Relation.SCHEMA_SIM  # Similar Schema Names
# similar content values. i.e. matching substrings and numbers
content_sim = Relation.CONTENT_SIM
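
# A minimal usage sketch appended for illustration (not part of the original
# snippet): it only combines calls that appear elsewhere on this page; the
# keyword "Madden" and the one-hop traversal are arbitrary choices.
hits = api.keyword_search("Madden", max_results=10)
for hit in hits:
    print(str(hit))

# Follow schema-similarity edges one hop out from those hits, mirroring the
# traverse() call in the test suite above
neighbors = api.traverse(hits, schema_sim, 1)
for hit in neighbors:
    print(str(hit))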
Example #20
class TestDDApi(unittest.TestCase):

    # create store handler
    store_client = StoreHandler()
    # read graph
    path = 'models/dwh/'
    network = deserialize_network(path)
    api = API(network)
    api.init_store()
    """
    Seed API
    """
    def test_drs_from_raw_field(self):
        print(self._testMethodName)

        field = ('mitdwh', 'Iap_subject_person.csv', 'Person Mit Affiliation')
        res = self.api.drs_from_raw_field(field)

        for el in res:
            print(str(el))

    def test_drs_from_hit(self):
        print(self._testMethodName)

        field = ('mitdwh', 'Iap_subject_person.csv', 'Person Mit Affiliation')
        res = self.api.drs_from_raw_field(field)

        els = [x for x in res]
        el = els[0]

        res = self.api.drs_from_hit(el)

        for el in res:
            print(str(el))

    def test_drs_from_table(self):
        print(self._testMethodName)

        table = 'Iap_subject_person.csv'
        res = self.api.drs_from_table(table)

        for el in res:
            print(el)

    def test_drs_from_table_hit(self):
        print(self._testMethodName)

        field = ('mitdwh', 'Iap_subject_person.csv', 'Person Mit Affiliation')
        res = self.api.drs_from_raw_field(field)

        els = [x for x in res]
        el = els[0]

        res = self.api.drs_from_table_hit(el)

        for el in res:
            print(str(el))

    """
    Primitive API
    """

    def test_keyword_search(self):
        print(self._testMethodName)

        res = self.api.keyword_search("Madden", max_results=10)

        for el in res:
            print(str(el))

    def test_keywords_search(self):
        print(self._testMethodName)

        res = self.api.keywords_search(["Madden", "Stonebraker", "Liskov"])

        for el in res:
            print(str(el))

    def test_schema_name_search(self):
        print(self._testMethodName)

        res = self.api.schema_name_search("Name", max_results=10)

        for el in res:
            print(str(el))

    def test_schema_names_search(self):
        print(self._testMethodName)

        res = self.api.schema_names_search(["Name", "Last Name", "Employee"])

        for el in res:
            print(str(el))

    def test_entity_search(self):
        print(self._testMethodName)

        print("Future Work...")
        return

    def test_schema_neighbors(self):
        print(self._testMethodName)

        field = ('mitdwh', 'Iap_subject_person.csv', 'Person Mit Affiliation')
        res = self.api.schema_neighbors(field)

        for el in res:
            print(str(el))

    def test_schema_neighbors_of(self):
        print(self._testMethodName)

        field = ('mitdwh', 'Iap_subject_person.csv', 'Person Mit Affiliation')
        res = self.api.schema_neighbors(field)

        res = self.api.schema_neighbors_of(res)

        for el in res:
            print(str(el))

    def test_similar_schema_name_to_field(self):
        print(self._testMethodName)

        field = ('mitdwh', 'Buildings.csv', 'Building Name')
        res = self.api.similar_schema_name_to_field(field)

        print("RES size: " + str(res.size()))
        for el in res:
            print(str(el))

    def test_ids_functions(self):
        print(self._testMethodName)

        field = ('mitdwh', 'Buildings.csv', 'Building Key')
        drs1 = self.api.drs_from_raw_field(field)

        field = ('mitdwh', 'Building Key', 'Buildings.csv')
        drs2 = self.api.drs_from_raw_field(field)

        for el in drs1:
            print(str(el))
        for el in drs2:
            print(str(el))

    def test_similar_schema_name_to_table(self):
        print(self._testMethodName)

        table = 'Buildings.csv'
        res = self.api.similar_schema_name_to_table(table)

        print("RES size: " + str(res.size()))
        for el in res:
            print(str(el))

    def test_similar_schema_name_to(self):
        print(self._testMethodName)

        field = ('mitdwh', 'Buildings.csv', 'Building Key')
        res = self.api.similar_schema_name_to_field(field)

        res = self.api.similar_schema_name_to(res)

        print("RES size: " + str(res.size()))
        for el in res:
            print(str(el))

    def test_similar_content_to_field(self):
        print(self._testMethodName)

        field = ('mitdwh', 'Buildings.csv', 'Building Name')
        res = self.api.similar_content_to_field(field)

        print("RES size: " + str(res.size()))
        for el in res:
            print(str(el))

    def test_similar_content_to_table(self):
        print(self._testMethodName)

        table = 'Buildings.csv'
        res = self.api.similar_content_to_table(table)

        print("RES size: " + str(res.size()))
        for el in res:
            print(str(el))

    def test_similar_content_to(self):
        print(self._testMethodName)

        field = ('mitdwh', 'Buildings.csv', 'Building Name')
        res = self.api.similar_content_to_field(field)

        res = self.api.similar_content_to(res)

        print("RES size: " + str(res.size()))
        for el in res:
            print(str(el))

    def test_pkfk_field(self):
        print(self._testMethodName)

        field = ('mitdwh', 'Buildings.csv', 'Building Name')
        res = self.api.pkfk_field(field)

        print("RES size: " + str(res.size()))
        for el in res:
            print(str(el))

    def test_pkfk_table(self):
        print(self._testMethodName)

        table = 'Buildings.csv'
        res = self.api.pkfk_table(table)

        print("RES size: " + str(res.size()))
        for el in res:
            print(str(el))

    def test_pkfk_of(self):
        print(self._testMethodName)

        field = ('mitdwh', 'Buildings.csv', 'Building Name')
        res = self.api.pkfk_field(field)

        res = self.api.pkfk_of(res)

        print("RES size: " + str(res.size()))
        for el in res:
            print(str(el))

    """
    Combiner API
    """

    def test_intersection(self):
        print(self._testMethodName)

        res1 = self.api.keyword_search("Madden", max_results=10)
        res2 = self.api.keyword_search("Stonebraker", max_results=10)

        res = res1.intersection(res2)

        for el in res:
            print(str(el))

    def test_union(self):
        print(self._testMethodName)

        res1 = self.api.keyword_search("Madden", max_results=10)
        res2 = self.api.schema_name_search("Stonebraker", max_results=10)

        res = res1.union(res2)

        for el in res:
            print(str(el))

    def test_difference(self):
        print(self._testMethodName)

        res1 = self.api.keyword_search("Madden", max_results=10)
        res2 = self.api.keyword_search("Stonebraker", max_results=10)

        res = res1.set_difference(res2)

        for el in res:
            print(str(el))

    """
    Other, bugs, etc
    """

    def test_iter_edges_with_data_bug(self):
        table = "Fac_building.csv"  # The table of interest
        # We get the representation of that table in DRS
        table_drs = self.api.drs_from_table(table)
        # similar tables are those with similar content
        content_similar = self.api.similar_content_to(table_drs)
        schema_similar = self.api.similar_schema_name_to(
            table_drs)  # similar attribute names
        # some pkfk relationship involved too
        pkfk_similar = self.api.pkfk_of(table_drs)
        # similar tables are similar in content and schema
        inters1 = self.api.intersection(content_similar, schema_similar)
        similar_tables = self.api.intersection(inters1, pkfk_similar)
        similar_tables.print_tables()