def test_benchmark_dbpedia_lookup_subject_columns_only():
    """Benchmark subject-column-only disambiguation against the gold standard.

    For every regular file in ``ENTITIES_DIR`` the table with the same file
    name is loaded from ``TABLES_DIR``, its subject column is identified, the
    table is disambiguated, and the result is printed next to the
    gold-standard entities together with their diff.

    Each table is processed exactly once. A table that raises is reported and
    skipped, so a single bad input (or an empty directory) cannot stall the
    run. Only ``Exception`` is caught, which keeps ``KeyboardInterrupt`` and
    ``SystemExit`` able to stop the benchmark.
    """
    onlyfiles = [
        f for f in listdir(ENTITIES_DIR) if isfile(join(ENTITIES_DIR, f))
    ]
    scidentifier = SCIdentifier()
    total = len(onlyfiles)
    for num, _id in enumerate(onlyfiles):
        try:
            print("process table %d out of %d" % (num, total), flush=True)
            print("table id %s" % (_id), flush=True)
            fixture_entities = get_gold_standard_entities(_id)
            _table = GenericTable(filename=join(TABLES_DIR, _id), _id=_id)
            _table.init()
            _subject_columns = scidentifier.identify_subject_column(_table)
            # Only the first identified candidate is used; when none is found
            # the table keeps whatever subject column it already has.
            if _subject_columns:
                _table.subject_column = _subject_columns[0]
            dbpedia_lookup_entities = disambiguate_table_subject_column_only(
                _table)
            to_compare = map_agdistis_entities_to_gold_standard_format(
                _table, dbpedia_lookup_entities)
            print("", flush=True)
            print(fixture_entities, flush=True)
            print("", flush=True)
            print(to_compare, flush=True)
            print(diff_entities(fixture_entities, to_compare), flush=True)
        except Exception as e:
            # Report the failure and move on to the next table instead of
            # retrying the same one indefinitely.
            print(str(e), flush=True)
Beispiel #2
0
def test_disambiguate_table_subject_column_only_case_1():
    """Smoke-test subject-column-only disambiguation on ``CASE_1_TABLE``.

    Builds a ``GenericTable`` around the fixture data with column 1 marked as
    the subject column and runs the disambiguation. No expectation on the
    returned entities is asserted; the test passes when the call completes
    without raising.
    """
    table = GenericTable()
    table.table = CASE_1_TABLE
    table.subject_column = 1
    # No debugger breakpoint here: an interactive prompt would block or fail
    # unattended test runs. Inspect the return value with `pytest --pdb` or a
    # temporary local breakpoint when debugging.
    disambiguate_table_subject_column_only(table)
Beispiel #3
0
 def get_additional_tables(self):
     """Load the additional tables listed in ``subject_columns.csv``.

     Every ``(_id, subject_column)`` pair read from
     ``ADDITIONAL_DATA_DIR/subject_columns.csv`` yields one ``GenericTable``
     loaded from ``ADDITIONAL_DATA_DIR/tables/<_id>``, truncated to the
     first ``ROWS_TO_ANALYZE`` rows and tagged with its subject column.

     Returns:
         A list of the initialised tables, in the order they appear in the
         CSV.
     """
     index_path = os.path.join(ADDITIONAL_DATA_DIR, "subject_columns.csv")
     entries = self.load_csv(index_path)
     tables_dir = os.path.join(ADDITIONAL_DATA_DIR, "tables")

     def build(table_id, column):
         # Load one table, trim it, and record its subject column.
         loaded = GenericTable(
             filename=os.path.join(tables_dir, table_id), _id=table_id)
         loaded.init()
         loaded.table = loaded.table[:int(ROWS_TO_ANALYZE)]
         loaded.subject_column = int(column)
         return loaded

     return [build(table_id, column) for (table_id, column) in entries]