def test_suggestions():
    """Build a complete annotation set from suggestions over a small, messy CSV.

    The sheet has a header row (row 1) and six data rows (rows 2-7) spread over
    four columns: main subject, date qualifier, and two numeric-ish columns.
    Each block is annotated via ``add_annotation_from_suggestion`` except the
    property block, which is appended by hand. The test makes no explicit
    assertions: it passes when none of the calls (including the final
    ``Annotation`` construction) raises.
    """
    # One CSV record per line; the selections below address rows 1-7.
    csv_string = """main subject,date qualifier,ugly numbers,numbers with stuff
ethiopia,1998,10 000 000,$100
ethiopia,1997,12 450,100$
panama,11/14/2011, ..,abcde
panama,14/11/2011,"13,400",200.8
italy,"July 4, 2011",100,2.00E+21
italy,"July 4, 2012",200,11"""
    sheet = Sheet.load_sheet_from_csv_string(csv_string)

    # Dependent variable: columns 3-4, data rows only.
    selection = {"x1": 3, "x2": 4, "y1": 2, "y2": 7}
    annotations = add_annotation_from_suggestion(sheet, selection, [])

    # The property block (header cells of columns 3-4) is added manually
    # rather than through a suggestion.
    annotations.append({
        "selection": {"x1": 3, "x2": 4, "y1": 1, "y2": 1},
        "role": "property",
    })

    # Qualifier: the date column, data rows only.
    selection = {"x1": 2, "x2": 2, "y1": 2, "y2": 7}
    annotations = add_annotation_from_suggestion(sheet, selection, annotations)

    # Qualifier property: header cell of the date column.
    selection = {"x1": 2, "x2": 2, "y1": 1, "y2": 1}
    annotations = add_annotation_from_suggestion(sheet, selection, annotations)

    # Main subject: first column, data rows only.
    selection = {"x1": 1, "x2": 1, "y1": 2, "y2": 7}
    annotations = add_annotation_from_suggestion(sheet, selection, annotations)

    # Constructing the Annotation is the final check; the instance itself is
    # not needed afterwards.
    Annotation(annotations)
def test_custom_properties(self):
    """Compare the kgtk output for ``oecd.csv`` with the stored ``results.tsv``.

    Custom properties are first loaded from ``self.custom_properties_file``
    (none may fail), then a knowledge graph is generated from the data file,
    yaml file and wikifier file held on ``self``. Its ``"kgtk"`` output is
    parsed as tab-separated rows and must match the expected file row for row,
    including the number of rows.
    """
    yaml_file = self.yaml_file
    sheet_name = "oecd.csv"

    add_props = add_entities_from_file(self.custom_properties_file)
    self.assertEqual(len(add_props["failed"]), 0)

    # Constructed as in the original test, although nothing below reads it.
    Sheet(self.data_file, sheet_name)
    kg = KnowledgeGraph.generate_from_files(
        self.data_file, sheet_name, yaml_file, self.wikifier_file)
    result = kg.get_output("kgtk")

    expected_result_name = "results.tsv"
    # Quoting is disabled entirely. ``None`` (not '') is the way to say "no
    # quote/escape character": empty strings are rejected from Python 3.11 on.
    csv_args_dict = dict(delimiter="\t", lineterminator="\n",
                         escapechar=None, quotechar=None,
                         dialect=csv.unix_dialect, quoting=csv.QUOTE_NONE)

    expected_path = os.path.join(self.expected_result_dir, expected_result_name)
    # newline='' lets the csv module do its own line-ending handling.
    with open(expected_path, 'r', newline='') as f:
        expected_rows = list(csv.reader(f, **csv_args_dict))
    result_rows = list(csv.reader(StringIO(result), **csv_args_dict))

    # zip() alone would stop at the shorter input and hide missing or extra
    # rows, so the lengths are checked explicitly first.
    self.assertEqual(len(expected_rows), len(result_rows),
                     "kgtk output has a different number of rows than expected")
    for e_line, r_line in zip(expected_rows, result_rows):
        self.assertEqual(e_line, r_line)
def test_wikifier(self):
    """Round-trip a wikifier through save/load after overriding one cell.

    A wikifier is populated from ``wikifier_1.csv``, a one-row dataframe
    mapping column 0 / row 5 ('Comoros') to ``Q99`` is added with
    ``replace=True``, and the result is saved and reloaded. The reloaded item
    table must return ``Q967`` for (0, 3) and ``Q99`` for (0, 5).
    """
    import pandas as pd
    from t2wml.api import Wikifier, Sheet

    folder = os.path.join(unit_test_folder, "error-catching")
    wikifier_path = os.path.join(folder, "wikifier_1.csv")
    input_sheet = Sheet(os.path.join(folder, "input_1.csv"), "input_1.csv")
    save_path = os.path.join(folder, "test_save_wf")

    wikifier = Wikifier()
    wikifier.add_file(wikifier_path)

    # Single-row override for the cell at column 0, row 5.
    override = {
        "column": ['0'],
        "row": ['5'],
        "value": 'Comoros',
        "item": ['Q99'],
        "context": [''],
        "file": ["input_1.csv"],
        "sheet": ["input_1.csv"],
    }
    override_df = pd.DataFrame.from_dict(override)
    wikifier.add_dataframe(override_df, replace=True)
    wikifier.save_to_file(save_path)

    reloaded = Wikifier.load_from_file(save_path)
    assert reloaded.item_table.get_item(0, 3, input_sheet) == "Q967"
    assert reloaded.item_table.get_item(0, 5, input_sheet) == "Q99"
def test_blocks_from_csv():
    """Run ``guess_annotation`` on a small three-column CSV loaded without a header.

    The sheet is loaded with ``header=None``, so the first CSV record is
    treated as ordinary data. The test makes no explicit assertions: it passes
    when loading the sheet and guessing the annotation do not raise.
    """
    # One CSV record per line, three fields each.
    csv_string = """date qualifier,ugly numbers,numbers with stuff
"",ethiopia,panama
1998,10 000 000,$100
1997,12 450,100$
11/14/2011, ..,abcde
14/11/2011,"13,400",200.8
"July 4, 2011",100,2.00E+21
"July 4, 2012",200,11"""
    s = Sheet.load_sheet_from_csv_string(csv_string, header=None)
    guess_annotation(s)
def test_basic_imports(self):
    """Import the main ``t2wml.api`` names and generate a knowledge graph with them.

    Uses the files in the ``error-catching`` test folder. There are no
    explicit assertions; the test passes when the import and every call
    complete without raising.
    """
    from t2wml.api import KnowledgeGraph, YamlMapper, Wikifier, Sheet, SpreadsheetFile

    folder = os.path.join(unit_test_folder, "error-catching")
    data_path = os.path.join(folder, "input_1.csv")
    yaml_path = os.path.join(folder, "error.yaml")
    wikifier_path = os.path.join(folder, "wikifier_1.csv")

    input_sheet = Sheet(data_path, "input_1.csv")
    mapper = YamlMapper(yaml_path)

    wikifier = Wikifier()
    wikifier.add_file(wikifier_path)

    KnowledgeGraph.generate(mapper, input_sheet, wikifier)
def test_error(self):
    """Check both the JSON output and the reported errors for ``input_1.csv``.

    The knowledge graph's ``"json"`` output and its ``errors`` mapping
    (re-keyed through ``to_excel``) are validated against the ``"data"`` and
    ``"error"`` sections of the stored ``results.json``.
    """
    sheet_name = "input_1.csv"
    yaml_file = self.yaml_file
    sheet = Sheet(self.data_file, sheet_name)

    kg = KnowledgeGraph.generate_from_files(
        self.data_file, sheet_name, yaml_file, self.wikifier_file)
    raw_json = kg.get_output("json")

    # Re-key the errors by passing each key's components through to_excel.
    errors_by_cell = {}
    for key in kg.errors:
        cell = to_excel(*key)
        errors_by_cell[cell] = kg.errors[key]

    actual_data = json.loads(raw_json)

    expected_path = os.path.join(self.expected_result_dir, "results.json")
    with open(expected_path, 'r') as f:
        expected = json.load(f)

    self.validate_results(actual_data, expected["data"])
    self.validate_results(errors_by_cell, expected["error"])
def run_test_on_sheet(self, sheet_name):
    """Generate JSON output for one sheet and validate it against its stored result.

    File names are derived from ``sheet_name``: ``<sheet_name>.yaml`` in
    ``self.yaml_folder``, ``<sheet_name>.json`` in ``self.expected_result_dir``,
    and a wikifier file at ``self.wikifier_file + sheet_name + ".csv"``.
    """
    yaml_path = os.path.join(self.yaml_folder, sheet_name + ".yaml")
    expected_path = os.path.join(self.expected_result_dir, sheet_name + ".json")
    sheet = Sheet(self.data_file, sheet_name)

    wikifier_path = self.wikifier_file + sheet_name + ".csv"
    kg = KnowledgeGraph.generate_from_files(
        self.data_file, sheet_name, yaml_path, wikifier_path)
    actual = json.loads(kg.get_output("json"))

    # To (re)create the expected file on an initial run, dump the result
    # insertion-ordered and indented so it stays readable:
    # with open(expected_path, 'w') as f:
    #     json.dump(actual, f, sort_keys=False, indent=4)

    with open(expected_path, 'r') as f:
        expected = json.load(f)
    self.validate_results(actual, expected)
def test_custom_statement_mapper(self):
    """Generate a knowledge graph with a hand-written ``StatementMapper`` subclass.

    The mapper visits every (col, row) pair from the given columns and rows
    and builds a statement with a fixed property ``P123``. There are no
    explicit assertions; the test passes when generation does not raise.
    """
    from t2wml.mapping.statement_mapper import StatementMapper
    from t2wml.api import KnowledgeGraph, Wikifier, Sheet

    class SimpleSheetMapper(StatementMapper):
        """Mapper that produces one statement per cell of a column/row grid."""

        def __init__(self, cols, rows):
            self.cols = cols
            self.rows = rows

        def iterator(self, start_index=0, end_index=None):
            # Column-major order: all rows of the first column, then the next.
            yield from ((col, row) for col in self.cols for row in self.rows)

        def get_cell_statement(self, col, row, do_init, sheet, wikifier, *args, **kwargs):
            statement = {}
            error = {}

            # Subject: the wikified item of the cell one column to the left.
            try:
                statement["subject"] = wikifier.item_table.get_item(col - 1, row, sheet)
            except Exception as exc:
                error["subject"] = str(exc)

            # Value: the content of the cell itself.
            try:
                statement["value"] = sheet[col, row]
            except Exception as exc:
                error["value"] = str(exc)

            statement["property"] = "P123"
            return statement, error

    folder = os.path.join(unit_test_folder, "custom_classes")
    data_path = os.path.join(folder, "Book1.xlsx")
    sheet_name = "Sheet1"
    wikifier_path = os.path.join(folder, "wikifier_1.csv")

    mapper = SimpleSheetMapper([1, 3], [2, 3, 4, 5, 6, 7])
    book_sheet = Sheet(data_path, sheet_name)

    wikifier = Wikifier()
    wikifier.add_file(wikifier_path)

    KnowledgeGraph.generate(mapper, book_sheet, wikifier)
def test_project_asingle(self):
    """Assemble a single-sheet project for the homicide data and save it.

    Adds the data file and entity file, feeds the general wikifier CSV into
    the project's wikifier for sheet ``table-1a``, registers and associates
    the yaml file, then saves. There are no explicit assertions; the test
    passes when none of the calls raises.
    """
    from t2wml.api import Project

    data_file_name = "homicide_report_total_and_sex.xlsx"
    sheet_name = "table-1a"
    project_folder = os.path.join(unit_test_folder, "homicide")

    project = Project(project_folder)
    project.add_data_file(data_file_name)
    project.add_entity_file("homicide_properties.tsv")

    wikifier_path = os.path.join(project_folder, "unit_wikifier_general.csv")
    sheet = Sheet(os.path.join(project_folder, data_file_name), sheet_name)
    wikifier_df = pd.read_csv(wikifier_path)
    project.add_df_to_wikifier_file(sheet, wikifier_df)

    yaml_file = project.add_yaml_file(os.path.join("t2wml", "table-1a.yaml"))
    project.associate_yaml_with_sheet(yaml_file, data_file_name, sheet_name)
    project.save()
def sheet(self):
    """Return a new ``Sheet`` built from ``self.data_path`` and ``self.sheet_name``."""
    path, name = self.data_path, self.sheet_name
    return Sheet(path, name)
def test_block_from_sheet():
    """Run ``guess_annotation`` on the FreedomHousePressFreedomIndex CSV sheet.

    The data file lives at an absolute path on one developer's machine. When
    that file is not present the test is skipped instead of erroring, so the
    suite stays usable elsewhere. There are no explicit assertions; when the
    file exists the test passes if loading and guessing do not raise.

    Raises:
        unittest.SkipTest: if the local data file does not exist.
    """
    import os
    import unittest

    data_file = r"C:\Users\devora\C_sources\pedro\various files\coumntry-wikifier-bug\FreedomHousePressFreedomIndex.csv"
    sheet_name = "FreedomHousePressFreedomIndex"

    # Machine-specific fixture: skip rather than fail where it is unavailable.
    if not os.path.isfile(data_file):
        raise unittest.SkipTest("local data file not available: " + data_file)

    s = Sheet(data_file, sheet_name)
    guess_annotation(s)