Esempio n. 1
0
def test_suggestions():
    """Build annotations from suggestions on a sheet loaded from inline CSV.

    This is a smoke test: it only checks that every step runs without
    raising; nothing is asserted about the resulting annotations.
    """
    csv_string = """main subject,date qualifier,ugly numbers,numbers with stuff
ethiopia,1998,10 000 000,$100
ethiopia,1997,12 450,100$
panama,11/14/2011, ..,abcde
panama,14/11/2011,"13,400",200.8
italy,"July 4, 2011",100,2.00E+21
italy,"July 4, 2012",200,11"""
    sheet = Sheet.load_sheet_from_csv_string(csv_string)

    # Dependent-variable selection is suggested first, starting from an
    # empty annotation list.
    depvar_selection = dict(x1=3, x2=4, y1=2, y2=7)
    annotations = add_annotation_from_suggestion(sheet, depvar_selection, [])

    # The property block is appended by hand instead of via a suggestion.
    property_block = {
        "selection": dict(x1=3, x2=4, y1=1, y2=1),
        "role": "property",
    }
    annotations.append(property_block)

    # Remaining selections are suggested one after another, each call
    # receiving the annotations accumulated so far.
    remaining_selections = (
        dict(x1=2, x2=2, y1=2, y2=7),  # qualifier
        dict(x1=2, x2=2, y1=1, y2=1),  # qualifier property
        dict(x1=1, x2=1, y1=2, y2=7),  # main subject
    )
    for selection in remaining_selections:
        annotations = add_annotation_from_suggestion(sheet, selection,
                                                     annotations)

    # Constructing the Annotation must succeed for the accumulated blocks.
    Annotation(annotations)
Esempio n. 2
0
    def test_custom_properties(self):
        """Compare the kgtk output for ``oecd.csv`` with the stored ``results.tsv``.

        Custom properties are first loaded from ``self.custom_properties_file``
        and must all load without failures. The knowledge graph is then
        generated from the data, yaml and wikifier files, and its "kgtk"
        output is compared row by row with the expected TSV file.
        """
        yaml_file = self.yaml_file
        sheet_name = "oecd.csv"
        add_props = add_entities_from_file(self.custom_properties_file)
        self.assertEqual(len(add_props["failed"]), 0)
        # Constructed but not otherwise used here (only the disabled
        # conversion step below references it).
        sheet = Sheet(self.data_file, sheet_name)
        #convert_old_wikifier_to_new(self.wikifier_file, sheet, self.wikifier_file)

        kg = KnowledgeGraph.generate_from_files(self.data_file, sheet_name,
                                                yaml_file, self.wikifier_file)
        result = kg.get_output("kgtk")
        expected_result_name = "results.tsv"
        # None (not '') disables quoting/escaping: the csv module rejects
        # empty-string quotechar/escapechar from Python 3.11 on, while None
        # has the same meaning on older versions.
        csv_args_dict = dict(delimiter="\t",
                             lineterminator="\n",
                             escapechar=None,
                             quotechar=None,
                             dialect=csv.unix_dialect,
                             quoting=csv.QUOTE_NONE)
        with open(os.path.join(self.expected_result_dir, expected_result_name),
                  'r') as f:
            expected_rows = list(csv.reader(f, **csv_args_dict))
        result_rows = list(csv.reader(StringIO(result), **csv_args_dict))

        # Compare row by row first so a mismatch points at a specific line.
        for line_number, (e_line, r_line) in enumerate(
                zip(expected_rows, result_rows), start=1):
            self.assertEqual(e_line, r_line,
                             "mismatch on line %d" % line_number)
        # zip() stops at the shorter input, so missing or extra rows must be
        # detected explicitly.
        self.assertEqual(len(expected_rows), len(result_rows))
Esempio n. 3
0
    def test_wikifier(self):
        """Save a Wikifier to disk, reload it, and look up two items.

        A wikifier file is added first, then a one-row dataframe is added
        with ``replace=True``. After saving and reloading, the item lookups
        at (0, 3) and (0, 5) must return "Q967" and "Q99" respectively.
        """
        import pandas as pd
        from t2wml.api import Wikifier, Sheet

        folder = os.path.join(unit_test_folder, "error-catching")
        wikifier_path = os.path.join(folder, "wikifier_1.csv")
        saved_path = os.path.join(folder, "test_save_wf")
        input_sheet = Sheet(os.path.join(folder, "input_1.csv"), "input_1.csv")
        # Disabled conversion step, kept for reference:
        #convert_old_wikifier_to_new(wikifier_path, input_sheet, wikifier_path)

        wikifier = Wikifier()
        wikifier.add_file(wikifier_path)

        # Single extra row; "value" is given as a scalar while the other
        # columns are one-element lists.
        extra_row = {
            "column": ['0'],
            "row": ['5'],
            "value": 'Comoros',
            "item": ['Q99'],
            "context": [''],
            "file": ["input_1.csv"],
            "sheet": ["input_1.csv"],
        }
        wikifier.add_dataframe(pd.DataFrame.from_dict(extra_row), replace=True)
        wikifier.save_to_file(saved_path)

        reloaded = Wikifier.load_from_file(saved_path)
        expected_items = (((0, 3), "Q967"), ((0, 5), "Q99"))
        for (col, row), qnode in expected_items:
            assert reloaded.item_table.get_item(col, row, input_sheet) == qnode
Esempio n. 4
0
def test_blocks_from_csv():
    """Run ``guess_annotation`` on a sheet loaded from inline CSV with ``header=None``.

    This is a smoke test: it only checks that loading and guessing run
    without raising; the guessed annotation is not inspected.
    """
    csv_rows = (
        'date qualifier,ugly numbers,numbers with stuff',
        '"",ethiopia,panama',
        '1998,10 000 000,$100',
        '1997,12 450,100$',
        '11/14/2011, ..,abcde',
        '14/11/2011,"13,400",200.8',
        '"July 4, 2011",100,2.00E+21',
        '"July 4, 2012",200,11',
    )
    # Rows are joined with newlines and no trailing newline.
    csv_string = "\n".join(csv_rows)

    sheet = Sheet.load_sheet_from_csv_string(csv_string, header=None)
    guess_annotation(sheet)
Esempio n. 5
0
    def test_basic_imports(self):
        """Import the main ``t2wml.api`` names and generate a knowledge graph.

        Uses the data, yaml and wikifier files from the "error-catching"
        folder. Nothing is asserted; the test passes when the imports and
        the generation run without raising.
        """
        from t2wml.api import KnowledgeGraph, YamlMapper, Wikifier, Sheet, SpreadsheetFile

        def in_test_folder(file_name):
            # Path of a file inside the "error-catching" test folder.
            return os.path.join(unit_test_folder, "error-catching", file_name)

        data_path = in_test_folder("input_1.csv")
        yaml_path = in_test_folder("error.yaml")
        wikifier_path = in_test_folder("wikifier_1.csv")

        sheet = Sheet(data_path, "input_1.csv")
        # Disabled conversion step, kept for reference:
        #convert_old_wikifier_to_new(wikifier_path, sheet, wikifier_path)
        mapper = YamlMapper(yaml_path)
        wikifier = Wikifier()
        wikifier.add_file(wikifier_path)
        KnowledgeGraph.generate(mapper, sheet, wikifier)
Esempio n. 6
0
    def test_error(self):
        """Compare JSON output and collected errors for ``input_1.csv`` with ``results.json``.

        The knowledge graph's errors are re-keyed with ``to_excel`` before
        being compared with the "error" section of the expected file; the
        JSON output is compared with its "data" section.
        """
        sheet_name = "input_1.csv"
        # Constructed but not otherwise used here (only the disabled
        # conversion step below references it).
        sheet = Sheet(self.data_file, sheet_name)
        #convert_old_wikifier_to_new(self.wikifier_file, sheet, self.wikifier_file+sheet_name+".csv")
        kg = KnowledgeGraph.generate_from_files(
            self.data_file, sheet_name, self.yaml_file, self.wikifier_file)
        output = kg.get_output("json")

        # Each error key is unpacked into to_excel() to build the new key.
        errors_by_cell = {}
        for cell in kg.errors:
            errors_by_cell[to_excel(*cell)] = kg.errors[cell]

        actual = {"data": json.loads(output), "error": errors_by_cell}

        expected_path = os.path.join(self.expected_result_dir, "results.json")
        with open(expected_path, 'r') as handle:
            expected = json.load(handle)

        for section in ("data", "error"):
            self.validate_results(actual[section], expected[section])
Esempio n. 7
0
    def run_test_on_sheet(self, sheet_name):
        """Generate the JSON output for one sheet and compare it with the stored result.

        Uses ``<sheet_name>.yaml`` from ``self.yaml_folder``, the wikifier
        file ``self.wikifier_file + sheet_name + ".csv"``, and the expected
        output ``<sheet_name>.json`` from ``self.expected_result_dir``.
        """
        yaml_file = os.path.join(self.yaml_folder, sheet_name + ".yaml")
        expected_path = os.path.join(self.expected_result_dir,
                                     sheet_name + ".json")

        # Constructed but not otherwise used in this method.
        sheet = Sheet(self.data_file, sheet_name)
        kg = KnowledgeGraph.generate_from_files(
            self.data_file,
            sheet_name,
            yaml_file,
            self.wikifier_file + sheet_name + ".csv",
        )
        actual = json.loads(kg.get_output("json"))

        # Code for saving results in an initial run (insertion-ordered and
        # indented as a mercy to future users):
        # with open(expected_path, 'w') as f:
        #    json.dump(actual, f, sort_keys=False, indent=4)
        with open(expected_path, 'r') as handle:
            expected = json.load(handle)

        self.validate_results(actual, expected)
Esempio n. 8
0
    def test_custom_statement_mapper(self):
        """Generate a knowledge graph using a hand-written ``StatementMapper`` subclass.

        Nothing is asserted; the test passes when generation with the custom
        mapper runs without raising.
        """
        from t2wml.mapping.statement_mapper import StatementMapper
        from t2wml.api import KnowledgeGraph, Wikifier, Sheet

        class SimpleSheetMapper(StatementMapper):
            """Mapper that visits every (col, row) pair of the given columns and rows."""

            def __init__(self, cols, rows):
                self.cols = cols
                self.rows = rows

            def iterator(self, start_index=0, end_index=None):
                # Column-major order; start_index and end_index are ignored.
                yield from ((col, row) for col in self.cols
                            for row in self.rows)

            def get_cell_statement(self, col, row, do_init, sheet, wikifier,
                                   *args, **kwargs):
                # Returns (statement, error); a failure in either lookup is
                # recorded as text under the same key instead of propagating.
                statement = {}
                error = {}

                try:
                    # The subject is looked up one column to the left.
                    statement["subject"] = wikifier.item_table.get_item(
                        col - 1, row, sheet)
                except Exception as e:
                    error["subject"] = str(e)

                try:
                    statement["value"] = sheet[col, row]
                except Exception as e:
                    error["value"] = str(e)

                statement["property"] = "P123"
                return statement, error

        folder = os.path.join(unit_test_folder, "custom_classes")
        workbook_path = os.path.join(folder, "Book1.xlsx")
        wikifier_path = os.path.join(folder, "wikifier_1.csv")

        mapper = SimpleSheetMapper([1, 3], [2, 3, 4, 5, 6, 7])
        sheet = Sheet(workbook_path, "Sheet1")
        # Disabled conversion step, kept for reference:
        #convert_old_wikifier_to_new(wikifier_path, sheet, wikifier_path)
        wikifier = Wikifier()
        wikifier.add_file(wikifier_path)
        KnowledgeGraph.generate(mapper, sheet, wikifier)
Esempio n. 9
0
 def test_project_asingle(self):
     """Assemble a Project from the "homicide" folder and save it.

     Adds a data file, an entity file, wikifier rows read from CSV, and a
     yaml file associated with sheet "table-1a". Nothing is asserted; the
     test passes when every step runs without raising.
     """
     from t2wml.api import Project

     workbook_name = "homicide_report_total_and_sex.xlsx"
     table_name = "table-1a"
     project_folder = os.path.join(unit_test_folder, "homicide")

     sp = Project(project_folder)
     sp.add_data_file(workbook_name)
     sp.add_entity_file("homicide_properties.tsv")

     wikifier_path = os.path.join(project_folder, "unit_wikifier_general.csv")
     sheet = Sheet(os.path.join(project_folder, workbook_name), table_name)
     # Disabled conversion step, kept for reference:
     #convert_old_wikifier_to_new(wikifier_path, sheet, wikifier_path)
     wikifier_df = pd.read_csv(wikifier_path)
     sp.add_df_to_wikifier_file(sheet, wikifier_df)

     yaml_entry = sp.add_yaml_file(os.path.join("t2wml", "table-1a.yaml"))
     sp.associate_yaml_with_sheet(yaml_entry, workbook_name, table_name)
     sp.save()
Esempio n. 10
0
 def sheet(self):
     """Return a new ``Sheet`` built from ``self.data_path`` and ``self.sheet_name``."""
     path, name = self.data_path, self.sheet_name
     return Sheet(path, name)
Esempio n. 11
0
def test_block_from_sheet(
        data_file=r"C:\Users\devora\C_sources\pedro\various files\coumntry-wikifier-bug\FreedomHousePressFreedomIndex.csv",
        sheet_name="FreedomHousePressFreedomIndex"):
    """Run ``guess_annotation`` on a sheet loaded from a data file on disk.

    The default ``data_file`` is an absolute path on one developer's
    machine, so the test is skipped (rather than erroring) when that file is
    not present. Both arguments have defaults, so the test can still be
    called with no arguments.

    Args:
        data_file: Path of the data file to load.
        sheet_name: Name of the sheet to load from ``data_file``.

    Raises:
        unittest.SkipTest: If ``data_file`` does not exist.
    """
    import os
    import unittest

    if not os.path.isfile(data_file):
        raise unittest.SkipTest("data file not available: %s" % data_file)

    s = Sheet(data_file, sheet_name)
    # Smoke check only: the guessed annotation is not inspected.
    guess_annotation(s)