Ejemplo n.º 1
0
def test_validate_order_fields_issue_313():
    """Validate data/issue-313.xlsx with fields 1-5 picked and sync_schema on.

    The schema declares five string columns, the second of which is
    required; the resulting report is expected to be valid.
    """
    fields = [
        {"name": "Column_1", "type": "string"},
        {"name": "Column_2", "type": "string", "constraints": {"required": True}},
        {"name": "Column_3", "type": "string"},
        {"name": "Column_4", "type": "string"},
        {"name": "Column_5", "type": "string"},
    ]
    picked = Query(pick_fields=[1, 2, 3, 4, 5])
    report = validate(
        "data/issue-313.xlsx",
        query=picked,
        schema={"fields": fields},
        sync_schema=True,
    )
    assert report.valid
Ejemplo n.º 2
0
def test_xlsx_parser_adjust_floating_point_error_default():
    """Read an xlsx cell with preserve_formatting only and expect the raw float.

    Opening/reading the table must emit a UserWarning, and the cell at
    row index 1, column index 2 must equal 274.65999999999997.
    """
    excel_dialect = ExcelDialect(preserve_formatting=True)
    skip_blank_fields = Query(skip_fields=["<blank>"])
    path = "data/adjust-floating-point-error.xlsx"
    with pytest.warns(UserWarning), Table(
        path, dialect=excel_dialect, query=skip_blank_fields
    ) as table:
        data = table.read_data()
        assert data[1][2] == 274.65999999999997
Ejemplo n.º 3
0
def test_table_limit_offset_fields():
    """Combine limit_fields=1 with offset_fields=1 and expect only header2."""
    csv_text = "text://header1,header2,header3\nvalue1,value2,value3"
    field_window = Query(limit_fields=1, offset_fields=1)
    with Table(csv_text, format="csv", query=field_window) as table:
        header = table.header
        assert header == ["header2"]
        assert header.field_positions == [2]
        rows = table.read_rows()
        assert rows == [{"header2": "value2"}]
Ejemplo n.º 4
0
def test_table_limit_fields_error_zero_issue_521():
    """Opening a table with limit_fields=0 must raise a "query-error"."""
    table = Table("data/long.csv", query=Query(limit_fields=0))
    with pytest.raises(exceptions.FrictionlessException) as raised:
        table.open()
    query_error = raised.value.error
    expected_fragment = 'minimum of 1" at "limitFields'
    assert query_error.code == "query-error"
    # The note must mention the violated minimum at least once.
    assert query_error.note.count(expected_fragment) > 0
Ejemplo n.º 5
0
def test_table_reset_on_close_issue_190():
    """Open the same table twice and expect identical data on each read.

    With ``limit_rows=1`` and ``headers=False`` both reads are expected to
    return only the first source row. The comparisons are real assertions
    (the bare ``==`` expressions used previously discarded their result,
    so the test could never fail), and the table is closed even when an
    assertion fails.
    """
    query = Query(limit_rows=1)
    source = [["1", "english"], ["2", "中国人"]]
    expected = [["1", "english"]]
    table = Table(source, headers=False, query=query)
    table.open()
    try:
        assert table.read_data() == expected
        # Second open() without an intervening close(): reading must start over.
        table.open()
        assert table.read_data() == expected
    finally:
        table.close()
Ejemplo n.º 6
0
def test_table_skip_fields_position_and_prefix():
    """Skip field 2 by position and "header3" by name; only header1 remains."""
    csv_text = "text://header1,header2,header3\nvalue1,value2,value3"
    skipper = Query(skip_fields=[2, "header3"])
    with Table(csv_text, format="csv", query=skipper) as table:
        header = table.header
        assert header == ["header1"]
        assert header.field_positions == [1]
        rows = table.read_rows()
        assert rows == [{"header1": "value1"}]
Ejemplo n.º 7
0
def test_table_skip_fields_blank_header_notation():
    """Skip fields using the "<blank>" notation; the unnamed column is dropped."""
    csv_text = "text://header1,,header3\nvalue1,value2,value3"
    skipper = Query(skip_fields=["<blank>"])
    with Table(csv_text, format="csv", query=skipper) as table:
        header = table.header
        assert header == ["header1", "header3"]
        assert header.field_positions == [1, 3]
        rows = table.read_rows()
        assert rows == [{"header1": "value1", "header3": "value3"}]
Ejemplo n.º 8
0
def test_table_offset_rows_error_zero_issue_521():
    """Opening a table with offset_rows=0 must raise a "query-error"."""
    table = Table("data/long.csv", query=Query(offset_rows=0))
    with pytest.raises(FrictionlessException) as raised:
        table.open()
    query_error = raised.value.error
    expected_fragment = 'minimum of 1" at "offsetRows'
    assert query_error.code == "query-error"
    # The note must mention the violated minimum at least once.
    assert query_error.note.count(expected_fragment) > 0
Ejemplo n.º 9
0
def test_table_skip_fields_regex():
    """Skip fields with a "<regex>" pattern matching header1 and header3."""
    csv_text = "text://header1,header2,header3\nvalue1,value2,value3"
    skipper = Query(skip_fields=["<regex>header(1|3)"])
    with Table(csv_text, format="csv", query=skipper) as table:
        header = table.header
        assert header == ["header2"]
        assert header.field_positions == [2]
        rows = table.read_rows()
        assert rows == [{"header2": "value2"}]
Ejemplo n.º 10
0
def test_table_write_ods(tmpdir):
    """Write data/table.csv to an .ods file and read a 2x2 window back."""
    ods_path = str(tmpdir.join("table.ods"))
    with Table("data/table.csv") as csv_table:
        csv_table.write(ods_path)
    # NOTE: ezodf writer creates more cells than we ask,
    # so the read-back is limited to two fields and two rows.
    window = Query(limit_fields=2, limit_rows=2)
    with Table(ods_path, query=window) as ods_table:
        assert ods_table.header == ["id", "name"]
        written = ods_table.read_data()
        assert written == [[1, "english"], [2, "中国人"]]
    def read_data_and_validate(self):
        """Read the data at ``self.filepath`` and validate it with frictionless.

        Reads ``self.table_params``, ``self.metadata``, ``self.schema`` and
        ``self.filepath``. On success stores the report's validity in
        ``self.valid`` and the first table of the report, converted to a
        dict, in ``self.response``.

        Raises:
            ValueError: if ``self.filepath`` contains a space.
        """

        log.info(f"Reading and validating: {self.filepath}")

        skip_errors = []

        # assert the correct dialect and checks
        header_case = not self.table_params.get("headers-ignore-case", False)
        if "json" in self.metadata["file_format"]:
            partitions = self.metadata.get("partitions", [])
            # Columns are dicts with a "name" key. Partitions are presumably
            # listed by column name (TODO confirm against the metadata spec),
            # so comparing the whole column dict alone would never exclude
            # anything. Check the name as well, and keep the original
            # whole-entry check in case partitions hold column dicts.
            expected_headers = [
                c["name"]
                for c in self.metadata["columns"]
                if c not in partitions and c["name"] not in partitions
            ]
            dialect = dialects.JsonDialect(keys=expected_headers)
            if (
                "headers-ignore-case" in self.table_params
                or "expect-header" in self.table_params
            ):
                conf_warn = (
                    "jsonl files do not support header options. If keys "
                    "in json lines do not match up exactly (i.e. case sensitive) "
                    "with meta columns then keys will be nulled"
                )
                log.warning(conf_warn)
        else:  # assumes CSV
            dialect = dialects.Dialect(header_case=header_case)
            if not self.table_params.get("expect-header"):
                # No header expected: header ("#head") errors are skipped.
                skip_errors.append("#head")

        query = None
        row_limit = self.table_params.get("row-limit", False)

        if row_limit:
            # Only restrict the number of rows when a truthy limit is given.
            query = Query(limit_rows=row_limit)

        if " " in self.filepath:
            raise ValueError("The filepath must not contain a space")

        with Table(self.filepath, dialect=dialect, query=query) as table:
            resp = validate(
                table.row_stream,
                schema=self.schema,
                dialect=dialect,
                skip_errors=skip_errors,
            )

        self.valid = resp.valid
        # Returns a class so lazily converting it to dict
        self.response = dict(resp.tables[0])
Ejemplo n.º 12
0
def test_validate_invalid_limit_rows():
    """Validate data/invalid.csv with limit_rows=2 and check the flattened errors."""
    expected_errors = [
        [None, 3, "blank-header"],
        [None, 4, "duplicate-header"],
        [2, 3, "missing-cell"],
        [2, 4, "missing-cell"],
        [3, 3, "missing-cell"],
        [3, 4, "missing-cell"],
    ]
    report = validate("data/invalid.csv", query=Query(limit_rows=2))
    errors = report.flatten(["rowPosition", "fieldPosition", "code"])
    assert errors == expected_errors
Ejemplo n.º 13
0
def test_table_xlsx_adjust_floating_point_error():
    """Read an xlsx cell with adjust_floating_point_error=True and expect 274.66.

    Opening/reading the table must also emit a UserWarning.
    """
    excel_dialect = dialects.ExcelDialect(
        adjust_floating_point_error=True,
        preserve_formatting=True,
        fill_merged_cells=False,
    )
    skip_blank_fields = Query(skip_fields=["<blank>"])
    path = "data/adjust-floating-point-error.xlsx"
    with pytest.warns(UserWarning), Table(
        path, dialect=excel_dialect, query=skip_blank_fields
    ) as table:
        data = table.read_data()
        assert data[1][2] == 274.66
Ejemplo n.º 14
0
def test_table_skip_rows_regex():
    """Skip rows by a literal prefix and by a "<regex>" pattern."""
    skipper = Query(skip_rows=["# comment", r"<regex># (cat|dog)"])
    inline_rows = [
        ["# comment"],
        ["name", "order"],
        ["# cat"],
        ["# dog"],
        ["John", 1],
        ["Alex", 2],
    ]
    with Table(inline_rows, query=skipper) as table:
        assert table.header == ["name", "order"]
        remaining = table.read_data()
        assert remaining == [["John", 1], ["Alex", 2]]
Ejemplo n.º 15
0
def test_table_pick_rows_regex():
    """Pick only the rows matching a "<regex>" pattern."""
    picker = Query(pick_rows=[r"<regex>(name|John|Alex)"])
    inline_rows = [
        ["# comment"],
        ["name", "order"],
        ["# cat"],
        ["# dog"],
        ["John", 1],
        ["Alex", 2],
    ]
    with Table(inline_rows, query=picker) as table:
        assert table.header == ["name", "order"]
        picked = table.read_data()
        assert picked == [["John", 1], ["Alex", 2]]
Ejemplo n.º 16
0
def test_table_skip_rows_preset():
    """Skip rows with the "<blank>" preset and check which rows survive."""
    skipper = Query(skip_rows=["<blank>"])
    inline_rows = [
        ["name", "order"],
        ["", ""],
        [],
        ["Ray", 0],
        ["John", 1],
        ["Alex", 2],
        ["", 3],
        [None, 4],
        ["", None],
    ]
    expected_rows = [
        ["Ray", 0],
        ["John", 1],
        ["Alex", 2],
        ["", 3],
        [None, 4],
    ]
    with Table(inline_rows, query=skipper) as table:
        assert table.header == ["name", "order"]
        remaining = table.read_data()
        assert remaining == expected_rows
Ejemplo n.º 17
0
def test_table_skip_rows_non_string_cell_issue_322():
    """Skip rows with the string "1" when the first cell is the integer 1."""
    inline_rows = [["id", "name"], [1, "english"], [2, "spanish"]]
    skipper = Query(skip_rows=["1"])
    with Table(inline_rows, query=skipper) as table:
        assert table.header == ["id", "name"]
        remaining = table.read_data()
        assert remaining == [[2, "spanish"]]
Ejemplo n.º 18
0
def test_resource_to_table_respect_query_issue_503():
    """A table built via Resource.to_table() must honour the resource's query."""
    one_row = Query(limit_rows=1)
    resource = Resource(path="data/table.csv", query=one_row)
    with resource.to_table() as table:
        assert table.header == ["id", "name"]
        rows = table.read_rows()
        assert rows == [{"id": 1, "name": "english"}]
Ejemplo n.º 19
0
def test_resource_respect_query_set_after_creation_issue_503():
    """A query assigned after the Resource is created must still be applied."""
    resource = Resource(path="data/table.csv")
    # Assign the query only after construction; that is the case under test.
    resource.query = Query(limit_rows=1)
    header = resource.read_header()
    assert header == ["id", "name"]
    rows = resource.read_rows()
    assert rows == [{"id": 1, "name": "english"}]
Ejemplo n.º 20
0
def test_xlsx_parser_preserve_formatting_number_multicode():
    """Read data/number-format-multicode.xlsx with preserve_formatting=True."""
    excel_dialect = ExcelDialect(preserve_formatting=True)
    skip_blank_fields = Query(skip_fields=["<blank>"])
    path = "data/number-format-multicode.xlsx"
    with Table(path, dialect=excel_dialect, query=skip_blank_fields) as table:
        formatted = table.read_data()
        assert formatted == [["4.5"], ["-9.032"], ["15.8"]]
Ejemplo n.º 21
0
def test_table_limit_offset_rows():
    """Combine limit_rows=2 with offset_rows=2 on data/long.csv."""
    row_window = Query(limit_rows=2, offset_rows=2)
    with Table("data/long.csv", query=row_window) as table:
        assert table.header == ["id", "name"]
        data = table.read_data()
        assert data == [["3", "c"], ["4", "d"]]
Ejemplo n.º 22
0
def test_table_offset_rows():
    """Apply offset_rows=5 to data/long.csv and expect only the sixth row."""
    offset = Query(offset_rows=5)
    with Table("data/long.csv", query=offset) as table:
        assert table.header == ["id", "name"]
        data = table.read_data()
        assert data == [["6", "f"]]
Ejemplo n.º 23
0
def test_table_limit_rows():
    """Apply limit_rows=1 to data/long.csv and expect only the first row."""
    limit = Query(limit_rows=1)
    with Table("data/long.csv", query=limit) as table:
        assert table.header == ["id", "name"]
        data = table.read_data()
        assert data == [["1", "a"]]
Ejemplo n.º 24
0
def test_table_skip_rows_with_headers_example_from_readme():
    """Skip rows starting with "#" so the header is taken from the next row."""
    inline_rows = [["#comment"], ["name", "order"], ["John", 1], ["Alex", 2]]
    skip_comments = Query(skip_rows=["#"])
    with Table(inline_rows, query=skip_comments) as table:
        assert table.header == ["name", "order"]
        data = table.read_data()
        assert data == [["John", 1], ["Alex", 2]]
Ejemplo n.º 25
0
def test_table_skip_blank_at_the_end_issue_bco_dmo_33():
    """Skip "#" rows in data/skip-blank-at-the-end.csv; a trailing empty row stays."""
    skip_comments = Query(skip_rows=["#"])
    with Table("data/skip-blank-at-the-end.csv", query=skip_comments) as table:
        assert table.header == ["test1", "test2"]
        data = table.read_data()
        assert data == [["1", "2"], []]
Ejemplo n.º 26
0
def test_table_skip_rows_with_headers():
    """Skip "#" rows in data/skip-rows.csv and check the header and data."""
    skip_comments = Query(skip_rows=["#"])
    with Table("data/skip-rows.csv", query=skip_comments) as table:
        assert table.header == ["id", "name"]
        data = table.read_data()
        assert data == [["1", "english"], ["2", "中国人"]]
Ejemplo n.º 27
0
def test_table_skip_rows_excel_empty_column():
    """Read data/skip-rows.xlsx with skip_rows=[""] and check the remaining data."""
    skipper = Query(skip_rows=[""])
    with Table("data/skip-rows.xlsx", query=skipper) as table:
        data = table.read_data()
        assert data == [["A", "B"], [8, 9]]
Ejemplo n.º 28
0
def test_validate_structure_errors_with_limit_rows():
    """Validate data/structure-errors.csv with limit_rows=3; expect one blank-row error."""
    report = validate("data/structure-errors.csv", query=Query(limit_rows=3))
    errors = report.flatten(["rowPosition", "fieldPosition", "code"])
    assert errors == [[4, None, "blank-row"]]
Ejemplo n.º 29
0
def test_table_pick_rows_number():
    """Pick rows 3 and 5 by position from data/skip-rows.csv, read without headers."""
    picker = Query(pick_rows=[3, 5])
    with Table("data/skip-rows.csv", headers=False, query=picker) as table:
        picked = table.read_data()
        assert picked == [["1", "english"], ["2", "中国人"]]
Ejemplo n.º 30
0
def test_package_resources_respect_query_set_after_creation_issue_503():
    """A query assigned to a package's resource after creation must be applied."""
    table_resource = Resource(path="data/table.csv")
    package = Package(resources=[table_resource])
    resource = package.get_resource("table")
    # Assign the query on the resource fetched back from the package.
    resource.query = Query(limit_rows=1)
    header = resource.read_header()
    assert header == ["id", "name"]
    rows = resource.read_rows()
    assert rows == [{"id": 1, "name": "english"}]