Beispiel #1
0
def test_table_jsonl_write_keyed(tmpdir):
    """Round-trip ``data/table.csv`` through a keyed JSON Lines file."""
    keyed_dialect = dialects.JsonDialect(keyed=True)
    jsonl_path = str(tmpdir.join("table.jsonl"))

    # Export the CSV source using the keyed JSON dialect.
    with Table("data/table.csv") as csv_table:
        csv_table.write(jsonl_path, dialect=keyed_dialect)

    # Load the written file back with the same dialect and compare contents.
    with Table(jsonl_path, dialect=keyed_dialect) as jsonl_table:
        expected_header = ["id", "name"]
        expected_rows = [[1, "english"], [2, "中国人"]]
        assert jsonl_table.header == expected_header
        assert jsonl_table.read_data() == expected_rows
    def read_data_and_validate(self):
        """Read the file at ``self.filepath`` and validate it with frictionless.

        The dialect is chosen from ``self.metadata["file_format"]``: a format
        containing ``"json"`` is read with a ``JsonDialect`` whose keys are the
        metadata column names (partition columns excluded); anything else is
        assumed to be CSV.

        Reads ``self.filepath``, ``self.metadata``, ``self.table_params`` and
        ``self.schema``. Stores the validation outcome on ``self.valid`` and
        the first table report, converted to a dict, on ``self.response``.

        Raises:
            ValueError: If ``self.filepath`` contains a space.
        """

        log.info(f"Reading and validating: {self.filepath}")

        skip_errors = []

        # assert the correct dialect and checks
        if "json" in self.metadata["file_format"]:
            # Partitions may be listed by column name or as full column
            # entries; exclude a column when it matches either form. Testing
            # only the column dict against a list of names would never match,
            # so partition columns would leak into the expected keys.
            partitions = self.metadata.get("partitions") or []
            expected_headers = [
                c["name"]
                for c in self.metadata["columns"]
                if c not in partitions and c["name"] not in partitions
            ]
            dialect = dialects.JsonDialect(keys=expected_headers)
            if (
                "headers-ignore-case" in self.table_params
                or "expect-header" in self.table_params
            ):
                conf_warn = (
                    "jsonl files do not support header options. If keys "
                    "in json lines do not match up exactly (i.e. case sensitive) "
                    "with meta columns then keys will be nulled"
                )
                log.warning(conf_warn)
        else:  # assumes CSV
            # Header matching is case sensitive unless explicitly disabled.
            header_case = not self.table_params.get("headers-ignore-case", False)
            dialect = dialects.Dialect(header_case=header_case)
            if not self.table_params.get("expect-header"):
                # No header row is expected, so header errors are skipped.
                skip_errors.append("#head")

        # Only restrict the number of rows read when a limit is configured.
        query = None
        row_limit = self.table_params.get("row-limit", False)

        if row_limit:
            query = Query(limit_rows=row_limit)

        if " " in self.filepath:
            raise ValueError("The filepath must not contain a space")

        with Table(self.filepath, dialect=dialect, query=query) as table:
            resp = validate(
                table.row_stream,
                schema=self.schema,
                dialect=dialect,
                skip_errors=skip_errors,
            )

        self.valid = resp.valid
        # Returns a class so lazily converting it to dict
        self.response = dict(resp.tables[0])
Beispiel #3
0
def test_table_json_write_keyed(tmpdir):
    """Write ``data/table.csv`` as keyed JSON and check the stored records."""
    expected_records = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    json_path = str(tmpdir.join("table.json"))
    keyed_dialect = dialects.JsonDialect(keyed=True)

    with Table("data/table.csv") as csv_table:
        csv_table.write(json_path, dialect=keyed_dialect)

    # Inspect the raw file with the json module instead of reading it
    # back through Table.
    with open(json_path) as handle:
        written_records = json.load(handle)
    assert written_records == expected_records
Beispiel #4
0
def test_table_json_keyed_with_keys_provided():
    """Explicit ``keys`` set the column order and mark the dialect as keyed."""
    keys_dialect = dialects.JsonDialect(keys=["name", "id"])
    keyed_source = "data/table.keyed.json"

    with Table(keyed_source, dialect=keys_dialect) as keyed_table:
        # Supplying keys should switch the dialect into keyed mode.
        assert keyed_table.dialect.keyed is True
        # Header and cells follow the requested key order, not the file's.
        expected_header = ["name", "id"]
        expected_rows = [["english", 1], ["中国人", 2]]
        assert keyed_table.header == expected_header
        assert keyed_table.read_data() == expected_rows
Beispiel #5
0
def test_table_dialect_json_property():
    """Rows nested under a top-level property are read via ``property``."""
    inline_json = '{"root": [["header1", "header2"], ["value1", "value2"]]}'
    root_dialect = dialects.JsonDialect(property="root")

    # The JSON text is passed inline, hence the "text" scheme.
    with Table(
        inline_json,
        scheme="text",
        format="json",
        dialect=root_dialect,
    ) as nested_table:
        expected_header = ["header1", "header2"]
        expected_rows = [["value1", "value2"]]
        assert nested_table.header == expected_header
        assert nested_table.read_data() == expected_rows