def test_resource_write(tmpdir):
    """Write a CSV resource to a temp file and read the copy back."""
    origin_path = "data/table.csv"
    copy_path = str(tmpdir.join("table.csv"))
    expected_header = ["id", "name"]
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]

    original = Resource(path=origin_path)
    original.write(copy_path)

    # The written copy is re-opened with trusted=True.
    copy = Resource(path=copy_path, trusted=True)
    assert copy.read_header() == expected_header
    assert copy.read_rows() == expected_rows
Example #2
0
def test_xls_parser_sheet_by_index():
    """Read an XLS file with ``ExcelDialect(sheet=2)`` and check header and rows."""
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    xls_path = "data/sheet2.xls"
    excel = ExcelDialect(sheet=2)
    with Resource(xls_path, dialect=excel) as opened:
        assert opened.header == ["id", "name"]
        assert opened.read_rows() == expected_rows
def test_resource_expand_with_dialect():
    """Expand a resource that carries a custom dialect and check the full descriptor."""
    dialect = {"delimiter": "custom"}
    descriptor = {"name": "name", "path": "data/table.csv", "dialect": dialect}

    # After expand() the descriptor is expected to keep the custom delimiter
    # alongside the remaining dialect keys listed here.
    expanded_dialect = {
        "header": True,
        "headerRows": [1],
        "headerJoin": " ",
        "headerCase": True,
        "delimiter": "custom",
        "lineTerminator": "\r\n",
        "doubleQuote": True,
        "quoteChar": '"',
        "skipInitialSpace": False,
    }
    expanded = {
        "name": "name",
        "path": "data/table.csv",
        "profile": "data-resource",
        "dialect": expanded_dialect,
    }

    resource = Resource(descriptor)
    resource.expand()
    assert resource == expanded
Example #4
0
def test_step_field_add_with_formula():
    """Add a ``calc`` field computed from a formula after normalizing the table."""
    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
            {"name": "calc"},
        ]
    }
    expected_rows = [
        {"id": 1, "name": "germany", "population": 83, "calc": 183},
        {"id": 2, "name": "france", "population": 66, "calc": 266},
        {"id": 3, "name": "spain", "population": 47, "calc": 347},
    ]

    source = Resource(path="data/transform.csv")
    pipeline = [
        steps.table_normalize(),
        steps.field_add(name="calc", formula="id * 100 + population"),
    ]
    target = transform(source, steps=pipeline)

    assert target.schema == expected_schema
    assert target.read_rows() == expected_rows
Example #5
0
def test_step_field_add():
    """Add a constant string field ``note`` and check schema and rows."""
    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
            {"name": "note", "type": "string"},
        ]
    }
    expected_rows = [
        {"id": 1, "name": "germany", "population": 83, "note": "eu"},
        {"id": 2, "name": "france", "population": 66, "note": "eu"},
        {"id": 3, "name": "spain", "population": 47, "note": "eu"},
    ]

    source = Resource(path="data/transform.csv")
    pipeline = [steps.field_add(name="note", type="string", value="eu")]
    target = transform(source, steps=pipeline)

    assert target.schema == expected_schema
    assert target.read_rows() == expected_rows
Example #6
0
def test_step_row_ungroup_last():
    """Ungroup rows by ``name`` with ``selection="last"`` and check the result."""
    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
            {"name": "year", "type": "integer"},
        ]
    }
    expected_rows = [
        {"id": 4, "name": "france", "population": 54, "year": 1920},
        {"id": 2, "name": "germany", "population": 77, "year": 1920},
        {"id": 6, "name": "spain", "population": 33, "year": 1920},
    ]

    source = Resource("data/transform-groups.csv")
    pipeline = [steps.row_ungroup(group_name="name", selection="last")]
    target = transform(source, steps=pipeline)

    assert target.schema == expected_schema
    assert target.read_rows() == expected_rows
Example #7
0
def test_resource_source_non_tabular_remote():
    """Open a remote text file and check the properties the resource reports."""
    url = BASEURL % "data/text.txt"
    expected_stats = {
        "hash": "e1cbb0c3879af8347246f12c559a86b5",
        "bytes": 5,
    }
    with Resource(url) as remote:
        assert remote.path == url
        assert remote.data is None
        assert remote.memory is False
        assert remote.tabular is False
        assert remote.multipart is False
        assert remote.basepath == ""
        assert remote.fullpath == url
        # Byte content and stats are only checked when IS_UNIX is truthy.
        if IS_UNIX:
            assert remote.read_bytes() == b"text\n"
            assert remote.stats == expected_stats
Example #8
0
def test_resource_relative_parent_path_with_trusted_option_issue_171():
    """A path containing ``..`` raises on read unless ``trusted=True`` is passed."""
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]

    # trusted=false (default): reading is expected to raise a resource error
    res = Resource(path="data/../data/table.csv")
    with pytest.raises(exceptions.FrictionlessException) as raised:
        res.read_rows()
    failure = raised.value.error
    assert failure.code == "resource-error"
    assert failure.note.count("data/table.csv")

    # trusted=true: the same path reads normally
    res = Resource(path="data/../data/table.csv", trusted=True)
    assert res.read_rows() == expected_rows
Example #9
0
def test_resource_to_zip(tmpdir):
    """Zip a resource loaded from a JSON descriptor, then load it from the archive."""
    archive = os.path.join(tmpdir, "dataresource.zip")
    expected_descriptor = {"name": "name", "path": "table.csv"}
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]

    # Write the archive
    res = Resource("data/resource.json")
    res.to_zip(archive)

    # Read it back
    res = Resource(archive)
    assert res == expected_descriptor
    assert res.read_rows() == expected_rows
Example #10
0
def test_xlsx_parser_preserve_formatting_number_multicode():
    """Read a multi-code number-format XLSX with ``preserve_formatting=True``."""
    expected_rows = [
        {"col1": Decimal("4.5")},
        {"col1": Decimal("-9.032")},
        {"col1": Decimal("15.8")},
    ]
    xlsx_path = "data/number-format-multicode.xlsx"
    excel = ExcelDialect(preserve_formatting=True)
    # Fields matching "<blank>" are skipped via the layout.
    skip_blank = Layout(skip_fields=["<blank>"])
    with Resource(xlsx_path, dialect=excel, layout=skip_blank) as opened:
        assert opened.read_rows() == expected_rows
Example #11
0
def test_type_custom(custom_plugin):
    """Read rows through a schema whose second field has the ``custom`` type."""
    expected_rows = [
        {"integer": 1, "custom": ["english"]},
        {"integer": 2, "custom": ["中国人"]},
    ]
    field_list = [
        Field(name="integer", type="integer"),
        Field(name="custom", type="custom"),
    ]
    schema = Schema(fields=field_list)
    with Resource(path="data/table.csv", schema=schema) as opened:
        assert opened.read_rows() == expected_rows
Example #12
0
def test_csv_parser_excel_tab():
    """Parse tab-separated text with an explicit tab delimiter."""
    expected_header = ["header1", "header2"]
    expected_rows = [
        {"header1": "value1", "header2": "value2"},
        {"header1": "value3", "header2": "value4"},
    ]
    text = "header1\theader2\nvalue1\tvalue2\nvalue3\tvalue4"
    tabbed = CsvDialect(delimiter="\t")
    with Resource(text, scheme="text", format="csv", dialect=tabbed) as opened:
        assert opened.header == expected_header
        assert opened.read_rows() == expected_rows
Example #13
0
def test_csv_parser_skipinitialspace():
    """Parse CSV text with ``skip_initial_space=False`` and check header and rows."""
    expected_header = ["header1", "header2"]
    # The second-column values are expected to keep their leading space.
    expected_rows = [
        {"header1": "value1", "header2": " value2"},
        {"header1": "value3", "header2": " value4"},
    ]
    text = "header1, header2\nvalue1, value2\nvalue3, value4"
    keep_space = CsvDialect(skip_initial_space=False)
    with Resource(text, scheme="text", format="csv", dialect=keep_space) as opened:
        assert opened.header == expected_header
        assert opened.read_rows() == expected_rows
Example #14
0
def test_csv_parser_delimiter():
    """Parse quoted, semicolon-separated text with ``delimiter=";"``."""
    expected_header = ["header1", "header2"]
    expected_rows = [
        {"header1": "value1", "header2": "value2"},
        {"header1": "value3", "header2": "value4"},
    ]
    text = '"header1";"header2"\n"value1";"value2"\n"value3";"value4"'
    semicolon = CsvDialect(delimiter=";")
    with Resource(text, scheme="text", format="csv", dialect=semicolon) as opened:
        assert opened.header == expected_header
        assert opened.read_rows() == expected_rows
Example #15
0
def test_inline_parser_keyed_with_keys_provided():
    """Read keyed inline data with an explicit key order given by the dialect."""
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    records = [
        {"id": "1", "name": "english"},
        {"id": "2", "name": "中国人"},
    ]
    key_order = InlineDialect(keys=["name", "id"])
    with Resource(records, format="inline", dialect=key_order) as opened:
        assert opened.dialect.keyed is True
        # The header is expected to follow the order of the provided keys.
        assert opened.header == ["name", "id"]
        assert opened.read_rows() == expected_rows
Example #16
0
def test_json_parser_from_buffer_keyed():
    """Read keyed JSON passed in as UTF-8 encoded bytes."""
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    text = '[{"id": 1, "name": "english" }, {"id": 2, "name": "中国人" }]'
    buffer = text.encode("utf-8")
    with Resource(buffer, format="json") as opened:
        assert opened.dialect.keyed is True
        assert opened.header == ["id", "name"]
        assert opened.read_rows() == expected_rows
def test_resource_layout_header_csv_multiline_headers_duplicates():
    """Use two header rows holding the same label and check the resulting header."""
    expected_rows = [{"k1": "v1"}, {"k1": "v2"}, {"k1": "v3"}]
    buffer = b"k1\nk1\nv1\nv2\nv3"
    two_header_rows = Layout(header_rows=[1, 2])
    with Resource(buffer, format="csv", layout=two_header_rows) as opened:
        # The duplicated label is expected to appear once in the header.
        assert opened.header == ["k1"]
        assert opened.read_rows() == expected_rows
Example #18
0
def test_step_row_subset_conflicts():
    """Select the ``conflicts`` subset on ``id``; no rows are expected."""
    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
    ]

    source = Resource("data/transform.csv")
    pipeline = [steps.row_subset(subset="conflicts", field_name="id")]
    target = transform(source, steps=pipeline)

    assert target.schema == {"fields": expected_fields}
    assert target.read_rows() == []
Example #19
0
def test_resource_official_hash_bytes_rows_with_hashing_algorithm():
    """A ``sha256:``-prefixed hash is expected to split into ``hashing`` and ``stats``."""
    descriptor = {"path": "path", "hash": "sha256:hash", "bytes": 1, "rows": 1}
    expected_stats = {"hash": "hash", "bytes": 1, "rows": 1}
    expected = {"path": "path", "hashing": "sha256", "stats": expected_stats}

    resource = Resource(descriptor)
    assert resource == expected
def test_resource_open_without_rows():
    """Open a CSV that has a header but no data rows."""
    # With no rows present, both fields are expected to have type "any".
    expected_schema = {
        "fields": [
            {"name": "id", "type": "any"},
            {"name": "name", "type": "any"},
        ]
    }
    with Resource("data/without-rows.csv") as opened:
        assert opened.header == ["id", "name"]
        assert opened.read_rows() == []
        assert opened.schema == expected_schema
Example #21
0
def test_step_row_subset_conflicts_from_descriptor_issue_996():
    """Build ``row_subset`` from a descriptor dict; no rows are expected."""
    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
    ]

    source = Resource("data/transform.csv")
    # The step is configured through a camelCase descriptor rather than kwargs.
    step_descriptor = {"subset": "conflicts", "fieldName": "id"}
    pipeline = [steps.row_subset(step_descriptor)]
    target = transform(source, steps=pipeline)

    assert target.schema == {"fields": expected_fields}
    assert target.read_rows() == []
Example #22
0
def test_step_field_unpack():
    """Turn ``id`` into an array field, then unpack it into ``id2`` and ``id3``."""
    expected_schema = {
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
            {"name": "id2"},
            {"name": "id3"},
        ]
    }
    expected_rows = [
        {"name": "germany", "population": 83, "id2": 1, "id3": 1},
        {"name": "france", "population": 66, "id2": 1, "id3": 1},
        {"name": "spain", "population": 47, "id2": 1, "id3": 1},
    ]

    source = Resource(path="data/transform.csv")
    pipeline = [
        steps.field_update(name="id", type="array", value=[1, 1]),
        steps.field_unpack(name="id", to_names=["id2", "id3"]),
    ]
    target = transform(source, steps=pipeline)

    assert target.schema == expected_schema
    assert target.read_rows() == expected_rows
Example #23
0
def test_step_field_add_with_incremental():
    """Add an incremental ``number`` field and check schema and rows."""
    # The new field is expected to come first in both schema and rows.
    expected_schema = {
        "fields": [
            {"name": "number"},
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    expected_rows = [
        {"number": 1, "id": 1, "name": "germany", "population": 83},
        {"number": 2, "id": 2, "name": "france", "population": 66},
        {"number": 3, "id": 3, "name": "spain", "population": 47},
    ]

    source = Resource(path="data/transform.csv")
    pipeline = [steps.field_add(name="number", incremental=True)]
    target = transform(source, steps=pipeline)

    assert target.schema == expected_schema
    assert target.read_rows() == expected_rows
Example #24
0
def test_csv_parser_detect_delimiter_tab():
    """Read headerless tab-separated text without specifying a dialect."""
    # The comma inside the third value is expected to stay part of that value.
    expected_rows = [
        {"field1": "a1", "field2": "b1", "field3": "c1A,c1B"},
        {"field1": "a2", "field2": "b2", "field3": "c2"},
    ]
    text = "a1\tb1\tc1A,c1B\na2\tb2\tc2\n"
    headerless = Layout(header=False)
    with Resource(text, scheme="text", format="csv", layout=headerless) as opened:
        assert opened.read_rows() == expected_rows
Example #25
0
def test_resource_schema_from_path_remote():
    """Load a remote descriptor whose dialect and schema are given as paths."""
    expected_descriptor = {
        "name": "name",
        "path": "table.csv",
        "dialect": "dialect.json",
        "schema": "schema-simple.json",
    }
    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
        ]
    }

    url = BASE_URL % "data/resource-with-dereferencing.json"
    resource = Resource(url)
    assert resource == expected_descriptor
    assert resource.schema == expected_schema
def test_resource_schema_unique_error():
    """Only the row numbered 3 should fail the ``unique`` constraint on ``name``."""
    data = [["name"], [1], [2], [2]]
    unique_patch = {"fields": {"name": {"constraints": {"unique": True}}}}
    detector = Detector(schema_patch=unique_patch)
    with Resource(data, detector=detector) as opened:
        for row in opened:
            if row.row_number == 3:
                assert row.valid is False
                assert row.errors[0].code == "unique-error"
            else:
                assert row.valid
Example #27
0
def test_step_table_join_from_dict():
    """Join on ``id`` against a resource given as a plain dict of inline data."""
    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
            {"name": "note", "type": "string"},
        ]
    }
    # Only ids 1 and 2 appear in the joined data, so two rows are expected.
    expected_rows = [
        {"id": 1, "name": "germany", "population": 83, "note": "beer"},
        {"id": 2, "name": "france", "population": 66, "note": "vine"},
    ]

    source = Resource("data/transform.csv")
    notes = dict(data=[["id", "note"], [1, "beer"], [2, "vine"]])
    pipeline = [
        steps.table_normalize(),
        steps.table_join(resource=notes, field_name="id"),
    ]
    target = transform(source, steps=pipeline)

    assert target.schema == expected_schema
    assert target.read_rows() == expected_rows
Example #28
0
def test_resource_to_zip_source_remote(tmpdir):
    """Zip a resource pointing at a remote CSV and load it back from the archive."""
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]

    # Write the archive
    url = BASE_URL % "data/table.csv"
    archive = os.path.join(tmpdir, "datapackage.zip")
    res = Resource(name="name", path=url)
    res.to_zip(archive)

    # Read it back; the descriptor is expected to keep the remote path
    res = Resource(archive)
    assert res == {"name": "name", "path": url}
    assert res.read_rows() == expected_rows
Example #29
0
def test_resource_to_zip_source_inline(tmpdir):
    """Zip a resource built from inline data and load it back from the archive."""
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]

    # Write the archive
    archive = os.path.join(tmpdir, "dataresource.zip")
    table = [["id", "name"], ["1", "english"], ["2", "中国人"]]
    res = Resource(name="name", data=table)
    res.to_zip(archive)

    # Read it back; the descriptor is expected to carry the inline data
    res = Resource(archive)
    assert res == {"name": "name", "data": table}
    assert res.read_rows() == expected_rows
Example #30
0
def test_step_row_subset_unique_with_name():
    """Set every ``id`` to 1, then select the ``unique`` subset; no rows are expected."""
    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
    ]

    source = Resource("data/transform.csv")
    pipeline = [
        steps.field_update(name="id", value=1),
        steps.row_subset(subset="unique", field_name="id"),
    ]
    target = transform(source, steps=pipeline)

    assert target.schema == {"fields": expected_fields}
    assert target.read_rows() == []