Example #1
0
def test_resource_source_data():
    """Check attributes and read_* results of a resource built from inline data."""
    table = [["id", "name"], ["1", "english"], ["2", "中国人"]]
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    expected_stats = {"hash": "", "bytes": 0, "fields": 2, "rows": 2}

    resource = Resource({"data": table})

    # Descriptor-level attributes.
    assert resource.path is None
    assert resource.data == table
    assert resource.source == table
    assert resource.basepath == ""
    assert resource.inline is True
    assert resource.tabular is True
    assert resource.multipart is False

    # Read helpers: the first inline row is expected as the header, the
    # remaining rows as data/sample.
    assert resource.read_bytes() == b""
    assert resource.read_data() == table[1:]
    assert resource.read_rows() == expected_rows
    assert resource.read_header() == ["id", "name"]
    assert resource.read_sample() == table[1:]
    assert resource.read_stats() == expected_stats
def test_resource_infer_float_numbers():
    """Check infer() with infer_float_numbers=True: schema, header and rows."""
    resource = Resource(
        data=[["number"], ["1.1"], ["2.2"], ["3.3"]],
        infer_float_numbers=True,
    )
    resource.infer()

    number_field = {"name": "number", "type": "number", "floatNumber": True}
    assert resource.schema == {"fields": [number_field]}
    assert resource.read_header() == ["number"]
    # Rows are expected as Python floats.
    assert resource.read_rows() == [{"number": value} for value in (1.1, 2.2, 3.3)]
Example #3
0
def test_resource_source_path():
    """Check attributes and read_* results of a resource built from a CSV path."""
    csv_path = "data/table.csv"
    raw_cells = [["1", "english"], ["2", "中国人"]]
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    expected_stats = {
        "hash": "6c2c61dd9b0e9c6876139a449ed87933",
        "bytes": 30,
        "fields": 2,
        "rows": 2,
    }

    resource = Resource({"path": csv_path})

    # Descriptor-level attributes.
    assert resource.path == csv_path
    assert resource.data is None
    assert resource.source == csv_path
    assert resource.basepath == ""
    assert resource.inline is False
    assert resource.tabular is True
    assert resource.multipart is False

    # Read helpers.
    raw_bytes = resource.read_bytes()
    assert raw_bytes == b"id,name\n1,english\n2,\xe4\xb8\xad\xe5\x9b\xbd\xe4\xba\xba\n"
    assert resource.read_data() == raw_cells
    assert resource.read_rows() == expected_rows
    assert resource.read_header() == ["id", "name"]
    assert resource.read_sample() == raw_cells
    assert resource.read_stats() == expected_stats
def test_resource_infer_names():
    """Check infer() with infer_names: schema and rows use the new names."""
    resource = Resource(path="data/table.csv", infer_names=["new1", "new2"])
    resource.infer()

    expected_fields = [
        {"name": "new1", "type": "integer"},
        {"name": "new2", "type": "string"},
    ]
    assert resource.schema == {"fields": expected_fields}

    # The header is still expected to carry the original labels.
    assert resource.read_header() == ["id", "name"]
    assert resource.read_rows() == [
        {"new1": 1, "new2": "english"},
        {"new1": 2, "new2": "中国人"},
    ]
def test_resource_infer_type():
    """Check infer() with infer_type="string": every field and cell is a string."""
    resource = Resource(path="data/table.csv", infer_type="string")
    resource.infer()

    expected_fields = [
        {"name": "id", "type": "string"},
        {"name": "name", "type": "string"},
    ]
    assert resource.schema == {"fields": expected_fields}
    assert resource.read_header() == ["id", "name"]
    # The id cells are expected as strings, not integers.
    assert resource.read_rows() == [
        {"id": "1", "name": "english"},
        {"id": "2", "name": "中国人"},
    ]
def test_table_schema_patch_schema():
    """Check infer() with patch_schema overriding the "id" field's name and type."""
    field_patch = {"name": "new", "type": "string"}
    resource = Resource(
        path="data/table.csv",
        patch_schema={"fields": {"id": field_patch}},
    )
    resource.infer()

    expected_fields = [
        {"name": "new", "type": "string"},
        {"name": "name", "type": "string"},
    ]
    assert resource.schema == {"fields": expected_fields}

    # The header is still expected to carry the original labels.
    assert resource.read_header() == ["id", "name"]
    assert resource.read_rows() == [
        {"new": "1", "name": "english"},
        {"new": "2", "name": "中国人"},
    ]
def test_resource_sync_schema():
    """Check a resource built with an explicit schema and sync_schema=True."""
    declared = {
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "id", "type": "integer"},
        ]
    }
    resource = Resource(
        path="data/sync-schema.csv",
        schema=declared,
        sync_schema=True,
    )
    resource.infer()

    # The schema is expected to equal the declared one after infer().
    assert resource.schema == declared
    assert resource.read_header() == ["name", "id"]
    assert resource.read_sample() == [["english", "1"], ["中国人", "2"]]
    assert resource.read_rows() == [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
Example #8
0
def test_resource_multipart_write_file(tmpdir):
    """Write a resource with the multipart scheme and read the chunk files back.

    The resource at "data/table.json" is written to a "table{number}.json"
    target with a chunk size of 80; the test then reads "table1.json" and
    "table2.json" back as one multipart resource and checks header and rows.

    Args:
        tmpdir: directory object providing ``join``; presumably pytest's
            ``tmpdir`` fixture.
    """
    source = "data/table.json"
    target = str(tmpdir.join("table{number}.json"))
    target1 = str(tmpdir.join("table1.json"))
    target2 = str(tmpdir.join("table2.json"))

    # Write
    resource = Resource(path=source)
    resource.write(target, scheme="multipart", control={"chunkSize": 80})

    # Read
    resource = Resource(path=[target1, target2], trusted=True)
    # Asserted explicitly: a bare comparison expression discards its result,
    # so the header check could never fail.
    assert resource.read_header() == ["id", "name"]
    assert resource.read_rows() == [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
def test_missing_header():
    """Check the header is flagged invalid when the schema declares an extra field."""
    declared_fields = [Field(name=label) for label in ("id", "name", "extra")]
    resource = Resource(path="data/table.csv", schema=Schema(fields=declared_fields))

    header = resource.read_header()

    # Only two labels are expected although three fields are declared.
    assert header == ["id", "name"]
    assert header.valid is False
def test_resource_write(tmpdir):
    """Write a CSV resource to a new path and check the copy reads back the same."""
    origin_path = "data/table.csv"
    copy_path = str(tmpdir.join("table.csv"))

    Resource(path=origin_path).write(copy_path)

    copied = Resource(path=copy_path, trusted=True)
    assert copied.read_header() == ["id", "name"]
    assert copied.read_rows() == [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
Example #11
0
def test_resource_respect_query_set_after_creation_issue_503():
    """Check that a Query assigned after construction limits the rows read."""
    resource = Resource(path="data/table.csv")
    # The query is attached only after the resource has been created.
    resource.query = Query(limit_rows=1)

    expected_first_row = {"id": 1, "name": "english"}
    assert resource.read_header() == ["id", "name"]
    assert resource.read_rows() == [expected_first_row]