Code Example #1
def test_step_row_subset_unique_with_name():
    source = Resource(path="data/transform.csv")
    source.infer(only_sample=True)
    target = transform(
        source,
        steps=[
            steps.field_update(name="id", value=1),
            steps.row_subset(subset="unique", field_name="id"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == []
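A note on this result: field_update rewrites every id to 1 before the subset step, so no id value occurs exactly once and row_subset(subset="unique", field_name="id") keeps nothing, hence the empty list. The snippets in this collection also omit their imports; assuming they come from the frictionless-py v4-era test suite, the imports would look roughly as follows (mysql_url and IS_UNIX in later examples are presumably a pytest fixture and a platform-check helper from that suite):

# Assumed imports for the snippets in this collection (frictionless v4-era API).
import pytest
from frictionless import FrictionlessException, Resource, transform, steps
from frictionless.plugins.sql import SqlDialect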
Code Example #2
def test_transform_resource():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = source.transform(
        steps=[
            steps.table_normalize(),
            steps.table_melt(field_name="id"),
        ],
    )
    assert target.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "variable"},
            {"name": "value"},
        ]
    }
    assert target.read_rows() == [
        {"id": 1, "variable": "name", "value": "germany"},
        {"id": 1, "variable": "population", "value": 83},
        {"id": 2, "variable": "name", "value": "france"},
        {"id": 2, "variable": "population", "value": 66},
        {"id": 3, "variable": "name", "value": "spain"},
        {"id": 3, "variable": "population", "value": 47},
    ]
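Note: table_melt pivots every field except the key named by field_name into (variable, value) pairs, so the three source rows become six melted rows; the new variable and value fields are left untyped in the resulting schema, as the assertion above shows.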
Code Example #3
def test_step_row_search_with_name():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.row_search(regex=r"^f.*", field_name="name"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
    ]
Code Example #4
def test_step_row_filter_petl_selectcontains():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.row_filter(formula="'er' in name"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
    ]
Code Example #5
def test_sql_parser_write_string_pk_issue_777_mysql(mysql_url):
    source = Resource("data/table.csv")
    source.infer()
    source.schema.primary_key = ["name"]
    source.schema.get_field("name").constraints["maxLength"] = 100
    target = source.write(mysql_url, dialect=SqlDialect(table="name"))
    with target:
        assert target.schema.primary_key == ["name"]
        assert target.header == ["id", "name"]
        assert target.read_rows() == [
            {"id": 1, "name": "english"},
            {"id": 2, "name": "中国人"},
        ]
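Note: mysql_url is presumably a pytest fixture pointing at a test MySQL database. The maxLength constraint on name appears to be what lets the string primary key map to a bounded VARCHAR column; MySQL cannot use an unbounded TEXT column as a primary key.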
Code Example #6
def test_step_row_slice_with_start_and_step():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.row_slice(start=1, stop=3, step=2),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
    ]
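Note: row_slice follows Python slice semantics over the data rows, so start=1, stop=3, step=2 keeps only the row at offset 1 (france); the next candidate offset, 3, falls outside the stop bound.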
Code Example #7
def test_step_row_search_with_negate():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.row_search(regex=r"^f.*", negate=True),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 3, "name": "spain", "population": 47},
    ]
Code Example #8
def test_step_row_slice_with_head():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.row_slice(head=2),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
    ]
Code Example #9
def test_step_row_filter_petl_selectnoin():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.row_filter(formula="id not in [2, 3]"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
    ]
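Note: this example and the two that follow put table_normalize before the formula-based filter; the normalize step is presumably what applies the inferred schema types to the stream, so that id is compared as an integer rather than as the raw CSV string.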
Code Example #10
def test_step_row_filter_petl_selectrangeclosed():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.row_filter(formula="1 < id < 3"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
    ]
Code Example #11
def test_step_row_filter_petl_selectgt():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.row_filter(formula="id > 2"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 3, "name": "spain", "population": 47},
    ]
Code Example #12
def test_step_row_slice_with_tail():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.row_slice(tail=2),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
Code Example #13
def test_step_row_subset_distinct_with_duplicates():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.field_update(name="id", value=1),
            steps.row_subset(subset="distinct", field_name="id"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
    ]
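Note: after field_update makes every id equal to 1, subset="distinct" keeps one row per distinct id value (the first, germany), whereas subset="unique" in example #1 keeps only values that occur exactly once and therefore returns nothing.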
Code Example #14
def test_step_row_filter_petl_rowlenselect():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.row_filter(function=lambda row: len(row) == 3),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
Code Example #15
def test_step_row_filter_with_function():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.row_filter(function=lambda row: row["id"] > 1),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
Code Example #16
def test_step_row_ungroup_last():
    source = Resource(path="data/transform-groups.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.row_ungroup(group_name="name", selection="last"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 4, "name": "france", "population": 54, "year": 1920},
        {"id": 2, "name": "germany", "population": 77, "year": 1920},
        {"id": 6, "name": "spain", "population": 33, "year": 1920},
    ]
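Note: row_ungroup groups rows by the group_name field and keeps one row per group: the last row of each name group here, or the row with the maximum population in example #21, with the output ordered by the group key (france, germany, spain).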
Code Example #17
def test_step_row_subset_unique():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.row_subset(subset="unique"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
Code Example #18
def test_step_row_filter_petl_selectisnone():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.row_filter(formula="id is not None"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
Code Example #19
def test_step_row_sort_with_reverse():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.row_sort(field_names=["id"], reverse=True),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 3, "name": "spain", "population": 47},
        {"id": 2, "name": "france", "population": 66},
        {"id": 1, "name": "germany", "population": 83},
    ]
Code Example #20
def test_step_table_validate():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.cell_set(field_name="population", value="bad"),
            steps.table_validate(),
        ],
    )
    assert target.schema == source.schema
    with pytest.raises(FrictionlessException) as excinfo:
        target.read_rows()
    error = excinfo.value.error
    assert error.code == "step-error"
    assert error.note.count('type is "integer/default"')
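Note: the transform itself succeeds because the pipeline is lazy; the "bad" population value only fails integer casting when target.read_rows() materializes the rows, so the test wraps read_rows() rather than transform() in pytest.raises and inspects the resulting step-error.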
Code Example #21
def test_step_row_ungroup_max():
    source = Resource(path="data/transform-groups.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.row_ungroup(
                group_name="name", selection="max", value_name="population"
            ),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 3, "name": "france", "population": 66, "year": 2020},
        {"id": 1, "name": "germany", "population": 83, "year": 2020},
        {"id": 5, "name": "spain", "population": 47, "year": 2020},
    ]
Code Example #22
def test_step_row_filter_petl_selecteq():
    source = Resource(path="data/transform.csv")
    source.infer(only_sample=True)
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.row_filter(formula="id == 1"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
    ]
Code Example #23
def test_step_row_split():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.row_split(field_name="name", pattern="a"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 1, "name": "germ", "population": 83},
        {"id": 1, "name": "ny", "population": 83},
        {"id": 2, "name": "fr", "population": 66},
        {"id": 2, "name": "nce", "population": 66},
        {"id": 3, "name": "sp", "population": 47},
        {"id": 3, "name": "in", "population": 47},
    ]
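Note: row_split splits the named cell on the given regex pattern and emits one output row per fragment, duplicating the remaining fields, which is why each source row yields two rows here.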
Code Example #24
def test_resource_infer_source_non_tabular():
    resource = Resource(path="data/text.txt")
    resource.infer(stats=True)
    assert resource.metadata_valid
    if IS_UNIX:
        assert resource == {
            "name": "text",
            "path": "data/text.txt",
            "profile": "data-resource",
            "scheme": "file",
            "format": "txt",
            "hashing": "md5",
            "encoding": "utf-8",
            "stats": {
                "hash": "e1cbb0c3879af8347246f12c559a86b5",
                "bytes": 5,
            },
        }
Code Example #25
def test_resource_infer_source_non_tabular():
    resource = Resource(path="data/text.txt")
    resource.infer()
    print(resource.metadata_errors)
    assert resource.metadata_valid
    assert resource == {
        "name": "text",
        "path": "data/text.txt",
        "hash": "e1cbb0c3879af8347246f12c559a86b5",
        "bytes": 5,
        "rows": 0,
        "profile": "data-resource",
        "scheme": "file",
        "format": "txt",
        "hashing": "md5",
        "encoding": "utf-8",
        "compression": "no",
        "compressionPath": "",
    }
Code Example #26
File: test_resource.py Project: kant/frictionless-py
def test_resource_infer():
    resource = Resource(path="data/table.csv")
    resource.infer()
    assert resource.metadata_valid
    assert resource == {
        "path": "data/table.csv",
        "profile": "tabular-data-resource",
        "name": "table",
        "scheme": "file",
        "format": "csv",
        "hashing": "md5",
        "encoding": "utf-8",
        "compression": "no",
        "compressionPath": "",
        "control": {
            "newline": ""
        },
        "dialect": {},
        "query": {},
        "schema": {
            "fields": [
                {
                    "name": "id",
                    "type": "integer"
                },
                {
                    "name": "name",
                    "type": "string"
                },
            ]
        },
        "stats": {
            "hash": "6c2c61dd9b0e9c6876139a449ed87933",
            "bytes": 30,
            "fields": 2,
            "rows": 2,
        },
    }
Code Example #27
File: test_resource.py Project: kant/frictionless-py
def test_resource_source_multipart_infer():
    descriptor = {"path": ["data/chunk1.csv", "data/chunk2.csv"]}
    resource = Resource(descriptor)
    resource.infer()
    assert resource == {
        "path": ["data/chunk1.csv", "data/chunk2.csv"],
        "profile": "tabular-data-resource",
        "name": "memory",
        "scheme": "stream",
        "format": "csv",
        "hashing": "md5",
        "encoding": "utf-8",
        "compression": "no",
        "compressionPath": "",
        "control": {
            "newline": ""
        },
        "dialect": {},
        "query": {},
        "schema": {
            "fields": [
                {
                    "name": "id",
                    "type": "integer"
                },
                {
                    "name": "name",
                    "type": "string"
                },
            ]
        },
        "stats": {
            "hash": "6c2c61dd9b0e9c6876139a449ed87933",
            "bytes": 30,
            "fields": 2,
            "rows": 2,
        },
    }
Code Example #28
def test_step_table_diff():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.table_diff(resource=Resource(data=[
                ["id", "name", "population"],
                [1, "germany", 83],
                [2, "france", 50],
                [3, "spain", 47],
            ])),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
    ]
Code Example #29
def test_step_row_filter_petl_selectne():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.row_filter(formula="id != 1"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
Code Example #30
def test_step_row_filter_petl_selectrangeopenleft():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.row_filter(formula="1 <= id < 3"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
    ]