Esempio n. 1
0
def test_transform_rename_move_field_issue_953():
    """Rename ``name`` to ``country`` and then move it to position 3.

    Regression test for issue 953: the renamed field is expected to be the
    last field in the schema and the rows must carry the new key.
    """
    rows = [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
    pipeline = [
        steps.table_normalize(),
        steps.field_update(name="name", new_name="country"),
        steps.field_move(name="country", position=3),
    ]
    result = transform(data=rows, steps=pipeline)

    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "population", "type": "integer"},
        {"name": "country", "type": "string"},
    ]
    assert result.schema == {"fields": expected_fields}

    expected_rows = [
        {"id": 1, "population": 83, "country": "germany"},
        {"id": 2, "population": 66, "country": "france"},
        {"id": 3, "population": 47, "country": "spain"},
    ]
    assert result.read_rows() == expected_rows
def test_step_resource_add():
    """Remove the ``data2`` resource from a package and add it back by path."""
    package = Package("data/package/datapackage.json")
    package.infer()
    pipeline = [
        steps.resource_remove(name="data2"),
        steps.resource_add(name="data2", path="data2.csv"),
    ]
    result = transform(package, steps=pipeline)

    assert result.resource_names == ["data", "data2"]

    # The re-added resource must still be readable with the expected rows.
    expected_rows = [
        {"parent": "A3001", "comment": "comment1"},
        {"parent": "A3001", "comment": "comment2"},
        {"parent": "A5032", "comment": "comment3"},
    ]
    assert result.get_resource("data2").read_rows() == expected_rows
Esempio n. 3
0
def test_step_table_transpose():
    """Transpose the normalized table.

    The result is expected to expose ``name``, ``germany``, ``france`` and
    ``spain`` as fields, with a single ``population`` row.
    """
    resource = Resource(path="data/transform.csv")
    pipeline = [steps.table_normalize(), steps.table_transpose()]
    result = transform(resource, steps=pipeline)

    expected_fields = [
        {"name": "name", "type": "string"},
        {"name": "germany", "type": "integer"},
        {"name": "france", "type": "integer"},
        {"name": "spain", "type": "integer"},
    ]
    assert result.schema == {"fields": expected_fields}

    expected_rows = [
        {"name": "population", "germany": 83, "france": 66, "spain": 47},
    ]
    assert result.read_rows() == expected_rows
def test_step_row_filter_petl_selectrangeclosed():
    """Filter rows with the exclusive range formula ``1 < id < 3``.

    Only the row with ``id == 2`` is expected to survive; the schema is
    expected to keep all three fields.
    """
    resource = Resource("data/transform.csv")
    pipeline = [
        steps.table_normalize(),
        steps.row_filter(formula="1 < id < 3"),
    ]
    result = transform(resource, steps=pipeline)

    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
    ]
    assert result.schema == {"fields": expected_fields}
    assert result.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
    ]
def test_step_row_filter_petl_selectnone():
    """Filter rows with ``id is None``; no rows are expected to match."""
    resource = Resource("data/transform.csv")
    pipeline = [steps.row_filter(formula="id is None")]
    result = transform(resource, steps=pipeline)

    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
    ]
    assert result.schema == {"fields": expected_fields}
    assert result.read_rows() == []
Esempio n. 6
0
def test_step_row_filter_petl_rowlenselect():
    """Filter rows by length: every row with exactly three cells is kept.

    All three rows are expected to pass and the schema must be unchanged.
    """
    resource = Resource(path="data/transform.csv")
    resource.infer()
    pipeline = [
        steps.row_filter(predicat=lambda record: len(record) == 3),
    ]
    result = transform(resource, steps=pipeline)

    assert result.schema == resource.schema
    expected_rows = [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
    assert result.read_rows() == expected_rows
Esempio n. 7
0
def test_step_row_subset_duplicates_with_name():
    """Select the ``duplicates`` subset keyed on ``id``.

    ``id`` is first overwritten with the constant 1, so all three rows are
    expected to be reported as duplicates.
    """
    resource = Resource(path="data/transform.csv")
    resource.infer()
    pipeline = [
        steps.field_update(name="id", value=1),
        steps.row_subset(subset="duplicates", field_name="id"),
    ]
    result = transform(resource, steps=pipeline)

    assert result.schema == resource.schema
    expected_rows = [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 1, "name": "france", "population": 66},
        {"id": 1, "name": "spain", "population": 47},
    ]
    assert result.read_rows() == expected_rows
Esempio n. 8
0
def test_step_row_subset_unique_with_name():
    """Select the ``unique`` subset keyed on ``id``.

    ``id`` is first overwritten with the constant 1, so no row is expected
    to be unique.
    """
    resource = Resource("data/transform.csv")
    pipeline = [
        steps.field_update(name="id", value=1),
        steps.row_subset(subset="unique", field_name="id"),
    ]
    result = transform(resource, steps=pipeline)

    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
    ]
    assert result.schema == {"fields": expected_fields}
    assert result.read_rows() == []
Esempio n. 9
0
def test_step_table_diff_with_ignore_order():
    """Diff the normalized table against an inline table, ignoring order.

    The inline table lists its columns in a different order and differs only
    in the france population (50 instead of 66), so only the france row is
    expected in the result.
    """
    resource = Resource(path="data/transform.csv")
    resource.infer(only_sample=True)
    other_rows = [
        ["name", "id", "population"],
        ["germany", 1, 83],
        ["france", 2, 50],
        ["spain", 3, 47],
    ]
    result = transform(
        resource,
        steps=[
            steps.table_normalize(),
            steps.table_diff(
                resource=Resource(data=other_rows),
                ignore_order=True,
            ),
        ],
    )

    assert result.schema == resource.schema
    assert result.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
    ]
Esempio n. 10
0
def test_step_row_subset_distinct_with_duplicates():
    """Select the ``distinct`` subset keyed on ``id`` when all ids are equal.

    ``id`` is first overwritten with the constant 1; only the germany row is
    expected to remain.
    """
    resource = Resource("data/transform.csv")
    pipeline = [
        steps.field_update(name="id", value=1),
        steps.row_subset(subset="distinct", field_name="id"),
    ]
    result = transform(resource, steps=pipeline)

    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
    ]
    assert result.schema == {"fields": expected_fields}
    assert result.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
    ]
Esempio n. 11
0
def test_step_row_subset_duplicates():
    """Select the ``duplicates`` subset without a field name.

    No rows are expected in the result; the schema keeps all three fields.
    """
    resource = Resource("data/transform.csv")
    pipeline = [steps.row_subset(subset="duplicates")]
    result = transform(resource, steps=pipeline)

    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
    ]
    assert result.schema == {"fields": expected_fields}
    assert result.read_rows() == []
Esempio n. 12
0
def test_step_row_subset_conflicts_from_descriptor_issue_996():
    """Build ``row_subset`` from a descriptor dict (issue 996).

    The descriptor uses the camelCase ``fieldName`` key; the ``conflicts``
    subset on ``id`` is expected to be empty.
    """
    resource = Resource("data/transform.csv")
    descriptor = {"subset": "conflicts", "fieldName": "id"}
    result = transform(resource, steps=[steps.row_subset(descriptor)])

    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
    ]
    assert result.schema == {"fields": expected_fields}
    assert result.read_rows() == []
Esempio n. 13
0
def test_step_row_slice_with_start_and_step():
    """Slice rows with ``start=1, stop=3, step=2``.

    Only the france row (id 2) is expected to be selected.
    """
    resource = Resource("data/transform.csv")
    pipeline = [steps.row_slice(start=1, stop=3, step=2)]
    result = transform(resource, steps=pipeline)

    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
    ]
    assert result.schema == {"fields": expected_fields}
    assert result.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
    ]
Esempio n. 14
0
def test_transform_package():
    """Merge ``chunk2`` into ``chunk1`` inside a package, then drop ``chunk2``.

    The resulting package is expected to contain only ``chunk1`` holding the
    rows of both chunks.
    """
    package = describe("data/chunk*.csv")
    merge_into_chunk1 = steps.resource_transform(
        name="chunk1",
        steps=[steps.table_merge(resource="chunk2")],
    )
    result = transform(
        package,
        steps=[merge_into_chunk1, steps.resource_remove(name="chunk2")],
    )

    assert result.resource_names == ["chunk1"]
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    assert result.get_resource("chunk1").read_rows() == expected_rows
Esempio n. 15
0
def test_step_row_ungroup_max():
    """Ungroup by ``name`` keeping the row with the maximum ``population``.

    One row per name is expected, each one from the year 2020.
    """
    resource = Resource(path="data/transform-groups.csv")
    resource.infer()
    ungroup = steps.row_ungroup(
        group_name="name",
        selection="max",
        value_name="population",
    )
    result = transform(resource, steps=[ungroup])

    assert result.schema == resource.schema
    expected_rows = [
        {"id": 3, "name": "france", "population": 66, "year": 2020},
        {"id": 1, "name": "germany", "population": 83, "year": 2020},
        {"id": 5, "name": "spain", "population": 47, "year": 2020},
    ]
    assert result.read_rows() == expected_rows
Esempio n. 16
0
def test_step_table_intersect_with_use_hash():
    """Intersect the normalized table with an inline table using ``use_hash``.

    The inline table differs only in the france population (50 instead of
    66), so the germany and spain rows are expected in the result.
    """
    resource = Resource(path="data/transform.csv")
    resource.infer(only_sample=True)
    other_rows = [
        ["id", "name", "population"],
        [1, "germany", 83],
        [2, "france", 50],
        [3, "spain", 47],
    ]
    result = transform(
        resource,
        steps=[
            steps.table_normalize(),
            steps.table_intersect(
                resource=Resource(data=other_rows),
                use_hash=True,
            ),
        ],
    )

    assert result.schema == resource.schema
    expected_rows = [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 3, "name": "spain", "population": 47},
    ]
    assert result.read_rows() == expected_rows
Esempio n. 17
0
def test_step_row_filter_petl_selectrangeopen():
    """Filter rows with the inclusive range formula ``1 <= id <= 3``.

    All three rows are expected to pass and the schema must be unchanged.
    """
    resource = Resource(path="data/transform.csv")
    resource.infer()
    pipeline = [
        steps.table_normalize(),
        # The "<formula>" prefix is part of the predicate string passed in.
        steps.row_filter(predicat="<formula>1 <= id <= 3"),
    ]
    result = transform(resource, steps=pipeline)

    assert result.schema == resource.schema
    expected_rows = [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
    assert result.read_rows() == expected_rows
Esempio n. 18
0
def test_step_table_aggregate():
    """Aggregate ``population`` per ``name`` with the builtin ``sum``.

    The aggregated ``sum`` field is expected to carry no type in the schema.
    """
    resource = Resource(path="data/transform-groups.csv")
    aggregate = steps.table_aggregate(
        group_name="name",
        aggregation={"sum": ("population", sum)},
    )
    result = transform(resource, steps=[steps.table_normalize(), aggregate])

    expected_fields = [
        {"name": "name", "type": "string"},
        {"name": "sum"},
    ]
    assert result.schema == {"fields": expected_fields}

    expected_rows = [
        {"name": "france", "sum": 120},
        {"name": "germany", "sum": 160},
        {"name": "spain", "sum": 80},
    ]
    assert result.read_rows() == expected_rows
Esempio n. 19
0
def test_step_row_sort_with_reverse():
    """Sort rows by ``id`` in reverse; ids are expected in the order 3, 2, 1."""
    resource = Resource(path="data/transform.csv")
    resource.infer()
    pipeline = [steps.row_sort(field_names=["id"], reverse=True)]
    result = transform(resource, steps=pipeline)

    assert result.schema == resource.schema
    expected_rows = [
        {"id": 3, "name": "spain", "population": 47},
        {"id": 2, "name": "france", "population": 66},
        {"id": 1, "name": "germany", "population": 83},
    ]
    assert result.read_rows() == expected_rows
Esempio n. 20
0
def test_step_row_search():
    """Search rows with the regex ``^f.*``; only the france row is expected."""
    resource = Resource("data/transform.csv")
    pipeline = [steps.row_search(regex=r"^f.*")]
    result = transform(resource, steps=pipeline)

    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
    ]
    assert result.schema == {"fields": expected_fields}
    assert result.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
    ]
Esempio n. 21
0
def test_step_row_subset_unique():
    """Select the ``unique`` subset without a field name.

    All three rows are expected to be kept and the schema must be unchanged.
    """
    resource = Resource(path="data/transform.csv")
    resource.infer()
    pipeline = [steps.row_subset(subset="unique")]
    result = transform(resource, steps=pipeline)

    assert result.schema == resource.schema
    expected_rows = [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
    assert result.read_rows() == expected_rows
Esempio n. 22
0
def test_step_field_filter():
    """Keep only the ``id`` and ``name`` fields; ``population`` is dropped."""
    resource = Resource(path="data/transform.csv")
    pipeline = [steps.field_filter(names=["id", "name"])]
    result = transform(resource, steps=pipeline)

    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
    ]
    assert result.schema == {"fields": expected_fields}

    expected_rows = [
        {"id": 1, "name": "germany"},
        {"id": 2, "name": "france"},
        {"id": 3, "name": "spain"},
    ]
    assert result.read_rows() == expected_rows
def test_step_row_filter_petl_selectisfalse():
    """Filter rows whose ``id`` is falsy; no rows are expected to match."""
    resource = Resource("data/transform.csv")
    pipeline = [
        steps.row_filter(function=lambda record: not bool(record["id"])),
    ]
    result = transform(resource, steps=pipeline)

    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
    ]
    assert result.schema == {"fields": expected_fields}
    assert result.read_rows() == []
Esempio n. 24
0
def test_step_field_remove():
    """Remove the ``id`` field; ``name`` and ``population`` must remain."""
    resource = Resource(path="data/transform.csv")
    pipeline = [steps.field_remove(names=["id"])]
    result = transform(resource, steps=pipeline)

    expected_fields = [
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
    ]
    assert result.schema == {"fields": expected_fields}

    expected_rows = [
        {"name": "germany", "population": 83},
        {"name": "france", "population": 66},
        {"name": "spain", "population": 47},
    ]
    assert result.read_rows() == expected_rows
def test_step_row_filter_petl_selectop():
    """Filter rows with the formula ``id == 1``; only germany is expected."""
    resource = Resource("data/transform.csv")
    pipeline = [
        steps.table_normalize(),
        steps.row_filter(formula="id == 1"),
    ]
    result = transform(resource, steps=pipeline)

    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
    ]
    assert result.schema == {"fields": expected_fields}
    assert result.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
    ]
Esempio n. 26
0
def test_step_table_join_mode_outer():
    """Outer-join the normalized table with an inline ``id``/``note`` table.

    Rows present on only one side are expected to be kept, with ``None`` in
    the cells that come from the other side.
    """
    resource = Resource("data/transform.csv")
    notes = [["id", "note"], [1, "beer"], [4, "rum"]]
    result = transform(
        resource,
        steps=[
            steps.table_normalize(),
            steps.table_join(
                resource=Resource(data=notes),
                field_name="id",
                mode="outer",
            ),
        ],
    )

    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
        {"name": "note", "type": "string"},
    ]
    assert result.schema == {"fields": expected_fields}

    expected_rows = [
        {"id": 1, "name": "germany", "population": 83, "note": "beer"},
        {"id": 2, "name": "france", "population": 66, "note": None},
        {"id": 3, "name": "spain", "population": 47, "note": None},
        {"id": 4, "name": None, "population": None, "note": "rum"},
    ]
    assert result.read_rows() == expected_rows
Esempio n. 27
0
def test_step_table_recast():
    """Melt the table on ``id`` and recast it back.

    The round trip is expected to reproduce the source schema and rows.
    """
    resource = Resource(path="data/transform.csv")
    resource.infer()
    pipeline = [
        steps.table_normalize(),
        steps.table_melt(field_name="id"),
        steps.table_recast(field_name="id"),
    ]
    result = transform(resource, steps=pipeline)

    assert result.schema == resource.schema
    expected_rows = [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
    assert result.read_rows() == expected_rows
Esempio n. 28
0
def test_step_row_ungroup_last():
    """Ungroup by ``name`` keeping the last row of every group.

    One row per name is expected, each one from the year 1920.
    """
    resource = Resource(path="data/transform-groups.csv")
    resource.infer()
    pipeline = [steps.row_ungroup(group_name="name", selection="last")]
    result = transform(resource, steps=pipeline)

    assert result.schema == resource.schema
    expected_rows = [
        {"id": 4, "name": "france", "population": 54, "year": 1920},
        {"id": 2, "name": "germany", "population": 77, "year": 1920},
        {"id": 6, "name": "spain", "population": 33, "year": 1920},
    ]
    assert result.read_rows() == expected_rows
Esempio n. 29
0
def test_step_table_diff_with_ignore_order():
    """Diff the table against an inline table of string cells, ignoring order.

    The inline table lists its columns in a different order, passes every
    cell as a string, and differs only in the france population ("50"); only
    the france row is expected in the result.
    """
    resource = Resource(path="data/transform.csv")
    resource.infer()
    other_rows = [
        ["name", "id", "population"],
        ["germany", "1", "83"],
        ["france", "2", "50"],
        ["spain", "3", "47"],
    ]
    diff = steps.table_diff(
        resource=Resource(data=other_rows),
        ignore_order=True,
    )
    result = transform(resource, steps=[diff])

    assert result.schema == resource.schema
    assert result.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
    ]
Esempio n. 30
0
def test_step_row_subset_distinct():
    """Select the ``distinct`` subset keyed on ``id``.

    All three rows are expected to be kept and the schema must be unchanged.
    """
    resource = Resource(path="data/transform.csv")
    resource.infer(only_sample=True)
    pipeline = [steps.row_subset(subset="distinct", field_name="id")]
    result = transform(resource, steps=pipeline)

    assert result.schema == resource.schema
    expected_rows = [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
    assert result.read_rows() == expected_rows