def test_transform_rename_move_field_issue_953():
    """Renaming a field and then moving it updates both the schema and the rows."""
    result = transform(
        data=[
            {"id": 1, "name": "germany", "population": 83},
            {"id": 2, "name": "france", "population": 66},
            {"id": 3, "name": "spain", "population": 47},
        ],
        steps=[
            steps.table_normalize(),
            steps.field_update(name="name", new_name="country"),
            steps.field_move(name="country", position=3),
        ],
    )
    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "population", "type": "integer"},
        {"name": "country", "type": "string"},
    ]
    assert result.schema == {"fields": expected_fields}
    assert result.read_rows() == [
        {"id": 1, "population": 83, "country": "germany"},
        {"id": 2, "population": 66, "country": "france"},
        {"id": 3, "population": 47, "country": "spain"},
    ]
def test_step_resource_add():
    """Removing a resource and re-adding it by path keeps the package intact."""
    package = Package("data/package/datapackage.json")
    package.infer()
    result = transform(
        package,
        steps=[
            steps.resource_remove(name="data2"),
            steps.resource_add(name="data2", path="data2.csv"),
        ],
    )
    assert result.resource_names == ["data", "data2"]
    rows = result.get_resource("data2").read_rows()
    assert rows == [
        {"parent": "A3001", "comment": "comment1"},
        {"parent": "A3001", "comment": "comment2"},
        {"parent": "A5032", "comment": "comment3"},
    ]
def test_step_table_transpose():
    """Transposing swaps the data's rows and columns."""
    resource = Resource(path="data/transform.csv")
    result = transform(
        resource,
        steps=[
            steps.table_normalize(),
            steps.table_transpose(),
        ],
    )
    assert result.schema == {
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "germany", "type": "integer"},
            {"name": "france", "type": "integer"},
            {"name": "spain", "type": "integer"},
        ]
    }
    assert result.read_rows() == [
        {"name": "population", "germany": 83, "france": 66, "spain": 47},
    ]
def test_step_row_filter_petl_selectrangeclosed():
    """An open-interval formula keeps only rows strictly inside the range."""
    result = transform(
        Resource("data/transform.csv"),
        steps=[
            steps.table_normalize(),
            steps.row_filter(formula="1 < id < 3"),
        ],
    )
    assert result.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    assert result.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
    ]
def test_step_row_filter_petl_selectnone():
    """Filtering on `id is None` matches nothing when every id is populated."""
    result = transform(
        Resource("data/transform.csv"),
        steps=[
            steps.row_filter(formula="id is None"),
        ],
    )
    assert result.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    assert result.read_rows() == []
def test_step_row_filter_petl_rowlenselect():
    """Filtering by a callable on row length keeps every 3-column row.

    Fixed: the callable was passed via the deprecated ``predicat=`` keyword;
    the sibling callable-filter test in this file uses ``function=``, which is
    the current frictionless ``row_filter`` parameter name — use it here too
    for consistency.
    """
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            # Every row in the fixture has exactly 3 cells, so all pass.
            steps.row_filter(function=lambda row: len(row) == 3),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
def test_step_row_subset_duplicates_with_name():
    """After forcing every id to 1, the duplicates subset keeps all rows."""
    source = Resource(path="data/transform.csv")
    source.infer()
    result = transform(
        source,
        steps=[
            steps.field_update(name="id", value=1),
            steps.row_subset(subset="duplicates", field_name="id"),
        ],
    )
    assert result.schema == source.schema
    assert result.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 1, "name": "france", "population": 66},
        {"id": 1, "name": "spain", "population": 47},
    ]
def test_step_row_subset_unique_with_name():
    """After forcing every id to 1, no row is unique on that field."""
    result = transform(
        Resource("data/transform.csv"),
        steps=[
            steps.field_update(name="id", value=1),
            steps.row_subset(subset="unique", field_name="id"),
        ],
    )
    assert result.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    assert result.read_rows() == []
def test_step_table_diff_with_ignore_order():
    """table_diff with ignore_order keeps only rows absent from the other table."""
    source = Resource(path="data/transform.csv")
    source.infer(only_sample=True)
    other = Resource(data=[
        ["name", "id", "population"],
        ["germany", 1, 83],
        ["france", 2, 50],
        ["spain", 3, 47],
    ])
    result = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.table_diff(resource=other, ignore_order=True),
        ],
    )
    assert result.schema == source.schema
    # Only france differs (population 66 vs 50 in the comparison table).
    assert result.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
    ]
def test_step_row_subset_distinct_with_duplicates():
    """With all ids forced to 1, distinct-on-id keeps only the first row."""
    result = transform(
        Resource("data/transform.csv"),
        steps=[
            steps.field_update(name="id", value=1),
            steps.row_subset(subset="distinct", field_name="id"),
        ],
    )
    assert result.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    assert result.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
    ]
def test_step_row_subset_duplicates():
    """The duplicates subset is empty when every row is distinct."""
    result = transform(
        Resource("data/transform.csv"),
        steps=[
            steps.row_subset(subset="duplicates"),
        ],
    )
    assert result.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    assert result.read_rows() == []
def test_step_row_subset_conflicts_from_descriptor_issue_996():
    """row_subset also accepts a raw descriptor dict (camelCase keys)."""
    descriptor = {"subset": "conflicts", "fieldName": "id"}
    result = transform(
        Resource("data/transform.csv"),
        steps=[
            steps.row_subset(descriptor),
        ],
    )
    assert result.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    assert result.read_rows() == []
def test_step_row_slice_with_start_and_step():
    """Slicing rows [1:3:2] yields only the second row of the fixture."""
    result = transform(
        Resource("data/transform.csv"),
        steps=[
            steps.row_slice(start=1, stop=3, step=2),
        ],
    )
    assert result.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    assert result.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
    ]
def test_transform_package():
    """Merging chunk2 into chunk1 and dropping chunk2 leaves one combined resource."""
    package = describe("data/chunk*.csv")
    result = transform(
        package,
        steps=[
            steps.resource_transform(
                name="chunk1",
                steps=[steps.table_merge(resource="chunk2")],
            ),
            steps.resource_remove(name="chunk2"),
        ],
    )
    assert result.resource_names == ["chunk1"]
    assert result.get_resource("chunk1").read_rows() == [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
def test_step_row_ungroup_max():
    """Ungrouping by name with selection=max keeps each group's max-population row."""
    source = Resource(path="data/transform-groups.csv")
    source.infer()
    result = transform(
        source,
        steps=[
            steps.row_ungroup(
                group_name="name",
                selection="max",
                value_name="population",
            ),
        ],
    )
    assert result.schema == source.schema
    assert result.read_rows() == [
        {"id": 3, "name": "france", "population": 66, "year": 2020},
        {"id": 1, "name": "germany", "population": 83, "year": 2020},
        {"id": 5, "name": "spain", "population": 47, "year": 2020},
    ]
def test_step_table_intersect_with_use_hash():
    """Hash-based intersect keeps only rows present in both tables."""
    source = Resource(path="data/transform.csv")
    source.infer(only_sample=True)
    other = Resource(data=[
        ["id", "name", "population"],
        [1, "germany", 83],
        [2, "france", 50],
        [3, "spain", 47],
    ])
    result = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.table_intersect(resource=other, use_hash=True),
        ],
    )
    assert result.schema == source.schema
    # france differs in population (66 vs 50), so it is excluded.
    assert result.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 3, "name": "spain", "population": 47},
    ]
def test_step_row_filter_petl_selectrangeopen():
    """A closed-interval formula keeps all rows with 1 <= id <= 3.

    Fixed: the filter was passed as ``predicat="<formula>..."`` — the
    deprecated pre-v4 frictionless string form. The sibling formula-based
    tests in this file use the ``formula=`` keyword directly, which is the
    current ``row_filter`` API; use it here too for consistency.
    """
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.row_filter(formula="1 <= id <= 3"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
def test_step_table_aggregate():
    """Aggregating population per name produces one summed row per group."""
    result = transform(
        Resource(path="data/transform-groups.csv"),
        steps=[
            steps.table_normalize(),
            steps.table_aggregate(
                group_name="name",
                aggregation={"sum": ("population", sum)},
            ),
        ],
    )
    assert result.schema == {
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "sum"},
        ]
    }
    assert result.read_rows() == [
        {"name": "france", "sum": 120},
        {"name": "germany", "sum": 160},
        {"name": "spain", "sum": 80},
    ]
def test_step_row_sort_with_reverse():
    """Sorting on id with reverse=True yields rows in descending id order."""
    source = Resource(path="data/transform.csv")
    source.infer()
    result = transform(
        source,
        steps=[
            steps.row_sort(field_names=["id"], reverse=True),
        ],
    )
    assert result.schema == source.schema
    assert result.read_rows() == [
        {"id": 3, "name": "spain", "population": 47},
        {"id": 2, "name": "france", "population": 66},
        {"id": 1, "name": "germany", "population": 83},
    ]
def test_step_row_search():
    """Regex search across rows keeps only rows with a cell matching the pattern."""
    result = transform(
        Resource("data/transform.csv"),
        steps=[
            steps.row_search(regex=r"^f.*"),
        ],
    )
    assert result.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    assert result.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
    ]
def test_step_row_subset_unique():
    """The unique subset keeps all rows when none repeat."""
    source = Resource(path="data/transform.csv")
    source.infer()
    result = transform(
        source,
        steps=[
            steps.row_subset(subset="unique"),
        ],
    )
    assert result.schema == source.schema
    assert result.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
def test_step_field_filter():
    """field_filter projects the table down to the named fields."""
    result = transform(
        Resource(path="data/transform.csv"),
        steps=[
            steps.field_filter(names=["id", "name"]),
        ],
    )
    assert result.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
        ]
    }
    assert result.read_rows() == [
        {"id": 1, "name": "germany"},
        {"id": 2, "name": "france"},
        {"id": 3, "name": "spain"},
    ]
def test_step_row_filter_petl_selectisfalse():
    """A falsiness filter on id drops every row (all ids are truthy)."""
    result = transform(
        Resource("data/transform.csv"),
        steps=[
            steps.row_filter(function=lambda row: not bool(row["id"])),
        ],
    )
    assert result.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    assert result.read_rows() == []
def test_step_field_remove():
    """field_remove drops the named field from both schema and rows."""
    result = transform(
        Resource(path="data/transform.csv"),
        steps=[
            steps.field_remove(names=["id"]),
        ],
    )
    assert result.schema == {
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    assert result.read_rows() == [
        {"name": "germany", "population": 83},
        {"name": "france", "population": 66},
        {"name": "spain", "population": 47},
    ]
def test_step_row_filter_petl_selectop():
    """An equality formula keeps only the matching row."""
    result = transform(
        Resource("data/transform.csv"),
        steps=[
            steps.table_normalize(),
            steps.row_filter(formula="id == 1"),
        ],
    )
    assert result.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    assert result.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
    ]
def test_step_table_join_mode_outer():
    """An outer join keeps unmatched rows from both sides, padding with None."""
    notes = Resource(data=[["id", "note"], [1, "beer"], [4, "rum"]])
    result = transform(
        Resource("data/transform.csv"),
        steps=[
            steps.table_normalize(),
            steps.table_join(resource=notes, field_name="id", mode="outer"),
        ],
    )
    assert result.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
            {"name": "note", "type": "string"},
        ]
    }
    assert result.read_rows() == [
        {"id": 1, "name": "germany", "population": 83, "note": "beer"},
        {"id": 2, "name": "france", "population": 66, "note": None},
        {"id": 3, "name": "spain", "population": 47, "note": None},
        {"id": 4, "name": None, "population": None, "note": "rum"},
    ]
def test_step_table_recast():
    """table_recast inverts table_melt, restoring the original table."""
    source = Resource(path="data/transform.csv")
    source.infer()
    result = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.table_melt(field_name="id"),
            steps.table_recast(field_name="id"),
        ],
    )
    assert result.schema == source.schema
    assert result.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
def test_step_row_ungroup_last():
    """Ungrouping by name with selection=last keeps each group's final row."""
    source = Resource(path="data/transform-groups.csv")
    source.infer()
    result = transform(
        source,
        steps=[
            steps.row_ungroup(group_name="name", selection="last"),
        ],
    )
    assert result.schema == source.schema
    assert result.read_rows() == [
        {"id": 4, "name": "france", "population": 54, "year": 1920},
        {"id": 2, "name": "germany", "population": 77, "year": 1920},
        {"id": 6, "name": "spain", "population": 33, "year": 1920},
    ]
def test_step_table_diff_with_ignore_order_from_string_data():
    """table_diff with ignore_order against uncast (string-cell) comparison data.

    Fixed: this test had the same name as an earlier test in the file
    (``test_step_table_diff_with_ignore_order``), so that earlier definition
    was silently shadowed and never collected by pytest. Renamed to reflect
    what distinguishes it: the comparison table's cells are strings and no
    ``table_normalize`` step is applied.
    """
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.table_diff(
                resource=Resource(data=[
                    ["name", "id", "population"],
                    ["germany", "1", "83"],
                    ["france", "2", "50"],
                    ["spain", "3", "47"],
                ]),
                ignore_order=True,
            ),
        ],
    )
    assert target.schema == source.schema
    # Only france differs (population 66 vs "50" in the comparison data).
    assert target.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
    ]
def test_step_row_subset_distinct():
    """Distinct-on-id keeps every row when all ids already differ."""
    source = Resource(path="data/transform.csv")
    source.infer(only_sample=True)
    result = transform(
        source,
        steps=[
            steps.row_subset(subset="distinct", field_name="id"),
        ],
    )
    assert result.schema == source.schema
    assert result.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]