def test_transform_package():
    """Merge ``chunk2`` into ``chunk1`` within a package, then remove ``chunk2``.

    After the pipeline runs, only ``chunk1`` is left and it holds the rows
    of both chunks.
    """
    source = describe("data/chunk*.csv")

    # Steps are built in the same order the pipeline applies them.
    merge_into_chunk1 = steps.resource_transform(
        name="chunk1",
        steps=[steps.table_merge(resource="chunk2")],
    )
    drop_chunk2 = steps.resource_remove(name="chunk2")
    target = transform(source, steps=[merge_into_chunk1, drop_chunk2])

    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    assert target.resource_names == ["chunk1"]
    merged = target.get_resource("chunk1")
    assert merged.read_rows() == expected_rows
def test_step_table_merge():
    """Merge an in-memory table that carries an extra ``note`` column.

    The expected schema gains ``note`` after the source fields, and any
    cell missing on one side of the merge is expected to read as ``None``.
    """
    # TODO: renamed population header to people
    source = Resource(path="data/transform.csv")
    extra = Resource(data=[["id", "name", "note"], [4, "malta", "island"]])
    target = transform(source, steps=[steps.table_merge(resource=extra)])

    expected_fields = [
        {"name": field_name, "type": field_type}
        for field_name, field_type in (
            ("id", "integer"),
            ("name", "string"),
            ("population", "integer"),
            ("note", "string"),
        )
    ]
    expected_rows = [
        {"id": 1, "name": "germany", "population": 83, "note": None},
        {"id": 2, "name": "france", "population": 66, "note": None},
        {"id": 3, "name": "spain", "population": 47, "note": None},
        {"id": 4, "name": "malta", "population": None, "note": "island"},
    ]
    assert target.schema == {"fields": expected_fields}
    assert target.read_rows() == expected_rows
def test_step_table_merge_with_sort():
    """Merge with ``sort_by_field`` set to ``population``.

    The merged row (population 1) is expected first, followed by the
    source rows in ascending population order.
    """
    source = Resource("data/transform.csv")
    extra = Resource(data=[["id", "name", "population"], [4, "malta", 1]])
    merge_sorted = steps.table_merge(
        resource=extra,
        sort_by_field=["population"],
    )
    target = transform(source, steps=[merge_sorted])

    expected_fields = [
        {"name": field_name, "type": field_type}
        for field_name, field_type in (
            ("id", "integer"),
            ("name", "string"),
            ("population", "integer"),
        )
    ]
    expected_rows = [
        {"id": 4, "name": "malta", "population": 1},
        {"id": 3, "name": "spain", "population": 47},
        {"id": 2, "name": "france", "population": 66},
        {"id": 1, "name": "germany", "population": 83},
    ]
    assert target.schema == {"fields": expected_fields}
    assert target.read_rows() == expected_rows
def test_step_merge_ignore_fields():
    """Merge a table with different headers using ``ignore_fields=True``.

    The expected schema keeps the source field names. The merged row's
    values land in ``id`` and ``name``, and ``population`` is ``None``.
    """
    source = Resource(path="data/transform.csv")
    # Headers deliberately differ from the source ("id2"/"name2").
    extra = Resource(data=[["id2", "name2"], [4, "malta"]])
    merge_by_position = steps.table_merge(resource=extra, ignore_fields=True)
    target = transform(source, steps=[merge_by_position])

    expected_fields = [
        {"name": field_name, "type": field_type}
        for field_name, field_type in (
            ("id", "integer"),
            ("name", "string"),
            ("population", "integer"),
        )
    ]
    expected_rows = [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
        {"id": 4, "name": "malta", "population": None},
    ]
    assert target.schema == {"fields": expected_fields}
    assert target.read_rows() == expected_rows
def test_step_table_merge_with_field_names():
    """Merge with ``field_names`` limited to ``id`` and ``name``.

    Only those two fields are expected in the resulting schema and rows;
    ``population`` and ``note`` do not appear.
    """
    source = Resource("data/transform.csv")
    extra = Resource(data=[["id", "name", "note"], [4, "malta", "island"]])
    merge_selected = steps.table_merge(
        resource=extra,
        field_names=["id", "name"],
    )
    target = transform(source, steps=[merge_selected])

    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
    ]
    expected_rows = [
        {"id": 1, "name": "germany"},
        {"id": 2, "name": "france"},
        {"id": 3, "name": "spain"},
        {"id": 4, "name": "malta"},
    ]
    assert target.schema == {"fields": expected_fields}
    assert target.read_rows() == expected_rows