def test_step_row_subset_unique_with_name():
    """After forcing every "id" to 1, no row is unique on that field."""
    src = Resource(path="data/transform.csv")
    src.infer(only_sample=True)
    pipeline = [
        steps.field_update(name="id", value=1),
        steps.row_subset(subset="unique", field_name="id"),
    ]
    out = transform(src, steps=pipeline)
    assert out.schema == src.schema
    assert out.read_rows() == []
def test_transform_resource():
    """Resource.transform with normalize + melt reshapes rows to id/variable/value."""
    src = Resource(path="data/transform.csv")
    src.infer()
    out = src.transform(
        steps=[
            steps.table_normalize(),
            steps.table_melt(field_name="id"),
        ],
    )
    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "variable"},
            {"name": "value"},
        ]
    }
    expected_rows = [
        {"id": 1, "variable": "name", "value": "germany"},
        {"id": 1, "variable": "population", "value": 83},
        {"id": 2, "variable": "name", "value": "france"},
        {"id": 2, "variable": "population", "value": 66},
        {"id": 3, "variable": "name", "value": "spain"},
        {"id": 3, "variable": "population", "value": 47},
    ]
    assert out.schema == expected_schema
    assert out.read_rows() == expected_rows
def test_step_row_search_with_name():
    """row_search limited to one field matches only rows whose "name" fits the regex."""
    src = Resource(path="data/transform.csv")
    src.infer()
    out = transform(
        src,
        steps=[steps.row_search(regex=r"^f.*", field_name="name")],
    )
    assert out.schema == src.schema
    assert out.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
    ]
def test_step_row_filter_petl_selectcontains():
    """row_filter with a substring-containment formula (petl selectcontains)."""
    src = Resource(path="data/transform.csv")
    src.infer()
    out = transform(
        src,
        steps=[steps.row_filter(formula="'er' in name")],
    )
    assert out.schema == src.schema
    assert out.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
    ]
def test_sql_parser_write_string_pk_issue_777_mysql(mysql_url):
    """Writing a resource whose primary key is a string column to MySQL (issue #777).

    MySQL needs a bounded length on a string PK, hence the maxLength constraint.
    """
    src = Resource("data/table.csv")
    src.infer()
    src.schema.primary_key = ["name"]
    src.schema.get_field("name").constraints["maxLength"] = 100
    written = src.write(mysql_url, dialect=SqlDialect(table="name"))
    with written:
        assert written.schema.primary_key == ["name"]
        assert written.header == ["id", "name"]
        assert written.read_rows() == [
            {"id": 1, "name": "english"},
            {"id": 2, "name": "中国人"},
        ]
def test_step_row_slice_with_start_and_step():
    """row_slice(start=1, stop=3, step=2) keeps only the second data row."""
    src = Resource(path="data/transform.csv")
    src.infer()
    out = transform(
        src,
        steps=[steps.row_slice(start=1, stop=3, step=2)],
    )
    assert out.schema == src.schema
    assert out.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
    ]
def test_step_row_search_with_negate():
    """negate=True inverts row_search: rows matching the regex are dropped."""
    src = Resource(path="data/transform.csv")
    src.infer()
    out = transform(
        src,
        steps=[steps.row_search(regex=r"^f.*", negate=True)],
    )
    assert out.schema == src.schema
    assert out.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 3, "name": "spain", "population": 47},
    ]
def test_step_row_slice_with_head():
    """row_slice(head=2) keeps only the first two rows."""
    src = Resource(path="data/transform.csv")
    src.infer()
    out = transform(src, steps=[steps.row_slice(head=2)])
    assert out.schema == src.schema
    assert out.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
    ]
def test_step_row_filter_petl_selectnoin():
    """row_filter with "not in" formula (petl selectnotin)."""
    src = Resource(path="data/transform.csv")
    src.infer()
    pipeline = [
        steps.table_normalize(),
        steps.row_filter(formula="id not in [2, 3]"),
    ]
    out = transform(src, steps=pipeline)
    assert out.schema == src.schema
    assert out.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
    ]
def test_step_row_filter_petl_selectrangeclosed():
    """row_filter with an exclusive range formula (petl selectrangeopen-style bounds)."""
    src = Resource(path="data/transform.csv")
    src.infer()
    pipeline = [
        steps.table_normalize(),
        steps.row_filter(formula="1 < id < 3"),
    ]
    out = transform(src, steps=pipeline)
    assert out.schema == src.schema
    assert out.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
    ]
def test_step_row_filter_petl_selectgt():
    """row_filter with a greater-than formula (petl selectgt)."""
    src = Resource(path="data/transform.csv")
    src.infer()
    pipeline = [
        steps.table_normalize(),
        steps.row_filter(formula="id > 2"),
    ]
    out = transform(src, steps=pipeline)
    assert out.schema == src.schema
    assert out.read_rows() == [
        {"id": 3, "name": "spain", "population": 47},
    ]
def test_step_row_slice_with_tail():
    """row_slice(tail=2) keeps only the last two rows."""
    src = Resource(path="data/transform.csv")
    src.infer()
    out = transform(src, steps=[steps.row_slice(tail=2)])
    assert out.schema == src.schema
    assert out.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
def test_step_row_subset_distinct_with_duplicates():
    """With every "id" forced to 1, "distinct" collapses the table to one row."""
    src = Resource(path="data/transform.csv")
    src.infer()
    pipeline = [
        steps.field_update(name="id", value=1),
        steps.row_subset(subset="distinct", field_name="id"),
    ]
    out = transform(src, steps=pipeline)
    assert out.schema == src.schema
    assert out.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
    ]
def test_step_row_filter_petl_rowlenselect():
    """row_filter by row length keeps every 3-field row (petl rowlenselect)."""
    src = Resource(path="data/transform.csv")
    src.infer()
    out = transform(
        src,
        steps=[steps.row_filter(function=lambda row: len(row) == 3)],
    )
    assert out.schema == src.schema
    assert out.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
def test_step_row_filter_with_function():
    """row_filter with a Python callable over the typed row values."""
    src = Resource(path="data/transform.csv")
    src.infer()
    pipeline = [
        steps.table_normalize(),
        steps.row_filter(function=lambda row: row["id"] > 1),
    ]
    out = transform(src, steps=pipeline)
    assert out.schema == src.schema
    assert out.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
def test_step_row_ungroup_last():
    """row_ungroup with selection="last" keeps the last row of each name group."""
    src = Resource(path="data/transform-groups.csv")
    src.infer()
    out = transform(
        src,
        steps=[steps.row_ungroup(group_name="name", selection="last")],
    )
    assert out.schema == src.schema
    assert out.read_rows() == [
        {"id": 4, "name": "france", "population": 54, "year": 1920},
        {"id": 2, "name": "germany", "population": 77, "year": 1920},
        {"id": 6, "name": "spain", "population": 33, "year": 1920},
    ]
def test_step_row_subset_unique():
    """With no duplicates in the source, subset="unique" keeps every row."""
    src = Resource(path="data/transform.csv")
    src.infer()
    out = transform(src, steps=[steps.row_subset(subset="unique")])
    assert out.schema == src.schema
    assert out.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
def test_step_row_filter_petl_selectisnone():
    """row_filter on "id is not None" keeps all rows (petl selectisnotnone)."""
    src = Resource(path="data/transform.csv")
    src.infer()
    out = transform(
        src,
        steps=[steps.row_filter(formula="id is not None")],
    )
    assert out.schema == src.schema
    assert out.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
def test_step_row_sort_with_reverse():
    """row_sort with reverse=True orders rows by descending id."""
    src = Resource(path="data/transform.csv")
    src.infer()
    out = transform(
        src,
        steps=[steps.row_sort(field_names=["id"], reverse=True)],
    )
    assert out.schema == src.schema
    assert out.read_rows() == [
        {"id": 3, "name": "spain", "population": 47},
        {"id": 2, "name": "france", "population": 66},
        {"id": 1, "name": "germany", "population": 83},
    ]
def test_step_table_validate():
    """table_validate raises a step-error when a cell violates its field type.

    cell_set forces a non-integer value into the integer "population" field;
    the validation step then fails lazily, on read_rows().
    """
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.cell_set(field_name="population", value="bad"),
            steps.table_validate(),
        ],
    )
    assert target.schema == source.schema
    with pytest.raises(FrictionlessException) as excinfo:
        target.read_rows()
    error = excinfo.value.error
    assert error.code == "step-error"
    # Membership test instead of relying on str.count() truthiness —
    # the intent is "the note mentions the type error", not a count.
    assert 'type is "integer/default"' in error.note
def test_step_row_ungroup_max():
    """row_ungroup keeps, per name group, the row with the maximum population."""
    src = Resource(path="data/transform-groups.csv")
    src.infer()
    ungroup = steps.row_ungroup(
        group_name="name",
        selection="max",
        value_name="population",
    )
    out = transform(src, steps=[ungroup])
    assert out.schema == src.schema
    assert out.read_rows() == [
        {"id": 3, "name": "france", "population": 66, "year": 2020},
        {"id": 1, "name": "germany", "population": 83, "year": 2020},
        {"id": 5, "name": "spain", "population": 47, "year": 2020},
    ]
def test_step_row_filter_petl_selecteq():
    """row_filter with an equality formula keeps only the matching row (petl selecteq)."""
    source = Resource(path="data/transform.csv")
    source.infer(only_sample=True)
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            # formula= is the spelling used by the other row_filter tests in
            # this module; predicat="<formula>..." is the legacy form of the
            # same filter.
            steps.row_filter(formula="id == 1"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
    ]
def test_step_row_split():
    """row_split duplicates a row per fragment when "name" is split on "a"."""
    src = Resource(path="data/transform.csv")
    src.infer()
    out = transform(
        src,
        steps=[steps.row_split(field_name="name", pattern="a")],
    )
    assert out.schema == src.schema
    assert out.read_rows() == [
        {"id": 1, "name": "germ", "population": 83},
        {"id": 1, "name": "ny", "population": 83},
        {"id": 2, "name": "fr", "population": 66},
        {"id": 2, "name": "nce", "population": 66},
        {"id": 3, "name": "sp", "population": 47},
        {"id": 3, "name": "in", "population": 47},
    ]
def test_resource_infer_source_non_tabular():
    """Inferring a non-tabular (text) source yields a plain data-resource descriptor."""
    resource = Resource(path="data/text.txt")
    resource.infer(stats=True)
    assert resource.metadata_valid
    # Hash/byte stats depend on line endings, so only check them on Unix.
    if IS_UNIX:
        expected = {
            "name": "text",
            "path": "data/text.txt",
            "profile": "data-resource",
            "scheme": "file",
            "format": "txt",
            "hashing": "md5",
            "encoding": "utf-8",
            "stats": {
                "hash": "e1cbb0c3879af8347246f12c559a86b5",
                "bytes": 5,
            },
        }
        assert resource == expected
def test_resource_infer_source_non_tabular_legacy_metadata():
    """Legacy-descriptor variant of inferring a non-tabular (text) source.

    NOTE(review): renamed — this function previously had the same name as
    another test in this module, so under pytest only one of the two could
    ever be collected and run. It expects the older flat descriptor layout
    (top-level hash/bytes/rows, compression keys); confirm it is still
    relevant for the current library version or remove it.
    """
    resource = Resource(path="data/text.txt")
    resource.infer()
    assert resource.metadata_valid
    assert resource == {
        "name": "text",
        "path": "data/text.txt",
        "hash": "e1cbb0c3879af8347246f12c559a86b5",
        "bytes": 5,
        "rows": 0,
        "profile": "data-resource",
        "scheme": "file",
        "format": "txt",
        "hashing": "md5",
        "encoding": "utf-8",
        "compression": "no",
        "compressionPath": "",
    }
def test_resource_infer():
    """Full inference on a tabular CSV produces the complete descriptor."""
    resource = Resource(path="data/table.csv")
    resource.infer()
    assert resource.metadata_valid
    expected = {
        "path": "data/table.csv",
        "profile": "tabular-data-resource",
        "name": "table",
        "scheme": "file",
        "format": "csv",
        "hashing": "md5",
        "encoding": "utf-8",
        "compression": "no",
        "compressionPath": "",
        "control": {"newline": ""},
        "dialect": {},
        "query": {},
        "schema": {
            "fields": [
                {"name": "id", "type": "integer"},
                {"name": "name", "type": "string"},
            ]
        },
        "stats": {
            "hash": "6c2c61dd9b0e9c6876139a449ed87933",
            "bytes": 30,
            "fields": 2,
            "rows": 2,
        },
    }
    assert resource == expected
def test_resource_source_multipart_infer():
    """Inference over a multipart (chunked) path reads the parts as one stream."""
    resource = Resource({"path": ["data/chunk1.csv", "data/chunk2.csv"]})
    resource.infer()
    expected = {
        "path": ["data/chunk1.csv", "data/chunk2.csv"],
        "profile": "tabular-data-resource",
        "name": "memory",
        "scheme": "stream",
        "format": "csv",
        "hashing": "md5",
        "encoding": "utf-8",
        "compression": "no",
        "compressionPath": "",
        "control": {"newline": ""},
        "dialect": {},
        "query": {},
        "schema": {
            "fields": [
                {"name": "id", "type": "integer"},
                {"name": "name", "type": "string"},
            ]
        },
        "stats": {
            "hash": "6c2c61dd9b0e9c6876139a449ed87933",
            "bytes": 30,
            "fields": 2,
            "rows": 2,
        },
    }
    assert resource == expected
def test_step_table_diff():
    """table_diff keeps only rows absent from the reference resource.

    Only france differs (population 66 vs 50), so only france survives.
    """
    src = Resource(path="data/transform.csv")
    src.infer()
    reference = Resource(
        data=[
            ["id", "name", "population"],
            [1, "germany", 83],
            [2, "france", 50],
            [3, "spain", 47],
        ]
    )
    pipeline = [
        steps.table_normalize(),
        steps.table_diff(resource=reference),
    ]
    out = transform(src, steps=pipeline)
    assert out.schema == src.schema
    assert out.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
    ]
def test_step_row_filter_petl_selectne():
    """row_filter with an inequality formula drops the matching row (petl selectne)."""
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            # formula= is the spelling used by the other row_filter tests in
            # this module; predicat="<formula>..." is the legacy form of the
            # same filter.
            steps.row_filter(formula="id != 1"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
def test_step_row_filter_petl_selectrangeopenleft():
    """row_filter with a half-open range formula (petl selectrangeopenleft)."""
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            # formula= is the spelling used by the other row_filter tests in
            # this module; predicat="<formula>..." is the legacy form of the
            # same filter.
            steps.row_filter(formula="1 <= id < 3"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
    ]