def test_step_row_filter_petl_selectrangeopen(): source = Resource(path="data/transform.csv") source.infer() target = transform( source, steps=[ steps.table_normalize(), steps.row_filter(predicat="<formula>1 <= id <= 3"), ], ) assert target.schema == source.schema assert target.read_rows() == [ { "id": 1, "name": "germany", "population": 83 }, { "id": 2, "name": "france", "population": 66 }, { "id": 3, "name": "spain", "population": 47 }, ]
def test_step_row_filter_petl_selectop(): source = Resource("data/transform.csv") target = transform( source, steps=[ steps.table_normalize(), steps.row_filter(formula="id == 1"), ], ) assert target.schema == { "fields": [ { "name": "id", "type": "integer" }, { "name": "name", "type": "string" }, { "name": "population", "type": "integer" }, ] } assert target.read_rows() == [ { "id": 1, "name": "germany", "population": 83 }, ]
def test_step_row_filter_petl_rowlenselect(): source = Resource(path="data/transform.csv") source.infer() target = transform( source, steps=[ steps.row_filter(predicat=lambda row: len(row) == 3), ], ) assert target.schema == source.schema assert target.read_rows() == [ { "id": 1, "name": "germany", "population": 83 }, { "id": 2, "name": "france", "population": 66 }, { "id": 3, "name": "spain", "population": 47 }, ]
def test_step_row_filter_petl_selectrangeclosed(): source = Resource("data/transform.csv") target = transform( source, steps=[ steps.table_normalize(), steps.row_filter(formula="1 < id < 3"), ], ) assert target.schema == { "fields": [ { "name": "id", "type": "integer" }, { "name": "name", "type": "string" }, { "name": "population", "type": "integer" }, ] } assert target.read_rows() == [ { "id": 2, "name": "france", "population": 66 }, ]
def test_step_row_filter_petl_selectisfalse(): source = Resource("data/transform.csv") target = transform( source, steps=[ steps.row_filter(function=lambda row: not bool(row["id"])), ], ) assert target.schema == { "fields": [ { "name": "id", "type": "integer" }, { "name": "name", "type": "string" }, { "name": "population", "type": "integer" }, ] } assert target.read_rows() == []
def test_step_row_filter_petl_selectnone(): source = Resource("data/transform.csv") target = transform( source, steps=[ steps.row_filter(formula="id is None"), ], ) assert target.schema == { "fields": [ { "name": "id", "type": "integer" }, { "name": "name", "type": "string" }, { "name": "population", "type": "integer" }, ] } assert target.read_rows() == []
def test_step_row_filter_petl_selectisnone(): source = Resource(path="data/transform.csv") source.infer(only_sample=True) target = transform( source, steps=[ steps.row_filter(predicat="<formula>id is not None"), ], ) assert target.schema == source.schema assert target.read_rows() == [ { "id": 1, "name": "germany", "population": 83 }, { "id": 2, "name": "france", "population": 66 }, { "id": 3, "name": "spain", "population": 47 }, ]
def test_step_row_filter_petl_selectisinstance(): source = Resource(path="data/transform.csv") source.infer(only_sample=True) target = transform( source, steps=[ steps.table_normalize(), steps.row_filter(predicat=lambda row: isinstance(row["id"], int)), ], ) assert target.schema == source.schema assert target.read_rows() == [ { "id": 1, "name": "germany", "population": 83 }, { "id": 2, "name": "france", "population": 66 }, { "id": 3, "name": "spain", "population": 47 }, ]
def test_step_row_filter_petl_selectisfalse(): source = Resource(path="data/transform.csv") source.infer() target = transform( source, steps=[ steps.row_filter(predicat=lambda row: not bool(row["id"])), ], ) assert target.schema == source.schema assert target.read_rows() == []
def test_step_row_filter_petl_selectnone(): source = Resource(path="data/transform.csv") source.infer() target = transform( source, steps=[ steps.row_filter(predicat="<formula>id is None"), ], ) assert target.schema == source.schema assert target.read_rows() == []
def test_step_row_filter_petl_selectcontains(): source = Resource(path="data/transform.csv") source.infer() target = transform( source, steps=[ steps.row_filter(formula="'er' in name"), ], ) assert target.schema == source.schema assert target.read_rows() == [ {"id": 1, "name": "germany", "population": 83}, ]
def test_step_row_filter_petl_selectgt(): source = Resource(path="data/transform.csv") source.infer() target = transform( source, steps=[ steps.table_normalize(), steps.row_filter(formula="id > 2"), ], ) assert target.schema == source.schema assert target.read_rows() == [ {"id": 3, "name": "spain", "population": 47}, ]
def test_step_row_filter_petl_selectrangeclosed(): source = Resource(path="data/transform.csv") source.infer() target = transform( source, steps=[ steps.table_normalize(), steps.row_filter(formula="1 < id < 3"), ], ) assert target.schema == source.schema assert target.read_rows() == [ {"id": 2, "name": "france", "population": 66}, ]
def test_step_row_filter_petl_selectnoin(): source = Resource(path="data/transform.csv") source.infer() target = transform( source, steps=[ steps.table_normalize(), steps.row_filter(formula="id not in [2, 3]"), ], ) assert target.schema == source.schema assert target.read_rows() == [ {"id": 1, "name": "germany", "population": 83}, ]
def test_step_row_filter_with_function(): source = Resource(path="data/transform.csv") source.infer() target = transform( source, steps=[ steps.table_normalize(), steps.row_filter(function=lambda row: row["id"] > 1), ], ) assert target.schema == source.schema assert target.read_rows() == [ {"id": 2, "name": "france", "population": 66}, {"id": 3, "name": "spain", "population": 47}, ]
def test_step_row_filter_petl_selectistrue(): source = Resource(path="data/transform.csv") source.infer() target = transform( source, steps=[ steps.row_filter(function=lambda row: bool(row["id"])), ], ) assert target.schema == source.schema assert target.read_rows() == [ {"id": 1, "name": "germany", "population": 83}, {"id": 2, "name": "france", "population": 66}, {"id": 3, "name": "spain", "population": 47}, ]
def test_step_row_filter_petl_selecteq(): source = Resource(path="data/transform.csv") source.infer(only_sample=True) target = transform( source, steps=[ steps.table_normalize(), steps.row_filter(predicat="<formula>id == 1"), ], ) assert target.schema == source.schema assert target.read_rows() == [ { "id": 1, "name": "germany", "population": 83 }, ]
def test_step_row_filter_petl_selectisinstance(): source = Resource("data/transform.csv") target = transform( source, steps=[ steps.table_normalize(), steps.row_filter(function=lambda row: isinstance(row["id"], int)), ], ) assert target.schema == { "fields": [ { "name": "id", "type": "integer" }, { "name": "name", "type": "string" }, { "name": "population", "type": "integer" }, ] } assert target.read_rows() == [ { "id": 1, "name": "germany", "population": 83 }, { "id": 2, "name": "france", "population": 66 }, { "id": 3, "name": "spain", "population": 47 }, ]
def test_step_row_filter_petl_selectisnone(): source = Resource("data/transform.csv") target = transform( source, steps=[ steps.row_filter(formula="id is not None"), ], ) assert target.schema == { "fields": [ { "name": "id", "type": "integer" }, { "name": "name", "type": "string" }, { "name": "population", "type": "integer" }, ] } assert target.read_rows() == [ { "id": 1, "name": "germany", "population": 83 }, { "id": 2, "name": "france", "population": 66 }, { "id": 3, "name": "spain", "population": 47 }, ]
def test_step_row_filter(): source = Resource(path="data/transform.csv") source.infer(only_sample=True) target = transform( source, steps=[ steps.table_normalize(), steps.row_filter(predicat="<formula>id > 1"), ], ) assert target.schema == source.schema assert target.read_rows() == [ { "id": 2, "name": "france", "population": 66 }, { "id": 3, "name": "spain", "population": 47 }, ]