Example #1
0
def test_step_row_filter_petl_selectrangeopen():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.row_filter(predicat="<formula>1 <= id <= 3"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {
            "id": 1,
            "name": "germany",
            "population": 83
        },
        {
            "id": 2,
            "name": "france",
            "population": 66
        },
        {
            "id": 3,
            "name": "spain",
            "population": 47
        },
    ]
Example #2
0
def test_step_row_filter_petl_selectop():
    source = Resource("data/transform.csv")
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.row_filter(formula="id == 1"),
        ],
    )
    assert target.schema == {
        "fields": [
            {
                "name": "id",
                "type": "integer"
            },
            {
                "name": "name",
                "type": "string"
            },
            {
                "name": "population",
                "type": "integer"
            },
        ]
    }
    assert target.read_rows() == [
        {
            "id": 1,
            "name": "germany",
            "population": 83
        },
    ]
Example #3
0
def test_step_row_filter_petl_rowlenselect():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.row_filter(predicat=lambda row: len(row) == 3),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {
            "id": 1,
            "name": "germany",
            "population": 83
        },
        {
            "id": 2,
            "name": "france",
            "population": 66
        },
        {
            "id": 3,
            "name": "spain",
            "population": 47
        },
    ]
Example #4
0
def test_step_row_filter_petl_selectrangeclosed():
    source = Resource("data/transform.csv")
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.row_filter(formula="1 < id < 3"),
        ],
    )
    assert target.schema == {
        "fields": [
            {
                "name": "id",
                "type": "integer"
            },
            {
                "name": "name",
                "type": "string"
            },
            {
                "name": "population",
                "type": "integer"
            },
        ]
    }
    assert target.read_rows() == [
        {
            "id": 2,
            "name": "france",
            "population": 66
        },
    ]
Example #5
0
def test_step_row_filter_petl_selectisfalse():
    source = Resource("data/transform.csv")
    target = transform(
        source,
        steps=[
            steps.row_filter(function=lambda row: not bool(row["id"])),
        ],
    )
    assert target.schema == {
        "fields": [
            {
                "name": "id",
                "type": "integer"
            },
            {
                "name": "name",
                "type": "string"
            },
            {
                "name": "population",
                "type": "integer"
            },
        ]
    }
    assert target.read_rows() == []
Example #6
0
def test_step_row_filter_petl_selectnone():
    source = Resource("data/transform.csv")
    target = transform(
        source,
        steps=[
            steps.row_filter(formula="id is None"),
        ],
    )
    assert target.schema == {
        "fields": [
            {
                "name": "id",
                "type": "integer"
            },
            {
                "name": "name",
                "type": "string"
            },
            {
                "name": "population",
                "type": "integer"
            },
        ]
    }
    assert target.read_rows() == []
Example #7
0
def test_step_row_filter_petl_selectisnone():
    source = Resource(path="data/transform.csv")
    source.infer(only_sample=True)
    target = transform(
        source,
        steps=[
            steps.row_filter(predicat="<formula>id is not None"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {
            "id": 1,
            "name": "germany",
            "population": 83
        },
        {
            "id": 2,
            "name": "france",
            "population": 66
        },
        {
            "id": 3,
            "name": "spain",
            "population": 47
        },
    ]
Example #8
0
def test_step_row_filter_petl_selectisinstance():
    source = Resource(path="data/transform.csv")
    source.infer(only_sample=True)
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.row_filter(predicat=lambda row: isinstance(row["id"], int)),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {
            "id": 1,
            "name": "germany",
            "population": 83
        },
        {
            "id": 2,
            "name": "france",
            "population": 66
        },
        {
            "id": 3,
            "name": "spain",
            "population": 47
        },
    ]
Example #9
0
def test_step_row_filter_petl_selectisfalse():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.row_filter(predicat=lambda row: not bool(row["id"])),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == []
Example #10
0
def test_step_row_filter_petl_selectnone():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.row_filter(predicat="<formula>id is None"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == []
Example #11
0
def test_step_row_filter_petl_selectcontains():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.row_filter(formula="'er' in name"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
    ]
Example #12
0
def test_step_row_filter_petl_selectgt():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.row_filter(formula="id > 2"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 3, "name": "spain", "population": 47},
    ]
Example #13
0
def test_step_row_filter_petl_selectrangeclosed():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.row_filter(formula="1 < id < 3"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
    ]
Example #14
0
def test_step_row_filter_petl_selectnoin():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.row_filter(formula="id not in [2, 3]"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
    ]
Example #15
0
def test_step_row_filter_with_function():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.row_filter(function=lambda row: row["id"] > 1),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
Example #16
0
def test_step_row_filter_petl_selectistrue():
    source = Resource(path="data/transform.csv")
    source.infer()
    target = transform(
        source,
        steps=[
            steps.row_filter(function=lambda row: bool(row["id"])),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
Example #17
0
def test_step_row_filter_petl_selecteq():
    source = Resource(path="data/transform.csv")
    source.infer(only_sample=True)
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.row_filter(predicat="<formula>id == 1"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {
            "id": 1,
            "name": "germany",
            "population": 83
        },
    ]
Example #18
0
def test_step_row_filter_petl_selectisinstance():
    source = Resource("data/transform.csv")
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.row_filter(function=lambda row: isinstance(row["id"], int)),
        ],
    )
    assert target.schema == {
        "fields": [
            {
                "name": "id",
                "type": "integer"
            },
            {
                "name": "name",
                "type": "string"
            },
            {
                "name": "population",
                "type": "integer"
            },
        ]
    }
    assert target.read_rows() == [
        {
            "id": 1,
            "name": "germany",
            "population": 83
        },
        {
            "id": 2,
            "name": "france",
            "population": 66
        },
        {
            "id": 3,
            "name": "spain",
            "population": 47
        },
    ]
Example #19
0
def test_step_row_filter_petl_selectisnone():
    source = Resource("data/transform.csv")
    target = transform(
        source,
        steps=[
            steps.row_filter(formula="id is not None"),
        ],
    )
    assert target.schema == {
        "fields": [
            {
                "name": "id",
                "type": "integer"
            },
            {
                "name": "name",
                "type": "string"
            },
            {
                "name": "population",
                "type": "integer"
            },
        ]
    }
    assert target.read_rows() == [
        {
            "id": 1,
            "name": "germany",
            "population": 83
        },
        {
            "id": 2,
            "name": "france",
            "population": 66
        },
        {
            "id": 3,
            "name": "spain",
            "population": 47
        },
    ]
Example #20
0
def test_step_row_filter():
    source = Resource(path="data/transform.csv")
    source.infer(only_sample=True)
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.row_filter(predicat="<formula>id > 1"),
        ],
    )
    assert target.schema == source.schema
    assert target.read_rows() == [
        {
            "id": 2,
            "name": "france",
            "population": 66
        },
        {
            "id": 3,
            "name": "spain",
            "population": 47
        },
    ]