def test_step_table_join_mode_outer():
    """Outer join on ``id``: unmatched rows from either side are retained.

    The joined resource shares ids 1 with the source and adds id 4; the
    expected output carries ``None`` in the columns a side could not supply.
    """
    source = Resource("data/transform.csv")
    normalize = steps.table_normalize()
    notes = Resource(data=[["id", "note"], [1, "beer"], [4, "rum"]])
    join = steps.table_join(resource=notes, field_name="id", mode="outer")
    target = transform(source, steps=[normalize, join])

    # The joined "note" column is appended after the source fields.
    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
        {"name": "note", "type": "string"},
    ]
    assert target.schema == {"fields": expected_fields}

    columns = ("id", "name", "population", "note")
    records = [
        (1, "germany", 83, "beer"),
        (2, "france", 66, None),
        (3, "spain", 47, None),
        (4, None, None, "rum"),
    ]
    assert target.read_rows() == [dict(zip(columns, record)) for record in records]
# Example #2
# 0
def test_step_table_join_mode_left_from_descriptor_issue_996():
    """Left join configured through a descriptor dict (issue 996).

    ``fieldName`` and ``mode`` are supplied in a positional descriptor while
    the joined resource is passed as a keyword; every source row is expected
    in the output, with ``None`` as the note where no match exists.
    """
    source = Resource("data/transform.csv")
    normalize = steps.table_normalize()
    descriptor = {"fieldName": "id", "mode": "left"}
    notes = Resource(data=[["id", "note"], [1, "beer"], [2, "vine"]])
    join = steps.table_join(descriptor, resource=notes)
    target = transform(source, steps=[normalize, join])

    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
        {"name": "note", "type": "string"},
    ]
    assert target.schema == {"fields": expected_fields}

    columns = ("id", "name", "population", "note")
    records = [
        (1, "germany", 83, "beer"),
        (2, "france", 66, "vine"),
        (3, "spain", 47, None),
    ]
    assert target.read_rows() == [dict(zip(columns, record)) for record in records]
def test_step_table_join_hash_is_true():
    """Join on ``id`` with ``use_hash=True``.

    Only the source rows whose id appears in the joined resource (1 and 2)
    are expected in the output, each extended with its note.
    """
    source = Resource("data/transform.csv")
    normalize = steps.table_normalize()
    notes = Resource(data=[["id", "note"], [1, "beer"], [2, "vine"]])
    join = steps.table_join(resource=notes, field_name="id", use_hash=True)
    target = transform(source, steps=[normalize, join])

    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
        {"name": "note", "type": "string"},
    ]
    assert target.schema == {"fields": expected_fields}

    columns = ("id", "name", "population", "note")
    records = [
        (1, "germany", 83, "beer"),
        (2, "france", 66, "vine"),
    ]
    assert target.read_rows() == [dict(zip(columns, record)) for record in records]
# Example #4
# 0
def test_step_table_join_with_name_is_not_first_field():
    """Join on ``name``, a key that is not the first field of the source.

    The expected schema keeps the source field order with ``note`` appended;
    the expected rows are listed france first, then germany.
    """
    source = Resource(path="data/transform.csv")
    notes = Resource(
        data=[["name", "note"], ["germany", "beer"], ["france", "vine"]]
    )
    join = steps.table_join(resource=notes, field_name="name")
    target = transform(source, steps=[join])

    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
        {"name": "note", "type": "string"},
    ]
    assert target.schema == {"fields": expected_fields}

    columns = ("id", "name", "population", "note")
    records = [
        (2, "france", 66, "vine"),
        (1, "germany", 83, "beer"),
    ]
    assert target.read_rows() == [dict(zip(columns, record)) for record in records]
# Example #5
# 0
def test_step_table_join_mode_anti():
    """Anti join: only source rows without a match are expected.

    The joined resource contains ids 1 and 4, so the expected output is the
    source rows for ids 2 and 3, and the schema gains no ``note`` field.
    No ``field_name`` is passed to the join step in this test.
    """
    source = Resource(path="data/transform.csv")
    normalize = steps.table_normalize()
    notes = Resource(data=[["id", "note"], [1, "beer"], [4, "rum"]])
    join = steps.table_join(resource=notes, mode="anti")
    target = transform(source, steps=[normalize, join])

    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
    ]
    assert target.schema == {"fields": expected_fields}

    columns = ("id", "name", "population")
    records = [
        (2, "france", 66),
        (3, "spain", 47),
    ]
    assert target.read_rows() == [dict(zip(columns, record)) for record in records]