Esempio n. 1
0
def test_trigger_connection_fail():
    """Check that ``sync_and_poll`` raises ``Failure`` when retries run out.

    No mocked response is registered in this test before the sync is
    triggered; the expected error message is "Exceeded max number of retries".
    """
    resource_config = {"host": "some_host", "port": "8000"}
    init_context = build_init_resource_context(config=resource_config)
    ab_resource = airbyte_resource(init_context)

    with pytest.raises(Failure, match="Exceeded max number of retries"):
        ab_resource.sync_and_poll("some_connection")
Esempio n. 2
0
def test_trigger_connection_fail():
    """Check that a 500 from ``/connections/sync`` ends in a retries ``Failure``.

    The resource is configured with ``request_max_retries`` set to 1 and the
    sync endpoint is mocked to answer with HTTP 500.
    """
    resource_config = {
        "host": "some_host",
        "port": "8000",
        "request_max_retries": 1,
    }
    ab_resource = airbyte_resource(build_init_resource_context(config=resource_config))

    sync_url = ab_resource.api_base_url + "/connections/sync"
    responses.add(method=responses.POST, url=sync_url, status=500)

    with pytest.raises(Failure, match="Exceeded max number of retries"):
        ab_resource.sync_and_poll("some_connection")
Esempio n. 3
0
def test_sync_and_poll(state):
    """Exercise ``sync_and_poll`` for a single job ``state``.

    The three Airbyte endpoints are mocked so that the job is reported in
    ``state``. Error, cancelled and "unrecognized" states must raise
    ``Failure`` with a matching message; any other state must yield an
    ``AirbyteOutput`` carrying the mocked job and connection details.

    NOTE(review): ``state`` is presumably supplied by a pytest
    parametrization that is not visible here -- confirm.
    """
    ab_resource = airbyte_resource(
        build_init_resource_context(config={"host": "some_host", "port": "8000"})
    )
    base_url = ab_resource.api_base_url

    # Registered in the same order as the resource is expected to call them.
    mocked_endpoints = (
        ("/connections/get", get_sample_connection_json()),
        ("/connections/sync", {"job": {"id": 1}}),
        ("/jobs/get", {"job": {"id": 1, "status": state}}),
    )
    for path, payload in mocked_endpoints:
        responses.add(
            method=responses.POST,
            url=base_url + path,
            json=payload,
            status=200,
        )

    # Ordered (state, message) pairs; the first state equal to ``state`` wins.
    failure_patterns = (
        (AirbyteState.ERROR, "Job failed"),
        (AirbyteState.CANCELLED, "Job was cancelled"),
        ("unrecognized", "unexpected state"),
    )
    expected_message = next(
        (message for known_state, message in failure_patterns if state == known_state),
        None,
    )

    if expected_message is not None:
        with pytest.raises(Failure, match=expected_message):
            ab_resource.sync_and_poll("some_connection", 0)
        return

    result = ab_resource.sync_and_poll("some_connection", 0)
    expected_output = AirbyteOutput(
        job_details={"job": {"id": 1, "status": state}},
        connection_details=get_sample_connection_json(),
    )
    assert result == expected_output
Esempio n. 4
0
def test_sync_and_poll_timeout():
    """Check that ``sync_and_poll`` raises a timeout ``Failure``.

    The job endpoint is mocked to report "pending" and then "running" twice,
    never a terminal state. The call passes 2 as the poll wait and 1 as the
    timeout, and the raised message must match "Timeout: Airbyte job".
    """
    ab_resource = airbyte_resource(
        build_init_resource_context(config={"host": "some_host", "port": "8000"})
    )
    base_url = ab_resource.api_base_url

    def _mock_post(path, payload):
        # Register one successful (HTTP 200) JSON response for a POST endpoint.
        responses.add(
            method=responses.POST,
            url=base_url + path,
            json=payload,
            status=200,
        )

    _mock_post("/connections/get", {})
    _mock_post("/connections/sync", {"job": {"id": 1}})
    for job_status in ("pending", "running", "running"):
        _mock_post("/jobs/get", {"job": {"id": 1, "status": job_status}})

    poll_interval, max_wait = 2, 1
    with pytest.raises(Failure, match="Timeout: Airbyte job"):
        ab_resource.sync_and_poll("some_connection", poll_interval, max_wait)
Esempio n. 5
0
def test_get_job_status_bad_out_fail():
    """Check that ``get_job_status`` raises ``CheckError`` on an empty reply.

    The ``/jobs/get`` endpoint is mocked with a ``None`` JSON body and HTTP
    status 204.
    """
    resource_config = {"host": "some_host", "port": "8000"}
    ab_resource = airbyte_resource(build_init_resource_context(config=resource_config))

    jobs_url = ab_resource.api_base_url + "/jobs/get"
    responses.add(method=responses.POST, url=jobs_url, json=None, status=204)

    with pytest.raises(check.CheckError):
        ab_resource.get_job_status("some_connection")
Esempio n. 6
0
def test_trigger_connection():
    """Check that ``start_sync`` returns the mocked ``/connections/sync`` JSON."""
    resource_config = {"host": "some_host", "port": "8000"}
    ab_resource = airbyte_resource(build_init_resource_context(config=resource_config))

    sync_url = ab_resource.api_base_url + "/connections/sync"
    responses.add(
        method=responses.POST,
        url=sync_url,
        json={"job": {"id": 1}},
        status=200,
    )

    resp = ab_resource.start_sync("some_connection")
    assert resp == {"job": {"id": 1}}
Esempio n. 7
0
def test_assets():
    """Check the materializations generated from a polled sync.

    The connection, sync and job endpoints are mocked with the sample
    payloads. Three materializations are expected, and the first must carry
    the ``bytesEmitted`` and ``recordsCommitted`` metadata entries.
    """
    ab_resource = airbyte_resource(
        build_init_resource_context(config={"host": "some_host", "port": "8000"})
    )
    base_url = ab_resource.api_base_url

    mocked_endpoints = (
        ("/connections/get", get_sample_connection_json()),
        ("/connections/sync", {"job": {"id": 1}}),
        ("/jobs/get", get_sample_job_json()),
    )
    for path, payload in mocked_endpoints:
        responses.add(
            method=responses.POST,
            url=base_url + path,
            json=payload,
            status=200,
        )

    airbyte_output = ab_resource.sync_and_poll("some_connection", 0, None)

    materializations = list(generate_materializations(airbyte_output, []))
    assert len(materializations) == 3

    first_entries = materializations[0].metadata_entries
    assert MetadataEntry("bytesEmitted", value=1234) in first_entries
    assert MetadataEntry("recordsCommitted", value=4321) in first_entries
Esempio n. 8
0
def test_logging_multi_attempts(capsys):
    """Check the captured stdout when job logs grow across polls and attempts.

    Successive ``/jobs/get`` responses expose a growing list of log lines for
    a first attempt and then add a second attempt. After the sync finishes,
    the captured stdout must contain every distinct log line exactly once,
    in order, each followed by a newline.
    """

    def _attempts(log_batches):
        # One attempt entry per batch of log lines.
        return [{"logs": {"logLines": lines}} for lines in log_batches]

    ab_resource = airbyte_resource(
        build_init_resource_context(config={"host": "some_host", "port": "8000"})
    )
    base_url = ab_resource.api_base_url

    def _mock_post(path, payload):
        responses.add(
            method=responses.POST,
            url=base_url + path,
            json=payload,
            status=200,
        )

    _mock_post("/connections/get", {})
    _mock_post("/connections/sync", {"job": {"id": 1}})

    # The first poll reports a pending job without any "attempts" key.
    _mock_post("/jobs/get", {"job": {"id": 1, "status": "pending"}})

    both_attempts = [["log1a", "log1b", "log1c"], ["log2a", "log2b"]]
    polled_jobs = (
        ("running", [["log1a"]]),
        ("running", [["log1a", "log1b"]]),
        ("running", both_attempts),
        (AirbyteState.SUCCEEDED, both_attempts),
    )
    for job_status, log_batches in polled_jobs:
        _mock_post(
            "/jobs/get",
            {
                "job": {"id": 1, "status": job_status},
                "attempts": _attempts(log_batches),
            },
        )

    ab_resource.sync_and_poll("some_connection", 0, None)

    captured = capsys.readouterr()
    expected_lines = ("log1a", "log1b", "log1c", "log2a", "log2b")
    expected_stdout = "".join(line + "\n" for line in expected_lines)
    assert captured.out == expected_stdout
Esempio n. 9
0
def test_assets(schema_prefix):
    """Build Airbyte assets, run them in a job, and check the materializations.

    Destination tables "foo" and "bar" are prefixed with ``schema_prefix``
    when it is truthy. The job is executed in process against mocked Airbyte
    endpoints; three materializations are expected (foo, bar, baz), and the
    first must carry byte/record stats and a two-column table schema.

    NOTE(review): ``schema_prefix`` is presumably a string supplied by a
    pytest parametrization not visible here -- confirm.
    """
    resource_config = {"host": "some_host", "port": "8000"}
    ab_resource = airbyte_resource(build_init_resource_context(config=resource_config))

    base_tables = ["foo", "bar"]
    destination_tables = (
        [schema_prefix + table for table in base_tables] if schema_prefix else base_tables
    )
    ab_assets = build_airbyte_assets(
        "12345",
        destination_tables=destination_tables,
        asset_key_prefix=["some", "prefix"],
    )

    expected_asset_keys = {
        AssetKey(["some", "prefix", table]) for table in destination_tables
    }
    assert ab_assets[0].asset_keys == expected_asset_keys
    assert len(ab_assets[0].op.output_defs) == 2

    base_url = ab_resource.api_base_url
    mocked_endpoints = (
        ("/connections/get", get_sample_connection_json(prefix=schema_prefix)),
        ("/connections/sync", {"job": {"id": 1}}),
        ("/jobs/get", get_sample_job_json(schema_prefix=schema_prefix)),
    )
    for path, payload in mocked_endpoints:
        responses.add(
            method=responses.POST,
            url=base_url + path,
            json=payload,
            status=200,
        )

    configured_resource = airbyte_resource.configured(
        {"host": "some_host", "port": "8000"}
    )
    ab_job = build_assets_job(
        "ab_job",
        ab_assets,
        resource_defs={"airbyte": configured_resource},
    )

    res = ab_job.execute_in_process()

    materializations = []
    for event in res.events_for_node("airbyte_sync_12345"):
        if event.event_type_value == "ASSET_MATERIALIZATION":
            materializations.append(event.event_specific_data.materialization)

    assert len(materializations) == 3

    materialized_keys = {m.asset_key for m in materializations}
    assert materialized_keys == {
        AssetKey(["some", "prefix", schema_prefix + stream])
        for stream in ("foo", "bar", "baz")
    }

    first_entries = materializations[0].metadata_entries
    assert MetadataEntry("bytesEmitted", value=1234) in first_entries
    assert MetadataEntry("recordsCommitted", value=4321) in first_entries

    expected_schema = TableSchema(
        columns=[
            TableColumn(name="a", type="str"),
            TableColumn(name="b", type="int"),
        ]
    )
    assert MetadataEntry("schema", value=expected_schema) in first_entries
Esempio n. 10
0
def test_assets():
    """Build Airbyte assets for "foo"/"bar", run them, and check the events.

    The mocked connection exposes three selected streams (foo, bar, baz) and
    the mocked job reports stats for each. The in-process run must emit
    three ASSET_MATERIALIZATION events, and the first must carry the
    ``columns``, ``bytesEmitted`` and ``recordsCommitted`` metadata entries.
    """

    def _stream(name, column_types):
        # Catalog entry for one selected stream with the given column types.
        return {
            "stream": {
                "name": name,
                "jsonSchema": {
                    "properties": {
                        column: {"type": type_name}
                        for column, type_name in column_types
                    }
                },
            },
            "config": {"selected": True},
        }

    def _stream_stats(name, bytes_emitted, records_committed):
        # Per-stream statistics entry as reported inside a job attempt.
        return {
            "streamName": name,
            "stats": {
                "bytesEmitted": bytes_emitted,
                "recordsCommitted": records_committed,
            },
        }

    resource_config = {"host": "some_host", "port": "8000"}
    ab_resource = airbyte_resource(build_init_resource_context(config=resource_config))
    ab_assets = build_airbyte_assets(
        "12345", ["foo", "bar"], asset_key_prefix=["some", "prefix"]
    )

    assert len(ab_assets[0].op.output_defs) == 2

    connection_payload = {
        "name": "xyz",
        "syncCatalog": {
            "streams": [
                _stream("foo", [("a", "str"), ("b", "int")]),
                _stream("bar", [("c", "str")]),
                _stream("baz", [("d", "str")]),
            ]
        },
    }
    job_payload = {
        "job": {"id": 1, "status": AirbyteState.SUCCEEDED},
        "attempts": [
            {
                "attempt": {
                    "streamStats": [
                        _stream_stats("foo", 1234, 4321),
                        _stream_stats("bar", 1234, 4321),
                        _stream_stats("baz", 1111, 1111),
                    ]
                }
            }
        ],
    }

    base_url = ab_resource.api_base_url
    mocked_endpoints = (
        ("/connections/get", connection_payload),
        ("/connections/sync", {"job": {"id": 1}}),
        ("/jobs/get", job_payload),
    )
    for path, payload in mocked_endpoints:
        responses.add(
            method=responses.POST,
            url=base_url + path,
            json=payload,
            status=200,
        )

    configured_resource = airbyte_resource.configured(
        {"host": "some_host", "port": "8000"}
    )
    ab_job = build_assets_job(
        "ab_job",
        ab_assets,
        resource_defs={"airbyte": configured_resource},
    )

    res = ab_job.execute_in_process()

    materializations = []
    for event in res.events_for_node("airbyte_sync_12345"):
        if event.event_type_value == "ASSET_MATERIALIZATION":
            materializations.append(event)

    assert len(materializations) == 3

    first_entries = materializations[0].event_specific_data.materialization.metadata_entries
    assert MetadataEntry.text("a,b", "columns") in first_entries
    assert MetadataEntry.int(1234, "bytesEmitted") in first_entries
    assert MetadataEntry.int(4321, "recordsCommitted") in first_entries
Esempio n. 11
0
def test_assets():
    """Check that materializations are generated for the mocked streams.

    The mocked connection lists three streams, of which "baz" is not
    selected, and the mocked job reports stats for "foo" and "bar" only.
    Two materializations are expected, and the first must carry the
    ``columns``, ``bytesEmitted`` and ``recordsCommitted`` metadata entries.
    """

    def _stream(name, column_types, selected):
        # Catalog entry for one stream with the given column types.
        return {
            "stream": {
                "name": name,
                "jsonSchema": {
                    "properties": {
                        column: {"type": type_name}
                        for column, type_name in column_types
                    }
                },
            },
            "config": {"selected": selected},
        }

    def _stream_stats(name, bytes_emitted, records_committed):
        # Per-stream statistics entry as reported inside a job attempt.
        return {
            "streamName": name,
            "stats": {
                "bytesEmitted": bytes_emitted,
                "recordsCommitted": records_committed,
            },
        }

    resource_config = {"host": "some_host", "port": "8000"}
    ab_resource = airbyte_resource(build_init_resource_context(config=resource_config))

    connection_payload = {
        "name": "xyz",
        "syncCatalog": {
            "streams": [
                _stream("foo", [("a", "str"), ("b", "int")], True),
                _stream("bar", [("c", "str")], True),
                _stream("baz", [("d", "str")], False),
            ]
        },
    }
    job_payload = {
        "job": {"id": 1, "status": AirbyteState.SUCCEEDED},
        "attempts": [
            {
                "attempt": {
                    "streamStats": [
                        _stream_stats("foo", 1234, 4321),
                        _stream_stats("bar", 1234, 4321),
                    ]
                }
            }
        ],
    }

    base_url = ab_resource.api_base_url
    mocked_endpoints = (
        ("/connections/get", connection_payload),
        ("/connections/sync", {"job": {"id": 1}}),
        ("/jobs/get", job_payload),
    )
    for path, payload in mocked_endpoints:
        responses.add(
            method=responses.POST,
            url=base_url + path,
            json=payload,
            status=200,
        )

    airbyte_output = ab_resource.sync_and_poll("some_connection", 0, None)

    materializations = list(generate_materializations(airbyte_output, []))
    assert len(materializations) == 2

    first_entries = materializations[0].metadata_entries
    assert MetadataEntry.text("a,b", "columns") in first_entries
    assert MetadataEntry.int(1234, "bytesEmitted") in first_entries
    assert MetadataEntry.int(4321, "recordsCommitted") in first_entries