Esempio n. 1
0
def airbyte_sync_op(context):
    """
    Executes an Airbyte job sync for a given ``connection_id``, and polls until that sync
    completes, raising an error if it is unsuccessful. It outputs an AirbyteOutput which contains
    the job details for a given ``connection_id``.

    It requires the use of the :py:class:`~dagster_airbyte.airbyte_resource`, which allows it to
    communicate with the Airbyte API.

    The op reads the following keys from ``context.op_config``: ``connection_id``,
    ``poll_interval``, ``poll_timeout``, ``yield_materializations`` and ``asset_key_prefix``.
    When ``yield_materializations`` is truthy, the events produced by
    ``generate_materializations`` are yielded before the final ``Output``.

    The ``Output`` metadata is the ``totalStats`` mapping of the last entry in the job's
    ``attempts`` list. If the attempts list, the attempt, or its stats are missing, null or
    empty, the metadata is empty.

    Examples:

    .. code-block:: python

        from dagster import job
        from dagster_airbyte import airbyte_resource, airbyte_sync_op

        my_airbyte_resource = airbyte_resource.configured(
            {
                "host": {"env": "AIRBYTE_HOST"},
                "port": {"env": "AIRBYTE_PORT"},
            }
        )

        sync_foobar = airbyte_sync_op.configured({"connection_id": "foobar"}, name="sync_foobar")

        @job(resource_defs={"airbyte": my_airbyte_resource})
        def my_simple_airbyte_job():
            sync_foobar()

        @job(resource_defs={"airbyte": my_airbyte_resource})
        def my_composed_airbyte_job():
            final_foobar_state = sync_foobar(start_after=some_op())
            other_op(final_foobar_state)
    """

    op_config = context.op_config
    airbyte_output = context.resources.airbyte.sync_and_poll(
        connection_id=op_config["connection_id"],
        poll_interval=op_config["poll_interval"],
        poll_timeout=op_config["poll_timeout"],
    )
    if op_config["yield_materializations"]:
        yield from generate_materializations(
            airbyte_output, asset_key_prefix=op_config["asset_key_prefix"]
        )

    # A ``.get`` default only applies when the key is absent. Use ``or`` so that an
    # empty list or an explicit null also falls back, instead of raising on ``[-1]``.
    attempts = airbyte_output.job_details.get("attempts") or [{}]
    last_attempt = attempts[-1].get("attempt") or {}
    total_stats = last_attempt.get("totalStats") or {}

    yield Output(airbyte_output, metadata={**total_stats})
Esempio n. 2
0
 def _assets(context):
     """
     Run the Airbyte sync and emit one event per generated materialization.

     For each materialization, the last component of its asset key path is treated as
     the table name. If that name is in ``destination_tables``, an ``Output`` with that
     output name (and a ``None`` value) is yielded, carrying the materialization's
     metadata entries as a ``label -> entry_data`` mapping. Otherwise the
     materialization itself is yielded unchanged.

     ``connection_id``, ``asset_key_prefix`` and ``destination_tables`` are taken from
     the enclosing scope, which is not visible in this snippet.
     """
     sync_result = context.resources.airbyte.sync_and_poll(connection_id=connection_id)

     for mat in generate_materializations(sync_result, asset_key_prefix):
         stream_table = mat.asset_key.path[-1]

         # Tables that are not declared destinations pass through as plain materializations.
         if stream_table not in destination_tables:
             yield mat
             continue

         entry_metadata = {}
         for entry in mat.metadata_entries:
             entry_metadata[entry.label] = entry.entry_data

         yield Output(value=None, output_name=stream_table, metadata=entry_metadata)
Esempio n. 3
0
def test_assets():
    """
    Check that ``generate_materializations`` yields the expected materializations for a
    mocked Airbyte sync.

    Three POST endpoints (``/connections/get``, ``/connections/sync`` and ``/jobs/get``)
    are registered with ``responses`` using the sample connection and job payloads. The
    sync is expected to produce three materializations, the first of which carries the
    ``bytesEmitted`` and ``recordsCommitted`` metadata entries.
    """
    resource_context = build_init_resource_context(
        config={"host": "some_host", "port": "8000"}
    )
    ab_resource = airbyte_resource(resource_context)

    def _mock_post(endpoint, payload):
        # Register a canned 200 response for a POST to the given Airbyte API endpoint.
        responses.add(
            method=responses.POST,
            url=ab_resource.api_base_url + endpoint,
            json=payload,
            status=200,
        )

    _mock_post("/connections/get", get_sample_connection_json())
    _mock_post("/connections/sync", {"job": {"id": 1}})
    _mock_post("/jobs/get", get_sample_job_json())

    airbyte_output = ab_resource.sync_and_poll("some_connection", 0, None)

    materializations = list(generate_materializations(airbyte_output, []))
    assert len(materializations) == 3

    first_entries = materializations[0].metadata_entries
    assert MetadataEntry("bytesEmitted", value=1234) in first_entries
    assert MetadataEntry("recordsCommitted", value=4321) in first_entries
Esempio n. 4
0
def test_assets():
    """
    Check that ``generate_materializations`` yields the expected materializations for a
    mocked Airbyte sync whose payloads are spelled out inline.

    The mocked connection has three streams: ``foo`` (columns ``a``, ``b``) and ``bar``
    (column ``c``) are selected, while ``baz`` (column ``d``) is not. The mocked job
    reports stream stats for ``foo`` and ``bar`` only. Two materializations are expected,
    and the first must carry the ``columns``, ``bytesEmitted`` and ``recordsCommitted``
    metadata entries.
    """

    def _stream_entry(name, column_types, selected):
        # One ``syncCatalog`` stream entry; key order mirrors the Airbyte payload layout.
        properties = {
            column: {"type": type_name} for column, type_name in column_types.items()
        }
        return {
            "stream": {"name": name, "jsonSchema": {"properties": properties}},
            "config": {"selected": selected},
        }

    def _stream_stats(stream_name):
        # Per-stream stats block; every stream in this test reports the same numbers.
        return {
            "streamName": stream_name,
            "stats": {"bytesEmitted": 1234, "recordsCommitted": 4321},
        }

    resource_context = build_init_resource_context(
        config={"host": "some_host", "port": "8000"}
    )
    ab_resource = airbyte_resource(resource_context)

    connection_payload = {
        "name": "xyz",
        "syncCatalog": {
            "streams": [
                _stream_entry("foo", {"a": "str", "b": "int"}, True),
                _stream_entry("bar", {"c": "str"}, True),
                _stream_entry("baz", {"d": "str"}, False),
            ]
        },
    }
    sync_payload = {"job": {"id": 1}}
    job_payload = {
        "job": {"id": 1, "status": AirbyteState.SUCCEEDED},
        "attempts": [
            {"attempt": {"streamStats": [_stream_stats("foo"), _stream_stats("bar")]}}
        ],
    }

    mocked_endpoints = (
        ("/connections/get", connection_payload),
        ("/connections/sync", sync_payload),
        ("/jobs/get", job_payload),
    )
    for endpoint, payload in mocked_endpoints:
        responses.add(
            method=responses.POST,
            url=ab_resource.api_base_url + endpoint,
            json=payload,
            status=200,
        )

    airbyte_output = ab_resource.sync_and_poll("some_connection", 0, None)

    materializations = list(generate_materializations(airbyte_output, []))
    assert len(materializations) == 2

    first_entries = materializations[0].metadata_entries
    assert MetadataEntry.text("a,b", "columns") in first_entries
    assert MetadataEntry.int(1234, "bytesEmitted") in first_entries
    assert MetadataEntry.int(4321, "recordsCommitted") in first_entries