Exemple #1
0
 def setUp(self):
     """Merge the Airbyte test connection into the DB and build the hook used by the tests."""
     # Build the connection first so the merge call stays a one-liner.
     test_connection = Connection(
         conn_id='airbyte_conn_id_test',
         conn_type='http',
         host='http://test-airbyte',
         port=8001,
     )
     db.merge_conn(test_connection)
     self.hook = AirbyteHook(airbyte_conn_id=self.airbyte_conn_id)
Exemple #2
0
class AirbyteTriggerSyncOperator(BaseOperator):
    """
    This operator allows you to submit a job to an Airbyte server to run an integration
    process between your source and destination.

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:AirbyteTriggerSyncOperator`

    :param airbyte_conn_id: Required. The name of the Airflow connection to get connection
        information for Airbyte.
    :param connection_id: Required. The Airbyte ConnectionId UUID between a source and destination.
    :param asynchronous: Optional. Flag to get job_id after submitting the job to the Airbyte API.
        This is useful for submitting long running jobs and
        waiting on them asynchronously using the AirbyteJobSensor.
    :param api_version: Optional. Airbyte API version.
    :param wait_seconds: Optional. Number of seconds between checks. Only used when ``asynchronous`` is False.
    :param timeout: Optional. The amount of time, in seconds, to wait for the request to complete.
        Only used when ``asynchronous`` is False.
    """

    template_fields: Sequence[str] = ('connection_id',)

    def __init__(
        self,
        connection_id: str,
        airbyte_conn_id: str = "airbyte_default",
        asynchronous: Optional[bool] = False,
        api_version: str = "v1",
        wait_seconds: float = 3,
        timeout: Optional[float] = 3600,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.airbyte_conn_id = airbyte_conn_id
        self.connection_id = connection_id
        self.timeout = timeout
        self.api_version = api_version
        self.wait_seconds = wait_seconds
        self.asynchronous = asynchronous
        # Both attributes are filled in by ``execute``. They are initialised here so that
        # ``on_kill`` does not raise AttributeError when the task is killed before a job
        # has been submitted.
        self.hook = None
        self.job_id: Optional[int] = None

    def execute(self, context: 'Context') -> int:
        """
        Create Airbyte Job and wait to finish

        :param context: Task execution context; not used by this method.
        :return: The job id read from ``['job']['id']`` of the submission response.
        """
        self.hook = AirbyteHook(airbyte_conn_id=self.airbyte_conn_id, api_version=self.api_version)
        job_object = self.hook.submit_sync_connection(connection_id=self.connection_id)
        self.job_id = job_object.json()['job']['id']

        self.log.info("Job %s was submitted to Airbyte Server", self.job_id)
        if not self.asynchronous:
            self.log.info('Waiting for job %s to complete', self.job_id)
            self.hook.wait_for_job(job_id=self.job_id, wait_seconds=self.wait_seconds, timeout=self.timeout)
            self.log.info('Job %s completed successfully', self.job_id)

        return self.job_id

    def on_kill(self):
        """Cancel the job if task is cancelled"""
        # Nothing to cancel when the task was killed before a job was submitted.
        if self.hook is None or self.job_id is None:
            return
        self.log.info('on_kill: cancel the airbyte Job %s', self.job_id)
        self.hook.cancel_job(self.job_id)
Exemple #3
0
    def execute(self, context) -> int:
        """
        Create Airbyte Job and wait to finish

        Submits a sync for ``self.connection_id`` and, unless ``self.asynchronous`` is set,
        blocks in ``hook.wait_for_job`` before returning.

        :param context: Task execution context; not used by this method.
        :return: The job id read from ``['job']['id']`` of the submission response
            (presumably an integer, as in the Airbyte API -- confirm).
        """
        hook = AirbyteHook(airbyte_conn_id=self.airbyte_conn_id, api_version=self.api_version)
        job_object = hook.submit_sync_connection(connection_id=self.connection_id)
        job_id = job_object.json()['job']['id']

        self.log.info("Job %s was submitted to Airbyte Server", job_id)
        if not self.asynchronous:
            self.log.info('Waiting for job %s to complete', job_id)
            hook.wait_for_job(job_id=job_id, wait_seconds=self.wait_seconds, timeout=self.timeout)
            self.log.info('Job %s completed successfully', job_id)

        return job_id
Exemple #4
0
    def poke(self, context: dict) -> bool:
        """
        Look up the Airbyte job once and report whether it has succeeded.

        :param context: Task execution context; not used by this method.
        :return: ``True`` when the job status equals ``hook.SUCCEEDED``, ``False`` otherwise.
        :raises AirflowException: when the job status equals ``hook.FAILED`` or ``hook.CANCELLED``.
        """
        airbyte_hook = AirbyteHook(airbyte_conn_id=self.airbyte_conn_id, api_version=self.api_version)
        job = airbyte_hook.get_job(job_id=self.airbyte_job_id)
        job_status = job.json()['job']['status']

        # Terminal states first: two raise, one reports success.
        if job_status == airbyte_hook.FAILED:
            raise AirflowException(f"Job failed: \n{job}")
        if job_status == airbyte_hook.CANCELLED:
            raise AirflowException(f"Job was cancelled: \n{job}")
        if job_status == airbyte_hook.SUCCEEDED:
            self.log.info("Job %s completed successfully.", self.airbyte_job_id)
            return True

        # An ERROR status is only logged; the sensor keeps waiting like for any other state.
        if job_status == airbyte_hook.ERROR:
            self.log.info("Job %s attempt has failed.", self.airbyte_job_id)

        self.log.info("Waiting for job %s to complete.", self.airbyte_job_id)
        return False
Exemple #5
0
class TestAirbyteHook(unittest.TestCase):
    """
    Test all functions from Airbyte Hook
    """

    # Shared fixtures: connection/job identifiers, mocked endpoints and canned response bodies.
    airbyte_conn_id = 'airbyte_conn_id_test'
    connection_id = 'conn_test_sync'
    job_id = 1
    sync_connection_endpoint = 'http://test-airbyte:8001/api/v1/connections/sync'
    get_job_endpoint = 'http://test-airbyte:8001/api/v1/jobs/get'
    _mock_sync_conn_success_response_body = {'job': {'id': 1}}
    _mock_job_status_success_response_body = {'job': {'status': 'succeeded'}}

    def setUp(self):
        """Merge the test connection into the DB and create the hook under test."""
        db.merge_conn(
            Connection(
                # Reuse the class attribute so the stored connection and the hook cannot drift apart.
                conn_id=self.airbyte_conn_id,
                conn_type='http',
                host='http://test-airbyte',
                port=8001,
            )
        )
        self.hook = AirbyteHook(airbyte_conn_id=self.airbyte_conn_id)

    def return_value_get_job(self, status):
        """Return a mock response whose ``json()`` yields ``{'job': {'status': status}}``."""
        response = mock.Mock()
        response.json.return_value = {'job': {'status': status}}
        return response

    @requests_mock.mock()
    def test_submit_sync_connection(self, m):
        """``submit_sync_connection`` returns the mocked 200 response and body."""
        m.post(
            self.sync_connection_endpoint, status_code=200, json=self._mock_sync_conn_success_response_body
        )
        resp = self.hook.submit_sync_connection(connection_id=self.connection_id)
        assert resp.status_code == 200
        assert resp.json() == self._mock_sync_conn_success_response_body

    @requests_mock.mock()
    def test_get_job_status(self, m):
        """``get_job`` returns the mocked 200 response and body."""
        m.post(self.get_job_endpoint, status_code=200, json=self._mock_job_status_success_response_body)
        resp = self.hook.get_job(job_id=self.job_id)
        assert resp.status_code == 200
        assert resp.json() == self._mock_job_status_success_response_body

    @mock.patch('airflow.providers.airbyte.hooks.airbyte.AirbyteHook.get_job')
    def test_wait_for_job_succeeded(self, mock_get_job):
        """A job that is already SUCCEEDED is polled exactly once."""
        mock_get_job.side_effect = [self.return_value_get_job(self.hook.SUCCEEDED)]
        self.hook.wait_for_job(job_id=self.job_id, wait_seconds=0)
        mock_get_job.assert_called_once_with(job_id=self.job_id)

    @mock.patch('airflow.providers.airbyte.hooks.airbyte.AirbyteHook.get_job')
    def test_wait_for_job_error(self, mock_get_job):
        """RUNNING followed by ERROR raises ``AirflowException`` matching "Job failed"."""
        mock_get_job.side_effect = [
            self.return_value_get_job(self.hook.RUNNING),
            self.return_value_get_job(self.hook.ERROR),
        ]
        with pytest.raises(AirflowException, match="Job failed"):
            self.hook.wait_for_job(job_id=self.job_id, wait_seconds=0)

        calls = [mock.call(job_id=self.job_id), mock.call(job_id=self.job_id)]
        # ``assert_has_calls`` is the real Mock assertion; ``has_calls`` is an auto-created,
        # always-truthy child mock, so ``assert mock.has_calls(...)`` can never fail.
        mock_get_job.assert_has_calls(calls)

    @mock.patch('airflow.providers.airbyte.hooks.airbyte.AirbyteHook.get_job')
    def test_wait_for_job_timeout(self, mock_get_job):
        """A timeout shorter than the polling interval raises ``AirflowException`` matching "Timeout"."""
        mock_get_job.side_effect = [
            self.return_value_get_job(self.hook.PENDING),
            self.return_value_get_job(self.hook.RUNNING),
            self.return_value_get_job(self.hook.RUNNING),
        ]
        with pytest.raises(AirflowException, match="Timeout"):
            self.hook.wait_for_job(job_id=self.job_id, wait_seconds=2, timeout=1)

        # Only the first poll is asserted: the timeout (1s) is shorter than the polling
        # interval (2s), so how many further polls happen depends on wait_for_job's internals.
        # NOTE(review): confirm the expected poll count against AirbyteHook.wait_for_job.
        calls = [mock.call(job_id=self.job_id)]
        mock_get_job.assert_has_calls(calls)

    @mock.patch('airflow.providers.airbyte.hooks.airbyte.AirbyteHook.get_job')
    def test_wait_for_job_state_unrecognized(self, mock_get_job):
        """RUNNING followed by an unknown status raises an exception matching "unexpected state"."""
        mock_get_job.side_effect = [
            self.return_value_get_job(self.hook.RUNNING),
            self.return_value_get_job("UNRECOGNIZED"),
        ]
        with pytest.raises(Exception, match="unexpected state"):
            self.hook.wait_for_job(job_id=self.job_id, wait_seconds=0)

        calls = [mock.call(job_id=self.job_id), mock.call(job_id=self.job_id)]
        mock_get_job.assert_has_calls(calls)

    @mock.patch('airflow.providers.airbyte.hooks.airbyte.AirbyteHook.get_job')
    def test_wait_for_job_cancelled(self, mock_get_job):
        """RUNNING followed by CANCELLED raises ``AirflowException`` matching "Job was cancelled"."""
        mock_get_job.side_effect = [
            self.return_value_get_job(self.hook.RUNNING),
            self.return_value_get_job(self.hook.CANCELLED),
        ]
        with pytest.raises(AirflowException, match="Job was cancelled"):
            self.hook.wait_for_job(job_id=self.job_id, wait_seconds=0)

        calls = [mock.call(job_id=self.job_id), mock.call(job_id=self.job_id)]
        mock_get_job.assert_has_calls(calls)