def setUp(self):
    """Merge the test Airbyte connection into the DB and build the hook under test."""
    test_connection = Connection(
        conn_id='airbyte_conn_id_test',
        conn_type='http',
        host='http://test-airbyte',
        port=8001,
    )
    db.merge_conn(test_connection)

    # The hook is created after the connection is merged.
    self.hook = AirbyteHook(airbyte_conn_id=self.airbyte_conn_id)
class AirbyteTriggerSyncOperator(BaseOperator):
    """
    This operator allows you to submit a job to an Airbyte server to run an integration
    process between your source and destination.

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:AirbyteTriggerSyncOperator`

    :param airbyte_conn_id: Required. The name of the Airflow connection to get connection
        information for Airbyte.
    :param connection_id: Required. The Airbyte ConnectionId UUID between a source and destination.
    :param asynchronous: Optional. Flag to get job_id after submitting the job to the Airbyte API.
        This is useful for submitting long running jobs and waiting on them asynchronously using
        the AirbyteJobSensor.
    :param api_version: Optional. Airbyte API version.
    :param wait_seconds: Optional. Number of seconds between checks. Only used when
        ``asynchronous`` is False.
    :param timeout: Optional. The amount of time, in seconds, to wait for the request to complete.
        Only used when ``asynchronous`` is False.
    """

    template_fields: Sequence[str] = ('connection_id',)

    def __init__(
        self,
        connection_id: str,
        airbyte_conn_id: str = "airbyte_default",
        asynchronous: Optional[bool] = False,
        api_version: str = "v1",
        wait_seconds: float = 3,
        timeout: Optional[float] = 3600,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.airbyte_conn_id = airbyte_conn_id
        self.connection_id = connection_id
        self.timeout = timeout
        self.api_version = api_version
        self.wait_seconds = wait_seconds
        self.asynchronous = asynchronous

    def execute(self, context: 'Context') -> int:
        """
        Create Airbyte Job and wait to finish

        Submits a sync for ``connection_id`` and, unless ``asynchronous`` is set, calls the
        hook's ``wait_for_job`` with ``wait_seconds`` and ``timeout`` before returning.

        :return: the id of the submitted Airbyte job.
        """
        # Hook and job id are kept on the instance so that on_kill can cancel the job.
        self.hook = AirbyteHook(airbyte_conn_id=self.airbyte_conn_id, api_version=self.api_version)
        job_object = self.hook.submit_sync_connection(connection_id=self.connection_id)
        self.job_id = job_object.json()['job']['id']

        self.log.info("Job %s was submitted to Airbyte Server", self.job_id)
        if not self.asynchronous:
            self.log.info('Waiting for job %s to complete', self.job_id)
            self.hook.wait_for_job(job_id=self.job_id, wait_seconds=self.wait_seconds, timeout=self.timeout)
            self.log.info('Job %s completed successfully', self.job_id)

        return self.job_id

    def on_kill(self) -> None:
        """
        Cancel the job if task is cancelled

        ``job_id`` and ``hook`` are only assigned inside ``execute``; if the task is killed
        before a job has been submitted there is nothing to cancel and this is a no-op.
        """
        # getattr guards against a kill that arrives before execute() has set job_id.
        job_id = getattr(self, 'job_id', None)
        if not job_id:
            return
        self.log.info('on_kill: cancel the airbyte Job %s', job_id)
        self.hook.cancel_job(job_id)
def execute(self, context) -> int:
    """
    Create Airbyte Job and wait to finish

    Submits a sync for ``self.connection_id`` through an ``AirbyteHook``. Unless
    ``self.asynchronous`` is set, the hook's ``wait_for_job`` is then called with
    ``self.wait_seconds`` and ``self.timeout`` before returning.

    :param context: task execution context; not used by this method.
    :return: the id of the submitted Airbyte job.
    """
    hook = AirbyteHook(airbyte_conn_id=self.airbyte_conn_id, api_version=self.api_version)
    job_object = hook.submit_sync_connection(connection_id=self.connection_id)
    job_id = job_object.json()['job']['id']

    self.log.info("Job %s was submitted to Airbyte Server", job_id)
    if not self.asynchronous:
        self.log.info('Waiting for job %s to complete', job_id)
        hook.wait_for_job(job_id=job_id, wait_seconds=self.wait_seconds, timeout=self.timeout)
        self.log.info('Job %s completed successfully', job_id)

    return job_id
def poke(self, context: dict) -> bool:
    """
    Query the Airbyte job once and report whether it has succeeded.

    :param context: task execution context; not used by this method.
    :return: True when the job status equals the hook's SUCCEEDED value, False otherwise.
    :raises AirflowException: when the job status equals the hook's FAILED or CANCELLED value.
    """
    airbyte = AirbyteHook(airbyte_conn_id=self.airbyte_conn_id, api_version=self.api_version)
    response = airbyte.get_job(job_id=self.airbyte_job_id)
    state = response.json()['job']['status']

    # Terminal states first: two raise, one reports success.
    if state == airbyte.FAILED:
        raise AirflowException(f"Job failed: \n{response}")
    if state == airbyte.CANCELLED:
        raise AirflowException(f"Job was cancelled: \n{response}")
    if state == airbyte.SUCCEEDED:
        self.log.info("Job %s completed successfully.", self.airbyte_job_id)
        return True

    # An ERROR status is only logged; the sensor keeps waiting like for any other status.
    if state == airbyte.ERROR:
        self.log.info("Job %s attempt has failed.", self.airbyte_job_id)

    self.log.info("Waiting for job %s to complete.", self.airbyte_job_id)
    return False
class TestAirbyteHook(unittest.TestCase):
    """
    Test all functions from Airbyte Hook
    """

    airbyte_conn_id = 'airbyte_conn_id_test'
    connection_id = 'conn_test_sync'
    job_id = 1
    sync_connection_endpoint = 'http://test-airbyte:8001/api/v1/connections/sync'
    get_job_endpoint = 'http://test-airbyte:8001/api/v1/jobs/get'
    _mock_sync_conn_success_response_body = {'job': {'id': 1}}
    _mock_job_status_success_response_body = {'job': {'status': 'succeeded'}}

    def setUp(self):
        """Merge the test connection into the DB and build the hook under test."""
        db.merge_conn(
            Connection(
                conn_id=self.airbyte_conn_id, conn_type='http', host='http://test-airbyte', port=8001
            )
        )
        self.hook = AirbyteHook(airbyte_conn_id=self.airbyte_conn_id)

    def return_value_get_job(self, status):
        """Build a mock response whose ``json()`` reports the given job status."""
        response = mock.Mock()
        response.json.return_value = {'job': {'status': status}}
        return response

    @requests_mock.mock()
    def test_submit_sync_connection(self, m):
        """submit_sync_connection returns the response of the sync endpoint."""
        m.post(
            self.sync_connection_endpoint, status_code=200, json=self._mock_sync_conn_success_response_body
        )
        resp = self.hook.submit_sync_connection(connection_id=self.connection_id)
        assert resp.status_code == 200
        assert resp.json() == self._mock_sync_conn_success_response_body

    @requests_mock.mock()
    def test_get_job_status(self, m):
        """get_job returns the response of the jobs/get endpoint."""
        m.post(self.get_job_endpoint, status_code=200, json=self._mock_job_status_success_response_body)
        resp = self.hook.get_job(job_id=self.job_id)
        assert resp.status_code == 200
        assert resp.json() == self._mock_job_status_success_response_body

    @mock.patch('airflow.providers.airbyte.hooks.airbyte.AirbyteHook.get_job')
    def test_wait_for_job_succeeded(self, mock_get_job):
        """wait_for_job returns after a single poll that reports SUCCEEDED."""
        mock_get_job.side_effect = [self.return_value_get_job(self.hook.SUCCEEDED)]
        self.hook.wait_for_job(job_id=self.job_id, wait_seconds=0)
        mock_get_job.assert_called_once_with(job_id=self.job_id)

    @mock.patch('airflow.providers.airbyte.hooks.airbyte.AirbyteHook.get_job')
    def test_wait_for_job_error(self, mock_get_job):
        """wait_for_job raises "Job failed" once a poll reports ERROR."""
        mock_get_job.side_effect = [
            self.return_value_get_job(self.hook.RUNNING),
            self.return_value_get_job(self.hook.ERROR),
        ]
        with pytest.raises(AirflowException, match="Job failed"):
            self.hook.wait_for_job(job_id=self.job_id, wait_seconds=0)

        # assert_has_calls is the real Mock assertion; ``has_calls`` is not one and
        # would just return a truthy child mock, so it verified nothing.
        calls = [mock.call(job_id=self.job_id), mock.call(job_id=self.job_id)]
        mock_get_job.assert_has_calls(calls)

    @mock.patch('airflow.providers.airbyte.hooks.airbyte.AirbyteHook.get_job')
    def test_wait_for_job_timeout(self, mock_get_job):
        """wait_for_job raises "Timeout" when the timeout is shorter than the poll interval."""
        mock_get_job.side_effect = [
            self.return_value_get_job(self.hook.PENDING),
            self.return_value_get_job(self.hook.RUNNING),
            self.return_value_get_job(self.hook.RUNNING),
        ]
        with pytest.raises(AirflowException, match="Timeout"):
            self.hook.wait_for_job(job_id=self.job_id, wait_seconds=2, timeout=1)

        # How many polls happen before the deadline depends on the hook's loop, which
        # this test does not pin down; only check that every poll targeted the right job.
        expected_call = mock.call(job_id=self.job_id)
        assert all(made == expected_call for made in mock_get_job.call_args_list)

    @mock.patch('airflow.providers.airbyte.hooks.airbyte.AirbyteHook.get_job')
    def test_wait_for_job_state_unrecognized(self, mock_get_job):
        """wait_for_job raises "unexpected state" for a status it does not know."""
        mock_get_job.side_effect = [
            self.return_value_get_job(self.hook.RUNNING),
            self.return_value_get_job("UNRECOGNIZED"),
        ]
        with pytest.raises(Exception, match="unexpected state"):
            self.hook.wait_for_job(job_id=self.job_id, wait_seconds=0)

        calls = [mock.call(job_id=self.job_id), mock.call(job_id=self.job_id)]
        mock_get_job.assert_has_calls(calls)

    @mock.patch('airflow.providers.airbyte.hooks.airbyte.AirbyteHook.get_job')
    def test_wait_for_job_cancelled(self, mock_get_job):
        """wait_for_job raises "Job was cancelled" once a poll reports CANCELLED."""
        mock_get_job.side_effect = [
            self.return_value_get_job(self.hook.RUNNING),
            self.return_value_get_job(self.hook.CANCELLED),
        ]
        with pytest.raises(AirflowException, match="Job was cancelled"):
            self.hook.wait_for_job(job_id=self.job_id, wait_seconds=0)

        calls = [mock.call(job_id=self.job_id), mock.call(job_id=self.job_id)]
        mock_get_job.assert_has_calls(calls)