def execute(self, context: dict) -> None:
    hook = DataFusionHook(
        gcp_conn_id=self.gcp_conn_id,
        delegate_to=self.delegate_to,
        api_version=self.api_version,
        impersonation_chain=self.impersonation_chain,
    )
    self.log.info("Starting Data Fusion pipeline: %s", self.pipeline_name)
    instance = hook.get_instance(
        instance_name=self.instance_name,
        location=self.location,
        project_id=self.project_id,
    )
    api_url = instance["apiEndpoint"]
    pipeline_id = hook.start_pipeline(
        pipeline_name=self.pipeline_name,
        instance_url=api_url,
        namespace=self.namespace,
        runtime_args=self.runtime_args,
    )
    self.log.info("Pipeline started")
    if self.success_states:
        hook.wait_for_pipeline_state(
            success_states=self.success_states,
            pipeline_id=pipeline_id,
            pipeline_name=self.pipeline_name,
            namespace=self.namespace,
            instance_url=api_url,
            timeout=self.pipeline_timeout,
        )
def execute(self, context: dict) -> dict:
    hook = DataFusionHook(
        gcp_conn_id=self.gcp_conn_id,
        delegate_to=self.delegate_to,
        api_version=self.api_version,
        impersonation_chain=self.impersonation_chain,
    )
    self.log.info("Creating Data Fusion instance: %s", self.instance_name)
    try:
        operation = hook.create_instance(
            instance_name=self.instance_name,
            instance=self.instance,
            location=self.location,
            project_id=self.project_id,
        )
        instance = hook.wait_for_operation(operation)
        self.log.info("Instance %s created successfully", self.instance_name)
    except HttpError as err:
        # 409 Conflict means the instance already exists; anything else is fatal.
        if err.resp.status not in (409, '409'):
            raise
        self.log.info("Instance %s already exists", self.instance_name)
        instance = hook.get_instance(
            instance_name=self.instance_name, location=self.location, project_id=self.project_id
        )
        # Wait for the instance to be ready, backing off exponentially between polls.
        for time_to_wait in exponential_sleep_generator(initial=10, maximum=120):
            if instance['state'] != 'CREATING':
                break
            sleep(time_to_wait)
            instance = hook.get_instance(
                instance_name=self.instance_name, location=self.location, project_id=self.project_id
            )
    return instance
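# A minimal usage sketch for the execute() above, assuming it belongs to the
# Airflow Google provider's CloudDataFusionCreateInstanceOperator (the snippet
# itself does not name its enclosing class); the DAG id, instance name,
# location, and project id below are illustrative placeholders.
from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.datafusion import (
    CloudDataFusionCreateInstanceOperator,
)

with DAG(
    dag_id="example_datafusion_create_instance",
    start_date=datetime(2021, 1, 1),
    schedule_interval=None,
) as dag:
    # The task is effectively idempotent: a 409 from the API is treated as
    # "instance already exists", after which execute() polls until the
    # instance leaves the CREATING state and returns it (pushed to XCom).
    create_instance = CloudDataFusionCreateInstanceOperator(
        task_id="create_instance",
        instance_name="example-instance",
        instance={"type": "BASIC"},
        location="europe-west1",
        project_id="example-project",
    )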
def execute(self, context: dict) -> dict:
    hook = DataFusionHook(
        gcp_conn_id=self.gcp_conn_id,
        delegate_to=self.delegate_to,
        api_version=self.api_version,
        impersonation_chain=self.impersonation_chain,
    )
    self.log.info("Listing Data Fusion pipelines")
    instance = hook.get_instance(
        instance_name=self.instance_name,
        location=self.location,
        project_id=self.project_id,
    )
    api_url = instance["apiEndpoint"]
    pipelines = hook.list_pipelines(
        instance_url=api_url,
        namespace=self.namespace,
        artifact_version=self.artifact_version,
        artifact_name=self.artifact_name,
    )
    self.log.info("%s", pipelines)
    return pipelines
def execute(self, context: 'Context') -> None:
    hook = DataFusionHook(
        gcp_conn_id=self.gcp_conn_id,
        delegate_to=self.delegate_to,
        api_version=self.api_version,
        impersonation_chain=self.impersonation_chain,
    )
    self.log.info("Restarting Data Fusion instance: %s", self.instance_name)
    operation = hook.restart_instance(
        instance_name=self.instance_name,
        location=self.location,
        project_id=self.project_id,
    )
    instance = hook.wait_for_operation(operation)
    self.log.info("Instance %s restarted successfully", self.instance_name)
    project_id = self.project_id or DataFusionPipelineLinkHelper.get_project_id(instance)
    DataFusionInstanceLink.persist(context=context, task_instance=self, project_id=project_id)
def poke(self, context: dict) -> bool:
    self.log.info(
        "Waiting for pipeline %s to be in one of the states: %s.",
        self.pipeline_id,
        ", ".join(self.expected_statuses),
    )
    hook = DataFusionHook(
        gcp_conn_id=self.gcp_conn_id,
        delegate_to=self.delegate_to,
        impersonation_chain=self.impersonation_chain,
    )
    instance = hook.get_instance(
        instance_name=self.instance_name,
        location=self.location,
        project_id=self.project_id,
    )
    api_url = instance["apiEndpoint"]
    pipeline_status = None
    try:
        pipeline_workflow = hook.get_pipeline_workflow(
            pipeline_name=self.pipeline_name,
            instance_url=api_url,
            pipeline_id=self.pipeline_id,
            namespace=self.namespace,
        )
        pipeline_status = pipeline_workflow["status"]
    except AirflowException:
        pass  # Because the pipeline may not be visible in the system yet.
    if self.failure_statuses and pipeline_status in self.failure_statuses:
        raise AirflowException(
            f"Pipeline with id '{self.pipeline_id}' state is: {pipeline_status}. "
            f"Terminating sensor..."
        )
    self.log.debug(
        "Current status of the pipeline workflow for %s: %s.", self.pipeline_id, pipeline_status
    )
    return pipeline_status in self.expected_statuses
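# A minimal usage sketch for the poke() above, assuming it belongs to the
# provider's CloudDataFusionPipelineStateSensor (the snippet does not name its
# enclosing class). The pipeline_id is typically pulled from XCom, where a
# start-pipeline task pushed it; the status strings, task ids, and resource
# names below are illustrative placeholders.
from airflow.providers.google.cloud.sensors.datafusion import (
    CloudDataFusionPipelineStateSensor,
)

wait_for_pipeline = CloudDataFusionPipelineStateSensor(
    task_id="wait_for_pipeline",
    pipeline_name="example-pipeline",
    pipeline_id="{{ task_instance.xcom_pull(task_ids='start_pipeline') }}",
    expected_statuses=["COMPLETED"],
    failure_statuses=["FAILED", "KILLED"],
    instance_name="example-instance",
    location="europe-west1",
)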
def execute(self, context: 'Context') -> str:
    hook = DataFusionHook(
        gcp_conn_id=self.gcp_conn_id,
        delegate_to=self.delegate_to,
        api_version=self.api_version,
        impersonation_chain=self.impersonation_chain,
    )
    self.log.info("Starting Data Fusion pipeline: %s", self.pipeline_name)
    instance = hook.get_instance(
        instance_name=self.instance_name,
        location=self.location,
        project_id=self.project_id,
    )
    api_url = instance["apiEndpoint"]
    pipeline_id = hook.start_pipeline(
        pipeline_name=self.pipeline_name,
        instance_url=api_url,
        namespace=self.namespace,
        runtime_args=self.runtime_args,
    )
    self.log.info("Pipeline %s submitted successfully.", pipeline_id)
    DataFusionPipelineLink.persist(
        context=context, task_instance=self, uri=instance["serviceEndpoint"]
    )
    if not self.asynchronous:
        self.log.info("Waiting for pipeline %s to reach one of the success states", pipeline_id)
        hook.wait_for_pipeline_state(
            success_states=self.success_states,
            pipeline_id=pipeline_id,
            pipeline_name=self.pipeline_name,
            namespace=self.namespace,
            instance_url=api_url,
            timeout=self.pipeline_timeout,
        )
        self.log.info("Pipeline %s reached a success state.", pipeline_id)
    return pipeline_id
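# A minimal usage sketch for the execute() above, assuming it belongs to the
# provider's CloudDataFusionStartPipelineOperator (the snippet does not name
# its enclosing class). With asynchronous=True the task returns pipeline_id
# immediately (pushed to XCom) instead of blocking in wait_for_pipeline_state,
# which pairs naturally with the state sensor sketched earlier; runtime_args
# and resource names are placeholders.
from airflow.providers.google.cloud.operators.datafusion import (
    CloudDataFusionStartPipelineOperator,
)

start_pipeline = CloudDataFusionStartPipelineOperator(
    task_id="start_pipeline",
    pipeline_name="example-pipeline",
    instance_name="example-instance",
    location="europe-west1",
    runtime_args={"input.path": "gs://example-bucket/in"},
    asynchronous=True,
)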
@pytest.fixture
def hook():
    with mock.patch(
        "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.__init__",
        new=mock_base_gcp_hook_default_project_id,
    ):
        yield DataFusionHook(gcp_conn_id=GCP_CONN_ID)
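# An illustrative pytest sketch using the fixture above: get_conn is patched
# on the yielded hook so no GCP request is made, and the test only asserts
# that get_instance goes through the discovery client. The resource names and
# the assertion style are assumptions, not the provider's actual test suite.
def test_get_instance_uses_discovery_client(hook):
    with mock.patch.object(hook, "get_conn") as get_conn_mock:
        hook.get_instance(
            instance_name="test-instance",
            location="us-west1",
            project_id="test-project",
        )
    get_conn_mock.assert_called_once()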