Example #1
0
    def execute(self, context: dict) -> None:
        """Submit the configured Data Fusion pipeline and optionally wait on it.

        Resolves the instance's API endpoint, starts the pipeline, and — when
        ``success_states`` is configured — blocks until the run reaches one of
        those states or ``pipeline_timeout`` elapses.
        """
        fusion_hook = DataFusionHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            api_version=self.api_version,
            impersonation_chain=self.impersonation_chain,
        )
        self.log.info("Starting Data Fusion pipeline: %s", self.pipeline_name)
        # The REST endpoint for pipeline calls comes from the instance resource.
        endpoint = fusion_hook.get_instance(
            project_id=self.project_id,
            location=self.location,
            instance_name=self.instance_name,
        )["apiEndpoint"]
        run_id = fusion_hook.start_pipeline(
            instance_url=endpoint,
            pipeline_name=self.pipeline_name,
            namespace=self.namespace,
            runtime_args=self.runtime_args,
        )

        self.log.info("Pipeline started")
        # Only block for completion when explicit success states were given.
        if self.success_states:
            fusion_hook.wait_for_pipeline_state(
                pipeline_id=run_id,
                pipeline_name=self.pipeline_name,
                success_states=self.success_states,
                namespace=self.namespace,
                instance_url=endpoint,
                timeout=self.pipeline_timeout,
            )
Example #2
0
 def execute(self, context: dict) -> dict:
     """Create a Data Fusion instance, tolerating an already-existing one.

     On an HTTP 409 conflict the existing instance is fetched instead and
     polled with exponential backoff until it leaves the CREATING state.
     Returns the instance resource as a dict.
     """
     hook = DataFusionHook(
         api_version=self.api_version,
         gcp_conn_id=self.gcp_conn_id,
         delegate_to=self.delegate_to,
         impersonation_chain=self.impersonation_chain,
     )
     self.log.info("Creating Data Fusion instance: %s", self.instance_name)
     try:
         instance = hook.wait_for_operation(
             hook.create_instance(
                 instance_name=self.instance_name,
                 instance=self.instance,
                 location=self.location,
                 project_id=self.project_id,
             )
         )
         self.log.info("Instance %s created successfully", self.instance_name)
     except HttpError as err:
         # 409 (as int or str) means the instance already exists; anything
         # else is a genuine failure and must propagate.
         if err.resp.status not in (409, '409'):
             raise
         self.log.info("Instance %s already exists", self.instance_name)
         instance = hook.get_instance(
             instance_name=self.instance_name, location=self.location, project_id=self.project_id
         )
         # Wait for instance to be ready
         for delay in exponential_sleep_generator(initial=10, maximum=120):
             if instance['state'] != 'CREATING':
                 break
             sleep(delay)
             instance = hook.get_instance(
                 instance_name=self.instance_name, location=self.location, project_id=self.project_id
             )
     return instance
Example #3
0
 def execute(self, context: dict) -> dict:
     """List and return the pipelines deployed on the Data Fusion instance.

     Results may be narrowed by ``artifact_name`` / ``artifact_version``.
     """
     hook = DataFusionHook(
         gcp_conn_id=self.gcp_conn_id,
         delegate_to=self.delegate_to,
         api_version=self.api_version,
         impersonation_chain=self.impersonation_chain,
     )
     self.log.info("Listing Data Fusion pipelines")
     # Pipeline queries go through the instance's REST endpoint.
     endpoint = hook.get_instance(
         project_id=self.project_id,
         location=self.location,
         instance_name=self.instance_name,
     )["apiEndpoint"]
     found = hook.list_pipelines(
         instance_url=endpoint,
         namespace=self.namespace,
         artifact_name=self.artifact_name,
         artifact_version=self.artifact_version,
     )
     self.log.info("%s", found)
     return found
Example #4
0
    def execute(self, context: 'Context') -> None:
        """Restart the Data Fusion instance and persist a console link for it."""
        hook = DataFusionHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            api_version=self.api_version,
            impersonation_chain=self.impersonation_chain,
        )
        self.log.info("Restarting Data Fusion instance: %s",
                      self.instance_name)
        restarted = hook.wait_for_operation(
            hook.restart_instance(
                project_id=self.project_id,
                location=self.location,
                instance_name=self.instance_name,
            )
        )
        self.log.info("Instance %s restarted successfully", self.instance_name)

        # Fall back to the project encoded in the instance resource when no
        # explicit project id was configured on the operator.
        resolved_project = self.project_id or DataFusionPipelineLinkHelper.get_project_id(restarted)
        DataFusionInstanceLink.persist(
            context=context, task_instance=self, project_id=resolved_project
        )
Example #5
0
    def poke(self, context: dict) -> bool:
        """Return True once the pipeline workflow is in an expected status.

        Raises AirflowException if the workflow lands in one of the
        configured failure statuses.
        """
        self.log.info(
            "Waiting for pipeline %s to be in one of the states: %s.",
            self.pipeline_id,
            ", ".join(self.expected_statuses),
        )
        hook = DataFusionHook(
            impersonation_chain=self.impersonation_chain,
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
        )

        endpoint = hook.get_instance(
            instance_name=self.instance_name,
            location=self.location,
            project_id=self.project_id,
        )["apiEndpoint"]

        status = None
        try:
            status = hook.get_pipeline_workflow(
                pipeline_name=self.pipeline_name,
                instance_url=endpoint,
                pipeline_id=self.pipeline_id,
                namespace=self.namespace,
            )["status"]
        except AirflowException:
            pass  # Because the pipeline may not be visible in system yet

        if self.failure_statuses and status in self.failure_statuses:
            raise AirflowException(
                f"Pipeline with id '{self.pipeline_id}' state is: {status}. "
                f"Terminating sensor...")

        self.log.debug("Current status of the pipeline workflow for %s: %s.",
                       self.pipeline_id, status)
        return status in self.expected_statuses
Example #6
0
    def execute(self, context: 'Context') -> str:
        """Start the pipeline, persist a console link, and return the run id.

        Unless ``asynchronous`` is set, blocks until the run reaches one of
        ``success_states`` or ``pipeline_timeout`` elapses.
        """
        hook = DataFusionHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            api_version=self.api_version,
            impersonation_chain=self.impersonation_chain,
        )
        self.log.info("Starting Data Fusion pipeline: %s", self.pipeline_name)
        instance = hook.get_instance(
            project_id=self.project_id,
            location=self.location,
            instance_name=self.instance_name,
        )
        run_id = hook.start_pipeline(
            instance_url=instance["apiEndpoint"],
            pipeline_name=self.pipeline_name,
            namespace=self.namespace,
            runtime_args=self.runtime_args,
        )
        self.log.info("Pipeline %s submitted successfully.", run_id)

        # Expose the pipeline UI link on the task instance.
        DataFusionPipelineLink.persist(
            context=context, task_instance=self, uri=instance["serviceEndpoint"]
        )

        if not self.asynchronous:
            self.log.info(
                "Waiting when pipeline %s will be in one of the success states",
                run_id)
            hook.wait_for_pipeline_state(
                pipeline_id=run_id,
                pipeline_name=self.pipeline_name,
                success_states=self.success_states,
                namespace=self.namespace,
                instance_url=instance["apiEndpoint"],
                timeout=self.pipeline_timeout,
            )
            self.log.info("Job %s discover success state.", run_id)
        return run_id
Example #7
0
def hook():
    """Yield a DataFusionHook whose GoogleBaseHook __init__ is patched to the
    default-project-id test stub for the duration of the test."""
    patcher = mock.patch(
        "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.__init__",
        new=mock_base_gcp_hook_default_project_id,
    )
    with patcher:
        yield DataFusionHook(gcp_conn_id=GCP_CONN_ID)