Example #1
    def extract_on_complete(self, task_instance) -> [StepMetadata]:
        log.debug(f"extract_on_complete({task_instance})")
        context = self.parse_sql_context()
        source = self._source()

        try:
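            # Pull the BigQuery job id for this task from XCom; it is required to look up job details.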
            bigquery_job_id = self._get_xcom_bigquery_job_id(task_instance)
            context['bigquery.job_id'] = bigquery_job_id
            if bigquery_job_id is None:
                raise Exception("Xcom could not resolve BigQuery job id. " +
                                "Job may have failed.")
        except Exception as e:
            log.error(f"Cannot retrieve job details from BigQuery.Client. {e}",
                      exc_info=True)
            context['bigquery.extractor.client_error'] = \
                f"{e}: {traceback.format_exc()}"
            return [StepMetadata(
                name=get_job_name(task=self.operator),
                context=context,
                inputs=None,
                outputs=None
            )]

        inputs = None
        outputs = None
        try:
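            # Query the BigQuery API for the completed job and record its raw properties in the context.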
            client = bigquery.Client()
            try:
                job = client.get_job(job_id=bigquery_job_id)
                job_properties_str = json.dumps(job._properties)
                context['bigquery.job_properties'] = job_properties_str

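                # Derive the input and output datasets from the retrieved job.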
                inputs = self._get_input_from_bq(job, context, source, client)
                outputs = self._get_output_from_bq(job, source, client)
            finally:
                # Ensure client has close() defined, otherwise ignore.
                # NOTE: close() was introduced in python-bigquery v1.23.0
                if hasattr(client, "close"):
                    client.close()
        except Exception as e:
            log.error(f"Cannot retrieve job details from BigQuery.Client. {e}",
                      exc_info=True)
            context['bigquery.extractor.error'] = \
                f"{e}: {traceback.format_exc()}"

        return [StepMetadata(
            name=get_job_name(task=self.operator),
            inputs=inputs,
            outputs=outputs,
            context=context
        )]
Example #2
    def extract_on_complete(self, task_instance) -> StepMetadata:
        # Returns a fixed input dataset built from an explicit table schema
        # and a fixed output table.
        inputs = [
            Dataset.from_table_schema(self.source, DbTableSchema(
                schema_name='schema',
                table_name=DbTableName('extract_on_complete_input1'),
                columns=[
                    DbColumn(
                        name='field1',
                        type='text',
                        description='',
                        ordinal_position=1
                    ),
                    DbColumn(
                        name='field2',
                        type='text',
                        description='',
                        ordinal_position=2
                    )
                ]
            ))
        ]
        outputs = [
            Dataset.from_table(self.source, "extract_on_complete_output1")
        ]
        return StepMetadata(
            name=get_job_name(task=self.operator),
            inputs=inputs,
            outputs=outputs,
            context={
                "extract_on_complete": "extract_on_complete"
            }
        )
Example #3
    def extract(self) -> [StepMetadata]:
        # Returns a one-element list of StepMetadata with fixed input and output tables.
        inputs = [Dataset.from_table(self.source, "extract_input1")]
        outputs = [Dataset.from_table(self.source, "extract_output1")]
        return [
            StepMetadata(name=get_job_name(task=self.operator),
                         inputs=inputs,
                         outputs=outputs,
                         context={"extract": "extract"})
        ]
Example #4
    def extract_on_complete(self, task_instance) -> Optional[StepMetadata]:
        log.debug(f"extract_on_complete({task_instance})")
        context = self.parse_sql_context()

        try:
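            # Resolve the BigQuery job id recorded in XCom; if it cannot be found, the
            # failure is reported below as a BigQueryErrorRunFacet.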
            bigquery_job_id = self._get_xcom_bigquery_job_id(task_instance)
            if bigquery_job_id is None:
                raise Exception("Xcom could not resolve BigQuery job id. " +
                                "Job may have failed.")
        except Exception as e:
            log.error(f"Cannot retrieve job details from BigQuery.Client. {e}",
                      exc_info=True)
            return StepMetadata(
                name=get_job_name(task=self.operator),
                inputs=None,
                outputs=None,
                run_facets={
                    "bigQuery_error": BigQueryErrorRunFacet(
                        clientError=f"{e}: {traceback.format_exc()}",
                        parserError=context.parser_error
                    )
                }
            )

        inputs = None
        output = None
        run_facets = {}
        try:
            client = bigquery.Client()
            try:
                job = client.get_job(job_id=bigquery_job_id)
                props = job._properties

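                # Split the job statistics into a run-level facet and a dataset-level facet.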
                run_stat_facet, dataset_stat_facet = self._get_output_statistics(props)

                run_facets.update({
                    "bigQuery_statistics": run_stat_facet
                })

                inputs = self._get_input_from_bq(props, client)
                output = self._get_output_from_bq(props, client)
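                # Attach the dataset statistics facet to the output dataset, when one was resolved.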
                if output:
                    output.custom_facets.update({
                        "stats": dataset_stat_facet
                    })

            finally:
                # Ensure client has close() defined, otherwise ignore.
                # NOTE: close() was introduced in python-bigquery v1.23.0
                if hasattr(client, "close"):
                    client.close()
        except Exception as e:
            log.error(f"Cannot retrieve job details from BigQuery.Client. {e}",
                      exc_info=True)
            run_facets.update({
                "bigQuery_error": BigQueryErrorRunFacet(
                    clientError=f"{e}: {traceback.format_exc()}",
                    parserError=context.parser_error
                )
            })

        return StepMetadata(
            name=get_job_name(task=self.operator),
            inputs=inputs,
            outputs=[output] if output else [],
            run_facets=run_facets
        )