Beispiel #1
0
    def _get_output_statistics(self, properties) \
            -> Tuple[BigQueryJobRunFacet, Optional[BigQueryStatisticsDatasetFacet]]:
        """Derive run-level and output-dataset statistics from job properties.

        :param properties: job properties mapping; read at
            ``statistics.query.*`` and ``status.state``.
        :return: a pair ``(run facet, dataset statistics facet or None)``.
            The dataset facet is only built when the last query-plan stage
            has truthy ``recordsWritten`` and ``shuffleOutputBytes`` values.
        :raises ValueError: when there is no query plan and the job is
            neither one of the plan-less statement types, nor a cache hit;
            the message distinguishes a job whose state is not ``DONE``
            from one that is simply missing the data.
        """
        query_plan = get_from_nullable_chain(
            properties, ['statistics', 'query', 'queryPlan'])
        # Serialized once up front: used both in the run facet and in the
        # "missing data" error message below.
        json_props = json.dumps(properties)

        if query_plan:
            # The last stage of the plan is taken as the output stage.
            final_stage = query_plan[-1]
            rows_written = final_stage.get("recordsWritten", None)
            bytes_written = final_stage.get("shuffleOutputBytes", None)
            bytes_billed = get_from_nullable_chain(
                properties, ['statistics', 'query', 'totalBytesBilled'])

            run_facet = BigQueryJobRunFacet(
                cached=False,
                billedBytes=int(bytes_billed) if bytes_billed else None,
                properties=json_props)

            if bytes_written and rows_written:
                dataset_facet = BigQueryStatisticsDatasetFacet(
                    rowCount=int(rows_written),
                    size=int(bytes_written))
            else:
                dataset_facet = None
            return run_facet, dataset_facet

        # No query plan from here on.
        statement_type = get_from_nullable_chain(
            properties, ['statistics', 'query', 'statementType'])
        if statement_type in ('CREATE_VIEW', 'CREATE_TABLE', 'ALTER_TABLE'):
            return BigQueryJobRunFacet(cached=False), None

        # we're probably getting cached results
        cache_hit = get_from_nullable_chain(
            properties, ['statistics', 'query', 'cacheHit'])
        if cache_hit:
            return BigQueryJobRunFacet(cached=True), None

        job_state = get_from_nullable_chain(properties, ['status', 'state'])
        if job_state != "DONE":
            raise ValueError(
                "Trying to extract data from running bigquery job")
        raise ValueError(
            f"BigQuery properties did not have required data: queryPlan - {json_props}"
        )
Beispiel #2
0
 def can_accept(cls,
                 expectation_result: ExpectationValidationResult) -> bool:
     """Tell whether this parser handles the given expectation result.

     A result is accepted when it carries a non-empty expectation type,
     a non-empty target column (``expectation_config.kwargs.column``) and
     the expectation type equals ``cls.expectation_key``.

     :param expectation_result: single expectation validation result.
     :return: ``True`` or ``False`` -- always a real ``bool``.
     """
     expectation_type = get_from_nullable_chain(
         expectation_result, ['expectation_config', 'expectation_type'])
     extracted_column = get_from_nullable_chain(
         expectation_result, ['expectation_config', 'kwargs', 'column'])
     # A bare ``a and b and c`` yields the first falsy operand (None, ''),
     # which contradicts the declared ``bool`` return type; coerce it.
     # Short-circuit order is kept as before.
     return bool(
         expectation_type
         and extracted_column
         and expectation_type == cls.expectation_key
     )
Beispiel #3
0
 def parse_expectation_result(
         expectation_result: ExpectationValidationResult) -> Any:
     """Build the ``'nullCount'`` parser result for one expectation result.

     The value is read from ``result.unexpected_count`` and the column from
     ``expectation_config.kwargs.column``; either lookup may come back
     empty and is passed through unchanged.
     """
     unexpected_count = get_from_nullable_chain(
         expectation_result, ['result', 'unexpected_count'])
     column = get_from_nullable_chain(
         expectation_result, ['expectation_config', 'kwargs', 'column'])
     return ExpectationsParserResult('nullCount', unexpected_count, column)
Beispiel #4
0
 def parse_expectation_result(
         cls, expectation_result: dict) -> ExpectationsParserResult:
     """Build the ``'quantiles'`` parser result for one expectation result.

     The observed value at ``result.observed_value`` is converted with
     ``cls.quantile_to_map`` when it is truthy; otherwise ``None`` is used.
     The column is read from ``expectation_config.kwargs.column``.
     """
     observed = get_from_nullable_chain(
         expectation_result, ['result', 'observed_value'])

     if observed:
         quantiles = cls.quantile_to_map(observed)
     else:
         quantiles = None

     column = get_from_nullable_chain(
         expectation_result, ['expectation_config', 'kwargs', 'column'])
     return ExpectationsParserResult('quantiles', quantiles, column)
Beispiel #5
0
 def parse_expectation_result(
     expectation_result: ExpectationValidationResult
 ) -> ExpectationsParserResult:
     """Build the ``'min'`` parser result for one expectation result.

     The value comes from ``result.observed_value`` and the column from
     ``expectation_config.kwargs.column``; both are passed through as-is.
     """
     observed_min = get_from_nullable_chain(
         expectation_result, ['result', 'observed_value'])
     column = get_from_nullable_chain(
         expectation_result, ['expectation_config', 'kwargs', 'column'])
     return ExpectationsParserResult('min', observed_min, column)
Beispiel #6
0
 def parse_expectation_result(
     expectation_result: ExpectationValidationResult
 ) -> ExpectationsParserResult:
     """Build the ``'count'`` parser result for one expectation result.

     The value comes from ``result.element_count`` and the column from
     ``expectation_config.kwargs.column``; both are passed through as-is.
     """
     element_count = get_from_nullable_chain(
         expectation_result, ['result', 'element_count'])
     column = get_from_nullable_chain(
         expectation_result, ['expectation_config', 'kwargs', 'column'])
     return ExpectationsParserResult('count', element_count, column)
Beispiel #7
0
    def _get_output_from_bq(self, properties) -> Optional[Dataset]:
        """Resolve the output dataset of a job from its properties.

        :param properties: job properties mapping; the destination table
            is read from ``configuration.query.destinationTable``.
        :return: ``None`` when no destination table is present; otherwise
            a ``Dataset`` built from the table schema, or from the bare
            table name when the schema could not be obtained.
        """
        destination = get_from_nullable_chain(
            properties, ['configuration', 'query', 'destinationTable'])
        if not destination:
            return None

        table_name = self._bq_table_name(destination)
        source = self._source()
        schema = self._get_table_safely(table_name)

        if not schema:
            # No schema available: log it and fall back to a dataset
            # described only by its source and table name.
            self.logger.warning("Could not resolve output table from bq")
            return Dataset.from_table(source, table_name)

        return Dataset.from_table_schema(
            source=source,
            table_schema=schema,
        )
Beispiel #8
0
    def _get_table(self, table: str) -> Optional[DbTableSchema]:
        """Fetch a table through ``self.client`` and convert it to a schema.

        :param table: table identifier handed to ``self.client.get_table``.
        :return: a ``DbTableSchema`` whose schema name is
            ``"<projectId>.<datasetId>"``, or ``None`` when the fetched
            table has no properties or no ``schema.fields`` entry.
        """
        bq_table = self.client.get_table(table)
        # Kept under its own name instead of rebinding the ``table``
        # parameter, which is a string identifier.
        properties = bq_table._properties
        if not properties:
            return None

        fields = get_from_nullable_chain(properties, ['schema', 'fields'])
        if not fields:
            return None

        # The position of a field in the list is used as its ordinal.
        columns = [
            DbColumn(name=field.get('name'),
                     type=field.get('type'),
                     description=field.get('description'),
                     ordinal_position=position)
            for position, field in enumerate(fields)
        ]

        # NOTE(review): assumes 'tableReference' with projectId/datasetId/
        # tableId is always present; a missing entry raises here, as before.
        table_ref = properties.get('tableReference')
        return DbTableSchema(
            schema_name=table_ref.get('projectId') + '.' +
            table_ref.get('datasetId'),
            table_name=DbTableName(table_ref.get('tableId')),
            columns=columns)
Beispiel #9
0
    def _get_input_from_bq(self, properties):
        """Resolve the input datasets of a job from its properties.

        :param properties: job properties mapping; input tables are read
            from ``statistics.query.referencedTables``.
        :return: an empty list when no tables are referenced; otherwise one
            ``Dataset`` per referenced table, built from the table schema,
            or from the bare table name if schema extraction raised.
        """
        referenced = get_from_nullable_chain(
            properties, ['statistics', 'query', 'referencedTables'])
        if not referenced:
            return []

        table_names = [self._bq_table_name(ref) for ref in referenced]
        # One source object per referenced table, paired up by position.
        sources = [self._source() for _ in referenced]

        try:
            schemas = self._get_table_schemas(table_names)
            return [
                Dataset.from_table_schema(source=src, table_schema=schema)
                for schema, src in zip(schemas, sources)
            ]
        except Exception as e:
            # Best effort: any failure while resolving schemas degrades to
            # name-only datasets instead of failing the extraction.
            self.logger.warning(f'Could not extract schema from bigquery. {e}')
            return [
                Dataset.from_table(src, name)
                for name, src in zip(table_names, sources)
            ]
Beispiel #10
0
    def test_nullable_chain_works(self):
        """A full key path resolves to the nested value, with or without siblings."""
        only_path = {"first": {"second": {"third": 42}}}
        found = get_from_nullable_chain(only_path, ['first', 'second', 'third'])
        assert found == 42

        # Same path, but the innermost dict also holds an unrelated branch.
        with_sibling = {"first": {"second": {"third": 42, "fourth": {"empty": 56}}}}
        found = get_from_nullable_chain(with_sibling, ['first', 'second', 'third'])
        assert found == 42
Beispiel #11
0
 def test_nullable_chain_fails(self):
     """A path whose last key is absent resolves to ``None``."""
     missing_leaf = {"first": {"second": {}}}
     found = get_from_nullable_chain(missing_leaf, ['first', 'second', 'third'])
     assert found is None
Beispiel #12
0
 def parse_expectation_result(cls, expectation_result: ExpectationValidationResult) \
         -> ExpectationsParserResult:
     """Build a parser result keyed by ``cls.facet_key``.

     The value is whatever is found at ``result.observed_value`` in the
     expectation result; no column is passed.
     """
     key = cls.facet_key
     observed = get_from_nullable_chain(
         expectation_result, ['result', 'observed_value'])
     return ExpectationsParserResult(key, observed)