def _get_output_statistics(self, properties) \
        -> Tuple[BigQueryJobRunFacet, Optional[BigQueryStatisticsDatasetFacet]]:
    """Derive run and output-statistics facets from BigQuery job properties.

    Returns a ``(run_facet, stats_facet)`` pair; ``stats_facet`` is ``None``
    whenever row/byte counts are unavailable. Raises ``ValueError`` when the
    job is still running or the properties lack a query plan for no known
    benign reason.
    """
    stages = get_from_nullable_chain(properties, ['statistics', 'query', 'queryPlan'])
    json_props = json.dumps(properties)
    if not stages:
        statement_type = get_from_nullable_chain(
            properties, ['statistics', 'query', 'statementType'])
        # DDL-style statements legitimately produce no query plan.
        if statement_type in ['CREATE_VIEW', 'CREATE_TABLE', 'ALTER_TABLE']:
            return BigQueryJobRunFacet(cached=False), None
        # we're probably getting cached results
        if get_from_nullable_chain(properties, ['statistics', 'query', 'cacheHit']):
            return BigQueryJobRunFacet(cached=True), None
        if get_from_nullable_chain(properties, ['status', 'state']) != "DONE":
            raise ValueError(
                "Trying to extract data from running bigquery job")
        raise ValueError(
            f"BigQuery properties did not have required data: queryPlan - {json_props}"
        )

    # The last plan stage holds the job's final output counters.
    final_stage = stages[-1]
    rows_written = final_stage.get("recordsWritten", None)
    bytes_written = final_stage.get("shuffleOutputBytes", None)
    billed = get_from_nullable_chain(
        properties, ['statistics', 'query', 'totalBytesBilled'])

    run_facet = BigQueryJobRunFacet(
        cached=False,
        billedBytes=int(billed) if billed else None,
        properties=json_props)
    stats_facet = None
    # Both counters must be present (and non-zero) to build the stats facet.
    if bytes_written and rows_written:
        stats_facet = BigQueryStatisticsDatasetFacet(
            rowCount=int(rows_written), size=int(bytes_written))
    return run_facet, stats_facet
def can_accept(cls, expectation_result: ExpectationValidationResult) -> bool:
    """Return True when this parser class handles the given expectation result.

    Accepts only results that carry both an expectation type matching
    ``cls.expectation_key`` and a target column in their kwargs.
    """
    expectation_type = get_from_nullable_chain(
        expectation_result, ['expectation_config', 'expectation_type'])
    extracted_column = get_from_nullable_chain(
        expectation_result, ['expectation_config', 'kwargs', 'column'])
    # bool() enforces the declared return type: the bare `and` chain would
    # otherwise leak None (or another falsy non-bool) to callers.
    return bool(expectation_type and extracted_column
                and expectation_type == cls.expectation_key)
def parse_expectation_result(
        expectation_result: ExpectationValidationResult) -> ExpectationsParserResult:
    """Convert a null-count expectation result into a parser result.

    Reads the unexpected (null) count and the target column from the
    validation result. Return annotation tightened from ``Any`` to
    ``ExpectationsParserResult`` for consistency with the sibling parsers.
    """
    return ExpectationsParserResult(
        'nullCount',
        get_from_nullable_chain(expectation_result,
                                ['result', 'unexpected_count']),
        get_from_nullable_chain(
            expectation_result, ['expectation_config', 'kwargs', 'column']))
def parse_expectation_result(
        cls, expectation_result: dict) -> ExpectationsParserResult:
    """Convert a quantiles expectation result into a parser result."""
    observed = get_from_nullable_chain(expectation_result,
                                       ['result', 'observed_value'])
    # Only map the quantiles when an observed value is actually present.
    quantile_map = cls.quantile_to_map(observed) if observed else None
    column = get_from_nullable_chain(
        expectation_result, ['expectation_config', 'kwargs', 'column'])
    return ExpectationsParserResult('quantiles', quantile_map, column)
def parse_expectation_result(
        expectation_result: ExpectationValidationResult
) -> ExpectationsParserResult:
    """Convert a min-value expectation result into a parser result."""
    observed = get_from_nullable_chain(expectation_result,
                                       ['result', 'observed_value'])
    column = get_from_nullable_chain(
        expectation_result, ['expectation_config', 'kwargs', 'column'])
    return ExpectationsParserResult('min', observed, column)
def parse_expectation_result(
        expectation_result: ExpectationValidationResult
) -> ExpectationsParserResult:
    """Convert an element-count expectation result into a parser result."""
    element_count = get_from_nullable_chain(expectation_result,
                                            ['result', 'element_count'])
    column = get_from_nullable_chain(
        expectation_result, ['expectation_config', 'kwargs', 'column'])
    return ExpectationsParserResult('count', element_count, column)
def _get_output_from_bq(self, properties) -> Optional[Dataset]:
    """Build the output Dataset from the job's destination table, if any.

    Returns ``None`` when the job has no destination table; falls back to a
    schema-less dataset when the table schema cannot be resolved.
    """
    destination = get_from_nullable_chain(
        properties, ['configuration', 'query', 'destinationTable'])
    if not destination:
        return None

    table_name = self._bq_table_name(destination)
    source = self._source()
    schema = self._get_table_safely(table_name)
    if not schema:
        self.logger.warning("Could not resolve output table from bq")
        return Dataset.from_table(source, table_name)
    return Dataset.from_table_schema(
        source=source,
        table_schema=schema,
    )
def _get_table(self, table: str) -> Optional[DbTableSchema]:
    """Fetch a BigQuery table by name and convert it to a DbTableSchema.

    Returns ``None`` when the table carries no properties or no schema
    fields. Fixes: no longer shadows the ``table`` parameter with the
    properties dict, replaces the ``range(len(...))`` loop with
    ``enumerate``, and hoists the repeated ``tableReference`` lookup.
    """
    bq_table = self.client.get_table(table)
    # NOTE(review): reads the private _properties attribute of the
    # google-cloud-bigquery Table object — confirm against library version.
    properties = bq_table._properties
    if not properties:
        return None
    fields = get_from_nullable_chain(properties, ['schema', 'fields'])
    if not fields:
        return None
    columns = [
        DbColumn(name=field.get('name'),
                 type=field.get('type'),
                 description=field.get('description'),
                 ordinal_position=position)
        for position, field in enumerate(fields)
    ]
    table_ref = properties.get('tableReference')
    return DbTableSchema(
        schema_name=table_ref.get('projectId') + '.' +
        table_ref.get('datasetId'),
        table_name=DbTableName(table_ref.get('tableId')),
        columns=columns)
def _get_input_from_bq(self, properties):
    """Build input Datasets from the job's referenced tables.

    Tries to resolve full table schemas; on any failure falls back to
    schema-less datasets so extraction still yields the table lineage.
    """
    bq_input_tables = get_from_nullable_chain(
        properties, ['statistics', 'query', 'referencedTables'])
    if not bq_input_tables:
        return []

    input_table_names = [
        self._bq_table_name(bq_t) for bq_t in bq_input_tables
    ]
    # One source per referenced table; the table itself is not needed here,
    # so use `_` instead of an unused loop variable.
    sources = [self._source() for _ in bq_input_tables]
    try:
        return [
            Dataset.from_table_schema(source=source,
                                      table_schema=table_schema)
            for table_schema, source in zip(
                self._get_table_schemas(input_table_names), sources)
        ]
    except Exception as e:
        # Deliberate best-effort: log and degrade to schema-less datasets
        # rather than failing the whole extraction.
        self.logger.warning(f'Could not extract schema from bigquery. {e}')
        return [
            Dataset.from_table(source, table)
            for table, source in zip(input_table_names, sources)
        ]
def test_nullable_chain_works(self):
    """A full key chain resolves to the nested value."""
    data = {"first": {"second": {"third": 42}}}
    assert get_from_nullable_chain(data, ['first', 'second', 'third']) == 42

    # Extra sibling keys must not disturb the lookup.
    data = {"first": {"second": {"third": 42, "fourth": {"empty": 56}}}}
    assert get_from_nullable_chain(data, ['first', 'second', 'third']) == 42
def test_nullable_chain_fails(self):
    """A missing key anywhere in the chain yields None instead of raising."""
    data = {"first": {"second": {}}}
    assert get_from_nullable_chain(data, ['first', 'second', 'third']) is None
def parse_expectation_result(cls, expectation_result: ExpectationValidationResult) \
        -> ExpectationsParserResult:
    """Convert a generic observed-value expectation into a parser result,
    keyed by the class's ``facet_key`` (no column association)."""
    observed = get_from_nullable_chain(expectation_result,
                                       ['result', 'observed_value'])
    return ExpectationsParserResult(cls.facet_key, observed)