def _get_table(self, project_id, dataset_id, table_id):
    """Look up a table through ``self.client`` and return the response.

    Args:
        project_id: Project identifier placed in the request's ``projectId``.
        dataset_id: Dataset identifier placed in the request's ``datasetId``.
        table_id: Table identifier placed in the request's ``tableId``.

    Returns:
        Whatever ``self.client.tables.Get`` returns for the request (a
        ``bigquery.Table`` instance, per the original inline note).
    """
    # The response is a bigquery.Table instance.
    return self.client.tables.Get(
        bigquery.BigqueryTablesGetRequest(
            projectId=project_id,
            datasetId=dataset_id,
            tableId=table_id,
        )
    )
def _update_bigquery_schema_on_append(self):
    # type: () -> None
    """Merge ``self._schema`` into the schema of the existing output table.

    Parses ``self._output_table`` as ``PROJECT:DATASET.TABLE``, fetches the
    table, replaces its schema with the result of
    ``_get_merged_field_schemas(existing fields, self._schema.fields)`` and
    sends the updated table back to BigQuery.

    If the table lookup returns HTTP 404 the method returns without doing
    anything: a table that does not exist has no schema to update.

    Raises:
        ValueError: If ``self._output_table`` is empty or is not of the form
            ``PROJECT:DATASET.TABLE``.
        exceptions.HttpError: If the table lookup fails with any status other
            than 404 (the BigQuery error is propagated unchanged).
        RuntimeError: If the schema update request fails.
    """
    # TODO (yifangchen): Move the logic into validate().
    output_table = self._output_table
    output_table_re_match = None
    if output_table:
        output_table_re_match = re.match(
            r'^((?P<project>.+):)(?P<dataset>\w+)\.(?P<table>[\w\$]+)$',
            output_table)
    if not output_table_re_match:
        # Fail with a clear message instead of an AttributeError/TypeError
        # raised from deep inside the regex handling.
        raise ValueError(
            'Expected a table reference (PROJECT:DATASET.TABLE) '
            'instead of {}.'.format(output_table))
    project_id = output_table_re_match.group('project')
    dataset_id = output_table_re_match.group('dataset')
    table_id = output_table_re_match.group('table')

    credentials = GoogleCredentials.get_application_default(
    ).create_scoped(['https://www.googleapis.com/auth/bigquery'])
    client = bigquery.BigqueryV2(credentials=credentials)

    try:
        existing_table = client.tables.Get(
            bigquery.BigqueryTablesGetRequest(projectId=project_id,
                                              datasetId=dataset_id,
                                              tableId=table_id))
    except exceptions.HttpError as e:
        if e.status_code == 404:
            # If the table does not exist, the schema needs no update.
            return
        # Any other failure (permissions, quota, server error) must not be
        # mistaken for "table does not exist"; use BigQuery's error message.
        raise

    new_schema = bigquery.TableSchema()
    new_schema.fields = _get_merged_field_schemas(
        existing_table.schema.fields, self._schema.fields)
    existing_table.schema = new_schema
    try:
        client.tables.Update(
            bigquery.BigqueryTablesUpdateRequest(projectId=project_id,
                                                 datasetId=dataset_id,
                                                 table=existing_table,
                                                 tableId=table_id))
    except exceptions.HttpError as e:
        raise RuntimeError('BigQuery schema update failed: %s' % str(e))
def _get_schema(input_table):
    # type: (str) -> bigquery_v2.TableSchema
    """Return the ``schema`` attribute of the table named by ``input_table``.

    The reference is split into project, dataset and table ids by
    ``bigquery_util.parse_table_reference``; the table is then fetched with a
    BigQuery client built from application default credentials scoped to
    BigQuery.
    """
    project_id, dataset_id, table_id = bigquery_util.parse_table_reference(
        input_table)

    default_credentials = client.GoogleCredentials.get_application_default()
    scoped_credentials = default_credentials.create_scoped(
        ['https://www.googleapis.com/auth/bigquery'])
    bigquery_client = bigquery_v2.BigqueryV2(credentials=scoped_credentials)

    lookup_request = bigquery_v2.BigqueryTablesGetRequest(
        projectId=project_id,
        datasetId=dataset_id,
        tableId=table_id)
    return bigquery_client.tables.Get(lookup_request).schema
def side_effect(request):
    """Answer a table lookup request with a canned result.

    A request equal to the lookup of ``project:dataset.table__sample_info``
    raises an ``HttpError`` carrying status ``'404'``; every other request
    gets a ``bigquery.Table`` whose reference points at
    ``project:dataset.table__chr1_part1``.

    NOTE(review): presumably installed as a mock ``side_effect`` for
    ``tables.Get`` in a test -- confirm against the caller.
    """
    sample_info_lookup = bigquery.BigqueryTablesGetRequest(
        projectId='project',
        datasetId='dataset',
        tableId='table__sample_info')
    if request == sample_info_lookup:
        raise exceptions.HttpError(
            response={'status': '404'}, url='', content='')

    existing_reference = bigquery.TableReference(
        projectId='project',
        datasetId='dataset',
        tableId='table__chr1_part1')
    return bigquery.Table(tableReference=existing_reference)
def table_exist(client, project_id, dataset_id, table_id):
    # type: (beam_bigquery.BigqueryV2, str, str, str) -> bool
    """Report whether a table lookup through ``client`` succeeds.

    Returns:
        True when ``client.tables.Get`` returns normally, False when it
        raises an ``HttpError`` whose ``status_code`` is 404.

    Raises:
        exceptions.HttpError: Re-raised unchanged for any status other
            than 404.
    """
    lookup_request = beam_bigquery.BigqueryTablesGetRequest(
        projectId=project_id,
        datasetId=dataset_id,
        tableId=table_id)
    try:
        client.tables.Get(lookup_request)
    except exceptions.HttpError as http_error:
        if http_error.status_code != 404:
            raise
        return False
    else:
        return True
def validate(self, parsed_args, client=None):
    # type: (argparse.Namespace, bigquery.BigqueryV2) -> None
    """Check the BigQuery output options in ``parsed_args``.

    Args:
        parsed_args: Parsed command-line options. Reads ``output_table``,
            ``output_avro_path``, ``append`` and ``update_schema_on_append``.
        client: Optional BigQuery client. When falsy, one is created from
            application default credentials scoped to BigQuery.

    Raises:
        ValueError: If ``output_table`` is missing (and no
            ``output_avro_path`` is given) or is not of the form
            ``PROJECT:DATASET.TABLE``; if the dataset lookup returns 404; if
            ``update_schema_on_append`` is set without ``append``; or if
            ``append`` is not set and the output table already exists.
        exceptions.HttpError: Any BigQuery error other than 404 from the
            dataset or table lookups, propagated unchanged.
    """
    if not parsed_args.output_table:
        if parsed_args.output_avro_path:
            # Writing into BigQuery is not requested; no more BigQuery checks
            # needed.
            return
        # Neither output was given: report it like any other malformed table
        # reference rather than letting re.match() fail with a TypeError on
        # None.
        raise ValueError(
            'Expected a table reference (PROJECT:DATASET.TABLE) '
            'instead of {}.'.format(parsed_args.output_table))

    output_table_re_match = re.match(
        r'^((?P<project>.+):)(?P<dataset>\w+)\.(?P<table>[\w\$]+)$',
        parsed_args.output_table)
    if not output_table_re_match:
        raise ValueError(
            'Expected a table reference (PROJECT:DATASET.TABLE) '
            'instead of {}.'.format(parsed_args.output_table))

    if not client:
        credentials = GoogleCredentials.get_application_default(
        ).create_scoped(['https://www.googleapis.com/auth/bigquery'])
        client = bigquery.BigqueryV2(credentials=credentials)

    project_id = output_table_re_match.group('project')
    dataset_id = output_table_re_match.group('dataset')
    table_id = output_table_re_match.group('table')

    try:
        client.datasets.Get(
            bigquery.BigqueryDatasetsGetRequest(projectId=project_id,
                                                datasetId=dataset_id))
    except exceptions.HttpError as e:
        if e.status_code == 404:
            raise ValueError('Dataset %s:%s does not exist.' %
                             (project_id, dataset_id))
        else:
            # For the rest of the errors, use BigQuery error message.
            raise

    # Ensuring given output table doesn't already exist to avoid overwriting
    # it.
    if not parsed_args.append:
        if parsed_args.update_schema_on_append:
            raise ValueError(
                '--update_schema_on_append requires --append to be '
                'true.')
        try:
            client.tables.Get(
                bigquery.BigqueryTablesGetRequest(projectId=project_id,
                                                  datasetId=dataset_id,
                                                  tableId=table_id))
        except exceptions.HttpError as e:
            if e.status_code != 404:
                # For the rest of the errors, use BigQuery error message.
                raise
            # A 404 is expected here: the output table must not already
            # exist.
        else:
            # The lookup succeeded, so the table is already there.
            raise ValueError(
                'Table %s:%s.%s already exists, cannot overwrite it.' %
                (project_id, dataset_id, table_id))
def get_table(self, project_id, dataset_id, table_id):
    """Lookup a table's metadata object.

    The lookup is issued through ``self.client``.

    Args:
      project_id, dataset_id, table_id: table lookup parameters

    Returns:
      bigquery.Table instance
    Raises:
      HttpError if lookup failed.
    """
    lookup_fields = dict(
        projectId=project_id,
        datasetId=dataset_id,
        tableId=table_id,
    )
    table_request = bigquery.BigqueryTablesGetRequest(**lookup_fields)
    return self.client.tables.Get(table_request)
def raise_error_if_table_exists(client, project_id, dataset_id, table_id):
    # type: (bigquery.BigqueryV2, str, str, str) -> None
    """Refuse to continue when the given table can already be fetched.

    Raises:
        ValueError: If ``client.tables.Get`` succeeds, i.e. the table exists.
        exceptions.HttpError: Re-raised unchanged when the lookup fails with
            any status other than 404.
    """
    lookup_request = bigquery.BigqueryTablesGetRequest(
        projectId=project_id,
        datasetId=dataset_id,
        tableId=table_id)
    try:
        client.tables.Get(lookup_request)
    except exceptions.HttpError as http_error:
        if http_error.status_code != 404:
            # For the rest of the errors, use BigQuery error message.
            raise
        # This is expected, output table must not already exist
    else:
        raise ValueError(
            'Table %s:%s.%s already exists, cannot overwrite it.' %
            (project_id, dataset_id, table_id))