def _validate_output_tables(self, client, output_table_base_name,
                            sharding_config_path, append, is_main_output):
    """Validates the base output table name and every sharded output table.

    Checks that neither the base name nor any shard suffix contains the
    reserved sharding separator, that the target dataset exists, and that
    each composed table's existence is consistent with the `append` flag.

    Args:
        client: BigQuery client used for dataset/table existence checks.
        output_table_base_name: Fully qualified base table reference.
        sharding_config_path: Path to the variant sharding configuration.
        append: Whether output is appended to existing tables.
        is_main_output: Whether the sample-info table is part of the output.

    Raises:
        ValueError: On a reserved separator in a name, a missing dataset, or
            a table whose existence conflicts with `append`.
    """
    if output_table_base_name != bigquery_util.get_table_base_name(
            output_table_base_name):
        raise ValueError(
            ('Output table cannot contain "{}". we reserve this '
             'string to mark sharded output tables.').format(
                 bigquery_util.TABLE_SUFFIX_SEPARATOR))

    project_id, dataset_id, table_id = bigquery_util.parse_table_reference(
        output_table_base_name)
    bigquery_util.raise_error_if_dataset_not_exists(
        client, project_id, dataset_id)

    # Collect every table name this run would produce.
    tables_to_check = []
    if is_main_output:
        tables_to_check.append(
            bigquery_util.compose_table_name(table_id,
                                             SAMPLE_INFO_TABLE_SUFFIX))
    sharding = variant_sharding.VariantSharding(sharding_config_path)
    shard_count = sharding.get_num_shards()
    # In case there is no residual in config we will ignore the last shard.
    if not sharding.should_keep_shard(sharding.get_residual_index()):
        shard_count -= 1
    for shard_index in range(shard_count):
        suffix = sharding.get_output_table_suffix(shard_index)
        if suffix != bigquery_util.get_table_base_name(suffix):
            raise ValueError(
                ('Table suffix cannot contain "{}" we reserve this '
                 'string to mark sharded output tables.').format(
                     bigquery_util.TABLE_SUFFIX_SEPARATOR))
        tables_to_check.append(
            bigquery_util.compose_table_name(table_id, suffix))

    # Appending requires the table to exist; overwriting requires it not to.
    for table_name in tables_to_check:
        exists = bigquery_util.table_exist(
            client, project_id, dataset_id, table_name)
        if append and not exists:
            raise ValueError(
                'Table {}:{}.{} does not exist, cannot append to it.'.format(
                    project_id, dataset_id, table_name))
        if not append and exists:
            raise ValueError((
                'Table {}:{}.{} already exists, cannot overwrite it. Please '
                'set `--append True` if you want to append to it.'
            ).format(project_id, dataset_id, table_name))
def test_raise_error_if_dataset_not_exists(self):
    """Covers the exists / 404-missing / other-HTTP-error paths."""
    mock_client = mock.Mock()
    mock_client.datasets.Get.return_value = bigquery.Dataset(
        datasetReference=bigquery.DatasetReference(
            projectId='project', datasetId='dataset'))
    # Dataset is reported as present: the helper must not raise.
    bigquery_util.raise_error_if_dataset_not_exists(
        mock_client, 'project', 'dataset')

    # A 404 from the API means the dataset is missing -> ValueError.
    mock_client.datasets.Get.side_effect = exceptions.HttpError(
        response={'status': '404'}, url='', content='')
    with self.assertRaises(ValueError):
        bigquery_util.raise_error_if_dataset_not_exists(
            mock_client, 'project', 'dataset')

    # Any other HTTP failure (e.g. auth) is propagated unchanged.
    mock_client.datasets.Get.side_effect = exceptions.HttpError(
        response={'status': '401'}, url='', content='')
    with self.assertRaises(exceptions.HttpError):
        bigquery_util.raise_error_if_dataset_not_exists(
            mock_client, 'project', 'dataset')
def validate(self, parsed_args, client=None):
    # type: (argparse.Namespace, bigquery.BigqueryV2) -> None
    """Validates BigQuery output arguments before running the pipeline.

    Args:
        parsed_args: Parsed command-line arguments; reads `output_table`,
            `output_avro_path`, `append`, and `update_schema_on_append`.
        client: Optional BigQuery client; when falsy, one is built from the
            application-default credentials.

    Raises:
        ValueError: If `--update_schema_on_append` is set without `--append`,
            if the target dataset does not exist, or if the output table
            already exists while `--append` is false.
    """
    only_avro_output = (not parsed_args.output_table
                        and parsed_args.output_avro_path)
    if only_avro_output:
        # Writing into BigQuery is not requested; no more BigQuery checks needed.
        return

    project_id, dataset_id, table_id = bigquery_util.parse_table_reference(
        parsed_args.output_table)
    if not client:
        scoped_credentials = GoogleCredentials.get_application_default(
        ).create_scoped(['https://www.googleapis.com/auth/bigquery'])
        client = bigquery.BigqueryV2(credentials=scoped_credentials)
    bigquery_util.raise_error_if_dataset_not_exists(
        client, project_id, dataset_id)

    if parsed_args.append:
        # Appending is allowed to target an existing table; nothing to check.
        return
    if parsed_args.update_schema_on_append:
        raise ValueError('--update_schema_on_append requires --append to be '
                         'true.')
    # Ensuring given output table doesn't already exist to avoid overwriting it.
    bigquery_util.raise_error_if_table_exists(
        client, project_id, dataset_id, table_id)