コード例 #1
0
    def _validate_output_tables(self, client, output_table_base_name,
                                sharding_config_path, append, is_main_output):
        """Check that each expected output table is in the state `append` needs.

        The expected tables are the sample info table (only when
        `is_main_output` is truthy) followed by one table per shard suffix
        taken from the sharding config.

        Args:
            client: BigQuery client forwarded to the `bigquery_util` helpers.
            output_table_base_name: Table reference of the base output table;
                it must not contain `bigquery_util.TABLE_SUFFIX_SEPARATOR`.
            sharding_config_path: Path handed to
                `variant_sharding.VariantSharding`.
            append: When truthy every expected table must already exist;
                otherwise none of them may exist.
            is_main_output: When truthy the sample info table is checked too.

        Raises:
            ValueError: If the base name or a shard suffix contains the
                reserved separator, if `append` is truthy and a table is
                missing, or if `append` is falsy and a table already exists.
                The `bigquery_util` helpers called here may raise as well.
        """
        stripped_base_name = bigquery_util.get_table_base_name(
            output_table_base_name)
        if output_table_base_name != stripped_base_name:
            base_name_error = ('Output table cannot contain "{}". we reserve this '
                               'string to mark sharded output tables.')
            raise ValueError(
                base_name_error.format(bigquery_util.TABLE_SUFFIX_SEPARATOR))

        parsed_reference = bigquery_util.parse_table_reference(
            output_table_base_name)
        project_id, dataset_id, table_id = parsed_reference
        bigquery_util.raise_error_if_dataset_not_exists(
            client, project_id, dataset_id)

        expected_tables = []
        if is_main_output:
            sample_info_table = bigquery_util.compose_table_name(
                table_id, SAMPLE_INFO_TABLE_SUFFIX)
            expected_tables.append(sample_info_table)

        shard_config = variant_sharding.VariantSharding(sharding_config_path)
        shard_count = shard_config.get_num_shards()
        # The residual shard comes last; when the config does not keep it,
        # no table is expected for it.
        residual_index = shard_config.get_residual_index()
        if not shard_config.should_keep_shard(residual_index):
            shard_count -= 1

        for shard_index in range(shard_count):
            suffix = shard_config.get_output_table_suffix(shard_index)
            stripped_suffix = bigquery_util.get_table_base_name(suffix)
            if suffix != stripped_suffix:
                suffix_error = ('Table suffix cannot contain "{}" we reserve this  '
                                'string to mark sharded output tables.')
                raise ValueError(
                    suffix_error.format(bigquery_util.TABLE_SUFFIX_SEPARATOR))
            expected_tables.append(
                bigquery_util.compose_table_name(table_id, suffix))

        for table_name in expected_tables:
            # One existence lookup per table, whichever mode is active.
            already_there = bigquery_util.table_exist(
                client, project_id, dataset_id, table_name)
            if append and not already_there:
                raise ValueError(
                    'Table {}:{}.{} does not exist, cannot append to it.'.format(
                        project_id, dataset_id, table_name))
            if not append and already_there:
                overwrite_error = (
                    'Table {}:{}.{} already exists, cannot overwrite it. Please '
                    'set `--append True` if you want to append to it.')
                raise ValueError(
                    overwrite_error.format(project_id, dataset_id, table_name))
コード例 #2
0
    def test_raise_error_if_dataset_not_exists(self):
        """Exercise `raise_error_if_dataset_not_exists` against a mocked client.

        A successful `datasets.Get` must not raise. An HTTP 404 from the
        client is expected to surface as `ValueError`, while an HTTP 401 is
        expected to surface as `exceptions.HttpError`.
        """
        fake_client = mock.Mock()
        dataset_reference = bigquery.DatasetReference(projectId='project',
                                                      datasetId='dataset')
        fake_client.datasets.Get.return_value = bigquery.Dataset(
            datasetReference=dataset_reference)

        # Dataset lookup succeeds: the helper should simply return.
        bigquery_util.raise_error_if_dataset_not_exists(
            fake_client, 'project', 'dataset')

        # Each HTTP status the client fails with, paired with the error type
        # the helper is expected to raise for it.
        failure_cases = (
            ('404', ValueError),
            ('401', exceptions.HttpError),
        )
        for http_status, expected_error in failure_cases:
            fake_client.datasets.Get.side_effect = exceptions.HttpError(
                response={'status': http_status}, url='', content='')
            with self.assertRaises(expected_error):
                bigquery_util.raise_error_if_dataset_not_exists(
                    fake_client, 'project', 'dataset')
コード例 #3
0
    def validate(self, parsed_args, client=None):
        # type: (argparse.Namespace, bigquery.BigqueryV2) -> None
        """Run the BigQuery-related checks on the parsed command-line flags.

        Nothing is checked when `output_table` is empty and
        `output_avro_path` is set. Otherwise the dataset named by
        `output_table` is checked via
        `bigquery_util.raise_error_if_dataset_not_exists`, and, unless
        `append` is set, the table is checked via
        `bigquery_util.raise_error_if_table_exists`.

        Args:
            parsed_args: Parsed flags; reads `output_table`,
                `output_avro_path`, `append` and `update_schema_on_append`.
            client: Optional BigQuery client. When falsy, one is built from
                the application default credentials scoped to BigQuery.

        Raises:
            ValueError: If `update_schema_on_append` is set without `append`.
                The `bigquery_util` helpers called here may raise as well.
        """
        needs_bigquery_checks = (parsed_args.output_table
                                 or not parsed_args.output_avro_path)
        if not needs_bigquery_checks:
            # Only Avro output was requested, so BigQuery is not consulted.
            return

        table_reference = bigquery_util.parse_table_reference(
            parsed_args.output_table)
        project_id, dataset_id, table_id = table_reference

        if not client:
            bigquery_scopes = ['https://www.googleapis.com/auth/bigquery']
            default_credentials = GoogleCredentials.get_application_default()
            client = bigquery.BigqueryV2(
                credentials=default_credentials.create_scoped(bigquery_scopes))

        bigquery_util.raise_error_if_dataset_not_exists(
            client, project_id, dataset_id)

        if parsed_args.append:
            # Appending targets an existing table; no overwrite guard needed.
            return
        if parsed_args.update_schema_on_append:
            raise ValueError(
                '--update_schema_on_append requires --append to be '
                'true.')
        # Refuse to continue when the table exists, to avoid overwriting it.
        bigquery_util.raise_error_if_table_exists(client, project_id,
                                                  dataset_id, table_id)