def validate(self, parsed_args, client=None):
     """Check that the requested BigQuery output table can be written to.

     Verifies that ``parsed_args.output_table`` has the form
     ``PROJECT:DATASET.TABLE`` and that the referenced dataset can be
     fetched from BigQuery.

     Args:
         parsed_args: Parsed arguments; only ``output_table`` is read.
         client: Optional BigQuery client. When falsy, a client is built
             from the application default credentials.

     Raises:
         ValueError: If ``output_table`` is missing or malformed, or if
             BigQuery answers the dataset lookup with HTTP 404.
         exceptions.HttpError: Any other BigQuery error, re-raised as is.
     """
     output_table = parsed_args.output_table
     # A missing value (None) would make re.match raise TypeError; report it
     # as a malformed table reference like any other bad input.
     output_table_re_match = None
     if output_table:
         output_table_re_match = re.match(
             r'^((?P<project>.+):)(?P<dataset>\w+)\.(?P<table>[\w\$]+)$',
             output_table)
     if not output_table_re_match:
         raise ValueError(
             'Expected a table reference (PROJECT:DATASET.TABLE) '
             'instead of {}.'.format(output_table))
     project_id = output_table_re_match.group('project')
     dataset_id = output_table_re_match.group('dataset')
     # Build the client outside the try block so that only the dataset
     # lookup itself can be translated into "dataset does not exist".
     if not client:
         credentials = GoogleCredentials.get_application_default(
         ).create_scoped(['https://www.googleapis.com/auth/bigquery'])
         client = bigquery.BigqueryV2(credentials=credentials)
     try:
         client.datasets.Get(
             bigquery.BigqueryDatasetsGetRequest(projectId=project_id,
                                                 datasetId=dataset_id))
     except exceptions.HttpError as e:
         if e.status_code == 404:
             # Keep the original HTTP error attached as the cause.
             raise ValueError('Dataset %s:%s does not exist.' %
                              (project_id, dataset_id)) from e
         # For the rest of the errors, use BigQuery error message.
         raise
Example #2
0
def raise_error_if_dataset_not_exists(client, project_id, dataset_id):
    # type: (beam_bigquery.BigqueryV2, str, str) -> None
    """Raise ValueError when BigQuery reports the dataset as missing.

    Issues a dataset Get request through ``client``. An HTTP 404 response is
    turned into a ValueError (with the HTTP error as its cause); every other
    HttpError propagates unchanged.
    """
    lookup_request = beam_bigquery.BigqueryDatasetsGetRequest(
        projectId=project_id, datasetId=dataset_id)
    try:
        client.datasets.Get(lookup_request)
    except exceptions.HttpError as http_error:
        if http_error.status_code != 404:
            # For the rest of the errors, use BigQuery error message.
            raise
        message = 'Dataset %s:%s does not exist.' % (project_id, dataset_id)
        raise ValueError(message) from http_error
Example #3
0
 def clean_up_temporary_dataset(self, project_id):
   """Remove the temporary dataset associated with project_id, if present.

   The dataset is looked up first. When BigQuery answers with HTTP 404 a
   warning is logged and nothing else happens; any other HttpError
   propagates. Otherwise the dataset is handed to self._delete_dataset.
   """
   temp_table = self._get_temp_table(project_id)
   lookup_request = bigquery.BigqueryDatasetsGetRequest(
       projectId=project_id, datasetId=temp_table.datasetId)
   try:
     self.client.datasets.Get(lookup_request)
   except HttpError as exn:
     if exn.status_code != 404:
       raise
     logging.warning('Dataset %s:%s does not exist', project_id,
                     temp_table.datasetId)
     return
   self._delete_dataset(temp_table.projectId, temp_table.datasetId, True)
Example #4
0
 def validate(self, parsed_args, client=None):
     # type: (argparse.Namespace, bigquery.BigqueryV2) -> None
     """Validate the BigQuery output options in ``parsed_args``.

     Skips all checks when no output table is given but an Avro output path
     is. Otherwise verifies that ``output_table`` has the form
     ``PROJECT:DATASET.TABLE``, that the dataset can be fetched, and, unless
     ``append`` is set, that the table does not exist yet.

     Args:
         parsed_args: Parsed arguments; reads ``output_table``,
             ``output_avro_path``, ``append`` and
             ``update_schema_on_append``.
         client: Optional BigQuery client. When falsy, a client is built
             from the application default credentials.

     Raises:
         ValueError: If ``output_table`` is missing or malformed, the
             dataset lookup returns HTTP 404, ``update_schema_on_append`` is
             set without ``append``, or the table already exists while
             ``append`` is not set.
         exceptions.HttpError: Any other BigQuery error, re-raised as is.
     """
     output_table = parsed_args.output_table
     if not output_table and parsed_args.output_avro_path:
         # Writing into BigQuery is not requested; no more BigQuery checks needed.
         return
     # A missing value (None) would make re.match raise TypeError; report it
     # as a malformed table reference like any other bad input.
     output_table_re_match = None
     if output_table:
         output_table_re_match = re.match(
             r'^((?P<project>.+):)(?P<dataset>\w+)\.(?P<table>[\w\$]+)$',
             output_table)
     if not output_table_re_match:
         raise ValueError(
             'Expected a table reference (PROJECT:DATASET.TABLE) '
             'instead of {}.'.format(output_table))
     if not client:
         credentials = GoogleCredentials.get_application_default(
         ).create_scoped(['https://www.googleapis.com/auth/bigquery'])
         client = bigquery.BigqueryV2(credentials=credentials)
     project_id = output_table_re_match.group('project')
     dataset_id = output_table_re_match.group('dataset')
     table_id = output_table_re_match.group('table')
     try:
         client.datasets.Get(
             bigquery.BigqueryDatasetsGetRequest(projectId=project_id,
                                                 datasetId=dataset_id))
     except exceptions.HttpError as e:
         if e.status_code == 404:
             # Keep the original HTTP error attached as the cause.
             raise ValueError('Dataset %s:%s does not exist.' %
                              (project_id, dataset_id)) from e
         # For the rest of the errors, use BigQuery error message.
         raise
     if parsed_args.append:
         # Appending to an existing table is allowed; nothing more to check.
         return
     if parsed_args.update_schema_on_append:
         raise ValueError(
             '--update_schema_on_append requires --append to be '
             'true.')
     # Ensuring given output table doesn't already exist to avoid overwriting it.
     try:
         client.tables.Get(
             bigquery.BigqueryTablesGetRequest(projectId=project_id,
                                               datasetId=dataset_id,
                                               tableId=table_id))
     except exceptions.HttpError as e:
         if e.status_code != 404:
             # For the rest of the errors, use BigQuery error message.
             raise
         # A 404 is expected: the output table must not already exist.
     else:
         # The lookup succeeded, so the table is already there.
         raise ValueError(
             'Table %s:%s.%s already exists, cannot overwrite it.' %
             (project_id, dataset_id, table_id))
Example #5
0
 def create_temporary_dataset(self, project_id):
   """Create the temporary dataset for this wrapper in project_id.

   The dataset id is BigQueryWrapper.TEMP_DATASET followed by
   self._temporary_table_suffix. The dataset is looked up first: an HTTP
   404 means the id is free, so it is created via get_or_create_dataset;
   any other HttpError propagates.

   Raises:
     RuntimeError: If the dataset already exists and project_id is not None.
   """
   dataset_id = BigQueryWrapper.TEMP_DATASET + self._temporary_table_suffix
   # Check if dataset exists to make sure that the temporary id is unique
   lookup_request = bigquery.BigqueryDatasetsGetRequest(
       projectId=project_id, datasetId=dataset_id)
   try:
     self.client.datasets.Get(lookup_request)
   except HttpError as exn:
     if exn.status_code != 404:
       raise
     logging.warning('Dataset does not exist so we will create it')
     self.get_or_create_dataset(project_id, dataset_id)
   else:
     if project_id is not None:
       # Unittests don't pass projectIds so they can be run without error
       raise RuntimeError(
           'Dataset %s:%s already exists so cannot be used as temporary.'
           % (project_id, dataset_id))
Example #6
0
 def get_or_create_dataset(self, project_id, dataset_id):
   """Return the dataset project_id:dataset_id, inserting it on HTTP 404.

   A dataset Get is attempted first and its result returned. When that
   fails with status 404, a new dataset is inserted and the insert response
   is returned instead. Any other HttpError propagates.
   """
   lookup_request = bigquery.BigqueryDatasetsGetRequest(
       projectId=project_id, datasetId=dataset_id)
   try:
     return self.client.datasets.Get(lookup_request)
   except HttpError as exn:
     if exn.status_code != 404:
       raise
     # The dataset is missing, so build and submit an insert request.
     reference = bigquery.DatasetReference(
         projectId=project_id, datasetId=dataset_id)
     new_dataset = bigquery.Dataset(datasetReference=reference)
     insert_request = bigquery.BigqueryDatasetsInsertRequest(
         projectId=project_id, dataset=new_dataset)
     # The response is a bigquery.Dataset instance.
     return self.client.datasets.Insert(insert_request)