def validate(self, parsed_args, client=None):
        """Verify that the input table and its companion sample table exist.

        Args:
          parsed_args: Parsed command-line arguments; only `input_table` is
            read.
          client: Optional BigQuery client. When falsy, a client is built from
            the application default credentials.

        Raises:
          ValueError: If the input table does not exist, if its table id does
            not contain exactly one `TABLE_SUFFIX_SEPARATOR`, or if the sample
            table derived from it does not exist.
        """
        if not client:
            default_credentials = GoogleCredentials.get_application_default()
            client = bigquery.BigqueryV2(
                credentials=default_credentials.create_scoped(
                    ['https://www.googleapis.com/auth/bigquery']))

        table_reference = bigquery_util.parse_table_reference(
            parsed_args.input_table)
        project_id, dataset_id, table_id = table_reference

        input_table_found = bigquery_util.table_exist(
            client, project_id, dataset_id, table_id)
        if not input_table_found:
            raise ValueError('Table {}:{}.{} does not exist.'.format(
                project_id, dataset_id, table_id))

        if table_id.count(TABLE_SUFFIX_SEPARATOR) != 1:
            malformed_message = (
                'Input table {} is malformed - exactly one suffix separator '
                '"{}" is required')
            raise ValueError(malformed_message.format(
                parsed_args.input_table, TABLE_SUFFIX_SEPARATOR))

        # The part of the table id before the separator is the base name that
        # the sample table name is composed from.
        separator_index = table_id.find(TABLE_SUFFIX_SEPARATOR)
        sample_table_id = bigquery_util.compose_table_name(
            table_id[:separator_index], SAMPLE_INFO_TABLE_SUFFIX)

        sample_table_found = bigquery_util.table_exist(
            client, project_id, dataset_id, sample_table_id)
        if not sample_table_found:
            raise ValueError('Sample table {}:{}.{} does not exist.'.format(
                project_id, dataset_id, sample_table_id))
# Example #2
# 0
    def _update_bigquery_schema_on_append(self):
        # type: () -> None
        """Merge `self._schema` into the schema of the existing output table.

        The table named by `self._output_table` is fetched first. If that
        lookup fails with an HTTP error the table is treated as absent and
        nothing is updated. Otherwise the existing fields are merged with
        `self._schema.fields` and the table is updated with the merged schema.

        Raises:
          ValueError: If `self._output_table` is not a PROJECT:DATASET.TABLE
            reference.
          RuntimeError: If the BigQuery update request fails.
        """
        # TODO (yifangchen): Move the logic into validate().
        output_table_re_match = re.match(
            r'^((?P<project>.+):)(?P<dataset>\w+)\.(?P<table>[\w\$]+)$',
            self._output_table)
        if not output_table_re_match:
            # re.match() returns None on a mismatch; fail with a clear message
            # instead of an AttributeError from calling .group() on None.
            raise ValueError(
                'Expected a table reference (PROJECT:DATASET.TABLE) '
                'instead of {}.'.format(self._output_table))
        project_id = output_table_re_match.group('project')
        dataset_id = output_table_re_match.group('dataset')
        table_id = output_table_re_match.group('table')

        credentials = GoogleCredentials.get_application_default(
        ).create_scoped(['https://www.googleapis.com/auth/bigquery'])
        client = bigquery.BigqueryV2(credentials=credentials)
        try:
            existing_table = client.tables.Get(
                bigquery.BigqueryTablesGetRequest(projectId=project_id,
                                                  datasetId=dataset_id,
                                                  tableId=table_id))
        except exceptions.HttpError:
            # If table does not exist, do not need to update the schema.
            return

        new_schema = bigquery.TableSchema()
        new_schema.fields = _get_merged_field_schemas(
            existing_table.schema.fields, self._schema.fields)
        existing_table.schema = new_schema
        try:
            client.tables.Update(
                bigquery.BigqueryTablesUpdateRequest(projectId=project_id,
                                                     datasetId=dataset_id,
                                                     table=existing_table,
                                                     tableId=table_id))
        except exceptions.HttpError as e:
            raise RuntimeError('BigQuery schema update failed: %s' % str(e))
    def validate(self, parsed_args, client=None):
        # type: (argparse.Namespace, bigquery.BigqueryV2) -> None
        """Validate the append/sharding flags and the requested output tables.

        Args:
          parsed_args: Parsed command-line arguments.
          client: Optional BigQuery client. When falsy, a client is built from
            the application default credentials.

        Raises:
          ValueError: If `update_schema_on_append` is set without `append`, if
            `sharding_config_path` is empty or blank, if `output_table` is
            empty, or if `sample_lookup_optimized_output_table` equals
            `output_table`. `_validate_output_tables` runs its own checks on
            each table.
        """
        if parsed_args.update_schema_on_append:
            if not parsed_args.append:
                raise ValueError(
                    '--update_schema_on_append requires --append to be true.')

        config_path = parsed_args.sharding_config_path
        if not (config_path and config_path.strip()):
            raise ValueError(
                '--sharding_config_path must point to a valid config file.')

        if not client:
            default_credentials = GoogleCredentials.get_application_default()
            client = bigquery.BigqueryV2(
                credentials=default_credentials.create_scoped(
                    ['https://www.googleapis.com/auth/bigquery']))

        main_table = parsed_args.output_table
        if not main_table:
            raise ValueError('--output_table must have a value.')
        self._validate_output_tables(
            client, main_table, config_path, parsed_args.append, True)

        optimized_table = parsed_args.sample_lookup_optimized_output_table
        if not optimized_table:
            # The optional second output table was not requested.
            return
        if main_table == optimized_table:
            raise ValueError(
                'sample_lookup_optimized_output_table cannot be the same as '
                'output_table.')
        self._validate_output_tables(
            client, optimized_table, config_path, parsed_args.append, False)
 def validate(self, parsed_args, client=None):
     """Check that `output_table` is well formed and that its dataset exists.

     Args:
       parsed_args: Parsed command-line arguments; only `output_table` is
         read.
       client: Optional BigQuery client. When falsy, a client is built from
         the application default credentials.

     Raises:
       ValueError: If `output_table` is not a PROJECT:DATASET.TABLE reference
         or the dataset lookup returns HTTP 404.
       exceptions.HttpError: Re-raised unchanged for any other HTTP failure.
     """
     table_reference = parsed_args.output_table
     parsed_reference = re.match(
         r'^((?P<project>.+):)(?P<dataset>\w+)\.(?P<table>[\w\$]+)$',
         table_reference)
     if parsed_reference is None:
         raise ValueError(
             'Expected a table reference (PROJECT:DATASET.TABLE) '
             'instead of {}.'.format(table_reference))

     project_id = parsed_reference.group('project')
     dataset_id = parsed_reference.group('dataset')
     try:
         if not client:
             default_credentials = GoogleCredentials.get_application_default()
             client = bigquery.BigqueryV2(
                 credentials=default_credentials.create_scoped(
                     ['https://www.googleapis.com/auth/bigquery']))
         dataset_request = bigquery.BigqueryDatasetsGetRequest(
             projectId=project_id, datasetId=dataset_id)
         client.datasets.Get(dataset_request)
     except exceptions.HttpError as e:
         if e.status_code != 404:
             # For the rest of the errors, use BigQuery error message.
             raise
         raise ValueError('Dataset %s:%s does not exist.' %
                          (project_id, dataset_id))
# Example #5
# 0
# File: bigquery.py Project: zhouzach/beam
 def __init__(self, client=None):
   """Store the BigQuery client and initialize row-id and temp-table state.

   Args:
     client: Optional BigQuery client. When falsy, a client is built from the
       service credentials.
   """
   if client:
     self.client = client
     # For testing scenarios where we pass in a client we do not want a
     # randomized prefix for row IDs.
     self._row_id_prefix = ''
   else:
     self.client = bigquery.BigqueryV2(
         credentials=auth.get_service_credentials())
     self._row_id_prefix = uuid.uuid4()
   self._unique_row_id = 0
   self._temporary_table_suffix = uuid.uuid4().hex
# Example #6
# 0
 def __init__(self, client=None):
     """Store the BigQuery client and initialize row-id and temp-table state.

     Args:
       client: Optional BigQuery client. When falsy, a client is built with a
         new HTTP object, the service credentials, and a response encoding
         chosen by the running Python major version.
     """
     if client:
         self.client = client
         # For testing scenarios where we pass in a client we do not want a
         # randomized prefix for row IDs.
         self._row_id_prefix = ''
     else:
         self.client = bigquery.BigqueryV2(
             http=get_new_http(),
             credentials=auth.get_service_credentials(),
             response_encoding=(
                 'utf8' if sys.version_info[0] >= 3 else None))
         self._row_id_prefix = uuid.uuid4()
     self._unique_row_id = 0
     self._temporary_table_suffix = uuid.uuid4().hex
# Example #7
# 0
def _get_schema(input_table):
  # type: (str) -> bigquery_v2.TableSchema
  """Return the schema of the BigQuery table referenced by `input_table`.

  The reference is split with `bigquery_util.parse_table_reference` and the
  table is fetched using the application default credentials.
  """
  table_reference = bigquery_util.parse_table_reference(input_table)
  project_id, dataset_id, table_id = table_reference

  default_credentials = client.GoogleCredentials.get_application_default()
  scoped_credentials = default_credentials.create_scoped(
      ['https://www.googleapis.com/auth/bigquery'])
  bigquery_client = bigquery_v2.BigqueryV2(credentials=scoped_credentials)

  get_request = bigquery_v2.BigqueryTablesGetRequest(
      projectId=project_id, datasetId=dataset_id, tableId=table_id)
  return bigquery_client.tables.Get(get_request).schema
# Example #8
# 0
 def validate(self, parsed_args, client=None):
     # type: (argparse.Namespace, bigquery.BigqueryV2) -> None
     """Validate the BigQuery output table arguments.

     Args:
       parsed_args: Parsed command-line arguments. Reads `output_table`,
         `output_avro_path`, `append` and `update_schema_on_append`.
       client: Optional BigQuery client. When falsy, a client is built from
         the application default credentials.

     Raises:
       ValueError: If neither `output_table` nor `output_avro_path` is set,
         if `output_table` is not a PROJECT:DATASET.TABLE reference, if
         `update_schema_on_append` is set without `append`, if the dataset
         lookup returns HTTP 404, or if `append` is false and the output
         table already exists.
       exceptions.HttpError: Re-raised unchanged for any non-404 HTTP failure.
     """
     if not parsed_args.output_table:
         if parsed_args.output_avro_path:
             # Writing into BigQuery is not requested; no more BigQuery checks
             # needed.
             return
         # No output was requested at all. Fail with a validation error here
         # rather than letting re.match() raise TypeError on a None value.
         raise ValueError(
             'At least one of --output_table or --output_avro_path must '
             'have a value.')

     output_table_re_match = re.match(
         r'^((?P<project>.+):)(?P<dataset>\w+)\.(?P<table>[\w\$]+)$',
         parsed_args.output_table)
     if not output_table_re_match:
         raise ValueError(
             'Expected a table reference (PROJECT:DATASET.TABLE) '
             'instead of {}.'.format(parsed_args.output_table))

     # This check only looks at flags, so run it before the credentials lookup
     # and the network calls below.
     if not parsed_args.append and parsed_args.update_schema_on_append:
         raise ValueError(
             '--update_schema_on_append requires --append to be '
             'true.')

     if not client:
         credentials = GoogleCredentials.get_application_default(
         ).create_scoped(['https://www.googleapis.com/auth/bigquery'])
         client = bigquery.BigqueryV2(credentials=credentials)
     project_id = output_table_re_match.group('project')
     dataset_id = output_table_re_match.group('dataset')
     table_id = output_table_re_match.group('table')
     try:
         client.datasets.Get(
             bigquery.BigqueryDatasetsGetRequest(projectId=project_id,
                                                 datasetId=dataset_id))
     except exceptions.HttpError as e:
         if e.status_code == 404:
             raise ValueError('Dataset %s:%s does not exist.' %
                              (project_id, dataset_id))
         # For the rest of the errors, use BigQuery error message.
         raise

     if parsed_args.append:
         # Appending to an existing table is allowed; nothing more to check.
         return

     # Ensuring given output table doesn't already exist to avoid overwriting
     # it.
     try:
         client.tables.Get(
             bigquery.BigqueryTablesGetRequest(projectId=project_id,
                                               datasetId=dataset_id,
                                               tableId=table_id))
     except exceptions.HttpError as e:
         if e.status_code != 404:
             # For the rest of the errors, use BigQuery error message.
             raise
         # A 404 is expected: the output table must not already exist.
     else:
         raise ValueError(
             'Table %s:%s.%s already exists, cannot overwrite it.' %
             (project_id, dataset_id, table_id))
    def validate(self, parsed_args, client=None):
        # type: (argparse.Namespace, bigquery.BigqueryV2) -> None
        """Validate the BigQuery output table arguments.

        Returns immediately when only an Avro output path was given. Otherwise
        the output table reference is parsed, the dataset is checked through
        `bigquery_util.raise_error_if_dataset_not_exists`, and, when not
        appending, the table is checked through
        `bigquery_util.raise_error_if_table_exists`.

        Args:
          parsed_args: Parsed command-line arguments.
          client: Optional BigQuery client. When falsy, a client is built from
            the application default credentials.

        Raises:
          ValueError: If `update_schema_on_append` is set while `append` is
            false. The `bigquery_util` helpers may raise their own errors.
        """
        writes_to_bigquery = (bool(parsed_args.output_table)
                              or not parsed_args.output_avro_path)
        if not writes_to_bigquery:
            # Writing into BigQuery is not requested; no more BigQuery checks
            # needed.
            return

        table_reference = bigquery_util.parse_table_reference(
            parsed_args.output_table)
        project_id, dataset_id, table_id = table_reference

        if not client:
            default_credentials = GoogleCredentials.get_application_default()
            client = bigquery.BigqueryV2(
                credentials=default_credentials.create_scoped(
                    ['https://www.googleapis.com/auth/bigquery']))

        bigquery_util.raise_error_if_dataset_not_exists(
            client, project_id, dataset_id)

        if parsed_args.append:
            # Appending to an existing table is allowed; nothing more to check.
            return
        if parsed_args.update_schema_on_append:
            raise ValueError(
                '--update_schema_on_append requires --append to be true.')
        # Ensuring given output table doesn't already exist to avoid
        # overwriting it.
        bigquery_util.raise_error_if_table_exists(
            client, project_id, dataset_id, table_id)