Example #1
  def Run(self, args):
    """This is what gets called when the user runs this command.

    Args:
      args: an argparse namespace. All the arguments that were provided to this
        command invocation.

    Returns:
      A Table message.
    """
    apitools_client = self.context[commands.APITOOLS_CLIENT_KEY]
    bigquery_messages = self.context[commands.BIGQUERY_MESSAGES_MODULE_KEY]
    resource_parser = self.context[commands.BIGQUERY_REGISTRY_KEY]
    resource = resource_parser.Parse(
        args.table_or_view, collection='bigquery.tables')
    reference = message_conversions.TableResourceToReference(
        bigquery_messages, resource)
    request = bigquery_messages.BigqueryTablesGetRequest(
        projectId=reference.projectId,
        datasetId=reference.datasetId,
        tableId=reference.tableId)
    try:
      return apitools_client.tables.Get(request)
    except apitools_base.HttpError as server_error:
      raise bigquery.Error.ForHttpError(server_error)
Example #2
    def _DoAddRows(self, json_file, args):
        """Add rows from json_file to args.target_table."""
        apitools_client = self.context[commands.APITOOLS_CLIENT_KEY]
        bigquery_messages = self.context[commands.BIGQUERY_MESSAGES_MODULE_KEY]
        resource_parser = self.context[commands.BIGQUERY_REGISTRY_KEY]
        resource = resource_parser.Parse(args.target_table,
                                         collection='bigquery.tables')
        reference = message_conversions.TableResourceToReference(
            bigquery_messages, resource)
        message_builder = bigquery_json_object_messages.MessageBuilder(
            bigquery_messages)

        batch = []
        lineno = 0
        errors_found = False

        for line in json_file:
            lineno += 1
            trimmed_line = line.strip()
            if trimmed_line:
                try:
                    parsed_row = message_builder.Build(trimmed_line)
                except bigquery.ClientError as e:
                    raise bigquery.Error(
                        'Line {num}: {msg}'.format(num=lineno, msg=str(e)),
                        None, [])
                batch.append(parsed_row)
                if _MAX_ROWS_PER_REQUEST and (len(batch)
                                              == _MAX_ROWS_PER_REQUEST):
                    result = TablesAddRows._InsertTableRows(
                        reference, batch, apitools_client, bigquery_messages)
                    del batch[:]
                    if result.insertErrors:
                        errors_found = True
                        break

        if lineno == 0:
            log.status.Print('JSON file was empty.')
            return

        if batch and not errors_found:
            result = TablesAddRows._InsertTableRows(reference, batch,
                                                    apitools_client,
                                                    bigquery_messages)
            errors_found = bool(result.insertErrors)

        if errors_found:
            for entry in result.insertErrors:
                log.err.Print('Record {0} could not be inserted:'.format(
                    entry.index))
                for error in entry.errors:
                    log.err.Print('\t{0}: {1}'.format(error.reason,
                                                      error.message))
            raise bigquery.Error('Rows not added')
        else:
            if not args.quiet:
                log.status.Print('Rows added successfully.')
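The core pattern of Example #2 is independent of the BigQuery client: read newline-delimited JSON, accumulate parsed rows, and flush whenever the batch reaches _MAX_ROWS_PER_REQUEST. Below is a minimal standalone sketch of that batching loop, using only the standard library and a caller-supplied callback in place of TablesAddRows._InsertTableRows; the function name, the batch size, and the error-list return convention are illustrative assumptions, not gcloud code.

import json

_MAX_ROWS_PER_REQUEST = 500  # assumed batch size; the real constant lives in the command module


def insert_json_lines(json_file, insert_batch, max_rows=_MAX_ROWS_PER_REQUEST):
    """Parse newline-delimited JSON and flush full batches via insert_batch.

    insert_batch(rows) stands in for TablesAddRows._InsertTableRows and is
    assumed to return a (possibly empty) list of per-row insert errors.
    """
    batch = []
    for lineno, line in enumerate(json_file, start=1):
        line = line.strip()
        if not line:
            continue  # skip blank lines, as the command does
        try:
            batch.append(json.loads(line))
        except ValueError as e:
            raise ValueError('Line {0}: {1}'.format(lineno, e))
        if max_rows and len(batch) == max_rows:
            errors = insert_batch(batch)
            batch = []
            if errors:
                return errors  # stop early, mirroring the break above
    return insert_batch(batch) if batch else []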
Example #3
    def Run(self, args):
        """This is what gets called when the user runs this command.

    Args:
      args: an argparse namespace, All the arguments that were provided to this
        command invocation.

    Raises:
      ToolException: when destination uri is not specified or invalid.
    """
        apitools_client = self.context[commands.APITOOLS_CLIENT_KEY]
        bigquery_messages = self.context[commands.BIGQUERY_MESSAGES_MODULE_KEY]
        resource_parser = self.context[commands.BIGQUERY_REGISTRY_KEY]
        project_id = properties.VALUES.core.project.Get(required=True)
        source_table_resource = resource_parser.Parse(
            args.source_table, collection='bigquery.tables')
        source_table_reference = message_conversions.TableResourceToReference(
            bigquery_messages, source_table_resource)

        if not args.destination_uri:
            raise exceptions.ToolException(
                'At least one destination URI must be specified.')
        destination_uris = args.destination_uri
        for uri in destination_uris:
            if not uri.startswith('gs://'):
                raise exceptions.ToolException((
                    'Illegal URI: {0}. Only Google Storage ("gs://") URIs are '
                    'supported.').format(uri))

        job = job_control.ExecuteJob(
            apitools_client,
            bigquery_messages,
            args,
            configuration=bigquery_messages.JobConfiguration(
                extract=bigquery_messages.JobConfigurationExtract(
                    sourceTable=source_table_reference,
                    destinationUris=destination_uris,
                    destinationFormat=bigquery_client_helper.
                    NormalizeTextualFormat(args.destination_format),
                    fieldDelimiter=bigquery_client_helper.
                    NormalizeFieldDelimiter(args.field_delimiter))),
            async=args.async,
            project_id=project_id,
            job_id=job_ids.JobIdProvider().GetJobId(args.job_id,
                                                    args.fingerprint_job_id))

        if args.async:
            job_resource = resource_parser.Create(
                'bigquery.jobs',
                projectId=job.jobReference.projectId,
                jobId=job.jobReference.jobId)
            log.CreatedResource(job_resource)
Example #4
    def Run(self, args):
        """This is what gets called when the user runs this command.

    Args:
      args: an argparse namespeace, All the arguments that were provided to this
        command invocation.

    Returns:
      Some value that we want to have printed later.
    """
        apitools_client = self.context[commands.APITOOLS_CLIENT_KEY]
        bigquery_messages = self.context[commands.BIGQUERY_MESSAGES_MODULE_KEY]
        resource_parser = self.context[commands.BIGQUERY_REGISTRY_KEY]
        resource = resource_parser.Parse(args.table_or_view,
                                         collection='bigquery.tables')
        reference = message_conversions.TableResourceToReference(
            bigquery_messages, resource)

        if args.expiration:
            expiration_instant_in_millis = int(
                (bigquery.CurrentTimeInSec() + args.expiration) * 1000)
        else:
            expiration_instant_in_millis = None

        if args.schema:
            new_schema = bigquery_schemas.ReadSchema(args.schema,
                                                     bigquery_messages)
        elif args.schema_file:
            new_schema = bigquery_schemas.ReadSchemaFile(
                args.schema_file, bigquery_messages)
        else:
            new_schema = None

        request = bigquery_messages.BigqueryTablesPatchRequest(
            projectId=reference.projectId,
            datasetId=reference.datasetId,
            tableId=reference.tableId,
            table=bigquery_messages.Table(
                tableReference=reference,
                description=args.description,
                expirationTime=expiration_instant_in_millis,
                friendlyName=args.friendly_name,
                schema=new_schema))

        try:
            apitools_client.tables.Patch(request)
        except apitools_base.HttpError as e:
            raise bigquery.Error.ForHttpError(e)
        log.UpdatedResource(reference)
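Both this patch command and the create command in Example #5 convert the relative --expiration flag (a number of seconds from now) into the absolute epoch timestamp in milliseconds that the Tables API expects in expirationTime. A minimal sketch of that conversion with the standard library, assuming bigquery.CurrentTimeInSec() is just a thin wrapper around time.time():

import time


def expiration_epoch_millis(expiration_seconds):
    """Absolute expiration time, in milliseconds since the Unix epoch."""
    return int((time.time() + expiration_seconds) * 1000)


# For example, to expire a table one week from now:
# expirationTime=expiration_epoch_millis(7 * 24 * 60 * 60)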
Example #5
    def Run(self, args):
        """This is what gets called when the user runs this command.

    Args:
      args: an argparse namespace, All the arguments that were provided to this
        command invocation.

    Raises:
      bigquery.DuplicateError: if table already exists.
    Returns:
      None
    """
        apitools_client = self.context[commands.APITOOLS_CLIENT_KEY]
        bigquery_messages = self.context[commands.BIGQUERY_MESSAGES_MODULE_KEY]
        resource_parser = self.context[commands.BIGQUERY_REGISTRY_KEY]
        resource = resource_parser.Parse(args.table,
                                         collection='bigquery.tables')
        reference = message_conversions.TableResourceToReference(
            bigquery_messages, resource)

        table_or_view = 'View' if args.view else 'Table'
        if bigquery_client_helper.TableExists(apitools_client,
                                              bigquery_messages, reference):
            if args.if_exists == 'skip':
                log.status.Print(
                    'Skipping this operation because a table or view named '
                    '[{0}] already exists.'.format(reference))
                return
            else:
                message = (
                    '{0} [{1}] could not be created; a table with this name '
                    'already exists.'.format(table_or_view, reference))
                raise bigquery.DuplicateError(message, None, [])
        if args.schema:
            schema = bigquery_schemas.ReadSchema(args.schema,
                                                 bigquery_messages)
        elif args.schema_file:
            schema = bigquery_schemas.ReadSchemaFile(args.schema_file,
                                                     bigquery_messages)
        else:
            schema = None

        if args.expiration:
            expiration_instant_seconds = time.time() + args.expiration
            expiration_instant_millis = int(1000 * expiration_instant_seconds)
        else:
            expiration_instant_millis = None

        if args.view:
            view_definition = bigquery_messages.ViewDefinition(query=args.view)
        else:
            view_definition = None

        request = bigquery_messages.BigqueryTablesInsertRequest(
            projectId=reference.projectId,
            datasetId=reference.datasetId,
            table=bigquery_messages.Table(
                tableReference=bigquery_messages.TableReference(
                    projectId=reference.projectId,
                    datasetId=reference.datasetId,
                    tableId=reference.tableId),
                description=args.description,
                expirationTime=expiration_instant_millis,
                schema=schema,
                view=view_definition))

        try:
            apitools_client.tables.Insert(request)
        except apitools_base.HttpError as server_error:
            raise bigquery.Error.ForHttpError(server_error)

        log.CreatedResource(resource)
Example #6
  def Run(self, args):
    """This is what gets called when the user runs this command.

    Args:
      args: an argparse namespace. All the arguments that were provided to this
        command invocation.

    Raises:
      ToolException: if no query was provided.

    Returns:
      If the --dry_run or --async flag was specified, None; otherwise, a
      SchemaAndRows object.
    """
    apitools_client = self.context[commands.APITOOLS_CLIENT_KEY]
    bigquery_messages = self.context[commands.BIGQUERY_MESSAGES_MODULE_KEY]
    resource_parser = self.context[commands.BIGQUERY_REGISTRY_KEY]
    project_id = properties.VALUES.core.project.Get(required=True)
    if not args.sql_query:
      raise exceptions.ToolException('No query string provided')
    destination_table = args.append_to or args.write_to
    if destination_table:
      output_resource = resource_parser.Parse(
          destination_table, collection='bigquery.tables')
      output_reference = message_conversions.TableResourceToReference(
          bigquery_messages, output_resource)
    else:
      output_reference = None
    query_configuration = bigquery_messages.JobConfigurationQuery(
        allowLargeResults=args.allow_large_results,
        createDisposition='CREATE_NEVER' if args.require_cache else None,
        # Set defaultDataset here if we choose to support a
        # --default-dataset-in-query flag.
        destinationTable=output_reference,
        flattenResults=not args.structured,
        preserveNulls=None,
        priority='BATCH' if args.batch else None,
        query=args.sql_query,
        useQueryCache=args.use_cache,
        writeDisposition=(
            (args.append_to and 'WRITE_APPEND')
            or (args.write_to and 'WRITE_TRUNCATE')))
    job = job_control.ExecuteJob(
        apitools_client,
        bigquery_messages,
        args,
        configuration=bigquery_messages.JobConfiguration(
            query=query_configuration, dryRun=args.dry_run),
        async=args.async,
        project_id=project_id,
        job_id=job_ids.JobIdProvider().GetJobId(
            args.job_id, args.fingerprint_job_id))

    if args.dry_run:
      log.Print(
          'Query successfully validated. Assuming the tables are not '
          'modified, running this query will process {0} bytes of data.'
          .format(job.statistics.query.totalBytesProcessed))
    elif args.async:
      job_resource = resource_parser.Create(
          'bigquery.jobs',
          projectId=job.jobReference.projectId,
          jobId=job.jobReference.jobId)
      log.CreatedResource(job_resource)
    else:
      return schema_and_rows.GetJobSchemaAndRows(
          apitools_client, bigquery_messages, job.jobReference, args.start_row,
          args.limit)
Example #7
File: copy.py Project: bopopescu/brydzenie
    def Run(self, args):
        """This is what gets called when the user runs this command.

    Args:
      args: an argparse namespace, All the arguments that were provided to this
        command invocation.
    Returns:
      None
    Raises:
      bigqueryError.BigqueryError: If the source and destination files are not
        both specified.
      calliope_exceptions.ToolException: If user cancels this operation.
      Exception: If an unexpected value for the --if-exists flag passed gcloud
        validation (which should never happen)
    """
        apitools_client = self.context[commands.APITOOLS_CLIENT_KEY]
        bigquery_messages = self.context[commands.BIGQUERY_MESSAGES_MODULE_KEY]
        resource_parser = self.context[commands.BIGQUERY_REGISTRY_KEY]
        project_id = properties.VALUES.core.project.Get(required=True)

        source_reference = resource_parser.Parse(args.source,
                                                 collection='bigquery.tables')
        source_reference_message = message_conversions.TableResourceToReference(
            bigquery_messages, source_reference)

        destination_resource = resource_parser.Parse(
            args.destination, collection='bigquery.tables')
        destination_reference = message_conversions.TableResourceToReference(
            bigquery_messages, destination_resource)

        if args.if_exists == 'append':
            write_disposition = 'WRITE_APPEND'
            ignore_already_exists = True
        elif args.if_exists == 'fail':
            write_disposition = 'WRITE_EMPTY'
            ignore_already_exists = False
        elif args.if_exists == 'prompt':
            write_disposition = 'WRITE_TRUNCATE'
            ignore_already_exists = False
            if bigquery_client_helper.TableExists(apitools_client,
                                                  bigquery_messages,
                                                  destination_reference):
                if not console_io.PromptContinue(prompt_string='Replace {0}'.
                                                 format(destination_resource)):
                    raise calliope_exceptions.ToolException('canceled by user')
        elif args.if_exists == 'replace':
            write_disposition = 'WRITE_TRUNCATE'
            ignore_already_exists = False
        elif args.if_exists == 'skip':
            if bigquery_client_helper.TableExists(apitools_client,
                                                  bigquery_messages,
                                                  destination_reference):
                return
            # The destination does not exist yet: proceed with an ordinary copy
            # and tolerate a table created concurrently.
            write_disposition = 'WRITE_EMPTY'
            ignore_already_exists = True
        else:
            # This should be unreachable.
            raise core_exceptions.InternalError(
                'Unexpected value "{0}" for --if-exists flag.'.format(
                    args.if_exists))

        copy_config = bigquery_messages.JobConfigurationTableCopy(
            sourceTable=source_reference_message,
            destinationTable=destination_reference,
            writeDisposition=write_disposition)

        job_id = job_ids.JobIdProvider().GetJobId(args.job_id,
                                                  args.fingerprint_job_id)

        try:
            job = job_control.ExecuteJob(
                apitools_client,
                bigquery_messages,
                args,
                configuration=bigquery_messages.JobConfiguration(
                    copy=copy_config),
                project_id=project_id,
                job_id=job_id)
        except bigquery.DuplicateError as e:
            if ignore_already_exists:
                job = None
            else:
                raise e

        if job is None:
            log.status.Print('Table "{0}" already exists, skipping'.format(
                destination_resource))
        elif args.async:
            registry = self.context[commands.BIGQUERY_REGISTRY_KEY]
            job_resource = registry.Create(
                'bigquery.jobs',
                projectId=job.jobReference.projectId,
                jobId=job.jobReference.jobId)
            log.CreatedResource(job_resource)
        else:
            log.status.Print('Table [{0}] successfully copied to [{1}]'.format(
                source_reference, destination_resource))
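The if/elif chain above mixes a pure mapping from --if-exists to a (writeDisposition, ignore_already_exists) pair with side effects for the interactive 'prompt' and early-return 'skip' cases. One possible restructuring, shown purely as a sketch of the design choice and not as the shipped code, keeps the pure mapping in a table (the 'skip' entry assumes the WRITE_EMPTY behavior noted in that branch):

# Hypothetical refactoring sketch; the dispositions mirror the branches above.
_IF_EXISTS_BEHAVIOR = {
    'append': ('WRITE_APPEND', True),
    'fail': ('WRITE_EMPTY', False),
    'prompt': ('WRITE_TRUNCATE', False),
    'replace': ('WRITE_TRUNCATE', False),
    'skip': ('WRITE_EMPTY', True),
}


def disposition_for(if_exists):
    """Return (write_disposition, ignore_already_exists) for an --if-exists value."""
    try:
        return _IF_EXISTS_BEHAVIOR[if_exists]
    except KeyError:
        # Corresponds to the "should be unreachable" branch in the command.
        raise ValueError(
            'Unexpected value "{0}" for --if-exists flag.'.format(if_exists))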
Example #8
  def Run(self, args):
    """This is what gets called when the user runs this command.

    Args:
      args: an argparse namespace. All the arguments that were provided to this
        command invocation.

    Returns:
      None.
    """
    apitools_client = self.context[commands.APITOOLS_CLIENT_KEY]
    bigquery_messages = self.context[commands.BIGQUERY_MESSAGES_MODULE_KEY]
    resource_parser = self.context[commands.BIGQUERY_REGISTRY_KEY]
    project_id = properties.VALUES.core.project.Get(required=True)
    table_resource = resource_parser.Parse(
        args.destination_table, collection='bigquery.tables')
    # TODO(user): Define constants for collection names in one place
    table_reference = message_conversions.TableResourceToReference(
        bigquery_messages, table_resource)

    sources = _ProcessSources(args.source)

    if args.schema:
      table_schema = bigquery_schemas.ReadSchema(args.schema, bigquery_messages)
    elif args.schema_file:
      table_schema = bigquery_schemas.ReadSchemaFile(
          args.schema_file, bigquery_messages)
    else:
      table_schema = None

    normalized_source_format = bigquery_client_helper.NormalizeTextualFormat(
        args.source_format)

    if (not normalized_source_format) or normalized_source_format == 'CSV':
      normalized_quote = (
          args.quote
          and bigquery_client_helper.NormalizeFieldDelimiter(args.quote))
      normalized_skip_leading_rows = args.skip_leading_rows
    else:
      # Server accepts non-None quote and skipLeadingRows only for CSV source
      # format:
      normalized_quote = None
      normalized_skip_leading_rows = None

    load_config = bigquery_messages.JobConfigurationLoad(
        allowJaggedRows=args.allow_jagged_rows,
        allowQuotedNewlines=args.allow_quoted_newlines,
        destinationTable=table_reference,
        encoding=args.encoding and args.encoding.upper(),
        fieldDelimiter=(
            args.field_delimiter
            and bigquery_client_helper.NormalizeFieldDelimiter(
                args.field_delimiter)),
        ignoreUnknownValues=args.ignore_unknown_values,
        maxBadRecords=args.max_bad_records,
        quote=normalized_quote,
        schema=table_schema,
        skipLeadingRows=normalized_skip_leading_rows,
        sourceFormat=normalized_source_format,
        sourceUris=sources if sources[0].startswith('gs://') else [],
        writeDisposition='WRITE_TRUNCATE' if args.replace else None,
    )
    job = job_control.ExecuteJob(
        apitools_client,
        bigquery_messages,
        args,
        configuration=bigquery_messages.JobConfiguration(load=load_config),
        async=args.async,
        project_id=project_id,
        upload_file=None if sources[0].startswith('gs://') else sources[0],
        job_id=job_ids.JobIdProvider().GetJobId(
            args.job_id, args.fingerprint_job_id))
    if args.async:
      job_resource = resource_parser.Create(
          'bigquery.jobs',
          projectId=job.jobReference.projectId,
          jobId=job.jobReference.jobId)
      log.CreatedResource(job_resource)
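Example #8 chooses between a server-side load from Cloud Storage and a local file upload by looking only at the first source: gs:// sources are passed as sourceUris, while anything else is handed to ExecuteJob as upload_file. A small sketch of that split factored into a helper (the helper name is hypothetical):

def split_sources(sources):
    """Return (source_uris, upload_file) the way the load command above does.

    Assumes, like the original, that the first source determines the mode and
    that a local load uploads a single file.
    """
    if sources and sources[0].startswith('gs://'):
        return sources, None
    return [], sources[0] if sources else None


# Usage mirroring the sourceUris / upload_file arguments above:
# source_uris, upload_file = split_sources(sources)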