Example #1
0
    def Run(self, args):
        """Exports (extracts) a BigQuery table to Google Cloud Storage.

        This is what gets called when the user runs this command.

        Args:
          args: an argparse namespace. All the arguments that were provided to
            this command invocation.

        Raises:
          ToolException: when no destination URI is specified, or when a
            destination URI is not a Google Storage ("gs://") URI.
        """
        apitools_client = self.context[commands.APITOOLS_CLIENT_KEY]
        bigquery_messages = self.context[commands.BIGQUERY_MESSAGES_MODULE_KEY]
        resource_parser = self.context[commands.BIGQUERY_REGISTRY_KEY]
        project_id = properties.VALUES.core.project.Get(required=True)
        source_table_resource = resource_parser.Parse(
            args.source_table, collection='bigquery.tables')
        source_table_reference = message_conversions.TableResourceToReference(
            bigquery_messages, source_table_resource)

        if not args.destination_uri:
            raise exceptions.ToolException(
                'At least one destination URI must be specified.')
        destination_uris = args.destination_uri
        # Only Cloud Storage destinations are supported by the extract job.
        for uri in destination_uris:
            if not uri.startswith('gs://'):
                raise exceptions.ToolException((
                    'Illegal URI: {0}. Only Google Storage ("gs://") URIs are '
                    'supported.').format(uri))

        # 'async' is a reserved keyword in Python 3, so the flag value is read
        # via getattr and the keyword argument is passed through a dict.
        run_async = getattr(args, 'async')
        job = job_control.ExecuteJob(
            apitools_client,
            bigquery_messages,
            args,
            configuration=bigquery_messages.JobConfiguration(
                extract=bigquery_messages.JobConfigurationExtract(
                    sourceTable=source_table_reference,
                    destinationUris=destination_uris,
                    destinationFormat=bigquery_client_helper.
                    NormalizeTextualFormat(args.destination_format),
                    fieldDelimiter=bigquery_client_helper.
                    NormalizeFieldDelimiter(args.field_delimiter))),
            project_id=project_id,
            job_id=job_ids.JobIdProvider().GetJobId(args.job_id,
                                                    args.fingerprint_job_id),
            **{'async': run_async})

        if run_async:
            # In async mode only report the created job; don't wait for it.
            job_resource = resource_parser.Create(
                'bigquery.jobs',
                projectId=job.jobReference.projectId,
                jobId=job.jobReference.jobId)
            log.CreatedResource(job_resource)
Example #2
0
    def Run(self, args):
        """Runs a SQL query job and optionally fetches its results.

        This is what gets called when the user runs this command.

        Args:
          args: an argparse namespace. All the arguments that were provided to
            this command invocation.

        Raises:
          ToolException: if no query was provided.

        Returns:
          None if --dry_run was specified; the created job resource if --async
          was specified; otherwise a prepared-for-display SchemaAndRows object
          (or None if no result rows were available).
        """
        apitools_client = self.context[commands.APITOOLS_CLIENT_KEY]
        bigquery_messages = self.context[commands.BIGQUERY_MESSAGES_MODULE_KEY]
        resource_parser = self.context[commands.BIGQUERY_REGISTRY_KEY]
        project_id = properties.VALUES.core.project.Get(required=True)
        if not args.sql_query:
            raise exceptions.ToolException('No query string provided')
        destination_table = args.append_to or args.write_to
        if destination_table:
            output_resource = resource_parser.Parse(
                destination_table, collection='bigquery.tables')
            output_reference = message_conversions.TableResourceToReference(
                bigquery_messages, output_resource)
        else:
            output_reference = None
        query_configuration = bigquery_messages.JobConfigurationQuery(
            allowLargeResults=args.allow_large_results,
            createDisposition='CREATE_NEVER' if args.require_cache else None,
            # Set defaultDataset here if we choose to support a
            # --default-dataset-in-query flag.
            destinationTable=output_reference,
            flattenResults=not args.structured,
            preserveNulls=None,
            priority='BATCH' if args.batch else None,
            query=args.sql_query,
            useQueryCache=args.use_cache,
            writeDisposition=((args.append_to and 'WRITE_APPEND')
                              or (args.write_to and 'WRITE_TRUNCATE')))
        # 'async' is a reserved keyword in Python 3, so the flag value is read
        # via getattr and the keyword argument is passed through a dict.
        run_async = getattr(args, 'async')
        job = job_control.ExecuteJob(
            apitools_client,
            bigquery_messages,
            args,
            configuration=bigquery_messages.JobConfiguration(
                query=query_configuration, dryRun=args.dry_run),
            project_id=project_id,
            job_id=job_ids.JobIdProvider().GetJobId(args.job_id,
                                                    args.fingerprint_job_id),
            **{'async': run_async})

        if args.dry_run:
            log.Print(
                'Query successfully validated. Assuming the tables are not '
                'modified, running this query will process {0} bytes of data.'.
                format(job.statistics.query.totalBytesProcessed))
            return None
        if run_async:
            # In async mode only report the created job; don't fetch rows.
            job_resource = resource_parser.Create(
                'bigquery.jobs',
                projectId=job.jobReference.projectId,
                jobId=job.jobReference.jobId)
            log.CreatedResource(job_resource)
            self.default_format = 'table(jobId, projectId)'
            return job_resource
        result = schema_and_rows.GetJobSchemaAndRows(apitools_client,
                                                     bigquery_messages,
                                                     job.jobReference,
                                                     args.start_row,
                                                     args.limit)
        if not result:
            return None
        self.default_format = result.GetDefaultFormat()
        return result.PrepareForDisplay()
Example #3
0
    def Run(self, args):
        """Loads data from source files or URIs into a BigQuery table.

        This is what gets called when the user runs this command.

        Args:
          args: an argparse namespace. All the arguments that were provided to
            this command invocation.

        Returns:
          None.
        """
        apitools_client = self.context[commands.APITOOLS_CLIENT_KEY]
        bigquery_messages = self.context[commands.BIGQUERY_MESSAGES_MODULE_KEY]
        resource_parser = self.context[commands.BIGQUERY_REGISTRY_KEY]
        project_id = properties.VALUES.core.project.Get(required=True)
        table_resource = resource_parser.Parse(args.destination_table,
                                               collection='bigquery.tables')
        # TODO(user): Define constants for collection names in one place
        table_reference = message_conversions.TableResourceToReference(
            bigquery_messages, table_resource)

        sources = _ProcessSources(args.source)

        if args.schema:
            table_schema = bigquery_schemas.ReadSchema(args.schema,
                                                       bigquery_messages)
        elif args.schema_file:
            table_schema = bigquery_schemas.ReadSchemaFile(
                args.schema_file, bigquery_messages)
        else:
            table_schema = None

        normalized_source_format = bigquery_client_helper.NormalizeTextualFormat(
            args.source_format)

        if (not normalized_source_format) or normalized_source_format == 'CSV':
            normalized_quote = (
                args.quote
                and bigquery_client_helper.NormalizeFieldDelimiter(args.quote))
            normalized_skip_leading_rows = args.skip_leading_rows
        else:
            # Server accepts non-None quote and skipLeadingRows only for CSV
            # source format:
            normalized_quote = None
            normalized_skip_leading_rows = None

        # Cloud Storage sources are passed to the server as URIs; otherwise a
        # single local file is uploaded alongside the job.
        sources_on_gcs = sources[0].startswith('gs://')

        load_config = bigquery_messages.JobConfigurationLoad(
            allowJaggedRows=args.allow_jagged_rows,
            allowQuotedNewlines=args.allow_quoted_newlines,
            destinationTable=table_reference,
            encoding=args.encoding and args.encoding.upper(),
            fieldDelimiter=(args.field_delimiter
                            and bigquery_client_helper.NormalizeFieldDelimiter(
                                args.field_delimiter)),
            ignoreUnknownValues=args.ignore_unknown_values,
            maxBadRecords=args.max_bad_records,
            quote=normalized_quote,
            schema=table_schema,
            skipLeadingRows=normalized_skip_leading_rows,
            sourceFormat=normalized_source_format,
            sourceUris=sources if sources_on_gcs else [],
            writeDisposition='WRITE_TRUNCATE' if args.replace else None,
        )
        # 'async' is a reserved keyword in Python 3, so the flag value is read
        # via getattr and the keyword argument is passed through a dict.
        run_async = getattr(args, 'async')
        job = job_control.ExecuteJob(
            apitools_client,
            bigquery_messages,
            args,
            configuration=bigquery_messages.JobConfiguration(load=load_config),
            project_id=project_id,
            upload_file=None if sources_on_gcs else sources[0],
            job_id=job_ids.JobIdProvider().GetJobId(args.job_id,
                                                    args.fingerprint_job_id),
            **{'async': run_async})
        if run_async:
            # In async mode only report the created job; don't wait for it.
            job_resource = resource_parser.Create(
                'bigquery.jobs',
                projectId=job.jobReference.projectId,
                jobId=job.jobReference.jobId)
            log.CreatedResource(job_resource)
Example #4
0
    def Run(self, args):
        """Copies one BigQuery table to another.

        This is what gets called when the user runs this command.

        Args:
          args: an argparse namespace. All the arguments that were provided to
            this command invocation.

        Returns:
          None.

        Raises:
          calliope_exceptions.ToolException: if the user cancels the prompted
            replacement of an existing destination table.
          core_exceptions.InternalError: if an unexpected value for the
            --if-exists flag passed gcloud validation (which should never
            happen).
          bigquery.DuplicateError: if the destination table already exists and
            the selected --if-exists mode does not tolerate that.
        """
        apitools_client = self.context[commands.APITOOLS_CLIENT_KEY]
        bigquery_messages = self.context[commands.BIGQUERY_MESSAGES_MODULE_KEY]
        resource_parser = self.context[commands.BIGQUERY_REGISTRY_KEY]
        project_id = properties.VALUES.core.project.Get(required=True)

        source_reference = resource_parser.Parse(args.source,
                                                 collection='bigquery.tables')
        source_reference_message = message_conversions.TableResourceToReference(
            bigquery_messages, source_reference)

        destination_resource = resource_parser.Parse(
            args.destination, collection='bigquery.tables')
        destination_reference = message_conversions.TableResourceToReference(
            bigquery_messages, destination_resource)

        # Map the --if-exists mode onto a server-side write disposition plus a
        # client-side decision on whether DuplicateError should be tolerated.
        if args.if_exists == 'append':
            write_disposition = 'WRITE_APPEND'
            ignore_already_exists = True
        elif args.if_exists == 'fail':
            write_disposition = 'WRITE_EMPTY'
            ignore_already_exists = False
        elif args.if_exists == 'prompt':
            write_disposition = 'WRITE_TRUNCATE'
            ignore_already_exists = False
            if bigquery_client_helper.TableExists(apitools_client,
                                                  bigquery_messages,
                                                  destination_reference):
                if not console_io.PromptContinue(prompt_string='Replace {0}'.
                                                 format(destination_resource)):
                    raise calliope_exceptions.ToolException('canceled by user')
        elif args.if_exists == 'replace':
            write_disposition = 'WRITE_TRUNCATE'
            ignore_already_exists = False
        elif args.if_exists == 'skip':
            if bigquery_client_helper.TableExists(apitools_client,
                                                  bigquery_messages,
                                                  destination_reference):
                return
            # Bug fix: previously these were left unassigned in this branch,
            # so building copy_config below raised NameError whenever
            # --if-exists=skip and the destination did not yet exist.
            write_disposition = 'WRITE_EMPTY'
            ignore_already_exists = True
        else:
            # This should be unreachable.
            raise core_exceptions.InternalError(
                'Unexpected value "{0}" for --if-exists flag.'.format(
                    args.if_exists))

        copy_config = bigquery_messages.JobConfigurationTableCopy(
            sourceTable=source_reference_message,
            destinationTable=destination_reference,
            writeDisposition=write_disposition)

        job_id = job_ids.JobIdProvider().GetJobId(args.job_id,
                                                  args.fingerprint_job_id)

        try:
            job = job_control.ExecuteJob(
                apitools_client,
                bigquery_messages,
                args,
                configuration=bigquery_messages.JobConfiguration(
                    copy=copy_config),
                project_id=project_id,
                job_id=job_id)
        except bigquery.DuplicateError:
            if not ignore_already_exists:
                raise  # Bare raise preserves the original traceback.
            job = None

        if job is None:
            log.status.Print('Table "{0}" already exists, skipping'.format(
                destination_resource))
        # 'async' is a reserved keyword in Python 3, so the flag value is read
        # via getattr.
        elif getattr(args, 'async'):
            # resource_parser already holds BIGQUERY_REGISTRY_KEY; no need to
            # re-read it from the context as the original did.
            job_resource = resource_parser.Create(
                'bigquery.jobs',
                projectId=job.jobReference.projectId,
                jobId=job.jobReference.jobId)
            log.CreatedResource(job_resource)
        else:
            log.status.Print('Table [{0}] successfully copied to [{1}]'.format(
                source_reference, destination_resource))