def _DoAddRows(self, json_file, args):
    """Add rows from json_file to args.target_table.

    Every non-blank line of json_file is stripped and handed to
    MessageBuilder.Build to produce one row. Rows are sent with
    TablesAddRows._InsertTableRows in batches of _MAX_ROWS_PER_REQUEST; if
    that constant is falsy, all rows go out in a single request after the
    file has been read. Sending stops at the first request whose result
    carries insertErrors.

    Args:
      json_file: an iterable of text lines; blank lines are skipped.
      args: an argparse namespace; target_table and quiet are read from it.

    Raises:
      bigquery.Error: if a line cannot be built into a row (the message
        carries the 1-based line number), or if an insert request reports
        errors.
    """
    apitools_client = self.context[commands.APITOOLS_CLIENT_KEY]
    bigquery_messages = self.context[commands.BIGQUERY_MESSAGES_MODULE_KEY]
    resource_parser = self.context[commands.BIGQUERY_REGISTRY_KEY]
    resource = resource_parser.Parse(
        args.target_table, collection='bigquery.tables')
    reference = message_conversions.TableResourceToReference(
        bigquery_messages, resource)
    message_builder = bigquery_json_object_messages.MessageBuilder(
        bigquery_messages)

    batch = []
    rows_seen = 0
    # Result of the first insert request that reported errors, if any.
    failed_result = None

    for lineno, line in enumerate(json_file, 1):
        trimmed_line = line.strip()
        if not trimmed_line:
            continue
        try:
            parsed_row = message_builder.Build(trimmed_line)
        except bigquery.ClientError as e:
            raise bigquery.Error(
                'Line {num}: {msg}'.format(num=lineno, msg=str(e)),
                None, [])
        batch.append(parsed_row)
        rows_seen += 1
        if _MAX_ROWS_PER_REQUEST and len(batch) == _MAX_ROWS_PER_REQUEST:
            result = TablesAddRows._InsertTableRows(
                reference, batch, apitools_client, bigquery_messages)
            # Empty the list in place so the same object is reused for the
            # next batch.
            del batch[:]
            if result.insertErrors:
                failed_result = result
                break

    # Count parsed rows rather than raw lines: a file holding only blank
    # lines inserts nothing and must not be reported as a success.
    if not rows_seen:
        log.status.Print('JSON file was empty.')
        return

    # Flush whatever is left over after the last full batch.
    if batch and failed_result is None:
        result = TablesAddRows._InsertTableRows(
            reference, batch, apitools_client, bigquery_messages)
        if result.insertErrors:
            failed_result = result

    if failed_result is not None:
        # NOTE(review): entry.index is presumably relative to the request
        # that failed, not to the position in json_file -- confirm.
        for entry in failed_result.insertErrors:
            log.err.Print(
                'Record {0} could not be inserted:'.format(entry.index))
            for error in entry.errors:
                log.err.Print(
                    '\t{0}: {1}'.format(error.reason, error.message))
        raise bigquery.Error('Rows not added')

    if not args.quiet:
        log.status.Print('Rows added successfully.')
def Run(self, args):
    """This is what gets called when the user runs this command.

    Polls the status of a job until it reaches the DONE state or until
    max_wait seconds have elapsed, printing progress about once per second.
    The job is args.job_id when given; otherwise it is the project's single
    PENDING or RUNNING job.

    Args:
      args: an argparse namespace. All the arguments that were provided to
        this command invocation.

    Raises:
      bigquery.ClientError: if args.max_wait cannot be converted to an int.
      bigquery.Error: if no job id was provided and the number of pending or
        running jobs is not exactly one; also any non-transient error
        produced from an HTTP failure while polling.
      bigquery.TimeoutError: on time out.

    Returns:
      A Job message for the job we were waiting for.
    """
    apitools_client = self.context[commands.APITOOLS_CLIENT_KEY]
    bigquery_messages = self.context[commands.BIGQUERY_MESSAGES_MODULE_KEY]
    resource_parser = self.context[commands.BIGQUERY_REGISTRY_KEY]
    project_id = properties.VALUES.core.project.Get(required=True)

    try:
        # A missing (falsy) max_wait means an effectively unbounded wait.
        # sys.maxsize exists on Python 2.6+ and on Python 3.
        max_wait = int(args.max_wait) if args.max_wait else sys.maxsize
    except ValueError:
        raise bigquery.ClientError(
            'Invalid wait time: {0}'.format(args.max_wait))

    if args.job_id:
        job_resource = resource_parser.Parse(
            args.job_id, collection='bigquery.jobs')
        job_reference = message_conversions.JobResourceToReference(
            bigquery_messages, job_resource)
    else:
        project = bigquery.Project(project_id)
        running_jobs = [
            job for job in project.GetCurrentJobsGenerator()
            if job.state in ('PENDING', 'RUNNING')
        ]
        if len(running_jobs) != 1:
            raise bigquery.Error(
                'No job ID provided, found {0} running jobs'.format(
                    len(running_jobs)))
        job_reference = running_jobs[0].jobReference

    start_time = bigquery.CurrentTimeInSec()
    job = None
    if args.changed_status_only:
        reporting_mode = job_progress.STATUS_REPORTING_CHANGES
    else:
        reporting_mode = job_progress.STATUS_REPORTING_PERIODIC
    progress_reporter = job_progress.ProgressReporter(reporting_mode)

    # Polling intervals in seconds: 1,1,1,1,1,1,1,1,
    # 2,5,8,11,14,17,20,23,26,29, then 30,30,30,... forever.
    polling_intervals = itertools.chain(
        itertools.repeat(1, 8), range(2, 30, 3), itertools.repeat(30))

    total_wait_so_far = 0
    current_status = 'UNKNOWN'
    while total_wait_so_far < max_wait:
        request = bigquery_messages.BigqueryJobsGetRequest(
            projectId=job_reference.projectId, jobId=job_reference.jobId)
        try:
            job = apitools_client.jobs.Get(request)
        except apitools_base.HttpError as server_error:
            # Convert the HTTP error to a bigquery error by raising it, so
            # that only the transient kinds are swallowed and anything else
            # propagates to the caller.
            try:
                raise bigquery.Error.ForHttpError(server_error)
            except (bigquery.CommunicationError,
                    bigquery.BackendError) as e:
                # Communication errors and temporary server errors while
                # waiting on a job are okay.
                log.status.Print(
                    'Transient error during job status check: {0}'.format(
                        e))
        else:
            current_status = job.status.state
            if current_status == 'DONE':
                progress_reporter.Print(
                    job_reference.jobId, total_wait_so_far, current_status)
                break

        # Every second of this polling interval, update the display of the
        # time waited so far.
        seconds_in_interval = next(polling_intervals)
        total_wait_so_far = bigquery.CurrentTimeInSec() - start_time
        for _ in range(seconds_in_interval):
            # Stop sleeping as soon as the limit is reached, so the timeout
            # does not overshoot by the rest of the interval.
            if total_wait_so_far >= max_wait:
                break
            progress_reporter.Print(
                job_reference.jobId, total_wait_so_far, current_status)
            bigquery.Wait(1)
            total_wait_so_far = bigquery.CurrentTimeInSec() - start_time
    else:
        # The loop ended without seeing DONE: the wait limit was reached.
        raise bigquery.TimeoutError(
            'Wait timed out. Operation not finished, in state {0}'.format(
                current_status),
            None, [])

    progress_reporter.Done()
    return job