Esempio n. 1
0
def fetch_big_query_data(bigquery, project_id, query, num_attempts,
                         timeout_ms=180000):
    """Run a query through ``jobs().query`` and wait until it completes.

    The query is submitted with the query cache disabled. If the first
    response reports ``jobComplete`` as false, the job is polled through
    ``jobs().getQueryResults`` until it completes or ``num_attempts`` polls
    have been made.

    Args:
        bigquery: Client object exposing ``jobs()`` (presumably a
            googleapiclient BigQuery v2 resource -- confirm against caller).
        project_id: Value passed as ``projectId`` on every request.
        query: SQL text to execute.
        num_attempts: Maximum number of ``getQueryResults`` polls performed
            after the initial request.
        timeout_ms: Value sent as ``timeoutMs`` with the initial request and
            with each poll. Defaults to 180000, the value that used to be
            hard-coded in both places.

    Returns:
        The last response received, whose ``jobComplete`` field is truthy.

    Raises:
        Exception: If the job is still not complete after the initial
            request and ``num_attempts`` polls.
    """
    query_body = {
        "timeoutMs": timeout_ms,
        "useQueryCache": False,
        "query": query,
    }

    request = bigquery.jobs().query(projectId=project_id, body=query_body)
    result = execute_request_with_retries(request)

    job_finished = bool(result['jobComplete'])
    job_id = result['jobReference']['jobId']

    # Poll at most num_attempts times; stop as soon as the job is complete.
    for current_try in range(1, num_attempts + 1):
        if job_finished:
            break

        # Lazy %-style arguments: the message is only built if it is emitted.
        logging.info(
            "Number of attempt to poll the query result [%s] of [%s].",
            current_try, num_attempts)

        request = bigquery.jobs().getQueryResults(
            projectId=project_id,
            jobId=job_id,
            timeoutMs=timeout_ms)
        result = execute_request_with_retries(request)
        job_finished = bool(result['jobComplete'])

    if not job_finished:
        # This is a failure path, so report it at error level before raising.
        logging.error("The job for the query requested has not been completed.")
        raise Exception("Query job requested not finished")

    return result
Esempio n. 2
0
def async_query(
        bigquery, project_id, query,
        destination_dataset, destination_table,
        batch=False, num_retries=5, use_legacy_sql=True, truncate=True):
    """Insert a query job that writes its result to a destination table.

    Args:
        bigquery: Client object exposing ``jobs()`` (presumably a
            googleapiclient BigQuery v2 resource -- confirm against caller).
        project_id: Project used for the job reference and the insert call.
        query: SQL text to execute.
        destination_dataset: Dataset ID of the destination table.
        destination_table: Table ID of the destination table.
        batch: If true the job priority is ``BATCH``, otherwise
            ``INTERACTIVE``.
        num_retries: Forwarded to ``execute_request_with_retries``.
        use_legacy_sql: Sent as ``useLegacySql``; set to False to use
            standard SQL syntax. See:
            https://cloud.google.com/bigquery/sql-reference/enabling-standard-sql
        truncate: If true the destination table is overwritten
            (``WRITE_TRUNCATE``); otherwise rows are appended
            (``WRITE_APPEND``) and new fields may be added to its schema.

    Returns:
        Whatever ``execute_request_with_retries`` returns for the insert
        request (presumably the created job resource -- confirm).
    """
    query_config = {
        'query': query,
        'priority': 'BATCH' if batch else 'INTERACTIVE',
        'useLegacySql': use_legacy_sql,
        'allowLargeResults': True,
        'destinationTable': {
            # NOTE(review): the destination project is read from the
            # module-level ``cfg`` rather than the ``project_id`` argument.
            # Kept as-is because it may be deliberate -- confirm.
            'projectId': cfg['ids']['project_id'],
            'datasetId': destination_dataset,
            'tableId': destination_table,
        },
        'createDisposition': 'CREATE_IF_NEEDED',
        'writeDisposition': 'WRITE_TRUNCATE' if truncate else 'WRITE_APPEND',
    }

    # ``schemaUpdateOptions`` is a list of strings in the BigQuery API. It is
    # only meaningful here when appending, so the key is omitted entirely when
    # truncating instead of sending an empty string.
    if not truncate:
        query_config['schemaUpdateOptions'] = ['ALLOW_FIELD_ADDITION']

    job_data = {
        'jobReference': {
            'projectId': project_id,
            # Generate a unique job ID so retries don't accidentally
            # duplicate the query.
            'jobId': str(uuid.uuid4()),
        },
        'configuration': {
            'query': query_config,
        },
    }

    request = bigquery.jobs().insert(
        projectId=project_id,
        body=job_data)
    return execute_request_with_retries(request, num_retries=num_retries)
Esempio n. 3
0
def poll_job(bigquery, job, self):
    """Block until the given job reaches the ``DONE`` state.

    The job is fetched repeatedly, sleeping one second between fetches, with
    no upper bound on the number of fetches. Once the job is done, either
    its ``errorResult`` or a completion message is written to
    ``self.response`` and logged.

    Args:
        bigquery: Client object exposing ``jobs()``.
        job: Mapping holding ``jobReference`` with ``projectId`` and
            ``jobId``.
        self: Object whose ``response.write`` receives the outcome
            (presumably a web request handler -- confirm against caller).

    Returns:
        None.
    """
    logging.info('Waiting for job to finish...')

    jobs_api = bigquery.jobs()
    reference = job['jobReference']
    request = jobs_api.get(
        projectId=reference['projectId'],
        jobId=reference['jobId'])

    while True:
        status = execute_request_with_retries(request, timeout=1)['status']

        # Not finished yet: wait a second and fetch the job again.
        if status['state'] != 'DONE':
            time.sleep(1)
            continue

        if 'errorResult' not in status:
            self.response.write('DONE!<br/>')
            logging.info('Job complete.')
            return

        failure = status['errorResult']
        self.response.write("{error}<br/>".format(error=failure))
        logging.error(failure)
        return