Example #1
# Excerpt from a larger module: API_BigQuery, row_to_json, query_to_rows, and
# the global `project` are assumed to be provided by the surrounding utilities.
def table_to_rows(auth,
                  project_id,
                  dataset_id,
                  table_id,
                  fields=None,
                  row_start=0,
                  row_max=None,
                  as_object=False):
    if project.verbose:
        print('BIGQUERY ROWS:', project_id, dataset_id, table_id)

    # Fetch the table definition to learn its schema, type, and SQL dialect.
    table = API_BigQuery(auth).tables().get(projectId=project_id,
                                            datasetId=dataset_id,
                                            tableId=table_id).execute()
    table_schema = table['schema'].get('fields', [])
    table_type = table['type']
    table_legacy = table.get('view', {}).get('useLegacySql', False)

    # Physical tables can be paged directly with tabledata().list.
    if table_type == 'TABLE':

        for row in API_BigQuery(auth, iterate=True).tabledata().list(
                projectId=project_id,
                datasetId=dataset_id,
                tableId=table_id,
                selectedFields=fields,
                startIndex=row_start,
                maxResults=row_max,
        ).execute():
            yield row_to_json(row, table_schema, as_object)

    else:
        # Views cannot be read with tabledata().list, so query them instead.
        yield from query_to_rows(auth, project_id, dataset_id,
                                 'SELECT * FROM %s' % table_id, row_max,
                                 table_legacy, as_object)
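
A minimal usage sketch (not part of the source): it assumes a StarThinker-style
project context is already initialized, that 'user' is a valid auth mode, and
that the project, dataset, and table names are placeholders.

# Hypothetical usage: stream the first 100 rows of a table as dicts.
for row in table_to_rows(
    'user',          # assumed auth mode ('user' or 'service')
    'my-project',    # placeholder project id
    'my_dataset',    # placeholder dataset id
    'my_table',      # placeholder table id
    row_max=100,
    as_object=True):
  print(row)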
Example #2
# As above, this excerpt assumes API_BigQuery, row_to_json, and `project` from
# the surrounding module, plus time.sleep imported as sleep.
def query_to_rows(auth,
                  project_id,
                  dataset_id,
                  query,
                  row_max=None,
                  legacy=True,
                  as_object=False):
  if project.verbose:
    print('BIGQUERY QUERY:', project_id, dataset_id)

  # Create the query
  body = {
      'kind': 'bigquery#queryRequest',
      'query': query,
      'timeoutMs': 10000,
      'dryRun': False,
      'useQueryCache': True,
      'useLegacySql': legacy
  }

  if row_max:
    body['maxResults'] = row_max

  if dataset_id:
    body['defaultDataset'] = {'projectId': project_id, 'datasetId': dataset_id}

  # wait for query to complete

  response = API_BigQuery(auth).jobs().query(
      projectId=project_id, body=body).execute()
  while not response['jobComplete']:
    sleep(5)
    response = API_BigQuery(auth).jobs().getQueryResults(
        projectId=project_id,
        jobId=response['jobReference']['jobId']).execute(iterate=False)

  # fetch query results
  schema = response.get('schema', {}).get('fields', None)

  # Page through the results: follow pageToken when present, otherwise
  # resume by row index until totalRows is reached.
  row_count = 0
  while 'rows' in response:

    for row in response['rows']:
      yield row_to_json(row, schema, as_object)
      row_count += 1

    if 'pageToken' in response:  # the REST field is lowercase 'pageToken'
      response = API_BigQuery(auth).jobs().getQueryResults(
          projectId=project_id,
          jobId=response['jobReference']['jobId'],
          pageToken=response['pageToken']).execute(iterate=False)
    elif row_count < int(response['totalRows']):
      response = API_BigQuery(auth).jobs().getQueryResults(
          projectId=project_id,
          jobId=response['jobReference']['jobId'],
          startIndex=row_count).execute(iterate=False)
    else:
      break
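
A hedged usage sketch (not from the source); the identifiers are placeholders
and legacy=False selects standard SQL. Since the function is a generator, rows
stream in page by page rather than loading the whole result set at once.

# Hypothetical usage: run a standard SQL query and print each row.
for row in query_to_rows(
    'service',       # assumed auth mode
    'my-project',    # placeholder project id
    'my_dataset',    # placeholder dataset id
    'SELECT name, value FROM my_table',
    legacy=False):
  print(row)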
Example #3
# A second variant of query_to_rows that decodes cells itself;
# _build_converter_array is a private helper from the surrounding module.
def query_to_rows(auth, project_id, dataset_id, query, row_max=None, legacy=True):

  # Create the query
  body = {
    "kind": "bigquery#queryRequest",
    "query": query,
    "timeoutMs": 10000,
    "dryRun": False,
    "useQueryCache": True,
    "useLegacySql": legacy
  }

  if row_max:
    body['maxResults'] = row_max

  if dataset_id:
    body['defaultDataset'] = {
      "projectId": project_id,
      "datasetId": dataset_id
    }

  # wait for query to complete

  response = API_BigQuery(auth).jobs().query(
      projectId=project_id, body=body).execute()
  while not response['jobComplete']:
    sleep(5)
    response = API_BigQuery(auth).jobs().getQueryResults(
        projectId=project_id,
        jobId=response['jobReference']['jobId']).execute(iterate=False)

  # fetch query results

  row_count = 0
  while 'rows' in response:
    # Build one converter per column from the schema (rebuilt on each page).
    converters = _build_converter_array(
        response.get('schema', None), None, len(response['rows'][0].get('f')))

    for row in response['rows']:
      yield [converters[i](next(iter(r.values()))) for i, r in enumerate(row['f'])] # may break if we attempt nested reads
      row_count += 1

    if 'pageToken' in response:  # the REST field is lowercase 'pageToken'
      response = API_BigQuery(auth).jobs().getQueryResults(
          projectId=project_id,
          jobId=response['jobReference']['jobId'],
          pageToken=response['pageToken']).execute(iterate=False)
    elif row_count < int(response['totalRows']):
      response = API_BigQuery(auth).jobs().getQueryResults(
          projectId=project_id,
          jobId=response['jobReference']['jobId'],
          startIndex=row_count).execute(iterate=False)
    else:
      break
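
For context, the BigQuery REST API encodes each result row as
{'f': [{'v': ...}, ...]}. Below is a standalone sketch of the unwrapping idiom
used in the loop above, with simple converters standing in for the output of
_build_converter_array:

# Hypothetical demo of decoding one REST-encoded BigQuery row.
row = {'f': [{'v': '42'}, {'v': 'hello'}]}
converters = [int, str]  # stand-ins for the schema-driven converters
values = [converters[i](next(iter(cell.values())))
          for i, cell in enumerate(row['f'])]
print(values)  # prints [42, 'hello']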