Example #1
def drop_table(auth,
               project_id,
               dataset_id,
               table_id,
               billing_project_id=None):
    if not billing_project_id:
        billing_project_id = project_id

    query = ('DROP TABLE `' + project_id + '.' + dataset_id + '.' + table_id +
             '` ')

    body = {
        "kind": "bigquery#queryRequest",
        'query': query,
        'defaultDataset': {
            'datasetId': dataset_id,
        },
        'useLegacySql': False,
    }

    job_wait(
        auth,
        API_BigQuery(auth).jobs().query(projectId=billing_project_id,
                                        body=body).execute())
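A minimal usage sketch, assuming the surrounding helpers (API_BigQuery, job_wait) are importable and that auth is one of the credential keys this codebase expects (for example 'service' or 'user'); the project, dataset, and table names below are placeholders:

# Hypothetical call: drops `my-project.my_dataset.my_table` and bills the
# DROP TABLE query to the same project, since billing_project_id is omitted.
drop_table('service', 'my-project', 'my_dataset', 'my_table')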
Example #2
def _get_min_date_from_table(auth,
                             project_id,
                             dataset_id,
                             table_id,
                             billing_project_id=None):
    if not billing_project_id:
        billing_project_id = project_id

    query = ('SELECT MIN(Report_Day) FROM `' + project_id + '.' + dataset_id +
             '.' + table_id + '` ')

    body = {
        "kind": "bigquery#queryRequest",
        'query': query,
        'defaultDataset': {
            'datasetId': dataset_id,
        },
        'useLegacySql': False,
    }

    job = API_BigQuery(auth).jobs().query(projectId=billing_project_id,
                                          body=body).execute()

    return job['rows'][0]['f'][0]['v']
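The helper returns the raw cell value from the query response ('v' of the first field in the first row). A hedged usage sketch, assuming Report_Day is a DATE column that the API renders as a 'YYYY-MM-DD' string (the value is None when the table is empty):

from datetime import datetime

# Hypothetical call with placeholder names; parse the string only if a value came back.
min_day = _get_min_date_from_table('service', 'my-project', 'my_dataset', 'report_table')
min_date = datetime.strptime(min_day, '%Y-%m-%d').date() if min_day else None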
Example #3
def query_to_table(config,
                   auth,
                   project_id,
                   dataset_id,
                   table_id,
                   query,
                   disposition='WRITE_TRUNCATE',
                   legacy=True,
                   billing_project_id=None,
                   target_project_id=None):
    target_project_id = target_project_id or project_id

    if not billing_project_id:
        billing_project_id = project_id

    body = {
        'configuration': {
            'query': {
                'useLegacySql': legacy,
                'query': query,
                'destinationTable': {
                    'projectId': target_project_id,
                    'datasetId': dataset_id,
                    'tableId': table_id
                },
                'createDisposition': 'CREATE_IF_NEEDED',
                'writeDisposition': disposition,
                'allowLargeResults': True
            },
        }
    }

    job_wait(
        config, auth,
        API_BigQuery(config, auth).jobs().insert(projectId=billing_project_id,
                                                 body=body).execute())
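This variant also takes a config object (the framework's configuration, passed through to API_BigQuery and job_wait). A usage sketch with placeholder names, assuming config is already constructed:

# Hypothetical call: materializes a standard-SQL query into
# `my-project.my_dataset.daily_rollup`, replacing any existing rows.
query_to_table(
    config, 'service',
    'my-project', 'my_dataset', 'daily_rollup',
    'SELECT Report_Day, COUNT(*) AS row_count FROM `my-project.my_dataset.report_table` GROUP BY 1',
    disposition='WRITE_TRUNCATE',
    legacy=False)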
Example #4
def query_to_rows(auth,
                  project_id,
                  dataset_id,
                  query,
                  row_max=None,
                  legacy=True):

    # Create the query
    body = {
        "kind": "bigquery#queryRequest",
        "query": query,
        "timeoutMs": 10000,
        "dryRun": False,
        "useQueryCache": True,
        "useLegacySql": legacy
    }

    if row_max: body['maxResults'] = row_max

    if dataset_id:
        body['defaultDataset'] = {
            "projectId": project_id,
            "datasetId": dataset_id
        }

    # wait for query to complete

    response = API_BigQuery(auth).jobs().query(projectId=project_id,
                                               body=body).execute()
    while not response['jobComplete']:
        sleep(5)
        response = API_BigQuery(auth).jobs().getQueryResults(
            projectId=project_id,
            jobId=response['jobReference']['jobId']).execute(iterate=False)

    # fetch query results

    row_count = 0
    while 'rows' in response:
        converters = _build_converter_array(response.get('schema', None), None,
                                            len(response['rows'][0].get('f')))

        for row in response['rows']:
            yield [
                converters[i](next(iter(r.values())))
                for i, r in enumerate(row['f'])
            ]  # may break if we attempt nested reads
            row_count += 1

        # the getQueryResults response reports its continuation token as 'pageToken'
        if 'pageToken' in response:
            response = API_BigQuery(auth).jobs().getQueryResults(
                projectId=project_id,
                jobId=response['jobReference']['jobId'],
                pageToken=response['pageToken']).execute(iterate=False)
        elif row_count < int(response['totalRows']):
            response = API_BigQuery(auth).jobs().getQueryResults(
                projectId=project_id,
                jobId=response['jobReference']['jobId'],
                startIndex=row_count).execute(iterate=False)
        else:
            break
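Because the function yields rows, it has to be iterated (or wrapped in list()) for the query results to be fully paged through. A usage sketch with placeholder names:

# Hypothetical call: each yielded row is a list of cell values run through the
# converters built from the response schema.
for row in query_to_rows('service', 'my-project', 'my_dataset',
                         'SELECT name, value FROM `my-project.my_dataset.lookup`',
                         row_max=1000, legacy=False):
    print(row)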
Example #5
def table_to_schema(auth, project_id, dataset_id, table_id):
    if project.verbose:
        print('TABLE SCHEMA:', project_id, dataset_id, table_id)
    return API_BigQuery(auth).tables().get(
        projectId=project_id, datasetId=dataset_id,
        tableId=table_id).execute()['schema']
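The return value is the schema object from the BigQuery tables.get response, i.e. a dict whose 'fields' key lists the column definitions. A usage sketch with placeholder names:

# Hypothetical call: print name, type, and mode for each column
# (mode defaults to NULLABLE when the API omits it).
schema = table_to_schema('service', 'my-project', 'my_dataset', 'report_table')
for field in schema.get('fields', []):
    print(field['name'], field['type'], field.get('mode', 'NULLABLE'))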
Example #6
def table_get(auth, project_id, dataset_id, table_id):
    return API_BigQuery(auth).tables().get(projectId=project_id,
                                           datasetId=dataset_id,
                                           tableId=table_id).execute()
Example #7
def io_to_table(auth,
                project_id,
                dataset_id,
                table_id,
                data,
                source_format='CSV',
                schema=None,
                skip_rows=0,
                disposition='WRITE_TRUNCATE',
                wait=True):

    # if data exists, write data to table
    data.seek(0, 2)
    if data.tell() > 0:
        data.seek(0)

        media = MediaIoBaseUpload(BytesIO(data.read().encode('utf8')),
                                  mimetype='application/octet-stream',
                                  resumable=True,
                                  chunksize=BIGQUERY_CHUNKSIZE)

        body = {
            'configuration': {
                'load': {
                    'destinationTable': {
                        'projectId': project_id,
                        'datasetId': dataset_id,
                        'tableId': table_id,
                    },
                    'sourceFormat': source_format,
                    'writeDisposition': disposition,
                    'autodetect': True,
                    'allowJaggedRows': True,
                    'allowQuotedNewlines': True,
                    'ignoreUnknownValues': True,
                }
            }
        }

        if schema:
            body['configuration']['load']['schema'] = {'fields': schema}
            body['configuration']['load']['autodetect'] = False

        if source_format == 'CSV':
            body['configuration']['load']['skipLeadingRows'] = skip_rows

        job = API_BigQuery(auth).jobs().insert(
            projectId=project.id, body=body,
            media_body=media).execute(run=False)
        execution = job.execute()

        response = None
        while response is None:
            status, response = job.next_chunk()
            if project.verbose and status:
                print("Uploaded %d%%." % int(status.progress() * 100))
        if project.verbose: print("Uploaded 100%")
        if wait: job_wait(auth, execution)
        else: return execution

    # no data: if the disposition is WRITE_TRUNCATE, recreate the table empty
    elif disposition == 'WRITE_TRUNCATE':
        if project.verbose: print("BIGQUERY: No data, clearing table.")

        body = {
            "tableReference": {
                "projectId": project_id,
                "datasetId": dataset_id,
                "tableId": table_id
            },
            "schema": {
                "fields": schema
            }
        }
        # change project_id to be project.id, better yet project.cloud_id from JSON
        API_BigQuery(auth).tables().insert(projectId=project.id,
                                           datasetId=dataset_id,
                                           body=body).execute()
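This version re-encodes the buffer with data.read().encode('utf8'), so data is expected to be a text-mode file-like object. A usage sketch with placeholder names, assuming an in-memory CSV:

import io

# Hypothetical call: upload a small CSV, skipping its header row; autodetect
# stays on because no schema is passed.
csv_buffer = io.StringIO('day,impressions\n2021-06-01,100\n2021-06-02,200\n')
io_to_table('service', 'my-project', 'my_dataset', 'report_table',
            csv_buffer, source_format='CSV', skip_rows=1)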
Example #8
def query_to_rows(auth,
                  project_id,
                  dataset_id,
                  query,
                  row_max=None,
                  legacy=True,
                  as_object=False):
    if project.verbose:
        print('BIGQUERY QUERY:', project_id, dataset_id)

    # Create the query
    body = {
        'kind': 'bigquery#queryRequest',
        'query': query,
        'timeoutMs': 10000,
        'dryRun': False,
        'useQueryCache': True,
        'useLegacySql': legacy
    }

    if row_max:
        body['maxResults'] = row_max

    if dataset_id:
        body['defaultDataset'] = {
            'projectId': project_id,
            'datasetId': dataset_id
        }

    # wait for query to complete

    response = API_BigQuery(auth).jobs().query(projectId=project_id,
                                               body=body).execute()
    while not response['jobComplete']:
        sleep(5)
        response = API_BigQuery(auth).jobs().getQueryResults(
            projectId=project_id,
            jobId=response['jobReference']['jobId']).execute(iterate=False)

    # fetch query results
    schema = response.get('schema', {}).get('fields', None)

    row_count = 0
    while 'rows' in response:

        for row in response['rows']:
            yield row_to_json(row, schema, as_object)
            row_count += 1

        # the getQueryResults response reports its continuation token as 'pageToken'
        if 'pageToken' in response:
            response = API_BigQuery(auth).jobs().getQueryResults(
                projectId=project_id,
                jobId=response['jobReference']['jobId'],
                pageToken=response['pageToken']).execute(iterate=False)
        elif row_count < int(response['totalRows']):
            response = API_BigQuery(auth).jobs().getQueryResults(
                projectId=project_id,
                jobId=response['jobReference']['jobId'],
                startIndex=row_count).execute(iterate=False)
        else:
            break
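This later variant delegates cell conversion to row_to_json, which presumably keys each row by column name when as_object=True (otherwise it yields positional lists as in Example #4). A usage sketch with placeholder names:

# Hypothetical call: as_object=True is assumed to yield dict-like rows keyed
# by the columns in the SELECT list.
for row in query_to_rows('service', 'my-project', 'my_dataset',
                         'SELECT name, value FROM `my-project.my_dataset.lookup`',
                         legacy=False, as_object=True):
    print(row['name'], row['value'])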
Example #9
def table_to_type(auth, project_id, dataset_id, table_id):
    if project.verbose:
        print('TABLE TYPE:', project_id, dataset_id, table_id)
    return API_BigQuery(auth).tables().get(projectId=project_id,
                                           datasetId=dataset_id,
                                           tableId=table_id).execute()['type']
Example #10
def io_to_table(auth,
                project_id,
                dataset_id,
                table_id,
                data_bytes,
                source_format='CSV',
                schema=None,
                skip_rows=0,
                disposition='WRITE_TRUNCATE',
                wait=True):

    # if data exists, write data to table
    data_bytes.seek(0, 2)
    if data_bytes.tell() > 0:
        data_bytes.seek(0)

        media = MediaIoBaseUpload(data_bytes,
                                  mimetype='application/octet-stream',
                                  resumable=True,
                                  chunksize=BIGQUERY_CHUNKSIZE)

        body = {
            'configuration': {
                'load': {
                    'destinationTable': {
                        'projectId': project_id,
                        'datasetId': dataset_id,
                        'tableId': table_id,
                    },
                    'sourceFormat': source_format,  # CSV, NEWLINE_DELIMITED_JSON
                    'writeDisposition': disposition,  # WRITE_TRUNCATE, WRITE_APPEND, WRITE_EMPTY
                    'autodetect': True,
                    'allowJaggedRows': True,
                    'allowQuotedNewlines': True,
                    'ignoreUnknownValues': True,
                }
            }
        }

        if schema:
            body['configuration']['load']['schema'] = {'fields': schema}
            body['configuration']['load']['autodetect'] = False

        if disposition == 'WRITE_APPEND':
            body['configuration']['load']['autodetect'] = False

        if source_format == 'CSV':
            body['configuration']['load']['skipLeadingRows'] = skip_rows

        job = API_BigQuery(auth).jobs().insert(
            projectId=project.id, body=body,
            media_body=media).execute(run=False)
        execution = job.execute()

        response = None
        while response is None:
            status, response = job.next_chunk()
            if project.verbose and status:
                print('Uploaded %d%%.' % int(status.progress() * 100))
        if project.verbose:
            print('Uploaded 100%')

        if wait:
            job_wait(auth, execution)
        else:
            return execution

    # no data: if the disposition is WRITE_TRUNCATE, recreate the table empty via table_create
    elif disposition == 'WRITE_TRUNCATE':
        if project.verbose:
            print('BIGQUERY: No data, clearing table.')
        table_create(auth, project_id, dataset_id, table_id, schema)
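Unlike Example #7, this version hands data_bytes straight to MediaIoBaseUpload, so the caller must supply a binary buffer; when the buffer is empty and the disposition is WRITE_TRUNCATE it falls back to table_create. A usage sketch with placeholder names:

import io

# Hypothetical call: the CSV is already encoded to bytes because the buffer is
# passed to MediaIoBaseUpload unmodified.
csv_bytes = io.BytesIO(b'day,impressions\n2021-06-01,100\n2021-06-02,200\n')
io_to_table('service', 'my-project', 'my_dataset', 'report_table',
            csv_bytes, source_format='CSV', skip_rows=1)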