Example #1
def _clear_data_in_date_range_from_table(auth,
                                         project_id,
                                         dataset_id,
                                         table_id,
                                         start_date,
                                         end_date,
                                         billing_project_id=None):
    if not billing_project_id:
        billing_project_id = project_id

    service = get_service('bigquery', 'v2', auth)

    query = ('DELETE FROM `' + project_id + '.' + dataset_id + '.' + table_id +
             '` WHERE Date >= "' + start_date + '" AND Date <= "' +
             end_date + '"')

    body = {
        "kind": "bigquery#queryRequest",
        'query': query,
        'defaultDataset': {
            'datasetId': dataset_id,
        },
        'useLegacySql': False,
    }

    job = API_BigQuery(auth).jobs().query(projectId=billing_project_id,
                                          body=body).execute(run=False)
    job_wait(service, job.execute(num_retries=BIGQUERY_RETRIES))
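
A minimal usage sketch for the delete helper above (not part of the original example), assuming the function is in scope. The auth mode and the project, dataset, and table names are placeholder assumptions, and ISO "YYYY-MM-DD" date strings are assumed to match the string comparison in the query:

_clear_data_in_date_range_from_table(
    auth='service',                # assumed StarThinker auth mode
    project_id='my-project',       # hypothetical project
    dataset_id='my_dataset',       # hypothetical dataset
    table_id='my_table',           # hypothetical table
    start_date='2023-01-01',
    end_date='2023-01-31')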
Example #2
def drop_table(auth,
               project_id,
               dataset_id,
               table_id,
               billing_project_id=None):
    if not billing_project_id:
        billing_project_id = project_id

    service = get_service('bigquery', 'v2', auth)
    query = ('DROP TABLE `' + project_id + '.' + dataset_id + '.' + table_id +
             '` ')

    body = {
        "kind": "bigquery#queryRequest",
        'query': query,
        'defaultDataset': {
            'datasetId': dataset_id,
        },
        'useLegacySql': False,
    }

    job = API_BigQuery(auth).jobs().query(projectId=billing_project_id,
                                          body=body).execute(run=False)

    job_wait(service, job.execute(num_retries=BIGQUERY_RETRIES))
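
A minimal usage sketch for drop_table (not part of the original example); all identifiers below are placeholder assumptions:

drop_table(
    auth='service',                # assumed StarThinker auth mode
    project_id='my-project',       # hypothetical project
    dataset_id='my_dataset',       # hypothetical dataset
    table_id='obsolete_table')     # hypothetical table to drop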
Example #3
def execute_statement(auth,
                      project_id,
                      dataset_id,
                      statement,
                      billing_project_id=None,
                      use_legacy_sql=False):
    if not billing_project_id:
        billing_project_id = project_id

    service = get_service('bigquery', 'v2', auth)

    body = {
        "kind": "bigquery#queryRequest",
        'query': statement,
        'defaultDataset': {
            'datasetId': dataset_id,
        },
        'useLegacySql': use_legacy_sql,
    }

    job = API_BigQuery(auth).jobs().query(projectId=billing_project_id,
                                          body=body).execute(run=False)
    job_wait(service, job.execute(num_retries=BIGQUERY_RETRIES))
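
A minimal usage sketch for execute_statement (not part of the original example), running an arbitrary DDL statement in standard SQL; the auth mode and all identifiers are placeholder assumptions:

execute_statement(
    auth='service',                # assumed StarThinker auth mode
    project_id='my-project',
    dataset_id='my_dataset',
    statement='CREATE OR REPLACE TABLE `my-project.my_dataset.report_copy` AS '
              'SELECT * FROM `my-project.my_dataset.report`')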
Example #4
def io_to_table(auth,
                project_id,
                dataset_id,
                table_id,
                data,
                source_format='CSV',
                schema=None,
                skip_rows=0,
                disposition='WRITE_TRUNCATE',
                wait=True):

    # if data exists, write data to table
    data.seek(0, 2)
    if data.tell() > 0:
        data.seek(0)

        media = MediaIoBaseUpload(BytesIO(data.read().encode('utf8')),
                                  mimetype='application/octet-stream',
                                  resumable=True,
                                  chunksize=BIGQUERY_CHUNKSIZE)

        body = {
            'configuration': {
                'load': {
                    'destinationTable': {
                        'projectId': project_id,
                        'datasetId': dataset_id,
                        'tableId': table_id,
                    },
                    'sourceFormat': source_format,
                    'writeDisposition': disposition,
                    'autodetect': True,
                    'allowJaggedRows': True,
                    'allowQuotedNewlines': True,
                    'ignoreUnknownValues': True,
                }
            }
        }

        if schema:
            body['configuration']['load']['schema'] = {'fields': schema}
            body['configuration']['load']['autodetect'] = False

        if source_format == 'CSV':
            body['configuration']['load']['skipLeadingRows'] = skip_rows

        job = API_BigQuery(auth).jobs().insert(
            projectId=project.id, body=body,
            media_body=media).execute(run=False)
        execution = job.execute()

        response = None
        while response is None:
            status, response = job.next_chunk()
            if project.verbose and status:
                print("Uploaded %d%%." % int(status.progress() * 100))
        if project.verbose: print("Uploaded 100%")
        if wait: job_wait(auth, execution)
        else: return execution

    # if there is no data and the disposition is WRITE_TRUNCATE, clear the table
    elif disposition == 'WRITE_TRUNCATE':
        if project.verbose: print("BIGQUERY: No data, clearing table.")

        body = {
            "tableReference": {
                "projectId": project_id,
                "datasetId": dataset_id,
                "tableId": table_id
            },
            "schema": {
                "fields": schema
            }
        }
        # change project_id to be project.id, better yet project.cloud_id from JSON
        API_BigQuery(auth).tables().insert(projectId=project.id,
                                           datasetId=dataset_id,
                                           body=body).execute()
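
A minimal usage sketch for this io_to_table variant (not part of the original example), assuming the function is in scope. Because the function re-encodes data.read() as UTF-8, a text buffer such as io.StringIO is assumed; the auth mode and identifiers are placeholders:

from io import StringIO

csv_buffer = StringIO('id,name\n1,alpha\n2,beta\n')  # hypothetical CSV payload
io_to_table(
    auth='service',                # assumed StarThinker auth mode
    project_id='my-project',
    dataset_id='my_dataset',
    table_id='my_table',
    data=csv_buffer,
    source_format='CSV',
    skip_rows=1)                   # skip the header row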
Example #5
def io_to_table(auth,
                project_id,
                dataset_id,
                table_id,
                data_bytes,
                source_format='CSV',
                schema=None,
                skip_rows=0,
                disposition='WRITE_TRUNCATE',
                wait=True):

    # if data exists, write data to table
    data_bytes.seek(0, 2)
    if data_bytes.tell() > 0:
        data_bytes.seek(0)

        media = MediaIoBaseUpload(data_bytes,
                                  mimetype='application/octet-stream',
                                  resumable=True,
                                  chunksize=BIGQUERY_CHUNKSIZE)

        body = {
            'configuration': {
                'load': {
                    'destinationTable': {
                        'projectId': project_id,
                        'datasetId': dataset_id,
                        'tableId': table_id,
                    },
                    'sourceFormat':
                    source_format,  # CSV, NEWLINE_DELIMITED_JSON
                    'writeDisposition':
                    disposition,  # WRITE_TRUNCATE, WRITE_APPEND, WRITE_EMPTY
                    'autodetect': True,
                    'allowJaggedRows': True,
                    'allowQuotedNewlines': True,
                    'ignoreUnknownValues': True,
                }
            }
        }

        if schema:
            body['configuration']['load']['schema'] = {'fields': schema}
            body['configuration']['load']['autodetect'] = False

        if disposition == 'WRITE_APPEND':
            body['configuration']['load']['autodetect'] = False

        if source_format == 'CSV':
            body['configuration']['load']['skipLeadingRows'] = skip_rows

        job = API_BigQuery(auth).jobs().insert(
            projectId=project.id, body=body,
            media_body=media).execute(run=False)
        execution = job.execute()

        response = None
        while response is None:
            status, response = job.next_chunk()
            if project.verbose and status:
                print('Uploaded %d%%.' % int(status.progress() * 100))
        if project.verbose:
            print('Uploaded 100%')

        if wait:
            job_wait(auth, execution)
        else:
            return execution

    # if there is no data and the disposition is WRITE_TRUNCATE, clear the table
    elif disposition == 'WRITE_TRUNCATE':
        if project.verbose:
            print('BIGQUERY: No data, clearing table.')
        table_create(auth, project_id, dataset_id, table_id, schema)
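
A minimal usage sketch for this io_to_table variant (not part of the original example), assuming the function is in scope. Here the buffer is handed to MediaIoBaseUpload directly, so a binary stream such as io.BytesIO is assumed; the auth mode and identifiers are placeholders:

from io import BytesIO

csv_bytes = BytesIO(b'id,name\n1,alpha\n2,beta\n')  # hypothetical CSV payload
io_to_table(
    auth='service',                # assumed StarThinker auth mode
    project_id='my-project',
    dataset_id='my_dataset',
    table_id='my_table',
    data_bytes=csv_bytes,
    source_format='CSV',
    skip_rows=1)                   # skip the header row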