Example #1
from io import BytesIO

from googleapiclient.http import MediaIoBaseUpload

# StarThinker helpers; module paths assumed from the StarThinker repo layout.
# job_wait and BIGQUERY_CHUNKSIZE are defined elsewhere in the same module.
from starthinker.util.google_api import API_BigQuery
from starthinker.util.project import project


def io_to_table(auth,
                project_id,
                dataset_id,
                table_id,
                data,
                source_format='CSV',
                schema=None,
                skip_rows=0,
                disposition='WRITE_TRUNCATE',
                wait=True):

    # if the stream contains data, load it into the table
    data.seek(0, 2)  # seek to the end to measure the stream
    if data.tell() > 0:
        data.seek(0)  # rewind before reading

        # data is expected to be a text-mode stream; encode it to UTF-8 bytes
        media = MediaIoBaseUpload(BytesIO(data.read().encode('utf8')),
                                  mimetype='application/octet-stream',
                                  resumable=True,
                                  chunksize=BIGQUERY_CHUNKSIZE)

        body = {
            'configuration': {
                'load': {
                    'destinationTable': {
                        'projectId': project_id,
                        'datasetId': dataset_id,
                        'tableId': table_id,
                    },
                    'sourceFormat': source_format,
                    'writeDisposition': disposition,
                    'autodetect': True,
                    'allowJaggedRows': True,
                    'allowQuotedNewlines': True,
                    'ignoreUnknownValues': True,
                }
            }
        }

        if schema:
            # an explicit schema overrides autodetection
            body['configuration']['load']['schema'] = {'fields': schema}
            body['configuration']['load']['autodetect'] = False

        if source_format == 'CSV':
            body['configuration']['load']['skipLeadingRows'] = skip_rows

        # execute(run=False) returns the un-run request so the payload can
        # be uploaded in resumable chunks below; calling job.execute() here
        # would upload the whole payload a second time
        job = API_BigQuery(auth).jobs().insert(
            projectId=project.id, body=body,
            media_body=media).execute(run=False)

        # the final next_chunk() response is the inserted job resource
        response = None
        while response is None:
            status, response = job.next_chunk()
            if project.verbose and status:
                print("Uploaded %d%%." % int(status.progress() * 100))
        if project.verbose:
            print("Uploaded 100%")

        if wait:
            job_wait(auth, response)
        else:
            return response

    # if the stream is empty and the disposition is WRITE_TRUNCATE, recreate the table empty
    elif disposition == 'WRITE_TRUNCATE':
        if project.verbose:
            print("BIGQUERY: No data, clearing table.")

        body = {
            "tableReference": {
                "projectId": project_id,
                "datasetId": dataset_id,
                "tableId": table_id
            }
        }

        # only attach a schema when one was provided; "fields": None is invalid
        if schema:
            body["schema"] = {"fields": schema}
        # TODO: change project_id to project.id, better yet project.cloud_id from JSON
        API_BigQuery(auth).tables().insert(projectId=project.id,
                                           datasetId=dataset_id,
                                           body=body).execute()
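
A minimal usage sketch for Example #1, assuming StarThinker-style credentials (auth is typically 'user' or 'service' in that codebase); the project, dataset, and table names below are hypothetical placeholders:

from io import StringIO

# Example #1 reads text and encodes it itself, so a text-mode stream works here.
csv_data = StringIO('name,age\nalice,30\nbob,25\n')

io_to_table(auth='service',
            project_id='my-project',    # hypothetical project
            dataset_id='my_dataset',    # hypothetical dataset
            table_id='my_table',        # hypothetical table
            data=csv_data,
            source_format='CSV',
            skip_rows=1,                # skip the CSV header row
            disposition='WRITE_TRUNCATE',
            wait=True)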
Example #2
# Example #2 relies on the same imports and module globals as Example #1
# (MediaIoBaseUpload, API_BigQuery, project, job_wait, BIGQUERY_CHUNKSIZE),
# plus table_create from the same module.
def io_to_table(auth,
                project_id,
                dataset_id,
                table_id,
                data_bytes,
                source_format='CSV',
                schema=None,
                skip_rows=0,
                disposition='WRITE_TRUNCATE',
                wait=True):

    # if the stream contains data, load it into the table
    data_bytes.seek(0, 2)  # seek to the end to measure the stream
    if data_bytes.tell() > 0:
        data_bytes.seek(0)  # rewind before reading

        # data_bytes must already be a binary stream; nothing is re-encoded here
        media = MediaIoBaseUpload(data_bytes,
                                  mimetype='application/octet-stream',
                                  resumable=True,
                                  chunksize=BIGQUERY_CHUNKSIZE)

        body = {
            'configuration': {
                'load': {
                    'destinationTable': {
                        'projectId': project_id,
                        'datasetId': dataset_id,
                        'tableId': table_id,
                    },
                    'sourceFormat': source_format,  # CSV, NEWLINE_DELIMITED_JSON
                    'writeDisposition': disposition,  # WRITE_TRUNCATE, WRITE_APPEND, WRITE_EMPTY
                    'autodetect': True,
                    'allowJaggedRows': True,
                    'allowQuotedNewlines': True,
                    'ignoreUnknownValues': True,
                }
            }
        }

        if schema:
            # an explicit schema overrides autodetection
            body['configuration']['load']['schema'] = {'fields': schema}
            body['configuration']['load']['autodetect'] = False

        # appends must match the existing table schema, so never autodetect
        if disposition == 'WRITE_APPEND':
            body['configuration']['load']['autodetect'] = False

        if source_format == 'CSV':
            body['configuration']['load']['skipLeadingRows'] = skip_rows

        # execute(run=False) returns the un-run request so the payload can
        # be uploaded in resumable chunks below; calling job.execute() here
        # would upload the whole payload a second time
        job = API_BigQuery(auth).jobs().insert(
            projectId=project.id, body=body,
            media_body=media).execute(run=False)

        # the final next_chunk() response is the inserted job resource
        response = None
        while response is None:
            status, response = job.next_chunk()
            if project.verbose and status:
                print('Uploaded %d%%.' % int(status.progress() * 100))
        if project.verbose:
            print('Uploaded 100%')

        if wait:
            job_wait(auth, response)
        else:
            return response

    # if the stream is empty and the disposition is WRITE_TRUNCATE, recreate the table empty
    elif disposition == 'WRITE_TRUNCATE':
        if project.verbose:
            print('BIGQUERY: No data, clearing table.')
        table_create(auth, project_id, dataset_id, table_id, schema)
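
A minimal usage sketch for Example #2. The key difference from Example #1 is that the stream must already contain bytes, so the caller encodes text up front. Names below are hypothetical placeholders:

from io import BytesIO

# Example #2 uploads the stream as-is, so encode text to bytes before the call.
json_rows = '{"name": "alice"}\n{"name": "bob"}\n'
data_bytes = BytesIO(json_rows.encode('utf-8'))

io_to_table(auth='service',
            project_id='my-project',    # hypothetical project
            dataset_id='my_dataset',    # hypothetical dataset
            table_id='my_table',        # hypothetical table
            data_bytes=data_bytes,
            source_format='NEWLINE_DELIMITED_JSON',
            disposition='WRITE_APPEND',  # requires an existing table with a matching schema
            wait=True)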