def io_to_table(auth, project_id, dataset_id, table_id, data, source_format='CSV', schema=None, skip_rows=0, disposition='WRITE_TRUNCATE', wait=True):
  """Load the contents of a text buffer into a BigQuery table.

  Args:
    auth: (string) authentication mode, passed through to API_BigQuery.
    project_id: (string) project of the destination table.
    dataset_id: (string) dataset of the destination table.
    table_id: (string) destination table name.
    data: (file-like) text stream whose full contents are uploaded; read()
      must return str (it is utf-8 encoded before upload).
    source_format: (string) 'CSV' or another BigQuery load sourceFormat.
    schema: (list) optional BigQuery field definitions; disables autodetect.
    skip_rows: (int) leading rows to skip, applied only when source is CSV.
    disposition: (string) BigQuery writeDisposition value.
    wait: (boolean) when True, block until the load job finishes.

  Returns:
    None when wait is True or the buffer is empty, otherwise the started
    job resource so the caller can wait on it later with job_wait.
  """

  # Seek to the end to learn whether the buffer holds any data.
  data.seek(0, 2)
  if data.tell() > 0:
    data.seek(0)

    # Encode the text stream to bytes; the media upload needs a byte stream.
    media = MediaIoBaseUpload(
      BytesIO(data.read().encode('utf8')),
      mimetype='application/octet-stream',
      resumable=True,
      chunksize=BIGQUERY_CHUNKSIZE
    )

    body = {
      'configuration': {
        'load': {
          'destinationTable': {
            'projectId': project_id,
            'datasetId': dataset_id,
            'tableId': table_id,
          },
          'sourceFormat': source_format,
          'writeDisposition': disposition,
          'autodetect': True,
          'allowJaggedRows': True,
          'allowQuotedNewlines': True,
          'ignoreUnknownValues': True,
        }
      }
    }

    # An explicit schema overrides autodetect.
    if schema:
      body['configuration']['load']['schema'] = {'fields': schema}
      body['configuration']['load']['autodetect'] = False

    # skipLeadingRows is only legal for CSV loads.
    if source_format == 'CSV':
      body['configuration']['load']['skipLeadingRows'] = skip_rows

    job = API_BigQuery(auth).jobs().insert(
      projectId=project.id,
      body=body,
      media_body=media
    ).execute(run=False)
    execution = job.execute()

    # Drive the resumable upload chunk by chunk until the API responds.
    response = None
    while response is None:
      status, response = job.next_chunk()
      if project.verbose and status:
        print("Uploaded %d%%." % int(status.progress() * 100))
    if project.verbose:
      print("Uploaded 100%")

    if wait:
      # BUG FIX: previously called job.execute() again here, re-issuing the
      # insert after the upload already completed; wait on the job that was
      # started above instead (matches the newer revision of this function).
      job_wait(auth, execution)
    else:
      # Return the started job resource (not the raw request object) so the
      # caller can pass it to job_wait later.
      return execution

  # No data: for WRITE_TRUNCATE, preserve semantics by recreating the table
  # empty so stale rows do not survive.
  elif disposition == 'WRITE_TRUNCATE':
    if project.verbose:
      print("BIGQUERY: No data, clearing table.")

    body = {
      "tableReference": {
        "projectId": project_id,
        "datasetId": dataset_id,
        "tableId": table_id
      },
      "schema": {
        "fields": schema
      }
    }

    # TODO(review): tables().insert uses project.id while the body uses
    # project_id; confirm these always match, or switch to project_id
    # (better yet project.cloud_id from JSON, per the original note).
    API_BigQuery(auth).tables().insert(
      projectId=project.id,
      datasetId=dataset_id,
      body=body
    ).execute()
def io_to_table(auth, project_id, dataset_id, table_id, data_bytes, source_format='CSV', schema=None, skip_rows=0, disposition='WRITE_TRUNCATE', wait=True):
  """Stream a bytes buffer into a BigQuery table via a resumable load job.

  Args:
    auth: (string) authentication mode, passed through to API_BigQuery.
    project_id: (string) project of the destination table.
    dataset_id: (string) dataset of the destination table.
    table_id: (string) destination table name.
    data_bytes: (file-like) byte stream uploaded as the table contents.
    source_format: (string) 'CSV' or another BigQuery load sourceFormat.
    schema: (list) optional BigQuery field definitions; disables autodetect.
    skip_rows: (int) leading rows to skip, applied only when source is CSV.
    disposition: (string) BigQuery writeDisposition value.
    wait: (boolean) when True, block until the load job finishes.

  Returns:
    None when wait is True or the buffer is empty, otherwise the started
    job resource so the caller can wait on it later.
  """

  # Seek to the end to learn whether the buffer contains any data.
  data_bytes.seek(0, 2)
  has_data = data_bytes.tell() > 0

  if has_data:
    data_bytes.seek(0)

    upload = MediaIoBaseUpload(
      data_bytes,
      mimetype='application/octet-stream',
      resumable=True,
      chunksize=BIGQUERY_CHUNKSIZE
    )

    # Assemble the load configuration, then wrap it into the job body below.
    load = {
      'destinationTable': {
        'projectId': project_id,
        'datasetId': dataset_id,
        'tableId': table_id,
      },
      'sourceFormat': source_format,  # CSV, NEWLINE_DELIMITED_JSON
      'writeDisposition': disposition,  # WRITE_TRUNCATE, WRITE_APPEND, WRITE_EMPTY
      'autodetect': True,
      'allowJaggedRows': True,
      'allowQuotedNewlines': True,
      'ignoreUnknownValues': True,
    }

    # An explicit schema overrides autodetect.
    if schema:
      load['schema'] = {'fields': schema}
      load['autodetect'] = False

    # Appending must not re-detect the schema of the existing table.
    if disposition == 'WRITE_APPEND':
      load['autodetect'] = False

    # skipLeadingRows is only legal for CSV loads.
    if source_format == 'CSV':
      load['skipLeadingRows'] = skip_rows

    body = {'configuration': {'load': load}}

    job = API_BigQuery(auth).jobs().insert(
      projectId=project.id,
      body=body,
      media_body=upload
    ).execute(run=False)
    execution = job.execute()

    # Drive the resumable upload until the API returns a final response.
    response = None
    while response is None:
      status, response = job.next_chunk()
      if project.verbose and status:
        print('Uploaded %d%%.' % int(status.progress() * 100))
    if project.verbose:
      print('Uploaded 100%')

    if wait:
      job_wait(auth, execution)
    else:
      return execution

  # No data: for WRITE_TRUNCATE, emulate truncation by recreating the table.
  elif disposition == 'WRITE_TRUNCATE':
    if project.verbose:
      print('BIGQUERY: No data, clearing table.')
    table_create(auth, project_id, dataset_id, table_id, schema)