def _clear_data_in_date_range_from_table(auth, project_id, dataset_id,
                                         table_id, start_date, end_date,
                                         billing_project_id=None):
  if not billing_project_id:
    billing_project_id = project_id

  service = get_service('bigquery', 'v2', auth)

  # original concatenation dropped the space before AND, producing invalid SQL
  query = ('DELETE FROM `' + project_id + '.' + dataset_id + '.' + table_id +
           '` WHERE Date >= "' + start_date + '" AND Date <= "' + end_date +
           '"')

  body = {
      'kind': 'bigquery#queryRequest',
      'query': query,
      'defaultDataset': {
          'datasetId': dataset_id,
      },
      'useLegacySql': False,
  }

  job = API_BigQuery(auth).jobs().query(
      projectId=billing_project_id, body=body).execute(run=False)
  job_wait(service, job.execute(num_retries=BIGQUERY_RETRIES))
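
# A minimal usage sketch (all identifiers below are examples, not values from
# this codebase; assumes the target table has a `Date` column and the dates
# are ISO strings):
#
#   _clear_data_in_date_range_from_table(
#       'service', 'my-project', 'my_dataset', 'my_table',
#       '2020-01-01', '2020-01-31')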
def drop_table(auth, project_id, dataset_id, table_id, billing_project_id=None):
  if not billing_project_id:
    billing_project_id = project_id

  service = get_service('bigquery', 'v2', auth)

  query = 'DROP TABLE `' + project_id + '.' + dataset_id + '.' + table_id + '`'

  body = {
      'kind': 'bigquery#queryRequest',
      'query': query,
      'defaultDataset': {
          'datasetId': dataset_id,
      },
      'useLegacySql': False,
  }

  job = API_BigQuery(auth).jobs().query(
      projectId=billing_project_id, body=body).execute(run=False)
  job_wait(service, job.execute(num_retries=BIGQUERY_RETRIES))
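
# A minimal usage sketch for drop_table (identifiers are examples; the billing
# project defaults to the data project when omitted):
#
#   drop_table('service', 'my-project', 'my_dataset', 'obsolete_table')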
def execute_statement(auth, project_id, dataset_id, statement,
                      billing_project_id=None, use_legacy_sql=False):
  if not billing_project_id:
    billing_project_id = project_id

  service = get_service('bigquery', 'v2', auth)

  body = {
      'kind': 'bigquery#queryRequest',
      'query': statement,
      'defaultDataset': {
          'datasetId': dataset_id,
      },
      'useLegacySql': use_legacy_sql,
  }

  job = API_BigQuery(auth).jobs().query(
      projectId=billing_project_id, body=body).execute(run=False)
  job_wait(service, job.execute(num_retries=BIGQUERY_RETRIES))
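
# A minimal usage sketch for execute_statement (statement and identifiers are
# examples; any statement BigQuery accepts through the query endpoint works):
#
#   execute_statement(
#       'service', 'my-project', 'my_dataset',
#       'CREATE TABLE IF NOT EXISTS `my-project.my_dataset.log` (Date DATE)')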
def io_to_table(auth, project_id, dataset_id, table_id, data_bytes,
                source_format='CSV', schema=None, skip_rows=0,
                disposition='WRITE_TRUNCATE', wait=True):

  # if data exists, upload it to the table
  data_bytes.seek(0, 2)
  if data_bytes.tell() > 0:
    data_bytes.seek(0)

    media = MediaIoBaseUpload(
        data_bytes,
        mimetype='application/octet-stream',
        resumable=True,
        chunksize=BIGQUERY_CHUNKSIZE)

    body = {
        'configuration': {
            'load': {
                'destinationTable': {
                    'projectId': project_id,
                    'datasetId': dataset_id,
                    'tableId': table_id,
                },
                'sourceFormat': source_format,  # CSV, NEWLINE_DELIMITED_JSON
                'writeDisposition': disposition,  # WRITE_TRUNCATE, WRITE_APPEND, WRITE_EMPTY
                'autodetect': True,
                'allowJaggedRows': True,
                'allowQuotedNewlines': True,
                'ignoreUnknownValues': True,
            }
        }
    }

    # an explicit schema or an append disposition overrides autodetect
    if schema:
      body['configuration']['load']['schema'] = {'fields': schema}
      body['configuration']['load']['autodetect'] = False

    if disposition == 'WRITE_APPEND':
      body['configuration']['load']['autodetect'] = False

    if source_format == 'CSV':
      body['configuration']['load']['skipLeadingRows'] = skip_rows

    job = API_BigQuery(auth).jobs().insert(
        projectId=project.id, body=body, media_body=media).execute(run=False)
    execution = job.execute()

    # stream the resumable upload in chunks, reporting progress when verbose
    response = None
    while response is None:
      status, response = job.next_chunk()
      if project.verbose and status:
        print('Uploaded %d%%.' % int(status.progress() * 100))
    if project.verbose:
      print('Uploaded 100%')

    if wait:
      job_wait(auth, execution)
    else:
      return execution

  # if there is no data and the disposition is truncate, clear the table
  elif disposition == 'WRITE_TRUNCATE':
    if project.verbose:
      print('BIGQUERY: No data, clearing table.')
    table_create(auth, project_id, dataset_id, table_id, schema)
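
# A minimal usage sketch for io_to_table (a BytesIO CSV upload with an
# explicit schema; identifiers, schema, and rows are examples):
#
#   from io import BytesIO
#
#   data = BytesIO(b'2020-01-01,100\n2020-01-02,200\n')
#   io_to_table(
#       'service', 'my-project', 'my_dataset', 'my_table', data,
#       source_format='CSV',
#       schema=[
#           {'name': 'Date', 'type': 'DATE'},
#           {'name': 'Impressions', 'type': 'INTEGER'},
#       ],
#       disposition='WRITE_TRUNCATE')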