def main():
    service = auth.build_bq_client()
    # Load configuration with the destination specified.
    load_config = {
        'destinationTable': {
            'projectId': auth.PROJECT_ID,
            'datasetId': 'ch06',
            # You can update this for each example.
            'tableId': 'example_resumable'
        }
    }
    # Set up the job here.
    # load[property] = value
    load_config['schema'] = {
        'fields': [
            {'name': 'string_f', 'type': 'STRING'},
            {'name': 'boolean_f', 'type': 'BOOLEAN'},
            {'name': 'integer_f', 'type': 'INTEGER'},
            {'name': 'float_f', 'type': 'FLOAT'},
            {'name': 'timestamp_f', 'type': 'TIMESTAMP'}
        ]
    }
    upload = MediaFileUpload('sample.csv',
                             mimetype='application/octet-stream',
                             # This enables resumable uploads.
                             resumable=True)
    # End of job configuration.
    run_load.start_and_wait(service.jobs(), auth.PROJECT_ID,
                            load_config, media_body=upload)
def main():
    service = auth.build_bq_client()
    # Load configuration with the destination specified.
    load_config = {
        'destinationTable': {
            'projectId': auth.PROJECT_ID,
            'datasetId': 'ch06',
            # You can update this for each example.
            'tableId': 'example_bad_data'
        }
    }
    # Set up the job here.
    # load[property] = value
    load_config['schema'] = {
        'fields': [
            {'name': 'string_f', 'type': 'STRING'},
            {'name': 'boolean_f', 'type': 'BOOLEAN'},
            {'name': 'integer_f', 'type': 'INTEGER', 'mode': 'REQUIRED'},
            {'name': 'float_f', 'type': 'FLOAT'},
            {'name': 'timestamp_f', 'type': 'TIMESTAMP'}
        ]
    }
    load_config['sourceUris'] = [
        'gs://bigquery-e2e/chapters/06/sample_bad.csv',
    ]
    # End of job configuration.
    run_load.start_and_wait(service.jobs(), auth.PROJECT_ID, load_config)
def main(command):
    bq = auth.build_bq_client()
    if command == 'refresh':
        print json.dumps(update_top_apps(bq.jobs()), indent=2)
    elif command == 'read':
        print format_rows(read_cache(bq.tabledata(), TOP_APPS_ID))
    else:
        print 'Unknown command: %s' % command
def main():
    service = auth.build_bq_client()
    with open(sys.argv[1], 'a+') as infile:
        tail_and_insert(infile, service.tabledata(),
                        auth.PROJECT_ID, 'ch06', 'streamed')
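The tail_and_insert helper this snippet calls is not shown. A minimal sketch of what such a helper might look like, assuming each new line of the tailed file is streamed as a single-column row; the 'line' field name, the insertId scheme, and the poll interval are illustrative, not from the original:

import time

def tail_and_insert(infile, tabledata, project_id, dataset_id, table_id):
    '''Streams each new line of infile into the table via insertAll.'''
    row_id = 0
    while True:
        line = infile.readline()
        if not line:
            time.sleep(0.5)  # Nothing new yet; poll again shortly.
            continue
        body = {'rows': [{
            # insertId enables best-effort de-duplication on retries.
            'insertId': 'row_%d_%d' % (int(time.time()), row_id),
            'json': {'line': line.rstrip('\n')}}]}
        tabledata.insertAll(projectId=project_id, datasetId=dataset_id,
                            tableId=table_id, body=body).execute()
        row_id += 1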
def __init__(self, project_id, dataset_id, table_id,
             start_index=None, read_count=None, next_page_token=None):
    self.project_id = project_id
    self.dataset_id = dataset_id
    self.bq_service = auth.build_bq_client()
    self.next_page_token = next_page_token
    self.next_index = start_index
    self.rows_left = read_count
    self.table_id = table_id
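Judging by its fields (a page token, a start index, and a remaining-row count), this constructor belongs to a helper that pages through a table with tabledata().list. A sketch of the paging method such a class might pair with this constructor; the method name next_page and the default page size are assumptions:

def next_page(self, max_results=500):
    '''Fetches the next page of rows; returns None when exhausted.'''
    if self.rows_left is not None and self.rows_left <= 0:
        return None
    kwargs = {'projectId': self.project_id,
              'datasetId': self.dataset_id,
              'tableId': self.table_id,
              'maxResults': max_results}
    if self.next_page_token:
        kwargs['pageToken'] = self.next_page_token
    elif self.next_index is not None:
        kwargs['startIndex'] = self.next_index
    result = self.bq_service.tabledata().list(**kwargs).execute()
    # After the first page, the token (not the index) drives iteration.
    self.next_page_token = result.get('pageToken')
    self.next_index = None
    rows = result.get('rows', [])
    if self.rows_left is not None:
        self.rows_left -= len(rows)
    return rows or None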
def main(prefix):
    jobs = auth.build_bq_client().jobs()
    tables = get_tables(jobs, prefix)
    if not tables:
        # Report against the prefix argument rather than sys.argv directly.
        print 'No tables matched prefix %s' % prefix
        return
    data = get_data(jobs, tables)
    print "% 5s | % 5s" % ('kind', 'count')
    for r in data:
        print "% 5s | % 5d" % (cell(r, 0), int(cell(r, 1)))
def __init__(self, project_id, job_id=None, client=None):
    # Only one thread can call the bq_service at once.
    self.lock = threading.Lock()
    self.bq_service = client
    if self.bq_service is None:
        # Sample code authorization support.
        import auth
        self.bq_service = auth.build_bq_client()
    self.project_id = project_id
    self.job_id = job_id if job_id else 'job_%d' % int(time.time())
    self.start = None
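The lock suggests the shared service object is not safe for concurrent calls (the underlying httplib2 connection is not thread-safe), so every request should be guarded. A sketch of a status-polling method in that style; the method name is an assumption:

def get_job_state(self):
    '''Fetches the job status, holding the lock around the shared client.'''
    with self.lock:
        job = self.bq_service.jobs().get(
            projectId=self.project_id, jobId=self.job_id).execute()
    return job['status']['state']  # PENDING, RUNNING, or DONE.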
def main(argv):
    if len(argv) == 0:
        print 'Usage: query.py <project_id> [query]'
        return
    service = auth.build_bq_client()
    project_id = argv[0]
    query = QueryRpc(service, project_id)
    if len(argv) < 2:
        query_text = 'SELECT 17'
    else:
        # The entire rest of the command line is the query.
        query_text = ' '.join(argv[1:])
    query.run(query_text, timeout_ms=1)
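QueryRpc is not defined in this snippet. A sketch of what its run method plausibly does with jobs().query and jobs().getQueryResults; passing timeout_ms=1 as above would all but guarantee the jobComplete-is-false path, exercising the polling loop. Paging of large results via pageToken is omitted for brevity:

class QueryRpc(object):
    def __init__(self, service, project_id):
        self.service = service
        self.project_id = project_id

    def run(self, query_text, timeout_ms=10000):
        '''Issues a query RPC, then polls until the job completes.'''
        response = self.service.jobs().query(
            projectId=self.project_id,
            body={'query': query_text, 'timeoutMs': timeout_ms}).execute()
        job_ref = response['jobReference']
        while not response.get('jobComplete', False):
            # The RPC timed out before the query finished; keep waiting.
            response = self.service.jobs().getQueryResults(
                projectId=job_ref['projectId'],
                jobId=job_ref['jobId'],
                timeoutMs=timeout_ms).execute()
        for row in response.get('rows', []):
            print [field.get('v') for field in row['f']]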
def main(argv):
    if len(argv) == 0:
        print('Usage: query_job.py <project_id> [query]')
        return
    service = auth.build_bq_client()
    project_id = argv[0]
    query_job = QueryJob(service, project_id)
    if len(argv) < 2:
        query = 'SELECT 17'
    else:
        # The entire rest of the command line is the query.
        query = ' '.join(argv[1:])
    destination = {
        'projectId': project_id,
        'datasetId': 'scratch',
        'tableId': 'results'}
    query_job.run(query, destination_table=destination)
def main():
    service = auth.build_bq_client()
    # Load configuration with the destination specified.
    load_config = {
        'destinationTable': {
            'projectId': 'publicdata',
            'datasetId': 'samples',
            'tableId': 'mypersonaltable'
        }
    }
    # Set up the job here.
    # load[property] = value
    load_config['sourceUris'] = [
        'gs://bigquery-e2e/chapters/06/sample.csv',
    ]
    # End of job configuration.
    run_load.start_and_wait(service.jobs(), auth.PROJECT_ID, load_config)
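Several of these examples call run_load.start_and_wait without showing it. A sketch of what such a helper could look like: insert the load job (optionally with an upload body for resumable uploads), then poll jobs().get until the job reaches DONE. The 5-second poll interval is an arbitrary choice:

import time

def start_and_wait(jobs, project_id, load_config, media_body=None):
    '''Inserts a load job, then polls it until it reaches DONE.'''
    body = {'configuration': {'load': load_config}}
    result = jobs.insert(projectId=project_id, body=body,
                         media_body=media_body).execute()
    job_ref = result['jobReference']
    while result['status']['state'] != 'DONE':
        time.sleep(5)  # Arbitrary poll interval.
        result = jobs.get(projectId=job_ref['projectId'],
                          jobId=job_ref['jobId']).execute()
    if 'errorResult' in result['status']:
        print 'Load failed: %s' % result['status']['errorResult']
    else:
        print 'Load succeeded'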
def main():
    service = auth.build_bq_client()
    project_id = 'bigquery-e2e'
    query = 'select * from temp.nested'
    run_query(service, project_id, query, print_results, timeout=1)
#!/usr/bin/python2.7
## All rights to this package are hereby disclaimed and its contents
## released into the public domain by the authors.

'''Runs Python commands used in Chapter 7'''

import auth
import pprint
import time

project_id = 'hirenpatelatl-learn-bigquery'
service = auth.build_bq_client()
job_id = 'job_%d' % int(time.time() * 1000)

# Example 1 using jobs().query
#response = service.jobs().query(
#    projectId=project_id,
#    body={'query': 'SELECT 17'}).execute()
#pprint.pprint(response)
#
# Example 2 using jobs().insert and then getting results from table_ref
#
#response = service.jobs().insert(
#    projectId=project_id,
#    body={'configuration': {'query': {'query': 'SELECT 17'}},
#          'jobReference': {'jobId': job_id, 'projectId': project_id}}
#    ).execute()
##pprint.pprint(response)
#response = service.jobs().get(projectId=project_id, jobId=job_id).execute()
#response = service.jobs().get(**response['jobReference']).execute()
def main():
    service = auth.build_bq_client()
    project_id = 'hirenpatelatl-learn-bigquery'
    query = 'select * from [bigquery-e2e:reference.word_frequency] limit 100'
    run_query(service, project_id, query, print_results, timeout=1)
import datetime
try:
    import getpass
except:
    pass
try:
    from edx2bigquery_config import PROJECT_ID as DEFAULT_PROJECT_ID
except:
    from local_config import PROJECT_ID as DEFAULT_PROJECT_ID
import auth
from collections import OrderedDict

service = auth.build_bq_client(timeout=480)
#service = auth.build_bq_client()
projects = service.projects()
datasets = service.datasets()
tables = service.tables()
tabledata = service.tabledata()
jobs = service.jobs()

PROJECT_NAMES = {}  # used to cache project names, key=project_id

def default_logger(msg):
    print msg

def get_project_name(project_id=DEFAULT_PROJECT_ID):
    if project_id in PROJECT_NAMES:  # lookup in cache, first
def main(argv):
    service = auth.build_bq_client()
    project_id = 'hirenpatelatl-learn-bigquery' if len(argv) == 0 else argv[0]
    run_tour(service, project_id)
def main(argv):
    service = auth.build_bq_client()
    project_id = 'bigquery-e2e' if len(argv) == 0 else argv[0]
    run_tour(service, project_id)
def main(argv): service = auth.build_bq_client() project_id = "studied-sled-134801" if len(argv) == 0 else argv[0] run_tour(service, project_id)
daily = make_table_ref("devices_" + date.strftime("%Y%m%d")) load_device_data(bq.jobs(), daily) # Make the snapshot representing the latest. current = daily.copy() current["tableId"] = "devices_current" copy_table(bq.jobs(), daily, current) quarters = {"0331": 1, "0630": 2, "0930": 3, "1231": 4} quarter = quarters.get(date.strftime("%m%d"), None) if quarter: quarterly = daily.copy() quarterly["tableId"] = "devices_%dq%d" % (date.year, quarter) copy_table(bq.jobs(), daily, quarterly) # Finally set the daily version to expire. bq.tables().patch(body={"expirationTime": long(time.time() * 1000 + EXPIRATION_MS)}, **daily).execute() def main(date): try: date = datetime.date(int(date[0:4]), int(date[4:6]), int(date[6:8])) except Exception, e: print "Invalid date: %s" % date bq = auth.build_bq_client() load_and_backup(bq, date) if __name__ == "__main__": main(sys.argv[1])