コード例 #1
0
def main():
  """Load the local file sample.csv into the ch06.example_resumable table.

  Builds a load configuration with an explicit five-column schema, wraps
  the local file in a resumable media upload, and passes both to
  run_load.start_and_wait.
  """
  bq = auth.build_bq_client()

  # (name, type) pairs for the destination table's columns.
  columns = (
    ('string_f', 'STRING'),
    ('boolean_f', 'BOOLEAN'),
    ('integer_f', 'INTEGER'),
    ('float_f', 'FLOAT'),
    ('timestamp_f', 'TIMESTAMP'),
  )
  job_config = {
    'destinationTable': {
      'projectId': auth.PROJECT_ID,
      'datasetId': 'ch06',
      # You can update this for each example.
      'tableId': 'example_resumable',
    },
    'schema': {
      'fields': [{'name': name, 'type': kind} for name, kind in columns],
    },
  }

  # resumable=True enables resumable uploads.
  media = MediaFileUpload('sample.csv',
                          mimetype='application/octet-stream',
                          resumable=True)

  run_load.start_and_wait(bq.jobs(), auth.PROJECT_ID, job_config,
                          media_body=media)
コード例 #2
0
def main():
  """Load sample_bad.csv from its gs:// URI into ch06.example_bad_data.

  The schema is the same five-column layout used by the other load
  examples, except that integer_f is declared with mode REQUIRED.
  """
  bq = auth.build_bq_client()

  job_config = {
    'destinationTable': {
      'projectId': auth.PROJECT_ID,
      'datasetId': 'ch06',
      # You can update this for each example.
      'tableId': 'example_bad_data',
    },
    'schema': {
      'fields': [
        {'name': 'string_f', 'type': 'STRING'},
        {'name': 'boolean_f', 'type': 'BOOLEAN'},
        {'name': 'integer_f', 'type': 'INTEGER', 'mode': 'REQUIRED'},
        {'name': 'float_f', 'type': 'FLOAT'},
        {'name': 'timestamp_f', 'type': 'TIMESTAMP'},
      ],
    },
    'sourceUris': ['gs://bigquery-e2e/chapters/06/sample_bad.csv'],
  }

  run_load.start_and_wait(bq.jobs(), auth.PROJECT_ID, job_config)
コード例 #3
0
def main(command):
    """Dispatch a cache command.

    Args:
        command: 'refresh' prints the JSON-formatted result of
            update_top_apps; 'read' prints the formatted rows returned by
            read_cache for TOP_APPS_ID; anything else prints an
            'Unknown command' message.
    """
    client = auth.build_bq_client()
    if command == 'refresh':
        refreshed = update_top_apps(client.jobs())
        print(json.dumps(refreshed, indent=2))
    elif command == 'read':
        cached_rows = read_cache(client.tabledata(), TOP_APPS_ID)
        print(format_rows(cached_rows))
    else:
        print('Unknown command: %s' % command)
コード例 #4
0
ファイル: stream.py プロジェクト: gj5615/bigquery-e2e
def main():
  """Tail the file named by the first command-line argument into BigQuery.

  The file is opened in 'a+' mode and handed to tail_and_insert together
  with the tabledata collection, the project id, and the ch06.streamed
  dataset/table names.
  """
  bq = auth.build_bq_client()

  log_path = sys.argv[1]
  with open(log_path, 'a+') as log_file:
    tail_and_insert(log_file, bq.tabledata(), auth.PROJECT_ID,
                    'ch06', 'streamed')
コード例 #5
0
ファイル: cache.py プロジェクト: Goldmer/bigquery-e2e
def main(command):
  """Run the cache command named by `command`.

  Args:
    command: 'refresh' prints the JSON-formatted result of
      update_top_apps; 'read' prints the formatted rows returned by
      read_cache for TOP_APPS_ID; any other value prints an
      'Unknown command' message.
  """
  client = auth.build_bq_client()
  if command == 'refresh':
    refreshed = update_top_apps(client.jobs())
    print(json.dumps(refreshed, indent=2))
  elif command == 'read':
    cached_rows = read_cache(client.tabledata(), TOP_APPS_ID)
    print(format_rows(cached_rows))
  else:
    print('Unknown command: %s' % command)
コード例 #6
0
 def __init__(self, project_id, dataset_id, table_id,
     start_index=None, read_count=None, next_page_token=None):
   """Record the table to read and the starting read position.

   Args:
     project_id: Stored as self.project_id.
     dataset_id: Stored as self.dataset_id.
     table_id: Stored as self.table_id.
     start_index: Initial value of self.next_index.
     read_count: Initial value of self.rows_left.
     next_page_token: Initial value of self.next_page_token.
   """
   # Each instance builds its own client.
   self.bq_service = auth.build_bq_client()

   # Table coordinates.
   self.project_id = project_id
   self.dataset_id = dataset_id
   self.table_id = table_id

   # Read position.
   self.next_index = start_index
   self.next_page_token = next_page_token
   self.rows_left = read_count
コード例 #7
0
ファイル: two_phase.py プロジェクト: Goldmer/bigquery-e2e
def main(prefix):
  """Print a 'kind | count' table for the tables matching `prefix`.

  Args:
    prefix: Table-name prefix passed to get_tables.

  If get_tables returns nothing, a message naming the prefix is printed
  and no data is fetched.
  """
  jobs = auth.build_bq_client().jobs()
  tables = get_tables(jobs, prefix)
  if not tables:
    # Report the prefix that was actually searched. The message used to
    # read sys.argv[1], which is wrong (or raises IndexError) whenever
    # main() is called with anything other than the first CLI argument.
    print('No tables matched prefix %s' % prefix)
    return
  data = get_data(jobs, tables)

  print("% 5s | % 5s" % ('kind', 'count'))
  for r in data:
    # Column 0 is printed as text, column 1 as an integer.
    print("% 5s | % 5d" % (cell(r, 0), int(cell(r, 1))))
コード例 #8
0
def main(prefix):
    """Print a 'kind | count' table for the tables matching `prefix`.

    Args:
        prefix: Table-name prefix passed to get_tables.

    If get_tables returns nothing, a message naming the prefix is printed
    and no data is fetched.
    """
    jobs = auth.build_bq_client().jobs()
    tables = get_tables(jobs, prefix)
    if not tables:
        # Report the prefix that was actually searched. The message used
        # to read sys.argv[1], which is wrong (or raises IndexError)
        # whenever main() is called with anything other than the first
        # CLI argument.
        print('No tables matched prefix %s' % prefix)
        return
    data = get_data(jobs, tables)

    print("% 5s | % 5s" % ('kind', 'count'))
    for r in data:
        # Column 0 is printed as text, column 1 as an integer.
        print("% 5s | % 5d" % (cell(r, 0), int(cell(r, 1))))
コード例 #9
0
ファイル: job_runner.py プロジェクト: xweeta86/bigquery-e2e
 def __init__(self, project_id, job_id=None, client=None):
   """Set up the runner's client, lock, and job identity.

   Args:
     project_id: Stored as self.project_id.
     job_id: Job id to use; when falsy, an id of the form 'job_<seconds>'
       is generated from the current time.
     client: BigQuery service object; when None, one is built through the
       sample auth module.
   """
   # Only one thread can call the bq_service at once.
   self.lock = threading.Lock()
   if client is None:
     # Sample code authorization support.
     import auth
     client = auth.build_bq_client()
   self.bq_service = client
   self.project_id = project_id
   self.job_id = job_id or 'job_%d' % int(time.time())
   self.start = None
コード例 #10
0
ファイル: table_reader.py プロジェクト: xweeta86/bigquery-e2e
 def __init__(self,
              project_id,
              dataset_id,
              table_id,
              start_index=None,
              read_count=None,
              next_page_token=None):
     """Record the table to read and the starting read position.

     Args:
       project_id: Stored as self.project_id.
       dataset_id: Stored as self.dataset_id.
       table_id: Stored as self.table_id.
       start_index: Initial value of self.next_index.
       read_count: Initial value of self.rows_left.
       next_page_token: Initial value of self.next_page_token.
     """
     # Each instance builds its own client.
     self.bq_service = auth.build_bq_client()

     # Table coordinates.
     self.project_id = project_id
     self.dataset_id = dataset_id
     self.table_id = table_id

     # Read position.
     self.next_index = start_index
     self.next_page_token = next_page_token
     self.rows_left = read_count
コード例 #11
0
ファイル: query.py プロジェクト: Goldmer/bigquery-e2e
def main(argv):
  """Run a query through QueryRpc from command-line style arguments.

  Args:
    argv: [project_id, query words...]. With no query words the query
      'SELECT 17' is used; with no arguments at all a usage line is
      printed and nothing is run.
  """
  argc = len(argv)
  if argc == 0:
    print('Usage: query.py <project_id> [query]')
    return
  bq = auth.build_bq_client()
  project_id = argv[0]
  rpc = QueryRpc(bq, project_id)
  # The entire rest of the command line is the query.
  query_text = ' '.join(argv[1:]) if argc >= 2 else 'SELECT 17'

  rpc.run(query_text, timeout_ms=1)
コード例 #12
0
ファイル: query.py プロジェクト: xweeta86/bigquery-e2e
def main(argv):
    """Run a query through QueryRpc from command-line style arguments.

    Args:
        argv: [project_id, query words...]. With no query words the query
            'SELECT 17' is used; with no arguments at all a usage line is
            printed and nothing is run.
    """
    argc = len(argv)
    if argc == 0:
        print('Usage: query.py <project_id> [query]')
        return
    bq = auth.build_bq_client()
    project_id = argv[0]
    rpc = QueryRpc(bq, project_id)
    # The entire rest of the command line is the query.
    query_text = ' '.join(argv[1:]) if argc >= 2 else 'SELECT 17'

    rpc.run(query_text, timeout_ms=1)
コード例 #13
0
ファイル: query_job.py プロジェクト: gj5615/bigquery-e2e
def main(argv):
  """Run a query job that writes its results to scratch.results.

  Args:
    argv: [project_id, query words...]. With no query words the query
      'SELECT 17' is used; with no arguments at all a usage line is
      printed and nothing is run.
  """
  argc = len(argv)
  if argc == 0:
    print('Usage: query_job.py <project_id> [query]')
    return
  bq = auth.build_bq_client()
  project_id = argv[0]
  job = QueryJob(bq, project_id)
  # The entire rest of the command line is the query.
  sql = ' '.join(argv[1:]) if argc >= 2 else 'SELECT 17'
  results_table = {
      'projectId': project_id,
      'datasetId': 'scratch',
      'tableId': 'results',
  }
  job.run(sql, destination_table=results_table)
コード例 #14
0
ファイル: query_job.py プロジェクト: gj5615/bigquery-e2e
def main(argv):
    """Run a query job that writes its results to scratch.results.

    Args:
        argv: [project_id, query words...]. With no query words the query
            'SELECT 17' is used; with no arguments at all a usage line is
            printed and nothing is run.
    """
    argc = len(argv)
    if argc == 0:
        print('Usage: query_job.py <project_id> [query]')
        return
    bq = auth.build_bq_client()
    project_id = argv[0]
    job = QueryJob(bq, project_id)
    # The entire rest of the command line is the query.
    sql = ' '.join(argv[1:]) if argc >= 2 else 'SELECT 17'
    results_table = {
        'projectId': project_id,
        'datasetId': 'scratch',
        'tableId': 'results',
    }
    job.run(sql, destination_table=results_table)
コード例 #15
0
def main():
    """Load the local file sample.csv into the ch06.example_resumable table.

    Builds a load configuration with an explicit five-column schema, wraps
    the local file in a resumable media upload, and passes both to
    run_load.start_and_wait.
    """
    bq = auth.build_bq_client()

    job_config = {
        'destinationTable': {
            'projectId': auth.PROJECT_ID,
            'datasetId': 'ch06',
            # You can update this for each example.
            'tableId': 'example_resumable',
        },
        'schema': {
            'fields': [
                {'name': 'string_f', 'type': 'STRING'},
                {'name': 'boolean_f', 'type': 'BOOLEAN'},
                {'name': 'integer_f', 'type': 'INTEGER'},
                {'name': 'float_f', 'type': 'FLOAT'},
                {'name': 'timestamp_f', 'type': 'TIMESTAMP'},
            ],
        },
    }

    # resumable=True enables resumable uploads.
    media = MediaFileUpload('sample.csv',
                            mimetype='application/octet-stream',
                            resumable=True)

    run_load.start_and_wait(bq.jobs(), auth.PROJECT_ID, job_config,
                            media_body=media)
コード例 #16
0
def main():
    """Start a load of sample.csv into publicdata:samples.mypersonaltable.

    The job is submitted under auth.PROJECT_ID while the destination table
    names the 'publicdata' project; no schema is supplied.
    """
    bq = auth.build_bq_client()

    job_config = {
        'destinationTable': {
            'projectId': 'publicdata',
            'datasetId': 'samples',
            'tableId': 'mypersonaltable',
        },
        'sourceUris': ['gs://bigquery-e2e/chapters/06/sample.csv'],
    }

    run_load.start_and_wait(bq.jobs(), auth.PROJECT_ID, job_config)
コード例 #17
0
def main():
  """Start a load of sample.csv into publicdata:samples.mypersonaltable.

  The job is submitted under auth.PROJECT_ID while the destination table
  names the 'publicdata' project; no schema is supplied.
  """
  bq = auth.build_bq_client()

  job_config = {
    'destinationTable': {
      'projectId': 'publicdata',
      'datasetId': 'samples',
      'tableId': 'mypersonaltable',
    },
    'sourceUris': ['gs://bigquery-e2e/chapters/06/sample.csv'],
  }

  run_load.start_and_wait(bq.jobs(), auth.PROJECT_ID, job_config)
コード例 #18
0
def main():
    """Run 'select * from temp.nested' in project bigquery-e2e.

    Results are handed to print_results; run_query is called with
    timeout=1.
    """
    bq = auth.build_bq_client()
    run_query(bq,
              'bigquery-e2e',
              'select * from temp.nested',
              print_results,
              timeout=1)
コード例 #19
0
#!/usr/bin/python2.7
## All rights to this package are hereby disclaimed and its contents
## released into the public domain by the authors.

'''Runs Python commands used in Chapter 7'''

import auth
import pprint
import time
# Project that the example requests below are issued against.
project_id = 'hirenpatelatl-learn-bigquery'
# API client built through the sample auth module.
service = auth.build_bq_client()
# Job id derived from the current time in milliseconds.
job_id = 'job_%d' % int(time.time() * 1000)
# Example 1 (disabled): run a query with jobs().query.
# response = service.jobs().query(
#     projectId=project_id,
#     body={'query': 'SELECT 17'}).execute()
# pprint.pprint(response)
#
# Example 2 (disabled): start the query with jobs().insert, then fetch the
# job with jobs().get.
#
# response = service.jobs().insert(
#     projectId=project_id,
#     body={'configuration': {'query': {'query': 'SELECT 17'}},
#           'jobReference': {'jobId': job_id, 'projectId': project_id}}
#     ).execute()
# # pprint.pprint(response)

# response = service.jobs().get(projectId=project_id, jobId=job_id).execute()

# response = service.jobs().get(**response['jobReference']).execute()
コード例 #20
0
def main():
    """Tail the file named by the first command-line argument into BigQuery.

    The file is opened in 'a+' mode and handed to tail_and_insert together
    with the tabledata collection, the project id, and the ch06.streamed
    dataset/table names.
    """
    bq = auth.build_bq_client()

    log_path = sys.argv[1]
    with open(log_path, 'a+') as log_file:
        tail_and_insert(log_file,
                        bq.tabledata(),
                        auth.PROJECT_ID,
                        'ch06',
                        'streamed')
コード例 #21
0
import datetime

try:
    import getpass
except:
    pass

try:
    from edx2bigquery_config import PROJECT_ID as DEFAULT_PROJECT_ID
except:
    from local_config import PROJECT_ID as DEFAULT_PROJECT_ID

import auth
from collections import OrderedDict

# Shared BigQuery API client, built once when this module is imported.
# NOTE(review): timeout=480 is presumably in seconds -- confirm against
# auth.build_bq_client.
service = auth.build_bq_client(timeout=480)
# Alternative without an explicit timeout (disabled):
#service = auth.build_bq_client()

# Resource collections of the shared client, exposed as module-level names.
projects = service.projects()
datasets = service.datasets()
tables = service.tables()
tabledata = service.tabledata()
jobs = service.jobs()

PROJECT_NAMES = {}  # used to cache project names, key=project_id


def default_logger(msg):
    """Write `msg` to standard output, followed by a newline."""
    print(msg)

コード例 #22
0
ファイル: query.py プロジェクト: gj5615/bigquery-e2e
def main():
    """Run the word_frequency sample query and print its results.

    The query reads 100 rows from [bigquery-e2e:reference.word_frequency]
    and is run in project hirenpatelatl-learn-bigquery; rows are handed to
    print_results and run_query is called with timeout=1.
    """
    bq = auth.build_bq_client()
    run_query(
        bq,
        'hirenpatelatl-learn-bigquery',
        'select * from [bigquery-e2e:reference.word_frequency] limit 100',
        print_results,
        timeout=1)
コード例 #23
0
ファイル: query.py プロジェクト: Goldmer/bigquery-e2e
def main():
  """Run 'select * from temp.nested' in project bigquery-e2e.

  Results are handed to print_results; run_query is called with
  timeout=1.
  """
  bq = auth.build_bq_client()
  run_query(bq,
            'bigquery-e2e',
            'select * from temp.nested',
            print_results,
            timeout=1)
コード例 #24
0
    current = daily.copy()
    current['tableId'] = 'devices_current'
    copy_table(bq.jobs(), daily, current)

    quarters = {'0331': 1, '0630': 2, '0930': 3, '1231': 4}
    quarter = quarters.get(date.strftime('%m%d'), None)
    if quarter:
        quarterly = daily.copy()
        quarterly['tableId'] = ('devices_%dq%d' % (date.year, quarter))
        copy_table(bq.jobs(), daily, quarterly)

    # Finally set the daily version to expire.
    bq.tables().patch(body={
        'expirationTime':
        long(time.time() * 1000 + EXPIRATION_MS)
    },
                      **daily).execute()


def main(date):
    """Parse a YYYYMMDD string and run load_and_backup for that day.

    Args:
        date: String whose characters 0-3, 4-5 and 6-7 are the year, month
            and day.

    An unparsable date is reported on standard output and nothing else is
    done.
    """
    try:
        day = datetime.date(int(date[0:4]), int(date[4:6]), int(date[6:8]))
    except (TypeError, ValueError):
        print('Invalid date: %s' % date)
        # Stop here. Falling through would build a client and hand the raw
        # string to load_and_backup, which presumably expects a
        # datetime.date.
        return
    bq = auth.build_bq_client()
    load_and_backup(bq, day)


# Script entry point: the first command-line argument is passed to main() as
# the date string.
if __name__ == '__main__':
    main(sys.argv[1])
コード例 #25
0
ファイル: bqutil.py プロジェクト: AbdouSeck/edx2bigquery
import datetime

try:
    import getpass
except:
    pass

try:
    from edx2bigquery_config import PROJECT_ID as DEFAULT_PROJECT_ID
except:
    from local_config import PROJECT_ID as DEFAULT_PROJECT_ID

import auth
from collections import OrderedDict

# Shared BigQuery API client, built once when this module is imported.
# NOTE(review): timeout=480 is presumably in seconds -- confirm against
# auth.build_bq_client.
service = auth.build_bq_client(timeout=480)
# Alternative without an explicit timeout (disabled):
#service = auth.build_bq_client()

# Resource collections of the shared client, exposed as module-level names.
projects = service.projects()
datasets = service.datasets()
tables = service.tables()
tabledata = service.tabledata()
jobs = service.jobs()

PROJECT_NAMES = {}  # used to cache project names, key=project_id

def default_logger(msg):
    """Write `msg` to standard output, followed by a newline."""
    print(msg)

def get_project_name(project_id=DEFAULT_PROJECT_ID):
    if project_id in PROJECT_NAMES:		# lookup in cache, first
コード例 #26
0
ファイル: query.py プロジェクト: hirenpatelatl/bigquery-e2e
def main():
  """Run the word_frequency sample query and print its results.

  The query reads 100 rows from [bigquery-e2e:reference.word_frequency]
  and is run in project hirenpatelatl-learn-bigquery; rows are handed to
  print_results and run_query is called with timeout=1.
  """
  bq = auth.build_bq_client()
  run_query(
    bq,
    'hirenpatelatl-learn-bigquery',
    'select * from [bigquery-e2e:reference.word_frequency] limit 100',
    print_results,
    timeout=1)
コード例 #27
0
ファイル: tour.py プロジェクト: gj5615/bigquery-e2e
def main(argv):
  """Run the tour against a project.

  Args:
    argv: Argument list; argv[0] is the project id. When the list is
      empty, 'hirenpatelatl-learn-bigquery' is used.
  """
  bq = auth.build_bq_client()
  if len(argv) == 0:
    target_project = 'hirenpatelatl-learn-bigquery'
  else:
    target_project = argv[0]
  run_tour(bq, target_project)
コード例 #28
0
ファイル: tour.py プロジェクト: Goldmer/bigquery-e2e
def main(argv):
  """Run the tour against a project.

  Args:
    argv: Argument list; argv[0] is the project id. When the list is
      empty, 'bigquery-e2e' is used.
  """
  bq = auth.build_bq_client()
  if len(argv) == 0:
    target_project = 'bigquery-e2e'
  else:
    target_project = argv[0]
  run_tour(bq, target_project)
コード例 #29
0
ファイル: tour.py プロジェクト: nishanthpudi/BigQuery
def main(argv):
    """Run the tour against a project.

    Args:
        argv: Argument list; argv[0] is the project id. When the list is
            empty, "studied-sled-134801" is used.
    """
    bq = auth.build_bq_client()
    if len(argv) == 0:
        target_project = "studied-sled-134801"
    else:
        target_project = argv[0]
    run_tour(bq, target_project)
コード例 #30
0
ファイル: backup.py プロジェクト: t4ku/bigquery-e2e
    daily = make_table_ref("devices_" + date.strftime("%Y%m%d"))
    load_device_data(bq.jobs(), daily)

    # Make the snapshot representing the latest.
    current = daily.copy()
    current["tableId"] = "devices_current"
    copy_table(bq.jobs(), daily, current)

    quarters = {"0331": 1, "0630": 2, "0930": 3, "1231": 4}
    quarter = quarters.get(date.strftime("%m%d"), None)
    if quarter:
        quarterly = daily.copy()
        quarterly["tableId"] = "devices_%dq%d" % (date.year, quarter)
        copy_table(bq.jobs(), daily, quarterly)

    # Finally set the daily version to expire.
    bq.tables().patch(body={"expirationTime": long(time.time() * 1000 + EXPIRATION_MS)}, **daily).execute()


def main(date):
    """Parse a YYYYMMDD string and run load_and_backup for that day.

    Args:
        date: String whose characters 0-3, 4-5 and 6-7 are the year, month
            and day.

    An unparsable date is reported on standard output and nothing else is
    done.
    """
    try:
        day = datetime.date(int(date[0:4]), int(date[4:6]), int(date[6:8]))
    except (TypeError, ValueError):
        print("Invalid date: %s" % date)
        # Stop here. Falling through would build a client and hand the raw
        # string to load_and_backup, which presumably expects a
        # datetime.date.
        return
    bq = auth.build_bq_client()
    load_and_backup(bq, day)


# Script entry point: the first command-line argument is passed to main() as
# the date string.
if __name__ == "__main__":
    main(sys.argv[1])