Ejemplo n.º 1
0
def main():
    """Inspect tables through the `bq` module and print what is found.

    In order, this prints:
      * the schema and SQL representation of 'githubarchive:github.timeline';
      * the metadata, schema and two samples of 'requestlogs.logs20140615';
      * the names of the tables returned by bq.tables('requestlogs').

    Every print uses the single-argument parenthesised form, which behaves
    the same as the print statement on Python 2.
    """
    name1 = 'githubarchive:github.timeline'
    table1 = bq.table(name1)
    print(name1)
    for field in table1.schema:
        print('%s [%s]' % (field.name, field.data_type))

    # pylint: disable=protected-access
    print('sql representation: ' + table1._repr_sql_())

    print('')

    name2 = 'requestlogs.logs20140615'
    table2 = bq.table(name2)
    table2_md = table2.metadata
    print(name2)
    print('full name: ' + str(table2_md))
    # friendly_name and description are formatted with %s instead of being
    # concatenated: a non-string value (e.g. None for an unset field -- TODO
    # confirm against the bq metadata API) would otherwise raise TypeError,
    # while string values print exactly as before.
    print('friendly name: %s' % (table2_md.friendly_name,))
    print('description: %s' % (table2_md.description,))
    print('rows: ' + str(table2_md.rows))
    print('size: ' + str(table2_md.size))
    print('created: ' + str(table2_md.created_on))
    print('modified: ' + str(table2_md.modified_on))
    for field in table2.schema:
        print('%s [%s]' % (field.name, field.data_type))

    # First a sample with no arguments, then one with an explicit
    # bq.Sampling.default(count=10) strategy.
    print(table2.sample().to_dataframe())
    print(table2.sample(sampling=bq.Sampling.default(count=10)).to_dataframe())

    print('')

    table_list = bq.tables('requestlogs')
    print('Tables:')
    for t in table_list:
        print(t.name)
Ejemplo n.º 2
0
def main():
  """Inspect tables through the `bq` module and print what is found.

  In order, this prints:
    * the schema and SQL representation of 'githubarchive:github.timeline';
    * the metadata, schema and two samples of 'requestlogs.logs20140615';
    * the names of the tables returned by bq.tables('requestlogs').

  Every print uses the single-argument parenthesised form, which behaves the
  same as the print statement on Python 2.
  """
  name1 = 'githubarchive:github.timeline'
  table1 = bq.table(name1)
  print(name1)
  for field in table1.schema:
    print('%s [%s]' % (field.name, field.data_type))

  # pylint: disable=protected-access
  print('sql representation: ' + table1._repr_sql_())

  print('')

  name2 = 'requestlogs.logs20140615'
  table2 = bq.table(name2)
  table2_md = table2.metadata
  print(name2)
  print('full name: ' + str(table2_md))
  # friendly_name and description are formatted with %s instead of being
  # concatenated: a non-string value (e.g. None for an unset field -- TODO
  # confirm against the bq metadata API) would otherwise raise TypeError,
  # while string values print exactly as before.
  print('friendly name: %s' % (table2_md.friendly_name,))
  print('description: %s' % (table2_md.description,))
  print('rows: ' + str(table2_md.rows))
  print('size: ' + str(table2_md.size))
  print('created: ' + str(table2_md.created_on))
  print('modified: ' + str(table2_md.modified_on))
  for field in table2.schema:
    print('%s [%s]' % (field.name, field.data_type))

  # First a sample with no arguments, then one with an explicit
  # bq.Sampling.default(count=10) strategy.
  print(table2.sample().to_dataframe())
  print(table2.sample(sampling=bq.Sampling.default(count=10)).to_dataframe())

  print('')

  table_list = bq.tables('requestlogs')
  print('Tables:')
  for t in table_list:
    print(t.name)
Ejemplo n.º 3
0
def main():
  """Demonstrate the query helpers of the `bq` module, printing every result.

  In order: run a one-row query and show its results (each row as JSON, then
  via to_dataframe()), build SQL text with bq.sql from a value, a table and a
  nested query, and finally print a sample of a query.
  """
  # Simple Query and QueryResult usage
  one_row_sql = 'SELECT * FROM [githubarchive:github.timeline] LIMIT 1'
  one_row_query = bq.query(one_row_sql)
  print(one_row_query)

  rows = one_row_query.results()
  print('Rows: %d' % len(rows))
  for record in rows:
    print(json.dumps(record, sort_keys=True, indent=2))

  print('DataFrame:')
  print(rows.to_dataframe())

  # SQL arguments: $name in the template is supplied as a keyword argument.
  created_at_template = ' '.join([
      'SELECT created_at',
      'FROM [githubarchive:github.timeline]',
      'WHERE repository_name = $name',
      'LIMIT 1',
  ])
  with_name = bq.sql(created_at_template, name='demo-logs-analysis')
  print('SQL (arg substitution):')
  print(with_name)

  # SQL table arg substitution: $table is supplied as a table object.
  timeline = bq.table('githubarchive:github.timeline')
  with_table = bq.sql('SELECT * FROM $table', table=timeline)
  print('SQL (table substitution):')
  print(with_table)

  # SQL nested queries: $q is supplied as another query object.
  inner_query = bq.query('SELECT * FROM [githubarchive:github.timeline]')
  nested = bq.sql('SELECT repository_name, created_at FROM $q LIMIT 1',
                  q=inner_query)
  print('SQL (nested queries):')
  print(nested)

  # SQL query sampling
  names_query = bq.query(
      'SELECT repository_name FROM [githubarchive:github.timeline]')
  sampled = names_query.sample()
  print(sampled.to_dataframe())
Ejemplo n.º 4
0
def main():
    """Exercise bq.query and bq.sql end to end, printing each intermediate.

    Steps, in output order: a one-row query with its results (JSON per row,
    then to_dataframe()), SQL built by bq.sql with a value, a table and a
    nested query as arguments, and a sampled query shown via to_dataframe().
    """
    source = '[githubarchive:github.timeline]'

    # Simple Query and QueryResult usage
    basic_query = bq.query('SELECT * FROM ' + source + ' LIMIT 1')
    print(basic_query)

    result_set = basic_query.results()
    row_count = len(result_set)
    print('Rows: ' + str(row_count))
    for entry in result_set:
        as_json = json.dumps(entry, sort_keys=True, indent=2)
        print(as_json)

    print('DataFrame:')
    frame = result_set.to_dataframe()
    print(frame)

    # SQL arguments -- the template's $name placeholder is passed by keyword.
    template = ('SELECT created_at FROM ' + source +
                ' WHERE repository_name = $name LIMIT 1')
    repo_name = 'demo-logs-analysis'
    substituted = bq.sql(template, name=repo_name)
    print('SQL (arg substitution):')
    print(substituted)

    # SQL table arg substitution -- $table is passed a table object.
    github_table = bq.table('githubarchive:github.timeline')
    table_sql = bq.sql('SELECT * FROM $table', table=github_table)
    print('SQL (table substitution):')
    print(table_sql)

    # SQL nested queries -- $q is passed another query object.
    everything = bq.query('SELECT * FROM ' + source)
    outer_sql = bq.sql(
        'SELECT repository_name, created_at FROM $q LIMIT 1', q=everything)
    print('SQL (nested queries):')
    print(outer_sql)

    # SQL query sampling
    repo_names = bq.query('SELECT repository_name FROM ' + source)
    print(repo_names.sample().to_dataframe())