def main():
    """Walk through table inspection using the ``bq`` module.

    Prints, in order: the schema and SQL representation of a first table,
    the metadata, schema and two samples of a second table, and finally the
    names of the tables returned by ``bq.tables('requestlogs')``.
    """

    def show_schema(table):
        # One "name [type]" line per field of the table's schema.
        for column in table.schema:
            print(column.name + ' [' + column.data_type + ']')

    # --- First table: schema and SQL representation ---
    timeline_name = 'githubarchive:github.timeline'
    timeline = bq.table(timeline_name)

    print(timeline_name)
    show_schema(timeline)
    # pylint: disable=protected-access
    print('sql representation: ' + timeline._repr_sql_())
    print('')

    # --- Second table: metadata, schema and samples ---
    logs_name = 'requestlogs.logs20140615'
    logs = bq.table(logs_name)
    logs_md = logs.metadata

    print(logs_name)
    print('full name: ' + str(logs_md))
    print('friendly name: ' + logs_md.friendly_name)
    print('description: ' + logs_md.description)
    print('rows: ' + str(logs_md.rows))
    print('size: ' + str(logs_md.size))
    print('created: ' + str(logs_md.created_on))
    print('modified: ' + str(logs_md.modified_on))

    show_schema(logs)

    # Sample with the default settings, then with an explicit sampling
    # strategy (count=10).
    default_sample = logs.sample()
    print(default_sample.to_dataframe())
    ten_row_sampling = bq.Sampling.default(count=10)
    print(logs.sample(sampling=ten_row_sampling).to_dataframe())
    print('')

    # --- Table listing ---
    dataset_tables = bq.tables('requestlogs')
    print('Tables:')
    for entry in dataset_tables:
        print(entry.name)
def main():
    """Walk through query construction and execution using the ``bq`` module.

    Covers, in order: running a simple query and iterating its results,
    argument substitution into a SQL template, table substitution, nesting
    one query inside another, and sampling a query.
    """
    # Simple Query and QueryResult usage
    simple_sql = 'SELECT * FROM [githubarchive:github.timeline] LIMIT 1'
    simple_query = bq.query(simple_sql)
    print(simple_query)

    rows = simple_query.results()
    row_count = len(rows)
    print('Rows: ' + str(row_count))
    for record in rows:
        print(json.dumps(record, sort_keys=True, indent=2))

    print('DataFrame:')
    print(rows.to_dataframe())

    # SQL arguments
    repo_name = 'demo-logs-analysis'
    created_at_template = (
        'SELECT created_at '
        'FROM [githubarchive:github.timeline] '
        'WHERE repository_name = $name '
        'LIMIT 1'
    )
    substituted_sql = bq.sql(created_at_template, name=repo_name)
    print('SQL (arg substitution):')
    print(substituted_sql)

    # SQL table arg substitution
    timeline = bq.table('githubarchive:github.timeline')
    table_sql = bq.sql('SELECT * FROM $table', table=timeline)
    print('SQL (table substitution):')
    print(table_sql)

    # SQL nested queries
    inner_query = bq.query('SELECT * FROM [githubarchive:github.timeline]')
    nested_sql = bq.sql(
        'SELECT repository_name, created_at FROM $q LIMIT 1', q=inner_query)
    print('SQL (nested queries):')
    print(nested_sql)

    # SQL query sampling
    names_sql = 'SELECT repository_name FROM [githubarchive:github.timeline]'
    names_query = bq.query(names_sql)
    sampled = names_query.sample()
    print(sampled.to_dataframe())