def _execute_cell(args, cell_body): """Implements the BigQuery cell magic used to execute BQ queries. The supported syntax is: %%bq execute <args> [<inline SQL>] Args: args: the optional arguments following '%%bq execute'. cell_body: optional contents of the cell Returns: QueryResultsTable containing query result """ query = google.datalab.utils.commands.get_notebook_item(args['query']) if args['verbose']: print(query.sql) query_params = _get_query_parameters(args, cell_body) if args['to_dataframe']: # re-parse the int arguments because they're passed as strings start_row = int(args['dataframe_start_row']) if args['dataframe_start_row'] else None max_rows = int(args['dataframe_max_rows']) if args['dataframe_max_rows'] else None output_options = QueryOutput.dataframe(start_row=start_row, max_rows=max_rows, use_cache=not args['nocache']) else: output_options = QueryOutput.table(name=args['table'], mode=args['mode'], use_cache=not args['nocache'], allow_large_results=args['large']) context = _construct_context_for_args(args) r = query.execute(output_options, context=context, query_params=query_params) return r.result()
def _extract_cell(args, cell_body): """Implements the BigQuery extract magic used to extract query or table data to GCS. The supported syntax is: %bq extract <args> Args: args: the arguments following '%bigquery extract'. """ if args['table']: source = _get_table(args['table']) if not source: raise Exception('Could not find table %s' % args['table']) job = source.extract(args['path'], format='CSV' if args['format'] == 'csv' else 'NEWLINE_DELIMITED_JSON', csv_delimiter=args['delimiter'], csv_header=args['header'], compress=args['compress']) elif args['query'] or args['view']: source_name = args['view'] or args['query'] source = google.datalab.utils.commands.get_notebook_item(source_name) if not source: raise Exception('Could not find ' + ('view ' + args['view'] if args['view'] else 'query ' + args['query'])) query = source if args[ 'query'] else google.datalab.bigquery.Query.from_view(source) query_params = _get_query_parameters( args, cell_body) if args['query'] else None output_options = QueryOutput.file(path=args['path'], format=args['format'], csv_delimiter=args['delimiter'], csv_header=args['header'], compress=args['compress'], use_cache=not args['nocache']) context = _construct_context_for_args(args) job = query.execute(output_options, context=context, query_params=query_params) else: raise Exception('A query, table, or view is needed to extract') if job.failed: raise Exception('Extract failed: %s' % str(job.fatal_error)) elif job.errors: raise Exception('Extract completed with errors: %s' % str(job.errors)) return job.result()
def _execute_cell(args, cell_body): """Implements the BigQuery cell magic used to execute BQ queries. The supported syntax is: %%bq execute <args> [<inline SQL>] Args: args: the optional arguments following '%%bq execute'. cell_body: optional contents of the cell Returns: QueryResultsTable containing query result """ env = google.datalab.utils.commands.notebook_environment() config = google.datalab.utils.commands.parse_config(cell_body, env, False) or {} parameters = config.get('parameters') or [] if parameters: jsonschema.validate({'parameters': parameters}, BigQuerySchema.QUERY_PARAMS_SCHEMA) table_name = google.datalab.bigquery.Query.resolve_parameters(args['table'], parameters) query = google.datalab.utils.commands.get_notebook_item(args['query']) if args['verbose']: print(query.sql) query_params = get_query_parameters(args, cell_body) if args['to_dataframe']: # re-parse the int arguments because they're passed as strings start_row = int(args['dataframe_start_row']) if args['dataframe_start_row'] else None max_rows = int(args['dataframe_max_rows']) if args['dataframe_max_rows'] else None output_options = QueryOutput.dataframe(start_row=start_row, max_rows=max_rows, use_cache=not args['nocache']) else: output_options = QueryOutput.table( name=table_name, mode=args['mode'], use_cache=not args['nocache'], allow_large_results=args['large']) context = google.datalab.utils._utils._construct_context_for_args(args) r = query.execute(output_options, context=context, query_params=query_params) return r.result()
def _execute_cell(args, cell_body): """Implements the BigQuery cell magic used to execute BQ queries. The supported syntax is: %%bq execute <args> [<inline SQL>] Args: args: the optional arguments following '%%bq execute'. cell_body: optional contents of the cell Returns: QueryResultsTable containing query result """ query = google.datalab.utils.commands.get_notebook_item(args['query']) if args['verbose']: print(query.sql) query_params = _get_query_parameters(args, cell_body) if args['to_dataframe']: # re-parse the int arguments because they're passed as strings start_row = int(args['dataframe_start_row'] ) if args['dataframe_start_row'] else None max_rows = int( args['dataframe_max_rows']) if args['dataframe_max_rows'] else None output_options = QueryOutput.dataframe(start_row=start_row, max_rows=max_rows, use_cache=not args['nocache']) else: output_options = QueryOutput.table(name=args['table'], mode=args['mode'], use_cache=not args['nocache'], allow_large_results=args['large']) context = _construct_context_for_args(args) r = query.execute(output_options, context=context, query_params=query_params) return r.result()
def _extract_cell(args, cell_body): """Implements the BigQuery extract magic used to extract query or table data to GCS. The supported syntax is: %bq extract <args> Args: args: the arguments following '%bigquery extract'. """ env = google.datalab.utils.commands.notebook_environment() config = google.datalab.utils.commands.parse_config(cell_body, env, False) or {} parameters = config.get('parameters') if args['table']: table = google.datalab.bigquery.Query.resolve_parameters(args['table'], parameters) source = _get_table(table) if not source: raise Exception('Could not find table %s' % table) csv_delimiter = args['delimiter'] if args['format'] == 'csv' else None path = google.datalab.bigquery.Query.resolve_parameters(args['path'], parameters) job = source.extract(path, format=args['format'], csv_delimiter=csv_delimiter, csv_header=args['header'], compress=args['compress']) elif args['query'] or args['view']: source_name = args['view'] or args['query'] source = google.datalab.utils.commands.get_notebook_item(source_name) if not source: raise Exception('Could not find ' + ('view ' + args['view'] if args['view'] else 'query ' + args['query'])) query = source if args['query'] else bigquery.Query.from_view(source) query_params = get_query_parameters(args, cell_body) if args['query'] else None output_options = QueryOutput.file(path=args['path'], format=args['format'], csv_delimiter=args['delimiter'], csv_header=args['header'], compress=args['compress'], use_cache=not args['nocache']) context = google.datalab.utils._utils._construct_context_for_args(args) job = query.execute(output_options, context=context, query_params=query_params) else: raise Exception('A query, table, or view is needed to extract') if job.failed: raise Exception('Extract failed: %s' % str(job.fatal_error)) elif job.errors: raise Exception('Extract completed with errors: %s' % str(job.errors)) return job.result()
def _extract_cell(args, cell_body): """Implements the BigQuery extract magic used to extract query or table data to GCS. The supported syntax is: %bq extract <args> Args: args: the arguments following '%bigquery extract'. """ if args['table']: source = _get_table(args['table']) if not source: raise Exception('Could not find table %s' % args['table']) job = source.extract(args['path'], format='CSV' if args['format'] == 'csv' else 'NEWLINE_DELIMITED_JSON', csv_delimiter=args['delimiter'], csv_header=args['header'], compress=args['compress']) elif args['query'] or args['view']: source = google.datalab.utils.commands.get_notebook_item(args['view']) if not source: raise Exception('Could not find ' + ('view ' + args['view'] if args['view'] else 'query ' + args['query'])) query = source if args['query'] else google.datalab.bigquery.Query.from_view(source) query_params = _get_query_parameters(args, cell_body) if args['query'] else None output_options = QueryOutput.file(path=args['path'], format=args['format'], csv_delimiter=args['delimiter'], csv_header=args['header'], compress=args['compress'], use_cache=not args['nocache']) context = _construct_context_for_args(args) job = query.execute(output_options, context=context, query_params=query_params) else: raise Exception('A query, table, or view is needed to extract') if job.failed: raise Exception('Extract failed: %s' % str(job.fatal_error)) elif job.errors: raise Exception('Extract completed with errors: %s' % str(job.errors)) return job.result()
def _sample_cell(args, cell_body): """Implements the BigQuery sample magic for sampling queries The supported sytanx is: %%bq sample <args> [<inline SQL>] Args: args: the optional arguments following '%%bq sample'. cell_body: optional contents of the cell Returns: The results of executing the sampling query, or a profile of the sample data. """ query = None table = None view = None query_params = None if args['query']: query = google.datalab.utils.commands.get_notebook_item(args['query']) if query is None: raise Exception('Cannot find query %s.' % args['query']) query_params = _get_query_parameters(args, cell_body) elif args['table']: table = _get_table(args['table']) if not table: raise Exception('Could not find table %s' % args['table']) elif args['view']: view = google.datalab.utils.commands.get_notebook_item(args['view']) if not isinstance(view, google.datalab.bigquery.View): raise Exception('Could not find view %s' % args['view']) else: raise Exception('A query, table, or view is neede to sample') # parse comma-separated list of fields fields = args['fields'].split(',') if args['fields'] else None count = int(args['count']) if args['count'] else None percent = int(args['percent']) if args['percent'] else None sampling = Sampling._auto(method=args['method'], fields=fields, count=count, percent=percent, key_field=args['key_field'], ascending=(args['order'] == 'ascending')) context = _construct_context_for_args(args) if view: query = google.datalab.bigquery.Query.from_view(view) elif table: query = google.datalab.bigquery.Query.from_table(table) if args['profile']: results = query.execute(QueryOutput.dataframe(), sampling=sampling, context=context, query_params=query_params).result() else: results = query.execute(QueryOutput.table(), sampling=sampling, context=context, query_params=query_params).result() if args['verbose']: print(query.sql) if args['profile']: return google.datalab.utils.commands.profile_df(results) else: return results