def _chart_cell(args, cell):
  """Builds the notebook HTML that renders a data source as a chart.

  Args:
    args: the parsed magic arguments; 'data', 'fields' and 'chart' are read.
    cell: the cell contents, parsed as the chart options.
  Returns:
    An IPython HTML object holding the chart container and the script that renders it.
  """
  data_source = args['data']
  shell = IPython.get_ipython()
  options = _utils.parse_config(cell, shell.user_ns)
  if options is None:
    options = {}
  field_spec = args['fields'] or '*'
  template = u"""
    <div class="bqgc" id="%s">
    </div>
    <script>
      require(['extensions/charting', 'element!%s', 'style!/static/extensions/charting.css'],
        function(charts, dom) {
          charts.render(dom, {chartStyle:'%s', dataName:'%s', fields:'%s'}, %s, %s);
        }
      );
    </script>
  """
  element_id = _html.Html.next_id()
  style = args['chart']
  # Paged tables are fetched 25 rows at a time; every other chart passes -1 as the count.
  row_count = -1
  if style == 'paged_table':
    row_count = 25
  rows, _ = _utils.get_data(data_source, field_spec, 0, row_count)
  substitutions = (
      element_id,
      element_id,
      style,
      _utils.get_data_source_index(data_source),
      field_spec,
      json.dumps(options, cls=gcp._util.JSONEncoder),
      json.dumps(rows, cls=gcp._util.JSONEncoder),
  )
  return IPython.core.display.HTML(template % substitutions)
def _get_query_argument(args, cell, env):
  """Get a query argument to a cell magic.

  The query is specified with args['query']. We look that up and if it is a BQ query
  just return it. If it is instead a SqlModule or SqlStatement it may have variable
  references. We resolve those using the arg parser for the SqlModule, then override
  the resulting defaults with the values parsed from the cell contents.

  Args:
    args: the dictionary of magic arguments.
    cell: the cell contents which can be variable value overrides (if args has a 'query'
        value) or inline SQL otherwise.
    env: a dictionary that is used for looking up variable values.
  Returns:
    A Query object.
  Raises:
    Exception: if there is no 'query' argument and the cell is not a string of inline SQL.
  """
  sql_arg = args.get('query', None)
  if sql_arg is None:
    # Assume we have inline SQL in the cell
    if not isinstance(cell, basestring):
      raise Exception('Expected a --query argument or inline SQL')
    return gcp.bigquery.Query(cell, values=env)

  item = _get_notebook_item(sql_arg)
  if isinstance(item, gcp.bigquery.Query):  # Queries are already expanded.
    return item

  # Create an expanded BQ Query.
  config = _utils.parse_config(cell, env)
  item, env = gcp.data.SqlModule.get_sql_statement_with_environment(item, config)
  if config:
    # Merge the *parsed* overrides; updating a dict with the raw cell text would raise.
    env.update(config)
  return gcp.bigquery.Query(item, values=env)
def _create_cell(args, cell_body):
  """Implements the BigQuery cell magic used to create datasets and tables.

   The supported syntax is:

     %%bigquery create dataset -n|--name <name> [-f|--friendly <friendlyname>]
     [<description>]

   or:

     %%bigquery create table -n|--name <tablename> [--overwrite]
     [<YAML or JSON cell_body defining schema to use for tables>]

  Failures are reported by printing a message rather than by raising.

  Args:
    args: the argument following '%bigquery create <command>'.
    cell_body: the cell contents; used as the description when creating a dataset and
        parsed as the schema when creating a table.
  """
  name = args['name']
  if args['command'] == 'dataset':
    try:
      gcp.bigquery.DataSet(name).create(friendly_name=args['friendly'],
                                        description=cell_body)
    except Exception as e:
      print('Failed to create dataset %s: %s' % (name, e))
    return

  if cell_body is None:
    print('Failed to create %s: no schema specified' % name)
    return

  try:
    record = _utils.parse_config(cell_body, _utils.notebook_environment(), as_dict=False)
    schema = gcp.bigquery.Schema(record)
    gcp.bigquery.Table(name).create(schema=schema, overwrite=args['overwrite'])
  except Exception as e:
    print('Failed to create table %s: %s' % (name, e))
def _pipeline_cell(args, config):
  """Implements the BigQuery cell magic used to validate, execute or deploy BQ pipelines.

   The supported syntax is:

     %%bigquery pipeline -q|--sql <query identifier> <other args> <action>
     <config>

  Args:
    args: the arguments following '%bigquery pipeline'.
    config: optional contents of the cell interpreted as YAML or JSON.
  Returns:
    For the 'run' action, the results of executing the query. For the 'dryrun' action, a
    QueryStats object built from the dry-run response. None for any other action.
  Raises:
    Exception: if the action is 'deploy', which is not supported yet.
  """
  action = args['action']
  if action == 'deploy':
    raise Exception('Deploying a pipeline is not yet supported')

  # Only the UDF function calls from the notebook environment are made visible to the query.
  env = {key: value for key, value in _notebook_environment().items()
         if isinstance(value, gcp.bigquery._udf.FunctionCall)}
  config = _utils.parse_config(config, env)
  query = _get_query_argument(args, config, env)

  # A dry run always echoes the SQL; print it exactly once even if --verbose is also set.
  if args['verbose'] or action == 'dryrun':
    print(query.sql)

  if action == 'dryrun':
    result = query.execute_dry_run()
    return gcp.bigquery._query_stats.QueryStats(total_bytes=result['totalBytesProcessed'],
                                                is_cached=result['cacheHit'])
  if action == 'run':
    return query.execute(args['target'], table_mode=args['mode'],
                         use_cache=not args['nocache'],
                         allow_large_results=args['large']).results
  return None
def _get_query_argument(args, cell, env):
  """Resolve the query that a cell magic should operate on.

  When args['query'] names a notebook item that is already a BQ Query it is returned
  unchanged. Any other named item is expanded into a Query: the cell contents are parsed
  as configuration, passed to the SqlModule when resolving the statement, and then
  applied again on top of the resulting environment as overrides. Without a 'query'
  argument the cell itself is treated as inline SQL.

  Args:
    args: the dictionary of magic arguments.
    cell: the cell contents which can be variable value overrides (if args has a 'query'
        value) or inline SQL otherwise.
    env: a dictionary that is used for looking up variable values.
  Returns:
    A Query object.
  """
  query_name = args.get('query', None)
  if query_name is not None:
    resolved = _utils.get_notebook_item(query_name)
    if isinstance(resolved, gcp.bigquery.Query):
      # Queries are already expanded.
      return resolved

    # Create an expanded BQ Query.
    overrides = _utils.parse_config(cell, env)
    statement, values = gcp.data.SqlModule.get_sql_statement_with_environment(resolved,
                                                                              overrides)
    if cell:
      # The parsed config is both a fallback and an override.
      values.update(overrides)
    return gcp.bigquery.Query(statement, values=values)

  # No --query argument: assume we have inline SQL in the cell.
  if isinstance(cell, basestring):
    return gcp.bigquery.Query(cell, values=env)
  raise Exception('Expected a --query argument or inline SQL')
def _chart_cell(args, cell):
  """Produces the HTML and script that draw a chart for a data source.

  Args:
    args: the parsed magic arguments; 'data', 'fields' and 'chart' are read.
    cell: the cell contents, parsed as the chart options.
  Returns:
    An IPython HTML object holding the chart container and the script that renders it.
  """
  source = args['data']
  user_namespace = IPython.get_ipython().user_ns
  chart_options = _utils.parse_config(cell, user_namespace)
  if chart_options is None:
    chart_options = {}
  fields = args['fields'] or '*'
  page_template = """
    <div class="bqgc" id="%s">
    </div>
    <script>
      require(['extensions/charting', 'element!%s', 'style!/static/extensions/charting.css'],
        function(charts, dom) {
          charts.render(dom, {chartStyle:'%s', dataName:'%s', fields:'%s'}, %s, %s);
        }
      );
    </script>
  """
  div_id = _html.Html.next_id()
  chart_type = args['chart']
  # Paged tables are fetched 25 rows at a time; every other chart passes -1 as the count.
  if chart_type == 'paged_table':
    count = 25
  else:
    count = -1
  data, _ = _utils.get_data(source, fields, 0, count)
  source_index = _utils.get_data_source_index(source)
  options_json = json.dumps(chart_options, cls=gcp._util.JSONEncoder)
  data_json = json.dumps(data, ensure_ascii=False, cls=gcp._util.JSONEncoder)
  rendered = page_template % (div_id, div_id, chart_type, source_index, fields,
                              options_json, data_json)
  return IPython.core.display.HTML(rendered)
def _sample_cell(args, cell_body):
  """Implements the bigquery sample cell magic for ipython notebooks.

  The data to sample is chosen, in order of precedence, from args['query'], args['table']
  or args['view']; if none of them is set the cell body is treated as inline SQL.

  Args:
    args: the optional arguments following '%%bigquery sample'.
    cell_body: optional contents of the cell interpreted as SQL, YAML or JSON.
  Returns:
    The results of sampling the selected query, view or table.
  Raises:
    Exception: if args['view'] names a notebook item that is not a view.
  """
  env = _notebook_environment()
  query = None
  table = None
  view = None

  if args['query']:
    config = _utils.parse_config(cell_body, env)
    query = _get_query_argument(args, config, env)
  elif args['table']:
    table = _get_table(args['table'])
  elif args['view']:
    view = _get_notebook_item(args['view'])
    if not isinstance(view, gcp.bigquery.View):
      raise Exception('%s is not a view' % args['view'])
  else:
    query = gcp.bigquery.Query(cell_body, values=env)

  count = args['count']
  method = args['method']
  if method == 'random':
    sampling = gcp.bigquery.Sampling.random(percent=args['percent'], count=count)
  elif method == 'hashed':
    sampling = gcp.bigquery.Sampling.hashed(field_name=args['field'],
                                            percent=args['percent'], count=count)
  elif method == 'sorted':
    ascending = args['order'] == 'ascending'
    sampling = gcp.bigquery.Sampling.sorted(args['field'], ascending=ascending, count=count)
  else:
    # 'limit' and any unrecognised method both use the default sampling.
    sampling = gcp.bigquery.Sampling.default(count=count)

  if query:
    results = query.sample(sampling=sampling)
  elif view:
    results = view.sample(sampling=sampling)
  else:
    results = table.sample(sampling=sampling)

  if args['verbose']:
    print(results.sql)
  return results
def _chart_cell(args, cell):
  """Renders a chart for a data source by delegating to the shared chart HTML builder.

  Args:
    args: the parsed magic arguments; 'data', 'chart' and 'fields' are read.
    cell: the cell contents, parsed as the chart options.
  Returns:
    An IPython HTML object wrapping the generated chart markup.
  Raises:
    Exception: if the parsed chart options are not a dictionary.
  """
  source = args['data']
  user_namespace = IPython.get_ipython().user_ns
  options = _utils.parse_config(cell, user_namespace)
  if options is None:
    options = {}
  if not isinstance(options, dict):
    raise Exception("Could not parse chart options")
  markup = _utils.chart_html('gcharts', args['chart'], source=source,
                             chart_options=options, fields=args['fields'] or '*')
  return IPython.core.display.HTML(markup)
def _get_chart_data(line, cell_body=''):
  """Fetches a range of rows from a previously registered chart data source.

  Any failure is reported through gcp._util.print_exception_with_last_stack and the data
  falls back to an empty dictionary.

  Args:
    line: whitespace-separated arguments: the index of the data source, the fields, and
        optionally the first row (default 0) and the row count (default -1).
    cell_body: optional cell contents, parsed as configuration and passed to the data
        fetch as its environment.
  """
  try:
    tokens = line.strip().split()
    source = _utils._data_sources[int(tokens[0])]
    fields = tokens[1]
    first_row = 0
    count = -1
    if len(tokens) > 2:
      first_row = int(tokens[2])
    if len(tokens) > 3:
      count = int(tokens[3])
    env = _utils.parse_config(cell_body, IPython.get_ipython().user_ns)
    data, _ = _utils.get_data(source, fields, env, first_row, count)
  except Exception as e:
    gcp._util.print_exception_with_last_stack(e)
    data = {}
  # NOTE(review): `data` is computed but never returned or used in the visible code, so
  # this function currently returns None - confirm whether a return statement is missing.
def _view(args, cell):
  """Shows the first lines of a CSV input as an HTML table or as a profile.

  Args:
    args: the parsed magic arguments; 'input', 'count' and 'profile' are read. A falsy
        'count' means 5 lines.
    cell: optional cell contents parsed as configuration; a 'columns' entry holds a
        comma-separated list of column headers.
  Returns:
    The profile of the data if args['profile'] is set, otherwise an IPython HTML object
    with the data rendered as a table without its index.
  """
  source = gcp.data.Csv(args['input'])
  line_count = int(args['count'] or 5)
  column_names = None
  if cell:
    config = _utils.parse_config(cell, IPython.get_ipython().user_ns)
    if 'columns' in config:
      column_names = [name.strip() for name in config['columns'].split(',')]
  frame = pd.DataFrame(source.browse(line_count, column_names))
  if not args['profile']:
    return IPython.core.display.HTML(frame.to_html(index=False))
  # TODO(gram): We need to generate a schema and type-convert the columns before this
  # will be useful for CSV
  return _utils.profile_df(frame)
def _chart_cell(args, cell):
  """Builds the chart for a data source using the shared chart HTML helper.

  Args:
    args: the parsed magic arguments; 'data', 'chart' and 'fields' are read.
    cell: the cell contents, parsed as the chart options.
  Returns:
    An IPython HTML object wrapping the generated chart markup.
  Raises:
    Exception: if the parsed chart options are not a dictionary.
  """
  data_source = args['data']
  shell = IPython.get_ipython()
  parsed = _utils.parse_config(cell, shell.user_ns)
  if parsed is not None and not isinstance(parsed, dict):
    raise Exception("Could not parse chart options")
  chart_options = {} if parsed is None else parsed
  style = args['chart']
  field_spec = args['fields'] or '*'
  html = _utils.chart_html('gcharts', style, source=data_source,
                           chart_options=chart_options, fields=field_spec)
  return IPython.core.display.HTML(html)
def _execute_cell(args, config):
  """Implements the BigQuery cell magic used to execute BQ queries.

   The supported syntax is:

     %%bigquery execute -q|--sql <query identifier> <other args>
     <config>

  Args:
    args: the arguments following '%bigquery execute'.
    config: optional contents of the cell interpreted as YAML or JSON.
  Returns:
    The results of the executed query.
  """
  env = _notebook_environment()
  config = _utils.parse_config(config, env)
  query = _get_query_argument(args, config, env)
  if args['verbose']:
    print(query.sql)
  job = query.execute(args['target'], table_mode=args['mode'],
                      use_cache=not args['nocache'], allow_large_results=args['large'])
  return job.results
def _dryrun_cell(args, config):
  """Implements the BigQuery cell magic used to dry run BQ queries.

   The supported syntax is:

     %%bigquery dryrun -q|--sql <query identifier>
     <config>

  Args:
    args: the argument following '%bigquery dryrun'.
    config: optional contents of the cell interpreted as YAML or JSON.
  Returns:
    A QueryStats object built from the dry-run response's 'totalBytesProcessed' and
    'cacheHit' values.
  """
  env = _notebook_environment()
  config = _utils.parse_config(config, env)
  query = _get_query_argument(args, config, env)

  if args['verbose']:
    print(query.sql)

  result = query.execute_dry_run()
  return gcp.bigquery._query_stats.QueryStats(total_bytes=result['totalBytesProcessed'],
                                              is_cached=result['cacheHit'])
def _chart_cell(args, cell):
  """Renders a chart, with optional input controls, as notebook HTML.

  The cell contents are parsed as the chart options. A 'variables' entry, if present, is
  removed from the options and each variable in it is rendered as a (disabled) HTML
  control: a picker, set of checkboxes, checkbox, slider or textbox. The values of the
  variables are passed to the data fetch as its environment.

  Args:
    args: the parsed magic arguments; 'data', 'fields' and 'chart' are read.
    cell: the cell contents, parsed as the chart options.
  Returns:
    An IPython HTML object holding the controls, the chart container and the script that
    renders the chart.
  Raises:
    Exception: if the chart options are not a dictionary, a control is missing required
        settings, or a control has an unknown type.
  """
  source = args['data']
  ipy = IPython.get_ipython()
  chart_options = _utils.parse_config(cell, ipy.user_ns)
  if chart_options is None:
    chart_options = {}
  elif not isinstance(chart_options, dict):
    raise Exception("Could not parse chart options")
  fields = args['fields'] if args['fields'] else '*'
  div_id = _html.Html.next_id()
  env = {}
  controls_html = ''
  controls_ids = []
  if 'variables' in chart_options:
    # Remove the variables from the options just to make sure GCharts doesn't see them.
    variables = chart_options.pop('variables')
    try:
      item = _utils.get_notebook_item(source)
      _, defaults = gcp.data.SqlModule.get_sql_statement_with_environment(item, '')
    except Exception:
      # Best effort: without resolvable defaults the controls rely on their own values.
      defaults = {}
    for varname, control in variables.items():
      label = control.get('label', varname)
      control_id = div_id + '__' + varname
      controls_ids.append(control_id)
      value = control.get('value', defaults.get(varname, None))
      # The user should usually specify the type but we will default to 'textbox' for
      # strings and 'set' for lists.
      if isinstance(value, basestring):
        default_type = 'textbox'
      elif isinstance(value, list):
        default_type = 'set'
      else:
        default_type = None
      control_type = control.get('type', default_type)

      if control_type == 'picker':
        choices = control.get('choices', value)
        if not isinstance(choices, list) or len(choices) == 0:
          raise Exception('picker control must specify a nonempty set of choices')
        if value is None:
          value = choices[0]
        choices_html = ''
        for choice in choices:
          choices_html += "<option value=\"%s\" %s>%s</option>" % \
              (choice, ("selected=\"selected\"" if choice == value else ''), choice)
        control_html = "{label}<select disabled id=\"{id}\">{choices}</select>"\
            .format(label=label, id=control_id, choices=choices_html)
      elif control_type == 'set':  # Multi-picker; implemented as checkboxes.
        # TODO(gram): consider using "name" property of the control to group checkboxes.
        # That way we can save the code of constructing and parsing control Ids with
        # sequential numbers in it. Multiple checkboxes can share the same name.
        choices = control.get('choices', value)
        if not isinstance(choices, list) or len(choices) == 0:
          raise Exception('set control must specify a nonempty set of choices')
        if value is None:
          value = choices
        choices_html = ''
        controls_ids[-1] = '%s:%d' % (control_id, len(choices))  # replace ID to include count.
        for i, choice in enumerate(choices):
          checked = choice in value
          choice_id = '%s:%d' % (control_id, i)
          # TODO(gram): we may want a 'Submit/Refresh button as we may not want to rerun
          # query on each checkbox change.
          choices_html += """
            <div>
              <label>
                <input type="checkbox" id="{id}" value="{choice}" {checked} disabled>
                {choice}
              </label>
            </div>
          """.format(id=choice_id, choice=choice, checked="checked" if checked else '')
        control_html = "{label}<div>{choices}</div>".format(label=label, choices=choices_html)
      elif control_type == 'checkbox':
        control_html = """
          <label>
            <input type="checkbox" id="{id}" {checked} disabled>
            {label}
          </label>
        """.format(label=label, id=control_id, checked="checked" if value else '')
      elif control_type == 'slider':
        min_value = control.get('min', None)
        max_value = control.get('max', None)
        if min_value is None or max_value is None:
          raise Exception('slider control must specify a min and max value')
        if max_value <= min_value:
          raise Exception('slider control must specify a min value less than max value')
        # Integer ranges step by 1; any other range is divided into ten steps.
        if isinstance(min_value, int) and isinstance(max_value, int):
          default_step = 1
        else:
          default_step = (max_value - min_value) / 10.0
        step = control.get('step', default_step)
        if value is None:
          value = min_value
        control_html = """
          {label}
          <input type="text" class="gchart-slider_value" id="{id}_value" value="{value}" disabled/>
          <input type="range" class="gchart-slider" id="{id}" min="{min}" max="{max}" step="{step}"
              value="{value}" disabled/>
        """.format(label=label, id=control_id, value=value, min=min_value, max=max_value,
                   step=step)
      elif control_type == 'textbox':
        if value is None:
          value = ''
        control_html = "{label}<input type=\"text\" value=\"{value}\" id=\"{id}\" disabled/>"\
            .format(label=label, value=value, id=control_id)
      else:
        raise Exception(
            'Unknown control type %s (expected picker, slider, checkbox, textbox or set)'
            % control_type)

      env[varname] = value
      controls_html += "<div class=\"gchart-control\">{control}</div>\n"\
          .format(control=control_html)
    controls_html = "<div class=\"gchart-controls\">{controls}</div>"\
        .format(controls=controls_html)

  _HTML_TEMPLATE = """
    <div class="bqgc-container">
      {controls}
      <div class="bqgc{extra_class}" id="{id}">
      </div>
    </div>
    <script>
      require(['extensions/charting', 'element!{id}', 'style!/static/extensions/charting.css'],
        function(charts, dom) {{
          charts.render(dom,
            {{chartStyle:'{chart_type}', dataName:'{source}', fields:'{fields}'}},
            {options}, {data}, {control_ids});
        }}
      );
    </script>
  """
  chart_type = args['chart']
  count = 25 if chart_type == 'paged_table' else -1
  data, _ = _utils.get_data(source, fields, env, 0, count)
  # TODO(gram): check if we need to augment env with user_ns
  return IPython.core.display.HTML(
      _HTML_TEMPLATE.format(controls=controls_html,
                            id=div_id,
                            chart_type=chart_type,
                            extra_class=" bqgc-controlled" if controls_html else '',
                            source=_utils.get_data_source_index(source),
                            fields=fields,
                            options=json.dumps(chart_options, cls=gcp._util.JSONEncoder),
                            data=json.dumps(data, cls=gcp._util.JSONEncoder),
                            # Serialize as JSON so the ids form a valid JavaScript array;
                            # the repr of a list of unicode strings (u'...') would not.
                            control_ids=json.dumps(controls_ids)))