Ejemplo n.º 1
0
def _execute_cell(args, cell_body):
  """Implements the BigQuery cell magic used to execute BQ queries.

   The supported syntax is:
     %%bq execute <args>
     [<inline SQL>]

  Args:
    args: the optional arguments following '%%bq execute'.
    cell_body: optional contents of the cell
  Returns:
    QueryResultsTable containing query result
  """
  query = google.datalab.utils.commands.get_notebook_item(args['query'])
  if args['verbose']:
    print(query.sql)

  query_params = _get_query_parameters(args, cell_body)

  if args['to_dataframe']:
    # re-parse the int arguments because they're passed as strings
    start_row = int(args['dataframe_start_row']) if args['dataframe_start_row'] else None
    max_rows = int(args['dataframe_max_rows']) if args['dataframe_max_rows'] else None
    output_options = QueryOutput.dataframe(start_row=start_row, max_rows=max_rows,
                                           use_cache=not args['nocache'])
  else:
    output_options = QueryOutput.table(name=args['table'], mode=args['mode'],
                                       use_cache=not args['nocache'],
                                       allow_large_results=args['large'])
  context = _construct_context_for_args(args)
  r = query.execute(output_options, context=context, query_params=query_params)
  return r.result()
Ejemplo n.º 2
0
def _extract_cell(args, cell_body):
    """Implements the BigQuery extract magic used to extract query or table data to GCS.

   The supported syntax is:
     %bq extract <args>

  Args:
    args: the arguments following '%bigquery extract'.
  """

    if args['table']:
        source = _get_table(args['table'])
        if not source:
            raise Exception('Could not find table %s' % args['table'])

        job = source.extract(args['path'],
                             format='CSV' if args['format'] == 'csv' else
                             'NEWLINE_DELIMITED_JSON',
                             csv_delimiter=args['delimiter'],
                             csv_header=args['header'],
                             compress=args['compress'])
    elif args['query'] or args['view']:
        source_name = args['view'] or args['query']
        source = google.datalab.utils.commands.get_notebook_item(source_name)
        if not source:
            raise Exception('Could not find ' +
                            ('view ' +
                             args['view'] if args['view'] else 'query ' +
                             args['query']))
        query = source if args[
            'query'] else google.datalab.bigquery.Query.from_view(source)
        query_params = _get_query_parameters(
            args, cell_body) if args['query'] else None

        output_options = QueryOutput.file(path=args['path'],
                                          format=args['format'],
                                          csv_delimiter=args['delimiter'],
                                          csv_header=args['header'],
                                          compress=args['compress'],
                                          use_cache=not args['nocache'])
        context = _construct_context_for_args(args)
        job = query.execute(output_options,
                            context=context,
                            query_params=query_params)
    else:
        raise Exception('A query, table, or view is needed to extract')

    if job.failed:
        raise Exception('Extract failed: %s' % str(job.fatal_error))
    elif job.errors:
        raise Exception('Extract completed with errors: %s' % str(job.errors))
    return job.result()
Ejemplo n.º 3
0
def _execute_cell(args, cell_body):
  """Implements the BigQuery cell magic used to execute BQ queries.

   The supported syntax is:
     %%bq execute <args>
     [<inline SQL>]

  Args:
    args: the optional arguments following '%%bq execute'.
    cell_body: optional contents of the cell
  Returns:
    QueryResultsTable containing query result
  """
  env = google.datalab.utils.commands.notebook_environment()
  config = google.datalab.utils.commands.parse_config(cell_body, env, False) or {}
  parameters = config.get('parameters') or []
  if parameters:
    jsonschema.validate({'parameters': parameters}, BigQuerySchema.QUERY_PARAMS_SCHEMA)
  table_name = google.datalab.bigquery.Query.resolve_parameters(args['table'], parameters)

  query = google.datalab.utils.commands.get_notebook_item(args['query'])
  if args['verbose']:
    print(query.sql)

  query_params = get_query_parameters(args, cell_body)

  if args['to_dataframe']:
    # re-parse the int arguments because they're passed as strings
    start_row = int(args['dataframe_start_row']) if args['dataframe_start_row'] else None
    max_rows = int(args['dataframe_max_rows']) if args['dataframe_max_rows'] else None
    output_options = QueryOutput.dataframe(start_row=start_row, max_rows=max_rows,
                                           use_cache=not args['nocache'])
  else:
    output_options = QueryOutput.table(
      name=table_name, mode=args['mode'], use_cache=not args['nocache'],
      allow_large_results=args['large'])
  context = google.datalab.utils._utils._construct_context_for_args(args)
  r = query.execute(output_options, context=context, query_params=query_params)
  return r.result()
Ejemplo n.º 4
0
def _execute_cell(args, cell_body):
  """Implements the BigQuery cell magic used to execute BQ queries.

   The supported syntax is:
     %%bq execute <args>
     [<inline SQL>]

  Args:
    args: the optional arguments following '%%bq execute'.
    cell_body: optional contents of the cell
  Returns:
    QueryResultsTable containing query result
  """
  env = google.datalab.utils.commands.notebook_environment()
  config = google.datalab.utils.commands.parse_config(cell_body, env, False) or {}
  parameters = config.get('parameters') or []
  if parameters:
    jsonschema.validate({'parameters': parameters}, BigQuerySchema.QUERY_PARAMS_SCHEMA)
  table_name = google.datalab.bigquery.Query.resolve_parameters(args['table'], parameters)

  query = google.datalab.utils.commands.get_notebook_item(args['query'])
  if args['verbose']:
    print(query.sql)

  query_params = get_query_parameters(args, cell_body)

  if args['to_dataframe']:
    # re-parse the int arguments because they're passed as strings
    start_row = int(args['dataframe_start_row']) if args['dataframe_start_row'] else None
    max_rows = int(args['dataframe_max_rows']) if args['dataframe_max_rows'] else None
    output_options = QueryOutput.dataframe(start_row=start_row, max_rows=max_rows,
                                           use_cache=not args['nocache'])
  else:
    output_options = QueryOutput.table(
      name=table_name, mode=args['mode'], use_cache=not args['nocache'],
      allow_large_results=args['large'])
  context = google.datalab.utils._utils._construct_context_for_args(args)
  r = query.execute(output_options, context=context, query_params=query_params)
  return r.result()
Ejemplo n.º 5
0
def _execute_cell(args, cell_body):
    """Implements the BigQuery cell magic used to execute BQ queries.

   The supported syntax is:
     %%bq execute <args>
     [<inline SQL>]

  Args:
    args: the optional arguments following '%%bq execute'.
    cell_body: optional contents of the cell
  Returns:
    QueryResultsTable containing query result
  """
    query = google.datalab.utils.commands.get_notebook_item(args['query'])
    if args['verbose']:
        print(query.sql)

    query_params = _get_query_parameters(args, cell_body)

    if args['to_dataframe']:
        # re-parse the int arguments because they're passed as strings
        start_row = int(args['dataframe_start_row']
                        ) if args['dataframe_start_row'] else None
        max_rows = int(
            args['dataframe_max_rows']) if args['dataframe_max_rows'] else None
        output_options = QueryOutput.dataframe(start_row=start_row,
                                               max_rows=max_rows,
                                               use_cache=not args['nocache'])
    else:
        output_options = QueryOutput.table(name=args['table'],
                                           mode=args['mode'],
                                           use_cache=not args['nocache'],
                                           allow_large_results=args['large'])
    context = _construct_context_for_args(args)
    r = query.execute(output_options,
                      context=context,
                      query_params=query_params)
    return r.result()
Ejemplo n.º 6
0
def _extract_cell(args, cell_body):
  """Implements the BigQuery extract magic used to extract query or table data to GCS.

   The supported syntax is:
     %bq extract <args>

  Args:
    args: the arguments following '%bigquery extract'.
  """

  env = google.datalab.utils.commands.notebook_environment()
  config = google.datalab.utils.commands.parse_config(cell_body, env, False) or {}
  parameters = config.get('parameters')
  if args['table']:
    table = google.datalab.bigquery.Query.resolve_parameters(args['table'], parameters)
    source = _get_table(table)
    if not source:
      raise Exception('Could not find table %s' % table)

    csv_delimiter = args['delimiter'] if args['format'] == 'csv' else None
    path = google.datalab.bigquery.Query.resolve_parameters(args['path'], parameters)
    job = source.extract(path, format=args['format'], csv_delimiter=csv_delimiter,
                         csv_header=args['header'], compress=args['compress'])
  elif args['query'] or args['view']:
    source_name = args['view'] or args['query']
    source = google.datalab.utils.commands.get_notebook_item(source_name)
    if not source:
      raise Exception('Could not find ' +
                      ('view ' + args['view'] if args['view'] else 'query ' + args['query']))
    query = source if args['query'] else bigquery.Query.from_view(source)
    query_params = get_query_parameters(args, cell_body) if args['query'] else None

    output_options = QueryOutput.file(path=args['path'], format=args['format'],
                                      csv_delimiter=args['delimiter'],
                                      csv_header=args['header'], compress=args['compress'],
                                      use_cache=not args['nocache'])
    context = google.datalab.utils._utils._construct_context_for_args(args)
    job = query.execute(output_options, context=context, query_params=query_params)
  else:
    raise Exception('A query, table, or view is needed to extract')

  if job.failed:
    raise Exception('Extract failed: %s' % str(job.fatal_error))
  elif job.errors:
    raise Exception('Extract completed with errors: %s' % str(job.errors))
  return job.result()
Ejemplo n.º 7
0
def _extract_cell(args, cell_body):
  """Implements the BigQuery extract magic used to extract query or table data to GCS.

   The supported syntax is:
     %bq extract <args>

  Args:
    args: the arguments following '%bigquery extract'.
  """

  if args['table']:
    source = _get_table(args['table'])
    if not source:
      raise Exception('Could not find table %s' % args['table'])

    job = source.extract(args['path'],
                         format='CSV' if args['format'] == 'csv' else 'NEWLINE_DELIMITED_JSON',
                         csv_delimiter=args['delimiter'], csv_header=args['header'],
                         compress=args['compress'])
  elif args['query'] or args['view']:
    source = google.datalab.utils.commands.get_notebook_item(args['view'])
    if not source:
      raise Exception('Could not find ' +
                      ('view ' + args['view'] if args['view'] else 'query ' + args['query']))
    query = source if args['query'] else google.datalab.bigquery.Query.from_view(source)
    query_params = _get_query_parameters(args, cell_body) if args['query'] else None

    output_options = QueryOutput.file(path=args['path'], format=args['format'],
                                      csv_delimiter=args['delimiter'],
                                      csv_header=args['header'], compress=args['compress'],
                                      use_cache=not args['nocache'])
    context = _construct_context_for_args(args)
    job = query.execute(output_options, context=context, query_params=query_params)
  else:
    raise Exception('A query, table, or view is needed to extract')

  if job.failed:
    raise Exception('Extract failed: %s' % str(job.fatal_error))
  elif job.errors:
    raise Exception('Extract completed with errors: %s' % str(job.errors))
  return job.result()
Ejemplo n.º 8
0
def _sample_cell(args, cell_body):
    """Implements the BigQuery sample magic for sampling queries
  The supported sytanx is:
    %%bq sample <args>
     [<inline SQL>]
  Args:
    args: the optional arguments following '%%bq sample'.
    cell_body: optional contents of the cell
  Returns:
    The results of executing the sampling query, or a profile of the sample data.
  """

    query = None
    table = None
    view = None
    query_params = None

    if args['query']:
        query = google.datalab.utils.commands.get_notebook_item(args['query'])
        if query is None:
            raise Exception('Cannot find query %s.' % args['query'])
        query_params = _get_query_parameters(args, cell_body)

    elif args['table']:
        table = _get_table(args['table'])
        if not table:
            raise Exception('Could not find table %s' % args['table'])
    elif args['view']:
        view = google.datalab.utils.commands.get_notebook_item(args['view'])
        if not isinstance(view, google.datalab.bigquery.View):
            raise Exception('Could not find view %s' % args['view'])
    else:
        raise Exception('A query, table, or view is neede to sample')

    # parse comma-separated list of fields
    fields = args['fields'].split(',') if args['fields'] else None
    count = int(args['count']) if args['count'] else None
    percent = int(args['percent']) if args['percent'] else None
    sampling = Sampling._auto(method=args['method'],
                              fields=fields,
                              count=count,
                              percent=percent,
                              key_field=args['key_field'],
                              ascending=(args['order'] == 'ascending'))

    context = _construct_context_for_args(args)

    if view:
        query = google.datalab.bigquery.Query.from_view(view)
    elif table:
        query = google.datalab.bigquery.Query.from_table(table)

    if args['profile']:
        results = query.execute(QueryOutput.dataframe(),
                                sampling=sampling,
                                context=context,
                                query_params=query_params).result()
    else:
        results = query.execute(QueryOutput.table(),
                                sampling=sampling,
                                context=context,
                                query_params=query_params).result()

    if args['verbose']:
        print(query.sql)

    if args['profile']:
        return google.datalab.utils.commands.profile_df(results)
    else:
        return results
Ejemplo n.º 9
0
def _sample_cell(args, cell_body):
  """Implements the BigQuery sample magic for sampling queries
  The supported sytanx is:
    %%bq sample <args>
     [<inline SQL>]
  Args:
    args: the optional arguments following '%%bq sample'.
    cell_body: optional contents of the cell
  Returns:
    The results of executing the sampling query, or a profile of the sample data.
  """

  query = None
  table = None
  view = None
  query_params = None

  if args['query']:
    query = google.datalab.utils.commands.get_notebook_item(args['query'])
    if query is None:
      raise Exception('Cannot find query %s.' % args['query'])
    query_params = _get_query_parameters(args, cell_body)

  elif args['table']:
    table = _get_table(args['table'])
    if not table:
      raise Exception('Could not find table %s' % args['table'])
  elif args['view']:
    view = google.datalab.utils.commands.get_notebook_item(args['view'])
    if not isinstance(view, google.datalab.bigquery.View):
      raise Exception('Could not find view %s' % args['view'])
  else:
    raise Exception('A query, table, or view is neede to sample')

  # parse comma-separated list of fields
  fields = args['fields'].split(',') if args['fields'] else None
  count = int(args['count']) if args['count'] else None
  percent = int(args['percent']) if args['percent'] else None
  sampling = Sampling._auto(method=args['method'], fields=fields, count=count, percent=percent,
                            key_field=args['key_field'], ascending=(args['order'] == 'ascending'))

  context = _construct_context_for_args(args)

  if view:
    query = google.datalab.bigquery.Query.from_view(view)
  elif table:
    query = google.datalab.bigquery.Query.from_table(table)

  if args['profile']:
    results = query.execute(QueryOutput.dataframe(), sampling=sampling,
                            context=context, query_params=query_params).result()
  else:
    results = query.execute(QueryOutput.table(), sampling=sampling, context=context,
                            query_params=query_params).result()

  if args['verbose']:
    print(query.sql)

  if args['profile']:
    return google.datalab.utils.commands.profile_df(results)
  else:
    return results