Beispiel #1
0
def confirm_query(request, query, on_success_url=None):
    """
    Render the query editor pre-filled with ``query`` so it can be confirmed.

    Used by other forms to confirm a query before it's executed.
    The form is the same one execute_query works with, and the rendered
    page posts to the ``execute_query`` URL of the current app.

    query - The HQL about to be executed
    on_success_url - The page to go to upon successful execution
    """
    mform = QueryForm()
    mform.bind()
    mform.query.initial = dict(query=query)

    return render(
        "execute.mako",
        request,
        {
            "form": mform,
            "action": reverse(get_app_name(request) + ":execute_query"),
            "error_message": None,
            # "design" used to be listed twice with the same value; one entry
            # is enough and avoids the duplicate-key pitfall in dict literals.
            "design": None,
            "on_success_url": on_success_url,
            "autocomplete_base_url": reverse(get_app_name(request) + ":autocomplete", kwargs={}),
        },
    )
Beispiel #2
0
def _run_parameterized_query(request, design_id, explain):
    """
    Given a design and arguments to parameterize that design, runs the query.

    - explain is a boolean to determine whether to run as an explain or as an
      execute.

    This is an extra "step" in the flow from execute_query.

    Raises PopupException when the reconstituted query form is invalid or when
    the query has no parameters to substitute.

    When running the query raises, the error is expanded and shown on the
    "execute.mako" page instead of propagating.

    NOTE(review): in the code shown here nothing is returned when the
    parameterization form itself is invalid -- confirm that is intended.
    """
    design = authorized_get_design(request, design_id, must_exist=True)

    # Reconstitute the form: stored design values first, POSTed values on top.
    design_obj = beeswax.design.HQLdesign.loads(design.data)
    query_form = QueryForm()
    params = design_obj.get_query_dict()
    params.update(request.POST)

    databases = _get_db_choices(request)
    query_form.bind(params)
    query_form.query.fields["database"].choices = databases  # Could not do it in the form

    if not query_form.is_valid():
        raise PopupException(_("Query form is invalid: %s") % query_form.errors)

    query_str = query_form.query.cleaned_data["query"]
    app_name = get_app_name(request)
    query_server = get_query_server_config(app_name)
    query_type = SavedQuery.TYPES_MAPPING[app_name]

    parameterization_form_cls = make_parameterization_form(query_str)
    if not parameterization_form_cls:
        raise PopupException(_("Query is not parameterizable."))

    parameterization_form = parameterization_form_cls(request.REQUEST, prefix="parameterization")

    if parameterization_form.is_valid():
        # Substitute the submitted values and run the resulting statement.
        real_query = substitute_variables(query_str, parameterization_form.cleaned_data)
        query = HQLdesign(query_form, query_type=query_type)
        query._data_dict["query"]["query"] = real_query
        try:
            if explain:
                return explain_directly(request, query, design, query_server)
            else:
                return execute_directly(request, query, query_server, design)
        except Exception as ex:
            # Show the failure (message and server log) on the editor page.
            db = dbms.get(request.user, query_server)
            error_message, log = expand_exception(ex, db)
            return render(
                "execute.mako",
                request,
                {
                    "action": reverse(get_app_name(request) + ":execute_query"),
                    "design": design,
                    "error_message": error_message,
                    "form": query_form,
                    "log": log,
                    "autocomplete_base_url": reverse(get_app_name(request) + ":autocomplete", kwargs={}),
                },
            )
Beispiel #3
0
def query_history_to_dict(request, query_history):
  """
  Turn a query history record into a plain dict.

  The dict carries the record's id, last state, query, result flag and
  statement number, plus the URLs for watching the query and for viewing its
  results from the first row. A 'design' entry is added only when the record
  has a design.
  """
  serialized = {
    'id': query_history.id,
    'state': query_history.last_state,
    'query': query_history.query,
    'has_results': query_history.has_results,
    'statement_number': query_history.statement_number,
  }

  # URLs are namespaced by the app that served the request.
  serialized['watch_url'] = reverse(
    get_app_name(request) + ':api_watch_query_refresh_json',
    kwargs={'id': query_history.id}
  )
  serialized['results_url'] = reverse(
    get_app_name(request) + ':view_results',
    kwargs={'id': query_history.id, 'first_row': 0}
  )

  if query_history.design:
    serialized['design'] = design_to_dict(query_history.design)

  return serialized
Beispiel #4
0
Datei: api.py Projekt: Kewtt/hue
def explain_query(request):
    """
    Explain the query submitted through the query form and collect the outcome
    in a ``response`` dict: ``status`` 0 plus ``results`` when the explain
    works, ``status`` -1 plus ``message`` when the database calls raise.

    Only the beginning of the function is visible in this excerpt: it stops at
    the ``else`` branch for an invalid form, before ``response`` is returned.
    """
    # Pessimistic default; switched to 0 once the explain succeeds.
    response = {"status": -1, "message": ""}

    # NOTE(review): a non-POST request only sets the message; the visible code
    # has no early return here, so processing continues below.
    if request.method != "POST":
        response["message"] = _("A POST request is required.")

    app_name = get_app_name(request)
    query_type = beeswax_models.SavedQuery.TYPES_MAPPING[app_name]

    try:
        form = get_query_form(request)

        if form.is_valid():
            query = SQLdesign(form, query_type=query_type)
            # The target server is taken from the POSTed "server" field.
            query_server = dbms.get_query_server_config(request.POST.get("server"))
            db = dbms.get(request.user, query_server)

            try:
                # Switch to the database chosen in the form, then explain.
                db.use(form.cleaned_data["database"])
                datatable = db.explain(query)
                results = db.client.create_result(datatable)

                response["status"] = 0
                response["results"] = results_to_dict(results)
            except Exception, e:
                # Any failure while explaining is reported through the response.
                response["status"] = -1
                response["message"] = str(e)

        else:
Beispiel #5
0
Datei: views.py Projekt: dsc/hue
def load_table(request, database, table):
  """
  Build a ``LOAD DATA INPATH`` statement from the submitted form and run it.

  On a POST with a valid LoadDataForm the statement is assembled from the
  form's path, overwrite flag and partition values and executed directly;
  after success the user is sent to the table's describe page.

  Raises PopupException when executing the statement fails.

  NOTE(review): the code shown here has no branch for non-POST requests or
  for an invalid form, so those paths return nothing -- confirm against the
  complete view.
  """
  table_obj = dbms.get(request.user).get_table(database, table)

  if request.method == "POST":
    form = beeswax.forms.LoadDataForm(table_obj, request.POST)
    if form.is_valid():
      # TODO(philip/todd): When PathField might refer to non-HDFS,
      # we need a pathfield.is_local function.
      # NOTE(review): form values are interpolated into the statement as-is;
      # confirm that LoadDataForm validation makes this safe.
      hql = "LOAD DATA INPATH"
      hql += " '%s'" % form.cleaned_data['path']
      if form.cleaned_data['overwrite']:
        hql += " OVERWRITE"
      hql += " INTO TABLE "
      hql += "`%s.%s`" % (database, table,)
      if form.partition_columns:
        hql += " PARTITION ("
        vals = []
        for key, column_name in form.partition_columns.iteritems():
          vals.append("%s='%s'" % (column_name, form.cleaned_data[key]))
        hql += ", ".join(vals)
        hql += ")"

      on_success_url = reverse(get_app_name(request) + ':describe_table', kwargs={'database': database, 'table': table})
      query = hql_query(hql, database=database)
      try:
        return execute_directly(request, query, on_success_url=on_success_url)
      except Exception as e:
        # Keep the original error as detail of the user-facing popup.
        raise PopupException(_("Can't load the data"), detail=e)
Beispiel #6
0
def list_designs(request):
  """
  View function listing the saved queries.

  Reached from /beeswax/list_designs?filterargs, where the filter args are:
    page=<n>    - Controls pagination. Defaults to 1.
    user=<name> - Show design items belonging to a user. Default to all users.
    type=<type> - <type> is "hql", for saved query type. Default to show all.
    sort=<key>  - Sort by the attribute <key>, which is one of:
                    "date", "name", "desc", and "type" (design type)
                  Accepts the form "-date", which sort in descending order.
                  Default to "-date".
    text=<frag> - Search for fragment "frag" in names and descriptions.
  """
  page_size = 20
  current_app = get_app_name(request)

  # Pull the saved-query filters out of the GET parameters.
  key_prefix = 'q-'
  filters = _copy_prefix(key_prefix, request.GET)
  # The type filter is always forced to the current app.
  filters[key_prefix + 'type'] = current_app

  page, filter_params = _list_designs(request.user, filters, page_size, key_prefix)

  design_ids = [design.id for design in page.object_list]
  context = {
    'page': page,
    'filter_params': filter_params,
    'user': request.user,
    'designs_json': json.dumps(design_ids)
  }
  return render('list_designs.mako', request, context)
Beispiel #7
0
def execute_directly(request, query, design, query_server, tablename=None, **kwargs):
  """
  Execute ``query`` on ``query_server`` and answer with a JSON summary.

  When a design is given it is re-fetched through authorized_get_design
  first. The database named in the query ('default' when absent) is selected
  before execution. An optional ``parameters`` keyword argument is stored on
  the resulting history object.

  The JSON response holds status 0, the history id, the URL to watch the
  query, the current statement and the redaction flag.

  ``tablename`` is accepted but not used by this function.
  """
  if design is not None:
    design = authorized_get_design(request, design.id)
  extra_parameters = kwargs.pop('parameters', None)

  server = dbms.get(request.user, query_server)
  server.use(query.query.get('database', 'default'))

  history = server.execute_query(query, design)
  refresh_url = reverse(
    get_app_name(request) + ':api_watch_query_refresh_json',
    kwargs={'id': history.id}
  )

  if extra_parameters is not None:
    history.update_extra('parameters', extra_parameters)
    history.save()

  return JsonResponse({
    'status': 0,
    'id': history.id,
    'watch_url': refresh_url,
    'statement': history.get_current_statement(),
    'is_redacted': history.is_redacted
  })
Beispiel #8
0
def save_query_design(request, design_id=None):
  """
  Validate the submitted query form and save it as a design.

  On success ``response`` receives ``design_id`` and ``status`` 0; when the
  form is invalid it receives the errors of each sub-form under ``errors``;
  a RuntimeError is reported through ``message``.

  NOTE(review): the visible excerpt ends without returning ``response`` --
  presumably the return statement follows; confirm against the full file.
  """
  # Pessimistic default; switched to 0 once the design is saved.
  response = {'status': -1, 'message': ''}

  # NOTE(review): a non-POST request only sets the message; there is no early
  # return in the visible code, so processing continues below.
  if request.method != 'POST':
    response['message'] = _('A POST request is required.')

  app_name = get_app_name(request)
  query_type = beeswax.models.SavedQuery.TYPES_MAPPING[app_name]
  design = safe_get_design(request, query_type, design_id)

  try:
    query_form = get_query_form(request)

    if query_form.is_valid():
      design = save_design(request, query_form, query_type, design, True)
      response['design_id'] = design.id
      response['status'] = 0
    else:
      # Errors are grouped per sub-form; the query errors are wrapped in a list.
      response['errors'] = {
        'query': [query_form.query.errors],
        'settings': query_form.settings.errors,
        'file_resources': query_form.file_resources.errors,
        'functions': query_form.functions.errors,
        'saveform': query_form.saveform.errors,
      }
  except RuntimeError, e:
    response['message'] = str(e)
Beispiel #9
0
def execute_query(request, design_id=None):
  """
  View function for executing an arbitrary query.
  It understands the optional GET/POST params:

    on_success_url
      If given, it will be displayed when the query is successfully finished.
      Otherwise, it will display the view query results page by default.

  Only the setup part of the view is visible in this excerpt; it ends after
  the database choices have been looked up.
  """
  # The return value is discarded; presumably called for its authorization
  # check only -- confirm against authorized_get_design.
  authorized_get_design(request, design_id)

  error_message = None
  form = QueryForm()
  action = request.path
  log = None
  app_name = get_app_name(request)
  query_type = SavedQuery.TYPES_MAPPING[app_name]
  design = safe_get_design(request, query_type, design_id)
  on_success_url = request.REQUEST.get('on_success_url')
  databases = []
  query_server = get_query_server_config(app_name)
  db = dbms.get(request.user, query_server)

  # A failure to list databases is not fatal here: it is converted into an
  # error message and log, and ``databases`` stays empty.
  try:
    databases = get_db_choices(request)
  except Exception, ex:
    error_message, log = expand_exception(ex, db)
Beispiel #10
0
def list_trashed_designs(request):
  """
  View function listing the trashed designs of the current app.

  Filters are read from the 'q-' prefixed GET parameters; the type filter is
  forced to the current app and an optional ``text`` GET parameter is applied
  as the text search filter.
  """
  page_size = 20
  current_app = get_app_name(request)

  user = request.user

  # Pull the saved-query filters out of the GET parameters.
  key_prefix = 'q-'
  filters = _copy_prefix(key_prefix, request.GET)
  # The type filter is always forced to the current app.
  filters[key_prefix + 'type'] = current_app
  # Apply the search text when one was supplied.
  text = request.GET.get('text', None)
  if text is not None:
    filters[key_prefix + 'text'] = text

  page, filter_params = _list_designs(user, filters, page_size, key_prefix, is_trashed=True)

  design_ids = [design.id for design in page.object_list]
  context = {
    'page': page,
    'filter_params': filter_params,
    'prefix': key_prefix,
    'user': request.user,
    'designs_json': json.dumps(design_ids)
  }
  return render('list_trashed_designs.mako', request, context)
Beispiel #11
0
def get_session(request, session_id=None):
  """
  Describe a query-server session of the current user as JSON.

  session_id - when given, that session is looked up (restricted to the
               requesting user and the current server); otherwise the latest
               session for the user and server type is used.

  On success the response has status 0, a ``session`` entry (id, application,
  status code) and the session ``properties`` with every value whose key
  contains "password" masked by asterisks. When no session is found the
  response keeps status -1 and carries an explanatory message.
  """
  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)

  response = {'status': -1, 'message': ''}

  if session_id:
    try:
      session = Session.objects.get(id=session_id, owner=request.user, application=query_server['server_name'])
    except Session.DoesNotExist:
      # get() raises instead of returning None; map "not found" onto the
      # error message below rather than letting the exception escape.
      session = None
  else:  # get the latest session for given user and server type
    session = Session.objects.get_session(request.user, query_server['server_name'])

  if session is not None:
    properties = json.loads(session.properties)
    # Redact passwords
    for key, value in properties.items():
      if 'password' in key.lower():
        properties[key] = '*' * len(value)

    response['status'] = 0
    response['session'] = {'id': session.id, 'application': session.application, 'status': session.status_code}
    response['properties'] = properties
  else:
    response['message'] = _('Could not find session or no open sessions found.')

  return JsonResponse(response)
Beispiel #12
0
def explain_query(request):
  """
  Explain the query submitted through the query form and collect the outcome
  in a ``response`` dict: ``status`` 0 plus ``results`` when the explain
  works, ``status`` -1 plus ``message`` when the database calls raise.

  Only the beginning of the function is visible in this excerpt: it stops at
  the ``else`` branch for an invalid form, before ``response`` is returned.
  """
  # Pessimistic default; switched to 0 once the explain succeeds.
  response = {'status': -1, 'message': ''}

  # NOTE(review): a non-POST request only sets the message; the visible code
  # has no early return here, so processing continues below.
  if request.method != 'POST':
    response['message'] = _('A POST request is required.')
  
  app_name = get_app_name(request)
  query_type = beeswax_models.SavedQuery.TYPES_MAPPING[app_name]

  try:
    form = get_query_form(request)

    if form.is_valid():
      query = SQLdesign(form, query_type=query_type)
      # The query server is the one configured for the current app.
      query_server = dbms.get_query_server_config(app_name)
      db = dbms.get(request.user, query_server)

      try:
        # Switch to the database chosen in the form, then explain.
        db.use(form.cleaned_data['database'])
        datatable = db.explain(query)
        results = db.client.create_result(datatable)

        response['status'] = 0
        response['results'] = results_to_dict(results)
      except Exception, e:
        # Any failure while explaining is reported through the response.
        response['status'] = -1
        response['message'] = str(e)

    else:
Beispiel #13
0
def list_trashed_designs(request):
    """
    View function listing the trashed designs of the current app.

    Filters come from the "q-" prefixed GET parameters. The type filter is
    always set to the current app, and the optional ``text`` GET parameter is
    forwarded as the text search filter.
    """
    page_size = 20
    current_app = get_app_name(request)

    user = request.user

    # Pull the saved-query filters out of the GET parameters.
    key_prefix = "q-"
    filters = _copy_prefix(key_prefix, request.GET)
    # The type filter is always forced to the current app.
    filters[key_prefix + "type"] = current_app
    # Apply the search text when one was supplied.
    text = request.GET.get("text", None)
    if text is not None:
        filters[key_prefix + "text"] = text

    page, filter_params = _list_designs(user, filters, page_size, key_prefix, is_trashed=True)

    design_ids = [design.id for design in page.object_list]
    context = {
        "page": page,
        "filter_params": filter_params,
        "prefix": key_prefix,
        "user": request.user,
        "designs_json": json.dumps(design_ids),
    }
    return render("list_trashed_designs.mako", request, context)
Beispiel #14
0
def view_results(request, id, first_row=0):
  """
  Returns the view for the results of the QueryHistory with the given id.

  The query results MUST be ready.
  To display query results, one should always go through the execute_query view.
  If the result set has has_result_set=False, display an empty result.

  If ``first_row`` is 0, restarts (if necessary) the query read.  Otherwise, just
  spits out a warning if first_row doesn't match the server's conception.
  Multiple readers will produce a confusing interaction here, and that's known.

  It understands the ``context`` GET parameter. (See execute_query().)

  Only the fetching part of the view is visible in this excerpt; it ends
  inside the error handling, before anything is rendered.
  """
  first_row = long(first_row)
  start_over = (first_row == 0)
  # Empty stand-in result, replaced below when rows are actually fetched.
  results = type('Result', (object,), {
                'rows': 0,
                'columns': [],
                'has_more': False,
                'start_row': 0,
            })
  data = []
  fetch_error = False
  error_message = ''
  log = ''
  columns = []
  app_name = get_app_name(request)

  query_history = authorized_get_query_history(request, id, must_exist=True)
  query_server = query_history.get_query_server_config()
  db = dbms.get(request.user, query_server)

  handle, state = _get_query_handle_and_state(query_history)
  context_param = request.GET.get('context', '')
  query_context = parse_query_context(context_param)

  # Update the status as expired should not be accessible
  expired = state == models.QueryHistory.STATE.expired

  # Retrieve query results or use empty result if no result set
  try:
    if query_server['server_name'] == 'impala' and not handle.has_result_set:
      downloadable = False
    else:
      # Fetch up to 100 rows, restarting the read when first_row is 0.
      results = db.fetch(handle, start_over, 100)

      # Materialize and HTML escape results
      data = escape_rows(results.rows())

      # We display the "Download" button only when we know that there are results:
      downloadable = first_row > 0 or data
      log = db.get_log(handle)
      columns = results.data_table.cols()

  except Exception, ex:
    LOG.exception('error fetching results')

    # Remember the failure; message and log come from the expanded exception.
    fetch_error = True
    error_message, log = expand_exception(ex, db, handle)
Beispiel #15
0
def get_sample_data(request, database, table, column=None):
  """
  Return, as JSON, whatever _get_sample_data yields for the given database,
  table and optional column, using the current user's connection to the
  query server of the current app.
  """
  server_config = get_query_server_config(get_app_name(request))
  connection = dbms.get(request.user, server_config)
  return JsonResponse(_get_sample_data(connection, database, table, column))
Beispiel #16
0
def configuration(request):
  """
  Render the default configuration of the current app's query server.

  The ``include_hadoop`` request parameter (any truthy value) is passed on to
  get_default_configuration as a boolean.
  """
  server_config = get_query_server_config(get_app_name(request))
  connection = dbms.get(request.user, server_config)
  include_hadoop = bool(request.REQUEST.get("include_hadoop", False))
  config_values = connection.get_default_configuration(include_hadoop)

  return render("configuration.mako", request, {'config_values': config_values})
Beispiel #17
0
def analyze_table(request, database, table, columns=None):
  """
  Start an analyze query for a table and report where to watch it.

  The query server of the current app is used, unless the table is flagged
  as Impala-only and the current app is not 'impala', in which case the
  'impala' query server is used instead.

  columns - acts as a switch only: when it is None the table is analyzed via
            analyze_table, otherwise via analyze_table_columns. Its value is
            not passed on.

  Returns a JSON response. For POST requests it has status 0 and the
  ``watch_url`` of the started query; for other methods it keeps status -1
  and explains that a POST is required.
  """
  app_name = get_app_name(request)
  cluster = json.loads(request.POST.get('cluster', '{}'))

  server_config = get_query_server_config(app_name, cluster=cluster)
  connection = dbms.get(request.user, server_config)

  table_obj = connection.get_table(database, table)
  if table_obj.is_impala_only and app_name != 'impala':
    server_config = get_query_server_config('impala')
    connection = dbms.get(request.user, server_config)

  response = {'status': -1, 'message': '', 'redirect': ''}

  if request.method != "POST":
    response['message'] = _('A POST request is required.')
    return JsonResponse(response)

  analyze = connection.analyze_table if columns is None else connection.analyze_table_columns
  history = analyze(database, table)

  response['watch_url'] = reverse('beeswax:api_watch_query_refresh_json', kwargs={'id': history.id})
  response['status'] = 0
  return JsonResponse(response)
Beispiel #18
0
def show_tables(request, database=None):
    """
    Show the tables of a database.

    When no database is given, the one returned by _get_last_database is used.

    POST: answers with a JSON document holding the table list and its
    rendered "table_list.mako" fragment, or an ``error`` entry when listing
    or rendering raised.

    Other methods: renders "show_tables.mako" with a database selection form
    and stores the database in the "hueHcatalogLastDatabase" cookie.
    """
    if database is None:
        database = _get_last_database(request, database)

    if request.method == 'POST':
        payload = {}
        try:
            tables = _get_table_list(request, database)
            fragment_context = dict(
                app_name=get_app_name(request),
                database=database,
                tables=tables,
            )
            fragment = django_mako.render_to_string("table_list.mako", fragment_context)
        except Exception as ex:
            payload['error'] = escapejs(ex.message)
        else:
            payload['table_list_rendered'] = fragment
            payload['tables'] = tables
        return HttpResponse(json.dumps(payload))

    known_databases = dbms.get(request.user).get_databases()
    db_form = hcatalog.forms.DbForm(initial={'database': database}, databases=known_databases)
    page_context = {
        'database': database,
        'db_form': db_form,
    }
    response = render("show_tables.mako", request, page_context)
    # NOTE(review): expires=90 is passed through unchanged; confirm the unit
    # the cookie API expects for this argument.
    response.set_cookie("hueHcatalogLastDatabase", database, expires=90)
    return response
Beispiel #19
0
def autocomplete(request, database=None, table=None, column=None, nested=None):
  """
  Collect metadata for autocompletion at increasing levels of detail.

  Depending on which of ``database``, ``table`` and ``column`` are given, the
  response holds the database list, the tables of a database, the columns of
  a table (plus its ``hdfs_link``), or the parsed type of a single column
  (narrowed with ``nested`` when provided).

  The excerpt visible here ends inside the exception handling, before the
  response is returned.
  """
  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)
  # Superusers and users holding the "impersonate" permission of the
  # "security" app may query on behalf of the user named in the "doas"
  # GET parameter.
  do_as = request.user
  if (request.user.is_superuser or request.user.has_hue_permission(action="impersonate", app="security")) and 'doas' in request.GET:
    do_as = User.objects.get(username=request.GET.get('doas'))
  db = dbms.get(do_as, query_server)
  response = {}

  try:
    if database is None:
      response['databases'] = db.get_databases()
    elif table is None:
      response['tables'] = db.get_tables(database=database)
    elif column is None:
      t = db.get_table(database, table)
      response['hdfs_link'] = t.hdfs_link
      response['columns'] = [column.name for column in t.cols]
      response['extended_columns'] = massage_columns_for_json(t.cols)
    else:
      col = db.get_column(database, table, column)
      if col:
        # For a single column the whole response is replaced by its parse tree.
        parse_tree = parser.parse_column(col.name, col.type, col.comment)
        if nested:
          parse_tree = _extract_nested_type(parse_tree, nested)
        response = parse_tree
      else:
        raise Exception('Could not find column `%s`.`%s`.`%s`' % (database, table, column))
  except (QueryServerTimeoutException, TTransportException), e:
    # Timeouts and transport errors are reported with a 503 code.
    response['code'] = 503
    response['error'] = e.message
Beispiel #20
0
 def process_response(self, request, response):
     """
     Write one audit record for the request/response pair to the audit logger
     and mark the response as audited.

     Any exception raised while auditing is logged and swallowed.

     NOTE(review): the visible excerpt ends without returning ``response`` --
     presumably the return statement follows; confirm against the full file.
     """
     try:
         audit_logger = get_audit_logger()
         audit_logger.debug(
             JsonMessage(
                 **{
                     # The record is keyed by the formatted UTC time.
                     # NOTE(review): '%s' is not a standard strftime
                     # directive; confirm it yields the intended key on the
                     # target platform.
                     datetime.utcnow().strftime('%s'): {
                         # Falls back to 'anonymous' when the request carries
                         # no user attribute.
                         'user':
                         request.user.username
                         if hasattr(request, 'user') else 'anonymous',
                         "status":
                         response.status_code,
                         "impersonator":
                         None,
                         "ip_address":
                         request.META.get('REMOTE_ADDR'),
                         # Only a 401 status is flagged as an authorization failure.
                         "authorization_failure":
                         response.status_code == 401,
                         "service":
                         get_app_name(request),
                         "url":
                         request.path,
                     }
                 }))
         response['audited'] = True
     except Exception, e:
         LOG.error('Could not audit the request: %s' % e)
Beispiel #21
0
Datei: api.py Projekt: atupal/hue
def save_query(request, design_id=None):
  """
  Validate the POSTed query form and save it as a design.

  On success ``response`` receives ``design_id`` and ``status`` 0; when the
  form is invalid the query and save-form errors are concatenated into
  ``message``; a RuntimeError is reported through ``message`` as well.

  NOTE(review): the visible excerpt ends without returning ``response`` --
  presumably the return statement follows; confirm against the full file.
  """
  # Pessimistic default; switched to 0 once the design is saved.
  response = {'status': -1, 'message': ''}

  # NOTE(review): a non-POST request only sets the message; there is no early
  # return in the visible code, so processing continues below.
  if request.method != 'POST':
    response['message'] = _('A POST request is required.')

  app_name = get_app_name(request)
  query_type = beeswax_models.SavedQuery.TYPES_MAPPING[app_name]
  design = safe_get_design(request, query_type, design_id)
  form = QueryForm()
  api = get_api(request.user)
  app_names = api.jars()

  try:
    form.bind(request.POST)
    # The appName choices are filled in from what api.jars() returned.
    form.query.fields['appName'].choices = ((key, key) for key in app_names)

    if form.is_valid():
      design = save_design(request, form, query_type, design, True)
      response['design_id'] = design.id
      response['status'] = 0
    else:
      response['message'] = smart_str(form.query.errors) + smart_str(form.saveform.errors)
  except RuntimeError, e:
    response['message'] = str(e)
Beispiel #22
0
def list_query_history(request):
  """
  View the query history.

  Reached from /beeswax/query_history?filterargs, where the filter args are:
    page=<n>            - Controls pagination. Defaults to 1.
    user=<name>         - Show history items from a user. Default to current user only.
                          Also accepts ':all' to show all history items.
    type=<type>         - <type> is "report|hql", for design type. Default to show all.
    design_id=<id>      - Show history for this particular design id.
    sort=<key>          - Sort by the attribute <key>, which is one of:
                            "date", "state", "name" (design name), and "type" (design type)
                          Accepts the form "-date", which sort in descending order.
                          Default to "-date".
    auto_query=<bool>   - Show auto generated actions (drop table, read data, etc). Default False

  Unless saved queries are shared or the user is a superuser, the user filter
  is forced to the requesting user. The type filter is always forced to the
  current app.
  """
  page_size = 20

  share_queries = conf.SHARE_SAVED_QUERIES.get() or request.user.is_superuser

  # Work on a copy so the filters can be overridden.
  filters = request.GET.copy()
  if not share_queries:
    filters['user'] = request.user.username

  filters['type'] = get_app_name(request)

  page, filter_params = _list_query_history(request.user, filters, page_size)

  context = {
    'request': request,
    'page': page,
    'filter_params': filter_params,
    'share_queries': share_queries,
  }
  return render('list_history.mako', request, context)
Beispiel #23
0
def load_table(request, database, table):
  """
  Show the "load data" form for a table and, once it is submitted with valid
  values, hand the resulting ``LOAD DATA INPATH`` statement to confirm_query.

  POST with a valid form: the statement is assembled from the form's path,
  overwrite flag and partition values, and the confirmation page is returned
  with the table's describe page as success URL.

  GET, or POST with an invalid form: the form page is rendered. For an
  invalid POST this re-renders the bound form instead of returning nothing.
  """
  table_obj = dbms.get(request.user).get_table(database, table)

  if request.method == "POST":
    form = beeswax.forms.LoadDataForm(table_obj, request.POST)
    if form.is_valid():
      # TODO(philip/todd): When PathField might refer to non-HDFS,
      # we need a pathfield.is_local function.
      hql = "LOAD DATA INPATH"
      hql += " '%s'" % form.cleaned_data['path']
      if form.cleaned_data['overwrite']:
        hql += " OVERWRITE"
      hql += " INTO TABLE "
      hql += "`%s.%s`" % (database, table,)
      if form.partition_columns:
        hql += " PARTITION ("
        vals = []
        for key, column_name in form.partition_columns.iteritems():
          vals.append("%s='%s'" % (column_name, form.cleaned_data[key]))
        hql += ", ".join(vals)
        hql += ")"

      on_success_url = reverse(get_app_name(request) + ':describe_table', kwargs={'database': database, 'table': table})
      return confirm_query(request, hql, on_success_url)
  else:
    form = beeswax.forms.LoadDataForm(table_obj)

  # Reached for GET requests and for POSTs whose form failed validation, so a
  # view response is always returned.
  return render("load_table.mako", request, {'form': form, 'table': table, 'action': request.get_full_path()})
Beispiel #24
0
def _run_parameterized_query(request, design_id, explain):
  """
  Given a design and arguments to parameterize that design, runs the query.

  - explain is a boolean to determine whether to run as an explain or as an
    execute.

  This is an extra "step" in the flow from execute_query.

  Raises PopupException when the reconstituted query form is invalid or when
  the query has no parameters to substitute.

  When running the query raises, the error is expanded and shown on the
  'execute.mako' page instead of propagating.

  NOTE(review): in the code shown here nothing is returned when the
  parameterization form itself is invalid -- confirm that is intended.
  """
  design = authorized_get_design(request, design_id, must_exist=True)

  # Reconstitute the form: stored design values first, POSTed values on top.
  design_obj = beeswax.design.HQLdesign.loads(design.data)
  query_form = QueryForm()
  params = design_obj.get_query_dict()
  params.update(request.POST)
  query_form.bind(params)
  # Raise explicitly: an assert would be stripped when Python runs with -O,
  # letting an invalid form through.
  if not query_form.is_valid():
    raise PopupException(_("Query form is invalid: %s") % query_form.errors)

  query_str = query_form.query.cleaned_data["query"]
  query_server = get_query_server_config(get_app_name(request))

  parameterization_form_cls = make_parameterization_form(query_str)
  if not parameterization_form_cls:
    raise PopupException(_("Query is not parameterizable."))

  parameterization_form = parameterization_form_cls(request.REQUEST, prefix="parameterization")

  if parameterization_form.is_valid():
    # Substitute the submitted values and run the resulting statement.
    real_query = substitute_variables(query_str, parameterization_form.cleaned_data)
    query = HQLdesign(query_form)
    query._data_dict['query']['query'] = real_query
    try:
      if explain:
        return explain_directly(request, query, design, query_server)
      else:
        return execute_directly(request, query, query_server, design)
    except Exception as ex:
      # Show the failure (message and server log) on the editor page.
      db = dbms.get(request.user, query_server)
      error_message, log = expand_exception(ex, db)
      return render('execute.mako', request, {
        'action': reverse(get_app_name(request) + ':execute_query'),
        'design': design,
        'error_message': error_message,
        'form': query_form,
        'log': log,
        'autocomplete': json.dumps({}),
      })
Beispiel #25
0
def view_results(request, id, first_row=0):
  """
  Returns the view for the results of the QueryHistory with the given id.

  The query results MUST be ready.
  To display query results, one should always go through the watch_query view.

  If ``first_row`` is 0, restarts (if necessary) the query read.  Otherwise, just
  spits out a warning if first_row doesn't match the server's conception.
  Multiple readers will produce a confusing interaction here, and that's known.

  It understands the ``context`` GET parameter. (See watch_query().)

  Only the fetching part of the view is visible in this excerpt; it ends
  inside the error handling, before anything is rendered.
  """
  first_row = long(first_row)
  start_over = (first_row == 0)
  results = None
  data = None
  fetch_error = False
  error_message = ''
  log = ''
  app_name = get_app_name(request)

  query_history = authorized_get_history(request, id, must_exist=True)
  db = dbms.get(request.user, query_history.get_query_server_config())

  handle, state = _get_query_handle_and_state(query_history)
  context_param = request.GET.get('context', '')
  query_context = _parse_query_context(context_param)

  # To remove in Hue 2.3
  download  = request.GET.get('download', '')

  # Update the status as expired should not be accessible
  # Impala does not support startover for now
  expired = state == models.QueryHistory.STATE.expired
  if expired or app_name == 'impala':
    # For the impala app the state is saved as expired regardless of the
    # state that was just read.
    state = models.QueryHistory.STATE.expired
    query_history.save_state(state)

  # Retrieve query results
  try:
    if not download:
      # Fetch up to 100 rows, restarting the read when first_row is 0.
      results = db.fetch(handle, start_over, 100)
      data = list(results.rows()) # Materialize results

      # We display the "Download" button only when we know that there are results:
      downloadable = first_row > 0 or data
    else:
      # With the "download" GET parameter set nothing is fetched; an empty
      # stand-in result is used instead.
      downloadable = True
      data = []
      results = type('Result', (object,), {
                    'rows': 0,
                    'columns': [],
                    'has_more': False,
                    'start_row': 0, })
    log = db.get_log(handle)
  except Exception, ex:
    # Remember the failure; message and log come from the expanded exception.
    fetch_error = True
    error_message, log = expand_exception(ex, db)
Beispiel #26
0
def get_query_form(request):
  """
  Look up the databases of the current app's query server as (value, label)
  choice pairs.

  Raises PopupException when the databases cannot be listed.

  NOTE(review): the visible excerpt ends after the lookup, before
  ``databases`` is used -- presumably the form is built from it afterwards;
  confirm against the full file.
  """
  try:
    # Get database choices
    query_server = dbms.get_query_server_config(get_app_name(request))
    db = dbms.get(request.user, query_server)
    databases = [(database, database) for database in db.get_databases()]
  except Exception, e:
    # Keep the original error as detail of the user-facing popup.
    raise PopupException(_('Unable to access databases, Query Server or Metastore may be down.'), detail=e)
Beispiel #27
0
def save_results_hdfs_file(request, query_history_id):
  """
  Save the results of a query to an HDFS file.

  Do not rerun the query.

  The outcome is collected in ``response``: status 0 with the target details
  on success, -1 when the query is not in a successful state, -2 when the
  query handle cannot be obtained, -3 when saving fails.

  NOTE(review): apart from the two early error returns, the visible excerpt
  ends without returning ``response`` -- presumably the return statement
  follows; confirm against the full file.
  """
  response = {'status': 0, 'message': ''}

  query_history = authorized_get_query_history(request, query_history_id, must_exist=True)
  # Refresh the stored state of the query before deciding anything.
  server_id, state = _get_query_handle_and_state(query_history)
  query_history.save_state(state)
  error_msg, log = None, None

  if request.method != 'POST':
    response['message'] = _('A POST request is required.')
  else:
    if not query_history.is_success():
      response['message'] = _('This query is %(state)s. Results unavailable.') % {'state': state}
      response['status'] = -1
      return JsonResponse(response)

    db = dbms.get(request.user, query_history.get_query_server_config())

    # The form is fed from the POSTed "path" and "overwrite" fields.
    form = beeswax.forms.SaveResultsFileForm({
      'target_file': request.POST.get('path'),
      'overwrite': request.POST.get('overwrite', False),
    })

    if form.is_valid():
      target_file = form.cleaned_data['target_file']
      overwrite = form.cleaned_data['overwrite']

      try:
        handle, state = _get_query_handle_and_state(query_history)
      except Exception, ex:
        response['message'] = _('Cannot find query handle and state: %s') % str(query_history)
        response['status'] = -2
        return JsonResponse(response)

      try:
        # An existing target is removed first, but only when overwriting was
        # requested and the target is a file; a directory is refused.
        if overwrite and request.fs.exists(target_file):
          if request.fs.isfile(target_file):
            request.fs.do_as_user(request.user.username, request.fs.rmtree, target_file)
          else:
            raise PopupException(_("The target path is a directory"))

        upload(target_file, handle, request.user, db, request.fs)

        response['type'] = 'hdfs-file'
        response['id'] = query_history.id
        response['query'] = query_history.query
        response['path'] = target_file
        response['success_url'] = '/filebrowser/view=%s' % target_file
        response['watch_url'] = reverse(get_app_name(request) + ':api_watch_query_refresh_json', kwargs={'id': query_history.id})
      except Exception, ex:
        # Any failure while saving (including the directory refusal above) is
        # reported through the response.
        error_msg, log = expand_exception(ex, db)
        response['message'] = _('The result could not be saved: %s.') % error_msg
        response['status'] = -3
Beispiel #28
0
def watch_query(request, id):
  """
  Poll a submitted query and route the browser once it has finished.

  While the query is still running, the ``watch_wait.mako`` page is rendered
  together with the current server log. Once the query has succeeded or
  failed, a redirect is returned instead.

  Optional GET params:

    on_success_url
      Where to go when the query finishes successfully.
      Defaults to the view_results page of this query.

    context
      A string of "name:data" that describes the context
      that generated this query result. It may be:
        - "table":"<table_name>"
        - "design":<design_id>

  All other GET params will be passed to on_success_url (if present).
  """
  history = authorized_get_history(request, id, must_exist=True)
  get_params = request.GET
  raw_context = get_params.get('context', '')

  # The results page doubles as the fallback success target and as the
  # failure target.
  results_view = get_app_name(request) + ':view_results'
  results_url = reverse(results_view, kwargs={'id': id, 'first_row': 0})
  on_success_url = get_params.get('on_success_url') or results_url

  # Refresh the stored state from the query server before deciding what to do.
  handle, state = _get_query_handle_and_state(history)
  history.save_state(state)

  succeeded = history.is_success()
  if succeeded or history.is_failure():
    # On failure, go to the results page: fetching there makes the Beeswax
    # server throw a BeeswaxException carrying the log we want to display.
    target_url = on_success_url if succeeded else results_url
    return format_preserving_redirect(request, target_url, get_params)

  # Still running: show the wait page with the log gathered so far.
  server = dbms.get(request.user, history.get_query_server_config())
  log = server.get_log(handle)

  # Translate the raw context into something more meaningful (type, data).
  query_context = _parse_query_context(raw_context)

  template_vars = {
    'query': history,
    'fwd_params': get_params.urlencode(),
    'log': log,
    'hadoop_jobs': _parse_out_hadoop_jobs(log),
    'query_context': query_context,
  }
  return render('watch_wait.mako', request, template_vars)
Beispiel #29
0
def execute_directly(request, query, query_server=None, design=None, tablename=None,
                     on_success_url=None, on_success_params=None, **kwargs):
  """
  Submit ``query`` for execution and redirect the browser to the watch page.

  This wraps the dbms ``execute_query`` call and takes care of the HTTP
  response that follows the submission.

    query
      The HQL model Query object.

    query_server
      To which Query Server to submit the query.
      Dictionary with keys: ['server_name', 'server_host', 'server_port'].

    design
      The design associated with the query.

    tablename
      The associated table name for the context. Only used when no design
      is given.

    on_success_url
      Where to go after the query is done. The URL handler may expect an option "context" GET
      param. (See ``watch_query``.) It may also be a callable, in which case the
      final URL is ``on_success_url(history_obj)``.
      When omitted, no on_success_url param is sent to the watch page.

    on_success_params
      Optional params to pass to the on_success_url (in addition to "context").

    **kwargs
      Accepted for call compatibility; not read by this function.

  Note that this may throw a Beeswax exception.
  """
  if design is not None:
    # Called for its access check; the returned design is not used.
    authorized_get_design(request, design.id)

  history_obj = dbms.get(request.user, query_server).execute_query(query, design)
  watch_url = reverse(get_app_name(request) + ':watch_query', kwargs={'id': history_obj.id})

  # GET params that accompany the redirect to the watch page.
  watch_params = QueryDict(None, mutable=True)

  # (1) context: a design takes precedence over a table name.
  if design or tablename:
    kind, data = ('design', design.id) if design else ('table', tablename)
    watch_params['context'] = make_query_context(kind, data)

  # (2) on_success_url: resolve a callable against the new history object.
  if on_success_url:
    resolved_url = on_success_url(history_obj) if callable(on_success_url) else on_success_url
    watch_params['on_success_url'] = resolved_url

  # (3) any extra caller-supplied params.
  if on_success_params:
    watch_params.update(on_success_params)

  return format_preserving_redirect(request, watch_url, watch_params)
Beispiel #30
0
def _save_results_ctas(request, query_history, target_table, result_meta):
  """
  Save the results of a query as the new table ``target_table``.

  Two cases are handled:
    1. The results come straight from an existing table
       (``result_meta.in_tablename`` is set): a CREATE TABLE ... AS SELECT is
       submitted through execute_directly and its HTTP response is returned.
    2. The results sit in a temporary location: the table is created from the
       result schema, then the result files are moved into its storage.

  May raise BeeswaxException, IOError.
  """
  query_server = query_history.get_query_server_config() # Query server requires DDL support
  # NOTE(review): the db handle is obtained without query_server; only the
  # CTAS path below passes query_server along. Confirm this is intended.
  db = dbms.get(request.user)

  # Case 1: The results are straight from an existing table
  if result_meta.in_tablename:
    ctas = hql_query('CREATE TABLE `%s` AS SELECT * FROM %s' % (target_table, result_meta.in_tablename))
    # Display the CTAS running. Could take a long time.
    show_tables_url = reverse(get_app_name(request) + ':show_tables')
    return execute_directly(request, ctas, query_server, on_success_url=show_tables_url)

  # Case 2: The results are in some temporary location
  # 1. Create table
  column_defs = ['`%s` %s' % (field.name, field.type)
                 for field in result_meta.schema.fieldSchemas]
  cols = ',\n'.join(column_defs)

  # The representation of the delimiter is messy.
  # It came from Java as a string, which might have been converted from an integer.
  # So it could be "1" (^A), or "10" (\n), or "," (a comma literally).
  raw_delim = result_meta.delim
  delim = raw_delim if raw_delim.isdigit() else str(ord(raw_delim))

  hql = '''
        CREATE TABLE `%s` (
        %s
        )
        ROW FORMAT DELIMITED
        FIELDS TERMINATED BY '\%s'
        STORED AS TextFile
        ''' % (target_table, cols, delim.zfill(3))

  db.execute_and_wait(hql_query(hql))

  try:
    # 2. Move the results into the table's storage
    table_obj = db.get_table('default', target_table)
    table_loc = request.fs.urlsplit(table_obj.path_location)[2]
    request.fs.rename_star(result_meta.table_dir, table_loc)
    LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, table_loc))
    messages.info(request, _('Saved query results as new table %(table)s') % {'table': target_table})
    query_history.save_state(models.QueryHistory.STATE.expired)
  except Exception:
    # Roll back the freshly created table when the move failed.
    # NOTE(review): the failure is only logged here, not re-raised, and this
    # path (like the success path of case 2) ends without returning a response.
    # Confirm whether the tail of this function is missing.
    LOG.error('Error moving data into storage of table %s. Will drop table.' % (target_table,))
    drop_query = hql_query('DROP TABLE `%s`' % (target_table,))
    try:
      db.execute_directly(drop_query)        # Don't wait for results
    except Exception as double_trouble:
      LOG.exception('Failed to drop table "%s" as well: %s' % (target_table, double_trouble))
Beispiel #31
0
def view_results(request, id, first_row=0):
  """
  Build the JSON response holding one page of results for a QueryHistory.

  The query results MUST be ready.
  To display query results, one should always go through the execute_query view.
  For an Impala handle without a result set nothing is fetched and an empty
  result is returned.

  ``first_row`` equal to 0 asks the server to start the read over; any other
  value continues the read and is only echoed back as ``expected_first_row``.
  Multiple readers will produce a confusing interaction here, and that's known.

  It understands the ``context`` GET parameter. (See execute_query().)
  """
  first_row = int(first_row)
  restart_read = first_row == 0

  # Stand-in used when nothing gets fetched, so the attribute reads further
  # down still work.
  results = type('Result', (object,), dict(rows=0, columns=[], has_more=False, start_row=0))
  data, columns = [], []
  fetch_error = False
  error_message = log = ''
  app_name = get_app_name(request)

  query_history = authorized_get_query_history(request, id, must_exist=True)
  query_server = query_history.get_query_server_config()
  db = dbms.get(request.user, query_server)

  handle, state = _get_query_handle_and_state(query_history)
  context_param = request.GET.get('context', '')
  query_context = parse_query_context(context_param)

  # Results of an expired query should not be accessible
  expired = state == models.QueryHistory.STATE.expired

  # Retrieve query results or use empty result if no result set
  try:
    nothing_to_fetch = query_server['server_name'] == 'impala' and not handle.has_result_set
    if nothing_to_fetch:
      downloadable = False
    else:
      results = db.fetch(handle, restart_read, 100)

      # Materialize and HTML escape results
      data = escape_rows(results.rows())

      # We display the "Download" button only when we know that there are results:
      downloadable = first_row > 0 or data
      log = db.get_log(handle)
      columns = results.data_table.cols()
  except Exception as ex:
    LOG.exception('error fetching results')
    fetch_error = True
    error_message, log = expand_exception(ex, db, handle)

  error = fetch_error or results is None or expired

  # Payload common to the error and success cases.
  context = {
    'error': error,
    'message': error_message,
    'query': query_history,
    'results': data,
    'columns': columns,
    'expected_first_row': first_row,
    'log': log,
    'hadoop_jobs': app_name != 'impala' and parse_out_jobs(log),
    'query_context': query_context,
    'can_save': False,
    'context_param': context_param,
    'expired': expired,
    'app_name': app_name,
    'next_json_set': None,
    'is_finished': query_history.is_finished()
  }

  if not error:
    if downloadable:
      download_urls = dict(
        (fmt, reverse(app_name + ':download', kwargs=dict(id=str(id), format=fmt)))
        for fmt in common.DL_FORMATS
      )
    else:
      download_urls = {}

    results.start_row = first_row
    next_row = results.start_row + len(data)

    # The context param is always appended, even when it is empty.
    next_page_url = reverse(
      get_app_name(request) + ':view_results',
      kwargs={'id': str(id), 'first_row': next_row}
    ) + '?context=' + context_param + '&format=json'

    context.update({
      'id': id,
      'has_more': results.has_more,
      'next_row': next_row,
      'start_row': results.start_row,
      'download_urls': download_urls,
      'can_save': query_history.owner == request.user,
      'next_json_set': next_page_url,
    })

  context['columns'] = massage_columns_for_json(columns)
  # These entries are removed before the payload is serialised.
  context.pop('save_form', None)
  context.pop('query', None)
  return JsonResponse(context)
Beispiel #32
0
def execute_query(request, design_id=None):
    """
    View function for executing an arbitrary query.

    On POST the query form is bound and validated; depending on the buttons
    present in the POST data the design is saved, explained or submitted for
    execution. The handling of non-POST requests and the final rendering are
    not part of the code shown here.

    It understands the optional GET/POST params:

      on_success_url
        If given, it will be displayed when the query is successfully finished.
        Otherwise, it will display the view query results page by default.

    POST keys read here: 'button-explain', 'button-submit', 'download', plus
    the fields of QueryForm (including its save form and 'download_format').
    """
    # Called before anything else; its return value is not used here.
    authorized_get_design(request, design_id)

    # Remember when this request started, in the user's session.
    request.session['start_time'] = time.time(
    )  # FIXME: add job id to not intersect simultaneous jobs
    error_message = None
    form = QueryForm()
    action = request.path
    log = None
    app_name = get_app_name(request)
    # NOTE(review): the query type is pinned to 'beeswax' rather than looked up
    # by app_name -- confirm this is intended for non-beeswax apps.
    query_type = SavedQuery.TYPES_MAPPING['beeswax']
    design = safe_get_design(request, query_type, design_id)
    on_success_url = request.REQUEST.get('on_success_url')

    query_server = get_query_server_config(app_name)
    db = dbms.get(request.user, query_server)
    databases = _get_db_choices(request)

    if request.method == 'POST':
        form.bind(request.POST)
        form.query.fields[
            'database'].choices = databases  # Could not do it in the form

        # Which button was pressed is signalled by the presence of its key.
        to_explain = request.POST.has_key('button-explain')
        to_submit = request.POST.has_key('button-submit')

        # Always validate the saveform, which will tell us whether it needs explicit saving
        if form.is_valid():
            to_save = form.saveform.cleaned_data['save']
            to_saveas = form.saveform.cleaned_data['saveas']

            # An explicit save clears the 'beeswax-autosave' session entry.
            if to_save or to_saveas:
                if 'beeswax-autosave' in request.session:
                    del request.session['beeswax-autosave']

            if to_saveas and not design.is_auto:
                # Save As only affects a previously saved query
                design = design.clone()

            # Any of the four actions persists the design; it only counts as an
            # explicit save for save / save-as.
            if to_submit or to_save or to_saveas or to_explain:
                explicit_save = to_save or to_saveas
                design = save_design(request, form, query_type, design,
                                     explicit_save)
                # From now on the form posts to the URL of the saved design.
                action = urlresolvers.reverse(app_name + ':execute_query',
                                              kwargs=dict(design_id=design.id))

            if to_explain or to_submit:
                query_str = form.query.cleaned_data["query"]

                if conf.CHECK_PARTITION_CLAUSE_IN_QUERY.get():
                    query_str = _strip_trailing_semicolon(query_str)
                    # check query. if a select query on partitioned table without partition keys,
                    # intercept it and raise a PopupException.
                    _check_partition_clause_in_query(
                        form.query.cleaned_data.get('database', None),
                        query_str, db)

                # (Optional) Parameterization.
                # A truthy result is returned to the client as-is instead of
                # running the query.
                parameterization = get_parameterization(
                    request, query_str, form, design, to_explain)
                if parameterization:
                    return parameterization

                try:
                    query = HQLdesign(form, query_type=query_type)
                    if to_explain:
                        return explain_directly(request, query, design,
                                                query_server)
                    else:
                        download = request.POST.has_key('download')

                        # Normalise any falsy download format to None.
                        download_format = form.query.cleaned_data.get(
                            'download_format', None)
                        if not download_format: download_format = None
                        # Flag the session when a known download format was
                        # requested; watch_query reads 'dl_status' from it.
                        if download_format in common.DL_FORMATS:
                            request.session['dl_status'] = True

                        return execute_directly(
                            request,
                            query,
                            query_server,
                            design,
                            on_success_url=on_success_url,
                            download_format=download_format,
                            download=download)
                except QueryServerException, ex:
                    # Keep the message and log of the failure in error_message / log.
                    error_message, log = expand_exception(ex, db)
Beispiel #33
0
def watch_query(request, id, download_format=None):
    """
    Wait for the query to finish and (by default) displays the results of query id.

    While the query is running, the ``watch_wait.mako`` page is rendered with
    the current log. A POST request, or a successful statement without results
    in a query that is not finished yet, first moves on to the next statement.

    It understands the optional GET params:

      on_success_url
        If given, it will be displayed when the query is successfully finished.
        Otherwise, it will display the view query results page by default.

      context
        A string of "name:data" that describes the context
        that generated this query result. It may be:
          - "table":"<table_name>"
          - "design":<design_id>

      download
        If non-empty, '?download=true' is appended to the default results URL.

    All other GET params will be passed to on_success_url (if present).

    download_format
      Optional download format. When the session flag 'dl_status' is set, a
      finished query redirects to the download view for this format instead
      of on_success_url.
    """
    # Coerce types; manage arguments
    query_history = authorized_get_history(request, id, must_exist=True)
    db = dbms.get(request.user, query_history.get_query_server_config())

    # GET param: context.
    context_param = request.GET.get('context', '')

    # GET param: on_success_url. Default to view_results.
    # A download format without the session flag goes back to the editor.
    # The explicit `== False` comparison is kept on purpose: values such as
    # None must not take this branch.
    if (request.session.get('dl_status', False) == False
            and download_format in common.DL_FORMATS):
        results_url = urlresolvers.reverse(
            get_app_name(request) + ':execute_query')
    else:
        results_url = urlresolvers.reverse(
            get_app_name(request) + ':view_results',
            kwargs={'id': id, 'first_row': 0})
    if request.GET.get('download', ''):
        results_url += '?download=true'
    on_success_url = request.GET.get('on_success_url')
    if not on_success_url:
        on_success_url = results_url

    # Go to next statement if asked to continue or when a statement with no dataset finished.
    if request.method == 'POST' or (not query_history.is_finished()
                                    and query_history.is_success()
                                    and not query_history.has_results):
        try:
            query_history = db.execute_next_statement(query_history)
        except Exception:
            # Best effort: carry on with the current statement, but leave a
            # trace of the failure instead of dropping it silently.
            LOG.exception('Could not execute the next statement of query %s', id)

    # Check query state
    handle, state = _get_query_handle_and_state(query_history)
    query_history.save_state(state)

    if query_history.is_failure():
        # When we fetch, Beeswax server will throw us a BeeswaxException, which has the
        # log we want to display.
        return format_preserving_redirect(request, results_url, request.GET)
    elif query_history.is_finished() or (query_history.is_success()
                                         and query_history.has_results):
        if request.session.get('dl_status', False):  # BUG-20020
            on_success_url = urlresolvers.reverse(
                get_app_name(request) + ':download',
                kwargs=dict(id=str(id), format=download_format))
        _clean_session(request)
        return format_preserving_redirect(request, on_success_url, request.GET)

    # Still running
    log = db.get_log(handle)

    # Keep waiting
    # - Translate context into something more meaningful (type, data)
    query_context = _parse_query_context(context_param)

    return render(
        'watch_wait.mako',
        request,
        {
            'query': query_history,
            'fwd_params': request.GET.urlencode(),
            'log': log,
            'hadoop_jobs': _parse_out_hadoop_jobs(log)[0],
            'query_context': query_context,
            'download_format': download_format,  ## ExpV
        })
Beispiel #34
0
def watch_query_refresh_json(request, id):
    """
    Return the current state and log of a running query as JSON.

    POST params read:

      next
        If set, the current operation is closed and the next statement of the
        query is executed.

      query-query
        Passed to ``execute_next_statement`` when moving to the next statement.

      log-start-over
        'true' to fetch the log from the beginning.

    The response 'status' is 0 unless the query failed without having been
    canceled, in which case it is -1 and 'message' carries the error.

    A QueryServerException raised while moving to the next statement
    propagates to the caller; any other exception there is logged and the
    state of the current statement is reported instead.
    """
    query_history = authorized_get_query_history(request, id, must_exist=True)
    db = dbms.get(request.user, query_history.get_query_server_config())

    go_to_next = request.POST.get('next')

    if not go_to_next:
        # We need this as multi query would fail as current query is closed
        handle, state = _get_query_handle_and_state(query_history)
        query_history.save_state(state)

    # Go to next statement if asked to continue or when a statement with no dataset finished.
    try:
        if go_to_next or (not query_history.is_finished()
                          and query_history.is_success()
                          and not query_history.has_results):
            close_operation(request, id)
            query_history = db.execute_next_statement(
                query_history, request.POST.get('query-query'))
            handle, state = _get_query_handle_and_state(query_history)
    except QueryServerException:
        # Bare raise keeps the original traceback.
        raise
    except Exception as ex:
        LOG.exception(ex)
        handle, state = _get_query_handle_and_state(query_history)

    try:
        start_over = request.POST.get('log-start-over') == 'true'
        log = db.get_log(handle, start_over=start_over)
    except Exception as ex:
        # The log is best effort: show the error text in its place.
        log = str(ex)

    jobs = parse_out_jobs(log)
    job_urls = massage_job_urls_for_json(jobs)

    result = {
        'status': -1,
        'log': log,
        'jobs': jobs,
        'jobUrls': job_urls,
        'isSuccess': query_history.is_success(),
        'isFailure': query_history.is_failure(),
        'id': id,
        'statement': query_history.get_current_statement(),
        'watch_url': reverse(
            get_app_name(request) + ':api_watch_query_refresh_json',
            kwargs={'id': query_history.id}),
        'oldLogsApi': USE_GET_LOG_API.get()
    }

    # Run time error
    if query_history.is_failure():
        res = db.get_operation_status(handle)
        if query_history.is_canceled(res):
            # A canceled query is not reported as an error.
            result['status'] = 0
        elif hasattr(res, 'errorMessage') and res.errorMessage:
            result['message'] = res.errorMessage
        else:
            result['message'] = _('Bad status for request %s:\n%s') % (id, res)
    else:
        result['status'] = 0

    return JsonResponse(result)
Beispiel #35
0
  except Exception, ex:
    log = str(ex)

  jobs = _parse_out_hadoop_jobs(log)
  job_urls = massage_job_urls_for_json(jobs)

  result = {
    'status': -1,
    'log': log,
    'jobs': jobs,
    'jobUrls': job_urls,
    'isSuccess': query_history.is_success(),
    'isFailure': query_history.is_failure(),
    'id': id,
    'statement': query_history.get_current_statement(),
    'watch_url': reverse(get_app_name(request) + ':api_watch_query_refresh_json', kwargs={'id': query_history.id}),
    'oldLogsApi': USE_GET_LOG_API.get()
  }

  # Run time error
  if query_history.is_failure():
    res = db.get_operation_status(handle)
    if query_history.is_canceled(res):
      result['status'] = 0
    elif hasattr(res, 'errorMessage') and res.errorMessage:
      result['message'] = res.errorMessage
    else:
      result['message'] = _('Bad status for request %s:\n%s') % (id, res)
  else:
    result['status'] = 0
Beispiel #36
0
                else:
                    action = 'watch-results'
            else:
                action = 'editor-results'
        except QueryServerException, e:
            if 'Invalid query handle' in e.message or 'Invalid OperationHandle' in e.message:
                query_history.save_state(QueryHistory.STATE.expired)
                LOG.warn("Invalid query handle", exc_info=sys.exc_info())
                action = 'editor-expired-results'
            else:
                raise e
    else:
        # Check perms.
        authorized_get_design(request, design_id)

        app_name = get_app_name(request)
        query_type = SavedQuery.TYPES_MAPPING[app_name]
        design = safe_get_design(request, query_type, design_id)
        query_history = None

    doc = design and design.id and design.doc.get()
    context = {
        'design':
        design,
        'query':
        query_history,  # Backward
        'query_history':
        query_history,
        'autocomplete_base_url':
        reverse(get_app_name(request) + ':api_autocomplete_databases',
                kwargs={}),
Beispiel #37
0
def view_results(request, id, first_row=0):
    """
    Returns the view for the results of the QueryHistory with the given id.

    The query results MUST be ready.
    To display query results, one should always go through the execute_query view.
    If the result set has has_result_set=False, display an empty result.

    If ``first_row`` is 0, restarts (if necessary) the query read.  Otherwise, just
    spits out a warning if first_row doesn't match the servers conception.
    Multiple readers will produce a confusing interaction here, and that's known.

    It understands the ``context`` GET parameter. (See execute_query().)

    Only the fetching part of the view is shown here: it leaves the fetched
    rows, columns, log and error information in local variables.
    """
    first_row = long(first_row)
    # Reading from row 0 asks the server to start the read over.
    start_over = (first_row == 0)
    # Empty stand-in for a result object, used until (and unless) a fetch
    # replaces it.
    results = type('Result', (object, ), {
        'rows': 0,
        'columns': [],
        'has_more': False,
        'start_row': 0,
    })
    data = []
    fetch_error = False
    error_message = ''
    log = ''
    columns = []
    app_name = get_app_name(request)

    query_history = authorized_get_query_history(request, id, must_exist=True)
    query_server = query_history.get_query_server_config()
    db = dbms.get(request.user, query_server)

    handle, state = _get_query_handle_and_state(query_history)
    context_param = request.GET.get('context', '')
    query_context = parse_query_context(context_param)

    # Update the status as expired should not be accessible
    expired = state == models.QueryHistory.STATE.expired

    # Retrieve query results or use empty result if no result set
    try:
        # Only Impala handles without a result set skip the fetch.
        if query_server[
                'server_name'] == 'impala' and not handle.has_result_set:
            downloadable = False
        else:
            # Fetch with a size argument of 100.
            results = db.fetch(handle, start_over, 100)

            # Materialize and HTML escape results
            data = escape_rows(results.rows())

            # We display the "Download" button only when we know that there are results:
            downloadable = first_row > 0 or data
            log = db.get_log(handle)
            columns = results.data_table.cols()

    except Exception, ex:
        LOG.exception('error fetching results')

        # Record the failure; message and log come from expand_exception.
        # `downloadable` may be left unassigned on this path.
        fetch_error = True
        error_message, log = expand_exception(ex, db, handle)
Beispiel #38
0
    def process_view(self, request, view_func, view_args, view_kwargs):
        """
        Decide whether the request may reach ``view_func``.

        We also perform access logging in ``process_view()`` since we have the view function,
        which tells us the log level. The downside is that we don't have the status code,
        which isn't useful for status logging anyways.

        Returns None to let the request continue to the view, or a response
        object otherwise: a permission-denied popup response, a
        "login required" JavaScript stub for ajax requests, a JSON document
        holding the login URL for embeddable requests, or a redirect to the
        login page.
        """
        # Stamp the request with the start time and the resolved view.
        request.ts = time.time()
        request.view_func = view_func
        access_log_level = getattr(view_func, 'access_log_level', None)

        # Skip loop for oidc
        if request.path in [
                '/oidc/authenticate/', '/oidc/callback/', '/oidc/logout/',
                '/hue/oidc_failed/'
        ]:
            return None

        # Skip views not requiring login

        # If the view has "opted out" of login required, skip
        if hasattr(view_func, "login_notrequired"):
            log_page_hit(request,
                         view_func,
                         level=access_log_level or logging.DEBUG)
            return None

        # There are certain django views which are also opt-out, but
        # it would be evil to go add attributes to them
        if view_func in DJANGO_VIEW_AUTH_WHITELIST:
            log_page_hit(request,
                         view_func,
                         level=access_log_level or logging.DEBUG)
            return None

        # If user is logged in, check that he has permissions to access the app
        if request.user.is_active and request.user.is_authenticated:
            AppSpecificMiddleware.augment_request_with_app(request, view_func)

            # Until Django 1.3 which resolves returning the URL name, just do a match of the name of the view
            try:
                access_view = 'access_view:%s:%s' % (
                    request._desktop_app, resolve(request.path)[0].__name__)
            except Exception as e:
                # Failing to build the per-view permission name is logged and
                # leaves it empty.
                access_log(request,
                           'error checking view perm: %s' % e,
                           level=access_log_level)
                access_view = ''

            app_accessed = request._desktop_app
            # Apps in this list are not subject to the permission check below.
            app_libs_whitelist = [
                "desktop", "home", "home2", "about", "hue", "editor",
                "notebook", "indexer", "404", "500", "403"
            ]
            if has_connectors():
                app_libs_whitelist.append('metadata')
                if DASHBOARD_ENABLED.get():
                    app_libs_whitelist.append('dashboard')
            # Accessing an app can access an underlying other app.
            # e.g. impala or spark uses code from beeswax and so accessing impala shows up as beeswax here.
            # Here we trust the URL to be the real app we need to check the perms.
            ui_app_accessed = get_app_name(request)
            if app_accessed != ui_app_accessed and ui_app_accessed not in (
                    'logs', 'accounts', 'login'):
                app_accessed = ui_app_accessed

            # Deny when the app is known, not whitelisted, the user is neither
            # admin nor holder of the app-level or view-level permission, and
            # the request is not for '__debug__' with debug mode enabled.
            if app_accessed and \
                app_accessed not in app_libs_whitelist and \
                not (
                    is_admin(request.user) or
                    request.user.has_hue_permission(action="access", app=app_accessed) or
                    request.user.has_hue_permission(action=access_view, app=app_accessed)
                ) and \
                not (app_accessed == '__debug__' and DJANGO_DEBUG_MODE.get()):
                access_log(request,
                           'permission denied',
                           level=access_log_level)
                return PopupException(_(
                    "You do not have permission to access the %(app_name)s application."
                ) % {
                    'app_name': app_accessed.capitalize()
                },
                                      error_code=401).response(request)
            else:
                # NOTE(review): request.view_func is assigned at the top of this
                # method, so this hasattr() test looks always true and the page
                # hit is not logged on this path -- confirm the intent.
                if not hasattr(request, 'view_func'):
                    log_page_hit(request, view_func, level=access_log_level)
                return None

        # Not logged in: with CORS enabled, try to authenticate with the
        # credentials below and log that user in on success.
        if desktop.conf.CORS_ENABLED.get():
            user = authenticate(request, username='******', password='******')
            if user is not None:
                login(request, user)
                return None

        logging.info("Redirecting to login page: %s", request.get_full_path())
        access_log(request, 'login redirection', level=access_log_level)
        # The ajax "login required" stub is only sent when none of these
        # backends appears in AUTH.BACKEND.
        no_idle_backends = ("libsaml.backend.SAML2Backend",
                            "desktop.auth.backend.SpnegoDjangoBackend",
                            "desktop.auth.backend.KnoxSpnegoDjangoBackend")
        if request.ajax and all(no_idle_backend not in AUTH.BACKEND.get()
                                for no_idle_backend in no_idle_backends):
            # Send back a magic header which causes Hue.Request to interpose itself
            # in the ajax request and make the user login before resubmitting the
            # request.
            response = HttpResponse("/* login required */",
                                    content_type="text/javascript")
            response[MIDDLEWARE_HEADER] = 'LOGIN_REQUIRED'
            return response
        else:
            if request.GET.get('is_embeddable'):
                # Hand the login URL back as JSON, with the return path
                # prefixed by '/hue'.
                return JsonResponse(
                    {
                        'url':
                        "%s?%s=%s" %
                        (settings.LOGIN_URL, REDIRECT_FIELD_NAME,
                         quote('/hue' + request.get_full_path().replace(
                             'is_embeddable=true', '').replace('&&', '&')))
                    }
                )  # Remove embeddable so redirect from & to login works. Login page is not embeddable
            else:
                # Plain redirect to the login page, carrying the current path.
                return HttpResponseRedirect(
                    "%s?%s=%s" % (settings.LOGIN_URL, REDIRECT_FIELD_NAME,
                                  quote(request.get_full_path())))
Beispiel #39
0
        else:
          action = 'watch-results'
      else:
        action = 'editor-results'
    except QueryServerException, e:
      if 'Invalid query handle' in e.message or 'Invalid OperationHandle' in e.message:
        query_history.save_state(QueryHistory.STATE.expired)
        LOG.warn("Invalid query handle", exc_info=sys.exc_info())
        action = 'editor-expired-results'
      else:
        raise e
  else:
    # Check perms.
    authorized_get_design(request, design_id)

    app_name = get_app_name(request)
    query_type = SavedQuery.TYPES_MAPPING[app_name]
    design = safe_get_design(request, query_type, design_id)
    query_history = None

  context = {
    'design': design,
    'query': query_history, # Backward
    'query_history': query_history,
    'autocomplete_base_url': reverse(get_app_name(request) + ':api_autocomplete_databases', kwargs={}),
    'autocomplete_base_url_hive': reverse('beeswax:api_autocomplete_databases', kwargs={}),
    'can_edit_name': design and design.id and not design.is_auto,
    'can_edit': design and design.id and design.doc.get().can_write(request.user),
    'action': action,
    'on_success_url': request.GET.get('on_success_url'),
    'has_metastore': 'metastore' in get_apps_dict(request.user)
Beispiel #40
0
def execute_query(request, design_id=None):
  """
  View function for executing an arbitrary query.
  It understands the optional GET/POST params:

    on_success_url
      If given, it will be displayed when the query is successfully finished.
      Otherwise, it will display the view query results page by default.

  On POST, the query form is bound and validated. Depending on the pressed
  button ('button-explain', 'button-submit') and the save flags of the form,
  the design is saved and the query is explained or executed.

  NOTE(review): the visible code returns only from the parameterization,
  explain and execute paths; every other path falls off the end.
  """
  authorized_get_design(request, design_id)

  error_message = None
  form = QueryForm()
  action = request.path
  log = None
  app_name = get_app_name(request)
  query_type = SavedQuery.TYPES_MAPPING[app_name]
  design = safe_get_design(request, query_type, design_id)
  on_success_url = request.REQUEST.get('on_success_url')

  query_server = get_query_server_config(app_name)
  db = dbms.get(request.user, query_server)
  databases = _get_db_choices(request)

  if request.method == 'POST':
    form.bind(request.POST)
    form.query.fields['database'].choices = databases  # Could not do it in the form

    # `in` replaces the Python-2-only dict.has_key().
    to_explain = 'button-explain' in request.POST
    to_submit = 'button-submit' in request.POST

    # Always validate the saveform, which will tell us whether it needs explicit saving
    if form.is_valid():
      to_save = form.saveform.cleaned_data['save']
      to_saveas = form.saveform.cleaned_data['saveas']

      if to_saveas and not design.is_auto:
        # Save As only affects a previously saved query
        design = design.clone()

      if to_submit or to_save or to_saveas or to_explain:
        explicit_save = to_save or to_saveas
        design = save_design(request, form, query_type, design, explicit_save)
        action = reverse(app_name + ':execute_query', kwargs=dict(design_id=design.id))

      if to_explain or to_submit:
        query_str = form.query.cleaned_data["query"]

        # (Optional) Parameterization.
        parameterization = get_parameterization(request, query_str, form, design, to_explain)
        if parameterization:
          return parameterization

        try:
          query = HQLdesign(form, query_type=query_type)
          if to_explain:
            return explain_directly(request, query, design, query_server)
          else:
            download = 'download' in request.POST
            return execute_directly(request, query, query_server, design, on_success_url=on_success_url, download=download)
        except Exception as ex:
          # Keep the expanded error and server log; nothing in the visible
          # code consumes error_message, log or action after this point.
          error_message, log = expand_exception(ex, db)
Beispiel #41
0
def _get_db_choices(request):
  """
  Return the (value, label) pairs for the databases of the current app.

  The query server is resolved from the app name of the request, and each
  database name is used as both value and label.

  A list is returned rather than a generator: callers assign the result to a
  form field's ``choices``, and a generator would be exhausted after the
  first iteration.
  """
  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)
  db = dbms.get(request.user, query_server)
  # Distinct loop name so the connection object `db` is not shadowed.
  return [(name, name) for name in db.get_databases()]
Beispiel #42
0
    def process_view(self, request, view_func, view_args, view_kwargs):
        """
        Log the page hit and check the user's permission on the accessed app.

        Access logging is done here because the view function, which carries
        the log level, is available. The downside is that the status code is
        not known yet, which isn't useful for status logging anyways.

        Returns None to let the request proceed, or the response of a
        PopupException (error code 401) when an active, authenticated user
        has neither the "access" permission nor the view-specific permission
        on the application.

        NOTE(review): in the visible code, requests from users that are not
        active and authenticated fall through and return None implicitly.
        """
        access_log_level = getattr(view_func, 'access_log_level', None)

        # Views that opted out of login, and the whitelisted Django views
        # (it would be evil to go add attributes to them), are only logged.
        if hasattr(view_func, "login_notrequired") or view_func in DJANGO_VIEW_AUTH_WHITELIST:
            log_page_hit(request,
                         view_func,
                         level=access_log_level or logging.DEBUG)
            return None

        # If user is logged in, check that he has permissions to access the
        # app.
        if request.user.is_active and request.user.is_authenticated():
            AppSpecificMiddleware.augment_request_with_app(request, view_func)

            # Until we get Django 1.3 and resolve returning the URL name, we just do a match of the name of the view
            try:
                access_view = 'access_view:%s:%s' % (
                    request._desktop_app, resolve(request.path)[0].__name__)
            except Exception as e:
                # Best effort: log and continue with an empty view permission.
                access_log(request,
                           'error checking view perm: %s',
                           e,
                           level=access_log_level)
                access_view = ''

            # Accessing an app can access an underlying other app.
            # e.g. impala or spark uses code from beeswax and so accessing impala shows up as beeswax here.
            # Here we trust the URL to be the real app we need to check the perms.
            app_accessed = request._desktop_app
            ui_app_accessed = get_app_name(request)
            if app_accessed != ui_app_accessed and ui_app_accessed not in (
                    'logs', 'accounts', 'login'):
                app_accessed = ui_app_accessed

            if app_accessed and \
                app_accessed not in ("desktop", "home", "about") and \
                not (request.user.has_hue_permission(action="access", app=app_accessed) or
                     request.user.has_hue_permission(action=access_view, app=app_accessed)):
                access_log(request,
                           'permission denied',
                           level=access_log_level)
                return PopupException(
                    _("You do not have permission to access the %(app_name)s application.") % {
                        'app_name': app_accessed.capitalize()
                    },
                    error_code=401).response(request)
            else:
                log_page_hit(request, view_func, level=access_log_level)
                return None
Beispiel #43
0
def execute_query(request, design_id=None, query_history_id=None):
    """
    View function for executing an arbitrary query.

    With ``query_history_id``, the page is set up to watch or show the
    results of that history entry; the ``action`` depends on whether an
    ``on_success_url`` GET parameter is present and matches
    REDIRECT_WHITELIST. Otherwise the design identified by ``design_id`` is
    loaded after a permission check.

    Returns the rendered 'execute.mako' page.

    Raises:
      QueryServerException: re-raised unless its message reports an invalid
        query or operation handle, in which case the history entry is marked
        as expired instead.
    """
    action = 'query'

    if query_history_id:
        query_history = authorized_get_query_history(request,
                                                     query_history_id,
                                                     must_exist=True)
        design = query_history.design

        try:
            if query_history.server_id and query_history.server_guid:
                # The result is not used here; presumably the call is made so
                # that a stale handle surfaces as QueryServerException below.
                _get_query_handle_and_state(query_history)

            if 'on_success_url' in request.GET:
                on_success_url = request.GET.get('on_success_url')
                if on_success_url and any(
                        regexp.match(on_success_url)
                        for regexp in REDIRECT_WHITELIST.get()):
                    action = 'watch-redirect'
                else:
                    action = 'watch-results'
            else:
                action = 'editor-results'
        except QueryServerException as e:
            if 'Invalid query handle' in e.message or 'Invalid OperationHandle' in e.message:
                query_history.save_state(QueryHistory.STATE.expired)
                LOG.warning("Invalid query handle", exc_info=sys.exc_info())
                action = 'editor-expired-results'
            else:
                # Bare raise keeps the original traceback.
                raise
    else:
        # Check perms.
        authorized_get_design(request, design_id)

        app_name = get_app_name(request)
        query_type = SavedQuery.TYPES_MAPPING[app_name]
        design = safe_get_design(request, query_type, design_id)
        query_history = None

    # Only the apps list is needed; the other names are prefixed instead of
    # using `_`, which would shadow the translation function in this scope.
    _current_app, _other_apps, apps_list = _get_apps(request.user, '')
    doc = design and design.id and design.doc.get()
    context = {
        'design': design,
        'apps': apps_list,
        'query': query_history,  # Backward
        'query_history': query_history,
        'autocomplete_base_url': reverse(
            get_app_name(request) + ':api_autocomplete_databases', kwargs={}),
        'autocomplete_base_url_hive': reverse(
            'beeswax:api_autocomplete_databases', kwargs={}),
        'can_edit_name': design and design.id and not design.is_auto,
        # -1 when there is no document or its id is falsy.
        'doc_id': (doc and doc.id) or -1,
        'can_edit': doc and doc.can_write(request.user),
        'action': action,
        'on_success_url': request.GET.get('on_success_url'),
        'has_metastore': 'metastore' in get_apps_dict(request.user),
    }

    return render('execute.mako', request, context)
Beispiel #44
0
def import_wizard(request, database='default'):
  """
  Help users define table and based on a file they want to import to Hive.
  Limitations:
    - Rows are delimited (no serde).
    - No detection for map and array types.
    - No detection for the presence of column header in the first row.
    - No partition table.
    - Does not work with binary data.

  On POST, exactly one submit/cancel button is expected. Depending on it and
  on the validity of the posted forms, the view renders the delimiter page,
  the column definition page, or submits the create (and load) statement.
  In every other case, including non-POST requests and an invalid file form,
  the file selection page is rendered.
  """
  encoding = i18n.get_site_encoding()
  app_name = get_app_name(request)

  if request.method == 'POST':
    # Single-pass loop: lets the code below `break` out to the file
    # selection render at the end of the function.
    # NOTE(review): the loop variable `_` is local to this function and
    # shadows any module-level `_` here.
    for _ in range(1):
      #
      # General processing logic:
      # - We have 3 steps. Each requires the previous.
      #   * Step 1      : Table name and file location
      #   * Step 2a     : Display sample with auto chosen delim
      #   * Step 2b     : Display sample with user chosen delim (if user chooses one)
      #   * Step 3      : Display sample, and define columns
      # - Each step is represented by a different form. The form of an earlier step
      #   should be present when submitting to a later step.
      # - To preserve the data from the earlier steps, we send the forms back as
      #   hidden fields. This way, when users revisit a previous step, the data would
      #   be there as well.
      #
      delim_is_auto = False
      fields_list, n_cols = [ [] ], 0
      s3_col_formset = None

      # Everything requires a valid file form
      db = dbms.get(request.user)
      s1_file_form = CreateByImportFileForm(request.POST, db=db)
      if not s1_file_form.is_valid():
        break

      do_s2_auto_delim = request.POST.get('submit_file')        # Step 1 -> 2
      do_s2_user_delim = request.POST.get('submit_preview')     # Step 2 -> 2
      do_s3_column_def = request.POST.get('submit_delim')       # Step 2 -> 3
      do_hive_create = request.POST.get('submit_create')        # Step 3 -> execute

      cancel_s2_user_delim = request.POST.get('cancel_delim')   # Step 2 -> 1
      cancel_s3_column_def = request.POST.get('cancel_create')  # Step 3 -> 2

      # Exactly one of these should be True
      # NOTE(review): len(filter(...)) requires filter() to return a list
      # (Python 2 behavior), and the assert is skipped when running with -O.
      assert len(filter(None, (do_s2_auto_delim,
                               do_s2_user_delim,
                               do_s3_column_def,
                               do_hive_create,
                               cancel_s2_user_delim,
                               cancel_s3_column_def))) == 1, 'Invalid form submission'

      #
      # Fix up what we should do in case any form is invalid
      #
      if not do_s2_auto_delim:
        # We should have a valid delim form
        s2_delim_form = CreateByImportDelimForm(request.POST)
        if not s2_delim_form.is_valid():
          # Go back to picking delimiter
          do_s2_user_delim, do_s3_column_def, do_hive_create = True, False, False

      if do_hive_create:
        # We should have a valid columns formset
        s3_col_formset = ColumnTypeFormSet(prefix='cols', data=request.POST)
        if not s3_col_formset.is_valid():
          # Go back to define columns
          do_s3_column_def, do_hive_create = True, False

      #
      # Go to step 2: We've just picked the file. Preview it.
      #
      if do_s2_auto_delim:
        # Preview with every reader type and candidate delimiter; this also
        # creates the delimiter form, which was not built from the POST above.
        delim_is_auto = True
        fields_list, n_cols, s2_delim_form = _delim_preview(
                                              request.fs,
                                              s1_file_form,
                                              encoding,
                                              [ reader.TYPE for reader in FILE_READERS ],
                                              DELIMITERS)

      if (do_s2_user_delim or do_s3_column_def or cancel_s3_column_def) and s2_delim_form.is_valid():
        # Delimit based on input
        fields_list, n_cols, s2_delim_form = _delim_preview(
                                              request.fs,
                                              s1_file_form,
                                              encoding,
                                              (s2_delim_form.cleaned_data['file_type'],),
                                              (s2_delim_form.cleaned_data['delimiter'],))

      if do_s2_auto_delim or do_s2_user_delim or cancel_s3_column_def:
        # 'delim_readable' looks up the first element of the delimiter field
        # data in DELIMITER_READABLE and falls back to the second element.
        return render('choose_delimiter.mako', request, {
          'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
          'delim_readable': DELIMITER_READABLE.get(s2_delim_form['delimiter'].data[0], s2_delim_form['delimiter'].data[1]),
          'initial': delim_is_auto,
          'file_form': s1_file_form,
          'delim_form': s2_delim_form,
          'fields_list': fields_list,
          'delimiter_choices': TERMINATOR_CHOICES,
          'n_cols': n_cols,
          'database': database,
        })

      #
      # Go to step 3: Define column.
      #
      if do_s3_column_def:
        if s3_col_formset is None:
          # No posted formset: propose one string column per previewed
          # column, named col_0, col_1, ...
          columns = []
          for i in range(n_cols):
            columns.append(dict(
                column_name='col_%s' % (i,),
                column_type='string',
            ))
          s3_col_formset = ColumnTypeFormSet(prefix='cols', initial=columns)
        return render('define_columns.mako', request, {
          'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
          'file_form': s1_file_form,
          'delim_form': s2_delim_form,
          'column_formset': s3_col_formset,
          'fields_list': fields_list,
          'n_cols': n_cols,
           'database': database,
        })

      #
      # Finale: Execute
      #
      if do_hive_create:
        # Render the CREATE TABLE statement from the validated forms, then
        # hand it over together with the optional data load.
        delim = s2_delim_form.cleaned_data['delimiter']
        table_name = s1_file_form.cleaned_data['name']
        proposed_query = django_mako.render_to_string("create_table_statement.mako", {
            'table': dict(name=table_name,
                          comment=s1_file_form.cleaned_data['comment'],
                          row_format='Delimited',
                          field_terminator=delim),
            'columns': [ f.cleaned_data for f in s3_col_formset.forms ],
            'partition_columns': [],
            'database': database,
          }
        )

        do_load_data = s1_file_form.cleaned_data.get('do_import')
        path = s1_file_form.cleaned_data['path']
        return _submit_create_and_load(request, proposed_query, table_name, path, do_load_data, database=database)
  else:
    s1_file_form = CreateByImportFileForm()

  # Step 1 (also reached when the file form is invalid or when no branch
  # above returned): render the file selection page.
  return render('choose_file.mako', request, {
    'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
    'file_form': s1_file_form,
    'database': database,
  })
Beispiel #45
0
def save_results_hive_table(request, query_history_id):
    """
    Save the results of a query to a hive table.

    Rerun the query.

    A ``response`` dict is filled with a ``status`` (0 initially, -1 when the
    query is not successful, -2 when the query handle or result metadata
    cannot be obtained, -3 when the table creation fails) and a ``message``.
    On success it also carries the id and query of the new history entry and
    the URLs to describe the table and to watch the query.

    NOTE(review): in the visible code only the -1 and -2 paths return a
    JsonResponse; the remaining paths fall off the end of the function.
    """
    response = {'status': 0, 'message': ''}

    query_history = authorized_get_query_history(request,
                                                 query_history_id,
                                                 must_exist=True)
    server_id, state = _get_query_handle_and_state(query_history)
    query_history.save_state(state)
    error_msg, log = None, None

    if request.method != 'POST':
        response['message'] = _('A POST request is required.')
    else:
        if not query_history.is_success():
            response['message'] = _(
                'This query is %(state)s. Results unavailable.') % {
                    'state': state
                }
            response['status'] = -1
            return JsonResponse(response)

        db = dbms.get(request.user, query_history.get_query_server_config())
        database = query_history.design.get_design().query.get(
            'database', 'default')
        form = beeswax.forms.SaveResultsTableForm(
            {'target_table': request.POST.get('table')},
            db=db,
            database=database)

        if form.is_valid():
            try:
                handle, state = _get_query_handle_and_state(query_history)
                result_meta = db.get_results_metadata(handle)
            except Exception:
                # The exception itself is not reported, only the history entry.
                response['message'] = _(
                    'Cannot find query handle and state: %s') % str(
                        query_history)
                response['status'] = -2
                return JsonResponse(response)

            try:
                query_history = db.create_table_as_a_select(
                    request, query_history, form.target_database,
                    form.cleaned_data['target_table'], result_meta)
                response['id'] = query_history.id
                response['query'] = query_history.query
                response['type'] = 'hive-table'
                response['path'] = form.cleaned_data['target_table']
                response['success_url'] = reverse(
                    'metastore:describe_table',
                    kwargs={
                        'database': form.target_database,
                        'table': form.cleaned_data['target_table']
                    })
                response['watch_url'] = reverse(
                    get_app_name(request) + ':api_watch_query_refresh_json',
                    kwargs={'id': query_history.id})
            except Exception as ex:
                error_msg, log = expand_exception(ex, db)
                response['message'] = _(
                    'The result could not be saved: %s.') % error_msg
                response['status'] = -3
Beispiel #46
0
def save_results_hdfs_file(request, query_history_id):
    """
    Save the results of a query to an HDFS file.

    Do not rerun the query.

    A ``response`` dict is filled with a ``status`` (0 initially, -1 when the
    query is not successful, -2 when the query handle cannot be obtained, -3
    when saving fails) and a ``message``. On success it also carries the
    history id, the query, the target path and the URLs to view the file and
    to watch the query.

    NOTE(review): in the visible code only the -1 and -2 paths return a
    JsonResponse; the remaining paths fall off the end of the function.
    """
    response = {'status': 0, 'message': ''}

    query_history = authorized_get_query_history(request,
                                                 query_history_id,
                                                 must_exist=True)
    server_id, state = _get_query_handle_and_state(query_history)
    query_history.save_state(state)
    error_msg, log = None, None

    if request.method != 'POST':
        response['message'] = _('A POST request is required.')
    else:
        if not query_history.is_success():
            response['message'] = _(
                'This query is %(state)s. Results unavailable.') % {
                    'state': state
                }
            response['status'] = -1
            return JsonResponse(response)

        db = dbms.get(request.user, query_history.get_query_server_config())

        form = beeswax.forms.SaveResultsFileForm({
            'target_file': request.POST.get('path'),
            'overwrite': request.POST.get('overwrite', False),
        })

        if form.is_valid():
            target_file = form.cleaned_data['target_file']
            overwrite = form.cleaned_data['overwrite']

            try:
                handle, state = _get_query_handle_and_state(query_history)
            except Exception:
                # The exception itself is not reported, only the history entry.
                response['message'] = _(
                    'Cannot find query handle and state: %s') % str(
                        query_history)
                response['status'] = -2
                return JsonResponse(response)

            try:
                # An existing file is removed as the requesting user when
                # overwriting; an existing directory is refused.
                if overwrite and request.fs.exists(target_file):
                    if request.fs.isfile(target_file):
                        request.fs.do_as_user(request.user.username,
                                              request.fs.rmtree, target_file)
                    else:
                        raise PopupException(
                            _("The target path is a directory"))

                upload(target_file, handle, request.user, db, request.fs)

                response['type'] = 'hdfs-file'
                response['id'] = query_history.id
                response['query'] = query_history.query
                response['path'] = target_file
                response['success_url'] = '/filebrowser/view%s' % target_file
                response['watch_url'] = reverse(
                    get_app_name(request) + ':api_watch_query_refresh_json',
                    kwargs={'id': query_history.id})
            except Exception as ex:
                error_msg, log = expand_exception(ex, db)
                response['message'] = _(
                    'The result could not be saved: %s.') % error_msg
                response['status'] = -3
Beispiel #47
0
def execute(request, design_id=None):
    """
    Execute or explain the query posted with the request.

    The design is saved (non-explicitly) from the query form first. A
    parameterized query has its variables substituted from the
    "parameterization" form before it is run. The ``explain`` GET parameter
    selects explain instead of execute.

    NOTE(review): this excerpt is cut off; the end of the function is not
    visible.
    """
    response = {'status': -1, 'message': ''}

    # NOTE(review): there is no early return here, so a non-POST request
    # continues into the code below with only the message set.
    if request.method != 'POST':
        response['message'] = _('A POST request is required.')

    app_name = get_app_name(request)
    query_server = get_query_server_config(app_name)
    query_type = beeswax.models.SavedQuery.TYPES_MAPPING[app_name]
    design = safe_get_design(request, query_type, design_id)

    try:
        query_form = get_query_form(request)

        if query_form.is_valid():
            query_str = query_form.query.cleaned_data["query"]
            explain = request.GET.get('explain', 'false').lower() == 'true'
            design = save_design(request, query_form, query_type, design,
                                 False)

            if query_form.query.cleaned_data['is_parameterized']:
                # Parameterized query
                parameterization_form_cls = make_parameterization_form(
                    query_str)
                if parameterization_form_cls:
                    parameterization_form = parameterization_form_cls(
                        request.REQUEST, prefix="parameterization")

                    if parameterization_form.is_valid():
                        parameters = parameterization_form.cleaned_data
                        real_query = substitute_variables(
                            query_str, parameters)
                        query = HQLdesign(query_form, query_type=query_type)
                        # Replace the query text of the design with the
                        # substituted one.
                        query._data_dict['query']['query'] = real_query

                        try:
                            if explain:
                                return explain_directly(
                                    request, query_server, query)
                            else:
                                return execute_directly(request,
                                                        query,
                                                        design,
                                                        query_server,
                                                        parameters=parameters)

                        except Exception, ex:
                            db = dbms.get(request.user, query_server)
                            error_message, log = expand_exception(ex, db)
                            response['message'] = error_message
                            return JsonResponse(response)
                    else:
                        response['errors'] = parameterization_form.errors
                        return JsonResponse(response)

            # Non-parameterized query (also reached when no parameterization
            # form class could be made for the query)
            query = HQLdesign(query_form, query_type=query_type)
            if request.GET.get('explain', 'false').lower() == 'true':
                return explain_directly(request, query_server, query)
            else:
                return execute_directly(request, query, design, query_server)
        # NOTE(review): the excerpt ends at the `else:` below; the invalid-form
        # branch and the handlers of the enclosing `try` are not visible.
        else:
Beispiel #48
0
        'log':
        log,
        'jobs':
        jobs,
        'jobUrls':
        job_urls,
        'isSuccess':
        query_history.is_success(),
        'isFailure':
        query_history.is_failure(),
        'id':
        id,
        'statement':
        query_history.get_current_statement(),
        'watch_url':
        reverse(get_app_name(request) + ':api_watch_query_refresh_json',
                kwargs={'id': query_history.id}),
        'oldLogsApi':
        USE_GET_LOG_API.get()
    }

    # Run time error
    if query_history.is_failure():
        res = db.get_operation_status(handle)
        if query_history.is_canceled(res):
            result['status'] = 0
        elif hasattr(res, 'errorMessage') and res.errorMessage:
            result['message'] = res.errorMessage
        else:
            result['message'] = _('Bad status for request %s:\n%s') % (id, res)
    else:
Beispiel #49
0
        download_urls[format] = reverse(app_name + ':download', kwargs=dict(id=str(id), format=format))

    save_form = beeswax.forms.SaveResultsForm()
    results.start_row = first_row

    context.update({
      'results': data,
      'has_more': results.has_more,
      'next_row': results.start_row + len(data),
      'start_row': results.start_row,
      'expected_first_row': first_row,
      'columns': results.columns,
      'download_urls': download_urls,
      'save_form': save_form,
      'can_save': query_history.owner == request.user and not download,
      'next_json_set': reverse(get_app_name(request) + ':view_results', kwargs={
        'id': str(id),
        'first_row': results.start_row + len(data)
      }) + ('?context=' + context_param or '') + '&format=json'
    })

  if request.GET.get('format') == 'json':
    context = {
      'results': data,
      'has_more': results.has_more,
      'next_row': results.start_row + len(data),
      'start_row': results.start_row,
      'next_json_set': reverse(get_app_name(request) + ':view_results', kwargs={
        'id': str(id),
        'first_row': results.start_row + len(data)
      }) + ('?context=' + context_param or '') + '&format=json'
Beispiel #50
0
def describe_table(request, database, table):
    """
    Describe a table, either as JSON or as the rendered metastore page.

    The connector is chosen from the ``cluster`` POST parameter (JSON) and
    the ``source_type`` POST/GET parameter (default 'hive'). When the POST
    parameter ``format`` is "json", a JsonResponse with the table's
    attributes is returned; otherwise "metastore.mako" is rendered.
    Partitions are fetched only for the HTML page, for partitioned tables,
    when the app is not impala.

    Raises:
      PopupException: when the table cannot be fetched.
    """
    app_name = get_app_name(request)
    cluster = json.loads(request.POST.get('cluster', '{}'))
    source_type = request.POST.get('source_type',
                                   request.GET.get('source_type', 'hive'))
    db = _get_db(user=request.user, source_type=source_type, cluster=cluster)

    try:
        # From here on `table` is the table object, not the name passed in.
        table = db.get_table(database, table)
    except Exception as e:
        LOG.exception("Describe table error")
        raise PopupException(
            _("DB Error"),
            detail=e.message if hasattr(e, 'message') and e.message else e)

    if request.POST.get("format", "html") == "json":
        return JsonResponse({
            'status': 0,
            'name': table.name,
            'partition_keys': [{
                'name': part.name,
                'type': part.type
            } for part in table.partition_keys],
            'cols': [{
                'name': col.name,
                'type': col.type,
                'comment': col.comment
            } for col in table.cols],
            'path_location': table.path_location,
            'hdfs_link': table.hdfs_link,
            'comment': table.comment,
            'is_view': table.is_view,
            'properties': table.properties,
            'details': table.details,
            'stats': table.stats
        })

    # Render HTML
    renderable = "metastore.mako"
    apps_list = _get_apps(request.user, '')

    partitions = None
    if app_name != 'impala' and table.partition_keys:
        try:
            partitions = [
                _massage_partition(database, table, partition)
                for partition in db.get_partitions(database, table)
            ]
        except Exception:
            # Best effort: the page is still rendered without partitions.
            # `Exception` rather than a bare except, so that SystemExit and
            # KeyboardInterrupt are not swallowed.
            LOG.exception('Table partitions could not be retrieved')

    return render(
        renderable, request, {
            'apps': apps_list,
            'breadcrumbs': [
                {
                    'name': database,
                    'url': reverse('metastore:show_tables',
                                   kwargs={'database': database})
                },
                {
                    'name': str(table.name),
                    'url': reverse('metastore:describe_table',
                                   kwargs={
                                       'database': database,
                                       'table': table.name
                                   })
                },
            ],
            'table': table,
            'partitions': partitions,
            'database': database,
            'has_write_access': has_write_access(request.user),
            'is_optimizer_enabled': has_optimizer(),
            'is_navigator_enabled': has_catalog(request.user),
            'optimizer_url': get_optimizer_url(),
            'navigator_url': get_catalog_url(),
            'is_embeddable': request.GET.get('is_embeddable', False),
            'source_type': _get_servername(db),
        })
Beispiel #51
0
def view_results(request, id, first_row=0):
    """
    Render the results page of the QueryHistory with the given id.

    The query results MUST be ready; to display query results, one should
    always go through the watch_query view.

    A ``first_row`` of 0 asks the server to restart the read of the results.
    Multiple readers will produce a confusing interaction here, and that's
    known.

    It understands the ``context`` GET parameter (see watch_query()) and the
    ``download`` GET parameter, which skips fetching rows.
    """
    first_row = long(first_row)
    restart_fetch = first_row == 0
    results = None
    data = None
    fetch_error = False
    error_message = ''
    log = ''
    app_name = get_app_name(request)

    query_history = authorized_get_history(request, id, must_exist=True)
    db = dbms.get(request.user, query_history.get_query_server_config())

    handle, state = _get_query_handle_and_state(query_history)
    context_param = request.GET.get('context', '')
    query_context = _parse_query_context(context_param)

    # To remove in Hue 2.3
    download = request.GET.get('download', '')

    # Persist the expired state; an expired query is reported as an error.
    expired = state == QueryHistory.STATE.expired
    if expired:
        query_history.save_state(QueryHistory.STATE.expired)

    # Retrieve query results
    try:
        if download:
            # Nothing is fetched: an empty placeholder stands in for the
            # result set.
            downloadable = True
            data = []
            results = type(
                'Result', (object, ),
                dict(rows=0, columns=[], has_more=False, start_row=0))
        else:
            results = db.fetch(handle, restart_fetch, 100)
            data = list(results.rows())  # Materialize results

            # We display the "Download" button only when we know that there are results:
            downloadable = first_row > 0 or data
        log = db.get_log(handle)
    except Exception as ex:
        fetch_error = True
        error_message, log = expand_exception(ex, db)

    # Handle errors
    error = fetch_error or results is None or expired

    context = {
        'error': error,
        'error_message': error_message,
        'expired': expired,
        'has_more': True,
        'query': query_history,
        'query_context': query_context,
        'context_param': context_param,
        'results': data,
        'expected_first_row': first_row,
        'log': log,
        'hadoop_jobs': _parse_out_hadoop_jobs(log)[0],
        'can_save': False,
        'app_name': app_name,
        'download': download,
    }

    if error:
        return render('watch_results.mako', request, context)

    # One download link per supported format, only when there is data.
    download_urls = {}
    if downloadable:
        for fmt in common.DL_FORMATS:
            download_urls[fmt] = urlresolvers.reverse(
                'beeswax' + ':download',
                kwargs={'id': str(id), 'format': fmt})

    save_form = SaveResultsForm()
    results.start_row = first_row

    context.update({
        'results': data,
        'has_more': results.has_more,
        'next_row': results.start_row + len(data),
        'start_row': results.start_row,
        'expected_first_row': first_row,
        'columns': results.columns,
        'download_urls': download_urls,
        'save_form': save_form,
        'can_save': query_history.owner == request.user and not download,
    })

    return render('watch_results.mako', request, context)
Beispiel #52
0
def import_wizard(request, database='default'):
    """
  Help users define a table based on a file they want to import to Hive.

  The wizard is driven by POST submissions and has three steps, each with
  its own form:
    * Step 1  : table name and file location (CreateByImportFileForm)
    * Step 2a : preview the file with an automatically chosen delimiter
    * Step 2b : preview the file with a user chosen delimiter
    * Step 3  : preview the file and define the columns (ColumnTypeFormSet)
  The final submission renders a CREATE TABLE statement and hands it to
  _submit_create_and_load().

  Limitations:
    - Rows are delimited (no serde).
    - No detection for map and array types.
    - No detection for the presence of column header in the first row.
    - No partition table.
    - Does not work with binary data.

  request  - The HTTP request; the submit/cancel button pressed is read from
             request.POST.
  database - Name of the database the table is created in.

  Raises PopupException when the submission does not carry exactly one
  submit/cancel action, when the column-definition page cannot be rendered,
  or when the table creation fails with a QueryServerException.

  NOTE(review): in the code visible here nothing is returned for non-POST
  requests, for an invalid step 1 form, or for the 'cancel_delim' action;
  presumably the "choose file" page is rendered by code that is not part of
  this excerpt -- confirm.
  """
    encoding = i18n.get_site_encoding()
    app_name = get_app_name(request)

    db = dbms.get(request.user)
    dbs = db.get_databases()
    # The loop variable must not be called `db`: under Python 2 a list
    # comprehension variable leaks into the enclosing scope, which replaced
    # the dbms handle by the last database name before it was passed to
    # CreateByImportFileForm below.
    databases = [{
        'name':
        db_name,
        'url':
        reverse('beeswax:import_wizard', kwargs={'database': db_name})
    } for db_name in dbs]

    if request.method == 'POST':
        #
        # General processing logic:
        # - We have 3 steps. Each requires the previous.
        #   * Step 1      : Table name and file location
        #   * Step 2a     : Display sample with auto chosen delim
        #   * Step 2b     : Display sample with user chosen delim (if user chooses one)
        #   * Step 3      : Display sample, and define columns
        # - Each step is represented by a different form. The form of an earlier step
        #   should be present when submitting to a later step.
        # - To preserve the data from the earlier steps, we send the forms back as
        #   hidden fields. This way, when users revisit a previous step, the data would
        #   be there as well.
        #
        delim_is_auto = False
        fields_list, n_cols = [[]], 0
        s3_col_formset = None
        s1_file_form = CreateByImportFileForm(request.POST, db=db)

        if s1_file_form.is_valid():
            do_s2_auto_delim = request.POST.get('submit_file')  # Step 1 -> 2
            do_s2_user_delim = request.POST.get(
                'submit_preview')  # Step 2 -> 2
            do_s3_column_def = request.POST.get('submit_delim')  # Step 2 -> 3
            do_hive_create = request.POST.get(
                'submit_create')  # Step 3 -> execute

            cancel_s2_user_delim = request.POST.get(
                'cancel_delim')  # Step 2 -> 1
            cancel_s3_column_def = request.POST.get(
                'cancel_create')  # Step 3 -> 2

            # Exactly one of these should be True. Counting with a generator
            # works whether or not filter() returns a list.
            requested_actions = (do_s2_auto_delim, do_s2_user_delim,
                                 do_s3_column_def, do_hive_create,
                                 cancel_s2_user_delim, cancel_s3_column_def)
            if sum(1 for action in requested_actions if action) != 1:
                raise PopupException(_('Invalid form submission'))

            if not do_s2_auto_delim:
                # We should have a valid delim form
                s2_delim_form = CreateByImportDelimForm(request.POST)
                if not s2_delim_form.is_valid():
                    # Go back to picking delimiter
                    do_s2_user_delim, do_s3_column_def, do_hive_create = True, False, False
            if do_hive_create:
                # We should have a valid columns formset
                s3_col_formset = ColumnTypeFormSet(prefix='cols',
                                                   data=request.POST)
                if not s3_col_formset.is_valid():
                    # Go back to define columns
                    do_s3_column_def, do_hive_create = True, False

            #
            # Go to step 2: We've just picked the file. Preview it.
            #
            if do_s2_auto_delim:
                delim_is_auto = True
                fields_list, n_cols, s2_delim_form = _delim_preview(
                    request.fs, s1_file_form, encoding,
                    [reader.TYPE for reader in FILE_READERS], DELIMITERS)

            if (do_s2_user_delim or do_s3_column_def
                    or cancel_s3_column_def) and s2_delim_form.is_valid():
                # Delimit based on input
                fields_list, n_cols, s2_delim_form = _delim_preview(
                    request.fs, s1_file_form, encoding,
                    (s2_delim_form.cleaned_data['file_type'], ),
                    (s2_delim_form.cleaned_data['delimiter'], ))

            if do_s2_auto_delim or do_s2_user_delim or cancel_s3_column_def:
                return render(
                    'import_wizard_choose_delimiter.mako', request, {
                        'action':
                        reverse(app_name + ':import_wizard',
                                kwargs={'database': database}),
                        'delim_readable':
                        DELIMITER_READABLE.get(
                            s2_delim_form['delimiter'].data[0],
                            s2_delim_form['delimiter'].data[1]),
                        'initial':
                        delim_is_auto,
                        'file_form':
                        s1_file_form,
                        'delim_form':
                        s2_delim_form,
                        'fields_list':
                        fields_list,
                        'delimiter_choices':
                        TERMINATOR_CHOICES,
                        'n_cols':
                        n_cols,
                        'database':
                        database,
                        'databases':
                        databases
                    })

            #
            # Go to step 3: Define column.
            #
            if do_s3_column_def:
                if s3_col_formset is None:
                    # First visit of step 3: propose col_0..col_N string columns.
                    columns = [{
                        'column_name': 'col_%s' % (i, ),
                        'column_type': 'string',
                    } for i in range(n_cols)]
                    s3_col_formset = ColumnTypeFormSet(prefix='cols',
                                                       initial=columns)
                try:
                    fields_list_for_json = list(fields_list)
                    if fields_list_for_json:
                        # Cleaning headers. A real list (not a lazy map
                        # object) is needed so json.dumps can serialize it.
                        fields_list_for_json[0] = [
                            re.sub(r'[^\w]', '', header)
                            for header in fields_list_for_json[0]
                        ]

                    return render(
                        'import_wizard_define_columns.mako', request, {
                            'action':
                            reverse(app_name + ':import_wizard',
                                    kwargs={'database': database}),
                            'file_form':
                            s1_file_form,
                            'delim_form':
                            s2_delim_form,
                            'column_formset':
                            s3_col_formset,
                            'fields_list':
                            fields_list,
                            'fields_list_json':
                            json.dumps(fields_list_for_json),
                            'n_cols':
                            n_cols,
                            'database':
                            database,
                            'databases':
                            databases
                        })
                except Exception as e:
                    raise PopupException(_(
                        "The selected delimiter is creating an un-even number of columns. Please make sure you don't have empty columns."
                    ),
                                         detail=e)

            #
            # Final: Execute
            #
            if do_hive_create:
                delim = s2_delim_form.cleaned_data['delimiter']
                table_name = s1_file_form.cleaned_data['name']
                proposed_query = django_mako.render_to_string(
                    "create_table_statement.mako", {
                        'table': {
                            'name': table_name,
                            'comment': s1_file_form.cleaned_data['comment'],
                            'row_format': 'Delimited',
                            'field_terminator': delim,
                            'file_format': 'TextFile'
                        },
                        'columns':
                        [f.cleaned_data for f in s3_col_formset.forms],
                        'partition_columns': [],
                        'database': database,
                        'databases': databases
                    })

                do_load_data = s1_file_form.cleaned_data.get('do_import')
                path = s1_file_form.cleaned_data['path']
                try:
                    return _submit_create_and_load(request,
                                                   proposed_query,
                                                   table_name,
                                                   path,
                                                   do_load_data,
                                                   database=database)
                except QueryServerException as e:
                    raise PopupException(_('The table could not be created.'),
                                         detail=e.message)
Beispiel #53
0
    if design.id is not None:
      data = HQLdesign.loads(design.data).get_query_dict()
      form.bind(data)
      form.saveform.set_data(design.name, design.desc)
    else:
      # New design
      form.bind()
    form.query.fields['database'].choices = databases # Could not do it in the form

  return render('execute.mako', request, {
    'action': action,
    'design': design,
    'error_message': error_message,
    'form': form,
    'log': log,
    'autocomplete_base_url': reverse(get_app_name(request) + ':autocomplete', kwargs={}),
    'on_success_url': on_success_url,
    'can_edit_name': design and not design.is_auto and design.name,
  })


def execute_parameterized_query(request, design_id):
  """
  Run the parameterized design ``design_id`` as an execution.

  Delegates to _run_parameterized_query() with the explain flag turned off.
  """
  explain = False
  return _run_parameterized_query(request, design_id, explain)


def explain_parameterized_query(request, design_id):
  """
  Run the parameterized design ``design_id`` as an explain.

  Delegates to _run_parameterized_query() with the explain flag turned on.
  """
  explain = True
  return _run_parameterized_query(request, design_id, explain)


def watch_query(request, id):
  """
Beispiel #54
0
def view_results(request, id, first_row=0):
  """
  Returns the view for the results of the QueryHistory with the given id.

  The query results MUST be ready.
  To display query results, one should always go through the execute_query view.
  If the query has no result set (has_result_set=False), an empty result is displayed.

  If ``first_row`` is 0, restarts (if necessary) the query read.  Otherwise, just
  spits out a warning if first_row doesn't match the servers conception.
  Multiple readers will produce a confusing interaction here, and that's known.

  It understands the ``context`` GET parameter. (See execute_query().)

  request   - The HTTP request; ``request.user`` selects the db connection.
  id        - Id of the QueryHistory to display; it must exist.
  first_row - Index of the first row to show; converted with long().

  NOTE(review): the code visible here stops after the fetch error handling
  and has no return statement; the rendering of the collected values
  presumably follows in code that is not part of this excerpt.
  """
  first_row = long(first_row)
  # A request for row 0 means the result read is (re)started from the top.
  start_over = (first_row == 0)
  # Placeholder result used until (and unless) db.fetch() replaces it.
  results = type('Result', (object,), {
                'rows': 0,
                'columns': [],
                'has_more': False,
                'start_row': 0,
            })
  data = []
  fetch_error = False
  error_message = ''
  log = ''
  columns = []
  app_name = get_app_name(request)

  query_history = authorized_get_query_history(request, id, must_exist=True)
  query_server = query_history.get_query_server_config()
  db = dbms.get(request.user, query_server)

  handle, state = _get_query_handle_and_state(query_history)
  context_param = request.GET.get('context', '')
  query_context = parse_query_context(context_param)

  # Update the status as expired should not be accessible
  # Impala does not support startover for now
  expired = state == models.QueryHistory.STATE.expired

  # Retrieve query results or use empty result if no result set
  try:
    if query_server['server_name'] == 'impala' and not handle.has_result_set:
      # Nothing is fetched in this case, so there is nothing to download.
      downloadable = False
    else:
      # The third argument (100) is presumably the maximum number of rows
      # fetched per page -- TODO confirm against db.fetch().
      results = db.fetch(handle, start_over, 100)
      data = []

      # Materialize and HTML escape results
      # TODO: use Number + list comprehension
      for row in results.rows():
        escaped_row = []
        for field in row:
          if isinstance(field, (int, long, float, complex, bool)):
            # Numbers and booleans are passed through unchanged.
            escaped_field = field
          elif field is None:
            escaped_field = 'NULL'
          else:
            field = smart_unicode(field, errors='replace') # Prevent error when getting back non utf8 like charset=iso-8859-1
            # Spaces become &nbsp; after escaping the field.
            escaped_field = escape(field).replace(' ', '&nbsp;')
          escaped_row.append(escaped_field)
        data.append(escaped_row)

      # We display the "Download" button only when we know that there are results:
      downloadable = first_row > 0 or data
      log = db.get_log(handle)
      columns = results.data_table.cols()

  except Exception, ex:
    # Any failure while fetching is recorded instead of propagated; the
    # message and log are taken from expand_exception().
    fetch_error = True
    error_message, log = expand_exception(ex, db, handle)
Beispiel #55
0
def execute_directly(request, query, query_server=None, design=None, tablename=None,
                     on_success_url=None, on_success_params=None, **kwargs):
  """
  execute_directly(request, query_msg, tablename, design) -> HTTP response for execution

  Submits the query through the db returned by dbms.get() and answers with a
  redirect to the ``watch_query`` page of the resulting history object.

    query
      The HQL model Query object. Its ``query`` dictionary provides the
      database to use ('default' when absent).

    query_server
      To which Query Server to submit the query.
      Dictionary with keys: ['server_name', 'server_host', 'server_port'].

    design
      The design associated with the query. When given, it is also used as
      the "context" GET param of the watch page.

    tablename
      The associated table name for the context; only used when there is no
      design.

    on_success_url
      Where to go after the query is done. The URL handler may expect an option "context" GET
      param. (See ``watch_query``.) It can also be a callable, in which case
      the URL is the return value of:
        on_success_url(history_obj) -> URL string

    on_success_params
      Optional params merged into the GET params of the watch URL (in
      addition to "context").

    kwargs
      Only ``download`` is read here: when truthy, '?download=true' is
      appended to the watch URL.

  Note that this may throw a Beeswax exception.
  """
  if design is not None:
    # The return value is discarded; presumably called for its
    # authorization check only.
    authorized_get_design(request, design.id)

  db = dbms.get(request.user, query_server)
  database = query.query.get('database', 'default')
  db.use(database)

  history = db.execute_query(query, design)

  app_name = get_app_name(request)
  target_url = reverse(app_name + ':watch_query', kwargs={'id': history.id})
  if kwargs.get('download'):
    target_url = target_url + '?download=true'

  # GET params handed over to the watch page.
  params = QueryDict(None, mutable=True)

  # (1) context: the design wins over the table name
  if design:
    params['context'] = make_query_context('design', design.id)
  elif tablename:
    params['context'] = make_query_context('table', '%s:%s' % (tablename, database))

  # (2) on_success_url: resolve callables against the new history object
  if on_success_url:
    if callable(on_success_url):
      resolved_url = on_success_url(history)
    else:
      resolved_url = on_success_url
    params['on_success_url'] = resolved_url

  # (3) misc
  if on_success_params:
    params.update(on_success_params)

  return format_preserving_redirect(request, target_url, params)
Beispiel #56
0
def load_table(request, database, table):
    """
    Loads data into a table.

    On GET, renders the load form ("load_table.mako") for the table.
    On POST, validates the form, builds a ``LOAD DATA INPATH`` statement
    (with optional OVERWRITE and PARTITION clauses), runs it through
    do_load_table() and returns a JSON response holding the
    ``on_success_url`` of the table description page.

    request  - The HTTP request.
    database - Name of the database holding the table.
    table    - Name of the table to load data into.

    Raises PopupException when the table description cannot be fetched,
    when the submitted form is invalid, or when the load itself fails.
    """
    try:
        table_desc_extended = HCatClient(
            request.user.username).describe_table_extended(table, db=database)
        partition_columns = []
        if table_desc_extended['partitioned']:
            partition_columns = table_desc_extended['partitionColumns']
        table_obj = {
            'tableName': table,
            'columns': table_desc_extended['columns'],
            'partitionKeys': partition_columns
        }
    except Exception:
        import traceback

        error = traceback.format_exc()
        raise PopupException('Error getting table description',
                             title="Error getting table description",
                             detail=error)

    if request.method != "POST":
        form = hcatalog.forms.LoadDataForm(table_obj)
        return render(
            "load_table.mako", request,
            dict(form=form, table=table, action=request.get_full_path()))

    form = hcatalog.forms.LoadDataForm(table_obj, request.POST)
    if not form.is_valid():
        # Previously an invalid form fell through and an empty statement was
        # handed to do_load_table(); report the validation errors instead.
        raise PopupException('Error loading data into the table',
                             title="Error loading data into the table",
                             detail='The load form is invalid: %s' % form.errors)

    # NOTE(review): the statement is assembled by string interpolation of
    # user-submitted values (path, partition values). Quotes inside those
    # values are not escaped here -- confirm the form cleans them.
    hql = "LOAD DATA INPATH '%s'" % form.cleaned_data['path']
    if form.cleaned_data['overwrite']:
        hql += " OVERWRITE"
    hql += " INTO TABLE `%s.%s`" % (database, table)
    if form.partition_columns:
        # partition_columns maps a form field key to its partition column name.
        assignments = [
            "%s='%s'" % (column_name, form.cleaned_data[key])
            for key, column_name in form.partition_columns.items()
        ]
        hql += " PARTITION (%s)" % ", ".join(assignments)
    hql += ";"

    try:
        do_load_table(request, hql)
    except Exception:
        import traceback

        error = traceback.format_exc()
        raise PopupException('Error loading data into the table',
                             title="Error loading data into the table",
                             detail=error)

    on_success_url = urlresolvers.reverse(get_app_name(request) +
                                          ':describe_table',
                                          kwargs=dict(database=database,
                                                      table=table))
    result = {'on_success_url': on_success_url}
    return HttpResponse(json.dumps(result))