Beispiel #1
0
def view(request, path):
    """Dispatch viewing of a path to a directory listing or a file display.

    Two optional GET flags are handled before ``path`` is looked at:

      default_to_home
        Redirect to the user's home directory, if it is a directory.

      default_to_trash
        Redirect to the user's trash directory, or failing that to the
        top-level trash path, if one of them is a directory.

    Otherwise ``path`` is stat'ed and handed to ``listdir_paged()`` when it is
    a directory, or to ``display()`` when it is not.

    @param request incoming request; ``request.fs`` is the filesystem queried
    @param path path to view
    @return the redirect, listing or display response; when access fails
            during an AJAX request, the result of ``render_json()`` for a
            dictionary with an ``error`` key
    @raise PopupException when access fails during a non-AJAX request
    """
    # default_to_home is set in bootstrap.js
    if 'default_to_home' in request.GET:
        home_dir_path = request.user.get_home_directory()
        if request.fs.isdir(home_dir_path):
            return format_preserving_redirect(request, reverse(view, kwargs=dict(path=home_dir_path)))

    if 'default_to_trash' in request.GET:
        # The first character of the home path is dropped (presumably its
        # leading '/') before it is joined under the trash's 'Current' folder.
        home_trash = request.fs.join(request.fs.trash_path, 'Current', request.user.get_home_directory()[1:])
        if request.fs.isdir(home_trash):
            return format_preserving_redirect(request, reverse(view, kwargs=dict(path=home_trash)))
        if request.fs.isdir(request.fs.trash_path):
            return format_preserving_redirect(request, reverse(view, kwargs=dict(path=request.fs.trash_path)))

    try:
        stats = request.fs.stats(path)
        if stats.isDir:
            return listdir_paged(request, path)
        else:
            return display(request, path)
    except (IOError, WebHdfsException) as e:
        msg = _("Cannot access: %(path)s.") % {'path': escape(path)}
        # NOTE(review): this compares the user object itself with
        # request.fs.superuser -- confirm that is the intended comparison.
        if request.user.is_superuser and not request.user == request.fs.superuser:
            msg += _(' Note: You are a Hue admin but not a HDFS superuser (which is "%(superuser)s").') % {'superuser': request.fs.superuser}
        if request.is_ajax():
            # AJAX callers get the message back as data instead of an error page.
            exception = {
                'error': msg
            }
            return render_json(exception)
        else:
            raise PopupException(msg, detail=e)
Beispiel #2
0
def watch_query(request, id):
  """
  Check on query ``id`` and either redirect to its results or show a wait page.

  Recognised GET params:

    on_success_url
      Where to redirect once the query has finished successfully. When it is
      missing or empty, the view-results page is used instead.

    context
      A "name:data" string describing what generated this query result.
      It may be:
        - "table":"<table_name>"
        - "design":<design_id>

  On every redirect the complete set of GET params is handed to
  format_preserving_redirect(). While the query is neither successful nor
  failed, 'watch_wait.mako' is rendered with the current server log.
  """
  history = authorized_get_history(request, id, must_exist=True)
  raw_context = request.GET.get('context', '')

  # The results page doubles as the default success target.
  default_url = urlresolvers.reverse(view_results, kwargs={'id': id, 'first_row': 0})
  success_url = request.GET.get('on_success_url') or default_url

  # Refresh the stored state before deciding what to do.
  handle, state = _get_query_handle_and_state(history)
  history.save_state(state)

  if history.is_success():
    return format_preserving_redirect(request, success_url, request.GET)
  if history.is_failure():
    # When we fetch, Beeswax server will throw us a BeeswaxException, which has the
    # log we want to display.
    return format_preserving_redirect(request, default_url, request.GET)

  # Still running: fetch the log and keep the client waiting.
  server = dbms.get(request.user, history.get_query_server())
  log = server.get_log(handle)

  # Translate context into something more meaningful (type, data)
  parsed_context = _parse_query_context(raw_context)

  template_args = {
    'query': history,
    'fwd_params': request.GET.urlencode(),
    'log': log,
    'hadoop_jobs': _parse_out_hadoop_jobs(log),
    'query_context': parsed_context,
  }
  return render('watch_wait.mako', request, template_args)
Beispiel #3
0
def generic_op(form_class, request, op, parameter_names, piggyback=None, template="fileop.mako", data_extractor=default_data_extractor, arg_extractor=default_arg_extractor, initial_value_extractor=default_initial_value_extractor, extra_params=None):
    """
    Generic implementation for several operations.

    On a POST with a valid form, runs ``op`` with the extracted arguments and
    then either redirects to ``next`` (when given) or renders ``template``.

    @param form_class form to instantiate
    @param request incoming request, used for parameters
    @param op callable with the filesystem operation
    @param parameter_names list of form parameters that are extracted and then passed to op
    @param piggyback name of the form parameter whose file stats to look up after the operation
    @param template template rendered after a successful operation
    @param data_extractor function that extracts POST data to be used by op
    @param arg_extractor function that extracts args from a given form or formset
    @param initial_value_extractor function that extracts the initial values of a form or formset
           (not used by the code visible in this function)
    @param extra_params dictionary of extra parameters to send to the template for rendering
    @raise PopupException when op fails with IOError or WebHdfsException
    """
    # Use next for non-ajax requests, when available.
    next_url = request.GET.get("next", request.POST.get("next", None))

    ret = {'next': next_url}

    if extra_params is not None:
        ret['extra_params'] = extra_params

    # Echo back every non-empty requested parameter to the template.
    for p in parameter_names:
        val = request.REQUEST.get(p)
        if val:
            ret[p] = val

    # NOTE(review): nothing is returned for non-POST requests or invalid
    # forms in the code visible here -- confirm callers handle None.
    if request.method == 'POST':
        form = form_class(**data_extractor(request))
        ret['form'] = form
        if form.is_valid():
            args = arg_extractor(request, form, parameter_names)
            try:
                op(*args)
            except (IOError, WebHdfsException) as e:
                msg = _("Cannot perform operation.")
                if request.user.is_superuser and not request.user == request.fs.superuser:
                    msg += _(' Note: you are a Hue admin but not a HDFS superuser (which is "%(superuser)s").') \
                           % {'superuser': request.fs.superuser}
                raise PopupException(msg, detail=e)
            if next_url:
                logging.debug("Next: %s", next_url)
                # Doesn't need to be quoted: quoting is done by HttpResponseRedirect.
                return format_preserving_redirect(request, next_url)
            ret["success"] = True
            try:
                if piggyback:
                    piggy_path = form.cleaned_data[piggyback]
                    ret["result"] = _massage_stats(request, request.fs.stats(piggy_path))
            except Exception:
                # Hard to report these more naturally here.  These happen either
                # because of a bug in the piggy-back code or because of a
                # race condition.
                logger.exception("Exception while processing piggyback data")
                ret["result_error"] = True

            ret['user'] = request.user
            return render(template, request, ret)
Beispiel #4
0
def _submit_to_cluster(request, job_design, form):
  """
  Build a submission plan from the design and form, submit it, and redirect
  to the submission's watch URL.

  The Submission record is saved before submit() is attempted and again
  afterwards, whether or not submit() raised; on failure its state is set
  to State.ERROR and the exception is re-raised.
  """
  user = request.user
  design_name = job_design.name

  plan = SubmissionPlan()
  plan.name = design_name
  plan.user = user.username
  plan.groups = user.get_groups()
  plan.steps = form.to_job_submission_steps(plan.name)

  submission = Submission(
    owner=user,
    last_seen_state=State.SUBMITTED,
    name=design_name,
    submission_plan=plan,
  )

  # Persist first so a record exists even if submit() blows up.
  submission.save()
  try:
    submission.submission_handle = get_client().submit(plan)
  except Exception:
    submission.last_seen_state = State.ERROR
    raise
  finally:
    # Record either the new handle or the error state.
    submission.save()

  return format_preserving_redirect(request, submission.watch_url())
Beispiel #5
0
def _submit_to_cluster(request, job_design, form):
    """
    Submit a job design to the cluster and redirect to its watch page.

    A SubmissionPlan is filled in from the design name, the requesting user
    and the form's submission steps. The Submission is saved once before
    submit() is called and once after it, even when submit() raises; in
    that case the state becomes State.ERROR and the exception propagates.
    """
    submitter = request.user
    job_name = job_design.name

    plan = SubmissionPlan()
    plan.name = job_name
    plan.user = submitter.username
    plan.groups = submitter.get_groups()
    plan.steps = form.to_job_submission_steps(plan.name)

    record = Submission(
        owner=submitter,
        last_seen_state=State.SUBMITTED,
        name=job_name,
        submission_plan=plan,
    )

    # Save aggressively in case submit() below triggers an error.
    record.save()
    try:
        record.submission_handle = get_client().submit(plan)
    except Exception:
        record.last_seen_state = State.ERROR
        raise
    finally:
        record.save()

    return format_preserving_redirect(request, record.watch_url())
Beispiel #6
0
def setup(request):
  """
  Install the jobsub examples after confirmation.

  A GET request renders the "confirm.html" page; any other method runs the
  jobsub setup command and redirects to "/jobsub".
  """
  if request.method != "GET":
    jobsub_setup.Command().handle_noargs()
    return format_preserving_redirect(request, "/jobsub")

  confirm_args = {'url': request.path, 'title': "Install job design examples?"}
  return render("confirm.html", request, confirm_args)
Beispiel #7
0
def execute_directly(request, query_msg, design=None, tablename=None,
                     on_success_url=None, on_success_params=None, **kwargs):
    """
    Run a query through db_utils.execute_directly() and redirect to the
    hcatalog watch page for the resulting history object.

      query_msg
        The thrift Query object.

      design
        The design associated with the query. When given, access to it is
        checked with authorized_get_design() first.

      tablename
        The associated table name for the context; only used when no
        design is given.

      on_success_url
        Where to go after the query is done, forwarded to the watch page as
        the "on_success_url" GET param (see ``watch_query``). May be a
        callable, in which case the URL is the return of:
          on_success_url(history_obj) -> URL string

      on_success_params
        Optional extra GET params merged into the watch URL's params.

      kwargs
        Passed through to db_utils.execute_directly().

    Note that this may throw a Beeswax exception.
    """
    if design is not None:
        authorized_get_design(request, design.id)

    history_obj = db_utils.execute_directly(request.user, query_msg, design, **kwargs)
    watch_url = urlresolvers.reverse("hcatalog.views.watch_query",
                                     kwargs={'id': history_obj.id})

    # Collect the GET params for the watch_url.
    params = QueryDict(None, mutable=True)

    # (1) context: a design wins over a table name.
    context = None
    if design:
        context = ("design", design.id)
    elif tablename:
        context = ("table", tablename)
    if context is not None:
        params['context'] = make_query_context(*context)

    # (2) on_success_url: resolve callables against the history object.
    if on_success_url:
        success_target = on_success_url
        if callable(success_target):
            success_target = success_target(history_obj)
        params['on_success_url'] = success_target

    # (3) misc
    if on_success_params:
        params.update(on_success_params)

    return format_preserving_redirect(request, watch_url, params)
Beispiel #8
0
def clone_design(request, design_id):
    """
    Copy a design and its document for the requesting user.

    The design is fetched with authorized_get_design(); when that yields
    None an error is logged and the design list is returned instead.
    Otherwise the clone is renamed with a ' (copy)' suffix, owned by the
    requesting user, linked to a copy of the original's document, and the
    user is redirected to the clone's execute_design page.
    """
    original = authorized_get_design(request, design_id)
    if original is None:
        LOG.error('Cannot clone non-existent design %s' % (design_id, ))
        return list_designs(request)

    duplicate = original.clone()
    duplicate_doc = original.doc.get().copy()

    duplicate.name = original.name + ' (copy)'
    duplicate.owner = request.user
    duplicate.save()

    # Keep the copied document in step with the copied design.
    duplicate_doc.owner = duplicate.owner
    duplicate_doc.name = duplicate.name
    duplicate_doc.save()
    duplicate.doc.add(duplicate_doc)

    notice = _('Copied design: %(name)s') % {'name': original.name}
    messages.info(request, notice)

    target = reverse(get_app_name(request) + ':execute_design',
                     kwargs={'design_id': duplicate.id})
    return format_preserving_redirect(request, target)
Beispiel #9
0
def generic_op(form_class,
               request,
               op,
               parameter_names,
               piggyback=None,
               template="fileop.mako",
               extra_params=None):
    """
    Generic implementation for several operations.

    On a POST with a valid form, runs ``op`` with the cleaned form values
    and then either redirects to ``next`` (when given) or renders
    ``template`` with toolbars.

    @param form_class form to instantiate
    @param request incoming request, used for parameters
    @param op callable with the filesystem operation
    @param parameter_names list of form parameters that are extracted and then passed to op
    @param piggyback name of the form parameter whose file stats to look up after the operation
    @param template template rendered after a successful operation
    @param extra_params dictionary of extra parameters to send to the template for rendering
    """
    # Use next for non-ajax requests, when available.
    next_url = request.GET.get("next")
    if next_url is None:
        next_url = request.POST.get("next")

    ret = {'next': next_url}

    if extra_params is not None:
        ret['extra_params'] = extra_params

    # Echo back every non-empty requested parameter to the template.
    for p in parameter_names:
        val = request.REQUEST.get(p)
        if val:
            ret[p] = val

    # NOTE(review): nothing is returned for non-POST requests or invalid
    # forms in the code visible here -- confirm callers handle None.
    if request.method == 'POST':
        form = form_class(request.POST)
        # TODO(philip): How best to do error handling?  fs will throw
        # an arbitrary-ish exception (typically file not found or maybe permission
        # denied), and this needs to be coaxed into an HTTP error.
        ret['form'] = form
        if form.is_valid():
            args = [form.cleaned_data[p] for p in parameter_names]
            op(*args)
            if next_url:
                logging.debug("Next: %s", next_url)
                # Doesn't need to be quoted: quoting is done by HttpResponseRedirect.
                return format_preserving_redirect(request, next_url)
            ret["success"] = True
            try:
                if piggyback:
                    piggy_path = form.cleaned_data[piggyback]
                    ret["result"] = _massage_stats(
                        request, request.fs.stats(piggy_path))
            except Exception:
                # Hard to report these more naturally here.  These happen either
                # because of a bug in the piggy-back code or because of a
                # race condition.
                logger.exception("Exception while processing piggyback data")
                ret["result_error"] = True

            return render_with_toolbars(template, request, ret)
Beispiel #10
0
def execute_directly(request, query, query_server=None, design=None, tablename=None,
                     on_success_url=None, on_success_params=None, **kwargs):
  """
  Run a query via dbms ... execute_query() and redirect to its watch page.

    query
      The HQL model Query object.

    query_server
      To which Query Server to submit the query.
      Dictionary with keys: ['server_name', 'server_host', 'server_port'].

    design
      The design associated with the query. When given, access to it is
      checked with authorized_get_design() first.

    tablename
      The associated table name for the context; only used when no design
      is given.

    on_success_url
      Where to go after the query is done, forwarded to the watch page as
      the "on_success_url" GET param (see ``watch_query``). May be a
      callable, in which case the URL is the return of:
        on_success_url(history_obj) -> URL string

    on_success_params
      Optional extra GET params merged into the watch URL's params.

    kwargs
      Accepted, but not used by this function.

  Note that this may throw a Beeswax exception.
  """
  if design is not None:
    authorized_get_design(request, design.id)

  server = dbms.get(request.user, query_server)
  history_obj = server.execute_query(query, design)
  watch_url = urlresolvers.reverse(watch_query, kwargs={'id': history_obj.id})

  # Collect the GET params for the watch_url.
  params = QueryDict(None, mutable=True)

  # (1) context: a design wins over a table name.
  context = None
  if design:
    context = ("design", design.id)
  elif tablename:
    context = ("table", tablename)
  if context is not None:
    params['context'] = make_query_context(*context)

  # (2) on_success_url: resolve callables against the history object.
  if on_success_url:
    success_target = on_success_url
    if callable(success_target):
      success_target = success_target(history_obj)
    params['on_success_url'] = success_target

  # (3) misc
  if on_success_params:
    params.update(on_success_params)

  return format_preserving_redirect(request, watch_url, params)
Beispiel #11
0
def setup(request):
    """
    Install the jobsub examples after confirmation.

    A GET request renders the "confirm.html" page; any other method runs
    the jobsub setup command and redirects to "/jobsub".
    """
    if request.method != "GET":
        jobsub_setup.Command().handle_noargs()
        return format_preserving_redirect(request, "/jobsub")

    confirm_args = {'url': request.path, 'title': "Install job design examples?"}
    return render("confirm.html", request, confirm_args)
Beispiel #12
0
def execute_directly(request, query, query_server=None,
                     design=None, on_success_url=None, on_success_params=None,
                     **kwargs):
  """
  Run a query via dbms ... execute_query() and redirect to the app's
  watch_query_history page for the resulting query history.

    query
      The HQL model Query object. Its ``query`` mapping supplies the
      database to use ('default' when it has no 'database' entry).

    query_server
      To which Query Server to submit the query.
      Dictionary with keys: ['server_name', 'server_host', 'server_port'].

    design
      The design associated with the query. When given, access to it is
      checked with authorized_get_design() first.

    on_success_url
      Where to go after the query is done, forwarded to the watch page as
      the "on_success_url" GET param. May be a callable, in which case the
      URL is the return of:
        on_success_url(query_history) -> URL string

    on_success_params
      Optional extra GET params merged into the watch URL's params.

    kwargs
      Accepted, but not used by this function.

  Note that this may throw a Beeswax exception.
  """
  if design is not None:
    authorized_get_design(request, design.id)

  # Select the query's database before executing it.
  server = dbms.get(request.user, query_server)
  server.use(query.query.get('database', 'default'))
  history = server.execute_query(query, design)

  url_name = get_app_name(request) + ':watch_query_history'
  watch_url = reverse(url_name, kwargs={'query_history_id': history.id})

  # Collect the GET params for the watch_url.
  params = QueryDict(None, mutable=True)

  # (1) on_success_url: resolve callables against the query history.
  if on_success_url:
    success_target = on_success_url
    if callable(success_target):
      success_target = success_target(history)
    params['on_success_url'] = success_target

  # (2) misc
  if on_success_params:
    params.update(on_success_params)

  return format_preserving_redirect(request, watch_url, params)
Beispiel #13
0
def generic_op(form_class, request, op, parameter_names, piggyback=None, template="fileop.mako", extra_params=None):
  """
  Generic implementation for several operations.

  On a POST with a valid form, runs ``op`` with the cleaned form values and
  then either redirects to ``next`` (when given) or renders ``template``
  with toolbars.

  @param form_class form to instantiate
  @param request incoming request, used for parameters
  @param op callable with the filesystem operation
  @param parameter_names list of form parameters that are extracted and then passed to op
  @param piggyback name of the form parameter whose file stats to look up after the operation
  @param template template rendered after a successful operation
  @param extra_params dictionary of extra parameters to send to the template for rendering
  """
  # Use next for non-ajax requests, when available.
  next_url = request.GET.get("next")
  if next_url is None:
    next_url = request.POST.get("next")

  ret = {'next': next_url}

  if extra_params is not None:
    ret['extra_params'] = extra_params

  # Echo back every non-empty requested parameter to the template.
  for p in parameter_names:
    val = request.REQUEST.get(p)
    if val:
      ret[p] = val

  # NOTE(review): nothing is returned for non-POST requests or invalid
  # forms in the code visible here -- confirm callers handle None.
  if request.method == 'POST':
    form = form_class(request.POST)
    # TODO(philip): How best to do error handling?  fs will throw
    # an arbitrary-ish exception (typically file not found or maybe permission
    # denied), and this needs to be coaxed into an HTTP error.
    ret['form'] = form
    if form.is_valid():
      args = [form.cleaned_data[p] for p in parameter_names]
      op(*args)
      if next_url:
        logging.debug("Next: %s", next_url)
        # Doesn't need to be quoted: quoting is done by HttpResponseRedirect.
        return format_preserving_redirect(request, next_url)
      ret["success"] = True
      try:
        if piggyback:
          piggy_path = form.cleaned_data[piggyback]
          ret["result"] = _massage_stats(request, request.fs.stats(piggy_path))
      except Exception:
        # Hard to report these more naturally here.  These happen either
        # because of a bug in the piggy-back code or because of a
        # race condition.
        logger.exception("Exception while processing piggyback data")
        ret["result_error"] = True

      return render_with_toolbars(template, request, ret)
Beispiel #14
0
def view(request, path):
    """Dispatch viewing of a path to a directory listing or a file display.

    Two optional GET flags are handled before ``path`` is looked at:

      default_to_home
        Redirect to the user's home directory, if it is a directory.

      default_to_trash
        Redirect to the user's trash directory, or failing that to the
        top-level trash path, if one of them is a directory.

    Otherwise ``path`` is URL-unquoted, stat'ed and handed to
    ``listdir_paged()`` when it is a directory, or to ``display()`` when it
    is not.

    @param request incoming request; ``request.fs`` is the filesystem queried
    @param path path to view
    @return the redirect, listing or display response; when access fails
            during an AJAX request, a JsonResponse with an ``error`` key
    @raise PopupException when access fails during a non-AJAX request
    """
    # default_to_home is set in bootstrap.js
    if 'default_to_home' in request.GET:
        home_dir_path = request.user.get_home_directory()
        if request.fs.isdir(home_dir_path):
            return format_preserving_redirect(request, reverse(view, kwargs=dict(path=home_dir_path)))

    if 'default_to_trash' in request.GET:
        # The first character of the home path is dropped (presumably its
        # leading '/') before it is joined under the trash's 'Current' folder.
        home_trash = request.fs.join(request.fs.trash_path, 'Current', request.user.get_home_directory()[1:])
        if request.fs.isdir(home_trash):
            return format_preserving_redirect(request, reverse(view, kwargs=dict(path=home_trash)))
        if request.fs.isdir(request.fs.trash_path):
            return format_preserving_redirect(request, reverse(view, kwargs=dict(path=request.fs.trash_path)))

    try:
        # Unquoting is a no-op for paths without escapes, so no comparison is needed.
        path = urllib.unquote(path)
        stats = request.fs.stats(path)
        if stats.isDir:
            return listdir_paged(request, path)
        else:
            return display(request, path)
    except (IOError, WebHdfsException) as e:
        msg = _("Cannot access: %(path)s. ") % {'path': escape(path)}
        if "Connection refused" in e.message:
            msg += _(" The HDFS REST service is not available. ")
        if request.user.is_superuser and not _is_hdfs_superuser(request):
            msg += _(' Note: you are a Hue admin but not a HDFS superuser, "%(superuser)s" or part of HDFS supergroup, "%(supergroup)s".') \
                % {'superuser': request.fs.superuser, 'supergroup': request.fs.supergroup}
        if request.is_ajax():
            # AJAX callers get the message back as data instead of an error page.
            exception = {
                'error': msg
            }
            return JsonResponse(exception)
        else:
            raise PopupException(msg, detail=e)
Beispiel #15
0
def clone_design(request, design_id):
  """
  Clone a design for the requesting user and open the clone.

  The design is fetched with authorized_get_design(); when that yields None
  an error is logged and the design list is returned instead. Otherwise the
  user is notified and redirected to the clone's execute_design page.
  """
  original = authorized_get_design(request, design_id)
  if original is None:
    LOG.error('Cannot clone non-existent design %s' % (design_id,))
    return list_designs(request)

  duplicate = original.clone(request.user)

  notice = _('Copied design: %(name)s') % {'name': original.name}
  messages.info(request, notice)

  url_name = get_app_name(request) + ':execute_design'
  target = reverse(url_name, kwargs={'design_id': duplicate.id})
  return format_preserving_redirect(request, target)
Beispiel #16
0
def execute_directly(request, query_msg, design=None, tablename=None,
                     on_success_url=None, on_success_params=None, **kwargs):
  """
  Run a query through db_utils.execute_directly() and redirect to the
  beeswax watch page for the resulting history object.

    query_msg
      The thrift Query object.

    design
      The design associated with the query.

    tablename
      The associated table name for the context; only used when no design
      is given.

    on_success_url
      Where to go after the query is done, forwarded to the watch page as
      the "on_success_url" GET param (see ``watch_query``). May be a
      callable, in which case the URL is the return of:
        on_success_url(history_obj) -> URL string

    on_success_params
      Optional extra GET params merged into the watch URL's params.

    kwargs
      Passed through to db_utils.execute_directly().

  Note that this may throw a Beeswax exception.
  """
  history_obj = db_utils.execute_directly(request.user, query_msg, design, **kwargs)
  watch_url = urlresolvers.reverse("beeswax.views.watch_query",
                                   kwargs={'id': history_obj.id})

  # Collect the GET params for the watch_url.
  params = QueryDict(None, mutable=True)

  # (1) context: a design wins over a table name.
  context = None
  if design:
    context = ("design", design.id)
  elif tablename:
    context = ("table", tablename)
  if context is not None:
    params['context'] = make_query_context(*context)

  # (2) on_success_url: resolve callables against the history object.
  if on_success_url:
    success_target = on_success_url
    if callable(success_target):
      success_target = success_target(history_obj)
    params['on_success_url'] = success_target

  # (3) misc
  if on_success_params:
    params.update(on_success_params)

  return format_preserving_redirect(request, watch_url, params)
Beispiel #17
0
def view(request, path):
  """
  Dispatch viewing of a path to listdir() or display(), depending on type.

  When the 'default_to_home' GET param is set and the user's home directory
  exists, redirects there instead. Raises Http404 when the path is neither
  a directory nor a file.
  """
  # default_to_home is set in bootstrap.js
  home_dir_path = request.user.get_home_directory()
  wants_home = request.GET.get('default_to_home')
  if wants_home and request.fs.isdir(home_dir_path):
    home_url = urlresolvers.reverse(view, kwargs={'path': home_dir_path})
    return format_preserving_redirect(request, home_url)

  if request.fs.isdir(path):
    return listdir(request, path)
  if request.fs.isfile(path):
    return display(request, path)

  raise Http404("File not found: %s" % escape(path))
Beispiel #18
0
def view(request, path):
    """Dispatch viewing of a path to a directory listing or a file display.

    Two optional GET flags are handled before ``path`` is looked at:

      default_to_home
        Redirect to the user's home directory, if it is a directory.

      default_to_trash
        Redirect to the user's trash directory, or failing that to the
        top-level trash path, if one of them is a directory.

    Otherwise ``path`` is stat'ed and handed to ``listdir_paged()`` when it is
    a directory, or to ``display()`` when it is not.

    @param request incoming request; ``request.fs`` is the filesystem queried
    @param path path to view
    @return the redirect, listing or display response; when access fails
            during an AJAX request, a JsonResponse with an ``error`` key
    @raise PopupException when access fails during a non-AJAX request
    """
    # default_to_home is set in bootstrap.js
    if 'default_to_home' in request.GET:
        home_dir_path = request.user.get_home_directory()
        if request.fs.isdir(home_dir_path):
            return format_preserving_redirect(request, reverse(view, kwargs=dict(path=home_dir_path)))

    if 'default_to_trash' in request.GET:
        # The first character of the home path is dropped (presumably its
        # leading '/') before it is joined under the trash's 'Current' folder.
        home_trash = request.fs.join(request.fs.trash_path, 'Current', request.user.get_home_directory()[1:])
        if request.fs.isdir(home_trash):
            return format_preserving_redirect(request, reverse(view, kwargs=dict(path=home_trash)))
        if request.fs.isdir(request.fs.trash_path):
            return format_preserving_redirect(request, reverse(view, kwargs=dict(path=request.fs.trash_path)))

    try:
        stats = request.fs.stats(path)
        if stats.isDir:
            return listdir_paged(request, path)
        else:
            return display(request, path)
    except (IOError, WebHdfsException) as e:
        msg = _("Cannot access: %(path)s. ") % {'path': escape(path)}
        if "Connection refused" in e.message:
            msg += _(" The HDFS REST service is not available. ")
        if request.user.is_superuser and not _is_hdfs_superuser(request):
            msg += _(' Note: you are a Hue admin but not a HDFS superuser, "%(superuser)s" or part of HDFS supergroup, "%(supergroup)s".') \
                % {'superuser': request.fs.superuser, 'supergroup': request.fs.supergroup}
        if request.is_ajax():
            # AJAX callers get the message back as data instead of an error page.
            exception = {
                'error': msg
            }
            return JsonResponse(exception)
        else:
            raise PopupException(msg, detail=e)
Beispiel #19
0
def clone_design(request, design_id):
  """
  Clone a saved query design for the requesting user and open the clone.

  When no SavedQuery exists for design_id, an error is logged and the design
  list is returned instead. Otherwise the clone gets a ' (copy)' suffix, is
  owned by the requesting user, and the user is redirected to execute_query
  for it.
  """
  try:
    original = models.SavedQuery.get(design_id)
  except models.SavedQuery.DoesNotExist:
    LOG.error('Cannot clone non-existent design %s' % (design_id,))
    return list_designs(request)

  duplicate = original.clone()
  duplicate.name = original.name + ' (copy)'
  duplicate.owner = request.user
  duplicate.save()

  request.flash.put('Copied design: %s' % (original.name,))

  target = urlresolvers.reverse(execute_query, kwargs={'design_id': duplicate.id})
  return format_preserving_redirect(request, target)
Beispiel #20
0
def clone_design(request, design_id):
  """
  Clone a design for the requesting user and open the clone.

  The design is fetched with authorized_get_design(); when that yields None
  an error is logged and the design list is returned instead. Otherwise the
  clone gets a ' (copy)' suffix, is owned by the requesting user, and the
  user is redirected to execute_query for it.
  """
  original = authorized_get_design(request, design_id)
  if original is None:
    LOG.error('Cannot clone non-existent design %s' % (design_id,))
    return list_designs(request)

  duplicate = original.clone()
  duplicate.name = original.name + ' (copy)'
  duplicate.owner = request.user
  duplicate.save()

  notice = _('Copied design: %(name)s') % {'name': original.name}
  messages.info(request, notice)

  target = urlresolvers.reverse(execute_query, kwargs={'design_id': duplicate.id})
  return format_preserving_redirect(request, target)
Beispiel #21
0
def view(request, path):
    """
    Dispatch viewing of a path to listdir() or display(), depending on type.

    When the 'default_to_home' GET param is set and the user's home
    directory exists, redirects there instead. Raises Http404 when the path
    is neither a directory nor a file.
    """
    # default_to_home is set in bootstrap.js
    home_dir_path = request.user.get_home_directory()
    wants_home = request.GET.get('default_to_home')
    if wants_home and request.fs.isdir(home_dir_path):
        home_url = urlresolvers.reverse(view, kwargs={'path': home_dir_path})
        return format_preserving_redirect(request, home_url)

    if request.fs.isdir(path):
        return listdir(request, path)
    if request.fs.isfile(path):
        return display(request, path)

    raise Http404("File not found: %s" % escape(path))
Beispiel #22
0
def clone_design(request, design_id):
    """
    Clone a design for the requesting user and open the clone.

    The design is fetched with authorized_get_design(); when that yields
    None an error is logged and the design list is returned instead.
    Otherwise the clone gets a " (copy)" suffix, is owned by the requesting
    user, and the user is redirected to the app's execute_query page for it.
    """
    original = authorized_get_design(request, design_id)
    if original is None:
        LOG.error("Cannot clone non-existent design %s" % (design_id,))
        return list_designs(request)

    duplicate = original.clone()
    duplicate.name = original.name + " (copy)"
    duplicate.owner = request.user
    duplicate.save()

    notice = _("Copied design: %(name)s") % {"name": original.name}
    messages.info(request, notice)

    url_name = get_app_name(request) + ":execute_query"
    target = reverse(url_name, kwargs={"design_id": duplicate.id})
    return format_preserving_redirect(request, target)
Beispiel #23
0
def save_results(request, id):
    """
    Save the results of a query to an HDFS directory or Hive table.

    Only POST requests are acted on. A POST carrying ``cancel`` redirects
    back to the query's watch page; a valid form either inserts the query
    results into the target directory or creates the target table from
    them, and the resulting redirect is returned.

    Raises PopupException when the query has not succeeded, when its
    results metadata cannot be fetched, or when saving fails.
    """
    query_history = authorized_get_history(request, id, must_exist=True)

    app_name = get_app_name(request)
    # Only the state is needed here; the handle is re-fetched right before use.
    _server_id, state = _get_query_handle_and_state(query_history)
    query_history.save_state(state)
    error_msg, log = None, None

    # NOTE(review): non-POST requests and invalid forms fall through and
    # return None in the code visible here -- confirm callers handle that.
    if request.method == "POST":
        if not query_history.is_success():
            msg = _("This query is %(state)s. Results unavailable.") % {"state": state}
            raise PopupException(msg)

        db = dbms.get(request.user, query_history.get_query_server_config())
        form = beeswax.forms.SaveResultsForm(request.POST, db=db, fs=request.fs)

        if request.POST.get("cancel"):
            return format_preserving_redirect(request, "/%s/watch/%s" % (app_name, id))

        if form.is_valid():
            try:
                handle, state = _get_query_handle_and_state(query_history)
                result_meta = db.get_results_metadata(handle)
            except Exception as ex:
                raise PopupException(_("Cannot find query: %s") % ex)

            try:
                if form.cleaned_data["save_target"] == form.SAVE_TYPE_DIR:
                    target_dir = form.cleaned_data["target_dir"]
                    query_history = db.insert_query_into_directory(query_history, target_dir)
                    # Watch the insert query, then land on the target directory.
                    redirected = redirect(
                        reverse("beeswax:watch_query", args=[query_history.id])
                        + "?on_success_url="
                        + reverse("filebrowser.views.view", kwargs={"path": target_dir})
                    )
                elif form.cleaned_data["save_target"] == form.SAVE_TYPE_TBL:
                    redirected = db.create_table_as_a_select(
                        request, query_history, form.cleaned_data["target_table"], result_meta
                    )
            except Exception as ex:
                error_msg, log = expand_exception(ex, db)
                raise PopupException(_("The result could not be saved: %s.") % log, detail=ex)

            # NOTE(review): ``redirected`` is unbound if save_target is neither
            # SAVE_TYPE_DIR nor SAVE_TYPE_TBL -- presumably the form rules
            # that out; confirm.
            return redirected
Beispiel #24
0
def install_examples(request):
  """
  Handle installing sample data and example queries.

  GET renders the 'confirm.html' confirmation page. POST runs the
  beeswax_install_examples management command, flashes a message when
  MetaInstall reports the examples as installed, and redirects to
  '/beeswax/tables'. Any failure during installation is logged and re-raised
  as a PopupException. Other HTTP methods fall through and return None.
  """
  if request.method == 'GET':
    return render('confirm.html', request,
                  dict(url=request.path, title='Install sample tables and Beeswax examples?'))
  elif request.method == 'POST':
    try:
      beeswax.management.commands.beeswax_install_examples.Command().handle_noargs()
      if models.MetaInstall.get().installed_example:
        request.flash.put('Installed Beeswax examples.')
    except Exception as err:  # "as" form parses on Python 2.6+ and 3.x
      LOG.exception(err)
      raise PopupException(err)

    return format_preserving_redirect(request, '/beeswax/tables')
Beispiel #25
0
def clone_design(request, design_id):
    """Clone a design belonging to any user.

    Looks the design up through authorized_get_design(). When it does not
    exist, the problem is logged and the design list is shown instead.
    Otherwise the design is cloned for the requesting user, saved, linked
    through Document.objects.link(), and the user is redirected to the
    "execute_design" page of the clone.
    """
    design = authorized_get_design(request, design_id)

    if design is None:
        LOG.error("Cannot clone non-existent design %s" % (design_id,))
        return list_designs(request)

    copy = design.clone(request.user)
    copy.save()

    # The value returned by link() was never read, so it is no longer bound to a local.
    Document.objects.link(copy, owner=copy.owner, name=copy.name, description=copy.desc, extra=copy.type)

    messages.info(request, _("Copied design: %(name)s") % {"name": design.name})

    return format_preserving_redirect(
        request, reverse(get_app_name(request) + ":execute_design", kwargs={"design_id": copy.id})
    )
Beispiel #26
0
def save_results(request, id):
  """
  DEPRECATED. Need to get rid of watch_wait dependency first.

  Save the results of a query to an HDFS directory or Hive table.

  Only POST requests are handled in the code visible here:
    - a query that is not successful raises PopupException;
    - a POST carrying "cancel" redirects back to the watch page;
    - a valid form re-runs the query into the target directory or table and
      returns the resulting redirect.
  Any failure while saving is re-raised as a PopupException carrying the
  expanded server log.
  """
  query_history = authorized_get_history(request, id, must_exist=True)

  app_name = get_app_name(request)
  server_id, state = _get_query_handle_and_state(query_history)
  query_history.save_state(state)
  error_msg, log = None, None

  if request.method == 'POST':
    if not query_history.is_success():
      msg = _('This query is %(state)s. Results unavailable.') % {'state': state}
      raise PopupException(msg)

    db = dbms.get(request.user, query_history.get_query_server_config())
    form = beeswax.forms.SaveResultsForm(request.POST, db=db, fs=request.fs)

    if request.POST.get('cancel'):
      return format_preserving_redirect(request, '/%s/watch/%s' % (app_name, id))

    if form.is_valid():
      try:
        handle, state = _get_query_handle_and_state(query_history)
        result_meta = db.get_results_metadata(handle)
      except Exception as ex:  # "as" form parses on Python 2.6+ and 3.x
        raise PopupException(_('Cannot find query: %s') % ex)

      save_target = form.cleaned_data['save_target']
      try:
        # NOTE(review): any other save_target value leaves `redirected` unbound;
        # presumably the form restricts the choices to these two -- confirm.
        if save_target == form.SAVE_TYPE_DIR:
          target_dir = form.cleaned_data['target_dir']
          query_history = db.insert_query_into_directory(query_history, target_dir)
          # Watch the new query, then land on the target directory in the file browser.
          watch_url = reverse('beeswax:watch_query', args=[query_history.id])
          browse_url = reverse('filebrowser.views.view', kwargs={'path': target_dir})
          redirected = redirect(watch_url + '?on_success_url=' + browse_url)
        elif save_target == form.SAVE_TYPE_TBL:
          redirected = db.create_table_as_a_select(request, query_history, form.cleaned_data['target_table'], result_meta)
      except Exception as ex:
        error_msg, log = expand_exception(ex, db)
        raise PopupException(_('The result could not be saved: %s.') % log, detail=ex)

      return redirected
Beispiel #27
0
def save_results(request, id):
  """
  Save the results of a query to an HDFS directory or Hive table.

  Only POST requests are handled in the code visible here:
    - a query that is not successful raises PopupException;
    - a POST carrying "cancel" redirects back to the watch page;
    - a valid form re-runs the query into the target directory or table and
      returns the resulting redirect.
  Any failure while saving is re-raised as a PopupException carrying the
  expanded server log.
  """
  query_history = authorized_get_history(request, id, must_exist=True)

  app_name = get_app_name(request)
  server_id, state = _get_query_handle_and_state(query_history)
  query_history.save_state(state)
  error_msg, log = None, None

  if request.method == 'POST':
    if not query_history.is_success():
      msg = _('This query is %(state)s. Results unavailable.') % {'state': state}
      raise PopupException(msg)

    db = dbms.get(request.user, query_history.get_query_server_config())
    form = beeswax.forms.SaveResultsForm(request.POST, db=db, fs=request.fs)

    if request.POST.get('cancel'):
      return format_preserving_redirect(request, '/%s/watch/%s' % (app_name, id))

    if form.is_valid():
      try:
        handle, state = _get_query_handle_and_state(query_history)
        result_meta = db.get_results_metadata(handle)
      except Exception as ex:  # "as" form parses on Python 2.6+ and 3.x
        raise PopupException(_('Cannot find query: %s') % ex)

      save_target = form.cleaned_data['save_target']
      try:
        # NOTE(review): any other save_target value leaves `redirected` unbound;
        # presumably the form restricts the choices to these two -- confirm.
        if save_target == form.SAVE_TYPE_DIR:
          target_dir = form.cleaned_data['target_dir']
          query_history = db.insert_query_into_directory(query_history, target_dir)
          # Watch the new query, then land on the target directory in the file browser.
          watch_url = reverse('beeswax:watch_query', args=[query_history.id])
          browse_url = reverse('filebrowser.views.view', kwargs={'path': target_dir})
          redirected = redirect(watch_url + '?on_success_url=' + browse_url)
        elif save_target == form.SAVE_TYPE_TBL:
          redirected = db.create_table_as_a_select(request, query_history, form.cleaned_data['target_table'], result_meta)
      except Exception as ex:
        error_msg, log = expand_exception(ex, db)
        raise PopupException(_('The result could not be saved: %s.') % log, detail=ex)

      return redirected
Beispiel #28
0
def clone_design(request, design_id):
  """Clone a design belonging to any user.

  Looks the design up through authorized_get_design(). When it does not
  exist, the problem is logged and the design list is shown instead.
  Otherwise the design is cloned for the requesting user, saved, linked
  through Document.objects.link(), and the user is redirected to the
  "execute_design" page of the clone.
  """
  design = authorized_get_design(request, design_id)

  if design is None:
    LOG.error('Cannot clone non-existent design %s' % (design_id,))
    return list_designs(request)

  copy = design.clone(request.user)
  copy.save()

  # The value returned by link() was never read, so it is no longer bound to a local.
  Document.objects.link(copy,
      owner=copy.owner,
      name=copy.name,
      description=copy.desc,
      extra=copy.type)

  messages.info(request, _('Copied design: %(name)s') % {'name': design.name})

  return format_preserving_redirect(request, reverse(get_app_name(request) + ':execute_design', kwargs={'design_id': copy.id}))
Beispiel #29
0
def view(request, path):
    """Dispatch viewing of a path to listdir() or display(), depending on its type.

    When the "default_to_home" GET parameter is present and the user's home
    directory exists, redirect there instead. Otherwise stat the path and
    show a directory listing or the file itself. IOError / WebHdfsException
    raised while doing so are converted into a PopupException.
    """

    # default_to_home is set in bootstrap.js
    if "default_to_home" in request.GET:
        home_dir_path = request.user.get_home_directory()
        if request.fs.isdir(home_dir_path):
            return format_preserving_redirect(request, urlresolvers.reverse(view, kwargs=dict(path=home_dir_path)))

    try:
        stats = request.fs.stats(path)
        if stats.isDir:
            return listdir(request, path, False)
        else:
            return display(request, path)
    except (IOError, WebHdfsException) as e:  # "as" form parses on Python 2.6+ and 3.x
        msg = _("Cannot access: %(path)s.") % {"path": escape(path)}
        # NOTE(review): this compares the user object itself with fs.superuser, which is
        # interpolated as a name just below -- confirm the two are really comparable.
        if request.user.is_superuser and not request.user == request.fs.superuser:
            msg += _(' Note: you are a Hue admin but not a HDFS superuser (which is "%(superuser)s").') % {
                "superuser": request.fs.superuser
            }
        raise PopupException(msg, detail=e)
Beispiel #30
0
def save_results(request, id):
    """
    Save the results of a query to an HDFS directory or to a new table.

    Only POST requests do any work in the code visible here. The result files
    are moved to the chosen directory (SAVE_TYPE_DIR), or _save_results_ctas()
    is invoked for SAVE_TYPE_TBL.

    Raises PopupException when the query result is unavailable, when the
    result metadata cannot be fetched, when the result has no table_dir, when
    a directory save is requested for a result with in_tablename set, or when
    a WebHdfsException occurs during the save.

    NOTE(review): this snippet appears to be cut off after the last except
    clause -- error_msg and log are collected but never used in the visible
    code, and no response is returned on those paths.
    """
    query_history = authorized_get_history(request, id, must_exist=True)

    # Refresh the stored state of the query before deciding anything.
    server_id, state = _get_query_handle_and_state(query_history)
    query_history.save_state(state)
    error_msg, log = None, None

    if request.method == "POST":
        # Make sure the result is available.
        # Note that we may still hit errors during the actual save
        if not query_history.is_success():
            if query_history.is_failure():
                msg = _("This query has %(state)s. Results unavailable.") % {"state": state}
            else:
                msg = _("The result of this query is not available yet.")
            raise PopupException(msg)

        db = dbms.get(request.user, query_history.get_query_server_config())
        form = beeswax.forms.SaveResultsForm(request.POST, db=db)

        # Cancel goes back to results
        if request.POST.get("cancel"):
            return format_preserving_redirect(request, "/beeswax/watch/%s" % (id,))

        if form.is_valid():
            # Do save
            # 1. Get the results metadata
            # NOTE(review): assert is stripped under "python -O"; this check then disappears.
            assert request.POST.get("save")
            try:
                handle, state = _get_query_handle_and_state(query_history)
                result_meta = db.get_results_metadata(handle)
            except Exception, ex:
                LOG.exception(ex)
                raise PopupException(_("Cannot find query."))
            # Keep only element [2] of the split URL (presumably the path part -- confirm).
            if result_meta.table_dir:
                result_meta.table_dir = request.fs.urlsplit(result_meta.table_dir)[2]

            # 2. Check for partitioned tables
            if result_meta.table_dir is None:
                raise PopupException(
                    _(
                        "Saving results from a partitioned table is not supported. You may copy from the HDFS location manually."
                    )
                )

            # 3. Actual saving of results
            try:
                if form.cleaned_data["save_target"] == form.SAVE_TYPE_DIR:
                    # To dir
                    if result_meta.in_tablename:
                        raise PopupException(
                            _(
                                "Saving results from a query with no MapReduce jobs is not supported. "
                                "You may copy manually from the HDFS location %(path)s."
                            )
                            % {"path": result_meta.table_dir}
                        )
                    target_dir = form.cleaned_data["target_dir"]
                    request.fs.rename_star(result_meta.table_dir, target_dir)
                    LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, target_dir))
                    # The result files were moved away, so the query is marked as expired.
                    query_history.save_state(models.QueryHistory.STATE.expired)
                    return redirect(reverse("filebrowser.views.view", kwargs={"path": target_dir}))
                elif form.cleaned_data["save_target"] == form.SAVE_TYPE_TBL:
                    # To new table
                    try:
                        return _save_results_ctas(
                            request, query_history, form.cleaned_data["target_table"], result_meta
                        )
                    except Exception, bex:
                        # Failure is logged and recorded in error_msg/log rather than re-raised.
                        LOG.exception(bex)
                        error_msg, log = expand_exception(bex, db)
            except WebHdfsException, ex:
                raise PopupException(_("The table could not be saved."), detail=ex)
            except IOError, ex:
                # Recorded in error_msg instead of being raised.
                LOG.exception(ex)
                error_msg = str(ex)
Beispiel #31
0
    if request.method == "POST" or (
        not query_history.is_finished() and query_history.is_success() and not query_history.has_results
    ):
        try:
            query_history = db.execute_next_statement(query_history)
        except Exception, ex:
            pass

    # Check query state
    handle, state = _get_query_handle_and_state(query_history)
    query_history.save_state(state)

    if query_history.is_failure():
        # When we fetch, Beeswax server will throw us a Exception, which has the
        # log we want to display.
        return format_preserving_redirect(request, results_url, request.GET)
    elif query_history.is_finished() or (query_history.is_success() and query_history.has_results):
        return format_preserving_redirect(request, on_success_url, request.GET)

    # Still running
    log = db.get_log(handle)

    # Keep waiting
    # - Translate context into something more meaningful (type, data)
    query_context = _parse_query_context(context_param)

    return render(
        "watch_wait.mako",
        request,
        {
            "query": query_history,
Beispiel #32
0
class HiveServer2Dbms(object):
    def __init__(self, client, server_type):
        self.client = client
        self.server_type = server_type
        self.server_name = self.client.query_server['server_name']

    @classmethod
    def to_matching_wildcard(cls, identifier=None):
        cleaned = "*"
        if identifier and identifier.strip() != "*":
            cleaned = "*%s*" % identifier.strip().strip("*")
        return cleaned

    def get_databases(self, database_names='*'):
        """List databases matching `database_names`.

        Any value other than '*' is first converted with
        to_matching_wildcard(). The result is naturally sorted unless it holds
        more than APPLY_NATURAL_SORT_MAX entries.
        """
        pattern = database_names
        if pattern != '*':
            pattern = self.to_matching_wildcard(pattern)

        found = self.client.get_databases(schemaName=pattern)

        if len(found) > APPLY_NATURAL_SORT_MAX.get():
            return found
        return apply_natural_sort(found)

    def get_database(self, database):
        return self.client.get_database(database)

    def get_tables_meta(self,
                        database='default',
                        table_names='*',
                        table_types=None):
        """Return table metadata for `database`.

        The name filter is only forwarded (as a wildcard) when the server is
        'beeswax'; other servers receive None. The result is naturally sorted
        by 'name' unless it holds more than APPLY_NATURAL_SORT_MAX entries.
        """
        identifier = None
        if self.server_name == 'beeswax':
            identifier = self.to_matching_wildcard(table_names)

        found = self.client.get_tables_meta(database, identifier, table_types)

        if len(found) > APPLY_NATURAL_SORT_MAX.get():
            return found
        return apply_natural_sort(found, key='name')

    def get_tables(self,
                   database='default',
                   table_names='*',
                   table_types=None):
        """Return the tables of `database`.

        The name filter is only forwarded (as a wildcard) when the server is
        'beeswax'; other servers receive None. The result is naturally sorted
        unless it holds more than APPLY_NATURAL_SORT_MAX entries.
        """
        identifier = None
        if self.server_name == 'beeswax':
            identifier = self.to_matching_wildcard(table_names)

        found = self.client.get_tables(database, identifier, table_types)

        if len(found) > APPLY_NATURAL_SORT_MAX.get():
            return found
        return apply_natural_sort(found)

    def get_table(self, database, table_name):
        return self.client.get_table(database, table_name)

    def alter_table(self,
                    database,
                    table_name,
                    new_table_name=None,
                    comment=None,
                    tblproperties=None):
        """Run a single ALTER TABLE change and return the refreshed table.

        Exactly one change is applied, in this order of precedence: rename,
        then comment, then tblproperties. Raises QueryServerException when
        execute_and_wait() returns no handle.
        """
        statement = 'ALTER TABLE `%s`.`%s`' % (database, table_name)

        if new_table_name:
            # From here on the table is looked up under its new name.
            table_name = new_table_name
            statement += ' RENAME TO `%s`' % table_name
        elif comment:
            statement += " SET TBLPROPERTIES ('comment' = '%s')" % comment
        elif tblproperties:
            pairs = ["'%s' = '%s'" % (key, value) for key, value in tblproperties.items()]
            statement += " SET TBLPROPERTIES (%s)" % ' ,'.join(pairs)

        timeout = SERVER_CONN_TIMEOUT.get()
        handle = self.execute_and_wait(hql_query(statement), timeout_sec=timeout)

        if not handle:
            raise QueryServerException(_("Failed to execute alter table statement: %s") % statement)
        self.close(handle)

        return self.client.get_table(database, table_name)

    def get_column(self, database, table_name, column_name):
        table = self.client.get_table(database, table_name)
        for col in table.cols:
            if col.name == column_name:
                return col
        return None

    def alter_column(self,
                     database,
                     table_name,
                     column_name,
                     new_column_name,
                     column_type,
                     comment=None,
                     partition_spec=None,
                     cascade=False):
        """Run ALTER TABLE ... CHANGE COLUMN and return the changed column.

        Optional PARTITION, COMMENT and CASCADE clauses are appended when the
        corresponding argument is truthy. Raises QueryServerException when
        execute_and_wait() returns no handle.
        """
        clauses = ['ALTER TABLE `%s`.`%s`' % (database, table_name)]

        if partition_spec:
            clauses.append(' PARTITION (%s)' % partition_spec)

        clauses.append(' CHANGE COLUMN `%s` `%s` %s' % (column_name, new_column_name,
                                                        column_type.upper()))

        if comment:
            clauses.append(" COMMENT '%s'" % comment)

        if cascade:
            clauses.append(' CASCADE')

        statement = ''.join(clauses)

        timeout = SERVER_CONN_TIMEOUT.get()
        handle = self.execute_and_wait(hql_query(statement), timeout_sec=timeout)

        if not handle:
            raise QueryServerException(_("Failed to execute alter column statement: %s") % statement)
        self.close(handle)

        return self.get_column(database, table_name, new_column_name)

    def execute_query(self, query, design):
        return self.execute_and_watch(query, design=design)

    def select_star_from(self, database, table, limit=10000):
        if table.partition_keys:  # Filter on max number of partitions for partitioned tables
            hql = self._get_sample_partition_query(
                database, table, limit=limit)  # Currently need a limit
        else:
            hql = "SELECT * FROM `%s`.`%s` LIMIT %d;" % (database, table.name,
                                                         limit)
        return self.execute_statement(hql)

    def get_select_star_query(self, database, table, limit=10000):
        if table.partition_keys:  # Filter on max number of partitions for partitioned tables
            hql = self._get_sample_partition_query(
                database, table, limit=limit)  # Currently need a limit
        else:
            hql = "SELECT * FROM `%s`.`%s` LIMIT %d;" % (database, table.name,
                                                         limit)
        return hql

    def execute_statement(self, hql):
        """Wrap `hql` in a query object and run it through execute_and_watch().

        QUERY_TYPES[1] is used for an 'impala' server, QUERY_TYPES[0] otherwise.
        """
        # NOTE(review): load_data/drop_tables/drop_databases pass a database name as
        # the second positional argument of hql_query -- confirm that a QUERY_TYPES
        # entry is really what belongs in that position.
        type_index = 1 if self.server_name == 'impala' else 0
        return self.execute_and_watch(hql_query(hql, QUERY_TYPES[type_index]))

    def fetch(self, query_handle, start_over=False, rows=None):
        no_start_over_support = [
            config_variable
            for config_variable in self.get_default_configuration(False)
            if config_variable.key == 'support_start_over'
            and config_variable.value == 'false'
        ]
        if no_start_over_support:
            start_over = False

        return self.client.fetch(query_handle, start_over, rows)

    def close_operation(self, query_handle):
        return self.client.close_operation(query_handle)

    def open_session(self, user):
        return self.client.open_session(user)

    def close_session(self, session):
        resp = self.client.close_session(session)

        if resp.status.statusCode != 0:
            session.status_code = resp.status.statusCode
            session.save()
            raise QueryServerException(
                _('Failed to close session, session handle may already be closed or timed out.'
                  ))
        else:
            session.status_code = 4  # Set to ttypes.TStatusCode.INVALID_HANDLE_STATUS
            session.save()

        return session

    def cancel_operation(self, query_handle):
        resp = self.client.cancel_operation(query_handle)
        if self.client.query_server['server_name'] == 'impala':
            resp = self.client.close_operation(query_handle)
        return resp

    def get_sample(self, database, table, column=None, nested=None, limit=100):
        """Fetch a small sample of rows from `table`.

        Builds a limited SELECT (partition-filtered for partitioned tables,
        nested-aware for Impala), waits up to 5 seconds for it and fetches up
        to 100 rows. Returns the fetch result, or None when the statement
        produced no handle.

        The handle is closed even when fetching raises, so it is not leaked
        on the server.
        """
        result = None
        hql = None

        # Filter on max # of partitions for partitioned tables
        column = '`%s`' % column if column else '*'
        if table.partition_keys:
            hql = self._get_sample_partition_query(database, table, column,
                                                   limit)
        elif self.server_name == 'impala':
            # NOTE(review): `column` was just rewritten to a non-empty string above,
            # so this condition is always true and the else branch looks unreachable.
            if column or nested:
                from impala.dbms import ImpalaDbms
                select_clause, from_clause = ImpalaDbms.get_nested_select(
                    database, table.name, column, nested)
                hql = 'SELECT %s FROM %s LIMIT %s;' % (select_clause,
                                                       from_clause, limit)
            else:
                hql = "SELECT * FROM `%s`.`%s` LIMIT %s;" % (database,
                                                             table.name, limit)
        else:
            hql = "SELECT %s FROM `%s`.`%s` LIMIT %s;" % (column, database,
                                                          table.name, limit)
            # TODO: Add nested select support for HS2

        if hql:
            query = hql_query(hql)
            handle = self.execute_and_wait(query, timeout_sec=5.0)

            if handle:
                try:
                    result = self.fetch(handle, rows=100)
                finally:
                    # Release the server-side handle whether or not fetch succeeded.
                    self.close(handle)

        return result

    def _get_sample_partition_query(self,
                                    database,
                                    table,
                                    column='*',
                                    limit=100):
        """Build a limited SELECT restricted to a bounded set of partitions.

        At most QUERY_PARTITIONS_LIMIT partitions are requested from
        get_partitions(); their specs are OR-ed together into a WHERE clause.
        With no partitions, or a falsy limit setting, no WHERE clause is added.
        """
        max_parts = QUERY_PARTITIONS_LIMIT.get()
        partitions = self.get_partitions(database,
                                         table,
                                         partition_spec=None,
                                         max_parts=max_parts)

        where = ''
        if partitions and max_parts:
            # Need to reformat partition specs for where clause syntax
            filters = [
                '(%s)' % part.partition_spec.replace(',', ' AND ')
                for part in partitions
            ]
            where = 'WHERE %s' % ' OR '.join(filters)

        values = {
            'column': column,
            'database': database,
            'table': table.name,
            'partition_clause': where,
            'limit': limit,
        }
        return "SELECT %(column)s FROM `%(database)s`.`%(table)s` %(partition_clause)s LIMIT %(limit)s" % values

    def analyze_table(self, database, table):
        if self.server_name == 'impala':
            hql = 'COMPUTE STATS `%(database)s`.`%(table)s`' % {
                'database': database,
                'table': table
            }
        else:
            table_obj = self.get_table(database, table)
            partition_spec = ''
            if table_obj.partition_keys:
                partition_keys = ','.join(
                    [part.name for part in table_obj.partition_keys])
                partition_spec = 'PARTITION(%(partition_keys)s)' % {
                    'partition_keys': partition_keys
                }

            hql = 'ANALYZE TABLE `%(database)s`.`%(table)s` %(partition_spec)s COMPUTE STATISTICS' % \
                  {'database': database, 'table': table, 'partition_spec': partition_spec}

        return self.execute_statement(hql)

    def analyze_table_columns(self, database, table):
        if self.server_name == 'impala':
            hql = 'COMPUTE STATS `%(database)s`.`%(table)s`' % {
                'database': database,
                'table': table
            }
        else:
            table_obj = self.get_table(database, table)
            if table_obj.partition_keys:
                raise NotImplementedError(
                    'HIVE-4861: COMPUTE STATISTICS FOR COLUMNS not supported for partitioned-tables.'
                )
            else:
                hql = 'ANALYZE TABLE `%(database)s`.`%(table)s` COMPUTE STATISTICS FOR COLUMNS' % {
                    'database': database,
                    'table': table
                }

        return self.execute_statement(hql)

    def get_table_stats(self, database, table):
        stats = []

        if self.server_name == 'impala':
            hql = 'SHOW TABLE STATS `%(database)s`.`%(table)s`' % {
                'database': database,
                'table': table
            }

            query = hql_query(hql)
            handle = self.execute_and_wait(query, timeout_sec=5.0)

            if handle:
                result = self.fetch(handle, rows=100)
                self.close(handle)
                stats = list(result.rows())
        else:
            table = self.get_table(database, table)
            stats = table.stats

        return stats

    def get_table_columns_stats(self, database, table, column):
        """Return statistics for one column as a list of single-key dicts.

        Impala runs SHOW COLUMN STATS and picks the row whose first field
        equals `column`; other servers run DESCRIBE FORMATTED and read the
        third row of the output. Returns an empty list when the statement
        yields no handle.

        The handle is closed even when fetching raises, so it is not leaked
        on the server.
        """
        names = {'database': database, 'table': table, 'column': column}
        is_impala = self.server_name == 'impala'

        if is_impala:
            hql = 'SHOW COLUMN STATS `%(database)s`.`%(table)s`' % names
        else:
            hql = 'DESCRIBE FORMATTED `%(database)s`.`%(table)s` `%(column)s`' % names

        handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
        if not handle:
            return []

        try:
            result = self.fetch(handle, rows=100)
        finally:
            # Release the server-side handle whether or not fetch succeeded.
            self.close(handle)
        data = list(result.rows())

        if is_impala:
            # Raises IndexError when no row matches `column` (unchanged behavior).
            row = [col for col in data if col[0] == column][0]
            keys = ['col_name', 'data_type', 'distinct_count', 'num_nulls',
                    'max_col_len', 'avg_col_len']
        else:
            row = data[2]
            keys = ['col_name', 'data_type', 'min', 'max', 'num_nulls',
                    'distinct_count', 'avg_col_len', 'max_col_len',
                    'num_trues', 'num_falses']

        # One single-key dict per statistic, in positional order of the row.
        return [{key: row[position]} for position, key in enumerate(keys)]

    def get_top_terms(self, database, table, column, limit=30, prefix=None):
        """Return the most frequent values of `column` with their counts.

        `limit` is capped at 100. When `prefix` is given, only values whose
        string form starts with it are counted. Returns a list of rows, or an
        empty list when the statement yields no handle within 60 seconds.

        The handle is closed even when fetching raises, so it is not leaked
        on the server.
        """
        limit = min(limit, 100)
        prefix_match = ''
        if prefix:
            prefix_match = "WHERE CAST(%(column)s AS STRING) LIKE '%(prefix)s%%'" % {
                'column': column,
                'prefix': prefix
            }

        hql = 'SELECT %(column)s, COUNT(*) AS ct FROM `%(database)s`.`%(table)s` %(prefix_match)s GROUP BY %(column)s ORDER BY ct DESC LIMIT %(limit)s' % {
            'database': database,
            'table': table,
            'column': column,
            'prefix_match': prefix_match,
            'limit': limit,
        }

        query = hql_query(hql)
        handle = self.execute_and_wait(query,
                                       timeout_sec=60.0)  # Hive is very slow

        if not handle:
            return []

        try:
            result = self.fetch(handle, rows=limit)
        finally:
            # Release the server-side handle whether or not fetch succeeded.
            self.close(handle)

        return list(result.rows())

    def drop_table(self, database, table):
        if table.is_view:
            hql = "DROP VIEW `%s`.`%s`" % (
                database,
                table.name,
            )
        else:
            hql = "DROP TABLE `%s`.`%s`" % (
                database,
                table.name,
            )

        return self.execute_statement(hql)

    def load_data(self, database, table, form, design):
        """Build and run a LOAD DATA INPATH statement from a validated form.

        The statement is stored on `design` (as the dumped query) and saved
        before being executed through execute_query().
        """
        hql = "LOAD DATA INPATH"
        hql += " '%s'" % form.cleaned_data['path']
        if form.cleaned_data['overwrite']:
            hql += " OVERWRITE"
        hql += " INTO TABLE "
        hql += "`%s`.`%s`" % (
            database,
            table.name,
        )
        if form.partition_columns:
            # items() works on Python 2 and 3 alike; iteritems() exists on Python 2 only.
            assignments = [
                "%s='%s'" % (column_name, form.cleaned_data[key])
                for key, column_name in form.partition_columns.items()
            ]
            hql += " PARTITION (" + ", ".join(assignments) + ")"

        query = hql_query(hql, database)
        design.data = query.dumps()
        design.save()

        return self.execute_query(query, design)

    def drop_tables(self, database, tables, design):
        """Drop several tables/views of `database` in one multi-statement query.

        Each entry becomes DROP VIEW or DROP TABLE depending on its is_view
        flag. The joined statements are stored on `design`, which is saved
        before the query is executed through execute_query().
        """
        statements = [
            ("DROP VIEW `%s`.`%s`" if table.is_view else "DROP TABLE `%s`.`%s`") % (database, table.name)
            for table in tables
        ]

        query = hql_query(';'.join(statements), database)
        design.data = query.dumps()
        design.save()

        return self.execute_query(query, design)

    def drop_database(self, database):
        return self.execute_statement("DROP DATABASE `%s`" % database)

    def drop_databases(self, databases, design):
        """Drop several databases in one multi-statement query.

        The joined DROP DATABASE statements are stored on `design`, which is
        saved before the query is executed through execute_query().
        """
        hql = []

        for database in databases:
            hql.append("DROP DATABASE `%s`" % database)
        # NOTE(review): `database` here is the loop variable left over from the loop
        # above, i.e. the last database being dropped; with an empty `databases`
        # it is unbound and this line raises. Confirm which database is intended.
        query = hql_query(';'.join(hql), database)
        design.data = query.dumps()
        design.save()

        return self.execute_query(query, design)

    def _get_and_validate_select_query(self, design, query_history):
        query = design.get_query_statement(query_history.statement_number)
        if not query.strip().lower().startswith('select'):
            raise Exception(
                _('Only SELECT statements can be saved. Provided query: %(query)s'
                  ) % {'query': query})

        return query

    def insert_query_into_directory(self, query_history, target_dir):
        design = query_history.design.get_design()
        database = design.query['database']
        self.use(database)
        query = self._get_and_validate_select_query(design, query_history)
        hql = "INSERT OVERWRITE DIRECTORY '%s' %s" % (target_dir, query)
        return self.execute_statement(hql)

    def create_table_as_a_select(self, request, query_history, target_database,
                                 target_table, result_meta):
        """Save the results of a query as a new table.

        When result_meta.in_tablename is set, the history's SELECT statement
        is re-run as CREATE TABLE `target_database`.`target_table` AS ....
        Otherwise a text table is created from the result schema and the
        result files are moved into its storage location; if that move fails
        the new table is dropped (best effort) and the original error is
        re-raised.

        NOTE(review): this snippet appears to be cut off -- the last visible
        statement assigns `url` and nothing is returned.
        """
        design = query_history.design.get_design()
        database = design.query['database']

        # Case 1: Hive Server 2 backend or results straight from an existing table
        if result_meta.in_tablename:
            self.use(database)
            query = self._get_and_validate_select_query(design, query_history)
            hql = 'CREATE TABLE `%s`.`%s` AS %s' % (target_database,
                                                    target_table, query)
            query_history = self.execute_statement(hql)
        else:
            # FYI: this path is dead since moving to HiveServer2
            #
            # Case 2: The results are in some temporary location
            # Beeswax backward compatibility and optimization
            # 1. Create table
            # NOTE(review): this branch never uses target_database; the table is
            # created unqualified and looked up in 'default' below.
            cols = ''
            schema = result_meta.schema
            for i, field in enumerate(schema.fieldSchemas):
                if i != 0:
                    cols += ',\n'
                cols += '`%s` %s' % (field.name, field.type)

            # The representation of the delimiter is messy.
            # It came from Java as a string, which might has been converted from an integer.
            # So it could be "1" (^A), or "10" (\n), or "," (a comma literally).
            delim = result_meta.delim
            if not delim.isdigit():
                delim = str(ord(delim))

            # The delimiter code is zero-padded to three digits and written after a
            # backslash in the DDL, e.g. \001.
            hql = '''
            CREATE TABLE `%s` (
            %s
            )
            ROW FORMAT DELIMITED
            FIELDS TERMINATED BY '\%s'
            STORED AS TextFile
            ''' % (target_table, cols, delim.zfill(3))

            query = hql_query(hql)
            self.execute_and_wait(query)

            try:
                # 2. Move the results into the table's storage
                table_obj = self.get_table('default', target_table)
                table_loc = request.fs.urlsplit(table_obj.path_location)[2]
                result_dir = request.fs.urlsplit(result_meta.table_dir)[2]
                request.fs.rename_star(result_dir, table_loc)
                LOG.debug("Moved results from %s to %s" %
                          (result_meta.table_dir, table_loc))
                # NOTE(review): this calls request.info(request, ...); confirm the
                # request object really exposes info().
                request.info(
                    request,
                    _('Saved query results as new table %(table)s.') %
                    {'table': target_table})
                query_history.save_state(QueryHistory.STATE.expired)
            except Exception, ex:
                # Best-effort cleanup: drop the table that was just created.
                query = hql_query('DROP TABLE `%s`' % target_table)
                try:
                    self.execute_and_wait(query)
                except Exception, double_trouble:
                    LOG.exception('Failed to drop table "%s" as well: %s' %
                                  (target_table, double_trouble))
                # Re-raise the original failure, not the cleanup failure.
                raise ex
            url = format_preserving_redirect(request,
                                             reverse('metastore:index'))
Beispiel #33
0
    table_loc = request.fs.urlsplit(table_obj.path_location)[2]
    request.fs.rename_star(result_meta.table_dir, table_loc)
    LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, table_loc))
    messages.info(request, _('Saved query results as new table %(table)s') % {'table': target_table})
    query_history.save_state(models.QueryHistory.STATE.expired)
  except Exception, ex:
    LOG.error('Error moving data into storage of table %s. Will drop table.' % (target_table,))
    query = hql_query('DROP TABLE `%s`' % (target_table,))
    try:
      db.execute_directly(query)        # Don't wait for results
    except Exception, double_trouble:
      LOG.exception('Failed to drop table "%s" as well: %s' % (target_table, double_trouble))
    raise ex

  # Show tables upon success
  return format_preserving_redirect(request, urlresolvers.reverse(show_tables))


def confirm_query(request, query, on_success_url=None):
  """
  Used by other forms to confirm a query before it's executed.
  The form is the same as execute_query below.

  query - The HQL about to be executed
  on_success_url - The page to go to upon successful execution
  """
  mform = QueryForm()
  mform.bind()
  mform.query.initial = dict(query=query)
  return render('execute.mako', request, {
    'form': mform,
Beispiel #34
0
    table_loc = request.fs.urlsplit(table_obj.path_location)[2]
    request.fs.rename_star(result_meta.table_dir, table_loc)
    LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, table_loc))
    messages.info(request, _('Saved query results as new table %(table)s') % {'table': target_table})
    query_history.save_state(models.QueryHistory.STATE.expired)
  except Exception, ex:
    LOG.error('Error moving data into storage of table %s. Will drop table.' % (target_table,))
    query = hql_query('DROP TABLE `%s`' % (target_table,))
    try:
      db.execute_directly(query)        # Don't wait for results
    except Exception, double_trouble:
      LOG.exception('Failed to drop table "%s" as well: %s' % (target_table, double_trouble))
    raise ex

  # Show tables upon success
  return format_preserving_redirect(request, reverse(get_app_name(request) + ':show_tables'))


def confirm_query(request, query, on_success_url=None):
  """
  Used by other forms to confirm a query before it's executed.
  The form is the same as execute_query below.

  query - The HQL about to be executed
  on_success_url - The page to go to upon successful execution
  """
  mform = QueryForm()
  mform.bind()
  mform.query.initial = dict(query=query)

  return render('execute.mako', request, {
Beispiel #35
0
def watch_query(request, id):
    """
    Check the state of query ``id`` once and redirect or render the wait page.

    The state reported for the query is saved on its history record, then:

      - expired: a PopupException is raised.
      - available: redirect to ``on_success_url``.
      - failed: redirect to the results view.
      - any other state: render ``watch_wait.mako`` with the current log.

    Optional GET params:

      on_success_url
        Where to redirect once the query is available. Defaults to the
        results view of this query.

      context
        A string of "name:data" that describes the context
        that generated this query result. It may be:
          - "table":"<table_name>"
          - "design":<design_id>

    ``request.GET`` is handed to every redirect that is issued.
    """
    query_id = int(id)
    history = authorized_get_history(request, query_id, must_exist=True)

    raw_context = request.GET.get('context', '')

    # The results view doubles as the default success target.
    default_url = urlresolvers.reverse(
        view_results,
        kwargs={'id': str(query_id), 'first_row': 0, 'last_result_len': 0})
    success_url = request.GET.get('on_success_url') or default_url

    server_id, state = _get_server_id_and_state(history)
    history.save_state(state)

    if state == QueryHistory.STATE.expired:
        raise PopupException(_("The result of this query has expired."))
    if state == QueryHistory.STATE.available:
        return format_preserving_redirect(request, success_url, request.GET)
    if state == QueryHistory.STATE.failed:
        # When we fetch, Beeswax server will throw us a BeeswaxException,
        # which has the log we want to display.
        return format_preserving_redirect(request, default_url, request.GET)

    # Still running: fetch the log and keep showing the wait page.
    client = db_utils.db_client(history.get_query_server())
    log = client.get_log(server_id)

    # Translate context into something more meaningful (type, data).
    parsed_context = _parse_query_context(raw_context)
    template_vars = {
        'query': history,
        'fwd_params': request.GET.urlencode(),
        'log': log,
        'hadoop_jobs': _parse_out_hadoop_jobs(log),
        'query_context': parsed_context,
    }
    return render('watch_wait.mako', request, template_vars)
Beispiel #36
0
def flash_redirect(request):
    """Call ``sleeper``, queue the flash message 'redirect test', then redirect to /jframegallery/."""
    sleeper(request)
    request.flash.put('redirect test')
    target = '/jframegallery/'
    return format_preserving_redirect(request, target)
Beispiel #37
0
def save_results(request, id):
  """
  Save the results of a query to an HDFS directory
  """
  query_history = authorized_get_history(request, id, must_exist=True)

  server_id, state = _get_query_handle_and_state(query_history)
  query_history.save_state(state)
  error_msg, log = None, None

  if request.method == 'POST':
    # Make sure the result is available.
    # Note that we may still hit errors during the actual save
    if not query_history.is_success():
      if query_history.is_failure():
        msg = _('This query has %(state)s. Results unavailable.') % {'state': state}
      else:
        msg = _('The result of this query is not available yet.')
      raise PopupException(msg)

    db = dbms.get(request.user, query_history.get_query_server_config())
    form = beeswax.forms.SaveResultsForm(request.POST, db=db)

    # Cancel goes back to results
    if request.POST.get('cancel'):
      return format_preserving_redirect(request, '/beeswax/watch/%s' % (id,))

    if form.is_valid():
      # Do save
      # 1. Get the results metadata
      assert request.POST.get('save')
      try:
        handle, state = _get_query_handle_and_state(query_history)
        result_meta = db.get_results_metadata(handle)
      except Exception, ex:
        LOG.exception(ex)
        raise PopupException(_('Cannot find query.'))
      if result_meta.table_dir:
        result_meta.table_dir = request.fs.urlsplit(result_meta.table_dir)[2]

      # 2. Check for partitioned tables
      if result_meta.table_dir is None:
        raise PopupException(_('Saving results from a partitioned table is not supported. You may copy from the HDFS location manually.'))

      # 3. Actual saving of results
      try:
        if form.cleaned_data['save_target'] == form.SAVE_TYPE_DIR:
          # To dir
          if result_meta.in_tablename:
            raise PopupException(_('Saving results from a query with no MapReduce jobs is not supported. '
                                   'You may copy manually from the HDFS location %(path)s.') % {'path': result_meta.table_dir})
          target_dir = form.cleaned_data['target_dir']
          request.fs.rename_star(result_meta.table_dir, target_dir)
          LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, target_dir))
          query_history.save_state(models.QueryHistory.STATE.expired)
          return redirect(reverse('filebrowser.views.view', kwargs={'path': target_dir}))
        elif form.cleaned_data['save_target'] == form.SAVE_TYPE_TBL:
          # To new table
          try:
            return _save_results_ctas(request, query_history, form.cleaned_data['target_table'], result_meta)
          except Exception, bex:
            LOG.exception(bex)
            error_msg, log = expand_exception(bex, db)
      except WebHdfsException, ex:
        raise PopupException(_('The table could not be saved.'), detail=ex)
      except IOError, ex:
        LOG.exception(ex)
        error_msg = str(ex)
Beispiel #38
0
  # Go to next statement if asked to continue or when a statement with no dataset finished.
  if request.method == 'POST' or (not query_history.is_finished() and query_history.is_success() and not query_history.has_results):
    try:
      query_history = db.execute_next_statement(query_history)
    except Exception, ex:
      pass

  # Check query state
  handle, state = _get_query_handle_and_state(query_history)
  query_history.save_state(state)

  if query_history.is_failure():
    # When we fetch, Beeswax server will throw us a Exception, which has the
    # log we want to display.
    return format_preserving_redirect(request, results_url, request.GET)
  elif query_history.is_finished() or (query_history.is_success() and query_history.has_results):
    return format_preserving_redirect(request, on_success_url, request.GET)

  # Still running
  log = db.get_log(handle)

  # Keep waiting
  # - Translate context into something more meaningful (type, data)
  query_context = _parse_query_context(context_param)

  return render('watch_wait.mako', request, {
                'query': query_history,
                'fwd_params': request.GET.urlencode(),
                'log': log,
                'hadoop_jobs': _parse_out_hadoop_jobs(log),
Beispiel #39
0
class HiveServer2Dbms(object):
    def __init__(self, client, server_type):
        self.client = client
        self.server_type = server_type
        self.server_name = self.client.query_server['server_name']

    def get_table(self, database, table_name):
        """Switch to ``database`` and return the client's table object for ``table_name``."""
        # DB name not supported in SHOW PARTITIONS required in Table,
        # so the database is selected before the lookup.
        self.use(database)
        table = self.client.get_table(database, table_name)
        return table

    def get_tables(self, database='default', table_names='.*'):
        """Return the client's table listing for ``database`` filtered by ``table_names``."""
        tables = self.client.get_tables(database, table_names)
        return tables

    def get_databases(self):
        """Return whatever the client reports as the available databases."""
        databases = self.client.get_databases()
        return databases

    def execute_query(self, query, design):
        """Run ``query`` through ``execute_and_watch``, attaching ``design`` to it."""
        outcome = self.execute_and_watch(query, design=design)
        return outcome

    def select_star_from(self, database, table):
        """Execute ``SELECT *`` on ``table``, followed by the browse limit clause."""
        table_name = table.name
        limit_clause = self._get_browse_limit_clause(table)
        statement = "SELECT * FROM `%s.%s` %s" % (database, table_name,
                                                  limit_clause)
        return self.execute_statement(statement)

    def execute_statement(self, hql):
        """Wrap the ``hql`` string in a query object and run it via ``execute_and_watch``.

        ``QUERY_TYPES[1]`` is used when the server name is 'impala',
        ``QUERY_TYPES[0]`` otherwise.
        """
        type_index = 1 if self.server_name == 'impala' else 0
        # NOTE(review): the query type is passed as the second positional
        # argument, which other call sites in this class use for the
        # database name -- confirm against the hql_query signature.
        query = hql_query(hql, QUERY_TYPES[type_index])
        return self.execute_and_watch(query)

    def fetch(self, query_handle, start_over=False, rows=None):
        """Fetch results for ``query_handle`` through the client.

        ``start_over`` is forced to False when the default configuration
        contains a 'support_start_over' variable whose value is 'false'.
        """
        start_over_disabled = any(
            variable.key == 'support_start_over' and variable.value == 'false'
            for variable in self.get_default_configuration(False))
        if start_over_disabled:
            start_over = False

        return self.client.fetch(query_handle, start_over, rows)

    def close_operation(self, query_handle):
        """Ask the client to close the operation behind ``query_handle``."""
        response = self.client.close_operation(query_handle)
        return response

    def open_session(self, user):
        """Ask the client to open a session for ``user`` and return the result."""
        session = self.client.open_session(user)
        return session

    def close_session(self, session):
        """Ask the client to close ``session`` and return its response."""
        response = self.client.close_session(session)
        return response

    def cancel_operation(self, query_handle):
        """Cancel the operation behind ``query_handle``.

        When the client's server name is 'impala' the operation is also
        closed, and the close response is returned instead of the cancel
        response.
        """
        cancel_response = self.client.cancel_operation(query_handle)
        if self.client.query_server['server_name'] != 'impala':
            return cancel_response
        return self.client.close_operation(query_handle)

    def get_sample(self, database, table):
        """Return up to 100 sample rows of ``table``, or None.

        No samples if it's a view (HUE-526). None is also returned when
        ``execute_and_wait`` does not hand back a handle within its
        5 second timeout.

        The handle is closed even when fetching the rows raises, so a
        failed fetch does not leave it open.
        """
        if table.is_view:
            return None

        limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())
        hql = "SELECT * FROM %s.%s LIMIT %s" % (database, table.name, limit)
        query = hql_query(hql)
        handle = self.execute_and_wait(query, timeout_sec=5.0)
        if not handle:
            return None

        try:
            return self.fetch(handle, rows=100)
        finally:
            self.close(handle)

    def analyze_table_table(self, database, table):
        """Run ``analyze table ... compute statistics`` for ``table`` in ``database``.

        Returns the result of ``execute_and_watch`` for the statement.
        """
        # The table_name placeholder previously lacked its ``s`` conversion,
        # which made the formatting itself raise ValueError.
        hql = 'analyze table `%(database)s.%(table_name)s` compute statistics' % {
            'database': database,
            'table_name': table.name
        }
        query = hql_query(hql, database)

        # execute_query() requires a design argument that is not available
        # here, so run the query directly, as execute_statement() does.
        return self.execute_and_watch(query)

    def analyze_table_column(self):
        """Placeholder: column statistics are not implemented; returns None.

        Intended statement shape:
        analyze table <table_name> partition <part_name> compute statistics
        for columns <col_name1>, <col_name2>...
        """
        return None

    def drop_table(self, database, table):
        """Execute DROP VIEW or DROP TABLE for ``table``, depending on ``table.is_view``."""
        object_kind = 'VIEW' if table.is_view else 'TABLE'
        statement = "DROP %s `%s.%s`" % (object_kind, database, table.name)
        return self.execute_statement(statement)

    def load_data(self, database, table, form, design):
        """Build a LOAD DATA INPATH statement from ``form`` and execute it.

        The statement is stored on ``design`` (``design.data``) and the
        design is saved before the query is executed with it.
        """
        data = form.cleaned_data
        # NOTE(review): form values are interpolated into the statement
        # as-is -- confirm they are validated before reaching this point.
        pieces = ["LOAD DATA INPATH", " '%s'" % data['path']]
        if data['overwrite']:
            pieces.append(" OVERWRITE")
        pieces.append(" INTO TABLE ")
        pieces.append("`%s.%s`" % (database, table.name))

        if form.partition_columns:
            assignments = [
                "%s='%s'" % (column_name, data[key])
                for key, column_name in form.partition_columns.iteritems()
            ]
            pieces.append(" PARTITION (" + ", ".join(assignments) + ")")

        query = hql_query("".join(pieces), database)
        design.data = query.dumps()
        design.save()

        return self.execute_query(query, design)

    def drop_tables(self, database, tables, design):
        """Drop every entry of ``tables`` with one ';'-joined statement list.

        Views get DROP VIEW, everything else DROP TABLE. The resulting
        query is stored on ``design`` and saved before it is executed.
        """
        statements = [
            "DROP %s `%s.%s`" % ('VIEW' if table.is_view else 'TABLE',
                                 database, table.name)
            for table in tables
        ]
        query = hql_query(';'.join(statements), database)
        design.data = query.dumps()
        design.save()

        return self.execute_query(query, design)

    def invalidate_tables(self, database, tables):
        """Run INVALIDATE METADATA for each entry of ``tables``, one query per table.

        Each query uses ``QUERY_TYPES[1]`` and is waited on for up to 10
        seconds; a returned handle is closed right away.
        """
        for table in tables:
            statement = "INVALIDATE METADATA %s.%s" % (database, table)
            query = hql_query(statement, database, query_type=QUERY_TYPES[1])

            handle = self.execute_and_wait(query, timeout_sec=10.0)
            if not handle:
                continue
            self.close(handle)

    def drop_database(self, database):
        """Execute DROP DATABASE for ``database``."""
        statement = "DROP DATABASE `%s`" % database
        return self.execute_statement(statement)

    def drop_databases(self, databases, design):
        """Drop every entry of ``databases`` with one ';'-joined statement list.

        The resulting query is stored on ``design`` and saved before it
        is executed.
        """
        statements = []
        for database in databases:
            statements.append("DROP DATABASE `%s`" % database)

        # NOTE(review): ``database`` is the loop variable, so the query is
        # created against the last database listed, and an empty
        # ``databases`` leaves it unbound -- confirm this is intended.
        query = hql_query(';'.join(statements), database)
        design.data = query.dumps()
        design.save()

        return self.execute_query(query, design)

    def insert_query_into_directory(self, query_history, target_dir):
        """Re-run the design's query as INSERT OVERWRITE DIRECTORY into ``target_dir``.

        The database recorded in the design is selected first.
        """
        saved_design = query_history.design.get_design()
        self.use(saved_design.query['database'])

        statement = "INSERT OVERWRITE DIRECTORY '%s' %s" % (
            target_dir, saved_design.query['query'])
        return self.execute_statement(statement)

    def create_table_as_a_select(self, request, query_history, target_database,
                                 target_table, result_meta):
        design = query_history.design.get_design()
        database = design.query['database']

        # Case 1: Hive Server 2 backend or results straight from an existing table
        if result_meta.in_tablename:
            self.use(database)

            hql = 'CREATE TABLE %s.%s AS %s' % (target_database, target_table,
                                                design.query['query'])
            query_history = self.execute_statement(hql)
        else:
            # Case 2: The results are in some temporary location
            # Beeswax backward compatibility and optimization
            # 1. Create table
            cols = ''
            schema = result_meta.schema
            for i, field in enumerate(schema.fieldSchemas):
                if i != 0:
                    cols += ',\n'
                cols += '`%s` %s' % (field.name, field.type)

            # The representation of the delimiter is messy.
            # It came from Java as a string, which might has been converted from an integer.
            # So it could be "1" (^A), or "10" (\n), or "," (a comma literally).
            delim = result_meta.delim
            if not delim.isdigit():
                delim = str(ord(delim))

            hql = '''
            CREATE TABLE `%s` (
            %s
            )
            ROW FORMAT DELIMITED
            FIELDS TERMINATED BY '\%s'
            STORED AS TextFile
            ''' % (target_table, cols, delim.zfill(3))

            query = hql_query(hql)
            self.execute_and_wait(query)

            try:
                # 2. Move the results into the table's storage
                table_obj = self.get_table('default', target_table)
                table_loc = request.fs.urlsplit(table_obj.path_location)[2]
                result_dir = request.fs.urlsplit(result_meta.table_dir)[2]
                request.fs.rename_star(result_dir, table_loc)
                LOG.debug("Moved results from %s to %s" %
                          (result_meta.table_dir, table_loc))
                request.info(
                    request,
                    _('Saved query results as new table %(table)s.') %
                    {'table': target_table})
                query_history.save_state(QueryHistory.STATE.expired)
            except Exception, ex:
                query = hql_query('DROP TABLE `%s`' % target_table)
                try:
                    self.execute_and_wait(query)
                except Exception, double_trouble:
                    LOG.exception('Failed to drop table "%s" as well: %s' %
                                  (target_table, double_trouble))
                raise ex
            url = format_preserving_redirect(request,
                                             reverse('metastore:index'))
Beispiel #40
0
def save_results(request, id):
  """
  Save the results of a query to an HDFS directory
  """
  id = int(id)
  query_history = models.QueryHistory.objects.get(id=id)
  if query_history.owner != request.user:
    raise PopupException('This action is only available to the user who submitted the query.')
  _, state = _get_server_id_and_state(query_history)
  query_history.save_state(state)
  error_msg, log = None, None

  if request.method == 'POST':
    # Make sure the result is available.
    # Note that we may still hit errors during the actual save
    if state != models.QueryHistory.STATE.available:
      if state in (models.QueryHistory.STATE.failed, models.QueryHistory.STATE.expired):
        msg = 'This query has %s. Results unavailable.' % (state,)
      else:
        msg = 'The result of this query is not available yet.'
      raise PopupException(msg)

    form = beeswax.forms.SaveResultsForm(request.POST)

    # Cancel goes back to results
    if request.POST.get('cancel'):
      return format_preserving_redirect(request, '/beeswax/watch/%s' % (id,))
    if form.is_valid():
      # Do save
      # 1. Get the results metadata
      assert request.POST.get('save')
      handle = QueryHandle(id=query_history.server_id, log_context=query_history.log_context)
      try:
        result_meta = db_utils.db_client().get_results_metadata(handle)
      except QueryNotFoundException, ex:
        LOG.exception(ex)
        raise PopupException('Cannot find query.')
      if result_meta.table_dir:
        result_meta.table_dir = request.fs.urlsplit(result_meta.table_dir)[2]

      # 2. Check for partitioned tables
      if result_meta.table_dir is None:
        raise PopupException(
                  'Saving results from a partitioned table is not supported. '
                  'You may copy from the HDFS location manually.')

      # 3. Actual saving of results
      try:
        if form.cleaned_data['save_target'] == form.SAVE_TYPE_DIR:
          # To dir
          if result_meta.in_tablename:
            raise PopupException(
                      'Saving results from a table to a directory is not supported. '
                      'You may copy from the HDFS location manually.')
          target_dir = form.cleaned_data['target_dir']
          request.fs.rename_star(result_meta.table_dir, target_dir)
          LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, target_dir))
          query_history.save_state(models.QueryHistory.STATE.expired)
          fb_url = location_to_url(request, target_dir, strict=False)
          popup = PopupWithJframe('Query results stored in %s' % (target_dir,),
                                  launch_app_name='FileBrowser',
                                  launch_app_url=fb_url)
          return render_injected(list_query_history(request), popup)
        elif form.cleaned_data['save_target'] == form.SAVE_TYPE_TBL:
          # To new table
          try:
            return _save_results_ctas(request,
                                      query_history,
                                      form.cleaned_data['target_table'],
                                      result_meta)
          except BeeswaxException, bex:
            LOG.exception(bex)
            error_msg, log = expand_exception(bex)
      except IOError, ex:
        LOG.exception(ex)
        error_msg = str(ex)
Beispiel #41
0
def watch_query(request, id):
  """
  Check the state of query ``id`` once and redirect or render the wait page.

  The state reported for the query is saved on its history record, then:

    - expired: a PopupException is raised.
    - available: redirect to ``on_success_url``.
    - failed: redirect to the results view.
    - any other state: render ``watch_wait.mako`` with the current log
      and one download URL per entry of ``common.DL_FORMATS``.

  Optional GET params:

    on_success_url
      Where to redirect once the query is available. Defaults to the
      results view of this query.

    context
      A string of "name:data" that describes the context
      that generated this query result. It may be:
        - "table":"<table_name>"
        - "design":<design_id>

  ``request.GET`` is handed to every redirect that is issued.
  """
  query_id = int(id)

  raw_context = request.GET.get('context', '')

  # The results view doubles as the default success target.
  default_url = urlresolvers.reverse(
    view_results, kwargs={'id': str(query_id), 'first_row': 0})
  success_url = request.GET.get('on_success_url') or default_url

  # Retrieve models from database to get the server_id
  history = models.QueryHistory.objects.get(id=query_id)
  server_id, state = _get_server_id_and_state(history)
  history.save_state(state)

  states = models.QueryHistory.STATE
  if state == states.expired:
    raise PopupException("The result of this query has expired.")
  if state == states.available:
    return format_preserving_redirect(request, success_url, request.GET)
  if state == states.failed:
    # When we fetch, Beeswax server will throw us a BeeswaxException,
    # which has the log we want to display.
    return format_preserving_redirect(request, default_url, request.GET)

  # Still running
  log = db_utils.db_client().get_log(server_id)
  download_urls = dict(
    (fmt, urlresolvers.reverse(download,
                               kwargs={'id': str(query_id), 'format': fmt}))
    for fmt in common.DL_FORMATS)

  # Translate context into something more meaningful (type, data)
  parsed_context = _parse_query_context(raw_context)
  template_vars = {
    'query': history,
    'fwd_params': request.GET.urlencode(),
    'download_urls': download_urls,
    'log': log,
    'hadoop_jobs': _parse_out_hadoop_jobs(log),
    'query_context': parsed_context,
  }
  return render('watch_wait.mako', request, template_vars)
Beispiel #42
0
  # Go to next statement if asked to continue or when a statement with no dataset finished.
  if request.method == 'POST' or (not query_history.is_finished() and query_history.is_success() and not query_history.has_results):
    try:
      query_history = db.execute_next_statement(query_history)
    except BeeswaxException, ex:
      pass

  # Check query state
  handle, state = _get_query_handle_and_state(query_history)
  query_history.save_state(state)

  if query_history.is_failure():
    # When we fetch, Beeswax server will throw us a BeeswaxException, which has the
    # log we want to display.
    return format_preserving_redirect(request, results_url, request.GET)
  elif query_history.is_finished() or (query_history.is_success() and query_history.has_results):
    return format_preserving_redirect(request, on_success_url, request.GET)

  # Still running
  log = db.get_log(handle)

  # Keep waiting
  # - Translate context into something more meaningful (type, data)
  query_context = _parse_query_context(context_param)

  return render('watch_wait.mako', request, {
                'query': query_history,
                'fwd_params': request.GET.urlencode(),
                'log': log,
                'hadoop_jobs': _parse_out_hadoop_jobs(log),
Beispiel #43
0
def watch_query(request, id, download_format=None):
    """
    Advance and check query ``id``, then redirect or render the wait page.

    On POST, or when the current statement succeeded without results and
    the query is not finished, the next statement is executed first. The
    state is then saved on the history record and:

      - failure: redirect to the results URL.
      - finished, or successful with results: redirect to
        ``on_success_url`` (to the download view instead when the
        session flag 'dl_status' is set).
      - otherwise: render ``watch_wait.mako`` with the current log.

    Optional GET params:

      on_success_url
        Where to redirect on success. Defaults to the results URL.

      context
        A string of "name:data" that describes the context
        that generated this query result. It may be:
          - "table":"<table_name>"
          - "design":<design_id>

      download
        When non-empty, '?download=true' is appended to the results URL.

    ``request.GET`` is handed to every redirect that is issued.
    """
    history = authorized_get_history(request, id, must_exist=True)
    db = dbms.get(request.user, history.get_query_server_config())

    raw_context = request.GET.get('context', '')

    # The explicit comparison with False is kept on purpose: it is not
    # the same test as ``not value`` (e.g. for None).
    use_execute_page = (request.session.get('dl_status', False) == False
                        and download_format in common.DL_FORMATS)
    if use_execute_page:
        results_url = urlresolvers.reverse(
            get_app_name(request) + ':execute_query')
    else:
        results_url = urlresolvers.reverse(
            get_app_name(request) + ':view_results',
            kwargs={'id': id, 'first_row': 0})
    if request.GET.get('download', ''):
        results_url += '?download=true'
    on_success_url = request.GET.get('on_success_url') or results_url

    # Go to next statement if asked to continue or when a statement with
    # no dataset finished.
    should_advance = request.method == 'POST' or (
        not history.is_finished()
        and history.is_success()
        and not history.has_results)
    if should_advance:
        try:
            history = db.execute_next_statement(history)
        except Exception:
            # Errors are ignored here; the state check below still runs.
            pass

    handle, state = _get_query_handle_and_state(history)
    history.save_state(state)

    if history.is_failure():
        # When we fetch, Beeswax server will throw us a BeeswaxException,
        # which has the log we want to display.
        return format_preserving_redirect(request, results_url, request.GET)

    if history.is_finished() or (history.is_success() and history.has_results):
        if request.session.get('dl_status', False):  # BUG-20020
            on_success_url = urlresolvers.reverse(
                get_app_name(request) + ':download',
                kwargs={'id': str(id), 'format': download_format})
        _clean_session(request)
        return format_preserving_redirect(request, on_success_url, request.GET)

    # Still running
    log = db.get_log(handle)

    # Translate context into something more meaningful (type, data)
    parsed_context = _parse_query_context(raw_context)
    template_vars = {
        'query': history,
        'fwd_params': request.GET.urlencode(),
        'log': log,
        'hadoop_jobs': _parse_out_hadoop_jobs(log)[0],
        'query_context': parsed_context,
        'download_format': download_format,  # ExpV
    }
    return render('watch_wait.mako', request, template_vars)
Beispiel #44
0
  # Go to next statement if asked to continue or when a statement with no dataset finished.
  if request.method == 'POST' or (not query_history.is_finished() and query_history.is_success() and not query_history.has_results):
    try:
      query_history = db.execute_next_statement(query_history)
    except Exception, ex:
      pass

  # Check query state
  handle, state = _get_query_handle_and_state(query_history)
  query_history.save_state(state)

  if query_history.is_failure():
    # When we fetch, Beeswax server will throw us a Exception, which has the
    # log we want to display.
    return format_preserving_redirect(request, results_url, request.GET)
  elif query_history.is_finished() or (query_history.is_success() and query_history.has_results):
    return format_preserving_redirect(request, on_success_url, request.GET)

  # Still running
  log = db.get_log(handle)

  # Keep waiting
  # - Translate context into something more meaningful (type, data)
  query_context = _parse_query_context(context_param)

  return render('watch_wait.mako', request, {
                'query': query_history,
                'fwd_params': request.GET.urlencode(),
                'log': log,
                'hadoop_jobs': _parse_out_hadoop_jobs(log),
Beispiel #45
0
def save_results(request, id):
  """
  Save the results of a query to an HDFS directory or to a new table.

  Only POST requests are acted upon:
    - a PopupException is raised when the query failed or is not successful yet;
    - a POST carrying 'cancel' redirects to the watch page of the query;
    - a valid form either moves the result files into the requested directory
      (marking the query as expired and redirecting to the file browser) or
      delegates to _save_results_ctas() to create a new table.

  Raises PopupException when the query handle cannot be found, when the results
  have no table directory, when table-backed results are saved to a directory,
  or when the file system reports a WebHdfsException.

  A BeeswaxException raised while creating the table and an IOError raised while
  saving are logged and recorded in the local error_msg / log variables.
  """
  query_history = authorized_get_history(request, id, must_exist=True)

  server_id, state = _get_query_handle_and_state(query_history)
  query_history.save_state(state)
  error_msg, log = None, None

  if request.method == 'POST':
    # Make sure the result is available.
    # Note that we may still hit errors during the actual save
    if not query_history.is_success():
      if query_history.is_failure():
        msg = _('This query has %(state)s. Results unavailable.') % {'state': state}
      else:
        msg = _('The result of this query is not available yet.')
      raise PopupException(msg)

    db = dbms.get(request.user, query_history.get_query_server())
    form = beeswax.forms.SaveResultsForm(request.POST, db=db)

    # Cancel goes back to results
    if request.POST.get('cancel'):
      return format_preserving_redirect(request, '/beeswax/watch/%s' % (id,))

    if form.is_valid():
      # Do save
      # 1. Get the results metadata
      assert request.POST.get('save')
      try:
        handle, state = _get_query_handle_and_state(query_history)
        result_meta = db.get_results_metadata(handle)
      except QueryNotFoundException as ex:
        LOG.exception(ex)
        raise PopupException(_('Cannot find query.'))
      if result_meta.table_dir:
        # Keep only the path component of the table location
        result_meta.table_dir = request.fs.urlsplit(result_meta.table_dir)[2]

      # 2. Check for partitioned tables
      if result_meta.table_dir is None:
        raise PopupException(_('Saving results from a partitioned table is not supported. You may copy from the HDFS location manually.'))

      # 3. Actual saving of results
      try:
        if form.cleaned_data['save_target'] == form.SAVE_TYPE_DIR:
          # To dir
          if result_meta.in_tablename:
            raise PopupException(_('Saving results from a table to a directory is not supported. You may copy from the HDFS location manually.'))
          target_dir = form.cleaned_data['target_dir']
          request.fs.rename_star(result_meta.table_dir, target_dir)
          LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, target_dir))
          # The result files were moved away, so the query results are gone
          query_history.save_state(models.QueryHistory.STATE.expired)
          return redirect(urlresolvers.reverse('filebrowser.views.view', kwargs={'path': target_dir}))
        elif form.cleaned_data['save_target'] == form.SAVE_TYPE_TBL:
          # To new table
          try:
            return _save_results_ctas(request, query_history, form.cleaned_data['target_table'], result_meta)
          except BeeswaxException as bex:
            LOG.exception(bex)
            error_msg, log = expand_exception(bex, db)
      except WebHdfsException as ex:
        raise PopupException(_('The table could not be saved.'), detail=ex)
      except IOError as ex:
        LOG.exception(ex)
        error_msg = str(ex)
Beispiel #46
0
  def create_table_as_a_select(self, request, query_history, target_database, target_table, result_meta):
    """
    Materialize the results of a query as a new table.

    When result_meta.in_tablename is set, the validated SELECT of the saved design
    is wrapped in a CREATE TABLE ... AS statement and the query history returned by
    execute_statement() is handed back.

    Otherwise a delimited text table is created from the result schema, the result
    files are moved into the table location and the given query history is marked
    as expired and returned. If the move fails the new table is dropped again and
    the original error is re-raised.
    """
    saved_design = query_history.design.get_design()
    source_database = saved_design.query['database']

    # Case 1: Hive Server 2 backend or results straight from an existing table
    if result_meta.in_tablename:
      self.use(source_database)
      select_statement = self._get_and_validate_select_query(saved_design, query_history)
      ctas = 'CREATE TABLE `%s`.`%s` AS %s' % (target_database, target_table, select_statement)
      return self.execute_statement(ctas)

    # FYI: this path is dead since moving to HiveServer2
    #
    # Case 2: The results are in some temporary location
    # Beeswax backward compatibility and optimization
    # 1. Create table
    column_defs = ',\n'.join(
        '`%s` %s' % (field.name, field.type) for field in result_meta.schema.fieldSchemas)

    # The delimiter arrives from Java as a string: either a character code
    # ("1" for ^A, "10" for \n) or the literal character (","). Normalize it
    # to the character code before it is zero-padded below.
    raw_delim = result_meta.delim
    delim_code = raw_delim if raw_delim.isdigit() else str(ord(raw_delim))

    create_statement = '''
            CREATE TABLE `%s` (
            %s
            )
            ROW FORMAT DELIMITED
            FIELDS TERMINATED BY '\%s'
            STORED AS TextFile
            ''' % (target_table, column_defs, delim_code.zfill(3))

    self.execute_and_wait(hql_query(create_statement))

    try:
      # 2. Move the results into the table's storage
      new_table = self.get_table('default', target_table)
      destination = request.fs.urlsplit(new_table.path_location)[2]
      source_dir = request.fs.urlsplit(result_meta.table_dir)[2]
      request.fs.rename_star(source_dir, destination)
      LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, destination))
      request.info(request, _('Saved query results as new table %(table)s.') % {'table': target_table})
      query_history.save_state(QueryHistory.STATE.expired)
    except Exception as ex:
      # Roll back: do not leave an empty table behind when the move failed
      drop_statement = hql_query('DROP TABLE `%s`' % target_table)
      try:
        self.execute_and_wait(drop_statement)
      except Exception as double_trouble:
        LOG.exception('Failed to drop table "%s" as well: %s' % (target_table, double_trouble))
      raise ex

    # The redirect is built but its value is not used afterwards
    unused_redirect = format_preserving_redirect(request, reverse('metastore:index'))

    return query_history
Beispiel #47
0
  # Go to next statement if asked to continue or when a statement with no dataset finished.
  if request.method == 'POST' or (not query_history.is_finished() and query_history.is_success() and not query_history.has_results):
    try:
      query_history = db.execute_next_statement(query_history)
    except Exception, ex:
      pass

  # Check query state
  handle, state = _get_query_handle_and_state(query_history)
  query_history.save_state(state)

  if query_history.is_failure():
    # When we fetch, Beeswax server will throw us a Exception, which has the
    # log we want to display.
    return format_preserving_redirect(request, results_url, request.GET)
  elif query_history.is_finished() or (query_history.is_success() and query_history.has_results):
    return format_preserving_redirect(request, on_success_url, request.GET)

  # Still running
  log = db.get_log(handle)

  # Keep waiting
  # - Translate context into something more meaningful (type, data)
  query_context = _parse_query_context(context_param)

  return render('watch_wait.mako', request, {
                'query': query_history,
                'fwd_params': request.GET.urlencode(),
                'log': log,
                'hadoop_jobs': _parse_out_hadoop_jobs(log),
Beispiel #48
0
                # 2. Move the results into the table's storage
                table_obj = self.get_table("default", target_table)
                table_loc = request.fs.urlsplit(table_obj.path_location)[2]
                result_dir = request.fs.urlsplit(result_meta.table_dir)[2]
                request.fs.rename_star(result_dir, table_loc)
                LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, table_loc))
                request.info(request, _("Saved query results as new table %(table)s.") % {"table": target_table})
                query_history.save_state(QueryHistory.STATE.expired)
            except Exception, ex:
                query = hql_query("DROP TABLE `%s`" % target_table)
                try:
                    self.execute_and_wait(query)
                except Exception, double_trouble:
                    LOG.exception('Failed to drop table "%s" as well: %s' % (target_table, double_trouble))
                raise ex
            url = format_preserving_redirect(request, reverse("metastore:index"))

        return query_history

    def use(self, database):
        """
        Issue a USE statement for `database` through the client.

        The name is wrapped in backticks so that a database whose name is a
        reserved word or contains special characters can still be selected.
        Returns whatever the client's use() call returns.
        """
        query = hql_query("USE `%s`" % database)
        return self.client.use(query)

    def get_log(self, query_handle, start_over=True):
        """Return the client's log for `query_handle`, forwarding `start_over` unchanged."""
        backend = self.client
        return backend.get_log(query_handle, start_over)

    def get_state(self, handle):
        """Return the state the client reports for `handle`."""
        backend = self.client
        return backend.get_state(handle)

    def get_operation_status(self, handle):
        """Return the operation status the client reports for `handle`."""
        backend = self.client
        return backend.get_operation_status(handle)
Beispiel #49
0
        # 2. Move the results into the table's storage
        table_obj = self.get_table('default', target_table)
        table_loc = request.fs.urlsplit(table_obj.path_location)[2]
        result_dir = request.fs.urlsplit(result_meta.table_dir)[2]
        request.fs.rename_star(result_dir, table_loc)
        LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, table_loc))
        request.info(request, _('Saved query results as new table %(table)s.') % {'table': target_table})
        query_history.save_state(QueryHistory.STATE.expired)
      except Exception, ex:
        query = hql_query('DROP TABLE `%s`' % target_table)
        try:
          self.execute_and_wait(query)
        except Exception, double_trouble:
          LOG.exception('Failed to drop table "%s" as well: %s' % (target_table, double_trouble))
        raise ex
      url = format_preserving_redirect(request, reverse('metastore:index'))

    return query_history


  def use(self, database):
    """Issue a USE statement for the backtick-quoted `database` through the client."""
    use_statement = hql_query('USE `%s`' % database)
    return self.client.use(use_statement)


  def get_log(self, query_handle, start_over=True):
    """Return the client's log for `query_handle`, forwarding `start_over` unchanged."""
    backend = self.client
    return backend.get_log(query_handle, start_over)


  def get_state(self, handle):
    """Return the state the client reports for `handle`."""
    backend = self.client
    return backend.get_state(handle)
Beispiel #50
0
def watch_query(request, id, download_format=None):
    """
    Wait for the query to finish and (by default) displays the results of query id.
    It understands the optional GET params:

      on_success_url
        If given, it will be displayed when the query is successfully finished.
        Otherwise, it will display the view query results page by default.

      context
        A string of "name:data" that describes the context
        that generated this query result. It may be:
          - "table":"<table_name>"
          - "design":<design_id>

    All other GET params will be passed to on_success_url (if present).

    download_format is compared against common.DL_FORMATS to pick the default
    results URL, is used as the `format` of the download URL when the session
    flag 'dl_status' is set, and is handed to the wait template.

    Returns a redirect once the query has failed or finished, otherwise the
    rendered 'watch_wait.mako' page.
    """
    # Function-scope import so this block does not rely on a module-level logger.
    import logging

    # Coerce types; manage arguments
    query_history = authorized_get_history(request, id, must_exist=True)
    db = dbms.get(request.user, query_history.get_query_server_config())

    # GET param: context.
    context_param = request.GET.get('context', '')

    # GET param: on_success_url. Default to view_results
    # NOTE: kept as an equality test on purpose; `not ...` would also match
    # None or '' stored in the session and change which URL is chosen.
    if request.session.get(
            'dl_status',
            False) == False and download_format in common.DL_FORMATS:
        results_url = urlresolvers.reverse(
            get_app_name(request) + ':execute_query')
    else:
        results_url = urlresolvers.reverse(get_app_name(request) +
                                           ':view_results',
                                           kwargs={
                                               'id': id,
                                               'first_row': 0
                                           })
    if request.GET.get('download', ''):
        results_url += '?download=true'
    on_success_url = request.GET.get('on_success_url')
    if not on_success_url:
        on_success_url = results_url

    # Go to next statement if asked to continue or when a statement with no dataset finished.
    if request.method == 'POST' or (not query_history.is_finished()
                                    and query_history.is_success()
                                    and not query_history.has_results):
        try:
            query_history = db.execute_next_statement(query_history)
        except Exception:
            # Still best effort: the state check below decides what to show,
            # but the failure is no longer dropped without a trace.
            logging.getLogger(__name__).exception(
                'Could not execute the next statement of query %s', id)

    # Check query state
    handle, state = _get_query_handle_and_state(query_history)
    query_history.save_state(state)

    if query_history.is_failure():
        # When we fetch, Beeswax server will throw us a BeeswaxException, which has the
        # log we want to display.
        return format_preserving_redirect(request, results_url, request.GET)
    elif query_history.is_finished() or (query_history.is_success()
                                         and query_history.has_results):
        if request.session.get('dl_status', False):  # BUG-20020
            # A download was requested: send the user to the download view instead
            on_success_url = urlresolvers.reverse(
                get_app_name(request) + ':download',
                kwargs=dict(id=str(id), format=download_format))
        _clean_session(request)
        return format_preserving_redirect(request, on_success_url, request.GET)

    # Still running
    log = db.get_log(handle)

    # Keep waiting
    # - Translate context into something more meaningful (type, data)
    query_context = _parse_query_context(context_param)

    return render(
        'watch_wait.mako',
        request,
        {
            'query': query_history,
            'fwd_params': request.GET.urlencode(),
            'log': log,
            'hadoop_jobs': _parse_out_hadoop_jobs(log)[0],
            'query_context': query_context,
            'download_format': download_format,  ## ExpV
        })