def view(request, path): """Dispatches viewing of a path to either index() or fileview(), depending on type.""" # default_to_home is set in bootstrap.js if 'default_to_home' in request.GET: home_dir_path = request.user.get_home_directory() if request.fs.isdir(home_dir_path): return format_preserving_redirect(request, reverse(view, kwargs=dict(path=home_dir_path))) # default_to_home is set in bootstrap.js if 'default_to_trash' in request.GET: home_trash = request.fs.join(request.fs.trash_path, 'Current', request.user.get_home_directory()[1:]) if request.fs.isdir(home_trash): return format_preserving_redirect(request, reverse(view, kwargs=dict(path=home_trash))) if request.fs.isdir(request.fs.trash_path): return format_preserving_redirect(request, reverse(view, kwargs=dict(path=request.fs.trash_path))) try: stats = request.fs.stats(path) if stats.isDir: return listdir_paged(request, path) else: return display(request, path) except (IOError, WebHdfsException), e: msg = _("Cannot access: %(path)s.") % {'path': escape(path)} if request.user.is_superuser and not request.user == request.fs.superuser: msg += _(' Note: You are a Hue admin but not a HDFS superuser (which is "%(superuser)s").') % {'superuser': request.fs.superuser} if request.is_ajax(): exception = { 'error': msg } return render_json(exception) else: raise PopupException(msg , detail=e)
def watch_query(request, id): """ Wait for the query to finish and (by default) displays the results of query id. It understands the optional GET params: on_success_url If given, it will be displayed when the query is successfully finished. Otherwise, it will display the view query results page by default. context A string of "name:data" that describes the context that generated this query result. It may be: - "table":"<table_name>" - "design":<design_id> All other GET params will be passed to on_success_url (if present). """ # Coerce types; manage arguments query_history = authorized_get_history(request, id, must_exist=True) # GET param: context. context_param = request.GET.get('context', '') # GET param: on_success_url. Default to view_results results_url = urlresolvers.reverse(view_results, kwargs={'id': id, 'first_row': 0}) on_success_url = request.GET.get('on_success_url') if not on_success_url: on_success_url = results_url # Check query state handle, state = _get_query_handle_and_state(query_history) query_history.save_state(state) # Query finished? # if state == models.QueryHistory.STATE.expired: # raise PopupException(_("The result of this query has expired.")) if query_history.is_success(): return format_preserving_redirect(request, on_success_url, request.GET) elif query_history.is_failure(): # When we fetch, Beeswax server will throw us a BeeswaxException, which has the # log we want to display. return format_preserving_redirect(request, results_url, request.GET) # Still running log = dbms.get(request.user, query_history.get_query_server()).get_log(handle) # Keep waiting # - Translate context into something more meaningful (type, data) query_context = _parse_query_context(context_param) return render('watch_wait.mako', request, { 'query': query_history, 'fwd_params': request.GET.urlencode(), 'log': log, 'hadoop_jobs': _parse_out_hadoop_jobs(log), 'query_context': query_context, })
def generic_op(form_class, request, op, parameter_names, piggyback=None, template="fileop.mako",
               data_extractor=default_data_extractor, arg_extractor=default_arg_extractor,
               initial_value_extractor=default_initial_value_extractor, extra_params=None):
    """
    Generic implementation for several operations.

    @param form_class form to instantiate
    @param request incoming request, used for parameters
    @param op callable with the filesystem operation
    @param parameter_names list of form parameters that are extracted and then passed to op
    @param piggyback list of form parameters whose file stats to look up after the operation
    @param data_extractor function that extracts POST data to be used by op
    @param arg_extractor function that extracts args from a given form or formset
    @param initial_value_extractor function that extracts the initial values of a form or formset
    @param extra_params dictionary of extra parameters to send to the template for rendering
    """
    # Use next for non-ajax requests, when available.
    next = request.GET.get("next", request.POST.get("next", None))

    ret = dict({
        'next': next
    })

    if extra_params is not None:
        ret['extra_params'] = extra_params

    # Echo the raw request parameters back to the template.
    for p in parameter_names:
        val = request.REQUEST.get(p)
        if val:
            ret[p] = val

    if request.method == 'POST':
        form = form_class(**data_extractor(request))
        ret['form'] = form
        if form.is_valid():
            args = arg_extractor(request, form, parameter_names)
            try:
                op(*args)
            except (IOError, WebHdfsException), e:
                msg = _("Cannot perform operation.")
                # A Hue admin is not necessarily the HDFS superuser; hint at that.
                if request.user.is_superuser and not request.user == request.fs.superuser:
                    msg += _(' Note: you are a Hue admin but not a HDFS superuser (which is "%(superuser)s").') \
                        % {'superuser': request.fs.superuser}
                raise PopupException(msg, detail=e)
            if next:
                logging.debug("Next: %s" % next)
                # Doesn't need to be quoted: quoting is done by HttpResponseRedirect.
                return format_preserving_redirect(request, next)
            ret["success"] = True
            try:
                if piggyback:
                    piggy_path = form.cleaned_data[piggyback]
                    ret["result"] = _massage_stats(request, request.fs.stats(piggy_path))
            except Exception, e:
                # Hard to report these more naturally here.  These happen either
                # because of a bug in the piggy-back code or because of a
                # race condition.
                logger.exception("Exception while processing piggyback data")
                ret["result_error"] = True

    ret['user'] = request.user
    return render(template, request, ret)
def _submit_to_cluster(request, job_design, form):
    """Build a SubmissionPlan from the form and submit it to the cluster,
    then redirect the user to the submission's watch page."""
    submission_plan = SubmissionPlan()
    submission_plan.name = job_design.name
    submission_plan.user = request.user.username
    submission_plan.groups = request.user.get_groups()
    submission_plan.steps = form.to_job_submission_steps(submission_plan.name)

    job_submission = Submission(owner=request.user,
                                last_seen_state=State.SUBMITTED,
                                name=job_design.name,
                                submission_plan=submission_plan)
    # Save aggressively in case submit() below triggers an error.
    job_submission.save()

    try:
        try:
            job_submission.submission_handle = get_client().submit(submission_plan)
        except Exception:
            # Record the failure state before re-raising; the finally
            # clause persists it either way.
            job_submission.last_seen_state = State.ERROR
            raise
    finally:
        job_submission.save()

    return format_preserving_redirect(request, job_submission.watch_url())
def _submit_to_cluster(request, job_design, form):
    """Submit the job design to the cluster and redirect to its watch page.

    Builds a SubmissionPlan from the design + form, records a Submission row
    up-front, then hands the plan to the cluster client.
    """
    plan = SubmissionPlan()
    plan.name = job_design.name
    plan.user = request.user.username
    plan.groups = request.user.get_groups()
    plan.steps = form.to_job_submission_steps(plan.name)

    submission = Submission(owner=request.user,
                            last_seen_state=State.SUBMITTED,
                            name=job_design.name,
                            submission_plan=plan)
    # Save aggressively in case submit() below triggers an error.
    submission.save()

    try:
        try:
            submission.submission_handle = get_client().submit(plan)
        except Exception:
            # Mark the submission as errored; the finally block persists it.
            submission.last_seen_state = State.ERROR
            raise
    finally:
        submission.save()

    watch_url = submission.watch_url()
    return format_preserving_redirect(request, watch_url)
def setup(request):
    """Installs jobsub examples."""
    if request.method != "GET":
        # POST (confirmed): run the install command and go back to jobsub.
        jobsub_setup.Command().handle_noargs()
        return format_preserving_redirect(request, "/jobsub")
    # GET: show a confirmation page first.
    return render("confirm.html", request,
                  dict(url=request.path, title="Install job design examples?"))
def execute_directly(request, query_msg, design=None, tablename=None,
                     on_success_url=None, on_success_params=None, **kwargs):
    """
    execute_directly(request, query_msg, tablename, design) -> HTTP response for execution

    This method wraps around db_utils.execute_directly() to take care of the HTTP response
    after the execution.

    query_msg
      The thrift Query object.

    design
      The design associated with the query.

    tablename
      The associated table name for the context.

    on_success_url
      Where to go after the query is done. The URL handler may expect an option "context" GET
      param. (See ``watch_query``.) For advanced usage, on_success_url can be a function, in
      which case the on complete URL is the return of:
        on_success_url(history_obj) -> URL string
      Defaults to the view results page.

    on_success_params
      Optional params to pass to the on_success_url (in additional to "context").

    Note that this may throw a Beeswax exception.
    """
    # Permission check: the caller must be allowed to use this design.
    if design is not None:
        authorized_get_design(request, design.id)

    history_obj = db_utils.execute_directly(request.user, query_msg, design, **kwargs)
    watch_url = urlresolvers.reverse("hcatalog.views.watch_query", kwargs=dict(id=history_obj.id))

    # Prepare the GET params for the watch_url
    get_dict = QueryDict(None, mutable=True)

    # (1) context — design takes precedence over tablename.
    if design:
        get_dict['context'] = make_query_context("design", design.id)
    elif tablename:
        get_dict['context'] = make_query_context("table", tablename)

    # (2) on_success_url — may be a callable taking the history object.
    if on_success_url:
        if callable(on_success_url):
            on_success_url = on_success_url(history_obj)
        get_dict['on_success_url'] = on_success_url

    # (3) misc
    if on_success_params:
        get_dict.update(on_success_params)

    return format_preserving_redirect(request, watch_url, get_dict)
def clone_design(request, design_id):
    """Clone a design belonging to any user.

    Copies both the design and its associated Document, re-owned by the
    requesting user, then redirects to the execute page for the copy.
    """
    design = authorized_get_design(request, design_id)
    if design is None:
        LOG.error('Cannot clone non-existent design %s' % (design_id, ))
        return list_designs(request)

    copy = design.clone()
    # Clone the backing Document as well so the copy has its own doc.
    copy_doc = design.doc.get().copy()

    copy.name = design.name + ' (copy)'
    copy.owner = request.user
    copy.save()

    # Keep the cloned document's ownership/name in sync with the design copy.
    copy_doc.owner = copy.owner
    copy_doc.name = copy.name
    copy_doc.save()
    copy.doc.add(copy_doc)

    messages.info(request, _('Copied design: %(name)s') % {'name': design.name})

    return format_preserving_redirect(
        request, reverse(get_app_name(request) + ':execute_design', kwargs={'design_id': copy.id}))
def generic_op(form_class, request, op, parameter_names, piggyback=None, template="fileop.mako", extra_params=None): """ Generic implementation for several operations. @param form_class form to instantiate @param request incoming request, used for parameters @param op callable with the filesystem operation @param parameter_names list of form parameters that are extracted and then passed to op @param piggyback list of form parameters whose file stats to look up after the operation @param extra_params dictionary of extra parameters to send to the template for rendering """ # Use next for non-ajax requests, when available. next = request.GET.get("next") if next is None: next = request.POST.get("next") ret = dict({'next': next}) if extra_params is not None: ret['extra_params'] = extra_params for p in parameter_names: val = request.REQUEST.get(p) if val: ret[p] = val if request.method == 'POST': form = form_class(request.POST) # TODO(philip): How best to do error handling? fs will throw # an arbitrary-ish exception (typically file not found or maybe permission # denied), and this needs to be coaxed into an HTTP error. ret['form'] = form if form.is_valid(): args = [form.cleaned_data[p] for p in parameter_names] op(*args) if next: logging.debug("Next: %s" % next) # Doesn't need to be quoted: quoting is done by HttpResponseRedirect. return format_preserving_redirect(request, next) ret["success"] = True try: if piggyback: piggy_path = form.cleaned_data[piggyback] ret["result"] = _massage_stats( request, request.fs.stats(piggy_path)) except Exception, e: # Hard to report these more naturally here. These happen either # because of a bug in the piggy-back code or because of a # race condition. logger.exception("Exception while processing piggyback data") ret["result_error"] = True return render_with_toolbars(template, request, ret)
def execute_directly(request, query, query_server=None, design=None, tablename=None,
                     on_success_url=None, on_success_params=None, **kwargs):
    """
    execute_directly(request, query_msg, tablename, design) -> HTTP response for execution

    This method wraps around dbms.execute_directly() to take care of the HTTP response
    after the execution.

    query
      The HQL model Query object.

    query_server
      To which Query Server to submit the query.
      Dictionary with keys: ['server_name', 'server_host', 'server_port'].

    design
      The design associated with the query.

    tablename
      The associated table name for the context.

    on_success_url
      Where to go after the query is done. The URL handler may expect an option "context" GET
      param. (See ``watch_query``.) For advanced usage, on_success_url can be a function, in
      which case the on complete URL is the return of:
        on_success_url(history_obj) -> URL string
      Defaults to the view results page.

    on_success_params
      Optional params to pass to the on_success_url (in additional to "context").

    Note that this may throw a Beeswax exception.
    """
    # Permission check: the caller must be allowed to use this design.
    if design is not None:
        authorized_get_design(request, design.id)

    history_obj = dbms.get(request.user, query_server).execute_query(query, design)
    watch_url = urlresolvers.reverse(watch_query, kwargs={'id': history_obj.id})

    # Prepare the GET params for the watch_url
    get_dict = QueryDict(None, mutable=True)

    # (1) context — design takes precedence over tablename.
    if design:
        get_dict['context'] = make_query_context("design", design.id)
    elif tablename:
        get_dict['context'] = make_query_context("table", tablename)

    # (2) on_success_url — may be a callable taking the history object.
    if on_success_url:
        if callable(on_success_url):
            on_success_url = on_success_url(history_obj)
        get_dict['on_success_url'] = on_success_url

    # (3) misc
    if on_success_params:
        get_dict.update(on_success_params)

    return format_preserving_redirect(request, watch_url, get_dict)
def setup(request):
    """Installs jobsub examples."""
    if request.method == "GET":
        # Ask for confirmation before installing.
        confirm_ctx = dict(url=request.path, title="Install job design examples?")
        return render("confirm.html", request, confirm_ctx)
    # Any non-GET method runs the installer and redirects home.
    jobsub_setup.Command().handle_noargs()
    return format_preserving_redirect(request, "/jobsub")
def execute_directly(request, query, query_server=None, design=None,
                     on_success_url=None, on_success_params=None, **kwargs):
    """
    execute_directly(request, query_msg, tablename, design) -> HTTP response for execution

    This method wraps around dbms.execute_query() to take care of the HTTP response
    after the execution.

    query
      The HQL model Query object.

    query_server
      To which Query Server to submit the query.
      Dictionary with keys: ['server_name', 'server_host', 'server_port'].

    design
      The design associated with the query.

    on_success_url
      Where to go after the query is done. The URL handler may expect an option "context" GET
      param. (See ``watch_query``.) For advanced usage, on_success_url can be a function, in
      which case the on complete URL is the return of:
        on_success_url(history_obj) -> URL string
      Defaults to the view results page.

    on_success_params
      Optional params to pass to the on_success_url (in additional to "context").

    Note that this may throw a Beeswax exception.
    """
    # Permission check: the caller must be allowed to use this design.
    if design is not None:
        authorized_get_design(request, design.id)

    db = dbms.get(request.user, query_server)
    # Switch to the query's database before executing (defaults to 'default').
    database = query.query.get('database', 'default')
    db.use(database)

    query_history = db.execute_query(query, design)

    watch_url = reverse(get_app_name(request) + ':watch_query_history',
                        kwargs={'query_history_id': query_history.id})

    # Prepare the GET params for the watch_url
    get_dict = QueryDict(None, mutable=True)

    # (1) on_success_url — may be a callable taking the history object.
    if on_success_url:
        if callable(on_success_url):
            on_success_url = on_success_url(query_history)
        get_dict['on_success_url'] = on_success_url

    # (2) misc
    if on_success_params:
        get_dict.update(on_success_params)

    return format_preserving_redirect(request, watch_url, get_dict)
def generic_op(form_class, request, op, parameter_names, piggyback=None, template="fileop.mako", extra_params=None): """ Generic implementation for several operations. @param form_class form to instantiate @param request incoming request, used for parameters @param op callable with the filesystem operation @param parameter_names list of form parameters that are extracted and then passed to op @param piggyback list of form parameters whose file stats to look up after the operation @param extra_params dictionary of extra parameters to send to the template for rendering """ # Use next for non-ajax requests, when available. next = request.GET.get("next") if next is None: next = request.POST.get("next") ret = dict({ 'next':next }) if extra_params is not None: ret['extra_params'] = extra_params for p in parameter_names: val = request.REQUEST.get(p) if val: ret[p] = val if request.method == 'POST': form = form_class(request.POST) # TODO(philip): How best to do error handling? fs will throw # an arbitrary-ish exception (typically file not found or maybe permission # denied), and this needs to be coaxed into an HTTP error. ret['form'] = form if form.is_valid(): args = [ form.cleaned_data[p] for p in parameter_names ] op(*args) if next: logging.debug("Next: %s" % next) # Doesn't need to be quoted: quoting is done by HttpResponseRedirect. return format_preserving_redirect(request, next) ret["success"] = True try: if piggyback: piggy_path = form.cleaned_data[piggyback] ret["result"] = _massage_stats(request, request.fs.stats(piggy_path)) except Exception, e: # Hard to report these more naturally here. These happen either # because of a bug in the piggy-back code or because of a # race condition. logger.exception("Exception while processing piggyback data") ret["result_error"] = True return render_with_toolbars(template, request, ret)
def view(request, path): """Dispatches viewing of a path to either index() or fileview(), depending on type.""" # default_to_home is set in bootstrap.js if 'default_to_home' in request.GET: home_dir_path = request.user.get_home_directory() if request.fs.isdir(home_dir_path): return format_preserving_redirect(request, reverse(view, kwargs=dict(path=home_dir_path))) # default_to_home is set in bootstrap.js if 'default_to_trash' in request.GET: home_trash = request.fs.join(request.fs.trash_path, 'Current', request.user.get_home_directory()[1:]) if request.fs.isdir(home_trash): return format_preserving_redirect(request, reverse(view, kwargs=dict(path=home_trash))) if request.fs.isdir(request.fs.trash_path): return format_preserving_redirect(request, reverse(view, kwargs=dict(path=request.fs.trash_path))) try: decoded_path = urllib.unquote(path) if path != decoded_path: path = decoded_path stats = request.fs.stats(path) if stats.isDir: return listdir_paged(request, path) else: return display(request, path) except (IOError, WebHdfsException), e: msg = _("Cannot access: %(path)s. ") % {'path': escape(path)} if "Connection refused" in e.message: msg += _(" The HDFS REST service is not available. ") if request.user.is_superuser and not _is_hdfs_superuser(request): msg += _(' Note: you are a Hue admin but not a HDFS superuser, "%(superuser)s" or part of HDFS supergroup, "%(supergroup)s".') \ % {'superuser': request.fs.superuser, 'supergroup': request.fs.supergroup} if request.is_ajax(): exception = { 'error': msg } return JsonResponse(exception) else: raise PopupException(msg , detail=e)
def clone_design(request, design_id):
    """Clone a design belonging to any user"""
    design = authorized_get_design(request, design_id)
    if design is None:
        LOG.error('Cannot clone non-existent design %s' % (design_id,))
        return list_designs(request)

    # clone() produces a copy owned by the requesting user.
    cloned = design.clone(request.user)

    messages.info(request, _('Copied design: %(name)s') % {'name': design.name})

    target_url = reverse(get_app_name(request) + ':execute_design',
                         kwargs={'design_id': cloned.id})
    return format_preserving_redirect(request, target_url)
def execute_directly(request, query_msg, design=None, tablename=None,
                     on_success_url=None, on_success_params=None, **kwargs):
    """
    execute_directly(request, query_msg, tablename, design) -> HTTP response for execution

    This method wraps around db_utils.execute_directly() to take care of the HTTP response
    after the execution.

    query_msg
      The thrift Query object.

    design
      The design associated with the query.

    tablename
      The associated table name for the context.

    on_success_url
      Where to go after the query is done. The URL handler may expect an option "context" GET
      param. (See ``watch_query``.) For advanced usage, on_success_url can be a function, in
      which case the on complete URL is the return of:
        on_success_url(history_obj) -> URL string
      Defaults to the view results page.

    on_success_params
      Optional params to pass to the on_success_url (in additional to "context").

    Note that this may throw a Beeswax exception.
    """
    history_obj = db_utils.execute_directly(request.user, query_msg, design, **kwargs)
    watch_url = urlresolvers.reverse("beeswax.views.watch_query", kwargs=dict(id=history_obj.id))

    # Prepare the GET params for the watch_url
    get_dict = QueryDict(None, mutable=True)

    # (1) context — design takes precedence over tablename.
    if design:
        get_dict['context'] = make_query_context("design", design.id)
    elif tablename:
        get_dict['context'] = make_query_context("table", tablename)

    # (2) on_success_url — may be a callable taking the history object.
    if on_success_url:
        if callable(on_success_url):
            on_success_url = on_success_url(history_obj)
        get_dict['on_success_url'] = on_success_url

    # (3) misc
    if on_success_params:
        get_dict.update(on_success_params)

    return format_preserving_redirect(request, watch_url, get_dict)
def view(request, path):
    """Dispatches viewing of a path to either index() or fileview(), depending on type."""
    # default_to_home is set in bootstrap.js
    home_dir_path = request.user.get_home_directory()
    if request.GET.get('default_to_home') and request.fs.isdir(home_dir_path):
        home_url = urlresolvers.reverse(view, kwargs=dict(path=home_dir_path))
        return format_preserving_redirect(request, home_url)

    # Directories get a listing, regular files get the file viewer.
    if request.fs.isdir(path):
        return listdir(request, path)
    if request.fs.isfile(path):
        return display(request, path)
    raise Http404("File not found: %s" % escape(path))
def view(request, path): """Dispatches viewing of a path to either index() or fileview(), depending on type.""" # default_to_home is set in bootstrap.js if 'default_to_home' in request.GET: home_dir_path = request.user.get_home_directory() if request.fs.isdir(home_dir_path): return format_preserving_redirect(request, reverse(view, kwargs=dict(path=home_dir_path))) # default_to_home is set in bootstrap.js if 'default_to_trash' in request.GET: home_trash = request.fs.join(request.fs.trash_path, 'Current', request.user.get_home_directory()[1:]) if request.fs.isdir(home_trash): return format_preserving_redirect(request, reverse(view, kwargs=dict(path=home_trash))) if request.fs.isdir(request.fs.trash_path): return format_preserving_redirect(request, reverse(view, kwargs=dict(path=request.fs.trash_path))) try: stats = request.fs.stats(path) if stats.isDir: return listdir_paged(request, path) else: return display(request, path) except (IOError, WebHdfsException), e: msg = _("Cannot access: %(path)s. ") % {'path': escape(path)} if "Connection refused" in e.message: msg += _(" The HDFS REST service is not available. ") if request.user.is_superuser and not _is_hdfs_superuser(request): msg += _(' Note: you are a Hue admin but not a HDFS superuser, "%(superuser)s" or part of HDFS supergroup, "%(supergroup)s".') \ % {'superuser': request.fs.superuser, 'supergroup': request.fs.supergroup} if request.is_ajax(): exception = { 'error': msg } return JsonResponse(exception) else: raise PopupException(msg , detail=e)
def clone_design(request, design_id):
    """Clone a design belonging to any user"""
    try:
        design = models.SavedQuery.get(design_id)
    except models.SavedQuery.DoesNotExist:
        LOG.error('Cannot clone non-existent design %s' % (design_id,))
        return list_designs(request)

    # Duplicate the design and hand ownership to the requesting user.
    cloned = design.clone()
    cloned.name = design.name + ' (copy)'
    cloned.owner = request.user
    cloned.save()

    request.flash.put('Copied design: %s' % (design.name,))

    target = urlresolvers.reverse(execute_query, kwargs={'design_id': cloned.id})
    return format_preserving_redirect(request, target)
def clone_design(request, design_id):
    """Clone a design belonging to any user"""
    design = authorized_get_design(request, design_id)
    if design is None:
        LOG.error('Cannot clone non-existent design %s' % (design_id,))
        return list_designs(request)

    # Make an owned copy with a distinguishing name.
    duplicate = design.clone()
    duplicate.name = design.name + ' (copy)'
    duplicate.owner = request.user
    duplicate.save()

    messages.info(request, _('Copied design: %(name)s') % {'name': design.name})

    execute_url = urlresolvers.reverse(execute_query, kwargs={'design_id': duplicate.id})
    return format_preserving_redirect(request, execute_url)
def view(request, path):
    """Dispatches viewing of a path to either index() or fileview(), depending on type."""
    # default_to_home is set in bootstrap.js
    home = request.user.get_home_directory()
    if request.GET.get('default_to_home') and request.fs.isdir(home):
        return format_preserving_redirect(
            request, urlresolvers.reverse(view, kwargs=dict(path=home)))

    # Not a directory: show the file, or 404 if it doesn't exist at all.
    if not request.fs.isdir(path):
        if request.fs.isfile(path):
            return display(request, path)
        raise Http404("File not found: %s" % escape(path))
    return listdir(request, path)
def clone_design(request, design_id):
    """Clone a design belonging to any user"""
    design = authorized_get_design(request, design_id)
    if design is None:
        LOG.error("Cannot clone non-existent design %s" % (design_id,))
        return list_designs(request)

    # Copy the design and re-own it under the requesting user.
    new_design = design.clone()
    new_design.name = design.name + " (copy)"
    new_design.owner = request.user
    new_design.save()

    messages.info(request, _("Copied design: %(name)s") % {"name": design.name})

    destination = reverse(get_app_name(request) + ":execute_query",
                          kwargs={"design_id": new_design.id})
    return format_preserving_redirect(request, destination)
def save_results(request, id):
    """
    Save the results of a query to an HDFS directory or Hive table.

    POST-only action. 'cancel' in the POST data returns to the watch page.
    Raises PopupException when the query is not in a success state, when the
    query handle cannot be found, or when the save itself fails.
    """
    query_history = authorized_get_history(request, id, must_exist=True)

    app_name = get_app_name(request)
    server_id, state = _get_query_handle_and_state(query_history)
    query_history.save_state(state)
    error_msg, log = None, None

    if request.method == "POST":
        # Results are only available for successfully finished queries.
        if not query_history.is_success():
            msg = _("This query is %(state)s. Results unavailable.") % {"state": state}
            raise PopupException(msg)

        db = dbms.get(request.user, query_history.get_query_server_config())
        form = beeswax.forms.SaveResultsForm(request.POST, db=db, fs=request.fs)

        if request.POST.get("cancel"):
            return format_preserving_redirect(request, "/%s/watch/%s" % (app_name, id))

        if form.is_valid():
            try:
                handle, state = _get_query_handle_and_state(query_history)
                result_meta = db.get_results_metadata(handle)
            except Exception, ex:
                raise PopupException(_("Cannot find query: %s") % ex)
            try:
                if form.cleaned_data["save_target"] == form.SAVE_TYPE_DIR:
                    # Save to an HDFS directory, then watch the insert query.
                    target_dir = form.cleaned_data["target_dir"]
                    query_history = db.insert_query_into_directory(query_history, target_dir)
                    redirected = redirect(
                        reverse("beeswax:watch_query", args=[query_history.id])
                        + "?on_success_url="
                        + reverse("filebrowser.views.view", kwargs={"path": target_dir})
                    )
                elif form.cleaned_data["save_target"] == form.SAVE_TYPE_TBL:
                    # Save via CREATE TABLE AS SELECT.
                    redirected = db.create_table_as_a_select(
                        request, query_history, form.cleaned_data["target_table"], result_meta
                    )
            except Exception, ex:
                error_msg, log = expand_exception(ex, db)
                raise PopupException(_("The result could not be saved: %s.") % log, detail=ex)

            return redirected
def install_examples(request): """ Handle installing sample data and example queries. """ if request.method == 'GET': return render('confirm.html', request, dict(url=request.path, title='Install sample tables and Beeswax examples?')) elif request.method == 'POST': try: beeswax.management.commands.beeswax_install_examples.Command().handle_noargs() if models.MetaInstall.get().installed_example: request.flash.put('Installed Beeswax examples.') except Exception, err: LOG.exception(err) raise PopupException(err) return format_preserving_redirect(request, '/beeswax/tables')
def clone_design(request, design_id):
    """Clone a design belonging to any user.

    Creates a user-owned clone, links a new Document to it, and redirects to
    the execute page for the copy.
    """
    design = authorized_get_design(request, design_id)
    if design is None:
        LOG.error("Cannot clone non-existent design %s" % (design_id,))
        return list_designs(request)

    copy = design.clone(request.user)
    copy.save()

    # Register the clone in the Document index so it shows up in listings.
    copy_doc = Document.objects.link(copy, owner=copy.owner, name=copy.name,
                                     description=copy.desc, extra=copy.type)

    messages.info(request, _("Copied design: %(name)s") % {"name": design.name})

    return format_preserving_redirect(
        request, reverse(get_app_name(request) + ":execute_design", kwargs={"design_id": copy.id})
    )
def save_results(request, id):
    """
    DEPRECATED. Need to get rid of watch_wait dependency first.

    Save the results of a query to an HDFS directory or Hive table.
    POST-only action; 'cancel' returns to the watch page.
    """
    query_history = authorized_get_history(request, id, must_exist=True)

    app_name = get_app_name(request)
    server_id, state = _get_query_handle_and_state(query_history)
    query_history.save_state(state)
    error_msg, log = None, None

    if request.method == 'POST':
        # Results are only available for successfully finished queries.
        if not query_history.is_success():
            msg = _('This query is %(state)s. Results unavailable.') % {'state': state}
            raise PopupException(msg)

        db = dbms.get(request.user, query_history.get_query_server_config())
        form = beeswax.forms.SaveResultsForm(request.POST, db=db, fs=request.fs)

        if request.POST.get('cancel'):
            return format_preserving_redirect(request, '/%s/watch/%s' % (app_name, id))

        if form.is_valid():
            try:
                handle, state = _get_query_handle_and_state(query_history)
                result_meta = db.get_results_metadata(handle)
            except Exception, ex:
                raise PopupException(_('Cannot find query: %s') % ex)
            try:
                if form.cleaned_data['save_target'] == form.SAVE_TYPE_DIR:
                    # Save to an HDFS directory, then watch the insert query.
                    target_dir = form.cleaned_data['target_dir']
                    query_history = db.insert_query_into_directory(query_history, target_dir)
                    redirected = redirect(reverse('beeswax:watch_query', args=[query_history.id]) \
                        + '?on_success_url=' + reverse('filebrowser.views.view', kwargs={'path': target_dir}))
                elif form.cleaned_data['save_target'] == form.SAVE_TYPE_TBL:
                    # Save via CREATE TABLE AS SELECT.
                    redirected = db.create_table_as_a_select(request, query_history, form.cleaned_data['target_table'], result_meta)
            except Exception, ex:
                error_msg, log = expand_exception(ex, db)
                raise PopupException(_('The result could not be saved: %s.') % log, detail=ex)

            return redirected
def save_results(request, id):
    """
    Save the results of a query to an HDFS directory or Hive table.

    POST-only action; 'cancel' returns to the watch page. Raises
    PopupException on non-success state, missing query, or save failure.
    """
    query_history = authorized_get_history(request, id, must_exist=True)

    app_name = get_app_name(request)
    server_id, state = _get_query_handle_and_state(query_history)
    query_history.save_state(state)
    error_msg, log = None, None

    if request.method == 'POST':
        # Results are only available for successfully finished queries.
        if not query_history.is_success():
            msg = _('This query is %(state)s. Results unavailable.') % {'state': state}
            raise PopupException(msg)

        db = dbms.get(request.user, query_history.get_query_server_config())
        form = beeswax.forms.SaveResultsForm(request.POST, db=db, fs=request.fs)

        if request.POST.get('cancel'):
            return format_preserving_redirect(request, '/%s/watch/%s' % (app_name, id))

        if form.is_valid():
            try:
                handle, state = _get_query_handle_and_state(query_history)
                result_meta = db.get_results_metadata(handle)
            except Exception, ex:
                raise PopupException(_('Cannot find query: %s') % ex)
            try:
                if form.cleaned_data['save_target'] == form.SAVE_TYPE_DIR:
                    # Save to an HDFS directory, then watch the insert query.
                    target_dir = form.cleaned_data['target_dir']
                    query_history = db.insert_query_into_directory(query_history, target_dir)
                    redirected = redirect(reverse('beeswax:watch_query', args=[query_history.id]) \
                        + '?on_success_url=' + reverse('filebrowser.views.view', kwargs={'path': target_dir}))
                elif form.cleaned_data['save_target'] == form.SAVE_TYPE_TBL:
                    # Save via CREATE TABLE AS SELECT.
                    redirected = db.create_table_as_a_select(request, query_history, form.cleaned_data['target_table'], result_meta)
            except Exception, ex:
                error_msg, log = expand_exception(ex, db)
                raise PopupException(_('The result could not be saved: %s.') % log, detail=ex)

            return redirected
def clone_design(request, design_id):
    """Clone a design belonging to any user.

    Creates a user-owned clone, links a new Document to it, and redirects to
    the execute page for the copy.
    """
    design = authorized_get_design(request, design_id)
    if design is None:
        LOG.error('Cannot clone non-existent design %s' % (design_id,))
        return list_designs(request)

    copy = design.clone(request.user)
    copy.save()

    # Register the clone in the Document index so it shows up in listings.
    copy_doc = Document.objects.link(copy, owner=copy.owner, name=copy.name,
                                     description=copy.desc, extra=copy.type)

    messages.info(request, _('Copied design: %(name)s') % {'name': design.name})

    return format_preserving_redirect(
        request, reverse(get_app_name(request) + ':execute_design', kwargs={'design_id': copy.id}))
def view(request, path): """Dispatches viewing of a path to either index() or fileview(), depending on type.""" # default_to_home is set in bootstrap.js if "default_to_home" in request.GET: home_dir_path = request.user.get_home_directory() if request.fs.isdir(home_dir_path): return format_preserving_redirect(request, urlresolvers.reverse(view, kwargs=dict(path=home_dir_path))) try: stats = request.fs.stats(path) if stats.isDir: return listdir(request, path, False) else: return display(request, path) except (IOError, WebHdfsException), e: msg = _("Cannot access: %(path)s.") % {"path": escape(path)} if request.user.is_superuser and not request.user == request.fs.superuser: msg += _(' Note: you are a Hue admin but not a HDFS superuser (which is "%(superuser)s").') % { "superuser": request.fs.superuser } raise PopupException(msg, detail=e)
def save_results(request, id):
    """
    Save the results of a query to an HDFS directory

    POST only. Moves the materialized result directory, or delegates to
    _save_results_ctas for a new table. Raises PopupException when results
    are unavailable or the save fails.
    """
    query_history = authorized_get_history(request, id, must_exist=True)
    server_id, state = _get_query_handle_and_state(query_history)
    query_history.save_state(state)
    error_msg, log = None, None
    if request.method == "POST":
        # Make sure the result is available.
        # Note that we may still hit errors during the actual save
        if not query_history.is_success():
            if query_history.is_failure():
                msg = _("This query has %(state)s. Results unavailable.") % {"state": state}
            else:
                msg = _("The result of this query is not available yet.")
            raise PopupException(msg)
        db = dbms.get(request.user, query_history.get_query_server_config())
        form = beeswax.forms.SaveResultsForm(request.POST, db=db)
        # Cancel goes back to results
        if request.POST.get("cancel"):
            return format_preserving_redirect(request, "/beeswax/watch/%s" % (id,))
        if form.is_valid():
            # Do save
            # 1. Get the results metadata
            assert request.POST.get("save")
            try:
                handle, state = _get_query_handle_and_state(query_history)
                result_meta = db.get_results_metadata(handle)
            except Exception, ex:
                LOG.exception(ex)
                raise PopupException(_("Cannot find query."))
            # Normalize the HDFS URI to its path component.
            if result_meta.table_dir:
                result_meta.table_dir = request.fs.urlsplit(result_meta.table_dir)[2]
            # 2. Check for partitioned tables
            if result_meta.table_dir is None:
                raise PopupException(
                    _(
                        "Saving results from a partitioned table is not supported. You may copy from the HDFS location manually."
                    )
                )
            # 3. Actual saving of results
            try:
                if form.cleaned_data["save_target"] == form.SAVE_TYPE_DIR:
                    # To dir
                    if result_meta.in_tablename:
                        raise PopupException(
                            _(
                                "Saving results from a query with no MapReduce jobs is not supported. "
                                "You may copy manually from the HDFS location %(path)s."
                            ) % {"path": result_meta.table_dir}
                        )
                    target_dir = form.cleaned_data["target_dir"]
                    request.fs.rename_star(result_meta.table_dir, target_dir)
                    LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, target_dir))
                    # Results moved away: the cached result is now gone.
                    query_history.save_state(models.QueryHistory.STATE.expired)
                    return redirect(reverse("filebrowser.views.view", kwargs={"path": target_dir}))
                elif form.cleaned_data["save_target"] == form.SAVE_TYPE_TBL:
                    # To new table
                    try:
                        return _save_results_ctas(
                            request, query_history, form.cleaned_data["target_table"], result_meta
                        )
                    except Exception, bex:
                        LOG.exception(bex)
                        error_msg, log = expand_exception(bex, db)
            except WebHdfsException, ex:
                raise PopupException(_("The table could not be saved."), detail=ex)
            except IOError, ex:
                LOG.exception(ex)
                error_msg = str(ex)
            # NOTE(review): the GET / error re-render path of this view is not
            # present in this snippet (truncated here).
# NOTE(review): fragment — the enclosing watch_query()-style view (which binds
# db, results_url, on_success_url and context_param) begins before this snippet,
# and the trailing render() call is cut off mid-dict.
# Go on to the next statement when the user POSTs to continue, or when the
# current statement finished without producing a result set.
if request.method == "POST" or (
    not query_history.is_finished() and query_history.is_success() and not query_history.has_results
):
    try:
        query_history = db.execute_next_statement(query_history)
    except Exception, ex:
        # Best-effort: failures surface via the state check below.
        pass
# Check query state
handle, state = _get_query_handle_and_state(query_history)
query_history.save_state(state)
if query_history.is_failure():
    # When we fetch, Beeswax server will throw us a Exception, which has the
    # log we want to display.
    return format_preserving_redirect(request, results_url, request.GET)
elif query_history.is_finished() or (query_history.is_success() and query_history.has_results):
    return format_preserving_redirect(request, on_success_url, request.GET)
# Still running
log = db.get_log(handle)
# Keep waiting
# - Translate context into something more meaningful (type, data)
query_context = _parse_query_context(context_param)
return render(
    "watch_wait.mako",
    request,
    {
        "query": query_history,
class HiveServer2Dbms(object):
  """
  Database-management facade over a HiveServer2/Impala Thrift client.

  Builds HQL for metadata operations (alter/analyze/drop/load/stats/sampling)
  and executes it through self.client. server_name distinguishes 'beeswax'
  (Hive) from 'impala' code paths.

  NOTE(review): the tail of create_table_as_a_select is truncated in this
  snippet (the final 'url = ...' line is cut off mid-function).
  """

  def __init__(self, client, server_type):
    self.client = client
    self.server_type = server_type
    self.server_name = self.client.query_server['server_name']

  @classmethod
  def to_matching_wildcard(cls, identifier=None):
    # Turn 'foo' into '*foo*' for a substring-style metadata match; bare '*'
    # (or empty) stays '*'.
    cleaned = "*"
    if identifier and identifier.strip() != "*":
      cleaned = "*%s*" % identifier.strip().strip("*")
    return cleaned

  def get_databases(self, database_names='*'):
    # Natural sort only below the configured threshold (sorting is O(n log n)
    # on potentially large metadata lists).
    if database_names != '*':
      database_names = self.to_matching_wildcard(database_names)
    databases = self.client.get_databases(schemaName=database_names)
    if len(databases) <= APPLY_NATURAL_SORT_MAX.get():
      databases = apply_natural_sort(databases)
    return databases

  def get_database(self, database):
    return self.client.get_database(database)

  def get_tables_meta(self, database='default', table_names='*', table_types=None):
    # Wildcard filtering is only honored by the beeswax (Hive) backend.
    if self.server_name == 'beeswax':
      identifier = self.to_matching_wildcard(table_names)
    else:
      identifier = None
    tables = self.client.get_tables_meta(database, identifier, table_types)
    if len(tables) <= APPLY_NATURAL_SORT_MAX.get():
      tables = apply_natural_sort(tables, key='name')
    return tables

  def get_tables(self, database='default', table_names='*', table_types=None):
    if self.server_name == 'beeswax':
      identifier = self.to_matching_wildcard(table_names)
    else:
      identifier = None
    tables = self.client.get_tables(database, identifier, table_types)
    if len(tables) <= APPLY_NATURAL_SORT_MAX.get():
      tables = apply_natural_sort(tables)
    return tables

  def get_table(self, database, table_name):
    return self.client.get_table(database, table_name)

  def alter_table(self, database, table_name, new_table_name=None, comment=None, tblproperties=None):
    """Rename a table, or set its comment/TBLPROPERTIES; returns the refreshed table."""
    hql = 'ALTER TABLE `%s`.`%s`' % (database, table_name)

    if new_table_name:
      table_name = new_table_name
      hql += ' RENAME TO `%s`' % table_name
    elif comment:
      hql += " SET TBLPROPERTIES ('comment' = '%s')" % comment
    elif tblproperties:
      hql += " SET TBLPROPERTIES (%s)" % ' ,'.join("'%s' = '%s'" % (k, v) for k, v in tblproperties.items())

    timeout = SERVER_CONN_TIMEOUT.get()
    query = hql_query(hql)
    handle = self.execute_and_wait(query, timeout_sec=timeout)

    if handle:
      self.close(handle)
    else:
      msg = _("Failed to execute alter table statement: %s") % hql
      raise QueryServerException(msg)

    return self.client.get_table(database, table_name)

  def get_column(self, database, table_name, column_name):
    # Linear scan over the table's columns; None when absent.
    table = self.client.get_table(database, table_name)
    for col in table.cols:
      if col.name == column_name:
        return col
    return None

  def alter_column(self, database, table_name, column_name, new_column_name, column_type, comment=None, partition_spec=None, cascade=False):
    """CHANGE COLUMN (rename/retype/comment), optionally per-partition/CASCADE."""
    hql = 'ALTER TABLE `%s`.`%s`' % (database, table_name)

    if partition_spec:
      hql += ' PARTITION (%s)' % partition_spec

    hql += ' CHANGE COLUMN `%s` `%s` %s' % (column_name, new_column_name, column_type.upper())

    if comment:
      hql += " COMMENT '%s'" % comment

    if cascade:
      hql += ' CASCADE'

    timeout = SERVER_CONN_TIMEOUT.get()
    query = hql_query(hql)
    handle = self.execute_and_wait(query, timeout_sec=timeout)

    if handle:
      self.close(handle)
    else:
      msg = _("Failed to execute alter column statement: %s") % hql
      raise QueryServerException(msg)

    return self.get_column(database, table_name, new_column_name)

  def execute_query(self, query, design):
    return self.execute_and_watch(query, design=design)

  def select_star_from(self, database, table, limit=10000):
    if table.partition_keys:  # Filter on max number of partitions for partitioned tables
      hql = self._get_sample_partition_query(database, table, limit=limit)  # Currently need a limit
    else:
      hql = "SELECT * FROM `%s`.`%s` LIMIT %d;" % (database, table.name, limit)
    return self.execute_statement(hql)

  def get_select_star_query(self, database, table, limit=10000):
    # Same statement as select_star_from, but returned instead of executed.
    if table.partition_keys:  # Filter on max number of partitions for partitioned tables
      hql = self._get_sample_partition_query(database, table, limit=limit)  # Currently need a limit
    else:
      hql = "SELECT * FROM `%s`.`%s` LIMIT %d;" % (database, table.name, limit)
    return hql

  def execute_statement(self, hql):
    if self.server_name == 'impala':
      query = hql_query(hql, QUERY_TYPES[1])
    else:
      query = hql_query(hql, QUERY_TYPES[0])
    return self.execute_and_watch(query)

  def fetch(self, query_handle, start_over=False, rows=None):
    # Some backends advertise that re-reading from the beginning is unsupported.
    no_start_over_support = [
        config_variable for config_variable in self.get_default_configuration(False)
        if config_variable.key == 'support_start_over' and config_variable.value == 'false'
    ]
    if no_start_over_support:
      start_over = False
    return self.client.fetch(query_handle, start_over, rows)

  def close_operation(self, query_handle):
    return self.client.close_operation(query_handle)

  def open_session(self, user):
    return self.client.open_session(user)

  def close_session(self, session):
    # Persist the server status code on the session row either way.
    resp = self.client.close_session(session)

    if resp.status.statusCode != 0:
      session.status_code = resp.status.statusCode
      session.save()
      raise QueryServerException(_('Failed to close session, session handle may already be closed or timed out.'))
    else:
      session.status_code = 4  # Set to ttypes.TStatusCode.INVALID_HANDLE_STATUS
      session.save()

    return session

  def cancel_operation(self, query_handle):
    resp = self.client.cancel_operation(query_handle)
    # Impala needs an explicit close after cancel to release the operation.
    if self.client.query_server['server_name'] == 'impala':
      resp = self.client.close_operation(query_handle)
    return resp

  def get_sample(self, database, table, column=None, nested=None, limit=100):
    """Fetch up to `limit` sample rows (or one column) from a table."""
    result = None
    hql = None

    # Filter on max # of partitions for partitioned tables
    column = '`%s`' % column if column else '*'

    if table.partition_keys:
      hql = self._get_sample_partition_query(database, table, column, limit)
    elif self.server_name == 'impala':
      if column or nested:
        from impala.dbms import ImpalaDbms
        select_clause, from_clause = ImpalaDbms.get_nested_select(database, table.name, column, nested)
        hql = 'SELECT %s FROM %s LIMIT %s;' % (select_clause, from_clause, limit)
      else:
        hql = "SELECT * FROM `%s`.`%s` LIMIT %s;" % (database, table.name, limit)
    else:
      hql = "SELECT %s FROM `%s`.`%s` LIMIT %s;" % (column, database, table.name, limit)
      # TODO: Add nested select support for HS2

    if hql:
      query = hql_query(hql)
      handle = self.execute_and_wait(query, timeout_sec=5.0)
      if handle:
        result = self.fetch(handle, rows=100)
        self.close(handle)

    return result

  def _get_sample_partition_query(self, database, table, column='*', limit=100):
    # Restrict sampling of a partitioned table to its most recent partitions.
    max_parts = QUERY_PARTITIONS_LIMIT.get()
    partitions = self.get_partitions(database, table, partition_spec=None, max_parts=max_parts)

    if partitions and max_parts:
      # Need to reformat partition specs for where clause syntax
      partition_specs = [part.partition_spec.replace(',', ' AND ') for part in partitions]
      partition_filters = ' OR '.join(['(%s)' % partition_spec for partition_spec in partition_specs])
      partition_clause = 'WHERE %s' % partition_filters
    else:
      partition_clause = ''

    return "SELECT %(column)s FROM `%(database)s`.`%(table)s` %(partition_clause)s LIMIT %(limit)s" % \
        {'column': column, 'database': database, 'table': table.name, 'partition_clause': partition_clause, 'limit': limit}

  def analyze_table(self, database, table):
    """Compute table-level statistics (COMPUTE STATS on Impala, ANALYZE TABLE on Hive)."""
    if self.server_name == 'impala':
      hql = 'COMPUTE STATS `%(database)s`.`%(table)s`' % {'database': database, 'table': table}
    else:
      table_obj = self.get_table(database, table)
      partition_spec = ''
      if table_obj.partition_keys:
        partition_keys = ','.join([part.name for part in table_obj.partition_keys])
        partition_spec = 'PARTITION(%(partition_keys)s)' % {'partition_keys': partition_keys}
      hql = 'ANALYZE TABLE `%(database)s`.`%(table)s` %(partition_spec)s COMPUTE STATISTICS' % \
          {'database': database, 'table': table, 'partition_spec': partition_spec}
    return self.execute_statement(hql)

  def analyze_table_columns(self, database, table):
    """Compute column-level statistics; unsupported for partitioned Hive tables."""
    if self.server_name == 'impala':
      hql = 'COMPUTE STATS `%(database)s`.`%(table)s`' % {'database': database, 'table': table}
    else:
      table_obj = self.get_table(database, table)
      if table_obj.partition_keys:
        raise NotImplementedError('HIVE-4861: COMPUTE STATISTICS FOR COLUMNS not supported for partitioned-tables.')
      else:
        hql = 'ANALYZE TABLE `%(database)s`.`%(table)s` COMPUTE STATISTICS FOR COLUMNS' % {'database': database, 'table': table}
    return self.execute_statement(hql)

  def get_table_stats(self, database, table):
    stats = []
    if self.server_name == 'impala':
      hql = 'SHOW TABLE STATS `%(database)s`.`%(table)s`' % {'database': database, 'table': table}
      query = hql_query(hql)
      handle = self.execute_and_wait(query, timeout_sec=5.0)
      if handle:
        result = self.fetch(handle, rows=100)
        self.close(handle)
        stats = list(result.rows())
    else:
      # Hive exposes stats on the table object itself.
      table = self.get_table(database, table)
      stats = table.stats
    return stats

  def get_table_columns_stats(self, database, table, column):
    """Per-column stats as a list of one-key dicts; [] when the query yields no handle."""
    if self.server_name == 'impala':
      hql = 'SHOW COLUMN STATS `%(database)s`.`%(table)s`' % {'database': database, 'table': table}
    else:
      hql = 'DESCRIBE FORMATTED `%(database)s`.`%(table)s` `%(column)s`' % {'database': database, 'table': table, 'column': column}

    query = hql_query(hql)
    handle = self.execute_and_wait(query, timeout_sec=5.0)

    if handle:
      result = self.fetch(handle, rows=100)
      self.close(handle)
      data = list(result.rows())
      if self.server_name == 'impala':
        # SHOW COLUMN STATS returns all columns; keep the requested one.
        data = [col for col in data if col[0] == column][0]
        return [
            {'col_name': data[0]},
            {'data_type': data[1]},
            {'distinct_count': data[2]},
            {'num_nulls': data[3]},
            {'max_col_len': data[4]},
            {'avg_col_len': data[5]},
        ]
      else:
        # DESCRIBE FORMATTED: row index 2 carries the column-stats tuple.
        return [
            {'col_name': data[2][0]},
            {'data_type': data[2][1]},
            {'min': data[2][2]},
            {'max': data[2][3]},
            {'num_nulls': data[2][4]},
            {'distinct_count': data[2][5]},
            {'avg_col_len': data[2][6]},
            {'max_col_len': data[2][7]},
            {'num_trues': data[2][8]},
            {'num_falses': data[2][9]},
        ]
    else:
      return []

  def get_top_terms(self, database, table, column, limit=30, prefix=None):
    """Most frequent values of a column, optionally restricted to a prefix."""
    limit = min(limit, 100)
    prefix_match = ''
    if prefix:
      prefix_match = "WHERE CAST(%(column)s AS STRING) LIKE '%(prefix)s%%'" % {'column': column, 'prefix': prefix}

    hql = 'SELECT %(column)s, COUNT(*) AS ct FROM `%(database)s`.`%(table)s` %(prefix_match)s GROUP BY %(column)s ORDER BY ct DESC LIMIT %(limit)s' % {
        'database': database,
        'table': table,
        'column': column,
        'prefix_match': prefix_match,
        'limit': limit,
    }

    query = hql_query(hql)
    handle = self.execute_and_wait(query, timeout_sec=60.0)  # Hive is very slow

    if handle:
      result = self.fetch(handle, rows=limit)
      self.close(handle)
      return list(result.rows())
    else:
      return []

  def drop_table(self, database, table):
    if table.is_view:
      hql = "DROP VIEW `%s`.`%s`" % (database, table.name,)
    else:
      hql = "DROP TABLE `%s`.`%s`" % (database, table.name,)
    return self.execute_statement(hql)

  def load_data(self, database, table, form, design):
    """Build and run LOAD DATA INPATH from a validated LoadDataForm-style form."""
    hql = "LOAD DATA INPATH"
    hql += " '%s'" % form.cleaned_data['path']
    if form.cleaned_data['overwrite']:
      hql += " OVERWRITE"
    hql += " INTO TABLE "
    hql += "`%s`.`%s`" % (database, table.name,)
    if form.partition_columns:
      hql += " PARTITION ("
      vals = []
      for key, column_name in form.partition_columns.iteritems():
        vals.append("%s='%s'" % (column_name, form.cleaned_data[key]))
      hql += ", ".join(vals)
      hql += ")"

    query = hql_query(hql, database)
    design.data = query.dumps()
    design.save()

    return self.execute_query(query, design)

  def drop_tables(self, database, tables, design):
    # One multi-statement query (';'-joined) recorded on the design.
    hql = []
    for table in tables:
      if table.is_view:
        hql.append("DROP VIEW `%s`.`%s`" % (database, table.name,))
      else:
        hql.append("DROP TABLE `%s`.`%s`" % (database, table.name,))
    query = hql_query(';'.join(hql), database)
    design.data = query.dumps()
    design.save()

    return self.execute_query(query, design)

  def drop_database(self, database):
    return self.execute_statement("DROP DATABASE `%s`" % database)

  def drop_databases(self, databases, design):
    hql = []
    for database in databases:
      hql.append("DROP DATABASE `%s`" % database)
    query = hql_query(';'.join(hql), database)
    design.data = query.dumps()
    design.save()

    return self.execute_query(query, design)

  def _get_and_validate_select_query(self, design, query_history):
    # Only SELECTs may be materialized into a directory or table.
    query = design.get_query_statement(query_history.statement_number)
    if not query.strip().lower().startswith('select'):
      raise Exception(_('Only SELECT statements can be saved. Provided query: %(query)s') % {'query': query})
    return query

  def insert_query_into_directory(self, query_history, target_dir):
    design = query_history.design.get_design()
    database = design.query['database']
    self.use(database)
    query = self._get_and_validate_select_query(design, query_history)
    hql = "INSERT OVERWRITE DIRECTORY '%s' %s" % (target_dir, query)

    return self.execute_statement(hql)

  def create_table_as_a_select(self, request, query_history, target_database, target_table, result_meta):
    """Materialize a query's results as a new table (CTAS, or legacy copy path)."""
    design = query_history.design.get_design()
    database = design.query['database']

    # Case 1: Hive Server 2 backend or results straight from an existing table
    if result_meta.in_tablename:
      self.use(database)
      query = self._get_and_validate_select_query(design, query_history)
      hql = 'CREATE TABLE `%s`.`%s` AS %s' % (target_database, target_table, query)
      query_history = self.execute_statement(hql)
    else:
      # FYI: this path is dead since moving to HiveServer2
      #
      # Case 2: The results are in some temporary location
      # Beeswax backward compatibility and optimization
      # 1. Create table
      cols = ''
      schema = result_meta.schema
      for i, field in enumerate(schema.fieldSchemas):
        if i != 0:
          cols += ',\n'
        cols += '`%s` %s' % (field.name, field.type)

      # The representation of the delimiter is messy.
      # It came from Java as a string, which might has been converted from an integer.
      # So it could be "1" (^A), or "10" (\n), or "," (a comma literally).
      delim = result_meta.delim
      if not delim.isdigit():
        delim = str(ord(delim))

      # NOTE(review): exact interior whitespace of this HQL literal is not
      # recoverable from this snippet; layout below is reconstructed.
      hql = '''
            CREATE TABLE `%s` (
            %s
            )
            ROW FORMAT DELIMITED
            FIELDS TERMINATED BY '\%s'
            STORED AS TextFile
            ''' % (target_table, cols, delim.zfill(3))

      query = hql_query(hql)
      self.execute_and_wait(query)

      try:
        # 2. Move the results into the table's storage
        table_obj = self.get_table('default', target_table)
        table_loc = request.fs.urlsplit(table_obj.path_location)[2]
        result_dir = request.fs.urlsplit(result_meta.table_dir)[2]
        request.fs.rename_star(result_dir, table_loc)
        LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, table_loc))
        request.info(request, _('Saved query results as new table %(table)s.') % {'table': target_table})
        query_history.save_state(QueryHistory.STATE.expired)
      except Exception, ex:
        # Cleanup: drop the half-populated table, then re-raise the original error.
        query = hql_query('DROP TABLE `%s`' % target_table)
        try:
          self.execute_and_wait(query)
        except Exception, double_trouble:
          LOG.exception('Failed to drop table "%s" as well: %s' % (target_table, double_trouble))
        raise ex

      # NOTE(review): snippet is truncated after this line.
      url = format_preserving_redirect(request, reverse('metastore:index'))
# NOTE(review): fragment — the enclosing save-results helper (and its opening
# try:) begins before this snippet; table_obj/result_meta/db are bound there.
    table_loc = request.fs.urlsplit(table_obj.path_location)[2]
    request.fs.rename_star(result_meta.table_dir, table_loc)
    LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, table_loc))
    messages.info(request, _('Saved query results as new table %(table)s') % {'table': target_table})
    # Results were moved into the new table's storage; mark the history expired.
    query_history.save_state(models.QueryHistory.STATE.expired)
  except Exception, ex:
    LOG.error('Error moving data into storage of table %s. Will drop table.' % (target_table,))
    query = hql_query('DROP TABLE `%s`' % (target_table,))
    try:
      db.execute_directly(query)  # Don't wait for results
    except Exception, double_trouble:
      LOG.exception('Failed to drop table "%s" as well: %s' % (target_table, double_trouble))
    raise ex

  # Show tables upon success
  return format_preserving_redirect(request, urlresolvers.reverse(show_tables))


def confirm_query(request, query, on_success_url=None):
  """
  Used by other forms to confirm a query before it's executed.
  The form is the same as execute_query below.

  query - The HQL about to be executed
  on_success_url - The page to go to upon successful execution
  """
  mform = QueryForm()
  mform.bind()
  mform.query.initial = dict(query=query)
  # NOTE(review): the render() call is truncated here in this snippet.
  return render('execute.mako', request, {
    'form': mform,
# NOTE(review): fragment — same save-results tail as the previous snippet, but
# redirecting via the app-namespaced ':show_tables' URL name; the enclosing
# helper (and its opening try:) begins before this snippet.
    table_loc = request.fs.urlsplit(table_obj.path_location)[2]
    request.fs.rename_star(result_meta.table_dir, table_loc)
    LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, table_loc))
    messages.info(request, _('Saved query results as new table %(table)s') % {'table': target_table})
    query_history.save_state(models.QueryHistory.STATE.expired)
  except Exception, ex:
    LOG.error('Error moving data into storage of table %s. Will drop table.' % (target_table,))
    query = hql_query('DROP TABLE `%s`' % (target_table,))
    try:
      db.execute_directly(query)  # Don't wait for results
    except Exception, double_trouble:
      LOG.exception('Failed to drop table "%s" as well: %s' % (target_table, double_trouble))
    raise ex

  # Show tables upon success
  return format_preserving_redirect(request, reverse(get_app_name(request) + ':show_tables'))


def confirm_query(request, query, on_success_url=None):
  """
  Used by other forms to confirm a query before it's executed.
  The form is the same as execute_query below.

  query - The HQL about to be executed
  on_success_url - The page to go to upon successful execution
  """
  mform = QueryForm()
  mform.bind()
  mform.query.initial = dict(query=query)
  # NOTE(review): the render() call is truncated here in this snippet.
  return render('execute.mako', request, {
def watch_query(request, id):
  """
  Wait for the query to finish and (by default) displays the results of query id.
  It understands the optional GET params:

    on_success_url
      If given, it will be displayed when the query is successfully finished.
      Otherwise, it will display the view query results page by default.

    context
      A string of "name:data" that describes the context
      that generated this query result. It may be:
        - "table":"<table_name>"
        - "design":<design_id>

  All other GET params will be passed to on_success_url (if present).
  """
  # Coerce types; manage arguments
  id = int(id)
  history = authorized_get_history(request, id, must_exist=True)

  # GET param: context.
  context_param = request.GET.get('context', '')

  # GET param: on_success_url. Default to view_results
  results_url = urlresolvers.reverse(view_results, kwargs=dict(id=str(id), first_row=0, last_result_len=0))
  on_success_url = request.GET.get('on_success_url') or results_url

  # Get the server_id
  server_id, state = _get_server_id_and_state(history)
  history.save_state(state)

  # Query finished?
  if state == QueryHistory.STATE.expired:
    raise PopupException(_("The result of this query has expired."))
  if state == QueryHistory.STATE.available:
    return format_preserving_redirect(request, on_success_url, request.GET)
  if state == QueryHistory.STATE.failed:
    # When we fetch, Beeswax server will throw us a BeeswaxException, which has the
    # log we want to display.
    return format_preserving_redirect(request, results_url, request.GET)

  # Still running: fetch the server log so the wait page can display it.
  log = db_utils.db_client(history.get_query_server()).get_log(server_id)

  # Keep waiting
  # - Translate context into something more meaningful (type, data)
  query_context = _parse_query_context(context_param)

  return render('watch_wait.mako', request, {
    'query': history,
    'fwd_params': request.GET.urlencode(),
    'log': log,
    'hadoop_jobs': _parse_out_hadoop_jobs(log),
    'query_context': query_context,
  })
def flash_redirect(request):
  """Exercise the flash scope: drop a test message, then redirect to the gallery."""
  sleeper(request)
  flash_message = 'redirect test'
  request.flash.put(flash_message)
  return format_preserving_redirect(request, '/jframegallery/')
def save_results(request, id):
  """
  Save the results of a query to an HDFS directory

  POST only. Moves the materialized result directory, or delegates to
  _save_results_ctas for a new table. Raises PopupException when results are
  unavailable or the save fails.
  """
  query_history = authorized_get_history(request, id, must_exist=True)
  server_id, state = _get_query_handle_and_state(query_history)
  query_history.save_state(state)
  error_msg, log = None, None

  if request.method == 'POST':
    # Make sure the result is available.
    # Note that we may still hit errors during the actual save
    if not query_history.is_success():
      if query_history.is_failure():
        msg = _('This query has %(state)s. Results unavailable.') % {'state': state}
      else:
        msg = _('The result of this query is not available yet.')
      raise PopupException(msg)

    db = dbms.get(request.user, query_history.get_query_server_config())
    form = beeswax.forms.SaveResultsForm(request.POST, db=db)

    # Cancel goes back to results
    if request.POST.get('cancel'):
      return format_preserving_redirect(request, '/beeswax/watch/%s' % (id,))

    if form.is_valid():
      # Do save
      # 1. Get the results metadata
      assert request.POST.get('save')
      try:
        handle, state = _get_query_handle_and_state(query_history)
        result_meta = db.get_results_metadata(handle)
      except Exception, ex:
        LOG.exception(ex)
        raise PopupException(_('Cannot find query.'))

      # Normalize the HDFS URI to its path component.
      if result_meta.table_dir:
        result_meta.table_dir = request.fs.urlsplit(result_meta.table_dir)[2]

      # 2. Check for partitioned tables
      if result_meta.table_dir is None:
        raise PopupException(_('Saving results from a partitioned table is not supported. You may copy from the HDFS location manually.'))

      # 3. Actual saving of results
      try:
        if form.cleaned_data['save_target'] == form.SAVE_TYPE_DIR:
          # To dir
          if result_meta.in_tablename:
            raise PopupException(_('Saving results from a query with no MapReduce jobs is not supported. '
                                   'You may copy manually from the HDFS location %(path)s.') % {'path': result_meta.table_dir})
          target_dir = form.cleaned_data['target_dir']
          request.fs.rename_star(result_meta.table_dir, target_dir)
          LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, target_dir))
          # Results moved away: the cached result is now gone.
          query_history.save_state(models.QueryHistory.STATE.expired)
          return redirect(reverse('filebrowser.views.view', kwargs={'path': target_dir}))
        elif form.cleaned_data['save_target'] == form.SAVE_TYPE_TBL:
          # To new table
          try:
            return _save_results_ctas(request, query_history, form.cleaned_data['target_table'], result_meta)
          except Exception, bex:
            LOG.exception(bex)
            error_msg, log = expand_exception(bex, db)
      except WebHdfsException, ex:
        raise PopupException(_('The table could not be saved.'), detail=ex)
      except IOError, ex:
        LOG.exception(ex)
        error_msg = str(ex)
      # NOTE(review): the GET / error re-render path of this view is not
      # present in this snippet (truncated here).
# Go to next statement if asked to continue or when a statement with no dataset finished. if request.method == 'POST' or (not query_history.is_finished() and query_history.is_success() and not query_history.has_results): try: query_history = db.execute_next_statement(query_history) except Exception, ex: pass # Check query state handle, state = _get_query_handle_and_state(query_history) query_history.save_state(state) if query_history.is_failure(): # When we fetch, Beeswax server will throw us a Exception, which has the # log we want to display. return format_preserving_redirect(request, results_url, request.GET) elif query_history.is_finished() or (query_history.is_success() and query_history.has_results): return format_preserving_redirect(request, on_success_url, request.GET) # Still running log = db.get_log(handle) # Keep waiting # - Translate context into something more meaningful (type, data) query_context = _parse_query_context(context_param) return render('watch_wait.mako', request, { 'query': query_history, 'fwd_params': request.GET.urlencode(), 'log': log, 'hadoop_jobs': _parse_out_hadoop_jobs(log),
class HiveServer2Dbms(object):
  """
  Earlier revision of the HiveServer2/Impala database facade: builds HQL for
  metadata operations and runs it through self.client.

  NOTE(review): the tail of create_table_as_a_select is truncated in this
  snippet (the final 'url = ...' line is cut off mid-function).
  """

  def __init__(self, client, server_type):
    self.client = client
    self.server_type = server_type
    self.server_name = self.client.query_server['server_name']

  def get_table(self, database, table_name):
    # DB name not supported in SHOW PARTITIONS required in Table
    self.use(database)
    return self.client.get_table(database, table_name)

  def get_tables(self, database='default', table_names='.*'):
    return self.client.get_tables(database, table_names)

  def get_databases(self):
    return self.client.get_databases()

  def execute_query(self, query, design):
    return self.execute_and_watch(query, design=design)

  def select_star_from(self, database, table):
    hql = "SELECT * FROM `%s.%s` %s" % (database, table.name, self._get_browse_limit_clause(table))
    return self.execute_statement(hql)

  def execute_statement(self, hql):
    if self.server_name == 'impala':
      query = hql_query(hql, QUERY_TYPES[1])
    else:
      query = hql_query(hql, QUERY_TYPES[0])
    return self.execute_and_watch(query)

  def fetch(self, query_handle, start_over=False, rows=None):
    # Some backends advertise that re-reading from the beginning is unsupported.
    no_start_over_support = [
        config_variable for config_variable in self.get_default_configuration(False)
        if config_variable.key == 'support_start_over' and config_variable.value == 'false'
    ]
    if no_start_over_support:
      start_over = False
    return self.client.fetch(query_handle, start_over, rows)

  def close_operation(self, query_handle):
    return self.client.close_operation(query_handle)

  def open_session(self, user):
    return self.client.open_session(user)

  def close_session(self, session):
    return self.client.close_session(session)

  def cancel_operation(self, query_handle):
    resp = self.client.cancel_operation(query_handle)
    # Impala needs an explicit close after cancel to release the operation.
    if self.client.query_server['server_name'] == 'impala':
      resp = self.client.close_operation(query_handle)
    return resp

  def get_sample(self, database, table):
    """No samples if it's a view (HUE-526)"""
    if not table.is_view:
      limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())
      hql = "SELECT * FROM %s.%s LIMIT %s" % (database, table.name, limit)
      query = hql_query(hql)
      handle = self.execute_and_wait(query, timeout_sec=5.0)
      if handle:
        result = self.fetch(handle, rows=100)
        self.close(handle)
        return result

  def analyze_table_table(self, database, table):
    # NOTE(review): '%(table_name)' is missing the trailing 's' conversion in
    # the original — this would raise ValueError at runtime; left untouched here.
    hql = 'analyze table `%(database)s.%(table_name)` compute statistics' % {'database': database, 'table_name': table.name}
    query = hql_query(hql, database)
    return self.execute_query(query)

  def analyze_table_column(self):
    # analyze table <table_name> partition <part_name> compute statistics for columns <col_name1>, <col_name2>...
    pass

  def drop_table(self, database, table):
    if table.is_view:
      hql = "DROP VIEW `%s.%s`" % (database, table.name,)
    else:
      hql = "DROP TABLE `%s.%s`" % (database, table.name,)
    return self.execute_statement(hql)

  def load_data(self, database, table, form, design):
    """Build and run LOAD DATA INPATH from a validated form."""
    hql = "LOAD DATA INPATH"
    hql += " '%s'" % form.cleaned_data['path']
    if form.cleaned_data['overwrite']:
      hql += " OVERWRITE"
    hql += " INTO TABLE "
    hql += "`%s.%s`" % (database, table.name,)
    if form.partition_columns:
      hql += " PARTITION ("
      vals = []
      for key, column_name in form.partition_columns.iteritems():
        vals.append("%s='%s'" % (column_name, form.cleaned_data[key]))
      hql += ", ".join(vals)
      hql += ")"

    query = hql_query(hql, database)
    design.data = query.dumps()
    design.save()

    return self.execute_query(query, design)

  def drop_tables(self, database, tables, design):
    # One multi-statement query (';'-joined) recorded on the design.
    hql = []
    for table in tables:
      if table.is_view:
        hql.append("DROP VIEW `%s.%s`" % (database, table.name,))
      else:
        hql.append("DROP TABLE `%s.%s`" % (database, table.name,))
    query = hql_query(';'.join(hql), database)
    design.data = query.dumps()
    design.save()

    return self.execute_query(query, design)

  def invalidate_tables(self, database, tables):
    # Impala-only: refresh metadata for each table.
    for table in tables:
      hql = "INVALIDATE METADATA %s.%s" % (database, table,)
      query = hql_query(hql, database, query_type=QUERY_TYPES[1])
      handle = self.execute_and_wait(query, timeout_sec=10.0)
      if handle:
        self.close(handle)

  def drop_database(self, database):
    return self.execute_statement("DROP DATABASE `%s`" % database)

  def drop_databases(self, databases, design):
    hql = []
    for database in databases:
      hql.append("DROP DATABASE `%s`" % database)
    query = hql_query(';'.join(hql), database)
    design.data = query.dumps()
    design.save()

    return self.execute_query(query, design)

  def insert_query_into_directory(self, query_history, target_dir):
    design = query_history.design.get_design()
    database = design.query['database']
    self.use(database)
    hql = "INSERT OVERWRITE DIRECTORY '%s' %s" % (target_dir, design.query['query'])
    return self.execute_statement(hql)

  def create_table_as_a_select(self, request, query_history, target_database, target_table, result_meta):
    """Materialize a query's results as a new table (CTAS, or legacy copy path)."""
    design = query_history.design.get_design()
    database = design.query['database']

    # Case 1: Hive Server 2 backend or results straight from an existing table
    if result_meta.in_tablename:
      self.use(database)
      hql = 'CREATE TABLE %s.%s AS %s' % (target_database, target_table, design.query['query'])
      query_history = self.execute_statement(hql)
    else:
      # Case 2: The results are in some temporary location
      # Beeswax backward compatibility and optimization
      # 1. Create table
      cols = ''
      schema = result_meta.schema
      for i, field in enumerate(schema.fieldSchemas):
        if i != 0:
          cols += ',\n'
        cols += '`%s` %s' % (field.name, field.type)

      # The representation of the delimiter is messy.
      # It came from Java as a string, which might has been converted from an integer.
      # So it could be "1" (^A), or "10" (\n), or "," (a comma literally).
      delim = result_meta.delim
      if not delim.isdigit():
        delim = str(ord(delim))

      # NOTE(review): exact interior whitespace of this HQL literal is not
      # recoverable from this snippet; layout below is reconstructed.
      hql = '''
            CREATE TABLE `%s` (
            %s
            )
            ROW FORMAT DELIMITED
            FIELDS TERMINATED BY '\%s'
            STORED AS TextFile
            ''' % (target_table, cols, delim.zfill(3))

      query = hql_query(hql)
      self.execute_and_wait(query)

      try:
        # 2. Move the results into the table's storage
        table_obj = self.get_table('default', target_table)
        table_loc = request.fs.urlsplit(table_obj.path_location)[2]
        result_dir = request.fs.urlsplit(result_meta.table_dir)[2]
        request.fs.rename_star(result_dir, table_loc)
        LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, table_loc))
        request.info(request, _('Saved query results as new table %(table)s.') % {'table': target_table})
        query_history.save_state(QueryHistory.STATE.expired)
      except Exception, ex:
        # Cleanup: drop the half-populated table, then re-raise the original error.
        query = hql_query('DROP TABLE `%s`' % target_table)
        try:
          self.execute_and_wait(query)
        except Exception, double_trouble:
          LOG.exception('Failed to drop table "%s" as well: %s' % (target_table, double_trouble))
        raise ex

      # NOTE(review): snippet is truncated after this line.
      url = format_preserving_redirect(request, reverse('metastore:index'))
def save_results(request, id):
  """
  Save the results of a query to an HDFS directory.

  POST performs the save; the submitted form selects between moving the result
  files into a plain HDFS directory and creating a new table from them (CTAS).

  NOTE(review): this chunk appears truncated — after the error handlers there is
  no final render/return, so a GET or an invalid form falls through and returns
  None.
  """
  # Coerce types; manage arguments
  id = int(id)
  query_history = models.QueryHistory.objects.get(id=id)
  # Only the query owner may save its results.
  if query_history.owner != request.user:
    raise PopupException('This action is only available to the user who submitted the query.')
  _, state = _get_server_id_and_state(query_history)
  # Persist the freshly-fetched backend state onto the history row.
  query_history.save_state(state)
  error_msg, log = None, None

  if request.method == 'POST':
    # Make sure the result is available.
    # Note that we may still hit errors during the actual save
    if state != models.QueryHistory.STATE.available:
      if state in (models.QueryHistory.STATE.failed, models.QueryHistory.STATE.expired):
        msg = 'This query has %s. Results unavailable.' % (state,)
      else:
        msg = 'The result of this query is not available yet.'
      raise PopupException(msg)

    form = beeswax.forms.SaveResultsForm(request.POST)

    # Cancel goes back to results
    if request.POST.get('cancel'):
      return format_preserving_redirect(request, '/beeswax/watch/%s' % (id,))

    if form.is_valid():
      # Do save
      # 1. Get the results metadata
      assert request.POST.get('save')
      handle = QueryHandle(id=query_history.server_id, log_context=query_history.log_context)
      try:
        result_meta = db_utils.db_client().get_results_metadata(handle)
      except QueryNotFoundException, ex:
        LOG.exception(ex)
        raise PopupException('Cannot find query.')
      if result_meta.table_dir:
        # Keep only the path component of the HDFS URI.
        result_meta.table_dir = request.fs.urlsplit(result_meta.table_dir)[2]

      # 2. Check for partitioned tables
      if result_meta.table_dir is None:
        raise PopupException(
            'Saving results from a partitioned table is not supported. '
            'You may copy from the HDFS location manually.')

      # 3. Actual saving of results
      try:
        if form.cleaned_data['save_target'] == form.SAVE_TYPE_DIR:
          # To dir
          if result_meta.in_tablename:
            raise PopupException(
                'Saving results from a table to a directory is not supported. '
                'You may copy from the HDFS location manually.')
          target_dir = form.cleaned_data['target_dir']
          # Move (not copy) the result files into the chosen directory.
          request.fs.rename_star(result_meta.table_dir, target_dir)
          LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, target_dir))
          # The result files left their original location: mark the query expired.
          query_history.save_state(models.QueryHistory.STATE.expired)
          fb_url = location_to_url(request, target_dir, strict=False)
          popup = PopupWithJframe('Query results stored in %s' % (target_dir,),
                                  launch_app_name='FileBrowser',
                                  launch_app_url=fb_url)
          return render_injected(list_query_history(request), popup)
        elif form.cleaned_data['save_target'] == form.SAVE_TYPE_TBL:
          # To new table
          try:
            return _save_results_ctas(request, query_history, form.cleaned_data['target_table'], result_meta)
          except BeeswaxException, bex:
            LOG.exception(bex)
            # Fall through carrying the expanded error — presumably rendered by
            # the (truncated) tail of this view.
            error_msg, log = expand_exception(bex)
      except IOError, ex:
        LOG.exception(ex)
        error_msg = str(ex)
def watch_query(request, id):
  """
  Poll a running query and show its progress page.

  Optional GET parameters:

    on_success_url
      Redirect target once the query finishes successfully; defaults to the
      view-results page.

    context
      A "name:data" string describing what generated this query, e.g.
      "table":"<table_name>" or "design":<design_id>.

  Any other GET parameters are forwarded to on_success_url (when present).
  """
  # Normalize the id up front; everything below keys off the integer form.
  id = int(id)

  # Raw context string from the caller, translated for the template later on.
  context_param = request.GET.get('context', '')

  # Default landing page is the results view unless the caller overrides it.
  results_url = urlresolvers.reverse(view_results, kwargs=dict(id=str(id), first_row=0))
  on_success_url = request.GET.get('on_success_url') or results_url

  # Load the history row so we know the backend server_id, and persist the
  # freshly-fetched state.
  query_history = models.QueryHistory.objects.get(id=id)
  server_id, state = _get_server_id_and_state(query_history)
  query_history.save_state(state)

  # Terminal states first: expired results are an error; success and failure
  # both redirect (failure goes to the results page so the error log shows).
  if state == models.QueryHistory.STATE.expired:
    raise PopupException("The result of this query has expired.")
  if state == models.QueryHistory.STATE.available:
    return format_preserving_redirect(request, on_success_url, request.GET)
  if state == models.QueryHistory.STATE.failed:
    # Fetching will make the Beeswax server raise a BeeswaxException carrying
    # the log we want to display.
    return format_preserving_redirect(request, results_url, request.GET)

  # Still running: grab the current log and build one download URL per format.
  log = db_utils.db_client().get_log(server_id)
  download_urls = dict(
      (fmt, urlresolvers.reverse(download, kwargs=dict(id=str(id), format=fmt)))
      for fmt in common.DL_FORMATS)

  # Translate the context string into something more meaningful (type, data).
  context = _parse_query_context(context_param)

  return render('watch_wait.mako', request, {
    'query': query_history,
    'fwd_params': request.GET.urlencode(),
    'download_urls': download_urls,
    'log': log,
    'hadoop_jobs': _parse_out_hadoop_jobs(log),
    'query_context': context,
  })
# Go to next statement if asked to continue or when a statement with no dataset finished. if request.method == 'POST' or (not query_history.is_finished() and query_history.is_success() and not query_history.has_results): try: query_history = db.execute_next_statement(query_history) except BeeswaxException, ex: pass # Check query state handle, state = _get_query_handle_and_state(query_history) query_history.save_state(state) if query_history.is_failure(): # When we fetch, Beeswax server will throw us a BeeswaxException, which has the # log we want to display. return format_preserving_redirect(request, results_url, request.GET) elif query_history.is_finished() or (query_history.is_success() and query_history.has_results): return format_preserving_redirect(request, on_success_url, request.GET) # Still running log = db.get_log(handle) # Keep waiting # - Translate context into something more meaningful (type, data) query_context = _parse_query_context(context_param) return render('watch_wait.mako', request, { 'query': query_history, 'fwd_params': request.GET.urlencode(), 'log': log, 'hadoop_jobs': _parse_out_hadoop_jobs(log),
def watch_query(request, id, download_format=None):
  """
  Poll a running query and show its progress page.

  Optional GET parameters:

    on_success_url
      Redirect target once the query finishes successfully; defaults to the
      view-results page.

    context
      A "name:data" string describing what generated this query, e.g.
      "table":"<table_name>" or "design":<design_id>.

  Any other GET parameters are forwarded to on_success_url (when present).
  """
  query_history = authorized_get_history(request, id, must_exist=True)
  db = dbms.get(request.user, query_history.get_query_server_config())

  # Raw context string from the caller, translated for the template later on.
  context_param = request.GET.get('context', '')

  # Pick the fallback URL: with no download in flight but a known download
  # format, bounce back to the editor; otherwise go to the results page.
  no_download_in_flight = request.session.get('dl_status', False) == False
  if no_download_in_flight and download_format in common.DL_FORMATS:
    results_url = urlresolvers.reverse(get_app_name(request) + ':execute_query')
  else:
    results_url = urlresolvers.reverse(get_app_name(request) + ':view_results', kwargs={'id': id, 'first_row': 0})
    if request.GET.get('download', ''):
      results_url += '?download=true'
  on_success_url = request.GET.get('on_success_url') or results_url

  # Advance to the next statement when asked to continue, or when a statement
  # that produces no dataset has finished.
  should_advance = request.method == 'POST' or (
      not query_history.is_finished() and query_history.is_success() and not query_history.has_results)
  if should_advance:
    try:
      query_history = db.execute_next_statement(query_history)
    except Exception:
      # Best effort: keep displaying the current statement's state.
      pass

  # Refresh and persist the backend state.
  handle, state = _get_query_handle_and_state(query_history)
  query_history.save_state(state)

  if query_history.is_failure():
    # Fetching will make the Beeswax server raise an exception carrying the
    # log we want to display.
    return format_preserving_redirect(request, results_url, request.GET)
  if query_history.is_finished() or (query_history.is_success() and query_history.has_results):
    if request.session.get('dl_status', False):
      # BUG-20020
      on_success_url = urlresolvers.reverse(get_app_name(request) + ':download', kwargs=dict(id=str(id), format=download_format))
      _clean_session(request)
    return format_preserving_redirect(request, on_success_url, request.GET)

  # Still running: render the wait page with the current log.
  log = db.get_log(handle)
  query_context = _parse_query_context(context_param)

  return render('watch_wait.mako', request, {
    'query': query_history,
    'fwd_params': request.GET.urlencode(),
    'log': log,
    'hadoop_jobs': _parse_out_hadoop_jobs(log)[0],
    'query_context': query_context,
    'download_format': download_format, ## ExpV
  })
# Go to next statement if asked to continue or when a statement with no dataset finished. if request.method == 'POST' or (not query_history.is_finished() and query_history.is_success() and not query_history.has_results): try: query_history = db.execute_next_statement(query_history) except Exception, ex: pass # Check query state handle, state = _get_query_handle_and_state(query_history) query_history.save_state(state) if query_history.is_failure(): # When we fetch, Beeswax server will throw us a Exception, which has the # log we want to display. return format_preserving_redirect(request, results_url, request.GET) elif query_history.is_finished() or (query_history.is_success() and query_history.has_results): return format_preserving_redirect(request, on_success_url, request.GET) # Still running log = db.get_log(handle) # Keep waiting # - Translate context into something more meaningful (type, data) query_context = _parse_query_context(context_param) return render('watch_wait.mako', request, { 'query': query_history, 'fwd_params': request.GET.urlencode(), 'log': log, 'hadoop_jobs': _parse_out_hadoop_jobs(log),
def save_results(request, id):
  """
  Save the results of a query to an HDFS directory.

  POST performs the save; the submitted form selects between moving the result
  files into a plain HDFS directory and creating a new table from them (CTAS).

  NOTE(review): this chunk appears truncated — after the error handlers there is
  no final render/return, so a GET or an invalid form falls through and returns
  None.
  """
  query_history = authorized_get_history(request, id, must_exist=True)

  server_id, state = _get_query_handle_and_state(query_history)
  # Persist the freshly-fetched backend state onto the history row.
  query_history.save_state(state)
  error_msg, log = None, None

  if request.method == 'POST':
    # Make sure the result is available.
    # Note that we may still hit errors during the actual save
    if not query_history.is_success():
      #if state != models.QueryHistory.STATE.available:
      if query_history.is_failure():
        #if state in (models.QueryHistory.STATE.failed, models.QueryHistory.STATE.expired):
        msg = _('This query has %(state)s. Results unavailable.') % {'state': state}
      else:
        msg = _('The result of this query is not available yet.')
      raise PopupException(msg)

    db = dbms.get(request.user, query_history.get_query_server())
    form = beeswax.forms.SaveResultsForm(request.POST, db=db)

    # Cancel goes back to results
    if request.POST.get('cancel'):
      return format_preserving_redirect(request, '/beeswax/watch/%s' % (id,))

    if form.is_valid():
      # Do save
      # 1. Get the results metadata
      assert request.POST.get('save')
      try:
        handle, state = _get_query_handle_and_state(query_history)
        result_meta = db.get_results_metadata(handle)
      except QueryNotFoundException, ex:
        LOG.exception(ex)
        raise PopupException(_('Cannot find query.'))

      if result_meta.table_dir:
        # Keep only the path component of the HDFS URI.
        result_meta.table_dir = request.fs.urlsplit(result_meta.table_dir)[2]

      # 2. Check for partitioned tables
      if result_meta.table_dir is None:
        raise PopupException(_('Saving results from a partitioned table is not supported. You may copy from the HDFS location manually.'))

      # 3. Actual saving of results
      try:
        if form.cleaned_data['save_target'] == form.SAVE_TYPE_DIR:
          # To dir
          if result_meta.in_tablename:
            raise PopupException(_('Saving results from a table to a directory is not supported. You may copy from the HDFS location manually.'))
          target_dir = form.cleaned_data['target_dir']
          # Move (not copy) the result files into the chosen directory.
          request.fs.rename_star(result_meta.table_dir, target_dir)
          LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, target_dir))
          # The result files left their original location: mark the query expired.
          query_history.save_state(models.QueryHistory.STATE.expired)
          return redirect(urlresolvers.reverse('filebrowser.views.view', kwargs={'path': target_dir}))
        elif form.cleaned_data['save_target'] == form.SAVE_TYPE_TBL:
          # To new table
          try:
            return _save_results_ctas(request, query_history, form.cleaned_data['target_table'], result_meta)
          except BeeswaxException, bex:
            LOG.exception(bex)
            # Fall through carrying the expanded error — presumably rendered by
            # the (truncated) tail of this view.
            error_msg, log = expand_exception(bex, db)
      except WebHdfsException, ex:
        raise PopupException(_('The table could not be saved.'), detail=ex)
      except IOError, ex:
        LOG.exception(ex)
        error_msg = str(ex)
def create_table_as_a_select(self, request, query_history, target_database, target_table, result_meta):
  """
  Save the query's results as a new table.

  Two strategies, chosen from `result_meta`:
    1. results already live in a table (HiveServer2) -> plain
       CREATE TABLE ... AS SELECT;
    2. results live in a temporary HDFS dir (legacy Beeswax, dead path) ->
       create an empty text table, then move the result files into its storage.

  Returns the QueryHistory of the CTAS statement (case 1) or the original
  `query_history` (case 2).
  """
  design = query_history.design.get_design()
  database = design.query['database']

  # Case 1: Hive Server 2 backend or results straight from an existing table
  if result_meta.in_tablename:
    self.use(database)
    query = self._get_and_validate_select_query(design, query_history)
    hql = 'CREATE TABLE `%s`.`%s` AS %s' % (target_database, target_table, query)
    query_history = self.execute_statement(hql)
  else:
    # FYI: this path is dead since moving to HiveServer2
    #
    # Case 2: The results are in some temporary location
    # Beeswax backward compatibility and optimization

    # 1. Create table
    cols = ''
    schema = result_meta.schema
    for i, field in enumerate(schema.fieldSchemas):
      if i != 0:
        cols += ',\n'
      cols += '`%s` %s' % (field.name, field.type)

    # The representation of the delimiter is messy.
    # It came from Java as a string, which might has been converted from an integer.
    # So it could be "1" (^A), or "10" (\n), or "," (a comma literally).
    delim = result_meta.delim
    if not delim.isdigit():
      delim = str(ord(delim))

    hql = '''
CREATE TABLE `%s` (
%s
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\%s'
STORED AS TextFile
''' % (target_table, cols, delim.zfill(3))

    query = hql_query(hql)
    self.execute_and_wait(query)

    try:
      # 2. Move the results into the table's storage
      table_obj = self.get_table('default', target_table)
      table_loc = request.fs.urlsplit(table_obj.path_location)[2]
      result_dir = request.fs.urlsplit(result_meta.table_dir)[2]
      request.fs.rename_star(result_dir, table_loc)
      LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, table_loc))
      request.info(request, _('Saved query results as new table %(table)s.') % {'table': target_table})
      # Results were moved away from their original location: mark them expired.
      query_history.save_state(QueryHistory.STATE.expired)
    except Exception as ex:
      # Roll back: drop the freshly-created (now useless) table before re-raising.
      query = hql_query('DROP TABLE `%s`' % target_table)
      try:
        self.execute_and_wait(query)
      except Exception as double_trouble:
        LOG.exception('Failed to drop table "%s" as well: %s' % (target_table, double_trouble))
      raise ex

  # NOTE(review): `url` is assigned but never used — dead code kept verbatim.
  url = format_preserving_redirect(request, reverse('metastore:index'))

  return query_history
# Go to next statement if asked to continue or when a statement with no dataset finished. if request.method == 'POST' or (not query_history.is_finished() and query_history.is_success() and not query_history.has_results): try: query_history = db.execute_next_statement(query_history) except Exception, ex: pass # Check query state handle, state = _get_query_handle_and_state(query_history) query_history.save_state(state) if query_history.is_failure(): # When we fetch, Beeswax server will throw us a Exception, which has the # log we want to display. return format_preserving_redirect(request, results_url, request.GET) elif query_history.is_finished() or (query_history.is_success() and query_history.has_results): return format_preserving_redirect(request, on_success_url, request.GET) # Still running log = db.get_log(handle) # Keep waiting # - Translate context into something more meaningful (type, data) query_context = _parse_query_context(context_param) return render('watch_wait.mako', request, { 'query': query_history, 'fwd_params': request.GET.urlencode(), 'log': log, 'hadoop_jobs': _parse_out_hadoop_jobs(log),
# 2. Move the results into the table's storage table_obj = self.get_table("default", target_table) table_loc = request.fs.urlsplit(table_obj.path_location)[2] result_dir = request.fs.urlsplit(result_meta.table_dir)[2] request.fs.rename_star(result_dir, table_loc) LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, table_loc)) request.info(request, _("Saved query results as new table %(table)s.") % {"table": target_table}) query_history.save_state(QueryHistory.STATE.expired) except Exception, ex: query = hql_query("DROP TABLE `%s`" % target_table) try: self.execute_and_wait(query) except Exception, double_trouble: LOG.exception('Failed to drop table "%s" as well: %s' % (target_table, double_trouble)) raise ex url = format_preserving_redirect(request, reverse("metastore:index")) return query_history def use(self, database): query = hql_query("USE %s" % database) return self.client.use(query) def get_log(self, query_handle, start_over=True): return self.client.get_log(query_handle, start_over) def get_state(self, handle): return self.client.get_state(handle) def get_operation_status(self, handle): return self.client.get_operation_status(handle)
# 2. Move the results into the table's storage table_obj = self.get_table('default', target_table) table_loc = request.fs.urlsplit(table_obj.path_location)[2] result_dir = request.fs.urlsplit(result_meta.table_dir)[2] request.fs.rename_star(result_dir, table_loc) LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, table_loc)) request.info(request, _('Saved query results as new table %(table)s.') % {'table': target_table}) query_history.save_state(QueryHistory.STATE.expired) except Exception, ex: query = hql_query('DROP TABLE `%s`' % target_table) try: self.execute_and_wait(query) except Exception, double_trouble: LOG.exception('Failed to drop table "%s" as well: %s' % (target_table, double_trouble)) raise ex url = format_preserving_redirect(request, reverse('metastore:index')) return query_history def use(self, database): query = hql_query('USE `%s`' % database) return self.client.use(query) def get_log(self, query_handle, start_over=True): return self.client.get_log(query_handle, start_over) def get_state(self, handle): return self.client.get_state(handle)
def watch_query(request, id, download_format=None):
  """
  Wait for the query to finish and (by default) displays the results of query id.
  It understands the optional GET params:

    on_success_url
      If given, it will be displayed when the query is successfully finished.
      Otherwise, it will display the view query results page by default.

    context
      A string of "name:data" that describes the context
      that generated this query result. It may be:
        - "table":"<table_name>"
        - "design":<design_id>

  All other GET params will be passed to on_success_url (if present).
  """
  # Coerce types; manage arguments
  query_history = authorized_get_history(request, id, must_exist=True)
  db = dbms.get(request.user, query_history.get_query_server_config())

  # GET param: context.
  context_param = request.GET.get('context', '')

  # GET param: on_success_url. Default to view_results.
  # With no download in flight ('dl_status' unset) but a known download format,
  # fall back to the editor; otherwise fall back to the results page.
  if request.session.get('dl_status', False) == False and download_format in common.DL_FORMATS:
    results_url = urlresolvers.reverse(get_app_name(request) + ':execute_query')
  else:
    results_url = urlresolvers.reverse(get_app_name(request) + ':view_results', kwargs={'id': id, 'first_row': 0})
    if request.GET.get('download', ''):
      results_url += '?download=true'
  on_success_url = request.GET.get('on_success_url')
  if not on_success_url:
    on_success_url = results_url

  # Go to next statement if asked to continue or when a statement with no dataset finished.
  if request.method == 'POST' or (not query_history.is_finished() and query_history.is_success() and not query_history.has_results):
    try:
      query_history = db.execute_next_statement(query_history)
    except Exception:
      # Best effort: keep displaying the current statement's state.
      pass

  # Check query state
  handle, state = _get_query_handle_and_state(query_history)
  query_history.save_state(state)

  if query_history.is_failure():
    # When we fetch, Beeswax server will throw us a BeeswaxException, which has the
    # log we want to display.
    return format_preserving_redirect(request, results_url, request.GET)
  elif query_history.is_finished() or (query_history.is_success() and query_history.has_results):
    if request.session.get('dl_status', False):
      # BUG-20020
      on_success_url = urlresolvers.reverse(get_app_name(request) + ':download', kwargs=dict(id=str(id), format=download_format))
      _clean_session(request)
    return format_preserving_redirect(request, on_success_url, request.GET)

  # Still running
  log = db.get_log(handle)

  # Keep waiting
  # - Translate context into something more meaningful (type, data)
  query_context = _parse_query_context(context_param)

  return render('watch_wait.mako', request, {
    'query': query_history,
    'fwd_params': request.GET.urlencode(),
    'log': log,
    'hadoop_jobs': _parse_out_hadoop_jobs(log)[0],
    'query_context': query_context,
    'download_format': download_format, ## ExpV
  })