def confirm_query(request, query, on_success_url=None):
    """
    Show the query form pre-filled with ``query`` so it can be confirmed.

    Used by other forms to confirm a query before it's executed. The rendered
    form posts to the ``execute_query`` URL of the current app.

    query - The HQL about to be executed
    on_success_url - The page to go to upon successful execution
    """
    mform = QueryForm()
    mform.bind()
    mform.query.initial = dict(query=query)

    app_name = get_app_name(request)

    # NOTE: the original literal listed the "design" key twice (both None);
    # it is kept only once here.
    context = {
        "form": mform,
        "action": reverse(app_name + ":execute_query"),
        "error_message": None,
        "design": None,
        "on_success_url": on_success_url,
        "autocomplete_base_url": reverse(app_name + ":autocomplete", kwargs={}),
    }
    return render("execute.mako", request, context)
def _run_parameterized_query(request, design_id, explain):
    """
    Given a design and arguments to parameterize that design, runs the query.

    - explain is a boolean to determine whether to run as an explain or as an
      execute.

    This is an extra "step" in the flow from execute_query.

    Raises PopupException when the query form or the parameterization form is
    invalid, or when the query has no parameters. If running the query fails,
    the "execute.mako" page is rendered with the expanded error and log.
    """
    design = authorized_get_design(request, design_id, must_exist=True)

    # Reconstitute the form
    design_obj = beeswax.design.HQLdesign.loads(design.data)
    query_form = QueryForm()
    params = design_obj.get_query_dict()
    params.update(request.POST)

    databases = _get_db_choices(request)
    query_form.bind(params)
    query_form.query.fields["database"].choices = databases  # Could not do it in the form

    if not query_form.is_valid():
        raise PopupException(_("Query form is invalid: %s") % query_form.errors)

    query_str = query_form.query.cleaned_data["query"]
    app_name = get_app_name(request)
    query_server = get_query_server_config(app_name)
    query_type = SavedQuery.TYPES_MAPPING[app_name]

    parameterization_form_cls = make_parameterization_form(query_str)
    if not parameterization_form_cls:
        raise PopupException(_("Query is not parameterizable."))

    parameterization_form = parameterization_form_cls(request.REQUEST, prefix="parameterization")
    if not parameterization_form.is_valid():
        # Previously this case fell off the end of the function and returned None.
        raise PopupException(_("Query form is invalid: %s") % parameterization_form.errors)

    real_query = substitute_variables(query_str, parameterization_form.cleaned_data)
    query = HQLdesign(query_form, query_type=query_type)
    query._data_dict["query"]["query"] = real_query

    try:
        if explain:
            return explain_directly(request, query, design, query_server)
        return execute_directly(request, query, query_server, design)
    except Exception as ex:
        db = dbms.get(request.user, query_server)
        error_message, log = expand_exception(ex, db)
        return render(
            "execute.mako",
            request,
            {
                "action": reverse(app_name + ":execute_query"),
                "design": design,
                "error_message": error_message,
                "form": query_form,
                "log": log,
                "autocomplete_base_url": reverse(app_name + ":autocomplete", kwargs={}),
            },
        )
def query_history_to_dict(request, query_history):
    """
    Convert a query history object into a plain dictionary.

    The dictionary carries the id, last state, query text, result flag and
    statement number, plus the URLs for watching the query and viewing its
    results. A 'design' entry is added only when the history has a design.
    """
    history_id = query_history.id

    watch_url = reverse(
        get_app_name(request) + ':api_watch_query_refresh_json',
        kwargs={'id': history_id}
    )
    results_url = reverse(
        get_app_name(request) + ':view_results',
        kwargs={'id': history_id, 'first_row': 0}
    )

    as_dict = dict(
        id=history_id,
        state=query_history.last_state,
        query=query_history.query,
        has_results=query_history.has_results,
        statement_number=query_history.statement_number,
        watch_url=watch_url,
        results_url=results_url,
    )

    design = query_history.design
    if design:
        as_dict['design'] = design_to_dict(design)

    return as_dict
def explain_query(request): response = {"status": -1, "message": ""} if request.method != "POST": response["message"] = _("A POST request is required.") app_name = get_app_name(request) query_type = beeswax_models.SavedQuery.TYPES_MAPPING[app_name] try: form = get_query_form(request) if form.is_valid(): query = SQLdesign(form, query_type=query_type) query_server = dbms.get_query_server_config(request.POST.get("server")) db = dbms.get(request.user, query_server) try: db.use(form.cleaned_data["database"]) datatable = db.explain(query) results = db.client.create_result(datatable) response["status"] = 0 response["results"] = results_to_dict(results) except Exception, e: response["status"] = -1 response["message"] = str(e) else:
def load_table(request, database, table):
    """
    Build and directly execute a LOAD DATA statement for ``database.table``.

    On a POST with a valid LoadDataForm the HQL is assembled from the form's
    path, overwrite flag and partition columns, then executed; any failure is
    re-raised as a PopupException.

    NOTE(review): as visible here, nothing is returned for a non-POST request
    or an invalid form -- confirm whether a render step is missing.
    """
    table_obj = dbms.get(request.user).get_table(database, table)

    if request.method == "POST":
        form = beeswax.forms.LoadDataForm(table_obj, request.POST)

        if form.is_valid():
            cleaned = form.cleaned_data

            # TODO(philip/todd): When PathField might refer to non-HDFS,
            # we need a pathfield.is_local function.
            pieces = ["LOAD DATA INPATH", " '%s'" % cleaned['path']]
            if cleaned['overwrite']:
                pieces.append(" OVERWRITE")
            pieces.append(" INTO TABLE ")
            pieces.append("`%s.%s`" % (database, table,))

            if form.partition_columns:
                assignments = [
                    "%s='%s'" % (column_name, cleaned[key])
                    for key, column_name in form.partition_columns.iteritems()
                ]
                pieces.append(" PARTITION (")
                pieces.append(", ".join(assignments))
                pieces.append(")")

            hql = "".join(pieces)

            on_success_url = reverse(
                get_app_name(request) + ':describe_table',
                kwargs={'database': database, 'table': table}
            )
            query = hql_query(hql, database=database)

            try:
                return execute_directly(request, query, on_success_url=on_success_url)
            except Exception as e:
                raise PopupException(_("Can't load the data"), detail=e)
def list_designs(request):
    """
    View function for show all saved queries.

    We get here from /beeswax/list_designs?filterargs, with the options being:
      page=<n>    - Controls pagination. Defaults to 1.
      user=<name> - Show design items belonging to a user. Default to all users.
      type=<type> - <type> is "hql", for saved query type. Default to show all.
      sort=<key>  - Sort by the attribute <key>, which is one of:
                      "date", "name", "desc", and "type" (design type)
                    Accepts the form "-date", which sort in descending order.
                    Default to "-date".
      text=<frag> - Search for fragment "frag" in names and descriptions.
    """
    DEFAULT_PAGE_SIZE = 20
    app_name = get_app_name(request)

    # Pull the prefixed filter arguments out of the GET parameters.
    prefix = 'q-'
    filters = _copy_prefix(prefix, request.GET)

    # The type filter is always forced to the current app.
    filters[prefix + 'type'] = app_name

    page, filter_params = _list_designs(request.user, filters, DEFAULT_PAGE_SIZE, prefix)
    design_ids = [query.id for query in page.object_list]

    context = {
        'page': page,
        'filter_params': filter_params,
        'user': request.user,
        'designs_json': json.dumps(design_ids)
    }
    return render('list_designs.mako', request, context)
def execute_directly(request, query, design, query_server, tablename=None, **kwargs):
    """
    Submit ``query`` to ``query_server`` and answer with a JSON description.

    When a design is given it is first re-fetched through the authorization
    check. The database named in the query (or 'default') is selected before
    executing. An optional ``parameters`` keyword is stored on the history
    object. The JSON response carries the history id, the watch URL, the
    current statement and the redaction flag.
    """
    if design is not None:
        design = authorized_get_design(request, design.id)

    parameters = kwargs.pop('parameters', None)

    db = dbms.get(request.user, query_server)
    db.use(query.query.get('database', 'default'))

    history_obj = db.execute_query(query, design)
    watch_url = reverse(
        get_app_name(request) + ':api_watch_query_refresh_json',
        kwargs={'id': history_obj.id}
    )

    if parameters is not None:
        history_obj.update_extra('parameters', parameters)

    history_obj.save()

    return JsonResponse({
        'status': 0,
        'id': history_obj.id,
        'watch_url': watch_url,
        'statement': history_obj.get_current_statement(),
        'is_redacted': history_obj.is_redacted
    })
def save_query_design(request, design_id=None):
    """
    Save the posted query form as a design and report the outcome as JSON.

    The response starts as ``{'status': -1, 'message': ''}``. On a valid form
    the design is saved and 'design_id' plus status 0 are set; on an invalid
    form the per-section form errors are returned under 'errors'. A
    RuntimeError raised while processing is reported in 'message'.

    NOTE: a non-POST request only sets the message; processing still
    continues afterwards, as in the original code.
    """
    response = {'status': -1, 'message': ''}

    if request.method != 'POST':
        response['message'] = _('A POST request is required.')

    app_name = get_app_name(request)
    query_type = beeswax.models.SavedQuery.TYPES_MAPPING[app_name]
    design = safe_get_design(request, query_type, design_id)

    try:
        query_form = get_query_form(request)

        if query_form.is_valid():
            design = save_design(request, query_form, query_type, design, True)
            response['design_id'] = design.id
            response['status'] = 0
        else:
            response['errors'] = {
                'query': [query_form.query.errors],
                'settings': query_form.settings.errors,
                'file_resources': query_form.file_resources.errors,
                'functions': query_form.functions.errors,
                'saveform': query_form.saveform.errors,
            }
    except RuntimeError as e:
        response['message'] = str(e)

    # The response used to be built and then dropped; hand it back to the client.
    return JsonResponse(response)
def execute_query(request, design_id=None):
    """
    View function for executing an arbitrary query.

    It understands the optional GET/POST params:

      on_success_url
        If given, it will be displayed when the query is successfully finished.
        Otherwise, it will display the view query results page by default.

    NOTE(review): only the setup part of this view is visible here (no
    response is returned) -- confirm against the complete function.
    """
    authorized_get_design(request, design_id)

    error_message, log = None, None
    form = QueryForm()
    action = request.path

    app_name = get_app_name(request)
    query_type = SavedQuery.TYPES_MAPPING[app_name]
    design = safe_get_design(request, query_type, design_id)
    on_success_url = request.REQUEST.get('on_success_url')

    query_server = get_query_server_config(app_name)
    db = dbms.get(request.user, query_server)

    # Fall back to an empty database list when the lookup fails.
    databases = []
    try:
        databases = get_db_choices(request)
    except Exception as ex:
        error_message, log = expand_exception(ex, db)
def list_trashed_designs(request):
    """
    List the trashed designs of the current app, one page at a time.

    Filters come from the 'q-' prefixed GET parameters; the type filter is
    forced to the current app and a plain 'text' GET parameter, when present,
    is copied into the text filter.
    """
    DEFAULT_PAGE_SIZE = 20
    app_name = get_app_name(request)
    user = request.user

    # Extract the saved query list filters.
    prefix = 'q-'
    filters = _copy_prefix(prefix, request.GET)

    # Force the type filter to the current app.
    filters[prefix + 'type'] = app_name

    # Forward the search text, if any.
    text = request.GET.get('text', None)
    if text is not None:
        filters[prefix + 'text'] = text

    page, filter_params = _list_designs(user, filters, DEFAULT_PAGE_SIZE, prefix, is_trashed=True)
    design_ids = [query.id for query in page.object_list]

    context = {
        'page': page,
        'filter_params': filter_params,
        'prefix': prefix,
        'user': request.user,
        'designs_json': json.dumps(design_ids)
    }
    return render('list_trashed_designs.mako', request, context)
def get_session(request, session_id=None):
    """
    Return, as JSON, the session with ``session_id`` or the latest session.

    With a ``session_id`` the session must belong to the requesting user and
    to the current query server; otherwise the manager's ``get_session`` is
    asked for the user's session on that server. Property values whose key
    contains 'password' are masked with asterisks. When no session is found
    the response keeps status -1 and carries an explanatory message.
    """
    app_name = get_app_name(request)
    query_server = get_query_server_config(app_name)
    server_name = query_server['server_name']

    response = {'status': -1, 'message': ''}

    if session_id:
        try:
            session = Session.objects.get(id=session_id, owner=request.user, application=server_name)
        except Session.DoesNotExist:
            # Without this, an unknown id raised instead of reaching the
            # "could not find session" answer below.
            session = None
    else:
        # get the latest session for given user and server type
        session = Session.objects.get_session(request.user, server_name)

    if session is not None:
        properties = json.loads(session.properties)

        # Redact passwords
        for key, value in properties.items():
            if 'password' in key.lower():
                properties[key] = '*' * len(value)

        response['status'] = 0
        response['session'] = {
            'id': session.id,
            'application': session.application,
            'status': session.status_code
        }
        response['properties'] = properties
    else:
        response['message'] = _('Could not find session or no open sessions found.')

    return JsonResponse(response)
def explain_query(request): response = {'status': -1, 'message': ''} if request.method != 'POST': response['message'] = _('A POST request is required.') app_name = get_app_name(request) query_type = beeswax_models.SavedQuery.TYPES_MAPPING[app_name] try: form = get_query_form(request) if form.is_valid(): query = SQLdesign(form, query_type=query_type) query_server = dbms.get_query_server_config(app_name) db = dbms.get(request.user, query_server) try: db.use(form.cleaned_data['database']) datatable = db.explain(query) results = db.client.create_result(datatable) response['status'] = 0 response['results'] = results_to_dict(results) except Exception, e: response['status'] = -1 response['message'] = str(e) else:
def list_trashed_designs(request):
    """
    Render the paginated list of trashed designs for the current app.

    The 'q-' prefixed GET parameters are used as filters. The type filter is
    always set to the app name, and a 'text' GET parameter (if supplied) is
    used as the text filter.
    """
    DEFAULT_PAGE_SIZE = 20
    prefix = "q-"

    app_name = get_app_name(request)
    user = request.user

    # Start from the prefixed GET arguments, then pin the type to this app.
    querydict_query = _copy_prefix(prefix, request.GET)
    querydict_query[prefix + "type"] = app_name

    # Only override the text filter when a search fragment was sent.
    search_filter = request.GET.get("text", None)
    if search_filter is not None:
        querydict_query[prefix + "text"] = search_filter

    page, filter_params = _list_designs(
        user, querydict_query, DEFAULT_PAGE_SIZE, prefix, is_trashed=True
    )

    designs_json = json.dumps([query.id for query in page.object_list])
    return render(
        "list_trashed_designs.mako",
        request,
        dict(
            page=page,
            filter_params=filter_params,
            prefix=prefix,
            user=request.user,
            designs_json=designs_json,
        ),
    )
def view_results(request, id, first_row=0):
    """
    Returns the view for the results of the QueryHistory with the given id.

    The query results MUST be ready. To display query results, one should
    always go through the execute_query view. If the result set has
    has_result_set=False, display an empty result.

    If ``first_row`` is 0, restarts (if necessary) the query read. Otherwise,
    just spits out a warning if first_row doesn't match the servers
    conception. Multiple readers will produce a confusing interaction here,
    and that's known.

    It understands the ``context`` GET parameter. (See execute_query().)

    NOTE(review): only the fetching part of this view is visible here (no
    response is returned) -- confirm against the complete function.
    """
    first_row = long(first_row)
    start_over = (first_row == 0)

    # Empty placeholder result, used when nothing is fetched.
    results = type('Result', (object,), {
        'rows': 0,
        'columns': [],
        'has_more': False,
        'start_row': 0,
    })
    data, columns = [], []
    fetch_error = False
    error_message, log = '', ''

    app_name = get_app_name(request)

    query_history = authorized_get_query_history(request, id, must_exist=True)
    query_server = query_history.get_query_server_config()
    db = dbms.get(request.user, query_server)

    handle, state = _get_query_handle_and_state(query_history)
    context_param = request.GET.get('context', '')
    query_context = parse_query_context(context_param)

    # Update the status as expired should not be accessible
    expired = state == models.QueryHistory.STATE.expired

    # Retrieve query results or use empty result if no result set
    try:
        if query_server['server_name'] != 'impala' or handle.has_result_set:
            results = db.fetch(handle, start_over, 100)

            # Materialize and HTML escape results
            data = escape_rows(results.rows())

            # We display the "Download" button only when we know that there are results:
            downloadable = first_row > 0 or data

            log = db.get_log(handle)
            columns = results.data_table.cols()
        else:
            downloadable = False
    except Exception as ex:
        LOG.exception('error fetching results')

        fetch_error = True
        error_message, log = expand_exception(ex, db, handle)
def get_sample_data(request, database, table, column=None):
    """
    Return sample data for a table (optionally a single column) as JSON.

    The lookup itself is delegated to ``_get_sample_data`` using the database
    client of the current app's query server.
    """
    query_server = get_query_server_config(get_app_name(request))
    db = dbms.get(request.user, query_server)

    return JsonResponse(_get_sample_data(db, database, table, column))
def configuration(request):
    """
    Render the default configuration of the current app's query server.

    The ``include_hadoop`` request parameter is passed on as a boolean; any
    non-empty value counts as true.
    """
    app_name = get_app_name(request)
    query_server = get_query_server_config(app_name)

    db = dbms.get(request.user, query_server)
    include_hadoop = bool(request.REQUEST.get("include_hadoop", False))
    config_values = db.get_default_configuration(include_hadoop)

    context = {'config_values': config_values}
    return render("configuration.mako", request, context)
def analyze_table(request, database, table, columns=None):
    """
    Start an analyze job for a table and answer with JSON.

    The query server comes from the current app and the optional ``cluster``
    POST parameter; a table flagged as Impala-only is analyzed through the
    'impala' server instead. On POST, the table analysis is used when
    ``columns`` is None and the column analysis otherwise, and the response
    gets the watch URL and status 0. Any other method yields an error message.
    """
    app_name = get_app_name(request)
    cluster = json.loads(request.POST.get('cluster', '{}'))

    query_server = get_query_server_config(app_name, cluster=cluster)
    db = dbms.get(request.user, query_server)

    table_obj = db.get_table(database, table)
    if table_obj.is_impala_only and app_name != 'impala':
        # Switch to the Impala server for tables only Impala can handle.
        query_server = get_query_server_config('impala')
        db = dbms.get(request.user, query_server)

    response = {'status': -1, 'message': '', 'redirect': ''}

    if request.method != "POST":
        response['message'] = _('A POST request is required.')
        return JsonResponse(response)

    analyze = db.analyze_table if columns is None else db.analyze_table_columns
    query_history = analyze(database, table)

    response['watch_url'] = reverse(
        'beeswax:api_watch_query_refresh_json',
        kwargs={'id': query_history.id}
    )
    response['status'] = 0

    return JsonResponse(response)
def show_tables(request, database=None):
    """
    Show the tables of a database.

    Without a ``database`` the last used one is looked up. A POST answers
    with JSON holding the rendered table list and the tables, or an escaped
    'error' message when listing or rendering fails. Any other request
    renders the page with the database form and stores the database in the
    "hueHcatalogLastDatabase" cookie.
    """
    if database is None:
        database = _get_last_database(request, database)

    if request.method == 'POST':
        resp = {}
        try:
            tables = _get_table_list(request, database)
            template_args = dict(
                app_name=get_app_name(request),
                database=database,
                tables=tables,
            )
            table_list_rendered = django_mako.render_to_string("table_list.mako", template_args)
        except Exception as ex:
            resp['error'] = escapejs(ex.message)
        else:
            resp.update(table_list_rendered=table_list_rendered, tables=tables)
        return HttpResponse(json.dumps(resp))

    databases = dbms.get(request.user).get_databases()
    db_form = hcatalog.forms.DbForm(initial={'database': database}, databases=databases)

    context = {
        'database': database,
        'db_form': db_form,
    }
    response = render("show_tables.mako", request, context)
    response.set_cookie("hueHcatalogLastDatabase", database, expires=90)
    return response
def autocomplete(request, database=None, table=None, column=None, nested=None):
    """
    Return autocomplete metadata as JSON, at the most specific level given.

    - no database: the list of databases
    - database only: the tables of that database
    - database and table: the table's HDFS link and its columns
    - database, table and column: the parsed type of the column, narrowed to
      ``nested`` when provided

    Superusers, or users holding the security app's "impersonate" permission,
    may pass a ``doas`` GET parameter to query as another user. Query server
    timeouts and transport errors are reported with code 503 and the error
    message.
    """
    app_name = get_app_name(request)
    query_server = get_query_server_config(app_name)

    do_as = request.user
    may_impersonate = (
        request.user.is_superuser
        or request.user.has_hue_permission(action="impersonate", app="security")
    )
    if may_impersonate and 'doas' in request.GET:
        do_as = User.objects.get(username=request.GET.get('doas'))

    db = dbms.get(do_as, query_server)
    response = {}

    try:
        if database is None:
            response['databases'] = db.get_databases()
        elif table is None:
            response['tables'] = db.get_tables(database=database)
        elif column is None:
            t = db.get_table(database, table)
            response['hdfs_link'] = t.hdfs_link
            # Use a distinct loop name: in Python 2 a list comprehension
            # variable leaks and would overwrite the ``column`` parameter.
            response['columns'] = [col.name for col in t.cols]
            response['extended_columns'] = massage_columns_for_json(t.cols)
        else:
            col = db.get_column(database, table, column)
            if not col:
                raise Exception('Could not find column `%s`.`%s`.`%s`' % (database, table, column))

            parse_tree = parser.parse_column(col.name, col.type, col.comment)
            if nested:
                parse_tree = _extract_nested_type(parse_tree, nested)
            response = parse_tree
    except (QueryServerTimeoutException, TTransportException) as e:
        response['code'] = 503
        response['error'] = e.message

    # The collected metadata used to be dropped; send it to the client.
    return JsonResponse(response)
def process_response(self, request, response):
    """
    Write an audit record for the request and pass the response through.

    The record is keyed by the current UTC time formatted with '%s' and holds
    the user name ('anonymous' when the request has no user), the response
    status code, the client address, whether the status was 401, the app
    name and the request path. The response is tagged with an 'audited'
    header. Auditing failures are logged and never block the response.
    """
    try:
        audit_logger = get_audit_logger()
        record = {
            'user': request.user.username if hasattr(request, 'user') else 'anonymous',
            "status": response.status_code,
            "impersonator": None,
            "ip_address": request.META.get('REMOTE_ADDR'),
            "authorization_failure": response.status_code == 401,
            "service": get_app_name(request),
            "url": request.path,
        }
        audit_logger.debug(JsonMessage(**{datetime.utcnow().strftime('%s'): record}))
        response['audited'] = True
    except Exception as e:
        LOG.error('Could not audit the request: %s' % e)

    # A response middleware hook has to hand the response back; without this
    # the hook returned None.
    return response
def save_query(request, design_id=None):
    """
    Save the posted query form as a design and report the outcome as JSON.

    The 'appName' choices of the form are filled from ``api.jars()`` before
    validation. On success the response gets 'design_id' and status 0; on an
    invalid form the query and save-form errors are concatenated into
    'message'. A RuntimeError raised while processing is reported in
    'message' as well.

    NOTE: a non-POST request only sets the message; processing still
    continues afterwards, as in the original code.
    """
    response = {'status': -1, 'message': ''}

    if request.method != 'POST':
        response['message'] = _('A POST request is required.')

    app_name = get_app_name(request)
    query_type = beeswax_models.SavedQuery.TYPES_MAPPING[app_name]
    design = safe_get_design(request, query_type, design_id)
    form = QueryForm()

    api = get_api(request.user)
    app_names = api.jars()

    try:
        form.bind(request.POST)
        form.query.fields['appName'].choices = ((key, key) for key in app_names)

        if form.is_valid():
            design = save_design(request, form, query_type, design, True)
            response['design_id'] = design.id
            response['status'] = 0
        else:
            response['message'] = smart_str(form.query.errors) + smart_str(form.saveform.errors)
    except RuntimeError as e:
        response['message'] = str(e)

    # The response used to be built and then dropped; hand it back to the client.
    return JsonResponse(response)
def list_query_history(request):
    """
    View the history of query (for the current user).

    We get here from /beeswax/query_history?filterargs, with the options being:
      page=<n>            - Controls pagination. Defaults to 1.
      user=<name>         - Show history items from a user. Default to current
                            user only. Also accepts ':all' to show all history
                            items.
      type=<type>         - <type> is "report|hql", for design type. Default to
                            show all.
      design_id=<id>      - Show history for this particular design id.
      sort=<key>          - Sort by the attribute <key>, which is one of:
                              "date", "state", "name" (design name), and
                              "type" (design type)
                            Accepts the form "-date", which sort in descending
                            order. Default to "-date".
      auto_query=<bool>   - Show auto generated actions (drop table, read
                            data, etc). Default False
    """
    DEFAULT_PAGE_SIZE = 20

    share_queries = conf.SHARE_SAVED_QUERIES.get() or request.user.is_superuser

    filters = request.GET.copy()
    if not share_queries:
        # Without sharing, users may only see their own history.
        filters['user'] = request.user.username

    filters['type'] = get_app_name(request)

    page, filter_params = _list_query_history(request.user, filters, DEFAULT_PAGE_SIZE)

    context = {
        'request': request,
        'page': page,
        'filter_params': filter_params,
        'share_queries': share_queries,
    }
    return render('list_history.mako', request, context)
def load_table(request, database, table):
    """
    Show the load-data form for a table, or confirm the resulting statement.

    On a POST with a valid form, a LOAD DATA statement is assembled from the
    form's path, overwrite flag and partition columns and handed to
    ``confirm_query``, with the table description page as the success URL.
    Otherwise the form (unbound for non-POST requests) is rendered with
    "load_table.mako".
    """
    table_obj = dbms.get(request.user).get_table(database, table)

    if request.method != "POST":
        form = beeswax.forms.LoadDataForm(table_obj)
    else:
        form = beeswax.forms.LoadDataForm(table_obj, request.POST)

        if form.is_valid():
            cleaned = form.cleaned_data

            # TODO(philip/todd): When PathField might refer to non-HDFS,
            # we need a pathfield.is_local function.
            pieces = ["LOAD DATA INPATH", " '%s'" % cleaned['path']]
            if cleaned['overwrite']:
                pieces.append(" OVERWRITE")
            pieces.append(" INTO TABLE ")
            pieces.append("`%s.%s`" % (database, table,))

            if form.partition_columns:
                assignments = [
                    "%s='%s'" % (column_name, cleaned[key])
                    for key, column_name in form.partition_columns.iteritems()
                ]
                pieces.append(" PARTITION (")
                pieces.append(", ".join(assignments))
                pieces.append(")")

            hql = "".join(pieces)

            on_success_url = reverse(
                get_app_name(request) + ':describe_table',
                kwargs={'database': database, 'table': table}
            )
            return confirm_query(request, hql, on_success_url)

    context = {
        'form': form,
        'table': table,
        'action': request.get_full_path()
    }
    return render("load_table.mako", request, context)
def _run_parameterized_query(request, design_id, explain):
    """
    Given a design and arguments to parameterize that design, runs the query.

    - explain is a boolean to determine whether to run as an explain or as an
      execute.

    This is an extra "step" in the flow from execute_query.

    Raises PopupException when the query form or the parameterization form is
    invalid, or when the query has no parameters. If running the query fails,
    the 'execute.mako' page is rendered with the expanded error and log.
    """
    design = authorized_get_design(request, design_id, must_exist=True)

    # Reconstitute the form
    design_obj = beeswax.design.HQLdesign.loads(design.data)
    query_form = QueryForm()
    params = design_obj.get_query_dict()
    params.update(request.POST)
    query_form.bind(params)

    # Validate explicitly: an ``assert`` is removed under ``python -O``, which
    # would also skip the validation call that populates cleaned_data.
    if not query_form.is_valid():
        raise PopupException(_("Query form is invalid: %s") % query_form.errors)

    query_str = query_form.query.cleaned_data["query"]
    app_name = get_app_name(request)
    query_server = get_query_server_config(app_name)

    parameterization_form_cls = make_parameterization_form(query_str)
    if not parameterization_form_cls:
        raise PopupException(_("Query is not parameterizable."))

    parameterization_form = parameterization_form_cls(request.REQUEST, prefix="parameterization")
    if not parameterization_form.is_valid():
        # Previously this case fell off the end of the function and returned None.
        raise PopupException(_("Query form is invalid: %s") % parameterization_form.errors)

    real_query = substitute_variables(query_str, parameterization_form.cleaned_data)
    query = HQLdesign(query_form)
    query._data_dict['query']['query'] = real_query

    try:
        if explain:
            return explain_directly(request, query, design, query_server)
        return execute_directly(request, query, query_server, design)
    except Exception as ex:
        db = dbms.get(request.user, query_server)
        error_message, log = expand_exception(ex, db)
        return render('execute.mako', request, {
            'action': reverse(app_name + ':execute_query'),
            'design': design,
            'error_message': error_message,
            'form': query_form,
            'log': log,
            'autocomplete': json.dumps({}),
        })
def view_results(request, id, first_row=0):
    """
    Returns the view for the results of the QueryHistory with the given id.

    The query results MUST be ready. To display query results, one should
    always go through the watch_query view.

    If ``first_row`` is 0, restarts (if necessary) the query read. Otherwise,
    just spits out a warning if first_row doesn't match the servers
    conception. Multiple readers will produce a confusing interaction here,
    and that's known.

    It understands the ``context`` GET parameter. (See watch_query().)

    NOTE(review): only the fetching part of this view is visible here (no
    response is returned) -- confirm against the complete function.
    """
    first_row = long(first_row)
    start_over = (first_row == 0)

    results, data = None, None
    fetch_error = False
    error_message, log = '', ''

    app_name = get_app_name(request)

    query_history = authorized_get_history(request, id, must_exist=True)
    db = dbms.get(request.user, query_history.get_query_server_config())

    handle, state = _get_query_handle_and_state(query_history)
    context_param = request.GET.get('context', '')
    query_context = _parse_query_context(context_param)

    # To remove in Hue 2.3
    download = request.GET.get('download', '')

    # Update the status as expired should not be accessible
    # Impala does not support startover for now
    expired = state == models.QueryHistory.STATE.expired
    if expired or app_name == 'impala':
        state = models.QueryHistory.STATE.expired
        query_history.save_state(state)

    # Retrieve query results
    try:
        if download:
            # Download requested: skip fetching and use an empty placeholder.
            downloadable = True
            data = []
            results = type('Result', (object,), {
                'rows': 0,
                'columns': [],
                'has_more': False,
                'start_row': 0,
            })
        else:
            results = db.fetch(handle, start_over, 100)
            data = list(results.rows())  # Materialize results

            # We display the "Download" button only when we know that there are results:
            downloadable = first_row > 0 or data
        log = db.get_log(handle)
    except Exception as ex:
        fetch_error = True
        error_message, log = expand_exception(ex, db)
def get_query_form(request):
    """
    Collect the database choices needed to build the query form.

    Raises PopupException when the databases cannot be listed.

    NOTE(review): as visible here the function stops after collecting the
    choices and returns nothing, although callers use its result as a form --
    confirm against the complete function.
    """
    try:
        # Get database choices
        app_name = get_app_name(request)
        query_server = dbms.get_query_server_config(app_name)
        db = dbms.get(request.user, query_server)
        databases = [(name, name) for name in db.get_databases()]
    except Exception as e:
        raise PopupException(
            _('Unable to access databases, Query Server or Metastore may be down.'),
            detail=e
        )
def save_results_hdfs_file(request, query_history_id):
    """
    Save the results of a query to an HDFS file.

    Do not rerun the query.

    Answers with JSON. ``status`` is 0 on success and negative on failure:
      -1  the query did not succeed, or the submitted form is invalid
      -2  the query handle and state could not be retrieved
      -3  the upload itself failed
    A non-POST request only gets a message (status stays 0, as before).
    """
    response = {'status': 0, 'message': ''}

    query_history = authorized_get_query_history(request, query_history_id, must_exist=True)
    server_id, state = _get_query_handle_and_state(query_history)
    query_history.save_state(state)
    error_msg, log = None, None

    if request.method != 'POST':
        response['message'] = _('A POST request is required.')
        return JsonResponse(response)

    if not query_history.is_success():
        response['message'] = _('This query is %(state)s. Results unavailable.') % {'state': state}
        response['status'] = -1
        return JsonResponse(response)

    db = dbms.get(request.user, query_history.get_query_server_config())

    form = beeswax.forms.SaveResultsFileForm({
        'target_file': request.POST.get('path'),
        'overwrite': request.POST.get('overwrite', False),
    })

    if not form.is_valid():
        # An invalid form used to be reported as status 0 with no explanation.
        response['status'] = -1
        response['errors'] = form.errors
        return JsonResponse(response)

    target_file = form.cleaned_data['target_file']
    overwrite = form.cleaned_data['overwrite']

    try:
        handle, state = _get_query_handle_and_state(query_history)
    except Exception as ex:
        response['message'] = _('Cannot find query handle and state: %s') % str(query_history)
        response['status'] = -2
        return JsonResponse(response)

    try:
        if overwrite and request.fs.exists(target_file):
            if request.fs.isfile(target_file):
                request.fs.do_as_user(request.user.username, request.fs.rmtree, target_file)
            else:
                raise PopupException(_("The target path is a directory"))

        upload(target_file, handle, request.user, db, request.fs)

        response['type'] = 'hdfs-file'
        response['id'] = query_history.id
        response['query'] = query_history.query
        response['path'] = target_file
        response['success_url'] = '/filebrowser/view=%s' % target_file
        response['watch_url'] = reverse(
            get_app_name(request) + ':api_watch_query_refresh_json',
            kwargs={'id': query_history.id}
        )
    except Exception as ex:
        error_msg, log = expand_exception(ex, db)
        response['message'] = _('The result could not be saved: %s.') % error_msg
        response['status'] = -3

    # The final response used to be built and then dropped.
    return JsonResponse(response)
def watch_query(request, id):
    """
    Wait for the query to finish and (by default) displays the results of query id.
    It understands the optional GET params:

      on_success_url
        If given, it will be displayed when the query is successfully finished.
        Otherwise, it will display the view query results page by default.

      context
        A string of "name:data" that describes the context
        that generated this query result. It may be:
          - "table":"<table_name>"
          - "design":<design_id>

    All other GET params will be passed to on_success_url (if present).
    """
    query_history = authorized_get_history(request, id, must_exist=True)

    # GET param: context.
    context_param = request.GET.get('context', '')

    # GET param: on_success_url. Default to view_results
    results_url = reverse(
        get_app_name(request) + ':view_results',
        kwargs={'id': id, 'first_row': 0}
    )
    on_success_url = request.GET.get('on_success_url') or results_url

    # Refresh and persist the query state.
    handle, state = _get_query_handle_and_state(query_history)
    query_history.save_state(state)

    # Query finished? (Expired results are deliberately not rejected here.)
    if query_history.is_success():
        return format_preserving_redirect(request, on_success_url, request.GET)

    if query_history.is_failure():
        # When we fetch, Beeswax server will throw us a BeeswaxException, which has the
        # log we want to display.
        return format_preserving_redirect(request, results_url, request.GET)

    # Still running: show the waiting page with the current log.
    db = dbms.get(request.user, query_history.get_query_server_config())
    log = db.get_log(handle)

    # Translate context into something more meaningful (type, data)
    query_context = _parse_query_context(context_param)

    context = {
        'query': query_history,
        'fwd_params': request.GET.urlencode(),
        'log': log,
        'hadoop_jobs': _parse_out_hadoop_jobs(log),
        'query_context': query_context,
    }
    return render('watch_wait.mako', request, context)
def execute_directly(request, query, query_server=None, design=None, tablename=None,
                     on_success_url=None, on_success_params=None, **kwargs):
    """
    Execute ``query`` and redirect to the page that watches it.

    This wraps the database client's ``execute_query`` and takes care of the
    HTTP response after the execution.

    query
      The HQL model Query object.

    query_server
      To which Query Server to submit the query.
      Dictionary with keys: ['server_name', 'server_host', 'server_port'].

    design
      The design associated with the query.

    tablename
      The associated table name for the context.

    on_success_url
      Where to go after the query is done. The URL handler may expect an option
      "context" GET param. (See ``watch_query``.) For advanced usage,
      on_success_url can be a function, in which case the on complete URL is
      the return of:
        on_success_url(history_obj) -> URL string
      Defaults to the view results page.

    on_success_params
      Optional params to pass to the on_success_url (in additional to "context").

    Note that this may throw a Beeswax exception.
    """
    if design is not None:
        authorized_get_design(request, design.id)

    db = dbms.get(request.user, query_server)
    history_obj = db.execute_query(query, design)

    watch_url = reverse(get_app_name(request) + ':watch_query', kwargs={'id': history_obj.id})

    # Collect the GET params for the watch_url.
    watch_params = QueryDict(None, mutable=True)

    # (1) context: the design wins over the table name
    if design:
        watch_params['context'] = make_query_context('design', design.id)
    elif tablename:
        watch_params['context'] = make_query_context('table', tablename)

    # (2) on_success_url: resolve a callable into its URL first
    if on_success_url:
        target = on_success_url(history_obj) if callable(on_success_url) else on_success_url
        watch_params['on_success_url'] = target

    # (3) misc
    if on_success_params:
        watch_params.update(on_success_params)

    return format_preserving_redirect(request, watch_url, watch_params)
def _save_results_ctas(request, query_history, target_table, result_meta):
    """
    Save the results of a query as a new table.

    Two cases are handled:

    1. ``result_meta.in_tablename`` is set: the results come straight from an
       existing table, so a ``CREATE TABLE ... AS SELECT`` is submitted via
       ``execute_directly`` and its HTTP response is returned.
    2. Otherwise the results live in ``result_meta.table_dir``: the target
       table is created, the result files are moved into its storage
       location and the query history is marked as expired.

    If moving the data fails, the freshly created table is dropped
    (best effort) and the original exception is re-raised so the caller sees
    the failure instead of a silent ``None``.

    NOTE(review): case 2 has no explicit return on success in the visible
    code, so it yields None -- confirm whether a redirect is expected there.
    """
    query_server = query_history.get_query_server_config()  # Query server requires DDL support
    db = dbms.get(request.user)

    # Case 1: The results are straight from an existing table
    if result_meta.in_tablename:
        hql = 'CREATE TABLE `%s` AS SELECT * FROM %s' % (target_table, result_meta.in_tablename)
        query = hql_query(hql)
        # Display the CTAS running. Could take a long time.
        return execute_directly(request, query, query_server,
                                on_success_url=reverse(get_app_name(request) + ':show_tables'))

    # Case 2: The results are in some temporary location
    # 1. Create table
    cols = ',\n'.join('`%s` %s' % (field.name, field.type)
                      for field in result_meta.schema.fieldSchemas)

    # The representation of the delimiter is messy.
    # It came from Java as a string, which might have been converted from an integer.
    # So it could be "1" (^A), or "10" (\n), or "," (a comma literally).
    delim = result_meta.delim
    if not delim.isdigit():
        delim = str(ord(delim))

    hql = ''' CREATE TABLE `%s` ( %s ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\%s' STORED AS TextFile ''' % (target_table, cols, delim.zfill(3))

    query = hql_query(hql)
    db.execute_and_wait(query)

    try:
        # 2. Move the results into the table's storage
        table_obj = db.get_table('default', target_table)
        table_loc = request.fs.urlsplit(table_obj.path_location)[2]
        request.fs.rename_star(result_meta.table_dir, table_loc)
        LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, table_loc))
        messages.info(request, _('Saved query results as new table %(table)s') % {'table': target_table})
        query_history.save_state(models.QueryHistory.STATE.expired)
    except Exception as ex:
        LOG.error('Error moving data into storage of table %s. Will drop table.' % (target_table,))
        query = hql_query('DROP TABLE `%s`' % (target_table,))
        try:
            db.execute_directly(query)  # Don't wait for results
        except Exception as double_trouble:
            LOG.exception('Failed to drop table "%s" as well: %s' % (target_table, double_trouble))
        # Re-raise the original failure by name: a bare ``raise`` could pick up
        # the nested drop-table exception instead.
        raise ex
def view_results(request, id, first_row=0):
    """
    Return, as JSON, one page of results for the QueryHistory with this id.

    The query results must already be available. A handle without a result
    set on the 'impala' server produces an empty result.

    When ``first_row`` is 0 the read is restarted; otherwise the value is
    only echoed back as ``expected_first_row`` / ``start_row``. Multiple
    readers will produce a confusing interaction here, and that's known.

    The ``context`` GET parameter is parsed and passed through.
    """
    first_row = int(first_row)
    restart = (first_row == 0)

    # Placeholder used when nothing is fetched.
    results = type('Result', (object,), {
        'rows': 0,
        'columns': [],
        'has_more': False,
        'start_row': 0,
    })
    data = []
    fetch_error = False
    error_message = ''
    log = ''
    columns = []
    app_name = get_app_name(request)

    query_history = authorized_get_query_history(request, id, must_exist=True)
    query_server = query_history.get_query_server_config()
    db = dbms.get(request.user, query_server)

    handle, state = _get_query_handle_and_state(query_history)
    context_param = request.GET.get('context', '')
    query_context = parse_query_context(context_param)

    # Expired results should not be accessible.
    expired = state == models.QueryHistory.STATE.expired

    # Retrieve query results or use the empty result if there is no result set
    try:
        if query_server['server_name'] != 'impala' or handle.has_result_set:
            results = db.fetch(handle, restart, 100)
            # Materialize and HTML escape results
            data = escape_rows(results.rows())
            # The "Download" button is shown only when we know that there are results.
            downloadable = first_row > 0 or data
            log = db.get_log(handle)
            columns = results.data_table.cols()
        else:
            downloadable = False
    except Exception as ex:
        LOG.exception('error fetching results')
        fetch_error = True
        error_message, log = expand_exception(ex, db, handle)

    error = fetch_error or results is None or expired

    context = {
        'error': error,
        'message': error_message,
        'query': query_history,
        'results': data,
        'columns': columns,
        'expected_first_row': first_row,
        'log': log,
        'hadoop_jobs': app_name != 'impala' and parse_out_jobs(log),
        'query_context': query_context,
        'can_save': False,
        'context_param': context_param,
        'expired': expired,
        'app_name': app_name,
        'next_json_set': None,
        'is_finished': query_history.is_finished()
    }

    if not error:
        download_urls = {}
        if downloadable:
            for dl_format in common.DL_FORMATS:
                download_urls[dl_format] = reverse(
                    app_name + ':download', kwargs=dict(id=str(id), format=dl_format))

        results.start_row = first_row

        next_page_url = reverse(
            get_app_name(request) + ':view_results',
            kwargs={'id': str(id), 'first_row': results.start_row + len(data)})

        context.update({
            'id': id,
            'results': data,
            'has_more': results.has_more,
            'next_row': results.start_row + len(data),
            'start_row': results.start_row,
            'expected_first_row': first_row,
            'columns': columns,
            'download_urls': download_urls,
            'can_save': query_history.owner == request.user,
            # The context parameter is always appended, even when empty.
            'next_json_set': next_page_url + '?context=' + context_param + '&format=json'
        })

    context['columns'] = massage_columns_for_json(columns)
    # These entries are removed before the context is serialized.
    context.pop('save_form', None)
    context.pop('query', None)

    return JsonResponse(context)
def execute_query(request, design_id=None):
    """
    Handle the query form: save, explain or run the submitted query.

    Optional GET/POST parameter:

    on_success_url
        Redirect target once the query finishes successfully; forwarded to
        ``execute_directly``.

    The request start time is stored in the session under 'start_time'.
    When the chosen download format is one of ``common.DL_FORMATS``, the
    session flag 'dl_status' is set before the query is submitted.

    NOTE(review): only the POST handling up to the query-server error
    handler is visible here; what follows it is not shown.
    """
    authorized_get_design(request, design_id)
    request.session['start_time'] = time.time()  # FIXME: add job id to not intersect simultaneous jobs

    error_message = None
    form = QueryForm()
    action = request.path
    log = None
    app_name = get_app_name(request)
    query_type = SavedQuery.TYPES_MAPPING['beeswax']
    design = safe_get_design(request, query_type, design_id)
    on_success_url = request.REQUEST.get('on_success_url')
    query_server = get_query_server_config(app_name)
    db = dbms.get(request.user, query_server)
    databases = _get_db_choices(request)

    if request.method == 'POST':
        form.bind(request.POST)
        form.query.fields['database'].choices = databases  # Could not do it in the form

        to_explain = 'button-explain' in request.POST
        to_submit = 'button-submit' in request.POST

        # Always validate the saveform, which tells us whether it needs explicit saving
        if form.is_valid():
            to_save = form.saveform.cleaned_data['save']
            to_saveas = form.saveform.cleaned_data['saveas']

            if (to_save or to_saveas) and 'beeswax-autosave' in request.session:
                del request.session['beeswax-autosave']

            if to_saveas and not design.is_auto:
                # Save As only affects a previously saved query
                design = design.clone()

            if to_submit or to_save or to_saveas or to_explain:
                explicit_save = to_save or to_saveas
                design = save_design(request, form, query_type, design, explicit_save)
                action = urlresolvers.reverse(app_name + ':execute_query',
                                              kwargs=dict(design_id=design.id))

            if to_explain or to_submit:
                query_str = form.query.cleaned_data["query"]

                if conf.CHECK_PARTITION_CLAUSE_IN_QUERY.get():
                    query_str = _strip_trailing_semicolon(query_str)
                    # Check the query: a select on a partitioned table without
                    # partition keys is intercepted with a PopupException.
                    _check_partition_clause_in_query(
                        form.query.cleaned_data.get('database', None), query_str, db)

                # (Optional) Parameterization.
                parameterization = get_parameterization(request, query_str, form, design, to_explain)
                if parameterization:
                    return parameterization

                try:
                    query = HQLdesign(form, query_type=query_type)
                    if to_explain:
                        return explain_directly(request, query, design, query_server)

                    download = 'download' in request.POST
                    # Any falsy format is normalised to None.
                    download_format = form.query.cleaned_data.get('download_format', None) or None
                    if download_format in common.DL_FORMATS:
                        request.session['dl_status'] = True
                    return execute_directly(request, query, query_server, design,
                                            on_success_url=on_success_url,
                                            download_format=download_format,
                                            download=download)
                except QueryServerException as ex:
                    error_message, log = expand_exception(ex, db)
def watch_query(request, id, download_format=None):
    """
    Poll a query, advance multi-statement queries, and redirect when done.

    Recognised GET parameters:

    on_success_url
        Redirect target when the query finishes. Defaults to the results
        URL computed below.

    context
        A "name:data" string describing what generated the query, e.g.
        ``table:<table_name>`` or ``design:<design_id>``.

    download
        When truthy, ``?download=true`` is appended to the results URL.

    All GET parameters are forwarded to the redirect target. When the
    session flag 'dl_status' is set, a finished query redirects to the
    download view for ``download_format`` instead.
    """
    query_history = authorized_get_history(request, id, must_exist=True)
    db = dbms.get(request.user, query_history.get_query_server_config())

    context_param = request.GET.get('context', '')

    # Results URL: back to the editor when a download format was requested but
    # the session download flag is explicitly False (or absent); otherwise the
    # first results page.
    flag_is_false = request.session.get('dl_status', False) == False
    if flag_is_false and download_format in common.DL_FORMATS:
        results_url = urlresolvers.reverse(get_app_name(request) + ':execute_query')
    else:
        results_url = urlresolvers.reverse(get_app_name(request) + ':view_results',
                                           kwargs={'id': id, 'first_row': 0})
    if request.GET.get('download', ''):
        results_url += '?download=true'

    on_success_url = request.GET.get('on_success_url') or results_url

    # Go to next statement if asked to continue or when a statement with no dataset finished.
    should_advance = request.method == 'POST' or (
        not query_history.is_finished()
        and query_history.is_success()
        and not query_history.has_results)
    if should_advance:
        try:
            query_history = db.execute_next_statement(query_history)
        except Exception:
            pass

    # Check query state
    handle, state = _get_query_handle_and_state(query_history)
    query_history.save_state(state)

    if query_history.is_failure():
        # Fetching on the results page makes the server throw the exception
        # carrying the log we want to display.
        return format_preserving_redirect(request, results_url, request.GET)

    if query_history.is_finished() or (query_history.is_success() and query_history.has_results):
        if request.session.get('dl_status', False):  # BUG-20020
            on_success_url = urlresolvers.reverse(
                get_app_name(request) + ':download',
                kwargs=dict(id=str(id), format=download_format))
        # NOTE(review): assumed to run for every finished query, not only when
        # 'dl_status' is set -- confirm against the original indentation.
        _clean_session(request)
        return format_preserving_redirect(request, on_success_url, request.GET)

    # Still running: keep waiting and show the log gathered so far.
    log = db.get_log(handle)
    query_context = _parse_query_context(context_param)
    template_args = {
        'query': query_history,
        'fwd_params': request.GET.urlencode(),
        'log': log,
        'hadoop_jobs': _parse_out_hadoop_jobs(log)[0],
        'query_context': query_context,
        'download_format': download_format,  # ExpV
    }
    return render('watch_wait.mako', request, template_args)
def watch_query_refresh_json(request, id):
    """
    Return a JSON snapshot of a query: log, jobs and success/failure flags.

    POST parameters read here:

    next
        When truthy, the current operation is closed and the next statement
        is executed (``query-query`` is passed along to it).

    log-start-over
        The string 'true' asks the log to be fetched from the start.

    ``status`` in the response is 0 unless the query failed without being
    canceled, in which case it stays -1 and ``message`` carries the error.
    ``QueryServerException`` from advancing to the next statement is
    propagated; any other exception there is logged.
    """
    query_history = authorized_get_query_history(request, id, must_exist=True)
    db = dbms.get(request.user, query_history.get_query_server_config())

    if not request.POST.get('next'):
        # We need this as multi query would fail as current query is closed
        handle, state = _get_query_handle_and_state(query_history)
        query_history.save_state(state)

    # Go to next statement if asked to continue or when a statement with no dataset finished.
    try:
        if request.POST.get('next') or (not query_history.is_finished()
                                        and query_history.is_success()
                                        and not query_history.has_results):
            close_operation(request, id)
            query_history = db.execute_next_statement(query_history, request.POST.get('query-query'))
            handle, state = _get_query_handle_and_state(query_history)
    except QueryServerException as ex:
        raise ex
    except Exception as ex:
        LOG.exception(ex)
        handle, state = _get_query_handle_and_state(query_history)

    # A failure to read the log is reported as the log text itself.
    try:
        from_start = request.POST.get('log-start-over') == 'true'
        log = db.get_log(handle, start_over=from_start)
    except Exception as ex:
        log = str(ex)

    jobs = parse_out_jobs(log)
    job_urls = massage_job_urls_for_json(jobs)

    watch_url = reverse(get_app_name(request) + ':api_watch_query_refresh_json',
                        kwargs={'id': query_history.id})
    result = {
        'status': -1,
        'log': log,
        'jobs': jobs,
        'jobUrls': job_urls,
        'isSuccess': query_history.is_success(),
        'isFailure': query_history.is_failure(),
        'id': id,
        'statement': query_history.get_current_statement(),
        'watch_url': watch_url,
        'oldLogsApi': USE_GET_LOG_API.get()
    }

    if not query_history.is_failure():
        result['status'] = 0
        return JsonResponse(result)

    # Run time error
    res = db.get_operation_status(handle)
    if query_history.is_canceled(res):
        result['status'] = 0
    elif hasattr(res, 'errorMessage') and res.errorMessage:
        result['message'] = res.errorMessage
    else:
        result['message'] = _('Bad status for request %s:\n%s') % (id, res)

    return JsonResponse(result)
except Exception, ex: log = str(ex) jobs = _parse_out_hadoop_jobs(log) job_urls = massage_job_urls_for_json(jobs) result = { 'status': -1, 'log': log, 'jobs': jobs, 'jobUrls': job_urls, 'isSuccess': query_history.is_success(), 'isFailure': query_history.is_failure(), 'id': id, 'statement': query_history.get_current_statement(), 'watch_url': reverse(get_app_name(request) + ':api_watch_query_refresh_json', kwargs={'id': query_history.id}), 'oldLogsApi': USE_GET_LOG_API.get() } # Run time error if query_history.is_failure(): res = db.get_operation_status(handle) if query_history.is_canceled(res): result['status'] = 0 elif hasattr(res, 'errorMessage') and res.errorMessage: result['message'] = res.errorMessage else: result['message'] = _('Bad status for request %s:\n%s') % (id, res) else: result['status'] = 0
else: action = 'watch-results' else: action = 'editor-results' except QueryServerException, e: if 'Invalid query handle' in e.message or 'Invalid OperationHandle' in e.message: query_history.save_state(QueryHistory.STATE.expired) LOG.warn("Invalid query handle", exc_info=sys.exc_info()) action = 'editor-expired-results' else: raise e else: # Check perms. authorized_get_design(request, design_id) app_name = get_app_name(request) query_type = SavedQuery.TYPES_MAPPING[app_name] design = safe_get_design(request, query_type, design_id) query_history = None doc = design and design.id and design.doc.get() context = { 'design': design, 'query': query_history, # Backward 'query_history': query_history, 'autocomplete_base_url': reverse(get_app_name(request) + ':api_autocomplete_databases', kwargs={}),
def view_results(request, id, first_row=0):
    """
    Fetch one page of results for the QueryHistory with this id.

    The query results must already be available. A handle without a result
    set on the 'impala' server leaves the empty placeholder result in place.

    When ``first_row`` is 0 the read is restarted. Multiple readers will
    produce a confusing interaction here, and that's known.

    The ``context`` GET parameter is parsed as well.

    NOTE(review): only the fetch phase is visible here; how the response is
    built afterwards is not shown.
    """
    first_row = long(first_row)
    restart = (first_row == 0)

    # Placeholder used when nothing is fetched.
    results = type('Result', (object, ), {
        'rows': 0,
        'columns': [],
        'has_more': False,
        'start_row': 0,
    })
    data = []
    fetch_error = False
    error_message = ''
    log = ''
    columns = []
    app_name = get_app_name(request)

    query_history = authorized_get_query_history(request, id, must_exist=True)
    query_server = query_history.get_query_server_config()
    db = dbms.get(request.user, query_server)

    handle, state = _get_query_handle_and_state(query_history)
    context_param = request.GET.get('context', '')
    query_context = parse_query_context(context_param)

    # Expired results should not be accessible.
    expired = state == models.QueryHistory.STATE.expired

    # Retrieve query results or use the empty result if there is no result set
    try:
        if query_server['server_name'] != 'impala' or handle.has_result_set:
            results = db.fetch(handle, restart, 100)
            # Materialize and HTML escape results
            data = escape_rows(results.rows())
            # The "Download" button is shown only when we know that there are results.
            downloadable = first_row > 0 or data
            log = db.get_log(handle)
            columns = results.data_table.cols()
        else:
            downloadable = False
    except Exception as ex:
        LOG.exception('error fetching results')
        fetch_error = True
        error_message, log = expand_exception(ex, db, handle)
def process_view(self, request, view_func, view_args, view_kwargs):
    """
    Enforce login and per-app permissions before a view runs.

    Access logging is done here as well, since the view function (which
    carries the log level) is available; the status code is not.

    Returns None to let the request proceed, or a response that denies
    access or sends the client to the login page.
    """
    request.ts = time.time()
    request.view_func = view_func
    access_log_level = getattr(view_func, 'access_log_level', None)

    # Skip loop for oidc
    oidc_paths = [
        '/oidc/authenticate/', '/oidc/callback/', '/oidc/logout/', '/hue/oidc_failed/'
    ]
    if request.path in oidc_paths:
        return None

    # Views that opted out of login, and the Django views that are opt-out too
    # (it would be evil to go add attributes to them), are only logged.
    if hasattr(view_func, "login_notrequired") or view_func in DJANGO_VIEW_AUTH_WHITELIST:
        log_page_hit(request, view_func, level=access_log_level or logging.DEBUG)
        return None

    # If user is logged in, check that he has permissions to access the app
    if request.user.is_active and request.user.is_authenticated:
        AppSpecificMiddleware.augment_request_with_app(request, view_func)

        # Until Django 1.3 which resolves returning the URL name, just do a match of the name of the view
        try:
            view_name = resolve(request.path)[0].__name__
            access_view = 'access_view:%s:%s' % (request._desktop_app, view_name)
        except Exception as e:
            access_log(request, 'error checking view perm: %s' % e, level=access_log_level)
            access_view = ''

        app_accessed = request._desktop_app
        app_libs_whitelist = [
            "desktop", "home", "home2", "about", "hue", "editor", "notebook", "indexer", "404", "500", "403"
        ]
        if has_connectors():
            app_libs_whitelist.append('metadata')
        if DASHBOARD_ENABLED.get():
            app_libs_whitelist.append('dashboard')

        # Accessing an app can access an underlying other app.
        # e.g. impala or spark uses code from beeswax and so accessing impala shows up as beeswax here.
        # Here we trust the URL to be the real app we need to check the perms.
        ui_app_accessed = get_app_name(request)
        if app_accessed != ui_app_accessed and ui_app_accessed not in ('logs', 'accounts', 'login'):
            app_accessed = ui_app_accessed

        denied = (
            app_accessed
            and app_accessed not in app_libs_whitelist
            and not (
                is_admin(request.user)
                or request.user.has_hue_permission(action="access", app=app_accessed)
                or request.user.has_hue_permission(action=access_view, app=app_accessed)
            )
            and not (app_accessed == '__debug__' and DJANGO_DEBUG_MODE.get())
        )
        if denied:
            access_log(request, 'permission denied', level=access_log_level)
            message = _("You do not have permission to access the %(app_name)s application.") % {
                'app_name': app_accessed.capitalize()
            }
            return PopupException(message, error_code=401).response(request)

        if not hasattr(request, 'view_func'):
            log_page_hit(request, view_func, level=access_log_level)
        return None

    if desktop.conf.CORS_ENABLED.get():
        user = authenticate(request, username='******', password='******')
        if user is not None:
            login(request, user)
            return None

    logging.info("Redirecting to login page: %s", request.get_full_path())
    access_log(request, 'login redirection', level=access_log_level)

    no_idle_backends = (
        "libsaml.backend.SAML2Backend",
        "desktop.auth.backend.SpnegoDjangoBackend",
        "desktop.auth.backend.KnoxSpnegoDjangoBackend",
    )
    if request.ajax and all(backend not in AUTH.BACKEND.get() for backend in no_idle_backends):
        # Send back a magic header which causes Hue.Request to interpose itself
        # in the ajax request and make the user login before resubmitting the
        # request.
        response = HttpResponse("/* login required */", content_type="text/javascript")
        response[MIDDLEWARE_HEADER] = 'LOGIN_REQUIRED'
        return response

    if request.GET.get('is_embeddable'):
        # Remove embeddable so redirect from & to login works. Login page is not embeddable
        next_path = '/hue' + request.get_full_path().replace('is_embeddable=true', '').replace('&&', '&')
        return JsonResponse({
            'url': "%s?%s=%s" % (settings.LOGIN_URL, REDIRECT_FIELD_NAME, quote(next_path))
        })

    return HttpResponseRedirect(
        "%s?%s=%s" % (settings.LOGIN_URL, REDIRECT_FIELD_NAME, quote(request.get_full_path())))
else: action = 'watch-results' else: action = 'editor-results' except QueryServerException, e: if 'Invalid query handle' in e.message or 'Invalid OperationHandle' in e.message: query_history.save_state(QueryHistory.STATE.expired) LOG.warn("Invalid query handle", exc_info=sys.exc_info()) action = 'editor-expired-results' else: raise e else: # Check perms. authorized_get_design(request, design_id) app_name = get_app_name(request) query_type = SavedQuery.TYPES_MAPPING[app_name] design = safe_get_design(request, query_type, design_id) query_history = None context = { 'design': design, 'query': query_history, # Backward 'query_history': query_history, 'autocomplete_base_url': reverse(get_app_name(request) + ':api_autocomplete_databases', kwargs={}), 'autocomplete_base_url_hive': reverse('beeswax:api_autocomplete_databases', kwargs={}), 'can_edit_name': design and design.id and not design.is_auto, 'can_edit': design and design.id and design.doc.get().can_write(request.user), 'action': action, 'on_success_url': request.GET.get('on_success_url'), 'has_metastore': 'metastore' in get_apps_dict(request.user)
def execute_query(request, design_id=None):
    """
    Handle the query form: save, explain or run the submitted query.

    Optional GET/POST parameter:

    on_success_url
        Redirect target once the query finishes successfully; forwarded to
        ``execute_directly``.

    NOTE(review): only the POST handling up to the error handler is visible
    here; what follows it is not shown.
    """
    authorized_get_design(request, design_id)

    error_message = None
    form = QueryForm()
    action = request.path
    log = None
    app_name = get_app_name(request)
    query_type = SavedQuery.TYPES_MAPPING[app_name]
    design = safe_get_design(request, query_type, design_id)
    on_success_url = request.REQUEST.get('on_success_url')
    query_server = get_query_server_config(app_name)
    db = dbms.get(request.user, query_server)
    databases = _get_db_choices(request)

    if request.method == 'POST':
        form.bind(request.POST)
        form.query.fields['database'].choices = databases  # Could not do it in the form

        to_explain = 'button-explain' in request.POST
        to_submit = 'button-submit' in request.POST

        # Always validate the saveform, which tells us whether it needs explicit saving
        if form.is_valid():
            to_save = form.saveform.cleaned_data['save']
            to_saveas = form.saveform.cleaned_data['saveas']

            if to_saveas and not design.is_auto:
                # Save As only affects a previously saved query
                design = design.clone()

            if to_submit or to_save or to_saveas or to_explain:
                explicit_save = to_save or to_saveas
                design = save_design(request, form, query_type, design, explicit_save)
                action = reverse(app_name + ':execute_query', kwargs=dict(design_id=design.id))

            if to_explain or to_submit:
                query_str = form.query.cleaned_data["query"]

                # (Optional) Parameterization.
                parameterization = get_parameterization(request, query_str, form, design, to_explain)
                if parameterization:
                    return parameterization

                try:
                    query = HQLdesign(form, query_type=query_type)
                    if to_explain:
                        return explain_directly(request, query, design, query_server)

                    download = 'download' in request.POST
                    return execute_directly(request, query, query_server, design,
                                            on_success_url=on_success_url,
                                            download=download)
                except Exception as ex:
                    error_message, log = expand_exception(ex, db)
def _get_db_choices(request):
    """
    Return ``(name, name)`` pairs for every database of the current app.

    The databases are listed through the query server configured for the
    app resolved from ``request``. A list is returned rather than a one-shot
    generator, so the result can be iterated more than once (for example
    when it is assigned as the ``choices`` of a form field).
    """
    app_name = get_app_name(request)
    query_server = get_query_server_config(app_name)
    db = dbms.get(request.user, query_server)
    # A distinct loop name keeps the ``db`` handle from being shadowed.
    return [(name, name) for name in db.get_databases()]
def process_view(self, request, view_func, view_args, view_kwargs):
    """
    Check that a logged-in user may access the app behind ``view_func``.

    Access logging is done here as well, since the view function (which
    carries the log level) is available; the status code is not.

    Returns None when the view opted out of login, is whitelisted, or the
    user has permission; returns a 401 popup response when permission is
    denied.

    NOTE(review): nothing is visible here for users who are not
    authenticated; the function falls through and returns None for them.
    """
    access_log_level = getattr(view_func, 'access_log_level', None)

    # First, skip views not requiring login
    # If the view has "opted out" of login required, skip
    if hasattr(view_func, "login_notrequired"):
        log_page_hit(request, view_func, level=access_log_level or logging.DEBUG)
        return None

    # There are certain django views which are also opt-out, but
    # it would be evil to go add attributes to them
    if view_func in DJANGO_VIEW_AUTH_WHITELIST:
        log_page_hit(request, view_func, level=access_log_level or logging.DEBUG)
        return None

    # If user is logged in, check that he has permissions to access the app.
    if request.user.is_active and request.user.is_authenticated():
        AppSpecificMiddleware.augment_request_with_app(request, view_func)

        # Until we get Django 1.3 and resolve returning the URL name, we just do a match of the name of the view
        try:
            access_view = 'access_view:%s:%s' % (
                request._desktop_app, resolve(request.path)[0].__name__)
        except Exception as e:
            # Format the message here: the other access_log calls pass only
            # (request, msg, level=...), so the exception must not be handed
            # over as an extra positional argument.
            access_log(request, 'error checking view perm: %s' % e, level=access_log_level)
            access_view = ''

        # Accessing an app can access an underlying other app.
        # e.g. impala or spark uses code from beeswax and so accessing impala shows up as beeswax here.
        # Here we trust the URL to be the real app we need to check the perms.
        app_accessed = request._desktop_app
        ui_app_accessed = get_app_name(request)
        if app_accessed != ui_app_accessed and ui_app_accessed not in ('logs', 'accounts', 'login'):
            app_accessed = ui_app_accessed

        if app_accessed and \
                app_accessed not in ("desktop", "home", "about") and \
                not (request.user.has_hue_permission(action="access", app=app_accessed) or
                     request.user.has_hue_permission(action=access_view, app=app_accessed)):
            access_log(request, 'permission denied', level=access_log_level)
            return PopupException(
                _("You do not have permission to access the %(app_name)s application.") % {
                    'app_name': app_accessed.capitalize()
                },
                error_code=401).response(request)
        else:
            log_page_hit(request, view_func, level=access_log_level)
            return None
def execute_query(request, design_id=None, query_history_id=None):
    """
    Render the query editor for a saved design or an existing query history.

    With ``query_history_id`` the editor opens on that history and
    ``action`` tells the page what to do: follow a whitelisted
    ``on_success_url`` ('watch-redirect'), watch the results
    ('watch-results'), show them ('editor-results'), or report that the
    handle is no longer valid ('editor-expired-results'). Without it, access
    to the design is checked, the design is loaded and ``action`` stays
    'query'.
    """
    action = 'query'

    if query_history_id:
        query_history = authorized_get_query_history(request, query_history_id, must_exist=True)
        design = query_history.design

        try:
            if query_history.server_id and query_history.server_guid:
                handle, state = _get_query_handle_and_state(query_history)

            if 'on_success_url' not in request.GET:
                action = 'editor-results'
            elif request.GET.get('on_success_url') and any([
                    regexp.match(request.GET.get('on_success_url'))
                    for regexp in REDIRECT_WHITELIST.get()]):
                action = 'watch-redirect'
            else:
                action = 'watch-results'
        except QueryServerException as e:
            handle_is_gone = ('Invalid query handle' in e.message
                              or 'Invalid OperationHandle' in e.message)
            if not handle_is_gone:
                raise e
            query_history.save_state(QueryHistory.STATE.expired)
            LOG.warn("Invalid query handle", exc_info=sys.exc_info())
            action = 'editor-expired-results'
    else:
        # Check perms.
        authorized_get_design(request, design_id)

        app_name = get_app_name(request)
        query_type = SavedQuery.TYPES_MAPPING[app_name]
        design = safe_get_design(request, query_type, design_id)
        query_history = None

    current_app, other_apps, apps_list = _get_apps(request.user, '')
    # Falsy unless the design exists and has been saved.
    doc = design and design.id and design.doc.get()

    context = {
        'design': design,
        'apps': apps_list,
        'query': query_history,  # Backward
        'query_history': query_history,
        'autocomplete_base_url': reverse(get_app_name(request) + ':api_autocomplete_databases', kwargs={}),
        'autocomplete_base_url_hive': reverse('beeswax:api_autocomplete_databases', kwargs={}),
        'can_edit_name': design and design.id and not design.is_auto,
        'doc_id': doc and doc.id or -1,
        'can_edit': doc and doc.can_write(request.user),
        'action': action,
        'on_success_url': request.GET.get('on_success_url'),
        'has_metastore': 'metastore' in get_apps_dict(request.user)
    }

    return render('execute.mako', request, context)
def import_wizard(request, database='default'):
    """
    Walk the user through defining a table from a file to import into Hive.

    Limitations:
      - Rows are delimited (no serde).
      - No detection for map and array types.
      - No detection for the presence of column header in the first row.
      - No partition table.
      - Does not work with binary data.

    A GET request, an invalid file form, or cancelling the delimiter step
    renders the file-selection page.
    """
    encoding = i18n.get_site_encoding()
    app_name = get_app_name(request)

    if request.method == 'POST':
        # Single-pass loop: ``break`` is the easy way out to the file-selection page.
        for _ in range(1):
            #
            # General processing logic:
            # - We have 3 steps. Each requires the previous.
            #   * Step 1    : Table name and file location
            #   * Step 2a   : Display sample with auto chosen delim
            #   * Step 2b   : Display sample with user chosen delim (if user chooses one)
            #   * Step 3    : Display sample, and define columns
            # - Each step is represented by a different form. The form of an earlier step
            #   should be present when submitting to a later step.
            # - To preserve the data from the earlier steps, we send the forms back as
            #   hidden fields. This way, when users revisit a previous step, the data would
            #   be there as well.
            #
            delim_is_auto = False
            fields_list, n_cols = [[]], 0
            s3_col_formset = None

            # Everything requires a valid file form
            db = dbms.get(request.user)
            s1_file_form = CreateByImportFileForm(request.POST, db=db)
            if not s1_file_form.is_valid():
                break

            post = request.POST
            do_s2_auto_delim = post.get('submit_file')          # Step 1 -> 2
            do_s2_user_delim = post.get('submit_preview')       # Step 2 -> 2
            do_s3_column_def = post.get('submit_delim')         # Step 2 -> 3
            do_hive_create = post.get('submit_create')          # Step 3 -> execute

            cancel_s2_user_delim = post.get('cancel_delim')     # Step 2 -> 1
            cancel_s3_column_def = post.get('cancel_create')    # Step 3 -> 2

            # Exactly one of these should be True
            submitted = (do_s2_auto_delim, do_s2_user_delim, do_s3_column_def,
                         do_hive_create, cancel_s2_user_delim, cancel_s3_column_def)
            assert sum(1 for flag in submitted if flag) == 1, 'Invalid form submission'

            #
            # Fix up what we should do in case any form is invalid
            #
            if not do_s2_auto_delim:
                # We should have a valid delim form
                s2_delim_form = CreateByImportDelimForm(request.POST)
                if not s2_delim_form.is_valid():
                    # Go back to picking delimiter
                    do_s2_user_delim, do_s3_column_def, do_hive_create = True, False, False

            if do_hive_create:
                # We should have a valid columns formset
                s3_col_formset = ColumnTypeFormSet(prefix='cols', data=request.POST)
                if not s3_col_formset.is_valid():
                    # Go back to define columns
                    do_s3_column_def, do_hive_create = True, False

            #
            # Go to step 2: We've just picked the file. Preview it.
            #
            if do_s2_auto_delim:
                delim_is_auto = True
                fields_list, n_cols, s2_delim_form = _delim_preview(
                    request.fs, s1_file_form, encoding,
                    [reader.TYPE for reader in FILE_READERS], DELIMITERS)

            if (do_s2_user_delim or do_s3_column_def or cancel_s3_column_def) and s2_delim_form.is_valid():
                # Delimit based on input
                fields_list, n_cols, s2_delim_form = _delim_preview(
                    request.fs, s1_file_form, encoding,
                    (s2_delim_form.cleaned_data['file_type'],),
                    (s2_delim_form.cleaned_data['delimiter'],))

            if do_s2_auto_delim or do_s2_user_delim or cancel_s3_column_def:
                delim_data = s2_delim_form['delimiter'].data
                return render('choose_delimiter.mako', request, {
                    'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
                    'delim_readable': DELIMITER_READABLE.get(delim_data[0], delim_data[1]),
                    'initial': delim_is_auto,
                    'file_form': s1_file_form,
                    'delim_form': s2_delim_form,
                    'fields_list': fields_list,
                    'delimiter_choices': TERMINATOR_CHOICES,
                    'n_cols': n_cols,
                    'database': database,
                })

            #
            # Go to step 3: Define column.
            #
            if do_s3_column_def:
                if s3_col_formset is None:
                    # Default every column to a generated name of type string.
                    columns = [
                        dict(column_name='col_%s' % (i,), column_type='string')
                        for i in range(n_cols)
                    ]
                    s3_col_formset = ColumnTypeFormSet(prefix='cols', initial=columns)
                return render('define_columns.mako', request, {
                    'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
                    'file_form': s1_file_form,
                    'delim_form': s2_delim_form,
                    'column_formset': s3_col_formset,
                    'fields_list': fields_list,
                    'n_cols': n_cols,
                    'database': database,
                })

            #
            # Finale: Execute
            #
            if do_hive_create:
                delim = s2_delim_form.cleaned_data['delimiter']
                table_name = s1_file_form.cleaned_data['name']
                proposed_query = django_mako.render_to_string("create_table_statement.mako", {
                    'table': dict(name=table_name,
                                  comment=s1_file_form.cleaned_data['comment'],
                                  row_format='Delimited',
                                  field_terminator=delim),
                    'columns': [f.cleaned_data for f in s3_col_formset.forms],
                    'partition_columns': [],
                    'database': database,
                })

                do_load_data = s1_file_form.cleaned_data.get('do_import')
                path = s1_file_form.cleaned_data['path']
                return _submit_create_and_load(request, proposed_query, table_name, path,
                                               do_load_data, database=database)
    else:
        s1_file_form = CreateByImportFileForm()

    return render('choose_file.mako', request, {
        'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
        'file_form': s1_file_form,
        'database': database,
    })
def save_results_hive_table(request, query_history_id):
    """
    Save the results of a query to a hive table.

    Rerun the query.

    The view always answers with ``JsonResponse(response)``. ``response['status']``
    is:
       0  default value (also used for the "POST required" message and on success)
      -1  the query is not in a successful state
      -2  the query handle or the result metadata could not be retrieved
      -3  creating the table from the results failed
       1  the submitted form did not validate (details in ``response['errors']``)
    """
    response = {'status': 0, 'message': ''}

    query_history = authorized_get_query_history(request, query_history_id, must_exist=True)
    # Persist the freshly fetched state before deciding whether results can be saved.
    server_id, state = _get_query_handle_and_state(query_history)
    query_history.save_state(state)

    if request.method != 'POST':
        response['message'] = _('A POST request is required.')
        return JsonResponse(response)

    if not query_history.is_success():
        response['message'] = _('This query is %(state)s. Results unavailable.') % {'state': state}
        response['status'] = -1
        return JsonResponse(response)

    db = dbms.get(request.user, query_history.get_query_server_config())
    database = query_history.design.get_design().query.get('database', 'default')
    form = beeswax.forms.SaveResultsTableForm(
        {'target_table': request.POST.get('table')}, db=db, database=database)

    if not form.is_valid():
        # Report validation problems instead of answering with a "status 0" payload.
        response['status'] = 1
        response['errors'] = form.errors
        return JsonResponse(response)

    try:
        handle, state = _get_query_handle_and_state(query_history)
        result_meta = db.get_results_metadata(handle)
    except Exception:
        response['message'] = _('Cannot find query handle and state: %s') % str(query_history)
        response['status'] = -2
        return JsonResponse(response)

    try:
        # create_table_as_a_select() hands back the history of the new CTAS query;
        # the response fields below describe that new query.
        query_history = db.create_table_as_a_select(
            request, query_history, form.target_database,
            form.cleaned_data['target_table'], result_meta)
        response['id'] = query_history.id
        response['query'] = query_history.query
        response['type'] = 'hive-table'
        response['path'] = form.cleaned_data['target_table']
        response['success_url'] = reverse(
            'metastore:describe_table',
            kwargs={
                'database': form.target_database,
                'table': form.cleaned_data['target_table']
            })
        response['watch_url'] = reverse(
            get_app_name(request) + ':api_watch_query_refresh_json',
            kwargs={'id': query_history.id})
    except Exception as ex:
        error_msg, log = expand_exception(ex, db)
        response['message'] = _('The result could not be saved: %s.') % error_msg
        response['status'] = -3

    # Every path that reaches this point must still hand the payload back to Django.
    return JsonResponse(response)
def save_results_hdfs_file(request, query_history_id):
    """
    Save the results of a query to an HDFS file.

    Do not rerun the query.

    The view always answers with ``JsonResponse(response)``. ``response['status']``
    is:
       0  default value (also used for the "POST required" message and on success)
      -1  the query is not in a successful state
      -2  the query handle could not be retrieved
      -3  writing the results to the target file failed
       1  the submitted form did not validate (details in ``response['errors']``)
    """
    response = {'status': 0, 'message': ''}

    query_history = authorized_get_query_history(request, query_history_id, must_exist=True)
    # Persist the freshly fetched state before deciding whether results can be saved.
    server_id, state = _get_query_handle_and_state(query_history)
    query_history.save_state(state)

    if request.method != 'POST':
        response['message'] = _('A POST request is required.')
        return JsonResponse(response)

    if not query_history.is_success():
        response['message'] = _('This query is %(state)s. Results unavailable.') % {'state': state}
        response['status'] = -1
        return JsonResponse(response)

    db = dbms.get(request.user, query_history.get_query_server_config())
    form = beeswax.forms.SaveResultsFileForm({
        'target_file': request.POST.get('path'),
        'overwrite': request.POST.get('overwrite', False),
    })

    if not form.is_valid():
        # Report validation problems instead of answering with a "status 0" payload.
        response['status'] = 1
        response['errors'] = form.errors
        return JsonResponse(response)

    target_file = form.cleaned_data['target_file']
    overwrite = form.cleaned_data['overwrite']

    try:
        handle, state = _get_query_handle_and_state(query_history)
    except Exception:
        response['message'] = _('Cannot find query handle and state: %s') % str(query_history)
        response['status'] = -2
        return JsonResponse(response)

    try:
        if overwrite and request.fs.exists(target_file):
            # Only an existing plain file is removed; an existing directory is refused.
            if request.fs.isfile(target_file):
                request.fs.do_as_user(request.user.username, request.fs.rmtree, target_file)
            else:
                raise PopupException(_("The target path is a directory"))

        upload(target_file, handle, request.user, db, request.fs)

        response['type'] = 'hdfs-file'
        response['id'] = query_history.id
        response['query'] = query_history.query
        response['path'] = target_file
        response['success_url'] = '/filebrowser/view%s' % target_file
        response['watch_url'] = reverse(
            get_app_name(request) + ':api_watch_query_refresh_json',
            kwargs={'id': query_history.id})
    except Exception as ex:
        error_msg, log = expand_exception(ex, db)
        response['message'] = _('The result could not be saved: %s.') % error_msg
        response['status'] = -3

    # Every path that reaches this point must still hand the payload back to Django.
    return JsonResponse(response)
def execute(request, design_id=None): response = {'status': -1, 'message': ''} if request.method != 'POST': response['message'] = _('A POST request is required.') app_name = get_app_name(request) query_server = get_query_server_config(app_name) query_type = beeswax.models.SavedQuery.TYPES_MAPPING[app_name] design = safe_get_design(request, query_type, design_id) try: query_form = get_query_form(request) if query_form.is_valid(): query_str = query_form.query.cleaned_data["query"] explain = request.GET.get('explain', 'false').lower() == 'true' design = save_design(request, query_form, query_type, design, False) if query_form.query.cleaned_data['is_parameterized']: # Parameterized query parameterization_form_cls = make_parameterization_form( query_str) if parameterization_form_cls: parameterization_form = parameterization_form_cls( request.REQUEST, prefix="parameterization") if parameterization_form.is_valid(): parameters = parameterization_form.cleaned_data real_query = substitute_variables( query_str, parameters) query = HQLdesign(query_form, query_type=query_type) query._data_dict['query']['query'] = real_query try: if explain: return explain_directly( request, query_server, query) else: return execute_directly(request, query, design, query_server, parameters=parameters) except Exception, ex: db = dbms.get(request.user, query_server) error_message, log = expand_exception(ex, db) response['message'] = error_message return JsonResponse(response) else: response['errors'] = parameterization_form.errors return JsonResponse(response) # Non-parameterized query query = HQLdesign(query_form, query_type=query_type) if request.GET.get('explain', 'false').lower() == 'true': return explain_directly(request, query_server, query) else: return execute_directly(request, query, design, query_server) else:
'log': log, 'jobs': jobs, 'jobUrls': job_urls, 'isSuccess': query_history.is_success(), 'isFailure': query_history.is_failure(), 'id': id, 'statement': query_history.get_current_statement(), 'watch_url': reverse(get_app_name(request) + ':api_watch_query_refresh_json', kwargs={'id': query_history.id}), 'oldLogsApi': USE_GET_LOG_API.get() } # Run time error if query_history.is_failure(): res = db.get_operation_status(handle) if query_history.is_canceled(res): result['status'] = 0 elif hasattr(res, 'errorMessage') and res.errorMessage: result['message'] = res.errorMessage else: result['message'] = _('Bad status for request %s:\n%s') % (id, res) else:
download_urls[format] = reverse(app_name + ':download', kwargs=dict(id=str(id), format=format)) save_form = beeswax.forms.SaveResultsForm() results.start_row = first_row context.update({ 'results': data, 'has_more': results.has_more, 'next_row': results.start_row + len(data), 'start_row': results.start_row, 'expected_first_row': first_row, 'columns': results.columns, 'download_urls': download_urls, 'save_form': save_form, 'can_save': query_history.owner == request.user and not download, 'next_json_set': reverse(get_app_name(request) + ':view_results', kwargs={ 'id': str(id), 'first_row': results.start_row + len(data) }) + ('?context=' + context_param or '') + '&format=json' }) if request.GET.get('format') == 'json': context = { 'results': data, 'has_more': results.has_more, 'next_row': results.start_row + len(data), 'start_row': results.start_row, 'next_json_set': reverse(get_app_name(request) + ':view_results', kwargs={ 'id': str(id), 'first_row': results.start_row + len(data) }) + ('?context=' + context_param or '') + '&format=json'
def describe_table(request, database, table):
    """
    Describe a table, either as JSON or as the rendered metastore page.

    request  -- the HTTP request; ``cluster``, ``source_type`` and ``format`` are
                read from POST (``source_type`` falls back to GET, then 'hive').
    database -- name of the database holding the table.
    table    -- name of the table; rebound to the object returned by
                ``db.get_table()`` once it has been looked up.

    Returns a JsonResponse when the POST ``format`` is "json", otherwise the
    rendered "metastore.mako" page.
    Raises PopupException when the table lookup fails.
    """
    app_name = get_app_name(request)
    cluster = json.loads(request.POST.get('cluster', '{}'))
    source_type = request.POST.get('source_type', request.GET.get('source_type', 'hive'))
    db = _get_db(user=request.user, source_type=source_type, cluster=cluster)

    try:
        table = db.get_table(database, table)
    except Exception as e:
        LOG.exception("Describe table error")
        raise PopupException(
            _("DB Error"),
            detail=e.message if hasattr(e, 'message') and e.message else e)

    if request.POST.get("format", "html") == "json":
        return JsonResponse({
            'status': 0,
            'name': table.name,
            'partition_keys': [{
                'name': part.name,
                'type': part.type
            } for part in table.partition_keys],
            'cols': [{
                'name': col.name,
                'type': col.type,
                'comment': col.comment
            } for col in table.cols],
            'path_location': table.path_location,
            'hdfs_link': table.hdfs_link,
            'comment': table.comment,
            'is_view': table.is_view,
            'properties': table.properties,
            'details': table.details,
            'stats': table.stats
        })

    # Render HTML
    renderable = "metastore.mako"
    apps_list = _get_apps(request.user, '')

    partitions = None
    if app_name != 'impala' and table.partition_keys:
        try:
            partitions = [
                _massage_partition(database, table, partition)
                for partition in db.get_partitions(database, table)
            ]
        except Exception:
            # Best effort: the page is still rendered without partitions. Catching
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
            LOG.exception('Table partitions could not be retrieved')

    return render(
        renderable, request, {
            'apps': apps_list,
            'breadcrumbs': [
                {
                    'name': database,
                    'url': reverse('metastore:show_tables', kwargs={'database': database})
                },
                {
                    'name': str(table.name),
                    'url': reverse('metastore:describe_table', kwargs={
                        'database': database,
                        'table': table.name
                    })
                },
            ],
            'table': table,
            'partitions': partitions,
            'database': database,
            'has_write_access': has_write_access(request.user),
            'is_optimizer_enabled': has_optimizer(),
            'is_navigator_enabled': has_catalog(request.user),
            'optimizer_url': get_optimizer_url(),
            'navigator_url': get_catalog_url(),
            'is_embeddable': request.GET.get('is_embeddable', False),
            'source_type': _get_servername(db),
        })
def view_results(request, id, first_row=0):
    """
    Render the results page for the QueryHistory identified by ``id``.

    The query results MUST be ready; to display query results, callers are
    expected to go through the watch_query view.

    When ``first_row`` is 0 the read is started over (if necessary). For any
    other value the page only reports the row the caller expected; concurrent
    readers will step on each other, which is a known limitation.

    The ``context`` GET parameter is understood (see watch_query()), and a
    non-empty ``download`` GET parameter skips fetching rows altogether.
    """
    first_row = long(first_row)
    restart_read = (first_row == 0)

    results = None
    data = None
    fetch_failed = False
    error_message = ''
    log = ''

    app_name = get_app_name(request)
    query_history = authorized_get_history(request, id, must_exist=True)
    db = dbms.get(request.user, query_history.get_query_server_config())
    handle, state = _get_query_handle_and_state(query_history)

    context_param = request.GET.get('context', '')
    query_context = _parse_query_context(context_param)

    # To remove in Hue 2.3
    download = request.GET.get('download', '')

    # An expired query must not be accessible: record that state on the history.
    expired = state == QueryHistory.STATE.expired
    if expired:
        query_history.save_state(QueryHistory.STATE.expired)

    # Retrieve query results
    try:
        if download:
            # Download mode: no rows are fetched, an empty stand-in result is used.
            downloadable = True
            data = []
            results = type('Result', (object, ), {
                'rows': 0,
                'columns': [],
                'has_more': False,
                'start_row': 0,
            })
        else:
            results = db.fetch(handle, restart_read, 100)
            # Materialize results
            data = list(results.rows())
            # The "Download" button is shown only when we know there are results.
            downloadable = first_row > 0 or data
        log = db.get_log(handle)
    except Exception as ex:
        fetch_failed = True
        error_message, log = expand_exception(ex, db)

    # Handle errors
    error = fetch_failed or results is None or expired

    hadoop_jobs = _parse_out_hadoop_jobs(log)[0]
    context = {
        'error': error,
        'error_message': error_message,
        'has_more': True,
        'query': query_history,
        'results': data,
        'expected_first_row': first_row,
        'log': log,
        'hadoop_jobs': hadoop_jobs,
        'query_context': query_context,
        'can_save': False,
        'context_param': context_param,
        'expired': expired,
        'app_name': app_name,
        'download': download,
    }

    if not error:
        if downloadable:
            download_urls = dict(
                (fmt, urlresolvers.reverse('beeswax' + ':download',
                                           kwargs=dict(id=str(id), format=fmt)))
                for fmt in common.DL_FORMATS)
        else:
            download_urls = {}

        save_form = SaveResultsForm()
        results.start_row = first_row
        start_row = results.start_row

        context.update({
            'results': data,
            'has_more': results.has_more,
            'next_row': start_row + len(data),
            'start_row': start_row,
            'expected_first_row': first_row,
            'columns': results.columns,
            'download_urls': download_urls,
            'save_form': save_form,
            'can_save': query_history.owner == request.user and not download,
        })

    return render('watch_results.mako', request, context)
def import_wizard(request, database='default'): """ Help users define table and based on a file they want to import to Hive. Limitations: - Rows are delimited (no serde). - No detection for map and array types. - No detection for the presence of column header in the first row. - No partition table. - Does not work with binary data. """ encoding = i18n.get_site_encoding() app_name = get_app_name(request) db = dbms.get(request.user) dbs = db.get_databases() databases = [{ 'name': db, 'url': reverse('beeswax:import_wizard', kwargs={'database': db}) } for db in dbs] if request.method == 'POST': # # General processing logic: # - We have 3 steps. Each requires the previous. # * Step 1 : Table name and file location # * Step 2a : Display sample with auto chosen delim # * Step 2b : Display sample with user chosen delim (if user chooses one) # * Step 3 : Display sample, and define columns # - Each step is represented by a different form. The form of an earlier step # should be present when submitting to a later step. # - To preserve the data from the earlier steps, we send the forms back as # hidden fields. This way, when users revisit a previous step, the data would # be there as well. 
# delim_is_auto = False fields_list, n_cols = [[]], 0 s3_col_formset = None s1_file_form = CreateByImportFileForm(request.POST, db=db) if s1_file_form.is_valid(): do_s2_auto_delim = request.POST.get('submit_file') # Step 1 -> 2 do_s2_user_delim = request.POST.get( 'submit_preview') # Step 2 -> 2 do_s3_column_def = request.POST.get('submit_delim') # Step 2 -> 3 do_hive_create = request.POST.get( 'submit_create') # Step 3 -> execute cancel_s2_user_delim = request.POST.get( 'cancel_delim') # Step 2 -> 1 cancel_s3_column_def = request.POST.get( 'cancel_create') # Step 3 -> 2 # Exactly one of these should be True if len( filter(None, (do_s2_auto_delim, do_s2_user_delim, do_s3_column_def, do_hive_create, cancel_s2_user_delim, cancel_s3_column_def))) != 1: raise PopupException(_('Invalid form submission')) if not do_s2_auto_delim: # We should have a valid delim form s2_delim_form = CreateByImportDelimForm(request.POST) if not s2_delim_form.is_valid(): # Go back to picking delimiter do_s2_user_delim, do_s3_column_def, do_hive_create = True, False, False if do_hive_create: # We should have a valid columns formset s3_col_formset = ColumnTypeFormSet(prefix='cols', data=request.POST) if not s3_col_formset.is_valid(): # Go back to define columns do_s3_column_def, do_hive_create = True, False # # Go to step 2: We've just picked the file. Preview it. 
# if do_s2_auto_delim: delim_is_auto = True fields_list, n_cols, s2_delim_form = _delim_preview( request.fs, s1_file_form, encoding, [reader.TYPE for reader in FILE_READERS], DELIMITERS) if (do_s2_user_delim or do_s3_column_def or cancel_s3_column_def) and s2_delim_form.is_valid(): # Delimit based on input fields_list, n_cols, s2_delim_form = _delim_preview( request.fs, s1_file_form, encoding, (s2_delim_form.cleaned_data['file_type'], ), (s2_delim_form.cleaned_data['delimiter'], )) if do_s2_auto_delim or do_s2_user_delim or cancel_s3_column_def: return render( 'import_wizard_choose_delimiter.mako', request, { 'action': reverse(app_name + ':import_wizard', kwargs={'database': database}), 'delim_readable': DELIMITER_READABLE.get( s2_delim_form['delimiter'].data[0], s2_delim_form['delimiter'].data[1]), 'initial': delim_is_auto, 'file_form': s1_file_form, 'delim_form': s2_delim_form, 'fields_list': fields_list, 'delimiter_choices': TERMINATOR_CHOICES, 'n_cols': n_cols, 'database': database, 'databases': databases }) # # Go to step 3: Define column. # if do_s3_column_def: if s3_col_formset is None: columns = [] for i in range(n_cols): columns.append({ 'column_name': 'col_%s' % (i, ), 'column_type': 'string', }) s3_col_formset = ColumnTypeFormSet(prefix='cols', initial=columns) try: fields_list_for_json = list(fields_list) if fields_list_for_json: fields_list_for_json[0] = map( lambda a: re.sub('[^\w]', '', a), fields_list_for_json[0]) # Cleaning headers return render( 'import_wizard_define_columns.mako', request, { 'action': reverse(app_name + ':import_wizard', kwargs={'database': database}), 'file_form': s1_file_form, 'delim_form': s2_delim_form, 'column_formset': s3_col_formset, 'fields_list': fields_list, 'fields_list_json': json.dumps(fields_list_for_json), 'n_cols': n_cols, 'database': database, 'databases': databases }) except Exception, e: raise PopupException(_( "The selected delimiter is creating an un-even number of columns. 
Please make sure you don't have empty columns." ), detail=e) # # Final: Execute # if do_hive_create: delim = s2_delim_form.cleaned_data['delimiter'] table_name = s1_file_form.cleaned_data['name'] proposed_query = django_mako.render_to_string( "create_table_statement.mako", { 'table': { 'name': table_name, 'comment': s1_file_form.cleaned_data['comment'], 'row_format': 'Delimited', 'field_terminator': delim, 'file_format': 'TextFile' }, 'columns': [f.cleaned_data for f in s3_col_formset.forms], 'partition_columns': [], 'database': database, 'databases': databases }) do_load_data = s1_file_form.cleaned_data.get('do_import') path = s1_file_form.cleaned_data['path'] try: return _submit_create_and_load(request, proposed_query, table_name, path, do_load_data, database=database) except QueryServerException, e: raise PopupException(_('The table could not be created.'), detail=e.message)
if design.id is not None: data = HQLdesign.loads(design.data).get_query_dict() form.bind(data) form.saveform.set_data(design.name, design.desc) else: # New design form.bind() form.query.fields['database'].choices = databases # Could not do it in the form return render('execute.mako', request, { 'action': action, 'design': design, 'error_message': error_message, 'form': form, 'log': log, 'autocomplete_base_url': reverse(get_app_name(request) + ':autocomplete', kwargs={}), 'on_success_url': on_success_url, 'can_edit_name': design and not design.is_auto and design.name, }) def execute_parameterized_query(request, design_id): return _run_parameterized_query(request, design_id, False) def explain_parameterized_query(request, design_id): return _run_parameterized_query(request, design_id, True) def watch_query(request, id): """
def view_results(request, id, first_row=0): """ Returns the view for the results of the QueryHistory with the given id. The query results MUST be ready. To display query results, one should always go through the execute_query view. If the result set has has_result_set=False, display an empty result. If ``first_row`` is 0, restarts (if necessary) the query read. Otherwise, just spits out a warning if first_row doesn't match the servers conception. Multiple readers will produce a confusing interaction here, and that's known. It understands the ``context`` GET parameter. (See execute_query().) """ first_row = long(first_row) start_over = (first_row == 0) results = type('Result', (object,), { 'rows': 0, 'columns': [], 'has_more': False, 'start_row': 0, }) data = [] fetch_error = False error_message = '' log = '' columns = [] app_name = get_app_name(request) query_history = authorized_get_query_history(request, id, must_exist=True) query_server = query_history.get_query_server_config() db = dbms.get(request.user, query_server) handle, state = _get_query_handle_and_state(query_history) context_param = request.GET.get('context', '') query_context = parse_query_context(context_param) # Update the status as expired should not be accessible # Impala does not support startover for now expired = state == models.QueryHistory.STATE.expired # Retrieve query results or use empty result if no result set try: if query_server['server_name'] == 'impala' and not handle.has_result_set: downloadable = False else: results = db.fetch(handle, start_over, 100) data = [] # Materialize and HTML escape results # TODO: use Number + list comprehension for row in results.rows(): escaped_row = [] for field in row: if isinstance(field, (int, long, float, complex, bool)): escaped_field = field elif field is None: escaped_field = 'NULL' else: field = smart_unicode(field, errors='replace') # Prevent error when getting back non utf8 like charset=iso-8859-1 escaped_field = escape(field).replace(' ', ' ') 
escaped_row.append(escaped_field) data.append(escaped_row) # We display the "Download" button only when we know that there are results: downloadable = first_row > 0 or data log = db.get_log(handle) columns = results.data_table.cols() except Exception, ex: fetch_error = True error_message, log = expand_exception(ex, db, handle)
def execute_directly(request, query, query_server=None, design=None, tablename=None,
                     on_success_url=None, on_success_params=None, **kwargs):
    """
    Execute ``query`` and redirect the client to the page watching it.

    This wraps ``db.execute_query()`` and takes care of the HTTP response that
    follows the submission.

    query
      The HQL model Query object.

    query_server
      To which Query Server to submit the query.
      Dictionary with keys: ['server_name', 'server_host', 'server_port'].

    design
      The design associated with the query. When given, access to it is
      checked first and it is used as the "context" of the watch page.

    tablename
      The associated table name for the context (used only without a design).

    on_success_url
      Where to go after the query is done. The URL handler may expect an option
      "context" GET param. (See ``watch_query``.) It can also be a callable, in
      which case the URL is the return of: on_success_url(history_obj) -> URL string.

    on_success_params
      Optional params to pass along in addition to "context".

    kwargs
      Only ``download`` is read: a truthy value appends '?download=true' to
      the watch URL.

    Note that this may throw a Beeswax exception.
    """
    if design is not None:
        authorized_get_design(request, design.id)

    server = dbms.get(request.user, query_server)
    database = query.query.get('database', 'default')
    server.use(database)

    history = server.execute_query(query, design)

    watch_url = reverse(get_app_name(request) + ':watch_query', kwargs={'id': history.id})
    if kwargs.get('download'):
        watch_url += '?download=true'

    # GET parameters forwarded to the watch page.
    redirect_params = QueryDict(None, mutable=True)

    # (1) context: the design wins over the table name.
    if design:
        redirect_params['context'] = make_query_context('design', design.id)
    elif tablename:
        table_ref = '%s:%s' % (tablename, database)
        redirect_params['context'] = make_query_context('table', table_ref)

    # (2) on_success_url: resolve a callable into the actual URL.
    if on_success_url:
        if callable(on_success_url):
            success_target = on_success_url(history)
        else:
            success_target = on_success_url
        redirect_params['on_success_url'] = success_target

    # (3) misc
    if on_success_params:
        redirect_params.update(on_success_params)

    return format_preserving_redirect(request, watch_url, redirect_params)
def load_table(request, database, table):
    """
    Loads data into a table.

    A valid POST builds a ``LOAD DATA INPATH`` statement from the submitted
    form, runs it through ``do_load_table()`` and answers with a JSON document
    holding ``on_success_url``. Any other request (or an invalid form) renders
    "load_table.mako" with the form.

    Raises PopupException when the table description cannot be fetched or when
    running the statement fails.
    """
    try:
        described = HCatClient(request.user.username).describe_table_extended(table, db=database)
        partition_keys = described['partitionColumns'] if described['partitioned'] else []
        table_obj = {
            'tableName': table,
            'columns': described['columns'],
            'partitionKeys': partition_keys
        }
    except Exception:
        import traceback
        raise PopupException('Error getting table description',
                             title="Error getting table description",
                             detail=traceback.format_exc())

    if request.method != "POST":
        form = hcatalog.forms.LoadDataForm(table_obj)
    else:
        form = hcatalog.forms.LoadDataForm(table_obj, request.POST)
        if form.is_valid():
            # NOTE(review): the path and partition values from the form are
            # interpolated into the statement as-is -- confirm they are
            # validated/escaped upstream.
            pieces = ["LOAD DATA INPATH", " '%s'" % form.cleaned_data['path']]
            if form.cleaned_data['overwrite']:
                pieces.append(" OVERWRITE")
            pieces.append(" INTO TABLE ")
            pieces.append("`%s.%s`" % (database, table))
            if len(form.partition_columns) > 0:
                assignments = [
                    "%s='%s'" % (column_name, form.cleaned_data[key])
                    for key, column_name in form.partition_columns.iteritems()
                ]
                pieces.append(" PARTITION (")
                pieces.append(", ".join(assignments))
                pieces.append(")")
            pieces.append(";")
            hql = "".join(pieces)

            try:
                do_load_table(request, hql)
            except Exception:
                import traceback
                raise PopupException('Error loading data into the table',
                                     title="Error loading data into the table",
                                     detail=traceback.format_exc())

            on_success_url = urlresolvers.reverse(
                get_app_name(request) + ':describe_table',
                kwargs={'database': database, 'table': table})
            return HttpResponse(json.dumps({'on_success_url': on_success_url}))

    return render("load_table.mako", request, {
        'form': form,
        'table': table,
        'action': request.get_full_path(),
    })