def execute_query(self, query, design):
    """
    Record ``query`` as a QueryHistory row and return that row.

    Nothing in this method submits the statement to a server: it only builds
    the history entry (state ``available``, ``notify=False``, statement
    number 0), saves it and logs the result.

    query  - object exposing ``sql_query`` and a ``query`` mapping with a
             ``"type"`` entry.
    design - value stored as the ``design`` of the history row.
    """
    from beeswax.models import QueryHistory

    sql_query = query.sql_query
    owner = self.client.user
    server = self.client.query_server

    history_fields = {
        "owner": owner,
        "query": sql_query,
        "server_host": "%(server_host)s" % server,
        "server_port": "%(server_port)d" % server,
        "server_name": "%(server_name)s" % server,
        "server_type": self.server_type,
        "last_state": QueryHistory.STATE.available.index,
        "design": design,
        "notify": False,
        "query_type": query.query["type"],
        "statement_number": 0,
    }
    query_history = QueryHistory.build(**history_fields)
    query_history.save()

    log_args = (query_history.id, self.client.user, query_history.statement_number)
    LOG.debug("Updated QueryHistory id %s user %s statement_number: %s" % log_args)

    return query_history
def authorized_get_query_history(request, query_history_id, owner_only=False, must_exist=False):
    """
    Fetch a QueryHistory by id and check that ``request.user`` may read it.

    Returns None when no id is given, or when the row is missing, unless
    ``must_exist`` is set, in which case a missing row raises PopupException.
    Raises PopupException when the history has no usable design document and
    the user is neither an admin nor the owner. When a document exists, the
    check is delegated to its ``can_read_or_exception``.

    ``owner_only`` is accepted but not consulted by this function.
    """
    if not must_exist and query_history_id is None:
        return None

    try:
        history = QueryHistory.get(id=query_history_id)
    except QueryHistory.DoesNotExist:
        if not must_exist:
            return None
        raise PopupException(
            _('QueryHistory %(id)s does not exist.') % {'id': query_history_id})

    design = history.design
    # Some queries don't have a design so are not linked to Document Model permission
    has_document = design is not None and design.doc.exists()

    if has_document:
        design.doc.get().can_read_or_exception(request.user)
    elif not is_admin(request.user) and request.user != history.owner:
        raise PopupException(
            _('Permission denied to read QueryHistory %(id)s') % {'id': query_history_id})

    return history
def execute_and_watch(self, query, design=None, notify=False):
    """
    Run query and record it in a QueryHistory object so its progress can be
    followed on a Web page.

    query  - object whose ``query['query']`` holds the statement text; it is
             passed as-is to ``self.client.query``.
    design - value stored as the ``design`` of the history row.
    notify - value stored as the ``notify`` flag of the history row.

    Raises BeeswaxException when the server rejects the query or returns an
    invalid handle; the history row is marked ``failed`` first.

    NOTE(review): no return statement is visible after the error handler in
    this snippet, so as shown the method returns None on success -- confirm
    against the complete source.
    """
    query_statement = query.query['query']

    query_history = QueryHistory.build(
        owner=self.client.user,
        query=query_statement,
        server_host='%(server_host)s' % self.client.query_server,
        server_port='%(server_port)d' % self.client.query_server,
        server_name='%(server_name)s' % self.client.query_server,
        server_type=self.server_type,
        last_state=QueryHistory.STATE.submitted.index,
        design=design,
        notify=notify)
    query_history.save()

    LOG.debug("Made new QueryHistory id %s user %s query: %s..." %
              (query_history.id, self.client.user, query_history.query[:25]))

    try:
        handle = self.client.query(query)
        if not handle.is_valid():
            msg = _("Server returning invalid handle for query id %(id)d [%(query)s]...") % \
                {'id': query_history.id, 'query': query_statement[:40]}
            raise BeeswaxException(msg)
    except BeeswaxException as ex:  # TODO HS2
        # "except X as name" parses on Python 2.6+ and Python 3, unlike the
        # comma form.
        LOG.exception(ex)
        # Kind of expected (hql compile/syntax error, etc.)
        if hasattr(ex, 'handle') and ex.handle:
            query_history.server_id = ex.handle.id
            query_history.log_context = ex.handle.log_context
        query_history.save_state(QueryHistory.STATE.failed)
        # Bare raise re-raises the same exception with its original traceback.
        raise
def test_get_exec_summary(self):
    """Run a COUNT query on impala and fetch its exec summary twice."""
    query = """ SELECT COUNT(1) FROM tweets; """

    response = _make_query(
        self.client, query, database=self.DATABASE, local=False, server_name='impala')
    query_history = QueryHistory.get(json.loads(response.content)['id'])

    wait_for_query_to_finish(self.client, response, max=180.0)

    def check_summary():
        # One round trip to the exec-summary endpoint plus its assertions.
        url = reverse(
            'impala:get_exec_summary',
            kwargs={'query_history_id': query_history.id})
        data = json.loads(self.client.post(url).content)
        assert_equal(0, data['status'], data)
        assert_true('nodes' in data['summary'], data)
        nodes = data['summary']['nodes']
        assert_true(len(nodes) > 0, nodes)

    check_summary()

    # Attempt to call get_exec_summary on a closed query
    check_summary()
def execute_and_watch(self, query, design=None, notify=False):
    """
    Run query and record it in a QueryHistory object so its progress can be
    followed on a Web page.

    query  - object whose ``query['query']`` holds the statement text; it is
             passed as-is to ``self.client.query``.
    design - value stored as the ``design`` of the history row.
    notify - value stored as the ``notify`` flag of the history row.

    Raises BeeswaxException when the server rejects the query or returns an
    invalid handle; the history row is marked ``failed`` first.

    NOTE(review): no return statement is visible after the error handler in
    this snippet, so as shown the method returns None on success -- confirm
    against the complete source.
    """
    query_statement = query.query['query']

    query_history = QueryHistory.build(
        owner=self.client.user,
        query=query_statement,
        server_host='%(server_host)s' % self.client.query_server,
        server_port='%(server_port)d' % self.client.query_server,
        server_name='%(server_name)s' % self.client.query_server,
        server_type=self.server_type,
        last_state=QueryHistory.STATE.submitted.index,
        design=design,
        notify=notify)
    query_history.save()

    LOG.debug("Made new QueryHistory id %s user %s query: %s..." %
              (query_history.id, self.client.user, query_history.query[:25]))

    try:
        handle = self.client.query(query)
        if not handle.is_valid():
            msg = _("Server returning invalid handle for query id %(id)d [%(query)s]...") % \
                {'id': query_history.id, 'query': query_statement[:40]}
            raise BeeswaxException(msg)
    except BeeswaxException as ex:  # TODO HS2
        # "except X as name" parses on Python 2.6+ and Python 3, unlike the
        # comma form.
        LOG.exception(ex)
        # Kind of expected (hql compile/syntax error, etc.)
        # getattr guards against an exception object that carries no
        # ``handle`` attribute, so the failure state below is still saved.
        failed_handle = getattr(ex, 'handle', None)
        if failed_handle:
            query_history.server_id = failed_handle.id
            query_history.log_context = failed_handle.log_context
        query_history.save_state(QueryHistory.STATE.failed)
        # Bare raise re-raises the same exception with its original traceback.
        raise
def handle(self, *args, **options):
    """
    Close HiveServer2 operations behind old QueryHistory rows.

    Positional arguments:
      args[0] - age threshold in days (defaults to 7).
      args[1] - the literal 'all' to consider every row instead of only
                those in the expired, failed or available state.

    Requires HIVE_CONF_DIR in the environment; a failure to read it is
    logged, hinted at on stdout and re-raised. Each selected row is marked
    expired once its operation is closed or found to be already closed, and
    a summary of both counts is written to stdout at the end.
    """
    days = 7 if len(args) < 1 else int(args[0])
    close_all = len(args) >= 2 and args[1] == 'all'

    self.stdout.write(
        'Closing (all=%s) HiveServer2 queries older than %s days...\n' % (close_all, days))

    if close_all:
        queries = QueryHistory.objects.all()
    else:
        finished_states = [
            QueryHistory.STATE.expired.value,
            QueryHistory.STATE.failed.value,
            QueryHistory.STATE.available.value,
        ]
        queries = QueryHistory.objects.filter(last_state__in=finished_states)

    cutoff = datetime.today() - timedelta(days=days)
    queries = queries.filter(submission_date__lte=cutoff)

    import os
    import beeswax
    from beeswax import conf
    from beeswax import hive_site

    try:
        beeswax.conf.HIVE_CONF_DIR.set_for_testing(os.environ['HIVE_CONF_DIR'])
    except:
        LOG.exception('failed to lookup HIVE_CONF_DIR in environment')
        self.stdout.write('Did you export HIVE_CONF_DIR=/etc/hive/conf?\n')
        raise

    hive_site.reset()
    hive_site.get_conf()

    def mark_expired(row):
        # Persist the expired state on the row being iterated.
        row.last_state = QueryHistory.STATE.expired.value
        row.save()

    closed_queries = 0
    already_closed_queries = 0

    for query in queries:
        try:
            query_history = QueryHistory.get(id=query.id)
            if query_history.server_id is None:
                already_closed_queries += 1
            else:
                handle = query_history.get_handle()
                dbms.get(user=query_history.owner).close_operation(handle)
                closed_queries += 1
            mark_expired(query)
        except Exception as e:
            error_text = str(e)
            if 'None' in error_text or 'Invalid OperationHandle' in error_text:
                # Treated as "nothing left to close" rather than a failure.
                already_closed_queries += 1
                mark_expired(query)
            else:
                self.stdout.write('Info: %s\n' % e)

    self.stdout.write('%s queries closed. %s queries already closed.\n' %
                      (closed_queries, already_closed_queries))
def execute_and_watch(self, query, design=None, query_history=None):
    """
    Run query and return a QueryHistory object in order to see its progress
    on a Web page.

    query         - object exposing ``hql_query`` and a ``query`` mapping
                    (``'type'`` and optionally ``'email_notify'`` are read).
    design        - value stored as the ``design`` of a newly built row.
    query_history - existing row to reuse; when None a new row is built in
                    the ``submitted`` state with statement number 0 and saved.

    Returns the QueryHistory row, updated from the server handle and set to
    running. Raises QueryServerException when the server rejects the query
    or returns an invalid handle; the row is marked ``failed`` first.
    """
    hql_query = query.hql_query

    if query_history is None:
        query_history = QueryHistory.build(
            owner=self.client.user,
            query=hql_query,
            server_host='%(server_host)s' % self.client.query_server,
            server_port='%(server_port)d' % self.client.query_server,
            server_name='%(server_name)s' % self.client.query_server,
            server_type=self.server_type,
            last_state=QueryHistory.STATE.submitted.value,
            design=design,
            notify=query.query.get('email_notify', False),
            query_type=query.query['type'],
            statement_number=0)
        query_history.save()

        LOG.debug(
            "Made new QueryHistory id %s user %s query: %s..." %
            (query_history.id, self.client.user, query_history.query[:25]))

    try:
        handle = self.client.query(query, query_history.statement_number)
        if not handle.is_valid():
            # Slice the statement text, not the query object: slicing the
            # object would fail before the intended exception is raised.
            msg = _(
                "Server returning invalid handle for query id %(id)d [%(query)s]..."
            ) % {
                'id': query_history.id,
                'query': hql_query[:40]
            }
            raise QueryServerException(msg)
    except QueryServerException as ex:
        LOG.exception(ex)
        # Kind of expected (hql compile/syntax error, etc.)
        if hasattr(ex, 'handle') and ex.handle:
            query_history.server_id, query_history.server_guid = ex.handle.id, ex.handle.id
            query_history.log_context = ex.handle.log_context
        query_history.save_state(QueryHistory.STATE.failed)
        # Bare raise re-raises the same exception with its original traceback.
        raise

    # All good
    query_history.server_id, query_history.server_guid = handle.get()
    query_history.operation_type = handle.operation_type
    query_history.has_results = handle.has_result_set
    query_history.modified_row_count = handle.modified_row_count
    query_history.log_context = handle.log_context
    query_history.query_type = query.query['type']
    query_history.set_to_running()
    query_history.save()

    LOG.debug("Updated QueryHistory id %s user %s statement_number: %s" %
              (query_history.id, self.client.user, query_history.statement_number))

    return query_history
def execute_directly(user, query_msg, design=None, notify=False):
    """
    execute_directly(user, query_msg [,design]) -> QueryHistory object

    This method also creates a QueryHistory object and saves it.
    user - Django user.
    query_msg - The thrift Query object.
    design - The SavedQuery object (i.e. design) associated with this query.
    notify - Whether to notify the user upon completion.

    Raises BeeswaxException when the server rejects the query (the history
    row is marked ``failed`` first), or a plain Exception when the returned
    handle is missing its id or log context.

    NOTE(review): no return statement is visible after the error handler in
    this snippet, so as shown the function returns None on success --
    confirm against the complete source.
    """
    query_history = QueryHistory(
        owner=user,
        query=query_msg.query,
        last_state=QueryHistory.STATE.submitted.index,
        design=design,
        notify=notify)
    query_history.save()
    LOG.debug("Made new QueryHistory id %s user %s query: %s..." %
              (query_history.id, user, query_history.query[:25]))

    # Now submit it
    try:
        handle = db_client().query(query_msg)
        if not handle or not handle.id or not handle.log_context:
            # It really shouldn't happen
            msg = "BeeswaxServer returning invalid handle for query id %d [%s]..." % \
                (query_history.id, query_msg.query[:40])
            raise Exception(msg)
    except BeeswaxException as bex:
        # "except X as name" parses on Python 2.6+ and Python 3, unlike the
        # comma form.
        # Kind of expected (hql compile/syntax error, etc.)
        if bex.handle:
            query_history.server_id = bex.handle.id
            query_history.log_context = bex.handle.log_context
        query_history.save_state(QueryHistory.STATE.failed)
        # Bare raise re-raises the same exception with its original traceback.
        raise
def test_basic_flow(self):
    """List databases and tables, page through a query's results, then cancel it."""
    dbs = self.db.get_databases()
    for expected_db in ('_impala_builtins', self.DATABASE):
        assert_true(expected_db in dbs, dbs)

    tables = self.db.get_tables(database=self.DATABASE)
    assert_true('tweets' in tables, tables)

    QUERY = """ SELECT * FROM tweets ORDER BY row_num; """

    response = _make_query(
        self.client, QUERY, database=self.DATABASE, local=False, server_name='impala')
    query_history = QueryHistory.get(json.loads(response.content)['id'])

    response = wait_for_query_to_finish(self.client, response, max=180.0)

    def fetch_five_rows():
        # Keep fetching from the current offset until five rows are collected.
        rows = []
        while len(rows) < 5:
            # We get less than 5 results most of the time, so increase offset
            page = fetch_query_result_data(
                self.client, response, n=len(rows), server_name='impala')
            rows += page['results']
        return rows

    # Check that we multiple fetches get all the result set
    results = fetch_five_rows()
    assert_equal([1, 2, 3, 4, 5], [col[0] for col in results])

    # Check start over
    results_start_over = fetch_five_rows()
    assert_equal(results_start_over, results)

    # Check cancel query
    cancel_url = reverse(
        'impala:api_cancel_query', kwargs={'query_history_id': query_history.id})
    content = json.loads(self.client.post(cancel_url).content)
    assert_equal(0, content['status'])
def authorized_get_history(request, query_history_id, owner_only=False, must_exist=False):
    """
    Fetch a QueryHistory by id and check that ``request.user`` may read it.

    Returns None when no id is given, or when the row is missing, unless
    ``must_exist`` is set, in which case a missing row raises PopupException.

    A history without a design, or whose design has no document, cannot be
    checked through the document permissions. In that case only a superuser
    or the owner may read it, otherwise PopupException is raised. Without
    this branch such rows would fail on the attribute access instead of
    producing a permission result.

    ``owner_only`` is accepted but not consulted by this function.
    """
    if query_history_id is None and not must_exist:
        return None

    try:
        query_history = QueryHistory.get(id=query_history_id)
    except QueryHistory.DoesNotExist:
        if must_exist:
            raise PopupException(
                _('QueryHistory %(id)s does not exist.') % {'id': query_history_id})
        return None

    # Some queries don't have a design so are not linked to Document Model permission
    if query_history.design is None or not query_history.design.doc.exists():
        if not request.user.is_superuser and request.user != query_history.owner:
            raise PopupException(
                _('Permission denied to read QueryHistory %(id)s') % {'id': query_history_id})
    else:
        query_history.design.doc.get().can_read_or_exception(request.user)

    return query_history
def handle(self, *args, **options):
    """
    Close HiveServer2 operations behind old QueryHistory rows.

    Positional arguments:
      args[0] - age threshold in days (defaults to 7).
      args[1] - the literal 'all' to consider every row instead of only
                those in the expired, failed or available state.

    Requires HIVE_CONF_DIR in the environment; a failure to read it is
    logged, hinted at on stdout and re-raised. Each selected row is marked
    expired once its operation is closed or found to be already closed, and
    a summary of both counts is written to stdout at the end.
    """
    days = int(args[0]) if len(args) >= 1 else 7
    close_all = args[1] == 'all' if len(args) >= 2 else False

    self.stdout.write('Closing (all=%s) HiveServer2 queries older than %s days...\n' %
                      (close_all, days))

    queries = QueryHistory.objects.filter(last_state__in=[
        QueryHistory.STATE.expired.value,
        QueryHistory.STATE.failed.value,
        QueryHistory.STATE.available.value])

    if close_all:
        queries = QueryHistory.objects.all()

    queries = queries.filter(submission_date__lte=datetime.today() - timedelta(days=days))

    import os
    import beeswax
    from beeswax import conf
    from beeswax import hive_site

    try:
        beeswax.conf.HIVE_CONF_DIR.set_for_testing(os.environ['HIVE_CONF_DIR'])
    except:
        # Deliberately broad: the error is logged and re-raised unchanged.
        LOG.exception('failed to lookup HIVE_CONF_DIR in environment')
        self.stdout.write('Did you export HIVE_CONF_DIR=/etc/hive/conf?\n')
        raise

    hive_site.reset()
    hive_site.get_conf()

    closed_queries = 0
    already_closed_queries = 0

    for query in queries:
        try:
            query_history = QueryHistory.get(id=query.id)
            if query_history.server_id is not None:
                handle = query_history.get_handle()
                dbms.get(user=query_history.owner).close_operation(handle)
                closed_queries += 1
            else:
                already_closed_queries += 1

            query.last_state = QueryHistory.STATE.expired.value
            query.save()
        except Exception as e:
            # "except X as name" parses on Python 2.6+ and Python 3, unlike
            # the comma form.
            if 'None' in str(e) or 'Invalid OperationHandle' in str(e):
                # Treated as "nothing left to close" rather than a failure.
                already_closed_queries += 1
                query.last_state = QueryHistory.STATE.expired.value
                query.save()
            else:
                self.stdout.write('Info: %s\n' % e)

    # Report the counters; without this line they are computed but never used.
    self.stdout.write('%s queries closed. %s queries already closed.\n' %
                      (closed_queries, already_closed_queries))
def test_get_runtime_profile(self):
    """Run a COUNT query on impala and check its runtime profile is served."""
    query = """ SELECT COUNT(1) FROM tweets; """

    response = _make_query(
        self.client, query, database=self.DATABASE, local=False, server_name='impala')
    query_history = QueryHistory.get(json.loads(response.content)['id'])

    wait_for_query_to_finish(self.client, response, max=180.0)

    profile_url = reverse(
        'impala:get_runtime_profile', kwargs={'query_history_id': query_history.id})
    data = json.loads(self.client.post(profile_url).content)

    assert_equal(0, data['status'], data)
    assert_true('Execution Profile' in data['profile'], data)
def execute_directly(user, query_msg, query_server=None, design=None, notify=False):
    """
    execute_directly(user, query_msg [,design]) -> QueryHistory object

    This method also creates a QueryHistory object and saves it.
    user - Django user.
    query_msg - The thrift Query object.
    query_server - Mapping with ``server_host``, ``server_port`` and
                   ``server_name``; looked up with ``get_query_server`` when
                   omitted.
    design - The SavedQuery object (i.e. design) associated with this query.
    notify - Whether to notify the user upon completion.

    Raises BeeswaxException when the server rejects the query (the history
    row is marked ``failed`` first), or a plain Exception when the returned
    handle is missing or has no id.

    NOTE(review): no return statement is visible after the error handler in
    this snippet, so as shown the function returns None on success --
    confirm against the complete source.
    """
    if query_server is None:
        query_server = get_query_server(support_ddl=True)  # For convenience with DDL queries only

    query_history = QueryHistory(
        owner=user,
        query=query_msg.query,
        server_host='%(server_host)s' % query_server,
        server_port='%(server_port)d' % query_server,
        server_name='%(server_name)s' % query_server,
        last_state=QueryHistory.STATE.submitted.index,
        design=design,
        notify=notify)
    query_history.save()
    LOG.debug("Made new QueryHistory id %s user %s query: %s..." %
              (query_history.id, user, query_history.query[:25]))

    # Now submit it
    try:
        handle = db_client(query_server).query(query_msg)
        if not handle or not handle.id:
            # It really shouldn't happen
            msg = _("BeeswaxServer returning invalid handle for query id %(id)d [%(query)s]...") % \
                {'id': query_history.id, 'query': query_msg.query[:40]}
            raise Exception(msg)
    except BeeswaxException as bex:
        # "except X as name" parses on Python 2.6+ and Python 3, unlike the
        # comma form.
        # Kind of expected (hql compile/syntax error, etc.)
        if bex.handle:
            query_history.server_id = bex.handle.id
            query_history.log_context = bex.handle.log_context
        query_history.save_state(QueryHistory.STATE.failed)
        # Bare raise re-raises the same exception with its original traceback.
        raise
def authorized_get_query_history(request, query_history_id, owner_only=False, must_exist=False):
    """
    Fetch a QueryHistory by id and check that ``request.user`` may read it.

    Returns None when no id is given, or when the row is missing, unless
    ``must_exist`` is set, in which case a missing row raises PopupException.
    Raises PopupException when the history has no usable design document and
    the user is neither a superuser nor the owner. When a document exists,
    the check is delegated to its ``can_read_or_exception``.

    ``owner_only`` is accepted but not consulted by this function.
    """
    if not must_exist and query_history_id is None:
        return None

    try:
        history = QueryHistory.get(id=query_history_id)
    except QueryHistory.DoesNotExist:
        if not must_exist:
            return None
        raise PopupException(
            _('QueryHistory %(id)s does not exist.') % {'id': query_history_id})

    design = history.design
    # Some queries don't have a design so are not linked to Document Model permission
    has_document = design is not None and design.doc.exists()

    if has_document:
        design.doc.get().can_read_or_exception(request.user)
    elif not request.user.is_superuser and request.user != history.owner:
        raise PopupException(
            _('Permission denied to read QueryHistory %(id)s') % {'id': query_history_id})

    return history
def execute_and_watch(self, query, design=None, query_history=None):
    """
    Run query and record it in a QueryHistory object so its progress can be
    followed on a Web page.

    query         - object exposing ``hql_query`` and a ``query`` mapping
                    (``"type"`` and optionally ``"email_notify"`` are read).
    design        - value stored as the ``design`` of a newly built row.
    query_history - existing row to reuse; when None a new row is built in
                    the ``submitted`` state with statement number 0 and saved.

    Raises QueryServerException when the server rejects the query or returns
    an invalid handle; the row is marked ``failed`` first.

    NOTE(review): no return statement is visible after the error handler in
    this snippet, so as shown the method returns None on success -- confirm
    against the complete source.
    """
    hql_query = query.hql_query

    if query_history is None:
        query_history = QueryHistory.build(
            owner=self.client.user,
            query=hql_query,
            server_host="%(server_host)s" % self.client.query_server,
            server_port="%(server_port)d" % self.client.query_server,
            server_name="%(server_name)s" % self.client.query_server,
            server_type=self.server_type,
            last_state=QueryHistory.STATE.submitted.index,
            design=design,
            notify=query.query.get("email_notify", False),
            query_type=query.query["type"],
            statement_number=0,
        )
        query_history.save()

        LOG.debug(
            "Made new QueryHistory id %s user %s query: %s..."
            % (query_history.id, self.client.user, query_history.query[:25])
        )

    try:
        handle = self.client.query(query, query_history.statement_number)
        if not handle.is_valid():
            # Slice the statement text, not the query object: slicing the
            # object would fail before the intended exception is raised.
            msg = _("Server returning invalid handle for query id %(id)d [%(query)s]...") % {
                "id": query_history.id,
                "query": hql_query[:40],
            }
            raise QueryServerException(msg)
    except QueryServerException as ex:
        # "except X as name" parses on Python 2.6+ and Python 3, unlike the
        # comma form.
        LOG.exception(ex)
        # Kind of expected (hql compile/syntax error, etc.)
        if hasattr(ex, "handle") and ex.handle:
            query_history.server_id, query_history.server_guid = ex.handle.id, ex.handle.id
            query_history.log_context = ex.handle.log_context
        query_history.save_state(QueryHistory.STATE.failed)
        # Bare raise re-raises the same exception with its original traceback.
        raise
def test_basic_flow(self):
    """List databases and tables, page through a query's results, then cancel it."""
    dbs = self.db.get_databases()
    for expected_db in ("_impala_builtins", self.DATABASE):
        assert_true(expected_db in dbs, dbs)

    tables = self.db.get_tables(database=self.DATABASE)
    assert_true("tweets" in tables, tables)

    QUERY = """ SELECT * FROM tweets ORDER BY row_num; """

    response = _make_query(
        self.client, QUERY, database=self.DATABASE, local=False, server_name="impala"
    )
    history_id = json.loads(response.content)["id"]
    query_history = QueryHistory.get(history_id)

    response = wait_for_query_to_finish(self.client, response, max=180.0)

    def collect_rows():
        # Keep fetching from the current offset until five rows are collected.
        collected = []
        while len(collected) < 5:
            # We get less than 5 results most of the time, so increase offset
            batch = fetch_query_result_data(
                self.client, response, n=len(collected), server_name="impala"
            )
            collected += batch["results"]
        return collected

    # Check that we multiple fetches get all the result set
    results = collect_rows()
    first_columns = [col[0] for col in results]
    assert_equal([1, 2, 3, 4, 5], first_columns)

    # Check start over
    results_start_over = collect_rows()
    assert_equal(results_start_over, results)

    # Check cancel query
    cancel_url = reverse(
        "impala:api_cancel_query", kwargs={"query_history_id": query_history.id}
    )
    resp = self.client.post(cancel_url)
    content = json.loads(resp.content)
    assert_equal(0, content["status"])
def execute_query(self, query, design):
    """
    Record ``query`` as a QueryHistory row and return that row.

    Nothing in this method submits the statement to a server: it only builds
    the history entry (state ``available``, ``notify=False``, statement
    number 0), saves it and logs the result.

    query  - object exposing ``sql_query`` and a ``query`` mapping with a
             ``'type'`` entry.
    design - value stored as the ``design`` of the history row.
    """
    sql_query = query.sql_query
    owner = self.client.user
    server = self.client.query_server

    history_fields = {
        'owner': owner,
        'query': sql_query,
        'server_host': '%(server_host)s' % server,
        'server_port': '%(server_port)d' % server,
        'server_name': '%(server_name)s' % server,
        'server_type': self.server_type,
        'last_state': QueryHistory.STATE.available.index,
        'design': design,
        'notify': False,
        'query_type': query.query['type'],
        'statement_number': 0,
    }
    query_history = QueryHistory.build(**history_fields)
    query_history.save()

    log_args = (query_history.id, self.client.user, query_history.statement_number)
    LOG.debug("Updated QueryHistory id %s user %s statement_number: %s" % log_args)

    return query_history
def test_get_exec_summary(self):
    """Run a COUNT query on impala and fetch its exec summary twice."""
    query = """ SELECT COUNT(1) FROM tweets; """

    response = _make_query(
        self.client, query, database=self.DATABASE, local=False, server_name='impala')
    history_id = json.loads(response.content)['id']
    query_history = QueryHistory.get(history_id)

    wait_for_query_to_finish(self.client, response, max=180.0)

    def request_and_verify_summary():
        # One round trip to the exec-summary endpoint plus its assertions.
        target = reverse(
            'impala:get_exec_summary',
            kwargs={'query_history_id': query_history.id})
        resp = self.client.post(target)
        data = json.loads(resp.content)
        assert_equal(0, data['status'], data)
        assert_true('nodes' in data['summary'], data)
        summary_nodes = data['summary']['nodes']
        assert_true(len(summary_nodes) > 0, summary_nodes)

    request_and_verify_summary()

    # Attempt to call get_exec_summary on a closed query
    request_and_verify_summary()