def create_beeswax_query(self, query_str, set_query_options):
    """Build a Beeswax ``Query`` object for ``query_str``.

    The query is attributed to ``self.user``; its configuration is whatever
    ``self._options_to_string_list(set_query_options)`` returns.
    """
    beeswax_query = BeeswaxService.Query()
    beeswax_query.query = query_str
    beeswax_query.hadoop_user = self.user
    beeswax_query.configuration = self._options_to_string_list(set_query_options)
    return beeswax_query
def _make_query_msg(hql):
    """
    Build the thrift Query object for ``hql``.

    The query must run as a valid hadoop user, so it is issued as the user
    returned by get_install_user(), with an empty configuration list.

    NOTE(review): an earlier note on this function said "Use hue:supergroup";
    presumably that is what get_install_user() resolves to -- confirm.
    """
    msg = BeeswaxService.Query(configuration=[], query=hql)
    msg.hadoop_user = get_install_user()
    return msg
def execute_query_async(self, query_string):
    """
    Start ``query_string`` without waiting for it to finish.

    The query is submitted as the local OS user (``getpass.getuser()``) with
    the configuration from ``self.__options_to_string_list()``. The result of
    the RPC (the query handle) is handed straight back to the caller, who is
    responsible for any further processing.
    """
    request = BeeswaxService.Query()
    request.query = query_string
    request.hadoop_user = getpass.getuser()
    request.configuration = self.__options_to_string_list()
    rpc_result = self.__do_rpc(lambda: self.imp_service.query(request))
    return rpc_result
def make_query(self, hql_query, statement=0):
    """Build the thrift Query for one statement of ``hql_query``.

    ``statement`` is passed to ``hql_query.get_query_statement`` to select the
    statement text. The query is attributed to ``self.user.username``.
    """
    targets_impala = self.query_server['server_name'] == 'impala'
    if targets_impala:
        # All settings are folded into one comma-separated "key=value" entry.
        pairs = ['%(key)s=%(value)s' % setting for setting in hql_query.settings]
        configuration = [','.join(pairs)]
    else:
        # HUE-535 without having to modify Beeswaxd, add 'use database' as first option
        database = hql_query.query.get('database', 'default')
        configuration = ['use ' + database]
        configuration.extend(hql_query.get_configuration())

    statement_text = hql_query.get_query_statement(statement)
    thrift_query = BeeswaxService.Query(query=statement_text, configuration=configuration)
    thrift_query.hadoop_user = self.user.username
    return thrift_query
def execute_query_async(self, query_string, user=None):
    """
    Start ``query_string`` without waiting for it to finish.

    The query runs as ``user`` when one is given, otherwise as the local OS
    user (``getpass.getuser()``). Once the RPC returns, the id of the new
    handle is logged at info level and the handle is returned to the caller
    for processing.
    """
    request = BeeswaxService.Query()
    request.query = query_string
    if user is None:
        request.hadoop_user = getpass.getuser()
    else:
        request.hadoop_user = user
    request.configuration = self.__options_to_string_list()

    query_handle = self.__do_rpc(lambda: self.imp_service.query(request))
    LOG.info("Started query {0}".format(query_handle.id))
    return query_handle
def do_explain(self, args):
    """Explain the query execution plan"""
    # Returns False when the RPC status is not OK; otherwise prints the
    # textual plan and returns True.
    request = BeeswaxService.Query()
    # ``args`` already excludes the leading 'explain', so it is used verbatim.
    request.query = args
    request.configuration = self.__options_to_string_list()
    print("Explain query: %s" % (request.query,))

    rpc_result = self.__do_rpc(lambda: self.imp_service.explain(request))
    explanation, status = rpc_result
    if status != RpcStatus.OK:
        return False

    print(explanation.textual)
    return True
def test_data_export_limit_clause(self):
    # Issues a LIMIT query directly through the db client and checks that the
    # csv download holds exactly `limit` rows plus one header row.
    limit = 3
    limited_query = BeeswaxService.Query()
    limited_query.query = 'SELECT foo FROM test limit %d' % (limit,)
    limited_query.configuration = []
    limited_query.hadoop_user = "******"

    handle = beeswax.db_utils.db_client(get_query_server()).query(limited_query)
    history = beeswax.models.QueryHistory(server_id=handle.id, log_context=handle.log_context)
    # Host and port are needed as we query directly.
    history.server_host = beeswax.conf.QUERY_SERVERS['default'].SERVER_HOST.get()
    history.server_port = beeswax.conf.QUERY_SERVERS['default'].SERVER_PORT.get()

    # Get the result in csv. Should have 3 + 1 header row.
    csv_resp = beeswax.data_export.download(history, 'csv')
    csv_lines = csv_resp.content.strip().split('\n')
    assert_equal(len(csv_lines), limit + 1)
def test_data_export(self):
    # Runs 'SELECT * FROM test', downloads the result as xls and as csv, and
    # checks that the csv download equals the xls download translated to csv.
    select_all = BeeswaxService.Query()
    select_all.query = 'SELECT * FROM test'
    select_all.configuration = []
    select_all.hadoop_user = "******"

    handle = beeswax.db_utils.db_client().query(select_all)
    history = beeswax.models.QueryHistory(
        server_id=handle.id,
        log_context=handle.log_context)

    # Get the result in xls. Then translate it into csv.
    xls_resp = beeswax.data_export.download(history, 'xls')
    translated_csv = xls2csv(xls_resp.content)
    # It should have 257 lines (256 + header)
    translated_lines = translated_csv.strip('\r\n').split('\r\n')
    assert_equal(len(translated_lines), 257)

    # The query is issued a second time before the csv download.
    # NOTE(review): the download below still uses the history record built
    # from the first handle; the new handle is not used -- confirm intent.
    handle = beeswax.db_utils.db_client().query(select_all)

    # Get the result in csv.
    csv_resp = beeswax.data_export.download(history, 'csv')
    assert_equal(csv_resp.content, translated_csv)
def test_sync_query_error(self):
    # Execute an incorrect query synchronously and verify the error code and
    # sqlstate carried by the resulting BeeswaxException.
    query_msg = BeeswaxService.Query()
    query_msg.query = """ SELECT FROM zzzzz """
    query_msg.configuration = []
    query_msg.hadoop_user = "******"
    try:
        beeswax.db_utils.db_client(get_query_server()).executeAndWait(query_msg, "")
    except ttypes.BeeswaxException as bex:
        assert_equal(bex.errorCode, 40000)
        assert_equal(bex.SQLState, "42000")
    else:
        # Without this branch the test would pass silently whenever the
        # invalid query did not raise, verifying nothing.
        raise AssertionError("Expected BeeswaxException for an invalid query")
def test_sync_query_exec(self):
    # Execute a query synchronously, fetch with a fetch size of 5 and verify
    # the size of the returned result set, then close the handle and clean
    # its log context.
    QUERY = """ SELECT foo FROM test; """  # not referenced below
    sync_query = BeeswaxService.Query()
    sync_query.query = """ SELECT foo FROM test """
    sync_query.configuration = []
    sync_query.hadoop_user = "******"

    # A client is obtained from db_client() separately for every call.
    handle = beeswax.db_utils.db_client(get_query_server()).executeAndWait(sync_query, "")
    results = beeswax.db_utils.db_client(get_query_server()).fetch(handle, True, 5)

    rows = list(parse_results(results.data))
    assert_equal(len(rows), 5)

    beeswax.db_utils.db_client(get_query_server()).close(handle)
    beeswax.db_utils.db_client(get_query_server()).clean(handle.log_context)
def do_insert(self, args):
    """Executes an INSERT query"""
    # Submits "insert <args>", polls the query state until it finishes, then
    # closes the insert and reports the number of appended rows.
    # Returns True on success and False on an RPC failure or interruption.
    # On a remote error or an interrupt during polling, it returns the result
    # of closing or cancelling the handle instead.
    request = BeeswaxService.Query()
    request.query = "insert %s" % (args, )
    request.configuration = self.__options_to_string_list()
    print("Query: %s" % (request.query, ))

    start = time.time()
    handle, status = self.__do_rpc(lambda: self.imp_service.query(request))
    if status != RpcStatus.OK:
        return False

    # Poll every 50ms until the query reaches a terminal state or the user
    # interrupts it.
    while True:
        state = self.__get_query_state(handle)
        if state == self.query_state["FINISHED"]:
            break
        if state == self.query_state["EXCEPTION"]:
            print('Remote error')
            if not self.connected:
                return False
            # It's ok to close an INSERT that's failed rather than do the full
            # CloseInsert. The latter builds an InsertResult which is meaningless
            # here.
            return self.__close_query_handle(handle)
        if self.is_interrupted.isSet():
            return self.__cancel_query(handle)
        time.sleep(0.05)

    insert_result, status = self.__do_rpc(lambda: self.imp_service.CloseInsert(handle))
    end = time.time()
    if status != RpcStatus.OK or self.is_interrupted.isSet():
        return False

    # rows_appended maps to per-entry counts; the total is their integer sum.
    num_rows = sum(int(count) for count in insert_result.rows_appended.values())
    self.__print_if_verbose("Inserted %d rows in %2.2fs" % (num_rows, end - start))
    return True
def do_alter(self, args):
    # Runs the statement "alter <args>" with the current option list and
    # returns whatever self.__query_with_results reports for it.
    alter_query = BeeswaxService.Query()
    statement = "alter %s" % (args, )
    alter_query.query = statement
    alter_query.configuration = self.__options_to_string_list()
    outcome = self.__query_with_results(alter_query)
    return outcome
def __create_beeswax_query_handle(self):
    """Return a new Beeswax ``Query`` with only ``hadoop_user`` set.

    The user is taken from ``self.user``; the query text and configuration
    are left for the caller to fill in.
    """
    new_query = BeeswaxService.Query()
    new_query.hadoop_user = self.user
    return new_query
def make_query(self, hql_query):
    """Build the thrift Query for ``hql_query``.

    The statement text comes from ``hql_query.query['query']`` and the
    configuration from ``hql_query.get_configuration()``. The query is
    attributed to ``self.user.username``.
    """
    statement_text = hql_query.query['query']
    settings = hql_query.get_configuration()
    thrift_query = BeeswaxService.Query(query=statement_text, configuration=settings)
    thrift_query.hadoop_user = self.user.username
    return thrift_query
def _create_query_req(self, query_str, set_query_options):
    """Build the Beeswax ``Query`` request for ``query_str``.

    The request is attributed to ``self.user`` and carries the configuration
    produced by ``self._options_to_string_list(set_query_options)``.
    """
    request = BeeswaxService.Query()
    request.hadoop_user = self.user
    request.query = query_str
    option_strings = self._options_to_string_list(set_query_options)
    request.configuration = option_strings
    return request
def do_describe(self, args):
    """Executes a DESCRIBE... query, fetching all rows"""
    # The statement is "describe <args>"; the return value is whatever
    # self.__query_with_results reports for it.
    describe_query = BeeswaxService.Query()
    statement = "describe %s" % (args, )
    describe_query.query = statement
    describe_query.configuration = self.__options_to_string_list()
    outcome = self.__query_with_results(describe_query)
    return outcome
def do_show(self, args):
    """Executes a SHOW... query, fetching all rows"""
    # The statement is "show <args>"; the return value is whatever
    # self.__query_with_results reports for it.
    show_query = BeeswaxService.Query()
    statement = "show %s" % (args, )
    show_query.query = statement
    show_query.configuration = self.__options_to_string_list()
    outcome = self.__query_with_results(show_query)
    return outcome
def do_use(self, args):
    """Executes a USE... query"""
    # The statement is "use <args>"; the return value is whatever
    # self.__query_with_results reports for it.
    use_query = BeeswaxService.Query()
    statement = "use %s" % (args, )
    use_query.query = statement
    use_query.configuration = self.__options_to_string_list()
    outcome = self.__query_with_results(use_query)
    return outcome