def test_custom_authorization_provider(self):
    """Check authorization decisions for a session opened as the policy-file user.

    Describing tpch_seq.lineitem must produce a privilege error for 'test_user';
    describing tpch.lineitem must succeed.
    """
    # Function-scope import of the HS2 helper class.
    from tests.hs2.test_hs2 import TestHS2

    session_req = TCLIService.TOpenSessionReq()
    # User is 'test_user' (defined in the authorization policy file)
    session_req.username = '******'
    session_req.configuration = {}
    session_resp = self.hs2_client.OpenSession(session_req)
    TestHS2.check_response(session_resp)
    self.session_handle = session_resp.sessionHandle

    # A table we are not authorized to access: the response must carry the
    # privilege error.
    denied_req = TCLIService.TExecuteStatementReq()
    denied_req.sessionHandle = self.session_handle
    denied_req.statement = "describe tpch_seq.lineitem"
    denied_resp = self.hs2_client.ExecuteStatement(denied_req)
    expected_error = 'User \'%s\' does not have privileges to access' % 'test_user'
    assert expected_error in str(denied_resp)

    # The same operation on a table we are authorized to access.
    allowed_req = TCLIService.TExecuteStatementReq()
    allowed_req.sessionHandle = self.session_handle
    allowed_req.statement = "describe tpch.lineitem"
    allowed_resp = self.hs2_client.ExecuteStatement(allowed_req)
    TestHS2.check_response(allowed_resp)
def test_get_exec_summary(self):
    """Check that GetExecSummary errors for an open query and succeeds once closed."""
    query_req = TCLIService.TExecuteStatementReq()
    query_req.sessionHandle = self.session_handle
    query_req.statement = "SELECT COUNT(1) FROM functional.alltypes"
    query_resp = self.hs2_client.ExecuteStatement(query_req)
    TestHS2.check_response(query_resp)
    op_handle = query_resp.operationHandle

    summary_req = ImpalaHiveServer2Service.TGetExecSummaryReq()
    summary_req.operationHandle = op_handle
    summary_req.sessionHandle = self.session_handle

    # GetExecSummary() only works for closed queries
    summary_while_open = self.hs2_client.GetExecSummary(summary_req)
    TestHS2.check_response(summary_while_open,
                           TCLIService.TStatusCode.ERROR_STATUS)

    close_req = TCLIService.TCloseOperationReq()
    close_req.operationHandle = op_handle
    close_resp = self.hs2_client.CloseOperation(close_req)
    TestHS2.check_response(close_resp)

    # The same request is reissued now that the operation is closed.
    summary_after_close = self.hs2_client.GetExecSummary(summary_req)
    TestHS2.check_response(summary_after_close)
    assert len(summary_after_close.summary.nodes) > 0
def test_close_session(self):
    """Test that an open session can be closed"""
    opened = self.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
    TestHS2.check_response(opened)

    close_req = TCLIService.TCloseSessionReq()
    close_req.sessionHandle = opened.sessionHandle
    closed = self.hs2_client.CloseSession(close_req)
    TestHS2.check_response(closed)
def test_double_close_session(self):
    """Test that an already closed session cannot be closed a second time"""
    opened = self.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
    TestHS2.check_response(opened)

    close_req = TCLIService.TCloseSessionReq()
    close_req.sessionHandle = opened.sessionHandle

    first_close = self.hs2_client.CloseSession(close_req)
    TestHS2.check_response(first_close)

    # Double close should be an error
    second_close = self.hs2_client.CloseSession(close_req)
    TestHS2.check_response(second_close, TCLIService.TStatusCode.ERROR_STATUS)
def add_session(self):
    """Run ``fn(self)`` inside a freshly opened HS2 session.

    Opens a session as the current OS user, stores its handle in
    ``self.session_handle`` for the duration of the call, and closes the
    session afterwards. ``fn`` is a free variable taken from the enclosing
    scope (presumably the test function wrapped by a decorator -- confirm
    against the enclosing definition).
    """
    open_session_req = TCLIService.TOpenSessionReq()
    open_session_req.username = getuser()
    open_session_req.configuration = dict()
    resp = self.hs2_client.OpenSession(open_session_req)
    HS2TestSuite.check_response(resp)
    self.session_handle = resp.sessionHandle
    try:
        fn(self)
    finally:
        try:
            close_session_req = TCLIService.TCloseSessionReq()
            close_session_req.sessionHandle = resp.sessionHandle
            HS2TestSuite.check_response(
                self.hs2_client.CloseSession(close_session_req))
        finally:
            # Always drop the handle, even when CloseSession fails its status
            # check, so a stale handle is never left on the instance.
            self.session_handle = None
def setup(self):
    """Open a buffered, binary-protocol Thrift connection and build the HS2 client.

    The endpoint is read from IMPALAD_HS2_HOST_PORT ("host:port").
    """
    hs2_host, hs2_port = IMPALAD_HS2_HOST_PORT.split(":")
    sock = self.socket = TSocket(hs2_host, hs2_port)
    transport = self.transport = TBufferedTransport(sock)
    transport.open()
    protocol = self.protocol = TBinaryProtocol.TBinaryProtocol(transport)
    self.hs2_client = TCLIService.Client(protocol)
def __test_invalid_result_caching(self, sql_stmt):
    """Tests that invalid requests for query-result caching fail using the given
    sql_stmt.

    Each invalid cache-size value must yield an ERROR_STATUS response with the
    expected message, and must leave zero cached rows.
    """
    invalid_cases = (
        # A malformed result-cache size.
        ("bad_number",
         "Invalid value 'bad_number' for 'impala.resultset.cache.size' option"),
        # A result-cache size exceeding the per-Impalad maximum.
        # The default maximum result-cache size is 100000.
        ("100001",
         "Requested result-cache size of 100001 exceeds Impala's maximum of 100000"),
    )

    request = TCLIService.TExecuteStatementReq()
    request.sessionHandle = self.session_handle
    request.statement = sql_stmt
    request.confOverlay = dict()

    for cache_size, expected_error in invalid_cases:
        request.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = cache_size
        response = self.hs2_client.ExecuteStatement(request)
        HS2TestSuite.check_response(
            response, TCLIService.TStatusCode.ERROR_STATUS, expected_error)
        self.__verify_num_cached_rows(0)
def test_impersonation(self):
    """End-to-end impersonation + authorization test.

    Expects authorization to be configured before running this test.
    """
    # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
    # the module within this test function, rather than as a top-level import. This way
    # the tests in that module will not get pulled when executing this test suite. The fix
    # is to split the utility code out of the TestHS2 class and support HS2 as a first
    # class citizen in our test framework.
    from tests.hs2.test_hs2 import TestHS2

    session_req = TCLIService.TOpenSessionReq()
    session_req.username = '******'
    session_req.configuration = {'impala.doas.user': getuser()}
    session_resp = self.hs2_client.OpenSession(session_req)
    TestHS2.check_response(session_resp)
    self.session_handle = session_resp.sessionHandle

    # Query a table we are not authorized to access; the denial must name the
    # impersonated user and must be audited.
    denied_req = TCLIService.TExecuteStatementReq()
    denied_req.sessionHandle = self.session_handle
    denied_req.statement = "describe tpch_seq.lineitem"
    denied_resp = self.hs2_client.ExecuteStatement(denied_req)
    expected_error = 'User \'%s\' does not have privileges to access' % getuser()
    assert expected_error in str(denied_resp)

    audit_found = self.__wait_for_audit_record(user=getuser(), impersonator='hue')
    assert audit_found, 'No matching audit event recorded in time window'

    # The same operation on a table we are authorized to access.
    allowed_req = TCLIService.TExecuteStatementReq()
    allowed_req.sessionHandle = self.session_handle
    allowed_req.statement = "describe tpch.lineitem"
    allowed_resp = self.hs2_client.ExecuteStatement(allowed_req)
    TestHS2.check_response(allowed_resp)

    # Try to impersonate a user we are not authorized to impersonate.
    session_req.configuration['impala.doas.user'] = '******'
    rejected_resp = self.hs2_client.OpenSession(session_req)
    rejection = 'User \'hue\' is not authorized to impersonate \'some_user\''
    assert rejection in str(rejected_resp)

    self.socket.close()
    self.socket = None
def test_get_schemas(self):
    """Run GetSchemas, fetch its results, and check how the profile labels it."""
    schemas_req = TCLIService.TGetSchemasReq()
    schemas_req.sessionHandle = self.session_handle
    schemas_resp = self.hs2_client.GetSchemas(schemas_req)
    TestHS2.check_response(schemas_resp)
    op_handle = schemas_resp.operationHandle

    rows_req = TCLIService.TFetchResultsReq()
    rows_req.operationHandle = op_handle
    rows_req.maxRows = 100
    rows_resp = self.hs2_client.FetchResults(rows_req)
    TestHS2.check_response(rows_resp)

    query_id = operation_id_to_query_id(op_handle.operationId)
    profile = self.impalad_test_service.read_query_profile_page(query_id)

    # Test fix for IMPALA-619
    for expected_line in ("Sql Statement: GET_SCHEMAS", "Query Type: DDL"):
        assert expected_line in profile
def get_log(self, query_stmt):
    """Execute query_stmt, fetch up to 100 rows, and return the operation's log.

    Every HS2 response along the way is checked with TestHS2.check_response.
    """
    run_req = TCLIService.TExecuteStatementReq()
    run_req.sessionHandle = self.session_handle
    run_req.statement = query_stmt
    run_resp = self.hs2_client.ExecuteStatement(run_req)
    TestHS2.check_response(run_resp)
    op_handle = run_resp.operationHandle

    # Fetch results to make sure errors are generated
    rows_req = TCLIService.TFetchResultsReq()
    rows_req.operationHandle = op_handle
    rows_req.maxRows = 100
    rows_resp = self.hs2_client.FetchResults(rows_req)
    TestHS2.check_response(rows_resp)

    log_req = TCLIService.TGetLogReq()
    log_req.operationHandle = op_handle
    log_resp = self.hs2_client.GetLog(log_req)
    TestHS2.check_response(log_resp)
    return log_resp.log
def test_get_operation_status(self):
    """Tests that GetOperationStatus returns a valid result for a running query"""
    query_req = TCLIService.TExecuteStatementReq()
    query_req.sessionHandle = self.session_handle
    query_req.statement = "SELECT COUNT(*) FROM functional.alltypes"
    query_resp = self.hs2_client.ExecuteStatement(query_req)
    TestHS2.check_response(query_resp)

    status_req = TCLIService.TGetOperationStatusReq()
    status_req.operationHandle = query_resp.operationHandle
    status_resp = self.hs2_client.GetOperationStatus(status_req)
    TestHS2.check_response(status_resp)

    # Any of these states is accepted.
    states = TCLIService.TOperationState
    acceptable_states = (states.INITIALIZED_STATE,
                         states.RUNNING_STATE,
                         states.FINISHED_STATE)
    assert status_resp.operationState in acceptable_states
def test_execute_select(self):
    """Test that a simple select statement works"""
    select_req = TCLIService.TExecuteStatementReq()
    select_req.sessionHandle = self.session_handle
    select_req.statement = "SELECT COUNT(*) FROM functional.alltypes"
    select_resp = self.hs2_client.ExecuteStatement(select_req)
    TestHS2.check_response(select_resp)

    rows_req = TCLIService.TFetchResultsReq()
    rows_req.operationHandle = select_resp.operationHandle
    rows_req.maxRows = 100
    rows_resp = self.hs2_client.FetchResults(rows_req)
    TestHS2.check_response(rows_resp)

    result_set = rows_resp.results
    assert len(result_set.rows) == 1
    assert result_set.startRowOffset == 0

    # A set hasMoreRows flag is reported as an expected failure (IMPALA-558)
    # rather than a hard failure.
    try:
        assert not rows_resp.hasMoreRows
    except AssertionError:
        pytest.xfail("IMPALA-558")
def fetch_fail(self, handle, orientation, expected_error_prefix):
    """Issue a fetch of up to 100 rows that is expected to fail.

    Fetches from the operation identified by handle using the given
    orientation, and checks that the response has ERROR_STATUS with an error
    message matching expected_error_prefix. Returns the fetch response.
    """
    request = TCLIService.TFetchResultsReq()
    request.operationHandle = handle
    request.orientation = orientation
    request.maxRows = 100
    response = self.hs2_client.FetchResults(request)
    HS2TestSuite.check_response(
        response, TCLIService.TStatusCode.ERROR_STATUS, expected_error_prefix)
    return response
def test_malformed_get_operation_status(self):
    """Tests that a short guid / secret returns an error (regression would be to
    crash impalad)"""
    bad_id = TCLIService.THandleIdentifier()
    bad_id.guid = "short"
    bad_id.secret = "short_secret"
    guid_size = len(bad_id.guid)
    secret_size = len(bad_id.secret)
    # Sanity-check the fixture: neither field has the expected size of 16.
    assert guid_size != 16
    assert secret_size != 16

    bad_handle = TCLIService.TOperationHandle()
    bad_handle.operationId = bad_id
    bad_handle.operationType = TCLIService.TOperationType.EXECUTE_STATEMENT
    bad_handle.hasResultSet = False

    status_req = TCLIService.TGetOperationStatusReq()
    status_req.operationHandle = bad_handle
    status_resp = self.hs2_client.GetOperationStatus(status_req)
    TestHS2.check_response(status_resp, TCLIService.TStatusCode.ERROR_STATUS)

    err_msg = "(guid size: %d, expected 16, secret size: %d, expected 16)" \
        % (guid_size, secret_size)
    assert err_msg in status_resp.status.errorMessage
def test_socket_close_forces_session_close(self):
    """Test that closing the underlying socket forces the associated session to
    close. See IMPALA-564"""
    sessions_metric = "impala-server.num-open-hiveserver2-sessions"

    session_resp = self.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
    TestHS2.check_response(session_resp)

    open_before_close = self.impalad_test_service.get_metric_value(sessions_metric)
    assert open_before_close > 0

    self.socket.close()
    self.socket = None
    # The open-session count must drop by one once the socket is gone.
    self.impalad_test_service.wait_for_metric_value(
        sessions_metric, open_before_close - 1)
def test_get_profile(self):
    """Check that the runtime profile contains the statement text both before
    and after the operation is closed."""
    query_req = TCLIService.TExecuteStatementReq()
    query_req.sessionHandle = self.session_handle
    query_req.statement = "SELECT COUNT(2) FROM functional.alltypes"
    query_resp = self.hs2_client.ExecuteStatement(query_req)
    TestHS2.check_response(query_resp)
    op_handle = query_resp.operationHandle

    profile_req = ImpalaHiveServer2Service.TGetRuntimeProfileReq()
    profile_req.operationHandle = op_handle
    profile_req.sessionHandle = self.session_handle

    # Profile while the operation is still open.
    open_profile = self.hs2_client.GetRuntimeProfile(profile_req)
    TestHS2.check_response(open_profile)
    assert query_req.statement in open_profile.profile

    close_req = TCLIService.TCloseOperationReq()
    close_req.operationHandle = op_handle
    close_resp = self.hs2_client.CloseOperation(close_req)
    TestHS2.check_response(close_resp)

    # Profile again after the operation has been closed.
    closed_profile = self.hs2_client.GetRuntimeProfile(profile_req)
    TestHS2.check_response(closed_profile)
    assert query_req.statement in closed_profile.profile
def fetch(self, handle, orientation, size, expected_num_rows=None):
    """Fetch at most size rows from the operation identified by handle.

    Uses the given fetch orientation. Checks that the fetch response passes
    HS2TestSuite.check_response, and asserts that the number of rows returned
    equals expected_num_rows if one was given, or size otherwise. Returns the
    fetch response.
    """
    request = TCLIService.TFetchResultsReq()
    request.operationHandle = handle
    request.orientation = orientation
    request.maxRows = size
    response = self.hs2_client.FetchResults(request)
    HS2TestSuite.check_response(response)

    wanted_rows = size if expected_num_rows is None else expected_num_rows
    assert len(response.results.rows) == wanted_rows
    return response
def test_constant_query_stmts(self):
    """Tests query stmts that return a constant result set.

    These queries are handled somewhat specially by Impala, therefore, we test
    them separately. We expect FETCH_FIRST to always succeed if result caching
    is enabled.
    """
    fetch_first = TCLIService.TFetchOrientation.FETCH_FIRST
    fetch_next = TCLIService.TFetchOrientation.FETCH_NEXT

    # Tests a query with limit 0.
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.confOverlay = dict()
    execute_statement_req.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = "10"
    execute_statement_req.statement =\
        "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 0"
    execute_statement_resp = self.hs2_client.ExecuteStatement(
        execute_statement_req)
    # Check the execute status up front so that a rejected statement fails
    # here, not later at the first fetch.
    HS2TestSuite.check_response(execute_statement_resp)
    for i in xrange(0, 3):
        # Fetch some rows. Expect to get 0 rows.
        self.fetch(execute_statement_resp.operationHandle, fetch_next, i * 10, 0)
        self.__verify_num_cached_rows(0)
        # Fetch some rows with FETCH_FIRST. Expect to get 0 rows.
        self.fetch(execute_statement_resp.operationHandle, fetch_first, i * 10, 0)
        self.__verify_num_cached_rows(0)
    self.close(execute_statement_resp.operationHandle)

    # Tests a constant select.
    execute_statement_req.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = "10"
    execute_statement_req.statement = "SELECT 1, 1.0, 'a', trim('abc'), NULL"
    execute_statement_resp = self.hs2_client.ExecuteStatement(
        execute_statement_req)
    HS2TestSuite.check_response(execute_statement_resp)
    # Fetch 100 rows with FETCH_FIRST. Expect to get 1 row.
    self.fetch(execute_statement_resp.operationHandle, fetch_first, 100, 1)
    self.__verify_num_cached_rows(1)
    for i in xrange(0, 3):
        # Fetch some rows with FETCH_FIRST. Expect to get 1 row.
        self.fetch(execute_statement_resp.operationHandle, fetch_first, i * 10, 1)
        self.__verify_num_cached_rows(1)
        # Fetch some more rows. The single row was already returned, so expect
        # to get 0 rows; the row stays cached.
        self.fetch(execute_statement_resp.operationHandle, fetch_next, i * 10, 0)
        self.__verify_num_cached_rows(1)
    self.close(execute_statement_resp.operationHandle)
def test_parallel_insert(self):
    """Tests parallel inserts with result set caching on.

    Parallel inserts have a coordinator instance but no coordinator fragment,
    so the query mem tracker is initialized differently. (IMPALA-963)
    """
    self.client.set_configuration({'sync_ddl': 1})
    # Create the target database and an empty clone of tpch.orders in it.
    setup_stmts = ("create database %s",
                   "create table %s.orderclone like tpch.orders")
    for stmt_template in setup_stmts:
        self.client.execute(stmt_template % self.TEST_DB)

    insert_req = TCLIService.TExecuteStatementReq()
    insert_req.sessionHandle = self.session_handle
    insert_req.confOverlay = {self.IMPALA_RESULT_CACHING_OPT: "10"}
    insert_req.statement = ("insert overwrite %s.orderclone "
                            "select * from tpch.orders "
                            "where o_orderkey < 0" % self.TEST_DB)
    insert_resp = self.hs2_client.ExecuteStatement(insert_req)
    HS2TestSuite.check_response(insert_resp)
def test_multiple_sessions(self):
    """Test that multiple sessions on the same socket connection are allowed"""
    sessions_metric = "impala-server.num-open-hiveserver2-sessions"
    baseline = self.impalad_test_service.get_metric_value(sessions_metric)

    seen_handles = []
    for _ in xrange(5):
        session_resp = self.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
        TestHS2.check_response(session_resp)
        # Check that all sessions get different IDs
        handle = session_resp.sessionHandle
        assert handle not in seen_handles
        seen_handles.append(handle)

    self.impalad_test_service.wait_for_metric_value(sessions_metric, baseline + 5)

    # After the shared socket is closed, the count must return to the baseline.
    self.socket.close()
    self.socket = None
    self.impalad_test_service.wait_for_metric_value(sessions_metric, baseline)
def test_query_stmts(self):
    """Run two single-row queries and check that each returns exactly one row."""
    single_row_queries = (
        "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 1",
        "SELECT * FROM functional.decimal_tbl ORDER BY d1 LIMIT 1",
    )

    # One request object is reused; only the statement changes per query.
    request = TCLIService.TExecuteStatementReq()
    request.sessionHandle = self.session_handle
    for query in single_row_queries:
        request.statement = query
        response = self.hs2_client.ExecuteStatement(request)
        HS2TestSuite.check_response(response)
        fetched = self.fetch(response.operationHandle,
                             TCLIService.TFetchOrientation.FETCH_NEXT, 1, 1)
        assert len(fetched.results.rows) == 1
        self.close(response.operationHandle)
def test_impersonation(self):
    """End-to-end impersonation + authorization test.

    Expects authorization to be configured before running this test.
    """
    # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
    # the module within this test function, rather than as a top-level import. This way
    # the tests in that module will not get pulled when executing this test suite. The fix
    # is to split the utility code out of the TestHS2 class and support HS2 as a first
    # class citizen in our test framework.
    from tests.hs2.test_hs2 import TestHS2

    session_req = TCLIService.TOpenSessionReq()
    # Connected user is 'hue'
    session_req.username = '******'
    # Delegated user is the current user
    session_req.configuration = {'impala.doas.user': getuser()}
    session_resp = self.hs2_client.OpenSession(session_req)
    TestHS2.check_response(session_resp)
    self.session_handle = session_resp.sessionHandle

    # Query a table we are not authorized to access; the denial must name the
    # delegated user and must be audited.
    denied_req = TCLIService.TExecuteStatementReq()
    denied_req.sessionHandle = self.session_handle
    denied_req.statement = "describe tpch_seq.lineitem"
    denied_resp = self.hs2_client.ExecuteStatement(denied_req)
    expected_error = 'User \'%s\' does not have privileges to access' % getuser()
    assert expected_error in str(denied_resp)

    audit_found = self.__wait_for_audit_record(user=getuser(), impersonator='hue')
    assert audit_found, 'No matching audit event recorded in time window'

    # The same operation on a table we are authorized to access.
    allowed_req = TCLIService.TExecuteStatementReq()
    allowed_req.sessionHandle = self.session_handle
    allowed_req.statement = "describe tpch.lineitem"
    allowed_resp = self.hs2_client.ExecuteStatement(allowed_req)
    TestHS2.check_response(allowed_resp)

    # Verify the correct user information is in the runtime profile
    query_id = operation_id_to_query_id(allowed_resp.operationHandle.operationId)
    coordinator = self.cluster.impalads[0].service
    profile_page = coordinator.read_query_profile_page(query_id)
    self.__verify_profile_user_fields(profile_page,
                                      effective_user=getuser(),
                                      delegated_user=getuser(),
                                      connected_user='******')

    # Try to delegate to a user we are not authorized to delegate to.
    session_req.configuration['impala.doas.user'] = '******'
    rejected_resp = self.hs2_client.OpenSession(session_req)
    rejection = 'User \'hue\' is not authorized to delegate to \'some_user\''
    assert rejection in str(rejected_resp)

    # Create a new session which does not have a do_as_user.
    session_req.username = '******'
    session_req.configuration = {}
    plain_resp = self.hs2_client.OpenSession(session_req)
    TestHS2.check_response(plain_resp)

    # Run a simple query, which should succeed.
    simple_req = TCLIService.TExecuteStatementReq()
    simple_req.sessionHandle = plain_resp.sessionHandle
    simple_req.statement = "select 1"
    simple_resp = self.hs2_client.ExecuteStatement(simple_req)
    TestHS2.check_response(simple_resp)

    # Verify the correct user information is in the runtime profile. Since there is
    # no do_as_user the Delegated User field should be empty.
    query_id = operation_id_to_query_id(simple_resp.operationHandle.operationId)
    coordinator = self.cluster.impalads[0].service
    profile_page = coordinator.read_query_profile_page(query_id)
    self.__verify_profile_user_fields(profile_page,
                                      effective_user='******',
                                      delegated_user='',
                                      connected_user='******')

    self.socket.close()
    self.socket = None
def test_non_query_stmts(self):
    """Tests Impala's limited support for the FETCH_FIRST fetch orientation for
    non-query stmts that return a result set, such as SHOW, COMPUTE STATS, etc.

    The results of non-query statements are always cached entirely, and
    therefore, the cache can never be exhausted, i.e., FETCH_FIRST should
    always succeed. However, we only allow FETCH_FIRST on non-query stmts if
    query caching was enabled by the client for consistency. We use a
    'show stats' stmt as a representative of these types of non-query stmts.
    """
    show_stmt = "show table stats functional.alltypes"
    first = TCLIService.TFetchOrientation.FETCH_FIRST
    forward = TCLIService.TFetchOrientation.FETCH_NEXT

    # Negative tests for the result caching option.
    self.__test_invalid_result_caching("show table stats functional.alltypes")

    # Test that FETCH_NEXT without result caching succeeds and FETCH_FIRST fails.
    # The show stmt returns exactly 25 results.
    request = TCLIService.TExecuteStatementReq()
    request.sessionHandle = self.session_handle
    request.confOverlay = dict()
    request.statement = show_stmt
    response = self.hs2_client.ExecuteStatement(request)
    HS2TestSuite.check_response(response)
    # Four FETCH_NEXT requests of 10 rows each: 10, 10, the remaining 5, then 0.
    for rows_expected in (10, 10, 5, 0):
        self.fetch(response.operationHandle, forward, 10, rows_expected)
        # Fetch with the FETCH_FIRST orientation, expecting an error.
        # After a failed FETCH_FIRST, the client can still resume FETCH_NEXT.
        self.fetch_fail(
            response.operationHandle, first,
            "Restarting of fetch requires enabling of query result caching")
    # The results of non-query stmts are not counted as 'cached'.
    self.__verify_num_cached_rows(0)

    # Tests that FETCH_FIRST always succeeds as long as result caching is enabled.
    # The show stmt returns exactly 25 results. The cache cannot be exhausted.
    request.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = "1"
    request.statement = show_stmt
    response = self.hs2_client.ExecuteStatement(request)
    HS2TestSuite.check_response(response)
    for _ in xrange(1, 5):
        self.fetch(response.operationHandle, first, 30, 25)
    # The results of non-query stmts are not counted as 'cached'.
    self.__verify_num_cached_rows(0)

    # Test combinations of FETCH_FIRST and FETCH_NEXT.
    # The show stmt returns exactly 25 results.
    request.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = "1"
    request.statement = show_stmt
    response = self.hs2_client.ExecuteStatement(request)
    HS2TestSuite.check_response(response)
    # Fetch 10 rows.
    self.fetch(response.operationHandle, forward, 10)
    # Restart the fetch asking for 20 rows.
    self.fetch(response.operationHandle, first, 20)
    # FETCH_NEXT asking for 100 rows. There are only 5 remaining rows.
    self.fetch(response.operationHandle, forward, 100, 5)
    # Restart the fetch asking for 5 rows.
    self.fetch(response.operationHandle, first, 5)
    # FETCH_NEXT asking for 100 rows. There are only 20 remaining rows.
    self.fetch(response.operationHandle, forward, 100, 20)
def test_query_stmts(self):
    """Tests Impala's limited support for the FETCH_FIRST fetch orientation for
    queries.

    Impala permits FETCH_FIRST for a particular query iff result caching is
    enabled via the 'impala.resultset.cache.size' confOverlay option.
    FETCH_FIRST will succeed as long as all previously fetched rows fit into
    the bounded result cache. Regardless of whether a FETCH_FIRST succeeds or
    not, clients may always resume fetching with FETCH_NEXT.
    """
    fetch_first = TCLIService.TFetchOrientation.FETCH_FIRST
    fetch_next = TCLIService.TFetchOrientation.FETCH_NEXT
    query = "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 30"

    # Negative tests for the result caching option.
    self.__test_invalid_result_caching(
        "SELECT COUNT(*) FROM functional.alltypes")

    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.confOverlay = dict()

    def execute_with_cache_size(cache_size):
        """Run the 30-row query with the given result-cache size and return its
        operation handle, after checking that the execute succeeded."""
        execute_statement_req.confOverlay[
            self.IMPALA_RESULT_CACHING_OPT] = cache_size
        execute_statement_req.statement = query
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        # Check the execute status here so that a rejected statement fails
        # immediately, not at the first fetch.
        HS2TestSuite.check_response(execute_statement_resp)
        return execute_statement_resp.operationHandle

    # Test that FETCH_NEXT without result caching succeeds and FETCH_FIRST fails.
    execute_statement_req.statement = query
    execute_statement_resp = self.hs2_client.ExecuteStatement(
        execute_statement_req)
    HS2TestSuite.check_response(execute_statement_resp)
    handle = execute_statement_resp.operationHandle
    for i in xrange(1, 5):
        # Fetch 10 rows with the FETCH_NEXT orientation. The fourth fetch is
        # past the end of the 30-row result and returns nothing.
        expected_num_rows = 10
        if i == 4:
            expected_num_rows = 0
        self.fetch(handle, fetch_next, 10, expected_num_rows)
        # Fetch 10 rows with the FETCH_FIRST orientation, expecting an error.
        # After a failed FETCH_FIRST, the client can still resume FETCH_NEXT.
        self.fetch_fail(
            handle, fetch_first,
            "Restarting of fetch requires enabling of query result caching")
    self.__verify_num_cached_rows(0)
    self.close(handle)

    # Basic test of FETCH_FIRST where the entire result set is cached, and we
    # repeatedly fetch all results.
    handle = execute_with_cache_size("30")
    for _ in xrange(1, 5):
        self.fetch(handle, fetch_first, 30)
        self.__verify_num_cached_rows(30)
    self.close(handle)

    # Test FETCH_NEXT and FETCH_FIRST where the entire result set does not fit
    # into the cache. FETCH_FIRST will succeed as long as the fetched results
    # fit into the cache.
    handle = execute_with_cache_size("29")
    # Fetch 10 rows. They fit in the result cache.
    self.fetch(handle, fetch_next, 10)
    self.__verify_num_cached_rows(10)
    # Restart the fetch and expect success.
    self.fetch(handle, fetch_first, 10)
    # Fetch 10 more rows. The result cache has 20 rows total now.
    self.fetch(handle, fetch_next, 10)
    self.__verify_num_cached_rows(20)
    # Restart the fetch and expect success.
    self.fetch(handle, fetch_first, 10)
    self.__verify_num_cached_rows(20)
    # Fetch 10 more rows from the cache.
    self.fetch(handle, fetch_next, 10)
    self.__verify_num_cached_rows(20)
    # This fetch exhausts the result cache.
    self.fetch(handle, fetch_next, 10)
    self.__verify_num_cached_rows(0)
    # Since the cache is exhausted, FETCH_FIRST will fail.
    self.fetch_fail(
        handle, fetch_first,
        "The query result cache exceeded its limit of 29 rows. "
        "Restarting the fetch is not possible")
    self.__verify_num_cached_rows(0)
    # This fetch should succeed but return 0 rows because the stream is eos.
    self.fetch(handle, fetch_next, 10, 0)
    self.__verify_num_cached_rows(0)
    self.close(handle)

    # Test that FETCH_FIRST serves results from the cache as well as the query
    # coordinator in a single fetch request.
    handle = execute_with_cache_size("29")
    # Fetch 7 rows. They fit in the result cache.
    self.fetch(handle, fetch_next, 7)
    self.__verify_num_cached_rows(7)
    # Restart the fetch asking for 12 rows, 7 of which are served from the
    # cache and 5 from the coordinator. The result cache should have 12 rows
    # total now.
    self.fetch(handle, fetch_first, 12)
    self.__verify_num_cached_rows(12)
    # Restart the fetch asking for 40 rows. We expect 30 results returned and
    # that the cache is exhausted.
    self.fetch(handle, fetch_first, 40, 30)
    self.__verify_num_cached_rows(0)
    # Fetch next should succeed and return 0 rows (eos).
    self.fetch(handle, fetch_next, 7, 0)
    self.__verify_num_cached_rows(0)
    # Since the cache is exhausted, FETCH_FIRST will fail.
    self.fetch_fail(
        handle, fetch_first,
        "The query result cache exceeded its limit of 29 rows. "
        "Restarting the fetch is not possible")
    self.__verify_num_cached_rows(0)
    self.close(handle)

    # Test that resuming FETCH_NEXT after a failed FETCH_FIRST works.
    handle = execute_with_cache_size("10")
    # Fetch 9 rows. They fit in the result cache.
    self.fetch(handle, fetch_next, 9)
    self.__verify_num_cached_rows(9)
    # Fetch 9 rows. Cache is exhausted now.
    self.fetch(handle, fetch_next, 9)
    self.__verify_num_cached_rows(0)
    # Restarting the fetch should fail.
    self.fetch_fail(
        handle, fetch_first,
        "The query result cache exceeded its limit of 10 rows. "
        "Restarting the fetch is not possible")
    self.__verify_num_cached_rows(0)
    # Resuming FETCH_NEXT should succeed. There are 12 remaining rows to fetch.
    self.fetch(handle, fetch_next, 100, 12)
    self.__verify_num_cached_rows(0)
    self.close(handle)
def close(self, op_handle):
    """Close the operation identified by op_handle and assert that the response
    status is SUCCESS_STATUS."""
    request = TCLIService.TCloseOperationReq()
    request.operationHandle = op_handle
    response = self.hs2_client.CloseOperation(request)
    status_code = response.status.statusCode
    assert status_code == TCLIService.TStatusCode.SUCCESS_STATUS
from cli_service import TCLIService from thrift.transport import TTransport, TSocket from thrift.protocol import TBinaryProtocol parser = OptionParser() parser.add_option("--hs2_hostport", dest="hs2_hostport", default="localhost:11050", help="HiveServer2 hostport to wait for.") parser.add_option("--use_kerberos", action="store_true", default=False, help="Indicates whether the cluster is kerberized.") options, args = parser.parse_args() hs2_host, hs2_port = options.hs2_hostport.split(':') hs2_transport = create_transport(use_kerberos=options.use_kerberos, host=hs2_host, port=hs2_port, service="hiveserver2") protocol = TBinaryProtocol.TBinaryProtocol(hs2_transport) hs2_client = TCLIService.Client(protocol) # Try to connect to the HiveServer2 service and create a session now = time.time() TIMEOUT_SECONDS = 30.0 while time.time() - now < TIMEOUT_SECONDS: try: hs2_transport.open() open_session_req = TCLIService.TOpenSessionReq() open_session_req.username = getpass.getuser() resp = hs2_client.OpenSession(open_session_req) if resp.status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS: close_session_req = TCLIService.TCloseSessionReq() close_session_req.sessionHandle = resp.sessionHandle hs2_client.CloseSession(close_session_req) print "HiveServer2 service is up at %s." % options.hs2_hostport
def test_open_session(self):
    """Check that a session can be opened"""
    request = TCLIService.TOpenSessionReq()
    response = self.hs2_client.OpenSession(request)
    TestHS2.check_response(response)