Example #1
0
    def test_custom_authorization_provider(self):
        """Checks table-level authorization for a session opened as 'test_user'.

        A DESCRIBE of tpch_seq.lineitem must be answered with a privilege error
        that names the user, while the same statement against tpch.lineitem must
        pass TestHS2.check_response.
        """
        from tests.hs2.test_hs2 import TestHS2
        # User is 'test_user' (defined in the authorization policy file). The
        # privilege error asserted below names this user, so the session has to
        # be opened with exactly the same name.
        user = 'test_user'
        open_session_req = TCLIService.TOpenSessionReq()
        open_session_req.username = user
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Try to query a table we are not authorized to access.
        self.session_handle = resp.sessionHandle
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch_seq.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        expected_error = 'User \'%s\' does not have privileges to access' % user
        assert expected_error in str(execute_statement_resp)

        # Now try the same operation on a table we are authorized to access.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        TestHS2.check_response(execute_statement_resp)
Example #2
0
    def test_get_exec_summary(self):
        """Checks GetExecSummary before and after the operation is closed.

        While the operation is still open the call is expected to return
        ERROR_STATUS; once the operation has been closed it must succeed and
        return at least one summary node.
        """
        stmt_req = TCLIService.TExecuteStatementReq()
        stmt_req.statement = "SELECT COUNT(1) FROM functional.alltypes"
        stmt_req.sessionHandle = self.session_handle
        stmt_resp = self.hs2_client.ExecuteStatement(stmt_req)
        TestHS2.check_response(stmt_resp)

        summary_req = ImpalaHiveServer2Service.TGetExecSummaryReq()
        summary_req.operationHandle = stmt_resp.operationHandle
        summary_req.sessionHandle = self.session_handle

        # GetExecSummary() only works for closed queries
        open_query_resp = self.hs2_client.GetExecSummary(summary_req)
        TestHS2.check_response(open_query_resp,
                               TCLIService.TStatusCode.ERROR_STATUS)

        close_req = TCLIService.TCloseOperationReq()
        close_req.operationHandle = stmt_resp.operationHandle
        close_resp = self.hs2_client.CloseOperation(close_req)
        TestHS2.check_response(close_resp)

        # The very same request is re-sent now that the operation is closed.
        closed_query_resp = self.hs2_client.GetExecSummary(summary_req)
        TestHS2.check_response(closed_query_resp)
        assert len(closed_query_resp.summary.nodes) > 0
Example #3
0
    def test_close_session(self):
        """Opens a new session and verifies that closing it succeeds."""
        open_resp = self.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
        TestHS2.check_response(open_resp)

        close_req = TCLIService.TCloseSessionReq()
        close_req.sessionHandle = open_resp.sessionHandle
        close_resp = self.hs2_client.CloseSession(close_req)
        TestHS2.check_response(close_resp)
Example #4
0
    def test_double_close_session(self):
        """Verifies that closing the same session twice fails the second time."""
        open_resp = self.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
        TestHS2.check_response(open_resp)

        close_req = TCLIService.TCloseSessionReq()
        close_req.sessionHandle = open_resp.sessionHandle

        # First close of a live session is expected to succeed.
        first_close_resp = self.hs2_client.CloseSession(close_req)
        TestHS2.check_response(first_close_resp)

        # Double close should be an error
        second_close_resp = self.hs2_client.CloseSession(close_req)
        TestHS2.check_response(second_close_resp,
                               TCLIService.TStatusCode.ERROR_STATUS)
Example #5
0
 def add_session(self):
     """Runs fn(self) with an open session stored in self.session_handle.

     A session is opened for the user returned by getuser(), fn(self) is
     invoked, and the session is closed again even when fn raises. Afterwards
     self.session_handle is reset to None.

     NOTE(review): fn is not defined in this block; presumably it is bound by
     an enclosing decorator scope -- confirm against the surrounding code.
     """
     session_req = TCLIService.TOpenSessionReq()
     session_req.username = getuser()
     session_req.configuration = {}
     open_resp = self.hs2_client.OpenSession(session_req)
     HS2TestSuite.check_response(open_resp)
     self.session_handle = open_resp.sessionHandle
     try:
         fn(self)
     finally:
         # Close the session that was opened above, not whatever
         # self.session_handle may have been changed to by fn.
         close_req = TCLIService.TCloseSessionReq()
         close_req.sessionHandle = open_resp.sessionHandle
         close_resp = self.hs2_client.CloseSession(close_req)
         HS2TestSuite.check_response(close_resp)
         self.session_handle = None
Example #6
0
 def setup(self):
     """Opens a buffered binary-protocol connection and builds the HS2 client.

     The address is taken from IMPALAD_HS2_HOST_PORT ("host:port"). The socket,
     transport, protocol and client are stored on self as socket, transport,
     protocol and hs2_client.
     """
     host, port = IMPALAD_HS2_HOST_PORT.split(":")
     sock = TSocket(host, port)
     self.socket = sock
     buffered = TBufferedTransport(sock)
     self.transport = buffered
     buffered.open()
     binary_protocol = TBinaryProtocol.TBinaryProtocol(buffered)
     self.protocol = binary_protocol
     self.hs2_client = TCLIService.Client(binary_protocol)
Example #7
0
    def __test_invalid_result_caching(self, sql_stmt):
        """Checks that invalid result-cache sizes are rejected for sql_stmt.

        The statement is executed once per invalid cache-size setting. Each
        execution must return ERROR_STATUS with the expected message, and the
        number of cached rows must stay at 0.
        """
        request = TCLIService.TExecuteStatementReq()
        request.sessionHandle = self.session_handle
        request.statement = sql_stmt
        request.confOverlay = dict()

        # (cache-size setting, error text handed to check_response) pairs:
        #  * a malformed, non-numeric size;
        #  * a size exceeding the per-Impalad maximum. The default maximum
        #    result-cache size is 100000.
        invalid_settings = (
            ("bad_number",
             "Invalid value 'bad_number' for 'impala.resultset.cache.size' option"),
            ("100001",
             "Requested result-cache size of 100001 exceeds Impala's maximum of 100000"),
        )
        for cache_size, expected_error in invalid_settings:
            request.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = cache_size
            response = self.hs2_client.ExecuteStatement(request)
            HS2TestSuite.check_response(
                response, TCLIService.TStatusCode.ERROR_STATUS, expected_error)
            self.__verify_num_cached_rows(0)
Example #8
0
    def test_impersonation(self):
        """End-to-end impersonation + authorization test.

        Expects authorization to be configured before running this test.
        Connects as 'hue' on behalf of the current OS user, checks that the
        privilege error and the audit record refer to the current user, checks
        that an authorized table can be described, and finally checks that
        impersonating 'some_user' is refused. The socket is closed at the end.
        """
        # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
        # the module within this test function, rather than as a top-level import. This way
        # the tests in that module will not get pulled when executing this test suite. The fix
        # is to split the utility code out of the TestHS2 class and support HS2 as a first
        # class citizen in our test framework.
        from tests.hs2.test_hs2 import TestHS2
        # The audit-record check and the "not authorized to impersonate" message
        # below both name 'hue' as the connected user and 'some_user' as the
        # rejected target, so the requests must use exactly those names.
        connected_user = 'hue'
        unauthorized_user = 'some_user'

        open_session_req = TCLIService.TOpenSessionReq()
        open_session_req.username = connected_user
        open_session_req.configuration = dict()
        open_session_req.configuration['impala.doas.user'] = getuser()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Try to query a table we are not authorized to access.
        self.session_handle = resp.sessionHandle
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch_seq.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        assert 'User \'%s\' does not have privileges to access' % getuser() in\
            str(execute_statement_resp)

        assert self.__wait_for_audit_record(user=getuser(),
                                            impersonator=connected_user),\
            'No matching audit event recorded in time window'

        # Now try the same operation on a table we are authorized to access.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)

        TestHS2.check_response(execute_statement_resp)

        # Try to impersonate as a user we are not authorized to impersonate.
        open_session_req.configuration['impala.doas.user'] = unauthorized_user
        resp = self.hs2_client.OpenSession(open_session_req)
        assert 'User \'hue\' is not authorized to impersonate \'some_user\'' in str(
            resp)

        self.socket.close()
        self.socket = None
Example #9
0
    def test_get_schemas(self):
        """Runs GetSchemas, fetches its results and inspects the query profile.

        The profile page of the resulting query must show the GET_SCHEMAS
        statement and the DDL query type.
        """
        schemas_req = TCLIService.TGetSchemasReq()
        schemas_req.sessionHandle = self.session_handle
        schemas_resp = self.hs2_client.GetSchemas(schemas_req)
        TestHS2.check_response(schemas_resp)
        op_handle = schemas_resp.operationHandle

        fetch_req = TCLIService.TFetchResultsReq()
        fetch_req.maxRows = 100
        fetch_req.operationHandle = op_handle
        fetch_resp = self.hs2_client.FetchResults(fetch_req)
        TestHS2.check_response(fetch_resp)

        query_id = operation_id_to_query_id(op_handle.operationId)
        profile_page = self.impalad_test_service.read_query_profile_page(
            query_id)

        # Test fix for IMPALA-619
        for expected_text in ("Sql Statement: GET_SCHEMAS", "Query Type: DDL"):
            assert expected_text in profile_page
Example #10
0
    def get_log(self, query_stmt):
        """Executes query_stmt, fetches up to 100 rows and returns the GetLog text.

        Every response (execute, fetch, get-log) is validated with
        TestHS2.check_response before the log is returned.
        """
        stmt_req = TCLIService.TExecuteStatementReq()
        stmt_req.statement = query_stmt
        stmt_req.sessionHandle = self.session_handle
        stmt_resp = self.hs2_client.ExecuteStatement(stmt_req)
        TestHS2.check_response(stmt_resp)
        op_handle = stmt_resp.operationHandle

        # Fetch results to make sure errors are generated
        fetch_req = TCLIService.TFetchResultsReq()
        fetch_req.maxRows = 100
        fetch_req.operationHandle = op_handle
        TestHS2.check_response(self.hs2_client.FetchResults(fetch_req))

        log_req = TCLIService.TGetLogReq()
        log_req.operationHandle = op_handle
        log_resp = self.hs2_client.GetLog(log_req)
        TestHS2.check_response(log_resp)
        return log_resp.log
Example #11
0
    def test_get_operation_status(self):
        """Checks that GetOperationStatus reports a valid state for a submitted query.

        The reported state must be one of INITIALIZED, RUNNING or FINISHED.
        """
        stmt_req = TCLIService.TExecuteStatementReq()
        stmt_req.statement = "SELECT COUNT(*) FROM functional.alltypes"
        stmt_req.sessionHandle = self.session_handle
        stmt_resp = self.hs2_client.ExecuteStatement(stmt_req)
        TestHS2.check_response(stmt_resp)

        status_req = TCLIService.TGetOperationStatusReq()
        status_req.operationHandle = stmt_resp.operationHandle
        status_resp = self.hs2_client.GetOperationStatus(status_req)
        TestHS2.check_response(status_resp)

        states = TCLIService.TOperationState
        acceptable_states = [states.INITIALIZED_STATE,
                             states.RUNNING_STATE,
                             states.FINISHED_STATE]
        assert status_resp.operationState in acceptable_states
Example #12
0
    def test_execute_select(self):
        """Test that a simple select statement works.

        Executes a COUNT(*) query, fetches up to 100 rows and expects exactly
        one row starting at offset 0. If the server still reports more rows,
        the test is marked as an expected failure (IMPALA-558).
        """
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "SELECT COUNT(*) FROM functional.alltypes"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = execute_statement_resp.operationHandle
        fetch_results_req.maxRows = 100
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        TestHS2.check_response(fetch_results_resp)

        assert len(fetch_results_resp.results.rows) == 1
        assert fetch_results_resp.results.startRowOffset == 0

        # Test the flag directly instead of catching an AssertionError: an
        # assert used for control flow is stripped under "python -O", which
        # would silently skip the xfail.
        if fetch_results_resp.hasMoreRows:
            pytest.xfail("IMPALA-558")
Example #13
0
 def fetch_fail(self, handle, orientation, expected_error_prefix):
     """Issues a fetch that is expected to fail and returns its response.

     Requests up to 100 rows for the operation identified by handle using the
     given fetch orientation, then has HS2TestSuite.check_response verify an
     ERROR_STATUS together with expected_error_prefix.
     """
     request = TCLIService.TFetchResultsReq()
     request.maxRows = 100
     request.orientation = orientation
     request.operationHandle = handle
     response = self.hs2_client.FetchResults(request)
     error_status = TCLIService.TStatusCode.ERROR_STATUS
     HS2TestSuite.check_response(response, error_status, expected_error_prefix)
     return response
Example #14
0
    def test_malformed_get_operation_status(self):
        """Checks that a wrongly sized guid / secret is rejected with an error.

        A regression here would be to crash impalad. The returned error message
        must report the actual guid and secret sizes next to the expected 16.
        """
        op_id = TCLIService.THandleIdentifier()
        op_id.guid = "short"
        op_id.secret = "short_secret"
        guid_size = len(op_id.guid)
        secret_size = len(op_id.secret)
        # Guard the premise of the test: both values must be malformed.
        assert guid_size != 16
        assert secret_size != 16

        bad_handle = TCLIService.TOperationHandle()
        bad_handle.operationId = op_id
        bad_handle.operationType = TCLIService.TOperationType.EXECUTE_STATEMENT
        bad_handle.hasResultSet = False

        status_req = TCLIService.TGetOperationStatusReq()
        status_req.operationHandle = bad_handle
        status_resp = self.hs2_client.GetOperationStatus(status_req)
        TestHS2.check_response(status_resp,
                               TCLIService.TStatusCode.ERROR_STATUS)

        expected_fragment = \
            "(guid size: %d, expected 16, secret size: %d, expected 16)" \
            % (guid_size, secret_size)
        assert expected_fragment in status_resp.status.errorMessage
Example #15
0
    def test_socket_close_forces_session_close(self):
        """Checks that closing the socket also closes the session opened on it.

        See IMPALA-564. The open-session metric is read after opening a
        session, and must drop by one once the socket has been closed.
        """
        metric_name = "impala-server.num-open-hiveserver2-sessions"

        open_resp = self.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
        TestHS2.check_response(open_resp)
        sessions_before_close = self.impalad_test_service.get_metric_value(
            metric_name)
        assert sessions_before_close > 0

        self.socket.close()
        self.socket = None
        self.impalad_test_service.wait_for_metric_value(
            metric_name, sessions_before_close - 1)
Example #16
0
    def test_get_profile(self):
        """Checks that GetRuntimeProfile works before and after closing the operation.

        In both cases the call must succeed and the returned profile must
        contain the text of the executed statement.
        """
        stmt_req = TCLIService.TExecuteStatementReq()
        stmt_req.statement = "SELECT COUNT(2) FROM functional.alltypes"
        stmt_req.sessionHandle = self.session_handle
        stmt_resp = self.hs2_client.ExecuteStatement(stmt_req)
        TestHS2.check_response(stmt_resp)

        profile_req = ImpalaHiveServer2Service.TGetRuntimeProfileReq()
        profile_req.sessionHandle = self.session_handle
        profile_req.operationHandle = stmt_resp.operationHandle

        def check_profile_mentions_statement():
            # Fetches the profile with the shared request and validates it.
            profile_resp = self.hs2_client.GetRuntimeProfile(profile_req)
            TestHS2.check_response(profile_resp)
            assert stmt_req.statement in profile_resp.profile

        # Profile of the still-open operation.
        check_profile_mentions_statement()

        close_req = TCLIService.TCloseOperationReq()
        close_req.operationHandle = stmt_resp.operationHandle
        close_resp = self.hs2_client.CloseOperation(close_req)
        TestHS2.check_response(close_resp)

        # Profile of the closed operation.
        check_profile_mentions_statement()
Example #17
0
 def fetch(self, handle, orientation, size, expected_num_rows=None):
     """Fetches rows for an operation and checks how many came back.

     Requests at most size rows from the operation identified by handle, using
     the given fetch orientation. The response must carry a success status, and
     the number of returned rows must equal expected_num_rows if one was given,
     or size otherwise. Returns the fetch response.
     """
     request = TCLIService.TFetchResultsReq()
     request.maxRows = size
     request.orientation = orientation
     request.operationHandle = handle
     response = self.hs2_client.FetchResults(request)
     HS2TestSuite.check_response(response)
     wanted_rows = size if expected_num_rows is None else expected_num_rows
     assert len(response.results.rows) == wanted_rows
     return response
Example #18
0
    def test_constant_query_stmts(self):
        """Tests query stmts that return a constant result set.

        These queries are handled somewhat specially by Impala, therefore, we
        test them separately. We expect FETCH_FIRST to always succeed if result
        caching is enabled. Two statements are covered: a query with LIMIT 0
        and a constant select that yields a single row.
        """
        # Tests a query with limit 0.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.confOverlay = dict()
        execute_statement_req.confOverlay[
            self.IMPALA_RESULT_CACHING_OPT] = "10"
        execute_statement_req.statement =\
          "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 0"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        # Fail right here if the statement was rejected, instead of failing
        # later inside a fetch on an unusable operation handle.
        HS2TestSuite.check_response(execute_statement_resp)
        for i in xrange(0, 3):
            # Fetch some rows. Expect to get 0 rows.
            self.fetch(execute_statement_resp.operationHandle,
                       TCLIService.TFetchOrientation.FETCH_NEXT, i * 10, 0)
            self.__verify_num_cached_rows(0)
            # Fetch some rows with FETCH_FIRST. Expect to get 0 rows.
            self.fetch(execute_statement_resp.operationHandle,
                       TCLIService.TFetchOrientation.FETCH_FIRST, i * 10, 0)
            self.__verify_num_cached_rows(0)
        self.close(execute_statement_resp.operationHandle)

        # Tests a constant select.
        execute_statement_req.confOverlay[
            self.IMPALA_RESULT_CACHING_OPT] = "10"
        execute_statement_req.statement = "SELECT 1, 1.0, 'a', trim('abc'), NULL"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        HS2TestSuite.check_response(execute_statement_resp)
        # Fetch 100 rows with FETCH_FIRST. Expect to get 1 row.
        self.fetch(execute_statement_resp.operationHandle,
                   TCLIService.TFetchOrientation.FETCH_FIRST, 100, 1)
        self.__verify_num_cached_rows(1)
        for i in xrange(0, 3):
            # Fetch some rows with FETCH_FIRST. Expect to get 1 row.
            self.fetch(execute_statement_resp.operationHandle,
                       TCLIService.TFetchOrientation.FETCH_FIRST, i * 10, 1)
            self.__verify_num_cached_rows(1)
            # Fetch some more rows with FETCH_NEXT. The single row was already
            # returned by the FETCH_FIRST above, so expect to get 0 rows.
            self.fetch(execute_statement_resp.operationHandle,
                       TCLIService.TFetchOrientation.FETCH_NEXT, i * 10, 0)
            self.__verify_num_cached_rows(1)
        self.close(execute_statement_resp.operationHandle)
Example #19
0
 def test_parallel_insert(self):
   """Runs an INSERT OVERWRITE through HS2 with result set caching enabled.

   Parallel inserts have a coordinator instance but no coordinator
   fragment, so the query mem tracker is initialized differently
   (IMPALA-963). The test creates the TEST_DB database and an orderclone
   table in it, then checks that the insert statement is accepted.
   """
   test_db = self.TEST_DB
   self.client.set_configuration({'sync_ddl': 1})
   for ddl_template in ("create database %s",
                        "create table %s.orderclone like tpch.orders"):
     self.client.execute(ddl_template % test_db)

   insert_req = TCLIService.TExecuteStatementReq()
   insert_req.sessionHandle = self.session_handle
   insert_req.confOverlay = {self.IMPALA_RESULT_CACHING_OPT: "10"}
   insert_req.statement = ("insert overwrite %s.orderclone "
                           "select * from tpch.orders "
                           "where o_orderkey < 0" % test_db)
   insert_resp = self.hs2_client.ExecuteStatement(insert_req)
   HS2TestSuite.check_response(insert_resp)
Example #20
0
    def test_multiple_sessions(self):
        """Checks that several sessions can share one socket connection.

        Five sessions are opened on the same client; each must get a distinct
        handle. The open-session metric must rise by five, and return to its
        starting value once the socket has been closed.
        """
        metric_name = "impala-server.num-open-hiveserver2-sessions"
        new_session_count = 5
        baseline = self.impalad_test_service.get_metric_value(metric_name)

        seen_handles = []
        for _ in xrange(new_session_count):
            open_resp = self.hs2_client.OpenSession(
                TCLIService.TOpenSessionReq())
            TestHS2.check_response(open_resp)
            # Check that all sessions get different IDs
            assert open_resp.sessionHandle not in seen_handles
            seen_handles.append(open_resp.sessionHandle)

        self.impalad_test_service.wait_for_metric_value(
            metric_name, baseline + 5)

        self.socket.close()
        self.socket = None
        self.impalad_test_service.wait_for_metric_value(metric_name, baseline)
Example #21
0
  def test_query_stmts(self):
    """Runs two single-row SELECTs and checks that each returns exactly one row.

    The same request object is reused for both statements; each operation is
    closed after its row has been fetched.
    """
    request = TCLIService.TExecuteStatementReq()
    request.sessionHandle = self.session_handle

    statements = (
        "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 1",
        "SELECT * FROM functional.decimal_tbl ORDER BY d1 LIMIT 1",
    )
    for statement in statements:
      request.statement = statement
      response = self.hs2_client.ExecuteStatement(request)
      HS2TestSuite.check_response(response)
      op_handle = response.operationHandle
      fetched = self.fetch(op_handle,
                           TCLIService.TFetchOrientation.FETCH_NEXT, 1, 1)
      assert len(fetched.results.rows) == 1
      self.close(op_handle)
Example #22
0
    def test_impersonation(self):
        """End-to-end impersonation + authorization test.

        Expects authorization to be configured before running this test.
        Steps:
          1. Connect as 'hue', delegating to the current OS user; check the
             privilege error, the audit record and the user fields in the
             runtime profile.
          2. Check that delegating to 'some_user' is refused.
          3. Open a session without a delegated user, run a trivial query and
             check that the profile's Delegated User field is empty.
        The socket is closed at the end.
        """
        # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
        # the module within this test function, rather than as a top-level import. This way
        # the tests in that module will not get pulled when executing this test suite. The fix
        # is to split the utility code out of the TestHS2 class and support HS2 as a first
        # class citizen in our test framework.
        from tests.hs2.test_hs2 import TestHS2
        # The audit-record check and the "not authorized to delegate" message
        # below both name 'hue' as the connected user and 'some_user' as the
        # rejected target, so the requests must use exactly those names.
        connected_user = 'hue'
        unauthorized_user = 'some_user'

        open_session_req = TCLIService.TOpenSessionReq()
        # Connected user is 'hue'
        open_session_req.username = connected_user
        open_session_req.configuration = dict()
        # Delegated user is the current user
        open_session_req.configuration['impala.doas.user'] = getuser()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Try to query a table we are not authorized to access.
        self.session_handle = resp.sessionHandle
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch_seq.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        assert 'User \'%s\' does not have privileges to access' % getuser() in\
            str(execute_statement_resp)

        assert self.__wait_for_audit_record(user=getuser(),
                                            impersonator=connected_user),\
            'No matching audit event recorded in time window'

        # Now try the same operation on a table we are authorized to access.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)

        TestHS2.check_response(execute_statement_resp)

        # Verify the correct user information is in the runtime profile
        query_id = operation_id_to_query_id(
            execute_statement_resp.operationHandle.operationId)
        profile_page = self.cluster.impalads[
            0].service.read_query_profile_page(query_id)
        self.__verify_profile_user_fields(profile_page,
                                          effective_user=getuser(),
                                          delegated_user=getuser(),
                                          connected_user=connected_user)

        # Try a user we are not authorized to delegate to.
        open_session_req.configuration['impala.doas.user'] = unauthorized_user
        resp = self.hs2_client.OpenSession(open_session_req)
        assert 'User \'hue\' is not authorized to delegate to \'some_user\'' in str(
            resp)

        # Create a new session which does not have a do_as_user.
        # NOTE(review): the user name of this second session and the matching
        # profile expectations below look masked in this copy of the file. They
        # agree with each other, so they are kept unchanged -- confirm the
        # intended user name.
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Run a simple query, which should succeed.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = resp.sessionHandle
        execute_statement_req.statement = "select 1"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

        # Verify the correct user information is in the runtime profile. Since there is
        # no do_as_user the Delegated User field should be empty.
        query_id = operation_id_to_query_id(
            execute_statement_resp.operationHandle.operationId)
        profile_page = self.cluster.impalads[
            0].service.read_query_profile_page(query_id)
        self.__verify_profile_user_fields(profile_page,
                                          effective_user='******',
                                          delegated_user='',
                                          connected_user='******')

        self.socket.close()
        self.socket = None
Example #23
0
    def test_non_query_stmts(self):
        """Tests FETCH_FIRST support for non-query stmts that return a result set.

        Covers statements such as SHOW, COMPUTE STATS, etc. The results of
        non-query statements are always cached entirely, and therefore the
        cache can never be exhausted, i.e., FETCH_FIRST should always succeed.
        However, for consistency FETCH_FIRST is only allowed on non-query stmts
        if query caching was enabled by the client. A 'show table stats' stmt
        is used as a representative of these types of non-query stmts.
        """
        show_stmt = "show table stats functional.alltypes"
        fetch_next = TCLIService.TFetchOrientation.FETCH_NEXT
        fetch_first = TCLIService.TFetchOrientation.FETCH_FIRST

        # Negative tests for the result caching option.
        self.__test_invalid_result_caching(
            "show table stats functional.alltypes")

        # Test that FETCH_NEXT without result caching succeeds and FETCH_FIRST fails.
        # The show stmt returns exactly 25 results.
        request = TCLIService.TExecuteStatementReq()
        request.sessionHandle = self.session_handle
        request.confOverlay = dict()
        request.statement = show_stmt
        response = self.hs2_client.ExecuteStatement(request)
        HS2TestSuite.check_response(response)
        # Four FETCH_NEXT calls of 10 rows each drain the 25 results as
        # 10, 10, 5 and finally 0 rows.
        for expected_num_rows in (10, 10, 5, 0):
            self.fetch(response.operationHandle, fetch_next, 10,
                       expected_num_rows)
            # Fetch with the FETCH_FIRST orientation, expecting an error.
            # After a failed FETCH_FIRST, the client can still resume FETCH_NEXT.
            self.fetch_fail(
                response.operationHandle,
                fetch_first,
                "Restarting of fetch requires enabling of query result caching"
            )
            # The results of non-query stmts are not counted as 'cached'.
            self.__verify_num_cached_rows(0)

        # Tests that FETCH_FIRST always succeeds as long as result caching is enabled.
        # The show stmt returns exactly 25 results. The cache cannot be exhausted.
        request.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = "1"
        request.statement = show_stmt
        response = self.hs2_client.ExecuteStatement(request)
        HS2TestSuite.check_response(response)
        for _ in xrange(1, 5):
            # Ask for 30 rows; all 25 results come back every time.
            self.fetch(response.operationHandle, fetch_first, 30, 25)
        # The results of non-query stmts are not counted as 'cached'.
        self.__verify_num_cached_rows(0)

        # Test combinations of FETCH_FIRST and FETCH_NEXT.
        # The show stmt returns exactly 25 results.
        request.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = "1"
        request.statement = show_stmt
        response = self.hs2_client.ExecuteStatement(request)
        HS2TestSuite.check_response(response)
        op_handle = response.operationHandle
        # Fetch 10 rows.
        self.fetch(op_handle, fetch_next, 10)
        # Restart the fetch asking for 20 rows.
        self.fetch(op_handle, fetch_first, 20)
        # FETCH_NEXT asking for 100 rows. There are only 5 remaining rows.
        self.fetch(op_handle, fetch_next, 100, 5)
        # Restart the fetch asking for 5 rows.
        self.fetch(op_handle, fetch_first, 5)
        # FETCH_NEXT asking for 100 rows. There are only 20 remaining rows.
        self.fetch(op_handle, fetch_next, 100, 20)
Example #24
0
    def test_query_stmts(self):
        """Tests Impala's limited support for the FETCH_FIRST fetch orientation for queries.

        Impala permits FETCH_FIRST for a particular query iff result caching is enabled
        via the 'impala.resultset.cache.size' confOverlay option. FETCH_FIRST will succeed
        as long as all previously fetched rows fit into the bounded result cache.
        Regardless of whether a FETCH_FIRST succeeds or not, clients may always resume
        fetching with FETCH_NEXT.

        Every ExecuteStatement response is validated before its operation handle is used,
        so a failed execute is reported at the point of failure instead of surfacing as
        a confusing fetch error later on.
        """
        # Negative tests for the result caching option.
        self.__test_invalid_result_caching(
            "SELECT COUNT(*) FROM functional.alltypes")

        # All scenarios below run the same 30-row query; only the cache size varies.
        query = "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 30"
        fetch_first = TCLIService.TFetchOrientation.FETCH_FIRST
        fetch_next = TCLIService.TFetchOrientation.FETCH_NEXT
        cache_limit_error = ("The query result cache exceeded its limit of %s rows. "
                             "Restarting the fetch is not possible")

        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.confOverlay = dict()

        def execute_with_cache_size(cache_size):
            """Runs 'query' with the given result cache size and returns the handle of
            the (validated) operation."""
            execute_statement_req.confOverlay[
                self.IMPALA_RESULT_CACHING_OPT] = cache_size
            execute_statement_req.statement = query
            resp = self.hs2_client.ExecuteStatement(execute_statement_req)
            HS2TestSuite.check_response(resp)
            return resp.operationHandle

        # Test that FETCH_NEXT without result caching succeeds and FETCH_FIRST fails.
        execute_statement_req.statement = query
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        HS2TestSuite.check_response(execute_statement_resp)
        op_handle = execute_statement_resp.operationHandle
        for i in xrange(1, 5):
            # Fetch 10 rows with the FETCH_NEXT orientation. The 4th iteration hits eos
            # because the query only produces 30 rows.
            expected_num_rows = 0 if i == 4 else 10
            self.fetch(op_handle, fetch_next, 10, expected_num_rows)
            # Fetch 10 rows with the FETCH_FIRST orientation, expecting an error.
            # After a failed FETCH_FIRST, the client can still resume FETCH_NEXT.
            self.fetch_fail(
                op_handle, fetch_first,
                "Restarting of fetch requires enabling of query result caching")
        self.__verify_num_cached_rows(0)
        self.close(op_handle)

        # Basic test of FETCH_FIRST where the entire result set is cached, and we repeatedly
        # fetch all results.
        op_handle = execute_with_cache_size("30")
        for _ in xrange(1, 5):
            self.fetch(op_handle, fetch_first, 30)
            self.__verify_num_cached_rows(30)
        self.close(op_handle)

        # Test FETCH_NEXT and FETCH_FIRST where the entire result set does not fit into
        # the cache. FETCH_FIRST will succeed as long as the fetched results
        # fit into the cache.
        op_handle = execute_with_cache_size("29")
        # Fetch 10 rows. They fit in the result cache.
        self.fetch(op_handle, fetch_next, 10)
        self.__verify_num_cached_rows(10)
        # Restart the fetch and expect success.
        self.fetch(op_handle, fetch_first, 10)
        # Fetch 10 more rows. The result cache has 20 rows total now.
        self.fetch(op_handle, fetch_next, 10)
        self.__verify_num_cached_rows(20)
        # Restart the fetch and expect success.
        self.fetch(op_handle, fetch_first, 10)
        self.__verify_num_cached_rows(20)
        # Fetch 10 more rows from the cache.
        self.fetch(op_handle, fetch_next, 10)
        self.__verify_num_cached_rows(20)
        # This fetch exhausts the result cache.
        self.fetch(op_handle, fetch_next, 10)
        self.__verify_num_cached_rows(0)
        # Since the cache is exhausted, FETCH_FIRST will fail.
        self.fetch_fail(op_handle, fetch_first, cache_limit_error % "29")
        self.__verify_num_cached_rows(0)
        # This fetch should succeed but return 0 rows because the stream is eos.
        self.fetch(op_handle, fetch_next, 10, 0)
        self.__verify_num_cached_rows(0)
        self.close(op_handle)

        # Test that FETCH_FIRST serves results from the cache as well as the query
        # coordinator in a single fetch request.
        op_handle = execute_with_cache_size("29")
        # Fetch 7 rows. They fit in the result cache.
        self.fetch(op_handle, fetch_next, 7)
        self.__verify_num_cached_rows(7)
        # Restart the fetch asking for 12 rows, 7 of which are served from the cache and 5
        # from the coordinator. The result cache should have 12 rows total now.
        self.fetch(op_handle, fetch_first, 12)
        self.__verify_num_cached_rows(12)
        # Restart the fetch asking for 40 rows. We expect 30 results returned and that the
        # cache is exhausted.
        self.fetch(op_handle, fetch_first, 40, 30)
        self.__verify_num_cached_rows(0)
        # Fetch next should succeed and return 0 rows (eos).
        self.fetch(op_handle, fetch_next, 7, 0)
        self.__verify_num_cached_rows(0)
        # Since the cache is exhausted, FETCH_FIRST will fail.
        self.fetch_fail(op_handle, fetch_first, cache_limit_error % "29")
        self.__verify_num_cached_rows(0)
        self.close(op_handle)

        # Test that resuming FETCH_NEXT after a failed FETCH_FIRST works.
        op_handle = execute_with_cache_size("10")
        # Fetch 9 rows. They fit in the result cache.
        self.fetch(op_handle, fetch_next, 9)
        self.__verify_num_cached_rows(9)
        # Fetch 9 rows. Cache is exhausted now.
        self.fetch(op_handle, fetch_next, 9)
        self.__verify_num_cached_rows(0)
        # Restarting the fetch should fail.
        self.fetch_fail(op_handle, fetch_first, cache_limit_error % "10")
        self.__verify_num_cached_rows(0)
        # Resuming FETCH_NEXT should succeed. There are 12 remaining rows to fetch.
        self.fetch(op_handle, fetch_next, 100, 12)
        self.__verify_num_cached_rows(0)
        self.close(op_handle)
Example #25
0
 def close(self, op_handle):
     """Issue a CloseOperation request for op_handle and assert it succeeded."""
     request = TCLIService.TCloseOperationReq()
     request.operationHandle = op_handle
     status = self.hs2_client.CloseOperation(request).status
     assert status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS
Example #26
0
from cli_service import TCLIService
from thrift.transport import TTransport, TSocket
from thrift.protocol import TBinaryProtocol

# Command-line options: where to reach HiveServer2 and whether to use Kerberos.
parser = OptionParser()
parser.add_option("--hs2_hostport", dest="hs2_hostport",
                  default="localhost:11050", help="HiveServer2 hostport to wait for.")
parser.add_option("--use_kerberos", action="store_true", default=False,
                  help="Indicates whether the cluster is kerberized.")
options, args = parser.parse_args()

# Split "host:port" and build a Thrift client on top of the transport.
# NOTE(review): hs2_port is passed on as a string; create_transport is defined outside
# this excerpt, so confirm it accepts a string port.
hs2_host, hs2_port = options.hs2_hostport.split(':')
hs2_transport = create_transport(use_kerberos=options.use_kerberos, host=hs2_host,
                                 port=hs2_port, service="hiveserver2")
protocol = TBinaryProtocol.TBinaryProtocol(hs2_transport)
hs2_client = TCLIService.Client(protocol)

# Try to connect to the HiveServer2 service and create a session
# Keep retrying until TIMEOUT_SECONDS have elapsed since 'now'.
now = time.time()
TIMEOUT_SECONDS = 30.0
while time.time() - now < TIMEOUT_SECONDS:
  # NOTE(review): the handler clause(s) for this try lie outside the visible excerpt.
  try:
    hs2_transport.open()
    # Open a session as the user returned by getpass.getuser().
    open_session_req = TCLIService.TOpenSessionReq()
    open_session_req.username = getpass.getuser()
    resp = hs2_client.OpenSession(open_session_req)
    if resp.status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS:
      # The service answered successfully: close the probe session and report it.
      close_session_req = TCLIService.TCloseSessionReq()
      close_session_req.sessionHandle = resp.sessionHandle
      hs2_client.CloseSession(close_session_req)
      print "HiveServer2 service is up at %s." % options.hs2_hostport
Example #27
0
 def test_open_session(self):
     """Verify that OpenSession succeeds for a default-constructed request."""
     request = TCLIService.TOpenSessionReq()
     response = self.hs2_client.OpenSession(request)
     TestHS2.check_response(response)