class HS2TestSuite(ImpalaTestSuite):
    """Base suite whose tests talk to the server through a HiveServer2 Thrift client.

    setup() connects to the address in IMPALAD_HS2_HOST_PORT and exposes the client
    as self.hs2_client. The other methods are helpers for validating responses,
    closing operations and fetching results.
    """

    def setup(self):
        """Opens a buffered, binary-protocol connection and creates the HS2 client."""
        host, port = IMPALAD_HS2_HOST_PORT.split(":")
        self.socket = TSocket(host, port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = TCLIService.Client(self.protocol)

    def teardown(self):
        """Closes the socket if one is set."""
        if self.socket:
            self.socket.close()

    @staticmethod
    def check_response(response,
                       expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
                       expected_error_prefix=None):
        """Asserts that the response carries the expected status code.

        When a non-success code is expected and expected_error_prefix is given, also
        asserts that the response's error message starts with that prefix.
        """
        status = response.status
        assert status.statusCode == expected_status_code
        expecting_success = (
            expected_status_code == TCLIService.TStatusCode.SUCCESS_STATUS)
        if expecting_success or expected_error_prefix is None:
            return
        assert status.errorMessage.startswith(expected_error_prefix)

    def close(self, op_handle):
        """Closes the given operation and asserts that the close succeeded."""
        request = TCLIService.TCloseOperationReq()
        request.operationHandle = op_handle
        response = self.hs2_client.CloseOperation(request)
        assert response.status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS

    def fetch(self, handle, orientation, size, expected_num_rows=None):
        """Issues a single fetch of at most 'size' rows for the given operation handle.

        Uses the given fetch orientation. Asserts that the fetch succeeds and that
        the number of rows returned equals expected_num_rows if one was given, or
        'size' otherwise. Returns the fetch response.
        """
        request = TCLIService.TFetchResultsReq()
        request.operationHandle = handle
        request.orientation = orientation
        request.maxRows = size
        response = self.hs2_client.FetchResults(request)
        HS2TestSuite.check_response(response)
        wanted_rows = size if expected_num_rows is None else expected_num_rows
        assert len(response.results.rows) == wanted_rows
        return response

    def fetch_fail(self, handle, orientation, expected_error_prefix):
        """Issues a fetch (maxRows=100) that is expected to fail.

        Asserts that the response has an error status whose message starts with
        expected_error_prefix. Returns the fetch response.
        """
        request = TCLIService.TFetchResultsReq()
        request.operationHandle = handle
        request.orientation = orientation
        request.maxRows = 100
        response = self.hs2_client.FetchResults(request)
        HS2TestSuite.check_response(
            response, TCLIService.TStatusCode.ERROR_STATUS, expected_error_prefix)
        return response
class HS2TestSuite(ImpalaTestSuite):
    """Common fixture and helpers for tests that use the HiveServer2 Thrift interface.

    Each test gets a fresh connection to IMPALAD_HS2_HOST_PORT (see setup()), which
    is available to the test as self.hs2_client.
    """

    def setup(self):
        """Creates socket, transport, protocol and client, in that order."""
        host, port = IMPALAD_HS2_HOST_PORT.split(":")
        self.socket = TSocket(host, port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = TCLIService.Client(self.protocol)

    def teardown(self):
        """Closes the socket when self.socket is set."""
        if self.socket:
            self.socket.close()

    @staticmethod
    def check_response(response,
                       expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
                       expected_error_prefix=None):
        """Verifies the status code of a response and, optionally, its error prefix.

        The error message is only inspected when the expected status is not
        SUCCESS_STATUS and a prefix was supplied.
        """
        assert response.status.statusCode == expected_status_code
        if expected_status_code != TCLIService.TStatusCode.SUCCESS_STATUS:
            if expected_error_prefix is not None:
                error_message = response.status.errorMessage
                assert error_message.startswith(expected_error_prefix)

    def close(self, op_handle):
        """Sends CloseOperation for op_handle and asserts a success status."""
        close_req = TCLIService.TCloseOperationReq()
        close_req.operationHandle = op_handle
        close_resp = self.hs2_client.CloseOperation(close_req)
        assert close_resp.status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS

    def fetch(self, handle, orientation, size, expected_num_rows=None):
        """Fetches at most 'size' rows from the query behind the operation handle.

        The fetch uses the given orientation and must return a success status. The
        number of rows returned must equal 'size', or expected_num_rows if one was
        given. Returns the fetch response.
        """
        fetch_req = TCLIService.TFetchResultsReq()
        fetch_req.operationHandle = handle
        fetch_req.orientation = orientation
        fetch_req.maxRows = size
        fetch_resp = self.hs2_client.FetchResults(fetch_req)
        HS2TestSuite.check_response(fetch_resp)
        if expected_num_rows is None:
            expected_num_rows = size
        assert len(fetch_resp.results.rows) == expected_num_rows
        return fetch_resp

    def fetch_fail(self, handle, orientation, expected_error_prefix):
        """Attempts a fetch of up to 100 rows and asserts that it fails.

        The response must carry ERROR_STATUS and an error message that starts with
        expected_error_prefix. Returns the fetch response.
        """
        fetch_req = TCLIService.TFetchResultsReq()
        fetch_req.operationHandle = handle
        fetch_req.orientation = orientation
        fetch_req.maxRows = 100
        fetch_resp = self.hs2_client.FetchResults(fetch_req)
        HS2TestSuite.check_response(
            fetch_resp, TCLIService.TStatusCode.ERROR_STATUS, expected_error_prefix)
        return fetch_resp
def test_isOpen_checks_for_readability(self):
    """Checks TSocket.isOpen() before, during and after a connection.

    The scenario is repeated for each of the timeout settings listed below.
    """
    # https://docs.python.org/3/library/socket.html#notes-on-socket-timeouts
    # https://docs.python.org/3/library/socket.html#socket.socket.settimeout
    blocking_mode = None
    non_blocking_mode = 0
    timeout_mode = 1.0

    for timeout in [blocking_mode, non_blocking_mode, timeout_mode]:
        acc = ServerAcceptor(TServerSocket(port=0))
        acc.start()

        sock = TSocket(host="localhost", port=acc.port)
        self.assertFalse(sock.isOpen())
        sock.open()
        sock.setTimeout(timeout)

        # The socket shows as open immediately after connecting...
        self.assertTrue(sock.isOpen())

        # ...and remains open during usage.
        sock.write(b"hello")
        self.assertTrue(sock.isOpen())
        while True:
            try:
                sock.read(5)
                break
            except TTransportException as exc:
                # In non-blocking mode EAGAIN just means "try again"; anything
                # else is a real failure.
                if exc.inner.errno != errno.EAGAIN:
                    raise
        self.assertTrue(sock.isOpen())

        # Once the server side closes, the socket no longer shows as open.
        acc.client.close()
        acc.close()  # this also blocks until the other thread is done
        self.assertFalse(sock.isOpen())

        sock.close()
class TestAuthorization(CustomClusterTestSuite):
    """Authorization and impersonation tests that talk to the server over HS2.

    Every test opens its own Thrift connection in setup(). The audit log directory
    is created once, when the class body is executed, and removed in teardown().
    """
    AUDIT_LOG_DIR = tempfile.mkdtemp(dir=os.getenv('LOG_DIR'))

    def setup(self):
        """Connects to IMPALAD_HS2_HOST_PORT and creates self.hs2_client."""
        host, port = IMPALAD_HS2_HOST_PORT.split(":")
        self.socket = TSocket(host, port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = TCLIService.Client(self.protocol)

    def teardown(self):
        """Closes the socket (if still set) and deletes the audit log directory."""
        if self.socket:
            self.socket.close()
        shutil.rmtree(self.AUDIT_LOG_DIR, ignore_errors=True)

    def __execute_statement(self, session_handle, statement):
        """Runs 'statement' in the given session and returns the unchecked response."""
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = session_handle
        execute_statement_req.statement = statement
        return self.hs2_client.ExecuteStatement(execute_statement_req)

    # NOTE: the backslash continuations inside the flag strings below keep the
    # leading whitespace of each continued line as part of the string.
    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
        --authorization_policy_file=/test-warehouse/authz-policy.ini\
        --authorization_policy_provider_class=%s" %
        "org.apache.sentry.provider.file.LocalGroupResourceAuthorizationProvider")
    def test_custom_authorization_provider(self):
        """Checks that a denied and an allowed statement behave as expected."""
        from tests.hs2.test_hs2 import TestHS2
        open_session_req = TCLIService.TOpenSessionReq()
        # User is 'test_user' (defined in the authorization policy file)
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Try to query a table we are not authorized to access.
        self.session_handle = resp.sessionHandle
        execute_statement_resp = self.__execute_statement(
            self.session_handle, "describe tpch_seq.lineitem")
        assert 'User \'%s\' does not have privileges to access' % 'test_user' in\
            str(execute_statement_resp)

        # Now try the same operation on a table we are authorized to access.
        execute_statement_resp = self.__execute_statement(
            self.session_handle, "describe tpch.lineitem")
        TestHS2.check_response(execute_statement_resp)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
        --authorization_policy_file=/test-warehouse/authz-policy.ini\
        --authorized_proxy_user_config=hue=%s\
        --audit_event_log_dir=%s" % (getuser(), AUDIT_LOG_DIR))
    def test_impersonation(self):
        """End-to-end impersonation + authorization test. Expects authorization to be
        configured before running this test"""
        # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to
        # import the module within this test function, rather than as a top-level
        # import. This way the tests in that module will not get pulled when executing
        # this test suite. The fix is to split the utility code out of the TestHS2
        # class and support HS2 as a first class citizen in our test framework.
        from tests.hs2.test_hs2 import TestHS2
        open_session_req = TCLIService.TOpenSessionReq()
        # Connected user is 'hue'
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        # Delegated user is the current user
        open_session_req.configuration['impala.doas.user'] = getuser()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Try to query a table we are not authorized to access.
        self.session_handle = resp.sessionHandle
        execute_statement_resp = self.__execute_statement(
            self.session_handle, "describe tpch_seq.lineitem")
        assert 'User \'%s\' does not have privileges to access' % getuser() in\
            str(execute_statement_resp)

        assert self.__wait_for_audit_record(user=getuser(), impersonator='hue'),\
            'No matching audit event recorded in time window'

        # Now try the same operation on a table we are authorized to access.
        execute_statement_resp = self.__execute_statement(
            self.session_handle, "describe tpch.lineitem")
        TestHS2.check_response(execute_statement_resp)

        # Verify the correct user information is in the runtime profile
        query_id = operation_id_to_query_id(
            execute_statement_resp.operationHandle.operationId)
        profile_page = self.cluster.impalads[0].service.read_query_profile_page(query_id)
        self.__verify_profile_user_fields(profile_page, effective_user=getuser(),
                                          delegated_user=getuser(),
                                          connected_user='******')

        # Try a user we are not authorized to delegate to.
        open_session_req.configuration['impala.doas.user'] = '******'
        resp = self.hs2_client.OpenSession(open_session_req)
        assert 'User \'hue\' is not authorized to delegate to \'some_user\'' in str(resp)

        # Create a new session which does not have a do_as_user.
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Run a simple query, which should succeed.
        execute_statement_resp = self.__execute_statement(resp.sessionHandle, "select 1")
        TestHS2.check_response(execute_statement_resp)

        # Verify the correct user information is in the runtime profile. Since there is
        # no do_as_user the Delegated User field should be empty.
        query_id = operation_id_to_query_id(
            execute_statement_resp.operationHandle.operationId)
        profile_page = self.cluster.impalads[0].service.read_query_profile_page(query_id)
        self.__verify_profile_user_fields(profile_page, effective_user='******',
                                          delegated_user='', connected_user='******')

        # Clear the attribute so teardown() does not close the socket a second time.
        self.socket.close()
        self.socket = None

    def __verify_profile_user_fields(self, profile_str, effective_user, connected_user,
                                     delegated_user):
        """Verifies the given runtime profile string contains the specified values for
        User, Connected User, and Delegated User"""
        assert '\n User: %s\n' % effective_user in profile_str
        assert '\n Connected User: %s\n' % connected_user in profile_str
        assert '\n Delegated User: %s\n' % delegated_user in profile_str

    def __wait_for_audit_record(self, user, impersonator, timeout_secs=30):
        """Waits until an audit log record is found that contains the given user and
        impersonator, or until the timeout is reached.

        Returns True if a matching record was found and False on timeout.
        """
        # The audit event might not show up immediately (the audit logs are flushed to
        # disk on regular intervals), so poll the audit event logs until a matching
        # record is found.
        start_time = time()
        while time() - start_time < timeout_secs:
            for audit_file_name in os.listdir(self.AUDIT_LOG_DIR):
                if self.__find_matching_audit_record(audit_file_name, user,
                                                     impersonator):
                    return True
            sleep(1)
        return False

    def __find_matching_audit_record(self, audit_file_name, user, impersonator):
        """Returns True if the named audit log file has a record for user/impersonator.

        Each line of the file is parsed as JSON. Empty JSON objects are skipped. For
        the rest, the entry stored under the smallest key is compared against the
        given user and impersonator.
        """
        audit_file_path = os.path.join(self.AUDIT_LOG_DIR, audit_file_name)
        with open(audit_file_path) as audit_log_file:
            # Iterate the file directly instead of loading every line up front.
            for line in audit_log_file:
                json_dict = json.loads(line)
                if not json_dict:
                    continue
                record = json_dict[min(json_dict)]
                if record['user'] == user and record['impersonator'] == impersonator:
                    return True
        return False
class HS2TestSuite(ImpalaTestSuite):
    """Base suite for HS2 tests that also need a scratch database.

    setup() drops any leftover TEST_DB, opens the HS2 connection and creates the
    database; teardown() drops it again and closes the socket.
    """
    # This DB will be created/dropped for every HS2TestSuite subclass. Make the name
    # unique so different test suites don't clobber each other's DBs. The [2:] is to
    # remove the "0." from the random floating-point number.
    TEST_DB = 'hs2_db' + str(random.random())[2:]

    # Names of the typed value fields probed on a result column to find its data.
    HS2_V6_COLUMN_TYPES = [
        'boolVal', 'stringVal', 'byteVal', 'i16Val', 'i32Val', 'i64Val', 'doubleVal',
        'binaryVal'
    ]

    def setup(self):
        """Cleans up TEST_DB, connects the HS2 client and creates TEST_DB."""
        self.cleanup_db(self.TEST_DB)
        host, port = IMPALAD_HS2_HOST_PORT.split(":")
        self.socket = TSocket(host, port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)
        self.client.execute("create database %s" % self.TEST_DB)

    def teardown(self):
        """Drops TEST_DB and closes the socket if one is set."""
        self.cleanup_db(self.TEST_DB)
        if self.socket:
            self.socket.close()

    @staticmethod
    def check_response(response,
                       expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
                       expected_error_prefix=None):
        """Asserts that the response has the expected status code.

        If a non-success code is expected and expected_error_prefix is given, also
        asserts that the error message starts with that prefix.
        """
        assert response.status.statusCode == expected_status_code
        if expected_status_code != TCLIService.TStatusCode.SUCCESS_STATUS\
           and expected_error_prefix is not None:
            assert response.status.errorMessage.startswith(expected_error_prefix)

    def close(self, op_handle):
        """Closes the given operation and asserts that the close succeeded."""
        close_op_req = TCLIService.TCloseOperationReq()
        close_op_req.operationHandle = op_handle
        close_op_resp = self.hs2_client.CloseOperation(close_op_req)
        assert close_op_resp.status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS

    def get_num_rows(self, result_set):
        """Returns the number of rows held by a row- or column-oriented result set.

        Raises AssertionError if columns are present but the first column has none
        of the HS2_V6_COLUMN_TYPES fields set.
        """
        # rows will always be set, so the only way to tell if we should use it is to
        # see if any columns are set
        if not result_set.columns:
            return len(result_set.rows)

        for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
            typed_col = getattr(result_set.columns[0], col_type)
            if typed_col is not None:
                return len(typed_col.values)
        raise AssertionError("No typed value field is set on the first result column")

    def fetch(self, handle, orientation, size, expected_num_rows=None):
        """Fetches at most size number of rows from the query identified by the given
        operation handle. Uses the given fetch orientation. Asserts that the fetch
        returns a success status, and that the number of rows returned is equal to
        size, or equal to the given expected_num_rows (if one was given). Returns the
        fetch response."""
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = size
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp)
        num_rows = size if expected_num_rows is None else expected_num_rows
        assert self.get_num_rows(fetch_results_resp.results) == num_rows
        return fetch_results_resp

    def fetch_until(self, handle, orientation, size):
        """Fetches exactly 'size' rows from the given query handle, with the given
        fetch orientation. If the first fetch returns fewer than 'size' rows, further
        fetches for the remaining rows are issued until 'size' rows were fetched in
        total. Every follow-up fetch is asserted to return at least one row, so a
        query that produces fewer than 'size' rows fails the assertion. Returns
        nothing."""
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = size
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp)
        num_rows_fetched = self.get_num_rows(fetch_results_resp.results)
        while num_rows_fetched < size:
            # Only ask for the rows that are still missing.
            fetch_results_req.maxRows = size - num_rows_fetched
            fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
            HS2TestSuite.check_response(fetch_results_resp)
            last_fetch_size = self.get_num_rows(fetch_results_resp.results)
            assert last_fetch_size > 0
            num_rows_fetched += last_fetch_size
        assert num_rows_fetched == size

    def fetch_fail(self, handle, orientation, expected_error_prefix):
        """Attempts to fetch rows from the query identified by the given operation
        handle. Asserts that the fetch returns an error with an error message that
        starts with the given expected_error_prefix. Returns the fetch response."""
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = 100
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp,
                                    TCLIService.TStatusCode.ERROR_STATUS,
                                    expected_error_prefix)
        return fetch_results_resp
# Polls the fight status once per second and casts a spell whenever it is
# player1's turn. Runs until interrupted or until a Thrift error occurs.
# NOTE(review): the loop nesting was reconstructed from flattened source; the
# separator print and the sleep are assumed to run on every iteration - confirm.
try:
    # Make socket
    transport = TSocket("213.100.51.33", 9090)

    # Buffering is critical. Raw sockets are very slow
    transport = TBufferedTransport(transport)

    # Wrap in a protocol
    protocol = TBinaryProtocol(transport)

    # Create a client to use the protocol encoder
    client = Client(protocol)

    # Connect!
    transport.open()

    try:
        while True:
            status = client.getStatus("auth1", "fight1")
            print("Status update:\n%r" % (status,))
            if status.currentTurn == "player1":
                print("It's your turn, casting spell!\n")
                client.castSpell("auth1", "fight1", 1, "player2")
            print("---------------------")
            time.sleep(1)
    finally:
        # Close! The loop above only ends through an exception, so the close has
        # to live in a finally block to be reached at all.
        transport.close()

except Thrift.TException as e:
    print(e.message)
class TestHS2(ImpalaTestSuite):
    """Tests of basic HiveServer2 session and operation handling.

    setup() gives every test its own connection and client (self.hs2_client).
    Tests decorated with @needs_session additionally use self.session_handle.
    """

    def setup(self):
        """Connects to IMPALAD_HS2_HOST_PORT and creates the HS2 client."""
        host, port = IMPALAD_HS2_HOST_PORT.split(":")
        self.socket = TSocket(host, port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = TCLIService.Client(self.protocol)

    def teardown(self):
        """Closes the socket unless a test has already reset it to None."""
        if self.socket:
            self.socket.close()

    @staticmethod
    def check_response(response, expected=TCLIService.TStatusCode.SUCCESS_STATUS):
        """Asserts that the response's status code equals 'expected'."""
        assert response.status.statusCode == expected

    def test_open_session(self):
        """Check that a session can be opened"""
        open_req = TCLIService.TOpenSessionReq()
        open_resp = self.hs2_client.OpenSession(open_req)
        TestHS2.check_response(open_resp)

    def test_close_session(self):
        """Test that an open session can be closed"""
        open_resp = self.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
        TestHS2.check_response(open_resp)

        close_req = TCLIService.TCloseSessionReq()
        close_req.sessionHandle = open_resp.sessionHandle
        close_resp = self.hs2_client.CloseSession(close_req)
        TestHS2.check_response(close_resp)

    def test_double_close_session(self):
        """Test that an already closed session cannot be closed a second time"""
        open_resp = self.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
        TestHS2.check_response(open_resp)

        close_req = TCLIService.TCloseSessionReq()
        close_req.sessionHandle = open_resp.sessionHandle
        first_close_resp = self.hs2_client.CloseSession(close_req)
        TestHS2.check_response(first_close_resp)

        # Double close should be an error
        second_close_resp = self.hs2_client.CloseSession(close_req)
        TestHS2.check_response(second_close_resp, TCLIService.TStatusCode.ERROR_STATUS)

    @needs_session
    def test_execute_select(self):
        """Test that a simple select statement works"""
        exec_req = TCLIService.TExecuteStatementReq()
        exec_req.sessionHandle = self.session_handle
        exec_req.statement = "SELECT COUNT(*) FROM functional.alltypes"
        exec_resp = self.hs2_client.ExecuteStatement(exec_req)
        TestHS2.check_response(exec_resp)

        fetch_req = TCLIService.TFetchResultsReq()
        fetch_req.operationHandle = exec_resp.operationHandle
        fetch_req.maxRows = 100
        fetch_resp = self.hs2_client.FetchResults(fetch_req)
        TestHS2.check_response(fetch_resp)

        results = fetch_resp.results
        assert len(results.rows) == 1
        assert results.startRowOffset == 0

        # A set hasMoreRows is reported as an expected failure, not a hard one.
        try:
            assert not fetch_resp.hasMoreRows
        except AssertionError:
            pytest.xfail("IMPALA-558")

    @needs_session
    def test_get_operation_status(self):
        """Tests that GetOperationStatus returns a valid result for a running query"""
        exec_req = TCLIService.TExecuteStatementReq()
        exec_req.sessionHandle = self.session_handle
        exec_req.statement = "SELECT COUNT(*) FROM functional.alltypes"
        exec_resp = self.hs2_client.ExecuteStatement(exec_req)
        TestHS2.check_response(exec_resp)

        status_req = TCLIService.TGetOperationStatusReq()
        status_req.operationHandle = exec_resp.operationHandle
        status_resp = self.hs2_client.GetOperationStatus(status_req)
        TestHS2.check_response(status_resp)

        acceptable_states = [TCLIService.TOperationState.INITIALIZED_STATE,
                             TCLIService.TOperationState.RUNNING_STATE,
                             TCLIService.TOperationState.FINISHED_STATE]
        assert status_resp.operationState in acceptable_states

    @needs_session
    def test_malformed_get_operation_status(self):
        """Tests that a short guid / secret returns an error (regression would be to
        crash impalad)"""
        handle_id = TCLIService.THandleIdentifier()
        operation_handle = TCLIService.TOperationHandle()
        operation_handle.operationId = handle_id
        handle_id.guid = "short"
        handle_id.secret = "short_secret"
        # The point of the test is that neither value has the expected size of 16.
        assert len(handle_id.guid) != 16
        assert len(handle_id.secret) != 16
        operation_handle.operationType = TCLIService.TOperationType.EXECUTE_STATEMENT
        operation_handle.hasResultSet = False

        status_req = TCLIService.TGetOperationStatusReq()
        status_req.operationHandle = operation_handle
        status_resp = self.hs2_client.GetOperationStatus(status_req)
        TestHS2.check_response(status_resp, TCLIService.TStatusCode.ERROR_STATUS)

        size_details = (len(operation_handle.operationId.guid),
                        len(operation_handle.operationId.secret))
        err_msg = "(guid size: %d, expected 16, secret size: %d, expected 16)" \
            % size_details
        assert err_msg in status_resp.status.errorMessage

    @pytest.mark.execute_serially
    def test_socket_close_forces_session_close(self):
        """Test that closing the underlying socket forces the associated session to
        close. See IMPALA-564"""
        open_resp = self.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
        TestHS2.check_response(open_resp)

        metric_name = "impala-server.num-open-hiveserver2-sessions"
        num_sessions = self.impalad_test_service.get_metric_value(metric_name)
        assert num_sessions > 0

        # Reset the attribute so teardown() does not close the socket again.
        self.socket.close()
        self.socket = None
        self.impalad_test_service.wait_for_metric_value(metric_name, num_sessions - 1)

    @pytest.mark.execute_serially
    def test_multiple_sessions(self):
        """Test that multiple sessions on the same socket connection are allowed"""
        metric_name = "impala-server.num-open-hiveserver2-sessions"
        num_sessions = self.impalad_test_service.get_metric_value(metric_name)

        session_ids = []
        for _ in xrange(5):
            open_resp = self.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
            TestHS2.check_response(open_resp)
            # Check that all sessions get different IDs
            assert open_resp.sessionHandle not in session_ids
            session_ids.append(open_resp.sessionHandle)

        self.impalad_test_service.wait_for_metric_value(metric_name, num_sessions + 5)

        # Closing the shared socket should bring the count back to its old value.
        self.socket.close()
        self.socket = None
        self.impalad_test_service.wait_for_metric_value(metric_name, num_sessions)

    @needs_session
    def test_get_schemas(self):
        """Runs GetSchemas, fetches its results and checks the query profile."""
        schemas_req = TCLIService.TGetSchemasReq()
        schemas_req.sessionHandle = self.session_handle
        schemas_resp = self.hs2_client.GetSchemas(schemas_req)
        TestHS2.check_response(schemas_resp)

        fetch_req = TCLIService.TFetchResultsReq()
        fetch_req.operationHandle = schemas_resp.operationHandle
        fetch_req.maxRows = 100
        fetch_resp = self.hs2_client.FetchResults(fetch_req)
        TestHS2.check_response(fetch_resp)

        query_id = operation_id_to_query_id(schemas_resp.operationHandle.operationId)
        profile_page = self.impalad_test_service.read_query_profile_page(query_id)

        # Test fix for IMPALA-619
        assert "Sql Statement: GET_SCHEMAS" in profile_page
        assert "Query Type: DDL" in profile_page
class HS2TestSuite(ImpalaTestSuite):
    """Base suite with connection handling and helpers for HS2 tests.

    setup() and teardown() both clean up TEST_DB. setup() also opens the HS2
    connection (self.hs2_client) and teardown() closes the socket.
    """
    TEST_DB = 'hs2_db'

    # Names of the typed value fields probed on a result column to find its data.
    HS2_V6_COLUMN_TYPES = [
        'boolVal', 'stringVal', 'byteVal', 'i16Val', 'i32Val', 'i64Val', 'doubleVal',
        'binaryVal'
    ]

    def setup(self):
        """Cleans up TEST_DB and connects the HS2 client."""
        self.cleanup_db(self.TEST_DB)
        host, port = IMPALAD_HS2_HOST_PORT.split(":")
        self.socket = TSocket(host, port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

    def teardown(self):
        """Cleans up TEST_DB and closes the socket if one is set."""
        self.cleanup_db(self.TEST_DB)
        if self.socket:
            self.socket.close()

    @staticmethod
    def check_response(response,
                       expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
                       expected_error_prefix=None):
        """Asserts that the response has the expected status code.

        If a non-success code is expected and expected_error_prefix is given, also
        asserts that the error message starts with that prefix.
        """
        assert response.status.statusCode == expected_status_code
        if expected_status_code != TCLIService.TStatusCode.SUCCESS_STATUS\
           and expected_error_prefix is not None:
            assert response.status.errorMessage.startswith(expected_error_prefix)

    def close(self, op_handle):
        """Closes the given operation and asserts that the close succeeded."""
        close_op_req = TCLIService.TCloseOperationReq()
        close_op_req.operationHandle = op_handle
        close_op_resp = self.hs2_client.CloseOperation(close_op_req)
        assert close_op_resp.status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS

    @staticmethod
    def _first_typed_column(column):
        """Returns the first HS2_V6_COLUMN_TYPES field set on 'column', or None."""
        for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
            typed_col = getattr(column, col_type)
            if typed_col is not None:
                return typed_col
        return None

    def get_num_rows(self, result_set):
        """Returns the number of rows held by a row- or column-oriented result set.

        Raises AssertionError if columns are present but the first column has none
        of the HS2_V6_COLUMN_TYPES fields set.
        """
        # rows will always be set, so the only way to tell if we should use it is to
        # see if any columns are set
        if not result_set.columns:
            return len(result_set.rows)

        typed_col = HS2TestSuite._first_typed_column(result_set.columns[0])
        if typed_col is None:
            raise AssertionError(
                "No typed value field is set on the first result column")
        return len(typed_col.values)

    def fetch_at_most(self, handle, orientation, size, expected_num_rows=None):
        """Fetches at most size number of rows from the query identified by the given
        operation handle. Uses the given fetch orientation. Asserts that the fetch
        returns a success status, and that the number of rows returned is equal to
        given expected_num_rows (if given). Returns the fetch response.

        It is only safe for expected_num_rows to be 0 or 1: Impala does not guarantee
        that a larger result set will be returned in one go. Use fetch_until() for
        repeated fetches."""
        assert expected_num_rows is None or expected_num_rows in (0, 1)
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = size
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp)
        if expected_num_rows is not None:
            assert self.get_num_rows(fetch_results_resp.results) == expected_num_rows
        return fetch_results_resp

    def fetch_until(self, handle, orientation, size, expected_num_rows=None):
        """Fetches rows from the given query handle until 'expected_num_rows' rows
        were fetched in total. The first fetch uses the given orientation and asks
        for 'size' rows; every follow-up fetch uses FETCH_NEXT and asks for
        'size - rows already fetched' rows.

        If 'expected_num_rows' is None it defaults to 'size', so that the effect is
        to both ask for and expect the same number of rows. Every follow-up fetch is
        asserted to return at least one row, and the total is asserted to equal
        'expected_num_rows' exactly. Returns nothing."""
        assert expected_num_rows is None or (size >= expected_num_rows)
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = size
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp)
        num_rows_fetched = self.get_num_rows(fetch_results_resp.results)
        if expected_num_rows is None:
            expected_num_rows = size
        while num_rows_fetched < expected_num_rows:
            # Always try to fetch at most 'size'
            fetch_results_req.maxRows = size - num_rows_fetched
            fetch_results_req.orientation = TCLIService.TFetchOrientation.FETCH_NEXT
            fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
            HS2TestSuite.check_response(fetch_results_resp)
            last_fetch_size = self.get_num_rows(fetch_results_resp.results)
            assert last_fetch_size > 0
            num_rows_fetched += last_fetch_size
        assert num_rows_fetched == expected_num_rows

    def fetch_fail(self, handle, orientation, expected_error_prefix):
        """Attempts to fetch rows from the query identified by the given operation
        handle. Asserts that the fetch returns an error with an error message that
        starts with the given expected_error_prefix. Returns the fetch response."""
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = 100
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp,
                                    TCLIService.TStatusCode.ERROR_STATUS,
                                    expected_error_prefix)
        return fetch_results_resp

    def result_metadata(self, handle):
        """Gets the schema for the query identified by the handle."""
        req = TCLIService.TGetResultSetMetadataReq()
        req.operationHandle = handle
        resp = self.hs2_client.GetResultSetMetadata(req)
        HS2TestSuite.check_response(resp)
        return resp

    def column_results_to_string(self, columns):
        """Quick-and-dirty way to get a readable string to compare the output of a
        columnar-oriented query to its expected output.

        Returns a (num_rows, formatted) tuple. 'formatted' has one line per row with
        the values joined by ", ", and "NULL" for values flagged in the column's
        nulls bitmap."""
        # Determine the number of rows by finding the type of the first column
        first_col = HS2TestSuite._first_typed_column(columns[0])
        num_rows = 0 if first_col is None else len(first_col.values)

        # Columns without any typed field set contribute nothing to a row.
        typed_cols = [HS2TestSuite._first_typed_column(c) for c in columns]
        typed_cols = [tc for tc in typed_cols if tc is not None]

        lines = []
        for i in xrange(num_rows):
            row = []
            for typed_col in typed_cols:
                # One bit per row: byte i // 8, bit i % 8. Floor division keeps the
                # index an integer under true division as well.
                indicator = ord(typed_col.nulls[i // 8])
                if indicator & (1 << (i % 8)):
                    row.append("NULL")
                else:
                    row.append(str(typed_col.values[i]))
            lines.append(", ".join(row) + "\n")
        return (num_rows, "".join(lines))

    def get_operation_status(self, operation_handle):
        """Executes GetOperationStatus with the given operation handle and returns the
        TGetOperationStatusResp"""
        get_operation_status_req = TCLIService.TGetOperationStatusReq()
        get_operation_status_req.operationHandle = operation_handle
        get_operation_status_resp = \
            self.hs2_client.GetOperationStatus(get_operation_status_req)
        return get_operation_status_resp

    def wait_for_operation_state(self, operation_handle, expected_state,
                                 timeout=10, interval=1):
        """Waits for the operation to reach expected_state by polling
        GetOperationStatus every interval seconds, returning the
        TGetOperationStatusResp, or raising an AssertionError after timeout
        seconds."""
        start_time = time()
        # Stays None if the loop body never runs (e.g. timeout <= 0).
        get_operation_status_resp = None
        while time() - start_time < timeout:
            get_operation_status_resp = self.get_operation_status(operation_handle)
            HS2TestSuite.check_response(get_operation_status_resp)
            # Compare by value: identity of equal integers is not guaranteed.
            if get_operation_status_resp.operationState == expected_state:
                return get_operation_status_resp
            sleep(interval)
        actual_state = None
        if get_operation_status_resp is not None:
            actual_state = get_operation_status_resp.operationState
        raise AssertionError(
            'Did not reach expected operation state %s in time, actual state was '
            '%s' % (expected_state, actual_state))
class TestAuthorizedProxy(CustomClusterTestSuite):
    """Custom cluster tests for authorized proxy users/groups (impala.doas.user)
    over HS2."""

    def setup(self):
        """Opens an HS2 connection to the first impalad of the cluster."""
        host, port = (self.cluster.impalads[0].service.hostname,
                      self.cluster.impalads[0].service.hs2_port)
        self.socket = TSocket(host, port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

    def teardown(self):
        """Closes the HS2 socket and removes the audit log directory."""
        if self.socket:
            self.socket.close()
        shutil.rmtree(AUDIT_LOG_DIR, ignore_errors=True)

    def _execute_hs2_stmt(self, statement, verify=True):
        """
        Executes an hs2 statement in the session stored in self.session_handle

        :param statement: the statement to execute
        :param verify: If set to true, will throw an exception on a failed hs2
            execution
        :return: the result of execution
        """
        from tests.hs2.test_hs2 import TestHS2
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = statement
        result = self.hs2_client.ExecuteStatement(execute_statement_req)
        if verify:
            TestHS2.check_response(result)
        return result

    def _open_hs2(self, user, configuration, verify=True):
        """
        Open a session with hs2

        :param user: the user to open the session
        :param configuration: the configuration for the session
        :param verify: If set to true, will throw an exception on failed session open
        :return: the result of opening the session
        """
        from tests.hs2.test_hs2 import TestHS2
        open_session_req = TCLIService.TOpenSessionReq()
        open_session_req.username = user
        open_session_req.configuration = configuration
        resp = self.hs2_client.OpenSession(open_session_req)
        if verify:
            TestHS2.check_response(resp)
        return resp

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=foo=bar;hue={1} ".format(
            SENTRY_IMPALAD_ARGS, getuser()),
        catalogd_args=SENTRY_CATALOGD_ARGS)
    def test_authorized_proxy_user_with_sentry(self, unique_role):
        """Tests authorized proxy user with Sentry using HS2."""
        self._test_authorized_proxy_with_sentry(unique_role,
                                                self._test_authorized_proxy)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=foo=bar;hue={1} ".format(
            RANGER_IMPALAD_ARGS, getuser()),
        catalogd_args=RANGER_CATALOGD_ARGS)
    def test_authorized_proxy_user_with_ranger(self):
        """Tests authorized proxy user with Ranger using HS2."""
        # This test fails due to bumping up the Ranger to a newer version.
        # TODO(fangyu.rao): Fix in a follow up commit.
        pytest.xfail("failed due to bumping up the Ranger to a newer version")
        self._test_authorized_proxy_with_ranger(self._test_authorized_proxy)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=hue=bar "
                     "--authorized_proxy_group_config=foo=bar;hue={1}".format(
                         SENTRY_IMPALAD_ARGS, grp.getgrgid(os.getgid()).gr_name),
        catalogd_args=SENTRY_CATALOGD_ARGS)
    def test_authorized_proxy_group_with_sentry(self, unique_role):
        """Tests authorized proxy group with Sentry using HS2."""
        self._test_authorized_proxy_with_sentry(unique_role,
                                                self._test_authorized_proxy)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=hue=bar "
                     "--authorized_proxy_group_config=foo=bar;hue={1}".format(
                         RANGER_IMPALAD_ARGS, grp.getgrgid(os.getgid()).gr_name),
        catalogd_args=RANGER_CATALOGD_ARGS)
    def test_authorized_proxy_group_with_ranger(self):
        """Tests authorized proxy group with Ranger using HS2."""
        # This test fails due to bumping up the Ranger to a newer version.
        # TODO(fangyu.rao): Fix in a follow up commit.
        pytest.xfail("failed due to bumping up the Ranger to a newer version")
        self._test_authorized_proxy_with_ranger(self._test_authorized_proxy)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=foo=bar "
                     "--authorized_proxy_group_config=foo=bar".format(
                         SENTRY_IMPALAD_ARGS),
        catalogd_args=SENTRY_CATALOGD_ARGS)
    def test_no_matching_user_and_group_authorized_proxy_with_sentry(self):
        self._test_no_matching_user_and_group_authorized_proxy()

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=foo=bar "
                     "--authorized_proxy_group_config=foo=bar".format(
                         RANGER_IMPALAD_ARGS),
        catalogd_args=RANGER_CATALOGD_ARGS)
    def test_no_matching_user_and_group_authorized_proxy_with_ranger(self):
        self._test_no_matching_user_and_group_authorized_proxy()

    def _test_no_matching_user_and_group_authorized_proxy(self):
        """Opens a session with a doas user and asserts that the response reports
        that the delegation is not authorized."""
        open_session_req = TCLIService.TOpenSessionReq()
        open_session_req.username = "******"
        open_session_req.configuration = dict()
        open_session_req.configuration["impala.doas.user"] = "******"
        resp = self.hs2_client.OpenSession(open_session_req)
        assert "User 'hue' is not authorized to delegate to 'abc'" in str(resp)

    def _test_authorized_proxy_with_sentry(self, role, test_func):
        """Creates 'role' with ALL on tpch.lineitem, grants it to the current user's
        groups, runs test_func() and finally drops the role again."""
        try:
            self.session_handle = self._open_hs2(getuser(), dict()).sessionHandle
            self._execute_hs2_stmt("create role {0}".format(role))
            self._execute_hs2_stmt(
                "grant all on table tpch.lineitem to role {0}".format(role))
            self._execute_hs2_stmt("grant role {0} to group {1}".format(
                role, grp.getgrnam(getuser()).gr_name))
            self._execute_hs2_stmt("grant role {0} to group {1}".format(
                role, grp.getgrgid(os.getgid()).gr_name))
            test_func()
        finally:
            # test_func may have switched sessions, so clean up in a fresh one.
            self.session_handle = self._open_hs2(getuser(), dict()).sessionHandle
            self._execute_hs2_stmt("grant all on server to role {0}".format(role))
            self._execute_hs2_stmt("grant role {0} to group {1}".format(
                role, grp.getgrnam(getuser()).gr_name))
            self._execute_hs2_stmt("drop role {0}".format(role))

    def _test_authorized_proxy_with_ranger(self, test_func):
        """Grants the current user ALL on tpch.lineitem as RANGER_ADMIN_USER, runs
        test_func() and finally revokes the privilege again."""
        try:
            self.session_handle = \
                self._open_hs2(RANGER_ADMIN_USER, dict()).sessionHandle
            self._execute_hs2_stmt(
                "grant all on table tpch.lineitem to user {0}".format(getuser()))
            test_func()
        finally:
            self.session_handle = \
                self._open_hs2(RANGER_ADMIN_USER, dict()).sessionHandle
            self._execute_hs2_stmt(
                "revoke all on table tpch.lineitem from user {0}".format(getuser()))

    def _test_authorized_proxy(self):
        """End-to-end impersonation + authorization test. Expects authorization to be
        configured before running this test"""
        # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to
        # import the module within this test function, rather than as a top-level
        # import. This way the tests in that module will not get pulled when executing
        # this test suite. The fix is to split the utility code out of the TestHS2
        # class and support HS2 as a first class citizen in our test framework.
        from tests.hs2.test_hs2 import TestHS2

        # Try to query a table we are not authorized to access.
        self.session_handle = self._open_hs2(
            "hue", {"impala.doas.user": getuser()}).sessionHandle
        bad_resp = self._execute_hs2_stmt("describe tpch_seq.lineitem", False)
        assert "User '%s' does not have privileges to access" % getuser() in \
            str(bad_resp)

        assert self._wait_for_audit_record(user=getuser(), impersonator="hue"), \
            "No matching audit event recorded in time window"

        # Now try the same operation on a table we are authorized to access.
        good_resp = self._execute_hs2_stmt("describe tpch.lineitem")
        TestHS2.check_response(good_resp)

        # Verify the correct user information is in the runtime profile.
        query_id = operation_id_to_query_id(good_resp.operationHandle.operationId)
        profile_page = self.cluster.impalads[0].service.read_query_profile_page(
            query_id)
        self._verify_profile_user_fields(profile_page,
                                         effective_user=getuser(),
                                         delegated_user=getuser(),
                                         connected_user="******")

        # Try to delegate a user we are not authorized to delegate to.
        resp = self._open_hs2("hue", {"impala.doas.user": "******"}, False)
        assert "User 'hue' is not authorized to delegate to 'some_user'" in str(resp)

        # Create a new session which does not have a do_as_user and run a simple
        # query.
        self.session_handle = self._open_hs2("hue", dict()).sessionHandle
        resp = self._execute_hs2_stmt("select 1")

        # Verify the correct user information is in the runtime profile. Since there
        # is no do_as_user the Delegated User field should be empty.
        query_id = operation_id_to_query_id(resp.operationHandle.operationId)
        profile_page = self.cluster.impalads[0].service.read_query_profile_page(
            query_id)
        self._verify_profile_user_fields(profile_page,
                                         effective_user="******",
                                         delegated_user="",
                                         connected_user="******")

    def _verify_profile_user_fields(self, profile_str, effective_user,
                                    connected_user, delegated_user):
        """Verifies the given runtime profile string contains the specified values
        for User, Connected User, and Delegated User"""
        assert "\n User: {0}\n".format(effective_user) in profile_str
        assert "\n Connected User: {0}\n".format(connected_user) in profile_str
        assert "\n Delegated User: {0}\n".format(delegated_user) in profile_str

    def _wait_for_audit_record(self, user, impersonator, timeout_secs=30):
        """Waits until an audit log record is found that contains the given user and
        impersonator, or until the timeout is reached.

        Returns True if a matching record was found, False on timeout.
        """
        # The audit event might not show up immediately (the audit logs are flushed
        # to disk on regular intervals), so poll the audit event logs until a
        # matching record is found.
        start_time = time.time()
        while time.time() - start_time < timeout_secs:
            for audit_file_name in os.listdir(AUDIT_LOG_DIR):
                if self._find_matching_audit_record(audit_file_name, user,
                                                    impersonator):
                    return True
            time.sleep(1)
        return False

    def _find_matching_audit_record(self, audit_file_name, user, impersonator):
        """Returns True if the given file in AUDIT_LOG_DIR contains a record for
        'user' and 'impersonator', False otherwise.

        Every line of the file is parsed as a JSON object; the record that is checked
        is the value stored under the smallest top-level key of that object.
        """
        with open(os.path.join(AUDIT_LOG_DIR, audit_file_name)) as audit_log_file:
            for line in audit_log_file:
                json_dict = json.loads(line)
                if not json_dict:
                    continue
                record = json_dict[min(json_dict)]
                if record["user"] == user and record["impersonator"] == impersonator:
                    return True
        return False
class TestAuthorization(CustomClusterTestSuite): def setup(self): host, port = (self.cluster.impalads[0].service.hostname, self.cluster.impalads[0].service.hs2_port) self.socket = TSocket(host, port) self.transport = TBufferedTransport(self.socket) self.transport.open() self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport) self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol) def teardown(self): if self.socket: self.socket.close() def __execute_hs2_stmt(self, statement, verify=True): """ Executes an hs2 statement :param statement: the statement to execute :param verify: If set to true, will thrown an exception on a failed hs2 execution :return: the result of execution """ from tests.hs2.test_hs2 import TestHS2 execute_statement_req = TCLIService.TExecuteStatementReq() execute_statement_req.sessionHandle = self.session_handle execute_statement_req.statement = statement result = self.hs2_client.ExecuteStatement(execute_statement_req) if verify: TestHS2.check_response(result) return result def __open_hs2(self, user, configuration, verify=True): """ Open a session with hs2 :param user: the user to open the session :param configuration: the configuration for the session :param verify: If set to true, will thrown an exception on failed session open :return: the result of opening the session """ from tests.hs2.test_hs2 import TestHS2 open_session_req = TCLIService.TOpenSessionReq() open_session_req.username = user open_session_req.configuration = configuration resp = self.hs2_client.OpenSession(open_session_req) if verify: TestHS2.check_response(resp) return resp @SkipIf.sentry_disabled @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--server_name=server1 " "--sentry_config={0} " "--authorization_policy_provider_class=" "org.apache.impala.testutil.TestSentryResourceAuthorizationProvider". 
format(SENTRY_CONFIG_FILE), catalogd_args="--sentry_config={0} " "--authorization_policy_provider_class=" "org.apache.impala.testutil.TestSentryResourceAuthorizationProvider". format(SENTRY_CONFIG_FILE), sentry_config=SENTRY_CONFIG_FILE) def test_custom_authorization_provider(self, unique_role): try: self.session_handle = self.__open_hs2(getuser(), dict()).sessionHandle self.__execute_hs2_stmt("create role {0}".format(unique_role)) self.__execute_hs2_stmt("grant role {0} to group {1}".format( unique_role, grp.getgrnam(getuser()).gr_name)) self.__execute_hs2_stmt( "grant select on table tpch.lineitem to role {0}".format( unique_role)) bad_resp = self.__execute_hs2_stmt("describe tpch_seq.lineitem", False) assert 'User \'%s\' does not have privileges to access' % getuser() in \ str(bad_resp) self.__execute_hs2_stmt("describe tpch.lineitem") finally: self.__execute_hs2_stmt("drop role {0}".format(unique_role)) @SkipIf.sentry_disabled @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--server_name=server1 " "--authorized_proxy_user_config=hue={0}".format(getuser()), catalogd_args="--sentry_config=" + SENTRY_CONFIG_FILE, sentry_config=SENTRY_CONFIG_FILE) def test_access_runtime_profile(self, unique_role, unique_name): unique_db = unique_name + "_db" try: self.session_handle = self.__open_hs2(getuser(), dict()).sessionHandle self.__execute_hs2_stmt("create role {0}".format(unique_role)) self.__execute_hs2_stmt( "grant create on server to role {0}".format(unique_role)) self.__execute_hs2_stmt( "grant all on database tpch to role {0}".format(unique_role)) self.__execute_hs2_stmt( "grant select on table functional.complex_view to role {0}". 
format(unique_role)) self.__execute_hs2_stmt("grant role {0} to group {1}".format( unique_role, grp.getgrnam(getuser()).gr_name)) # Create db with permissions self.__execute_hs2_stmt("create database {0}".format(unique_db)) self.__execute_hs2_stmt( "grant all on database {0} to role {1}".format( unique_db, unique_role)) # Current user can't access view's underlying tables bad_resp = self.__execute_hs2_stmt( "explain select * from functional.complex_view", False) assert 'User \'%s\' does not have privileges to EXPLAIN' % getuser() in \ str(bad_resp) # User should not have access to the runtime profile self.__run_stmt_and_verify_profile_access( "select * from functional.complex_view", False, False) self.__run_stmt_and_verify_profile_access( "select * from functional.complex_view", False, True) # Repeat as a delegated user self.session_handle = \ self.__open_hs2('hue', {'impala.doas.user': getuser()}).sessionHandle # User should not have access to the runtime profile self.__run_stmt_and_verify_profile_access( "select * from functional.complex_view", False, False) self.__run_stmt_and_verify_profile_access( "select * from functional.complex_view", False, True) # Create a view for which the user has access to the underlying tables. 
self.session_handle = self.__open_hs2(getuser(), dict()).sessionHandle self.__execute_hs2_stmt( "create view if not exists {0}.customer_view as select * from tpch.customer " "limit 1".format(unique_db)) # User should be able to run EXPLAIN self.__execute_hs2_stmt( "explain select * from {0}.customer_view".format(unique_db)) # User should have access to the runtime profile and exec summary self.__run_stmt_and_verify_profile_access( "select * from {0}.customer_view".format(unique_db), True, False) self.__run_stmt_and_verify_profile_access( "select * from {0}.customer_view".format(unique_db), True, True) # Repeat as a delegated user self.session_handle = \ self.__open_hs2('hue', {'impala.doas.user': getuser()}).sessionHandle # Delegated user is the current user self.__run_stmt_and_verify_profile_access( "select * from {0}.customer_view".format(unique_db), True, False) self.__run_stmt_and_verify_profile_access( "select * from {0}.customer_view".format(unique_db), True, True) finally: self.__execute_hs2_stmt( "grant all on server to role {0}".format(unique_role)) self.__execute_hs2_stmt( "drop view if exists {0}.customer_view".format(unique_db)) self.__execute_hs2_stmt( "drop table if exists {0}.customer".format(unique_db)) self.__execute_hs2_stmt( "drop database if exists {0}".format(unique_db)) self.__execute_hs2_stmt("drop role {0}".format(unique_role)) def __run_stmt_and_verify_profile_access(self, stmt, has_access, close_operation): """Runs 'stmt' and retrieves the runtime profile and exec summary. If 'has_access' is true, it verifies that no runtime profile or exec summary are returned. 
If 'close_operation' is true, make sure the operation is closed before retrieving the profile and exec summary.""" from tests.hs2.test_hs2 import TestHS2 execute_statement_resp = self.__execute_hs2_stmt(stmt, False) if close_operation: close_operation_req = TCLIService.TCloseOperationReq() close_operation_req.operationHandle = execute_statement_resp.operationHandle TestHS2.check_response( self.hs2_client.CloseOperation(close_operation_req)) get_profile_req = ImpalaHiveServer2Service.TGetRuntimeProfileReq() get_profile_req.operationHandle = execute_statement_resp.operationHandle get_profile_req.sessionHandle = self.session_handle get_profile_resp = self.hs2_client.GetRuntimeProfile(get_profile_req) if has_access: TestHS2.check_response(get_profile_resp) assert "Plan: " in get_profile_resp.profile else: assert "User %s is not authorized to access the runtime profile or "\ "execution summary." % (getuser()) in str(get_profile_resp) exec_summary_req = ImpalaHiveServer2Service.TGetExecSummaryReq() exec_summary_req.operationHandle = execute_statement_resp.operationHandle exec_summary_req.sessionHandle = self.session_handle exec_summary_resp = self.hs2_client.GetExecSummary(exec_summary_req) if has_access: TestHS2.check_response(exec_summary_resp) else: assert "User %s is not authorized to access the runtime profile or "\ "execution summary." 
% (getuser()) in str(exec_summary_resp) @SkipIf.sentry_disabled @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--server_name=server1 --sentry_config=" + SENTRY_CONFIG_FILE, catalogd_args="--sentry_config=" + SENTRY_CONFIG_FILE, impala_log_dir=tempfile.mkdtemp(prefix="test_deprecated_none_", dir=os.getenv("LOG_DIR"))) def test_deprecated_flag_doesnt_show(self): assert_no_files_in_dir_contain( self.impala_log_dir, "Ignoring removed flag " "authorization_policy_file") @pytest.mark.execute_serially @CustomClusterTestSuite.with_args("--server_name=server1\ --authorization_policy_file=ignored_file", impala_log_dir=tempfile.mkdtemp( prefix="test_deprecated_", dir=os.getenv("LOG_DIR"))) def test_deprecated_flags(self): assert_file_in_dir_contains( self.impala_log_dir, "Ignoring removed flag " "authorization_policy_file") @SkipIf.sentry_disabled @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE, catalogd_args="--sentry_config=%s" % SENTRY_CONFIG_FILE, impala_log_dir=tempfile.mkdtemp(prefix="test_catalog_restart_", dir=os.getenv("LOG_DIR"))) def test_catalog_restart(self, unique_role): """IMPALA-7713: Tests that a catalogd restart when authorization is enabled should reset the previous privileges stored in impalad's catalog to avoid stale privilege data in the impalad's catalog.""" def assert_privileges(): result = self.client.execute("show grant role %s_foo" % unique_role) TestAuthorization._check_privileges( result, [["database", "functional", "", "", "", "all", "false"]]) result = self.client.execute("show grant role %s_bar" % unique_role) TestAuthorization._check_privileges( result, [["database", "functional_kudu", "", "", "", "all", "false"]]) result = self.client.execute("show grant role %s_baz" % unique_role) TestAuthorization._check_privileges( result, [["database", "functional_avro", "", "", "", "all", "false"]]) self.role_cleanup(unique_role) 
try: self.client.execute("create role %s_foo" % unique_role) self.client.execute("create role %s_bar" % unique_role) self.client.execute("create role %s_baz" % unique_role) self.client.execute( "grant all on database functional to role %s_foo" % unique_role) self.client.execute( "grant all on database functional_kudu to role %s_bar" % unique_role) self.client.execute( "grant all on database functional_avro to role %s_baz" % unique_role) assert_privileges() self._start_impala_cluster([ "--catalogd_args=--sentry_config=%s" % SENTRY_CONFIG_FILE, "--restart_catalogd_only" ]) assert_privileges() finally: self.role_cleanup(unique_role) def role_cleanup(self, role_name_match): """Cleans up any roles that match the given role name.""" for role_name in self.client.execute("show roles").data: if role_name_match in role_name: self.client.execute("drop role %s" % role_name) @staticmethod def _check_privileges(result, expected): def columns(row): cols = row.split("\t") return cols[0:len(cols) - 1] assert map(columns, result.data) == expected @SkipIf.sentry_disabled @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE, catalogd_args="--sentry_config=%s" % SENTRY_CONFIG_FILE, impala_log_dir=tempfile.mkdtemp(prefix="test_catalog_restart_", dir=os.getenv("LOG_DIR"))) def test_catalog_object(self, unique_role): """IMPALA-7721: Tests /catalog_object web API for principal and privilege""" self.role_cleanup(unique_role) try: self.client.execute("create role %s" % unique_role) self.client.execute( "grant select on database functional to role %s" % unique_role) for service in [ self.cluster.catalogd.service, self.cluster.get_first_impalad().service ]: obj_dump = service.get_catalog_object_dump( "PRINCIPAL", "%s.ROLE" % unique_role) assert "catalog_version" in obj_dump # Get the privilege associated with that principal ID. 
principal_id = re.search(r"principal_id \(i32\) = (\d+)", obj_dump) assert principal_id is not None obj_dump = service.get_catalog_object_dump( "PRIVILEGE", urllib.quote( "server=server1->db=functional->action=select->grantoption=false.%s.ROLE" % principal_id.group(1))) assert "catalog_version" in obj_dump # Get the principal that does not exist. obj_dump = service.get_catalog_object_dump( "PRINCIPAL", "doesnotexist.ROLE") assert "CatalogException" in obj_dump # Get the privilege that does not exist. obj_dump = service.get_catalog_object_dump( "PRIVILEGE", urllib.quote( "server=server1->db=doesntexist->action=select->grantoption=false.%s.ROLE" % principal_id.group(1))) assert "CatalogException" in obj_dump finally: self.role_cleanup(unique_role) @SkipIf.sentry_disabled @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE, catalogd_args= "--sentry_config=%s --sentry_catalog_polling_frequency_s=3600" % SENTRY_CONFIG_FILE, impala_log_dir=tempfile.mkdtemp( prefix="test_invalidate_metadata_sentry_unavailable_", dir=os.getenv("LOG_DIR"))) def test_invalidate_metadata_sentry_unavailable(self, unique_role): """IMPALA-7824: Tests that running INVALIDATE METADATA when Sentry is unavailable should not cause Impala to hang.""" self.role_cleanup(unique_role) try: group_name = grp.getgrnam(getuser()).gr_name self.client.execute("create role %s" % unique_role) self.client.execute("grant all on server to role %s" % unique_role) self.client.execute("grant role %s to group `%s`" % (unique_role, group_name)) self._stop_sentry_service() # Calling INVALIDATE METADATA when Sentry is unavailable should return an error. result = self.execute_query_expect_failure(self.client, "invalidate metadata") result_str = str(result) assert "MESSAGE: CatalogException: Error refreshing authorization policy:" \ in result_str assert "CAUSED BY: ImpalaRuntimeException: Error refreshing authorization policy." 
\ " Sentry is unavailable. Ensure Sentry is up:" in result_str self._start_sentry_service(SENTRY_CONFIG_FILE) # Calling INVALIDATE METADATA after Sentry is up should not return an error. self.execute_query_expect_success(self.client, "invalidate metadata") finally: self.role_cleanup(unique_role) @SkipIf.sentry_disabled @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE, catalogd_args= "--sentry_config=%s --sentry_catalog_polling_frequency_s=3600 " % SENTRY_CONFIG_FILE, impala_log_dir=tempfile.mkdtemp(prefix="test_refresh_authorization_", dir=os.getenv("LOG_DIR"))) def test_refresh_authorization(self, unique_role): """Tests refresh authorization statement by adding and removing roles and privileges externally. The long Sentry polling is used so that any authorization metadata updated externally does not get polled by Impala in order to test an an explicit call to refresh authorization statement.""" group_name = grp.getgrnam(getuser()).gr_name self.role_cleanup(unique_role) for sync_ddl in [1, 0]: query_options = {'sync_ddl': sync_ddl} clients = [] if sync_ddl: # When sync_ddl is True, we want to ensure the changes are propagated to all # coordinators. 
for impalad in self.cluster.impalads: clients.append(impalad.service.create_beeswax_client()) else: clients.append(self.client) try: self.client.execute("create role %s" % unique_role) self.client.execute("grant role %s to group `%s`" % (unique_role, group_name)) self.client.execute("grant refresh on server to %s" % unique_role) self.validate_refresh_authorization_roles( unique_role, query_options, clients) self.validate_refresh_authorization_privileges( unique_role, query_options, clients) finally: self.role_cleanup(unique_role) def validate_refresh_authorization_roles(self, unique_role, query_options, clients): """This method tests refresh authorization statement by adding and removing roles externally.""" try: # Create two roles inside Impala. self.client.execute("create role %s_internal1" % unique_role) self.client.execute("create role %s_internal2" % unique_role) # Drop an existing role (_internal1) outside Impala. role = "%s_internal1" % unique_role subprocess.check_call([ "/bin/bash", "-c", "%s/bin/sentryShell --conf %s/sentry-site.xml -dr -r %s" % (os.getenv("SENTRY_HOME"), os.getenv("SENTRY_CONF_DIR"), role) ], stdout=sys.stdout, stderr=sys.stderr) result = self.execute_query_expect_success(self.client, "show roles") assert any(role in x for x in result.data) self.execute_query_expect_success(self.client, "refresh authorization", query_options=query_options) for client in clients: result = self.execute_query_expect_success( client, "show roles") assert not any(role in x for x in result.data) # Add a new role outside Impala. 
role = "%s_external" % unique_role subprocess.check_call([ "/bin/bash", "-c", "%s/bin/sentryShell --conf %s/sentry-site.xml -cr -r %s" % (os.getenv("SENTRY_HOME"), os.getenv("SENTRY_CONF_DIR"), role) ], stdout=sys.stdout, stderr=sys.stderr) result = self.execute_query_expect_success(self.client, "show roles") assert not any(role in x for x in result.data) self.execute_query_expect_success(self.client, "refresh authorization", query_options=query_options) for client in clients: result = self.execute_query_expect_success( client, "show roles") assert any(role in x for x in result.data) finally: for suffix in ["internal1", "internal2", "external"]: self.role_cleanup("%s_%s" % (unique_role, suffix)) def validate_refresh_authorization_privileges(self, unique_role, query_options, clients): """This method tests refresh authorization statement by adding and removing privileges externally.""" # Grant select privilege outside Impala. subprocess.check_call([ "/bin/bash", "-c", "%s/bin/sentryShell --conf %s/sentry-site.xml -gpr -p " "'server=server1->db=functional->table=alltypes->action=select' -r %s" % (os.getenv("SENTRY_HOME"), os.getenv("SENTRY_CONF_DIR"), unique_role) ], stdout=sys.stdout, stderr=sys.stderr) # Before refresh authorization, there should only be one refresh privilege. result = self.execute_query_expect_success( self.client, "show grant role %s" % unique_role) assert len(result.data) == 1 assert any("refresh" in x for x in result.data) for client in clients: self.execute_query_expect_failure( client, "select * from functional.alltypes limit 1") self.execute_query_expect_success(self.client, "refresh authorization", query_options=query_options) for client in clients: # Ensure select privilege was granted after refresh authorization. 
result = self.execute_query_expect_success( client, "show grant role %s" % unique_role) assert len(result.data) == 2 assert any("select" in x for x in result.data) assert any("refresh" in x for x in result.data) self.execute_query_expect_success( client, "select * from functional.alltypes limit 1") @staticmethod def _verify_show_dbs(result, unique_name, visibility_privileges=PRIVILEGES): """ Helper function for verifying the results of SHOW DATABASES below. Only show databases with privileges implying any of the visibility_privileges. """ for priv in PRIVILEGES: # Result lines are in the format of "db_name\tdb_comment" db_name = 'db_%s_%s\t' % (unique_name, priv) if priv != 'all' and priv not in visibility_privileges: assert db_name not in result.data else: assert db_name in result.data def _test_sentry_show_stmts_helper(self, unique_role, unique_name, visibility_privileges): unique_db = unique_name + "_db" # TODO: can we create and use a temp username instead of using root? another_user = '******' another_user_grp = 'root' self.role_cleanup(unique_role) try: self.client.execute("create role %s" % unique_role) self.client.execute("grant create on server to role %s" % unique_role) self.client.execute("grant drop on server to role %s" % unique_role) self.client.execute("grant role %s to group %s" % (unique_role, grp.getgrnam(getuser()).gr_name)) self.client.execute("drop database if exists %s cascade" % unique_db) self.client.execute("create database %s" % unique_db) for priv in PRIVILEGES: self.client.execute("create database db_%s_%s" % (unique_name, priv)) self.client.execute( "grant {0} on database db_{1}_{2} to role {3}".format( priv, unique_name, priv, unique_role)) self.client.execute("create table %s.tbl_%s (i int)" % (unique_db, priv)) self.client.execute( "grant {0} on table {1}.tbl_{2} to role {3}".format( priv, unique_db, priv, unique_role)) self.client.execute("grant role %s to group %s" % (unique_role, another_user_grp)) # Owner (current user) can still 
see all the owned databases and tables result = self.client.execute("show databases") TestAuthorization._verify_show_dbs(result, unique_name) result = self.client.execute("show tables in %s" % unique_db) assert result.data == ["tbl_%s" % p for p in PRIVILEGES] # Check SHOW DATABASES and SHOW TABLES using another username # Create another client so we can user another username root_impalad_client = self.create_impala_client() result = self.execute_query_expect_success(root_impalad_client, "show databases", user=another_user) TestAuthorization._verify_show_dbs(result, unique_name, visibility_privileges) result = self.execute_query_expect_success(root_impalad_client, "show tables in %s" % unique_db, user=another_user) # Only show tables with privileges implying any of the visibility privileges assert 'tbl_all' in result.data # ALL can imply to any privilege for p in visibility_privileges: assert 'tbl_%s' % p in result.data finally: self.client.execute("drop database if exists %s cascade" % unique_db) for priv in PRIVILEGES: self.client.execute( "drop database if exists db_%s_%s cascade" % (unique_name, priv)) self.role_cleanup(unique_role) @SkipIf.sentry_disabled @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--server_name=server1 --sentry_config=%s " "--authorized_proxy_user_config=%s=* " "--min_privilege_set_for_show_stmts=select" % (SENTRY_CONFIG_FILE, getuser()), catalogd_args="--sentry_config={0}".format(SENTRY_CONFIG_FILE), sentry_config=SENTRY_CONFIG_FILE_OO, # Enable Sentry Object Ownership sentry_log_dir="{0}/test_sentry_show_stmts_with_select".format( SENTRY_BASE_LOG_DIR)) def test_sentry_show_stmts_with_select(self, unique_role, unique_name): self._test_sentry_show_stmts_helper(unique_role, unique_name, ['select']) @SkipIf.sentry_disabled @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--server_name=server1 --sentry_config=%s " "--authorized_proxy_user_config=%s=* " 
"--min_privilege_set_for_show_stmts=select,insert" % (SENTRY_CONFIG_FILE, getuser()), catalogd_args="--sentry_config={0}".format(SENTRY_CONFIG_FILE), sentry_config=SENTRY_CONFIG_FILE_OO, # Enable Sentry Object Ownership sentry_log_dir="{0}/test_sentry_show_stmts_with_select_insert".format( SENTRY_BASE_LOG_DIR)) def test_sentry_show_stmts_with_select_insert(self, unique_role, unique_name): self._test_sentry_show_stmts_helper(unique_role, unique_name, ['select', 'insert']) @SkipIf.sentry_disabled @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--server_name=server1 --sentry_config=%s " "--authorized_proxy_user_config=%s=* " "--min_privilege_set_for_show_stmts=any" % (SENTRY_CONFIG_FILE, getuser()), catalogd_args="--sentry_config={0}".format(SENTRY_CONFIG_FILE), sentry_config=SENTRY_CONFIG_FILE_OO, # Enable Sentry Object Ownership sentry_log_dir="{0}/test_sentry_show_stmts_with_any".format( SENTRY_BASE_LOG_DIR)) def test_sentry_show_stmts_with_any(self, unique_role, unique_name): self._test_sentry_show_stmts_helper(unique_role, unique_name, PRIVILEGES) def _test_ranger_show_stmts_helper(self, unique_name, visibility_privileges): unique_db = unique_name + "_db" admin_client = self.create_impala_client() try: admin_client.execute("drop database if exists %s cascade" % unique_db, user=ADMIN) admin_client.execute("create database %s" % unique_db, user=ADMIN) for priv in PRIVILEGES: admin_client.execute("create database db_%s_%s" % (unique_name, priv)) admin_client.execute( "grant {0} on database db_{1}_{2} to user {3}".format( priv, unique_name, priv, getuser())) admin_client.execute("create table %s.tbl_%s (i int)" % (unique_db, priv)) admin_client.execute( "grant {0} on table {1}.tbl_{2} to user {3}".format( priv, unique_db, priv, getuser())) # Admin can still see all the databases and tables result = admin_client.execute("show databases") TestAuthorization._verify_show_dbs(result, unique_name) result = admin_client.execute("show tables in 
%s" % unique_db) assert result.data == ["tbl_%s" % p for p in PRIVILEGES] # Check SHOW DATABASES and SHOW TABLES using another username result = self.client.execute("show databases") TestAuthorization._verify_show_dbs(result, unique_name, visibility_privileges) result = self.client.execute("show tables in %s" % unique_db) # Only show tables with privileges implying any of the visibility privileges assert 'tbl_all' in result.data # ALL can imply to any privilege for p in visibility_privileges: assert 'tbl_%s' % p in result.data finally: admin_client.execute("drop database if exists %s cascade" % unique_db) for priv in PRIVILEGES: admin_client.execute( "drop database if exists db_%s_%s cascade" % (unique_name, priv)) @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--server-name=server1 --ranger_service_type=hive " "--ranger_app_id=impala --authorization_provider=ranger " "--min_privilege_set_for_show_stmts=select", catalogd_args="--server-name=server1 --ranger_service_type=hive " "--ranger_app_id=impala --authorization_provider=ranger") def test_ranger_show_stmts_with_select(self, unique_name): self._test_ranger_show_stmts_helper(unique_name, ['select']) @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--server-name=server1 --ranger_service_type=hive " "--ranger_app_id=impala --authorization_provider=ranger " "--min_privilege_set_for_show_stmts=select,insert", catalogd_args="--server-name=server1 --ranger_service_type=hive " "--ranger_app_id=impala --authorization_provider=ranger") def test_ranger_show_stmts_with_select_insert(self, unique_name): self._test_ranger_show_stmts_helper(unique_name, ['select', 'insert']) @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--server-name=server1 --ranger_service_type=hive " "--ranger_app_id=impala --authorization_provider=ranger " "--min_privilege_set_for_show_stmts=any", catalogd_args="--server-name=server1 --ranger_service_type=hive " 
"--ranger_app_id=impala --authorization_provider=ranger") def test_ranger_show_stmts_with_any(self, unique_name): self._test_ranger_show_stmts_helper(unique_name, PRIVILEGES) @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--server-name=server1 --ranger_service_type=hive " "--ranger_app_id=impala --authorization_provider=ranger " "--num_check_authorization_threads=%d" % (random.randint(2, 128)), catalogd_args="--server-name=server1 --ranger_service_type=hive " "--ranger_app_id=impala --authorization_provider=ranger") def test_num_check_authorization_threads_with_ranger(self, unique_name): self._test_ranger_show_stmts_helper(unique_name, PRIVILEGES) @SkipIf.sentry_disabled @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--server_name=server1 --sentry_config=%s " "--authorized_proxy_user_config=%s=* " "--num_check_authorization_threads=%d" % (SENTRY_CONFIG_FILE, getuser(), random.randint(2, 128)), catalogd_args="--sentry_config={0}".format(SENTRY_CONFIG_FILE), sentry_config=SENTRY_CONFIG_FILE_OO, # Enable Sentry Object Ownership sentry_log_dir="{0}/test_num_check_authorization_threads_with_sentry". format(SENTRY_BASE_LOG_DIR)) def test_num_check_authorization_threads_with_sentry( self, unique_role, unique_name): self._test_sentry_show_stmts_helper(unique_role, unique_name, PRIVILEGES)
class HS2TestSuite(ImpalaTestSuite):
  """Base suite for tests that drive an impalad through its HiveServer2 endpoint.

  setup() opens a Thrift connection to IMPALAD_HS2_HOST_PORT and creates TEST_DB;
  teardown() cleans TEST_DB up again via cleanup_db() and closes the socket.
  """
  # This DB will be created/dropped for every HS2TestSuite subclass. Make the name unique
  # so different test suites don't clobber each other's DBs. The [2:] is to remove the
  # "0." from the random floating-point number.
  TEST_DB = "hs2_db" + str(random.random())[2:]

  # Names of the typed value fields probed on a result column when counting rows.
  HS2_V6_COLUMN_TYPES = ["boolVal", "stringVal", "byteVal", "i16Val",
                         "i32Val", "i64Val", "doubleVal", "binaryVal"]

  def setup(self):
    """Connects the HS2 Thrift client and (re)creates the per-suite test database."""
    self.cleanup_db(self.TEST_DB)
    host, port = IMPALAD_HS2_HOST_PORT.split(":")
    self.socket = TSocket(host, port)
    self.transport = TBufferedTransport(self.socket)
    self.transport.open()
    self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
    self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)
    self.client.execute("create database %s" % self.TEST_DB)

  def teardown(self):
    """Cleans up the test database and closes the HS2 socket if one was opened."""
    self.cleanup_db(self.TEST_DB)
    if self.socket:
      self.socket.close()

  @staticmethod
  def check_response(response,
                     expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
                     expected_error_prefix=None):
    """Asserts that 'response' carries 'expected_status_code'.

    When a non-success status is expected and 'expected_error_prefix' is given, also
    asserts that the response's error message starts with that prefix.
    """
    assert response.status.statusCode == expected_status_code
    if expected_status_code != TCLIService.TStatusCode.SUCCESS_STATUS \
       and expected_error_prefix is not None:
      assert response.status.errorMessage.startswith(expected_error_prefix)

  def close(self, op_handle):
    """Closes the operation identified by 'op_handle' and asserts that it succeeded."""
    close_op_req = TCLIService.TCloseOperationReq()
    close_op_req.operationHandle = op_handle
    close_op_resp = self.hs2_client.CloseOperation(close_op_req)
    assert close_op_resp.status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS

  def get_num_rows(self, result_set):
    """Returns the number of rows in 'result_set'.

    If no columns are set, the length of 'result_set.rows' is returned. Otherwise the
    count is the number of values in the first typed value list that is set on the
    first column. Fails with an AssertionError if that column has no typed values set.
    """
    # rows will always be set, so the only way to tell if we should use it is to see if
    # any columns are set
    if not result_set.columns:
      return len(result_set.rows)

    first_col = result_set.columns[0]
    for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
      typed_col = getattr(first_col, col_type)
      if typed_col is not None:
        return len(typed_col.values)
    assert False, "None of the typed value fields is set on the first result column"

  def _fetch_results(self, handle, orientation, max_rows):
    """Issues one FetchResults call for 'handle' and returns the unchecked response."""
    fetch_results_req = TCLIService.TFetchResultsReq()
    fetch_results_req.operationHandle = handle
    fetch_results_req.orientation = orientation
    fetch_results_req.maxRows = max_rows
    return self.hs2_client.FetchResults(fetch_results_req)

  def fetch(self, handle, orientation, size, expected_num_rows=None):
    """Fetches at most size number of rows from the query identified by the given
    operation handle. Uses the given fetch orientation. Asserts that the fetch returns a
    success status, and that the number of rows returned is equal to size, or equal to
    the given expected_num_rows (if one was given). Returns the fetch response."""
    fetch_results_resp = self._fetch_results(handle, orientation, size)
    HS2TestSuite.check_response(fetch_results_resp)
    num_rows = size if expected_num_rows is None else expected_num_rows
    assert self.get_num_rows(fetch_results_resp.results) == num_rows
    return fetch_results_resp

  def fetch_until(self, handle, orientation, size):
    """Fetches exactly 'size' rows from the given query handle, with the given fetch
    orientation. If fewer rows than 'size' are returned by the first fetch, repeated
    fetches are issued for the remaining rows until 'size' rows have been fetched in
    total. Every fetch must succeed, and every repeated fetch must return at least one
    row; otherwise an assertion fails. Returns nothing."""
    fetch_results_resp = self._fetch_results(handle, orientation, size)
    HS2TestSuite.check_response(fetch_results_resp)
    num_rows_fetched = self.get_num_rows(fetch_results_resp.results)
    while num_rows_fetched < size:
      # Only ask for what is still missing so the total can never exceed 'size'.
      fetch_results_resp = self._fetch_results(handle, orientation,
                                               size - num_rows_fetched)
      HS2TestSuite.check_response(fetch_results_resp)
      last_fetch_size = self.get_num_rows(fetch_results_resp.results)
      # An empty fetch would make this loop spin forever, so treat it as a failure.
      assert last_fetch_size > 0
      num_rows_fetched += last_fetch_size
    assert num_rows_fetched == size

  def fetch_fail(self, handle, orientation, expected_error_prefix):
    """Attempts to fetch rows from the query identified by the given operation handle.
    Asserts that the fetch returns an error with an error message matching the given
    expected_error_prefix. Returns the fetch response."""
    fetch_results_resp = self._fetch_results(handle, orientation, 100)
    HS2TestSuite.check_response(fetch_results_resp, TCLIService.TStatusCode.ERROR_STATUS,
                                expected_error_prefix)
    return fetch_results_resp
class TestAuthorization(CustomClusterTestSuite):
  """Custom-cluster authorization tests.

  Part of the suite talks to the first impalad over a raw HiveServer2 Thrift connection
  opened in setup(); the rest uses the regular test client (self.client).
  """
  # Directory handed to impalad via --audit_event_log_dir by the impersonation tests.
  AUDIT_LOG_DIR = tempfile.mkdtemp(dir=os.getenv('LOG_DIR'))

  def setup(self):
    """Opens an HS2 Thrift connection to the first impalad of the cluster."""
    host, port = (self.cluster.impalads[0].service.hostname,
                  self.cluster.impalads[0].service.hs2_port)
    self.socket = TSocket(host, port)
    self.transport = TBufferedTransport(self.socket)
    self.transport.open()
    self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
    self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

  def teardown(self):
    """Closes the HS2 socket and removes the audit log directory."""
    if self.socket:
      self.socket.close()
    shutil.rmtree(self.AUDIT_LOG_DIR, ignore_errors=True)

  def __execute_hs2_stmt(self, statement, verify=True):
    """
    Executes an hs2 statement in the session stored in self.session_handle

    :param statement: the statement to execute
    :param verify: If set to true, the response is checked with
        TestHS2.check_response before it is returned
    :return: the result of execution
    """
    from tests.hs2.test_hs2 import TestHS2
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.statement = statement
    result = self.hs2_client.ExecuteStatement(execute_statement_req)
    if verify:
      TestHS2.check_response(result)
    return result

  def __open_hs2(self, user, configuration, verify=True):
    """
    Open a session with hs2

    :param user: the user to open the session
    :param configuration: the configuration for the session
    :param verify: If set to true, the response is checked with
        TestHS2.check_response before it is returned
    :return: the result of opening the session
    """
    from tests.hs2.test_hs2 import TestHS2
    open_session_req = TCLIService.TOpenSessionReq()
    open_session_req.username = user
    open_session_req.configuration = configuration
    resp = self.hs2_client.OpenSession(open_session_req)
    if verify:
      TestHS2.check_response(resp)
    return resp

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args="--server_name=server1 "
                   "--sentry_config={0} "
                   "--authorization_policy_provider_class="
                   "org.apache.impala.service.CustomClusterResourceAuthorizationProvider"
                   .format(SENTRY_CONFIG_FILE),
      catalogd_args="--sentry_config={0} "
                    "--authorization_policy_provider_class="
                    "org.apache.impala.service.CustomClusterResourceAuthorizationProvider"
                    .format(SENTRY_CONFIG_FILE),
      sentry_config=SENTRY_CONFIG_FILE)
  def test_custom_authorization_provider(self, unique_role):
    """Checks table access with a custom authorization policy provider class: a table
    without a grant is rejected, a table with a SELECT grant can be described."""
    try:
      self.session_handle = self.__open_hs2(getuser(), dict()).sessionHandle
      self.__execute_hs2_stmt("create role {0}".format(unique_role))
      self.__execute_hs2_stmt("grant role {0} to group {1}"
                              .format(unique_role, grp.getgrnam(getuser()).gr_name))
      self.__execute_hs2_stmt("grant select on table tpch.lineitem to role {0}"
                              .format(unique_role))

      bad_resp = self.__execute_hs2_stmt("describe tpch_seq.lineitem", False)
      assert 'User \'%s\' does not have privileges to access' % getuser() in \
          str(bad_resp)
      self.__execute_hs2_stmt("describe tpch.lineitem")
    finally:
      self.__execute_hs2_stmt("drop role {0}".format(unique_role))

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args="--server_name=server1 "
                   "--authorized_proxy_user_config=hue={0}".format(getuser()),
      catalogd_args="--sentry_config=" + SENTRY_CONFIG_FILE,
      sentry_config=SENTRY_CONFIG_FILE)
  def test_access_runtime_profile(self, unique_role, unique_name):
    """Checks that the runtime profile and exec summary of a query are only handed out
    when the user may access the tables underlying the queried view, both for a direct
    session and for a session delegated through 'hue'."""
    unique_db = unique_name + "_db"

    try:
      self.session_handle = self.__open_hs2(getuser(), dict()).sessionHandle
      self.__execute_hs2_stmt("create role {0}".format(unique_role))
      self.__execute_hs2_stmt("grant create on server to role {0}".format(unique_role))
      self.__execute_hs2_stmt("grant all on database tpch to role {0}"
                              .format(unique_role))
      self.__execute_hs2_stmt("grant select on table functional.complex_view to role {0}"
                              .format(unique_role))
      self.__execute_hs2_stmt("grant role {0} to group {1}"
                              .format(unique_role, grp.getgrnam(getuser()).gr_name))
      # Create db with permissions
      self.__execute_hs2_stmt("create database {0}".format(unique_db))
      self.__execute_hs2_stmt("grant all on database {0} to role {1}"
                              .format(unique_db, unique_role))

      # Current user can't access view's underlying tables
      bad_resp = self.__execute_hs2_stmt("explain select * from functional.complex_view",
                                         False)
      assert 'User \'%s\' does not have privileges to EXPLAIN' % getuser() in \
          str(bad_resp)
      # User should not have access to the runtime profile
      self.__run_stmt_and_verify_profile_access("select * from functional.complex_view",
                                                False, False)
      self.__run_stmt_and_verify_profile_access("select * from functional.complex_view",
                                                False, True)

      # Repeat as a delegated user
      self.session_handle = \
          self.__open_hs2('hue', {'impala.doas.user': getuser()}).sessionHandle
      # User should not have access to the runtime profile
      self.__run_stmt_and_verify_profile_access("select * from functional.complex_view",
                                                False, False)
      self.__run_stmt_and_verify_profile_access("select * from functional.complex_view",
                                                False, True)

      # Create a view for which the user has access to the underlying tables.
      self.session_handle = self.__open_hs2(getuser(), dict()).sessionHandle
      self.__execute_hs2_stmt(
          "create view if not exists {0}.customer_view as select * from tpch.customer "
          "limit 1".format(unique_db))

      # User should be able to run EXPLAIN
      self.__execute_hs2_stmt("explain select * from {0}.customer_view"
                              .format(unique_db))

      # User should have access to the runtime profile and exec summary
      self.__run_stmt_and_verify_profile_access("select * from {0}.customer_view"
                                                .format(unique_db), True, False)
      self.__run_stmt_and_verify_profile_access("select * from {0}.customer_view"
                                                .format(unique_db), True, True)

      # Repeat as a delegated user
      self.session_handle = \
          self.__open_hs2('hue', {'impala.doas.user': getuser()}).sessionHandle
      # Delegated user is the current user
      self.__run_stmt_and_verify_profile_access("select * from {0}.customer_view"
                                                .format(unique_db), True, False)
      self.__run_stmt_and_verify_profile_access("select * from {0}.customer_view"
                                                .format(unique_db), True, True)
    finally:
      self.__execute_hs2_stmt("grant all on server to role {0}".format(unique_role))
      self.__execute_hs2_stmt("drop view if exists {0}.customer_view".format(unique_db))
      self.__execute_hs2_stmt("drop table if exists {0}.customer".format(unique_db))
      self.__execute_hs2_stmt("drop database if exists {0}".format(unique_db))
      self.__execute_hs2_stmt("drop role {0}".format(unique_role))

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args="--server_name=server1 "
                   "--authorized_proxy_user_config=foo=bar;hue={0} "
                   "--abort_on_failed_audit_event=false "
                   "--audit_event_log_dir={1}"
                   .format(getuser(), AUDIT_LOG_DIR),
      catalogd_args="--sentry_config=" + SENTRY_CONFIG_FILE,
      sentry_config=SENTRY_CONFIG_FILE)
  def test_user_impersonation(self, unique_role):
    """End-to-end user impersonation + authorization test"""
    self.__test_impersonation(unique_role)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args="--server_name=server1 "
                   "--authorized_proxy_user_config=hue=bar "
                   "--authorized_proxy_group_config=foo=bar;hue={0} "
                   "--abort_on_failed_audit_event=false "
                   "--audit_event_log_dir={1}"
                   .format(grp.getgrgid(os.getgid()).gr_name, AUDIT_LOG_DIR),
      catalogd_args="--sentry_config=" + SENTRY_CONFIG_FILE,
      sentry_config=SENTRY_CONFIG_FILE)
  def test_group_impersonation(self, unique_role):
    """End-to-end group impersonation + authorization test"""
    self.__test_impersonation(unique_role)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=%s\
      --authorized_proxy_user_config=foo=bar\
      --authorized_proxy_group_config=foo=bar\
      --abort_on_failed_audit_event=false\
      --audit_event_log_dir=%s" % (AUTH_POLICY_FILE, AUDIT_LOG_DIR))
  def test_no_matching_user_and_group_impersonation(self):
    """Checks that opening a delegated session is rejected when neither the proxy user
    config nor the proxy group config covers the connected user."""
    open_session_req = TCLIService.TOpenSessionReq()
    # NOTE(review): the two '******' literals below look masked. The assertion expects
    # connected user 'hue' delegating to 'abc' -- confirm against the upstream file.
    open_session_req.username = '******'
    open_session_req.configuration = dict()
    open_session_req.configuration['impala.doas.user'] = '******'
    resp = self.hs2_client.OpenSession(open_session_req)
    assert 'User \'hue\' is not authorized to delegate to \'abc\'' in str(resp)

  def __test_impersonation(self, role):
    """End-to-end impersonation + authorization test. Expects authorization to be
    configured before running this test"""
    # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
    # the module within this test function, rather than as a top-level import. This way
    # the tests in that module will not get pulled when executing this test suite. The fix
    # is to split the utility code out of the TestHS2 class and support HS2 as a first
    # class citizen in our test framework.
    from tests.hs2.test_hs2 import TestHS2

    try:
      self.session_handle = self.__open_hs2(getuser(), dict()).sessionHandle
      self.__execute_hs2_stmt("create role {0}".format(role))
      self.__execute_hs2_stmt("grant all on table tpch.lineitem to role {0}"
                              .format(role))
      self.__execute_hs2_stmt("grant role {0} to group {1}"
                              .format(role, grp.getgrnam(getuser()).gr_name))
      self.__execute_hs2_stmt("grant role {0} to group {1}"
                              .format(role, grp.getgrgid(os.getgid()).gr_name))

      # Try to query a table we are not authorized to access
      self.session_handle = self.__open_hs2('hue',
                                            {'impala.doas.user': getuser()}).sessionHandle
      bad_resp = self.__execute_hs2_stmt("describe tpch_seq.lineitem", False)
      assert 'User \'%s\' does not have privileges to access' % getuser() in\
          str(bad_resp)

      assert self.__wait_for_audit_record(user=getuser(), impersonator='hue'),\
          'No matching audit event recorded in time window'

      # Now try the same operation on a table we are authorized to access.
      good_resp = self.__execute_hs2_stmt("describe tpch.lineitem")
      TestHS2.check_response(good_resp)

      # Verify the correct user information is in the runtime profile
      query_id = operation_id_to_query_id(
          good_resp.operationHandle.operationId)
      profile_page = self.cluster.impalads[0].service.read_query_profile_page(query_id)
      # NOTE(review): the '******' literals in this method look masked; the sessions
      # here are opened as 'hue' -- confirm the intended values against upstream.
      self.__verify_profile_user_fields(profile_page, effective_user=getuser(),
                                        delegated_user=getuser(),
                                        connected_user='******')

      # Try to delegate to a user we are not authorized to delegate to.
      resp = self.__open_hs2('hue', {'impala.doas.user': '******'}, False)
      assert 'User \'hue\' is not authorized to delegate to \'some_user\'' in str(resp)

      # Create a new session which does not have a do_as_user and run a simple query.
      self.session_handle = self.__open_hs2('hue', dict()).sessionHandle
      resp = self.__execute_hs2_stmt("select 1")

      # Verify the correct user information is in the runtime profile. Since there is
      # no do_as_user the Delegated User field should be empty.
      query_id = operation_id_to_query_id(resp.operationHandle.operationId)

      profile_page = self.cluster.impalads[0].service.read_query_profile_page(query_id)
      self.__verify_profile_user_fields(profile_page, effective_user='******',
                                        delegated_user='', connected_user='******')
    finally:
      self.session_handle = self.__open_hs2(getuser(), dict()).sessionHandle
      self.__execute_hs2_stmt("grant all on server to role {0}".format(role))
      self.__execute_hs2_stmt("grant role {0} to group {1}"
                              .format(role, grp.getgrnam(getuser()).gr_name))
      self.__execute_hs2_stmt("drop role {0}".format(role))

  def __verify_profile_user_fields(self, profile_str, effective_user, connected_user,
                                   delegated_user):
    """Verifies the given runtime profile string contains the specified values for
    User, Connected User, and Delegated User"""
    assert '\n User: %s\n' % effective_user in profile_str
    assert '\n Connected User: %s\n' % connected_user in profile_str
    assert '\n Delegated User: %s\n' % delegated_user in profile_str

  def __wait_for_audit_record(self, user, impersonator, timeout_secs=30):
    """Waits until an audit log record is found that contains the given user and
    impersonator, or until the timeout is reached. Returns True if a matching record
    was found and False on timeout.
    """
    # The audit event might not show up immediately (the audit logs are flushed to disk
    # on regular intervals), so poll the audit event logs until a matching record is
    # found.
    start_time = time()
    while time() - start_time < timeout_secs:
      for audit_file_name in os.listdir(self.AUDIT_LOG_DIR):
        if self.__find_matching_audit_record(audit_file_name, user, impersonator):
          return True
      sleep(1)
    return False

  def __find_matching_audit_record(self, audit_file_name, user, impersonator):
    """Returns True if the given file in AUDIT_LOG_DIR holds a record for 'user' and
    'impersonator'. Every line is parsed as one JSON object, and the entry under its
    smallest key is the one that is inspected."""
    with open(os.path.join(self.AUDIT_LOG_DIR, audit_file_name)) as audit_log_file:
      # Iterate the file lazily instead of loading every line up front.
      for line in audit_log_file:
        json_dict = json.loads(line)
        if not json_dict:
          continue
        event = json_dict[min(json_dict)]
        if event['user'] == user and event['impersonator'] == impersonator:
          return True
    return False

  def __run_stmt_and_verify_profile_access(self, stmt, has_access, close_operation):
    """Runs 'stmt' and retrieves the runtime profile and exec summary. If 'has_access'
    is true, it verifies that both requests succeed and that the profile contains a
    plan; otherwise it verifies that both requests are rejected with a "not authorized"
    message. If 'close_operation' is true, make sure the operation is closed before
    retrieving the profile and exec summary."""
    from tests.hs2.test_hs2 import TestHS2
    execute_statement_resp = self.__execute_hs2_stmt(stmt, False)

    if close_operation:
      close_operation_req = TCLIService.TCloseOperationReq()
      close_operation_req.operationHandle = execute_statement_resp.operationHandle
      TestHS2.check_response(self.hs2_client.CloseOperation(close_operation_req))

    get_profile_req = ImpalaHiveServer2Service.TGetRuntimeProfileReq()
    get_profile_req.operationHandle = execute_statement_resp.operationHandle
    get_profile_req.sessionHandle = self.session_handle
    get_profile_resp = self.hs2_client.GetRuntimeProfile(get_profile_req)

    if has_access:
      TestHS2.check_response(get_profile_resp)
      assert "Plan: " in get_profile_resp.profile
    else:
      assert "User %s is not authorized to access the runtime profile or "\
          "execution summary." % (getuser()) in str(get_profile_resp)

    exec_summary_req = ImpalaHiveServer2Service.TGetExecSummaryReq()
    exec_summary_req.operationHandle = execute_statement_resp.operationHandle
    exec_summary_req.sessionHandle = self.session_handle
    exec_summary_resp = self.hs2_client.GetExecSummary(exec_summary_req)

    if has_access:
      TestHS2.check_response(exec_summary_resp)
    else:
      assert "User %s is not authorized to access the runtime profile or "\
          "execution summary." % (getuser()) in str(exec_summary_resp)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args="--server_name=server1 --sentry_config=" + SENTRY_CONFIG_FILE,
      catalogd_args="--sentry_config=" + SENTRY_CONFIG_FILE,
      impala_log_dir=tempfile.mkdtemp(prefix="test_deprecated_none_",
                                      dir=os.getenv("LOG_DIR")))
  def test_deprecated_flag_doesnt_show(self):
    """Checks that no log file mentions the removed authorization_policy_file flag when
    the flag is not passed."""
    assert_no_files_in_dir_contain(self.impala_log_dir, "Ignoring removed flag "
                                                        "authorization_policy_file")

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=%s" % (AUTH_POLICY_FILE),
      impala_log_dir=tempfile.mkdtemp(prefix="test_deprecated_",
                                      dir=os.getenv("LOG_DIR")))
  def test_deprecated_flags(self):
    """Checks that passing the removed authorization_policy_file flag is logged."""
    assert_file_in_dir_contains(self.impala_log_dir, "Ignoring removed flag "
                                                     "authorization_policy_file")

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE,
      catalogd_args="--sentry_config=%s" % SENTRY_CONFIG_FILE,
      impala_log_dir=tempfile.mkdtemp(prefix="test_catalog_restart_",
                                      dir=os.getenv("LOG_DIR")))
  def test_catalog_restart(self, unique_role):
    """IMPALA-7713: Tests that a catalogd restart when authorization is enabled should
    reset the previous privileges stored in impalad's catalog to avoid stale privilege
    data in the impalad's catalog."""
    def assert_privileges():
      result = self.client.execute("show grant role %s_foo" % unique_role)
      TestAuthorization._check_privileges(result, [["database", "functional",
                                                    "", "", "", "all", "false"]])

      result = self.client.execute("show grant role %s_bar" % unique_role)
      TestAuthorization._check_privileges(result, [["database", "functional_kudu",
                                                    "", "", "", "all", "false"]])

      result = self.client.execute("show grant role %s_baz" % unique_role)
      TestAuthorization._check_privileges(result, [["database", "functional_avro",
                                                    "", "", "", "all", "false"]])

    self.role_cleanup(unique_role)
    try:
      self.client.execute("create role %s_foo" % unique_role)
      self.client.execute("create role %s_bar" % unique_role)
      self.client.execute("create role %s_baz" % unique_role)
      self.client.execute("grant all on database functional to role %s_foo" %
                          unique_role)
      self.client.execute("grant all on database functional_kudu to role %s_bar" %
                          unique_role)
      self.client.execute("grant all on database functional_avro to role %s_baz" %
                          unique_role)

      assert_privileges()
      self._start_impala_cluster(["--catalogd_args=--sentry_config=%s" %
                                  SENTRY_CONFIG_FILE, "--restart_catalogd_only"])
      assert_privileges()
    finally:
      self.role_cleanup(unique_role)

  def role_cleanup(self, role_name_match):
    """Cleans up any roles that match the given role name."""
    for role_name in self.client.execute("show roles").data:
      if role_name_match in role_name:
        self.client.execute("drop role %s" % role_name)

  @staticmethod
  def _check_privileges(result, expected):
    """Asserts that the rows in 'result.data', split on tabs and with their last column
    dropped, are equal to 'expected' (a list of lists of column values)."""
    def columns(row):
      return row.split("\t")[:-1]

    # Build a real list: comparing a lazy map object to a list is never equal.
    assert [columns(row) for row in result.data] == expected

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE,
      catalogd_args="--sentry_config=%s" % SENTRY_CONFIG_FILE,
      impala_log_dir=tempfile.mkdtemp(prefix="test_catalog_restart_",
                                      dir=os.getenv("LOG_DIR")))
  def test_catalog_object(self, unique_role):
    """IMPALA-7721: Tests /catalog_object web API for principal and privilege"""
    self.role_cleanup(unique_role)
    try:
      self.client.execute("create role %s" % unique_role)
      self.client.execute("grant select on database functional to role %s" % unique_role)

      for service in [self.cluster.catalogd.service,
                      self.cluster.get_first_impalad().service]:
        obj_dump = service.get_catalog_object_dump("PRINCIPAL", "%s.ROLE" % unique_role)
        assert "catalog_version" in obj_dump

        # Get the privilege associated with that principal ID.
        principal_id = re.search(r"principal_id \(i32\) = (\d+)", obj_dump)
        assert principal_id is not None
        obj_dump = service.get_catalog_object_dump("PRIVILEGE", urllib.quote(
            "server=server1->db=functional->action=select->grantoption=false.%s.ROLE" %
            principal_id.group(1)))
        assert "catalog_version" in obj_dump

        # Get the principal that does not exist.
        obj_dump = service.get_catalog_object_dump("PRINCIPAL", "doesnotexist.ROLE")
        assert "CatalogException" in obj_dump

        # Get the privilege that does not exist.
        obj_dump = service.get_catalog_object_dump("PRIVILEGE", urllib.quote(
            "server=server1->db=doesntexist->action=select->grantoption=false.%s.ROLE" %
            principal_id.group(1)))
        assert "CatalogException" in obj_dump
    finally:
      self.role_cleanup(unique_role)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE,
      catalogd_args="--sentry_config=%s --sentry_catalog_polling_frequency_s=3600" %
                    SENTRY_CONFIG_FILE,
      impala_log_dir=tempfile.mkdtemp(prefix="test_invalidate_metadata_sentry_unavailable_",
                                      dir=os.getenv("LOG_DIR")))
  def test_invalidate_metadata_sentry_unavailable(self, unique_role):
    """IMPALA-7824: Tests that running INVALIDATE METADATA when Sentry is unavailable
    should not cause Impala to hang."""
    self.role_cleanup(unique_role)
    try:
      group_name = grp.getgrnam(getuser()).gr_name
      self.client.execute("create role %s" % unique_role)
      self.client.execute("grant all on server to role %s" % unique_role)
      self.client.execute("grant role %s to group `%s`" % (unique_role, group_name))

      self._stop_sentry_service()
      try:
        # Calling INVALIDATE METADATA when Sentry is unavailable should return an error.
        result = self.execute_query_expect_failure(self.client, "invalidate metadata")
        result_str = str(result)
        assert "MESSAGE: CatalogException: Error refreshing authorization policy:" \
               in result_str
        assert "CAUSED BY: ImpalaRuntimeException: Error refreshing authorization policy." \
               " Sentry is unavailable. Ensure Sentry is up:" in result_str
      finally:
        # Bring Sentry back even if a check above failed, so that the role cleanup in
        # the outer 'finally' does not run while Sentry is still stopped.
        self._start_sentry_service(SENTRY_CONFIG_FILE)

      # Calling INVALIDATE METADATA after Sentry is up should not return an error.
      self.execute_query_expect_success(self.client, "invalidate metadata")
    finally:
      self.role_cleanup(unique_role)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE,
      catalogd_args="--sentry_config=%s --sentry_catalog_polling_frequency_s=3600 " %
                    SENTRY_CONFIG_FILE,
      impala_log_dir=tempfile.mkdtemp(prefix="test_refresh_authorization_",
                                      dir=os.getenv("LOG_DIR")))
  def test_refresh_authorization(self, unique_role):
    """Tests refresh authorization statement by adding and removing roles and privileges
    externally. The long Sentry polling is used so that any authorization metadata
    updated externally does not get polled by Impala in order to test an explicit
    call to refresh authorization statement."""
    group_name = grp.getgrnam(getuser()).gr_name
    self.role_cleanup(unique_role)
    for sync_ddl in [1, 0]:
      query_options = {'sync_ddl': sync_ddl}
      clients = []
      if sync_ddl:
        # When sync_ddl is True, we want to ensure the changes are propagated to all
        # coordinators.
        for impalad in self.cluster.impalads:
          clients.append(impalad.service.create_beeswax_client())
      else:
        clients.append(self.client)

      try:
        self.client.execute("create role %s" % unique_role)
        self.client.execute("grant role %s to group `%s`" % (unique_role, group_name))
        self.client.execute("grant refresh on server to %s" % unique_role)

        self.validate_refresh_authorization_roles(unique_role, query_options, clients)
        self.validate_refresh_authorization_privileges(unique_role, query_options,
                                                       clients)
      finally:
        self.role_cleanup(unique_role)

  def _run_sentry_shell(self, shell_args):
    """Runs $SENTRY_HOME/bin/sentryShell with the sentry-site.xml from $SENTRY_CONF_DIR
    and 'shell_args' appended; raises CalledProcessError on a non-zero exit status."""
    subprocess.check_call(
        ["/bin/bash", "-c",
         "%s/bin/sentryShell --conf %s/sentry-site.xml %s"
         % (os.getenv("SENTRY_HOME"), os.getenv("SENTRY_CONF_DIR"), shell_args)],
        stdout=sys.stdout, stderr=sys.stderr)

  def validate_refresh_authorization_roles(self, unique_role, query_options, clients):
    """This method tests refresh authorization statement by adding and removing
    roles externally."""
    try:
      # Create two roles inside Impala.
      self.client.execute("create role %s_internal1" % unique_role)
      self.client.execute("create role %s_internal2" % unique_role)
      # Drop an existing role (_internal1) outside Impala.
      role = "%s_internal1" % unique_role
      self._run_sentry_shell("-dr -r %s" % role)

      # The role is still listed until the authorization metadata is refreshed.
      result = self.execute_query_expect_success(self.client, "show roles")
      assert any(role in x for x in result.data)
      self.execute_query_expect_success(self.client, "refresh authorization",
                                        query_options=query_options)
      for client in clients:
        result = self.execute_query_expect_success(client, "show roles")
        assert not any(role in x for x in result.data)

      # Add a new role outside Impala.
      role = "%s_external" % unique_role
      self._run_sentry_shell("-cr -r %s" % role)

      # The new role is not listed until the authorization metadata is refreshed.
      result = self.execute_query_expect_success(self.client, "show roles")
      assert not any(role in x for x in result.data)
      self.execute_query_expect_success(self.client, "refresh authorization",
                                        query_options=query_options)
      for client in clients:
        result = self.execute_query_expect_success(client, "show roles")
        assert any(role in x for x in result.data)
    finally:
      for suffix in ["internal1", "internal2", "external"]:
        self.role_cleanup("%s_%s" % (unique_role, suffix))

  def validate_refresh_authorization_privileges(self, unique_role, query_options,
                                                clients):
    """This method tests refresh authorization statement by adding and removing
    privileges externally."""
    # Grant select privilege outside Impala.
    self._run_sentry_shell(
        "-gpr -p "
        "'server=server1->db=functional->table=alltypes->action=select' -r %s"
        % unique_role)

    # Before refresh authorization, there should only be one refresh privilege.
    result = self.execute_query_expect_success(self.client,
                                               "show grant role %s" % unique_role)
    assert len(result.data) == 1
    assert any("refresh" in x for x in result.data)

    for client in clients:
      self.execute_query_expect_failure(client,
                                        "select * from functional.alltypes limit 1")

    self.execute_query_expect_success(self.client, "refresh authorization",
                                      query_options=query_options)

    for client in clients:
      # Ensure select privilege was granted after refresh authorization.
      result = self.execute_query_expect_success(client,
                                                 "show grant role %s" % unique_role)
      assert len(result.data) == 2
      assert any("select" in x for x in result.data)
      assert any("refresh" in x for x in result.data)
      self.execute_query_expect_success(client,
                                        "select * from functional.alltypes limit 1")
class TestAuthorization(CustomClusterTestSuite):
  """Authorization and impersonation tests driven through a raw HiveServer2 client.

  setup() connects to the impalad at IMPALAD_HS2_HOST_PORT. Each test passes its own
  impalad flags through CustomClusterTestSuite.with_args().
  """

  # Directory handed to impalad via --audit_event_log_dir. It is created when the class
  # body is evaluated and removed (best effort) by teardown().
  AUDIT_LOG_DIR = tempfile.mkdtemp(dir=os.getenv('LOG_DIR'))

  def setup(self):
    """Opens a buffered, binary-protocol HS2 connection and keeps the client on self."""
    host, port = IMPALAD_HS2_HOST_PORT.split(":")
    self.socket = TSocket(host, port)
    self.transport = TBufferedTransport(self.socket)
    self.transport.open()
    self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
    self.hs2_client = TCLIService.Client(self.protocol)

  def teardown(self):
    """Closes the socket unless a test already did, then deletes the audit log dir."""
    if self.socket:
      self.socket.close()
    shutil.rmtree(self.AUDIT_LOG_DIR, ignore_errors=True)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      "--server_name=server1\
      --authorization_policy_file=/test-warehouse/authz-policy.ini\
      --authorization_policy_provider_class=%s" %
      "org.apache.sentry.provider.file.LocalGroupResourceAuthorizationProvider")
  def test_custom_authorization_provider(self):
    """Checks authorization decisions when a custom policy provider class is set."""
    # Imported locally rather than at module level; see the TODO in
    # test_impersonation() for the reason.
    from tests.hs2.test_hs2 import TestHS2
    open_session_req = TCLIService.TOpenSessionReq()
    # User is 'test_user' (defined in the authorization policy file)
    open_session_req.username = '******'
    open_session_req.configuration = dict()
    resp = self.hs2_client.OpenSession(open_session_req)
    TestHS2.check_response(resp)

    # Try to query a table we are not authorized to access.
    self.session_handle = resp.sessionHandle
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.statement = "describe tpch_seq.lineitem"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    assert 'User \'%s\' does not have privileges to access' % 'test_user' in\
        str(execute_statement_resp)

    # Now try the same operation on a table we are authorized to access.
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.statement = "describe tpch.lineitem"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    TestHS2.check_response(execute_statement_resp)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=/test-warehouse/authz-policy.ini\
      --authorized_proxy_user_config=hue=%s\
      --audit_event_log_dir=%s" % (getuser(), AUDIT_LOG_DIR))
  def test_impersonation(self):
    """End-to-end impersonation + authorization test. Expects authorization to be
    configured before running this test"""
    # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
    # the module within this test function, rather than as a top-level import. This way
    # the tests in that module will not get pulled when executing this test suite. The fix
    # is to split the utility code out of the TestHS2 class and support HS2 as a first
    # class citizen in our test framework.
    from tests.hs2.test_hs2 import TestHS2
    open_session_req = TCLIService.TOpenSessionReq()
    # Connected user is 'hue'
    open_session_req.username = '******'
    open_session_req.configuration = dict()
    # Delegated user is the current user
    open_session_req.configuration['impala.doas.user'] = getuser()
    resp = self.hs2_client.OpenSession(open_session_req)
    TestHS2.check_response(resp)

    # Try to query a table we are not authorized to access.
    self.session_handle = resp.sessionHandle
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.statement = "describe tpch_seq.lineitem"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    assert 'User \'%s\' does not have privileges to access' % getuser() in\
        str(execute_statement_resp)

    assert self.__wait_for_audit_record(user=getuser(), impersonator='hue'),\
        'No matching audit event recorded in time window'

    # Now try the same operation on a table we are authorized to access.
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.statement = "describe tpch.lineitem"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    TestHS2.check_response(execute_statement_resp)

    # Verify the correct user information is in the runtime profile
    query_id = operation_id_to_query_id(
        execute_statement_resp.operationHandle.operationId)
    profile_page = self.cluster.impalads[0].service.read_query_profile_page(query_id)
    self.__verify_profile_user_fields(profile_page, effective_user=getuser(),
        delegated_user=getuser(), connected_user='******')

    # Try to delegate to a user we are not authorized to delegate to.
    open_session_req.configuration['impala.doas.user'] = '******'
    resp = self.hs2_client.OpenSession(open_session_req)
    assert 'User \'hue\' is not authorized to delegate to \'some_user\'' in str(resp)

    # Create a new session which does not have a do_as_user.
    open_session_req.username = '******'
    open_session_req.configuration = dict()
    resp = self.hs2_client.OpenSession(open_session_req)
    TestHS2.check_response(resp)

    # Run a simple query, which should succeed.
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = resp.sessionHandle
    execute_statement_req.statement = "select 1"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    TestHS2.check_response(execute_statement_resp)

    # Verify the correct user information is in the runtime profile. Since there is
    # no do_as_user the Delegated User field should be empty.
    query_id = operation_id_to_query_id(
        execute_statement_resp.operationHandle.operationId)
    profile_page = self.cluster.impalads[0].service.read_query_profile_page(query_id)
    self.__verify_profile_user_fields(profile_page, effective_user='******',
        delegated_user='', connected_user='******')

    # Close the socket here and clear the attribute so teardown() skips its own close.
    self.socket.close()
    self.socket = None

  def __verify_profile_user_fields(self, profile_str, effective_user, connected_user,
      delegated_user):
    """Verifies the given runtime profile string contains the specified values for
    User, Connected User, and Delegated User"""
    assert '\n User: %s\n' % effective_user in profile_str
    assert '\n Connected User: %s\n' % connected_user in profile_str
    assert '\n Delegated User: %s\n' % delegated_user in profile_str

  def __wait_for_audit_record(self, user, impersonator, timeout_secs=30):
    """Waits until an audit log record is found that contains the given user and
    impersonator, or until the timeout is reached.

    Returns True as soon as a matching record is found in any file under
    AUDIT_LOG_DIR, and False once timeout_secs have elapsed without a match.
    """
    # The audit event might not show up immediately (the audit logs are flushed to disk
    # on regular intervals), so poll the audit event logs until a matching record is
    # found.
    start_time = time()
    while time() - start_time < timeout_secs:
      for audit_file_name in os.listdir(self.AUDIT_LOG_DIR):
        if self.__find_matching_audit_record(audit_file_name, user, impersonator):
          return True
      sleep(1)
    return False

  def __find_matching_audit_record(self, audit_file_name, user, impersonator):
    """Returns True if the named file under AUDIT_LOG_DIR holds a record for 'user'
    that was impersonated by 'impersonator', False otherwise.

    Each line is parsed as one JSON object. The entry stored under the object's
    smallest key is the one whose 'user' and 'impersonator' fields are compared.
    """
    with open(os.path.join(self.AUDIT_LOG_DIR, audit_file_name)) as audit_log_file:
      for line in audit_log_file:
        if not line.strip():
          continue
        try:
          json_dict = json.loads(line)
        except ValueError:
          # The log may be read while it is still being flushed, so a line can be
          # incomplete. Skip it; the caller polls again and will see the full record.
          continue
        if len(json_dict) == 0: continue
        record = json_dict[min(json_dict)]
        if record['user'] == user and record['impersonator'] == impersonator:
          return True
    return False
class HS2TestSuite(ImpalaTestSuite):
  """Shared HiveServer2 plumbing (connection, scratch DB, fetch helpers) for tests."""

  # Scratch database created and dropped for every HS2TestSuite subclass. A random
  # numeric suffix keeps suites from clobbering each other's DBs; the [2:] slice strips
  # the leading "0." of the random float.
  TEST_DB = 'hs2_db' + str(random.random())[2:]

  HS2_V6_COLUMN_TYPES = [
      'boolVal', 'stringVal', 'byteVal', 'i16Val',
      'i32Val', 'i64Val', 'doubleVal', 'binaryVal',
  ]

  def setup(self):
    """Drops any stale scratch DB, connects the HS2 client, then creates the DB."""
    self.cleanup_db(self.TEST_DB)
    hs2_host, hs2_port = IMPALAD_HS2_HOST_PORT.split(":")
    self.socket = TSocket(hs2_host, hs2_port)
    self.transport = TBufferedTransport(self.socket)
    self.transport.open()
    self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
    self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)
    self.client.execute("create database %s" % self.TEST_DB)

  def teardown(self):
    """Drops the scratch DB and closes the socket if it is still set."""
    self.cleanup_db(self.TEST_DB)
    if self.socket:
      self.socket.close()

  @staticmethod
  def check_response(response,
                     expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
                     expected_error_prefix=None):
    """Asserts that 'response' carries the expected status code. When a non-success
    code is expected and a prefix is given, also asserts that the error message starts
    with that prefix."""
    status = response.status
    assert status.statusCode == expected_status_code
    if expected_status_code == TCLIService.TStatusCode.SUCCESS_STATUS:
      return
    if expected_error_prefix is None:
      return
    assert status.errorMessage.startswith(expected_error_prefix)

  def close(self, op_handle):
    """Closes the given operation and asserts that the close succeeded."""
    request = TCLIService.TCloseOperationReq()
    request.operationHandle = op_handle
    response = self.hs2_client.CloseOperation(request)
    assert response.status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS

  def fetch(self, handle, orientation, size, expected_num_rows=None):
    """Fetches at most 'size' rows from the operation identified by 'handle', using
    the given fetch orientation. Asserts that the fetch succeeds and that the number
    of rows returned equals expected_num_rows if one was given, or 'size' otherwise.
    Returns the fetch response."""
    request = TCLIService.TFetchResultsReq()
    request.operationHandle = handle
    request.orientation = orientation
    request.maxRows = size
    response = self.hs2_client.FetchResults(request)
    HS2TestSuite.check_response(response)
    wanted_rows = size if expected_num_rows is None else expected_num_rows
    assert len(response.results.rows) == wanted_rows
    return response

  def fetch_fail(self, handle, orientation, expected_error_prefix):
    """Issues a fetch of up to 100 rows against 'handle' and asserts that it returns
    an error status whose message starts with expected_error_prefix. Returns the
    fetch response."""
    request = TCLIService.TFetchResultsReq()
    request.operationHandle = handle
    request.orientation = orientation
    request.maxRows = 100
    response = self.hs2_client.FetchResults(request)
    HS2TestSuite.check_response(
        response, TCLIService.TStatusCode.ERROR_STATUS, expected_error_prefix)
    return response
class HS2TestSuite(ImpalaTestSuite):
  """Shared HiveServer2 plumbing (connection, scratch DB, fetch helpers) for tests."""

  # Scratch database dropped by both setup() and teardown().
  TEST_DB = 'hs2_db'

  # Names of the typed value fields probed on a result column by get_num_rows().
  HS2_V6_COLUMN_TYPES = ['boolVal', 'stringVal', 'byteVal', 'i16Val', 'i32Val',
                         'i64Val', 'doubleVal', 'binaryVal']

  def setup(self):
    """Drops any stale scratch DB and opens the HS2 client connection."""
    self.cleanup_db(self.TEST_DB)
    host, port = IMPALAD_HS2_HOST_PORT.split(":")
    self.socket = TSocket(host, port)
    self.transport = TBufferedTransport(self.socket)
    self.transport.open()
    self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
    self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

  def teardown(self):
    """Drops the scratch DB and closes the socket if it is still set."""
    self.cleanup_db(self.TEST_DB)
    if self.socket:
      self.socket.close()

  @staticmethod
  def check_response(response,
                     expected_status_code = TCLIService.TStatusCode.SUCCESS_STATUS,
                     expected_error_prefix = None):
    """Asserts that 'response' carries the expected status code. When a non-success
    code is expected and a prefix is given, also asserts that the error message starts
    with that prefix."""
    assert response.status.statusCode == expected_status_code
    if expected_status_code != TCLIService.TStatusCode.SUCCESS_STATUS\
       and expected_error_prefix is not None:
      assert response.status.errorMessage.startswith(expected_error_prefix)

  def close(self, op_handle):
    """Closes the given operation and asserts that the close succeeded."""
    close_op_req = TCLIService.TCloseOperationReq()
    close_op_req.operationHandle = op_handle
    close_op_resp = self.hs2_client.CloseOperation(close_op_req)
    assert close_op_resp.status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS

  def get_num_rows(self, result_set):
    """Returns the number of rows held by 'result_set'.

    If no columns are set the count comes from result_set.rows. Otherwise it is the
    number of values in the first column's populated typed field."""
    # rows will always be set, so the only way to tell if we should use it is to see if
    # any columns are set
    if not result_set.columns:
      return len(result_set.rows)

    first_column = result_set.columns[0]
    for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
      typed_col = getattr(first_column, col_type)
      if typed_col is not None:
        return len(typed_col.values)

    assert False, "No typed value field is set on the first result column"

  def fetch_at_most(self, handle, orientation, size, expected_num_rows = None):
    """Fetches at most size number of rows from the query identified by the given
    operation handle. Uses the given fetch orientation. Asserts that the fetch returns a
    success status, and that the number of rows returned is equal to given
    expected_num_rows (if given). It is only safe for expected_num_rows to be 0 or 1:
    Impala does not guarantee that a larger result set will be returned in one go. Use
    fetch_until() for repeated fetches. Returns the fetch response."""
    assert expected_num_rows is None or expected_num_rows in (0, 1)
    fetch_results_req = TCLIService.TFetchResultsReq()
    fetch_results_req.operationHandle = handle
    fetch_results_req.orientation = orientation
    fetch_results_req.maxRows = size
    fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
    HS2TestSuite.check_response(fetch_results_resp)
    if expected_num_rows is not None:
      assert self.get_num_rows(fetch_results_resp.results) == expected_num_rows
    return fetch_results_resp

  def fetch_until(self, handle, orientation, size, expected_num_rows = None):
    """Fetches from the given query handle until 'expected_num_rows' rows have been
    received, asserting that exactly that many arrive. The first fetch uses the given
    orientation and asks for 'size' rows; every following fetch uses FETCH_NEXT and
    asks for (size - num rows already fetched). Each follow-up fetch is asserted to
    return at least one row, so reaching end-of-stream early fails the assertion.

    If 'expected_num_rows' is set to None, it defaults to 'size', so that the effect is
    to both ask for and expect the same number of rows. Nothing is returned."""
    assert expected_num_rows is None or (size >= expected_num_rows)
    fetch_results_req = TCLIService.TFetchResultsReq()
    fetch_results_req.operationHandle = handle
    fetch_results_req.orientation = orientation
    fetch_results_req.maxRows = size
    fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
    HS2TestSuite.check_response(fetch_results_resp)
    num_rows_fetched = self.get_num_rows(fetch_results_resp.results)
    if expected_num_rows is None: expected_num_rows = size
    while num_rows_fetched < expected_num_rows:
      # Always try to fetch at most 'size'
      fetch_results_req.maxRows = size - num_rows_fetched
      fetch_results_req.orientation = TCLIService.TFetchOrientation.FETCH_NEXT
      fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
      HS2TestSuite.check_response(fetch_results_resp)
      last_fetch_size = self.get_num_rows(fetch_results_resp.results)
      assert last_fetch_size > 0
      num_rows_fetched += last_fetch_size

    assert num_rows_fetched == expected_num_rows

  def fetch_fail(self, handle, orientation, expected_error_prefix):
    """Attempts to fetch rows from the query identified by the given operation handle.
    Asserts that the fetch returns an error with an error message matching the given
    expected_error_prefix. Returns the fetch response."""
    fetch_results_req = TCLIService.TFetchResultsReq()
    fetch_results_req.operationHandle = handle
    fetch_results_req.orientation = orientation
    fetch_results_req.maxRows = 100
    fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
    HS2TestSuite.check_response(fetch_results_resp, TCLIService.TStatusCode.ERROR_STATUS,
                                expected_error_prefix)
    return fetch_results_resp

  def result_metadata(self, handle):
    """Gets the schema for the query identified by the handle, asserting success."""
    req = TCLIService.TGetResultSetMetadataReq()
    req.operationHandle = handle
    resp = self.hs2_client.GetResultSetMetadata(req)
    HS2TestSuite.check_response(resp)
    return resp
class TestHS2(ImpalaTestSuite):
  """Tests of session and operation handling over the HiveServer2 interface."""

  def setup(self):
    """Connects a binary-protocol HS2 client to IMPALAD_HS2_HOST_PORT."""
    hs2_host, hs2_port = IMPALAD_HS2_HOST_PORT.split(":")
    self.socket = TSocket(hs2_host, hs2_port)
    self.transport = TBufferedTransport(self.socket)
    self.transport.open()
    self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
    self.hs2_client = TCLIService.Client(self.protocol)

  def teardown(self):
    """Closes the socket unless a test already closed and cleared it."""
    if self.socket:
      self.socket.close()

  @staticmethod
  def check_response(response, expected=TCLIService.TStatusCode.SUCCESS_STATUS):
    """Asserts that the status code of 'response' equals 'expected'."""
    assert response.status.statusCode == expected

  def test_open_session(self):
    """A session can be opened."""
    request = TCLIService.TOpenSessionReq()
    TestHS2.check_response(self.hs2_client.OpenSession(request))

  def test_close_session(self):
    """A session that is open can be closed."""
    open_req = TCLIService.TOpenSessionReq()
    open_resp = self.hs2_client.OpenSession(open_req)
    TestHS2.check_response(open_resp)

    close_req = TCLIService.TCloseSessionReq()
    close_req.sessionHandle = open_resp.sessionHandle
    TestHS2.check_response(self.hs2_client.CloseSession(close_req))

  def test_double_close_session(self):
    """Closing a session that has already been closed is an error."""
    open_req = TCLIService.TOpenSessionReq()
    open_resp = self.hs2_client.OpenSession(open_req)
    TestHS2.check_response(open_resp)

    close_req = TCLIService.TCloseSessionReq()
    close_req.sessionHandle = open_resp.sessionHandle
    TestHS2.check_response(self.hs2_client.CloseSession(close_req))

    # The second close of the same handle must be rejected.
    second_close_resp = self.hs2_client.CloseSession(close_req)
    TestHS2.check_response(second_close_resp, TCLIService.TStatusCode.ERROR_STATUS)

  @needs_session
  def test_execute_select(self):
    """A simple select statement executes and returns a single row."""
    exec_req = TCLIService.TExecuteStatementReq()
    exec_req.sessionHandle = self.session_handle
    exec_req.statement = "SELECT COUNT(*) FROM functional.alltypes"
    exec_resp = self.hs2_client.ExecuteStatement(exec_req)
    TestHS2.check_response(exec_resp)

    fetch_req = TCLIService.TFetchResultsReq()
    fetch_req.operationHandle = exec_resp.operationHandle
    fetch_req.maxRows = 100
    fetch_resp = self.hs2_client.FetchResults(fetch_req)
    TestHS2.check_response(fetch_resp)

    assert len(fetch_resp.results.rows) == 1
    assert fetch_resp.results.startRowOffset == 0

    # hasMoreRows being set is reported as an expected failure, not a hard failure.
    if fetch_resp.hasMoreRows:
      pytest.xfail("IMPALA-558")

  @needs_session
  def test_get_operation_status(self):
    """GetOperationStatus returns a valid state for a query that was just issued."""
    exec_req = TCLIService.TExecuteStatementReq()
    exec_req.sessionHandle = self.session_handle
    exec_req.statement = "SELECT COUNT(*) FROM functional.alltypes"
    exec_resp = self.hs2_client.ExecuteStatement(exec_req)
    TestHS2.check_response(exec_resp)

    status_req = TCLIService.TGetOperationStatusReq()
    status_req.operationHandle = exec_resp.operationHandle
    status_resp = self.hs2_client.GetOperationStatus(status_req)
    TestHS2.check_response(status_resp)

    valid_states = (TCLIService.TOperationState.INITIALIZED_STATE,
                    TCLIService.TOperationState.RUNNING_STATE,
                    TCLIService.TOperationState.FINISHED_STATE)
    assert status_resp.operationState in valid_states

  @needs_session
  def test_malformed_get_operation_status(self):
    """A short guid / secret yields an error status (a regression would crash
    impalad)."""
    handle_id = TCLIService.THandleIdentifier()
    handle_id.guid = "short"
    handle_id.secret = "short_secret"
    assert len(handle_id.guid) != 16
    assert len(handle_id.secret) != 16

    bad_handle = TCLIService.TOperationHandle()
    bad_handle.operationId = handle_id
    bad_handle.operationType = TCLIService.TOperationType.EXECUTE_STATEMENT
    bad_handle.hasResultSet = False

    status_req = TCLIService.TGetOperationStatusReq()
    status_req.operationHandle = bad_handle
    status_resp = self.hs2_client.GetOperationStatus(status_req)
    TestHS2.check_response(status_resp, TCLIService.TStatusCode.ERROR_STATUS)

    sizes = (len(handle_id.guid), len(handle_id.secret))
    err_msg = "(guid size: %d, expected 16, secret size: %d, expected 16)" % sizes
    assert err_msg in status_resp.status.errorMessage

  @pytest.mark.execute_serially
  def test_socket_close_forces_session_close(self):
    """Closing the underlying socket forces the associated session to close. See
    IMPALA-564"""
    metric_name = "impala-server.num-open-hiveserver2-sessions"
    open_resp = self.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
    TestHS2.check_response(open_resp)
    sessions_before_close = self.impalad_test_service.get_metric_value(metric_name)
    assert sessions_before_close > 0

    # Clear the attribute so teardown() does not close the socket a second time.
    self.socket.close()
    self.socket = None
    self.impalad_test_service.wait_for_metric_value(
        metric_name, sessions_before_close - 1)

  @pytest.mark.execute_serially
  def test_multiple_sessions(self):
    """Several sessions may be open on the same socket connection."""
    metric_name = "impala-server.num-open-hiveserver2-sessions"
    baseline = self.impalad_test_service.get_metric_value(metric_name)
    seen_handles = []
    for _ in xrange(5):
      open_resp = self.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
      TestHS2.check_response(open_resp)
      # Every session must receive its own, distinct ID.
      assert open_resp.sessionHandle not in seen_handles
      seen_handles.append(open_resp.sessionHandle)

    self.impalad_test_service.wait_for_metric_value(metric_name, baseline + 5)

    # Clear the attribute so teardown() does not close the socket a second time.
    self.socket.close()
    self.socket = None
    self.impalad_test_service.wait_for_metric_value(metric_name, baseline)

  @needs_session
  def test_get_schemas(self):
    """GetSchemas succeeds and its profile identifies it as a GET_SCHEMAS DDL."""
    schemas_req = TCLIService.TGetSchemasReq()
    schemas_req.sessionHandle = self.session_handle
    schemas_resp = self.hs2_client.GetSchemas(schemas_req)
    TestHS2.check_response(schemas_resp)

    fetch_req = TCLIService.TFetchResultsReq()
    fetch_req.operationHandle = schemas_resp.operationHandle
    fetch_req.maxRows = 100
    fetch_resp = self.hs2_client.FetchResults(fetch_req)
    TestHS2.check_response(fetch_resp)

    query_id = operation_id_to_query_id(schemas_resp.operationHandle.operationId)
    profile_page = self.impalad_test_service.read_query_profile_page(query_id)

    # Test fix for IMPALA-619
    assert "Sql Statement: GET_SCHEMAS" in profile_page
    assert "Query Type: DDL" in profile_page
class TestAuthorization(CustomClusterTestSuite):
  """Impersonation + authorization test driven through a raw HiveServer2 client.

  setup() connects to the impalad at IMPALAD_HS2_HOST_PORT. The test passes its
  impalad flags through CustomClusterTestSuite.with_args().
  """

  # Directory handed to impalad via --audit_event_log_dir. It is created when the class
  # body is evaluated and removed (best effort) by teardown().
  AUDIT_LOG_DIR = tempfile.mkdtemp(dir=os.getenv('LOG_DIR'))

  def setup(self):
    """Opens a buffered, binary-protocol HS2 connection and keeps the client on self."""
    host, port = IMPALAD_HS2_HOST_PORT.split(":")
    self.socket = TSocket(host, port)
    self.transport = TBufferedTransport(self.socket)
    self.transport.open()
    self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
    self.hs2_client = TCLIService.Client(self.protocol)

  def teardown(self):
    """Closes the socket unless the test already did, then deletes the audit log dir."""
    if self.socket:
      self.socket.close()
    shutil.rmtree(self.AUDIT_LOG_DIR, ignore_errors=True)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=/test-warehouse/authz-policy.ini\
      --authorized_proxy_user_config=hue=%s\
      --audit_event_log_dir=%s" % (getuser(), AUDIT_LOG_DIR))
  def test_impersonation(self):
    """End-to-end impersonation + authorization test. Expects authorization to be
    configured before running this test"""
    # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
    # the module within this test function, rather than as a top-level import. This way
    # the tests in that module will not get pulled when executing this test suite. The fix
    # is to split the utility code out of the TestHS2 class and support HS2 as a first
    # class citizen in our test framework.
    from tests.hs2.test_hs2 import TestHS2
    open_session_req = TCLIService.TOpenSessionReq()
    open_session_req.username = '******'
    open_session_req.configuration = dict()
    # The session acts on behalf of the current OS user.
    open_session_req.configuration['impala.doas.user'] = getuser()
    resp = self.hs2_client.OpenSession(open_session_req)
    TestHS2.check_response(resp)

    # Try to query a table we are not authorized to access.
    self.session_handle = resp.sessionHandle
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.statement = "describe tpch_seq.lineitem"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    assert 'User \'%s\' does not have privileges to access' % getuser() in\
        str(execute_statement_resp)

    assert self.__wait_for_audit_record(user=getuser(), impersonator='hue'),\
        'No matching audit event recorded in time window'

    # Now try the same operation on a table we are authorized to access.
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.statement = "describe tpch.lineitem"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    TestHS2.check_response(execute_statement_resp)

    # Try to impersonate as a user we are not authorized to impersonate.
    open_session_req.configuration['impala.doas.user'] = '******'
    resp = self.hs2_client.OpenSession(open_session_req)
    assert 'User \'hue\' is not authorized to impersonate \'some_user\'' in str(resp)

    # Close the socket here and clear the attribute so teardown() skips its own close.
    self.socket.close()
    self.socket = None

  def __wait_for_audit_record(self, user, impersonator, timeout_secs=30):
    """Waits until an audit log record is found that contains the given user and
    impersonator, or until the timeout is reached.

    Returns True as soon as a matching record is found in any file under
    AUDIT_LOG_DIR, and False once timeout_secs have elapsed without a match.
    """
    # The audit event might not show up immediately (the audit logs are flushed to disk
    # on regular intervals), so poll the audit event logs until a matching record is
    # found.
    start_time = time()
    while time() - start_time < timeout_secs:
      for audit_file_name in os.listdir(self.AUDIT_LOG_DIR):
        if self.__find_matching_audit_record(audit_file_name, user, impersonator):
          return True
      sleep(1)
    return False

  def __find_matching_audit_record(self, audit_file_name, user, impersonator):
    """Returns True if the named file under AUDIT_LOG_DIR holds a record for 'user'
    that was impersonated by 'impersonator', False otherwise.

    Each line is parsed as one JSON object. The entry stored under the object's
    smallest key is the one whose 'user' and 'impersonator' fields are compared.
    """
    with open(os.path.join(self.AUDIT_LOG_DIR, audit_file_name)) as audit_log_file:
      for line in audit_log_file:
        if not line.strip():
          continue
        try:
          json_dict = json.loads(line)
        except ValueError:
          # The log may be read while it is still being flushed, so a line can be
          # incomplete. Skip it; the caller polls again and will see the full record.
          continue
        if len(json_dict) == 0: continue
        record = json_dict[min(json_dict)]
        if record['user'] == user and record['impersonator'] == impersonator:
          return True
    return False
class TestAuthorization(CustomClusterTestSuite):
  """Authorization tests run against clusters started with custom flags.

  setup() opens an HS2 client against the first impalad of the cluster. The Ranger
  tests additionally use Impala clients created through the test framework.
  """

  def setup(self):
    """Connects an HS2 client to the hostname / HS2 port of the first impalad."""
    host, port = (self.cluster.impalads[0].service.hostname,
                  self.cluster.impalads[0].service.hs2_port)
    self.socket = TSocket(host, port)
    self.transport = TBufferedTransport(self.socket)
    self.transport.open()
    self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
    self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

  def teardown(self):
    """Closes the HS2 socket if it is still set."""
    if self.socket:
      self.socket.close()

  def __execute_hs2_stmt(self, statement, verify=True):
    """
    Executes an hs2 statement in the session stored in self.session_handle

    :param statement: the statement to execute
    :param verify: If set to true, asserts that the hs2 execution succeeded
    :return: the result of execution
    """
    from tests.hs2.test_hs2 import TestHS2
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.statement = statement
    result = self.hs2_client.ExecuteStatement(execute_statement_req)
    if verify:
      TestHS2.check_response(result)
    return result

  def __open_hs2(self, user, configuration, verify=True):
    """
    Open a session with hs2

    :param user: the user to open the session
    :param configuration: the configuration for the session
    :param verify: If set to true, asserts that the session was opened successfully
    :return: the result of opening the session
    """
    from tests.hs2.test_hs2 import TestHS2
    open_session_req = TCLIService.TOpenSessionReq()
    open_session_req.username = user
    open_session_req.configuration = configuration
    resp = self.hs2_client.OpenSession(open_session_req)
    if verify:
      TestHS2.check_response(resp)
    return resp

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=ignored_file",
      impala_log_dir=tempfile.mkdtemp(prefix="test_deprecated_",
                                      dir=os.getenv("LOG_DIR")))
  def test_deprecated_flags(self):
    """Checks that the log dir reports the removed authorization_policy_file flag."""
    assert_file_in_dir_contains(self.impala_log_dir, "Ignoring removed flag "
                                "authorization_policy_file")

  @staticmethod
  def _verify_show_dbs(result, unique_name, visibility_privileges=PRIVILEGES):
    """
    Helper function for verifying the results of SHOW DATABASES below.
    Only show databases with privileges implying any of the visibility_privileges.
    The 'all' database is always expected to be listed.
    """
    for priv in PRIVILEGES:
      # Result lines are in the format of "db_name\tdb_comment"
      db_name = 'db_%s_%s\t' % (unique_name, priv)
      if priv != 'all' and priv not in visibility_privileges:
        assert db_name not in result.data
      else:
        assert db_name in result.data

  def _test_ranger_show_stmts_helper(self, unique_name, visibility_privileges):
    """Creates one database and one table per entry of PRIVILEGES, grants that
    privilege on each to the current OS user, and checks what SHOW DATABASES and
    SHOW TABLES return for an admin client and for self.client.

    :param unique_name: name fragment used to build the database names
    :param visibility_privileges: privileges expected to make an object visible
    """
    unique_db = unique_name + "_db"
    admin_client = self.create_impala_client()
    try:
      admin_client.execute("drop database if exists %s cascade" % unique_db, user=ADMIN)
      admin_client.execute("create database %s" % unique_db, user=ADMIN)
      for priv in PRIVILEGES:
        admin_client.execute("create database db_%s_%s" % (unique_name, priv))
        admin_client.execute("grant {0} on database db_{1}_{2} to user {3}"
                             .format(priv, unique_name, priv, getuser()))
        admin_client.execute("create table %s.tbl_%s (i int)" % (unique_db, priv))
        admin_client.execute("grant {0} on table {1}.tbl_{2} to user {3}"
                             .format(priv, unique_db, priv, getuser()))

      # Admin can still see all the databases and tables
      result = admin_client.execute("show databases")
      TestAuthorization._verify_show_dbs(result, unique_name)
      result = admin_client.execute("show tables in %s" % unique_db)
      assert result.data == ["tbl_%s" % p for p in PRIVILEGES]

      # Check SHOW DATABASES and SHOW TABLES using another username
      result = self.client.execute("show databases")
      TestAuthorization._verify_show_dbs(result, unique_name, visibility_privileges)
      result = self.client.execute("show tables in %s" % unique_db)
      # Only show tables with privileges implying any of the visibility privileges
      assert 'tbl_all' in result.data   # ALL can imply to any privilege
      for p in visibility_privileges:
        assert 'tbl_%s' % p in result.data
    finally:
      try:
        admin_client.execute("drop database if exists %s cascade" % unique_db)
        for priv in PRIVILEGES:
          admin_client.execute(
              "drop database if exists db_%s_%s cascade" % (unique_name, priv))
      finally:
        # Release the extra connection even if one of the drops above fails.
        admin_client.close()

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
    impalad_args="--server-name=server1 --ranger_service_type=hive "
                 "--ranger_app_id=impala --authorization_provider=ranger "
                 "--min_privilege_set_for_show_stmts=select",
    catalogd_args="--server-name=server1 --ranger_service_type=hive "
                  "--ranger_app_id=impala --authorization_provider=ranger")
  def test_ranger_show_stmts_with_select(self, unique_name):
    """SHOW statements with --min_privilege_set_for_show_stmts=select."""
    self._test_ranger_show_stmts_helper(unique_name, ['select'])

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
    impalad_args="--server-name=server1 --ranger_service_type=hive "
                 "--ranger_app_id=impala --authorization_provider=ranger "
                 "--min_privilege_set_for_show_stmts=select,insert",
    catalogd_args="--server-name=server1 --ranger_service_type=hive "
                  "--ranger_app_id=impala --authorization_provider=ranger")
  def test_ranger_show_stmts_with_select_insert(self, unique_name):
    """SHOW statements with --min_privilege_set_for_show_stmts=select,insert."""
    self._test_ranger_show_stmts_helper(unique_name, ['select', 'insert'])

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
    impalad_args="--server-name=server1 --ranger_service_type=hive "
                 "--ranger_app_id=impala --authorization_provider=ranger "
                 "--min_privilege_set_for_show_stmts=any",
    catalogd_args="--server-name=server1 --ranger_service_type=hive "
                  "--ranger_app_id=impala --authorization_provider=ranger")
  def test_ranger_show_stmts_with_any(self, unique_name):
    """SHOW statements with --min_privilege_set_for_show_stmts=any."""
    self._test_ranger_show_stmts_helper(unique_name, PRIVILEGES)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
    impalad_args="--server-name=server1 --ranger_service_type=hive "
                 "--ranger_app_id=impala --authorization_provider=ranger "
                 "--num_check_authorization_threads=%d" % (random.randint(2, 128)),
    catalogd_args="--server-name=server1 --ranger_service_type=hive "
                  "--ranger_app_id=impala --authorization_provider=ranger")
  def test_num_check_authorization_threads_with_ranger(self, unique_name):
    """SHOW statements with a randomly chosen --num_check_authorization_threads
    value between 2 and 128."""
    self._test_ranger_show_stmts_helper(unique_name, PRIVILEGES)
class HS2TestSuite(ImpalaTestSuite):
    """Base class for tests that drive impalad through the HiveServer2 Thrift API.

    setup() opens a buffered, binary-protocol Thrift connection to
    IMPALAD_HS2_HOST_PORT and creates the scratch database TEST_DB; teardown()
    drops that database again and closes the socket.
    """

    # Scratch database that setup()/teardown() create and drop. The name is made
    # unique so different test suites don't clobber each other's DBs. A random
    # integer is used instead of slicing str(random.random()): floats below 1e-4
    # are rendered in scientific notation (e.g. '5e-05'), which would put a '-'
    # into the database name.
    TEST_DB = 'hs2_db%d' % random.randrange(10 ** 16)

    # Names of the typed value fields probed on a column of a columnar result set.
    HS2_V6_COLUMN_TYPES = ['boolVal', 'stringVal', 'byteVal', 'i16Val', 'i32Val',
                           'i64Val', 'doubleVal', 'binaryVal']

    def setup(self):
        """Recreates TEST_DB from scratch and opens the HS2 client connection."""
        self.cleanup_db(self.TEST_DB)
        host, port = IMPALAD_HS2_HOST_PORT.split(":")
        self.socket = TSocket(host, port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)
        self.client.execute("create database %s" % self.TEST_DB)

    def teardown(self):
        """Drops TEST_DB and closes the socket, even if dropping the DB fails."""
        try:
            self.cleanup_db(self.TEST_DB)
        finally:
            # setup() may have failed before the socket attribute was assigned.
            sock = getattr(self, 'socket', None)
            if sock:
                sock.close()

    @staticmethod
    def check_response(response,
                       expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
                       expected_error_prefix=None):
        """Asserts that 'response' carries 'expected_status_code'.

        When a non-success status is expected and 'expected_error_prefix' is given,
        also asserts that the error message starts with that prefix.
        """
        assert response.status.statusCode == expected_status_code
        if expected_status_code != TCLIService.TStatusCode.SUCCESS_STATUS \
                and expected_error_prefix is not None:
            assert response.status.errorMessage.startswith(expected_error_prefix)

    def close(self, op_handle):
        """Closes the operation 'op_handle' and asserts that the close succeeded."""
        close_op_req = TCLIService.TCloseOperationReq()
        close_op_req.operationHandle = op_handle
        close_op_resp = self.hs2_client.CloseOperation(close_op_req)
        HS2TestSuite.check_response(close_op_resp)

    def fetch(self, handle, orientation, size, expected_num_rows=None):
        """Fetches at most size number of rows from the query identified by the given
        operation handle. Uses the given fetch orientation. Asserts that the fetch
        returns a success status, and that the number of rows returned is equal to
        size, or equal to the given expected_num_rows (if one was given). Returns the
        fetch response."""
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = size
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp)
        num_rows = size if expected_num_rows is None else expected_num_rows
        assert len(fetch_results_resp.results.rows) == num_rows
        return fetch_results_resp

    def fetch_fail(self, handle, orientation, expected_error_prefix):
        """Attempts to fetch rows from the query identified by the given operation
        handle. Asserts that the fetch returns an error with an error message
        starting with the given expected_error_prefix. Returns the fetch response."""
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = 100
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp,
                                    TCLIService.TStatusCode.ERROR_STATUS,
                                    expected_error_prefix)
        return fetch_results_resp
class HS2TestSuite(ImpalaTestSuite):
    """Base class for tests that drive impalad through the HiveServer2 Thrift API.

    setup() drops any leftover TEST_DB and opens a buffered, binary-protocol Thrift
    connection to IMPALAD_HS2_HOST_PORT; teardown() drops TEST_DB and closes the
    socket.
    """

    TEST_DB = 'hs2_db'

    # Names of the typed value fields probed on a column of a columnar result set.
    HS2_V6_COLUMN_TYPES = [
        'boolVal', 'stringVal', 'byteVal', 'i16Val', 'i32Val', 'i64Val', 'doubleVal',
        'binaryVal'
    ]

    def setup(self):
        """Drops any leftover TEST_DB and opens the HS2 client connection."""
        self.cleanup_db(self.TEST_DB)
        host, port = IMPALAD_HS2_HOST_PORT.split(":")
        self.socket = TSocket(host, port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

    def teardown(self):
        """Drops TEST_DB and closes the socket, even if dropping the DB fails."""
        try:
            self.cleanup_db(self.TEST_DB)
        finally:
            # setup() may have failed before the socket attribute was assigned.
            sock = getattr(self, 'socket', None)
            if sock:
                sock.close()

    @staticmethod
    def check_response(response,
                       expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
                       expected_error_prefix=None):
        """Asserts that 'response' carries 'expected_status_code'.

        When a non-success status is expected and 'expected_error_prefix' is given,
        also asserts that the error message starts with that prefix.
        """
        assert response.status.statusCode == expected_status_code
        if expected_status_code != TCLIService.TStatusCode.SUCCESS_STATUS \
                and expected_error_prefix is not None:
            assert response.status.errorMessage.startswith(expected_error_prefix)

    def close(self, op_handle):
        """Closes the operation 'op_handle' and asserts that the close succeeded."""
        close_op_req = TCLIService.TCloseOperationReq()
        close_op_req.operationHandle = op_handle
        close_op_resp = self.hs2_client.CloseOperation(close_op_req)
        HS2TestSuite.check_response(close_op_resp)

    def get_num_rows(self, result_set):
        """Returns the number of rows held by 'result_set'.

        Uses len(result_set.rows) when no columns are set; otherwise uses the
        length of the first populated typed value list of the first column.
        """
        # rows will always be set, so the only way to tell if we should use it is to
        # see if any columns are set
        if not result_set.columns:
            return len(result_set.rows)
        for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
            typed_col = getattr(result_set.columns[0], col_type)
            if typed_col is not None:
                return len(typed_col.values)
        assert False, "First column has none of the known typed value fields set"

    def fetch_at_most(self, handle, orientation, size, expected_num_rows=None):
        """Fetches at most size number of rows from the query identified by the given
        operation handle. Uses the given fetch orientation. Asserts that the fetch
        returns a success status, and that the number of rows returned is equal to
        given expected_num_rows (if given). It is only safe for expected_num_rows to
        be 0 or 1: Impala does not guarantee that a larger result set will be returned
        in one go. Use fetch_until() for repeated fetches. Returns the fetch
        response."""
        assert expected_num_rows is None or expected_num_rows in (0, 1)
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = size
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp)
        if expected_num_rows is not None:
            assert self.get_num_rows(fetch_results_resp.results) == expected_num_rows
        return fetch_results_resp

    def fetch_until(self, handle, orientation, size, expected_num_rows=None):
        """Fetches rows from the given query handle until 'expected_num_rows' rows
        have been received, by repeatedly issuing fetches of at most
        (size - num rows already fetched) rows. The first fetch uses the given
        orientation; follow-up fetches use FETCH_NEXT. If 'expected_num_rows' is
        None, it defaults to 'size', so that the effect is to both ask for and expect
        the same number of rows. Asserts that every follow-up fetch returns at least
        one row and that exactly 'expected_num_rows' rows were fetched in total.
        Returns nothing."""
        assert expected_num_rows is None or (size >= expected_num_rows)
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = size
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp)
        num_rows_fetched = self.get_num_rows(fetch_results_resp.results)
        if expected_num_rows is None:
            expected_num_rows = size
        while num_rows_fetched < expected_num_rows:
            # Always try to fetch at most 'size'
            fetch_results_req.maxRows = size - num_rows_fetched
            fetch_results_req.orientation = TCLIService.TFetchOrientation.FETCH_NEXT
            fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
            HS2TestSuite.check_response(fetch_results_resp)
            last_fetch_size = self.get_num_rows(fetch_results_resp.results)
            # An empty fetch would otherwise make this loop spin forever.
            assert last_fetch_size > 0
            num_rows_fetched += last_fetch_size
        assert num_rows_fetched == expected_num_rows

    def fetch_fail(self, handle, orientation, expected_error_prefix):
        """Attempts to fetch rows from the query identified by the given operation
        handle. Asserts that the fetch returns an error with an error message
        starting with the given expected_error_prefix. Returns the fetch response."""
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = 100
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp,
                                    TCLIService.TStatusCode.ERROR_STATUS,
                                    expected_error_prefix)
        return fetch_results_resp

    def result_metadata(self, handle):
        """Gets the schema for the query identified by the handle; asserts success
        and returns the metadata response."""
        req = TCLIService.TGetResultSetMetadataReq()
        req.operationHandle = handle
        resp = self.hs2_client.GetResultSetMetadata(req)
        HS2TestSuite.check_response(resp)
        return resp
class TestAuthorization(CustomClusterTestSuite):
    """Custom-cluster tests for authorization, impersonation and profile access,
    issued against the first impalad over the HiveServer2 Thrift API."""

    # Created once, when the class body is evaluated, under $LOG_DIR (mkdtemp falls
    # back to the default temp location when LOG_DIR is unset). Removed in teardown().
    AUDIT_LOG_DIR = tempfile.mkdtemp(dir=os.getenv('LOG_DIR'))

    def setup(self):
        """Opens an HS2 client connection to the first impalad of the cluster."""
        host, port = (self.cluster.impalads[0].service.hostname,
                      self.cluster.impalads[0].service.hs2_port)
        self.socket = TSocket(host, port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

    def teardown(self):
        """Closes the socket (if still open) and removes the audit log directory."""
        try:
            # Some tests close the socket themselves and reset it to None.
            if self.socket:
                self.socket.close()
        finally:
            shutil.rmtree(self.AUDIT_LOG_DIR, ignore_errors=True)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
        --authorization_policy_file=%s\
        --authorization_policy_provider_class=%s" %\
        (AUTH_POLICY_FILE,
         "org.apache.sentry.provider.file.LocalGroupResourceAuthorizationProvider"))
    def test_custom_authorization_provider(self):
        """Checks that access is denied and granted as expected when a custom
        authorization policy provider class is configured."""
        from tests.hs2.test_hs2 import TestHS2
        open_session_req = TCLIService.TOpenSessionReq()
        # User is 'test_user' (defined in the authorization policy file)
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Try to query a table we are not authorized to access.
        self.session_handle = resp.sessionHandle
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch_seq.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
        assert 'User \'%s\' does not have privileges to access' % 'test_user' in\
            str(execute_statement_resp)

        # Now try the same operation on a table we are authorized to access.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
        --authorization_policy_file=%s\
        --authorized_proxy_user_config=hue=%s" % (AUTH_POLICY_FILE, getuser()))
    def test_access_runtime_profile(self):
        """Checks that the runtime profile and exec summary are only accessible for
        statements whose underlying tables the user may access, both for a direct
        session and for a delegated one."""
        from tests.hs2.test_hs2 import TestHS2
        open_session_req = TCLIService.TOpenSessionReq()
        open_session_req.username = getuser()
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Current user can't access view's underlying tables
        self.session_handle = resp.sessionHandle
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "explain select * from functional.complex_view"
        execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
        assert 'User \'%s\' does not have privileges to EXPLAIN' % getuser() in\
            str(execute_statement_resp)
        # User should not have access to the runtime profile
        self.__run_stmt_and_verify_profile_access(
            "select * from functional.complex_view", False, False)
        self.__run_stmt_and_verify_profile_access(
            "select * from functional.complex_view", False, True)

        # Repeat as a delegated user
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        # Delegated user is the current user
        open_session_req.configuration['impala.doas.user'] = getuser()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)
        self.session_handle = resp.sessionHandle
        # User should not have access to the runtime profile
        self.__run_stmt_and_verify_profile_access(
            "select * from functional.complex_view", False, False)
        self.__run_stmt_and_verify_profile_access(
            "select * from functional.complex_view", False, True)

        # Create a view for which the user has access to the underlying tables.
        open_session_req.username = getuser()
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)
        self.session_handle = resp.sessionHandle
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = """create view if not exists tpch.customer_view as select * from tpch.customer limit 1"""
        execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

        # User should be able to run EXPLAIN
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = """explain select * from tpch.customer_view"""
        execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

        # User should have access to the runtime profile and exec summary
        self.__run_stmt_and_verify_profile_access(
            "select * from tpch.customer_view", True, False)
        self.__run_stmt_and_verify_profile_access(
            "select * from tpch.customer_view", True, True)

        # Repeat as a delegated user
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        # Delegated user is the current user
        open_session_req.configuration['impala.doas.user'] = getuser()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)
        self.session_handle = resp.sessionHandle
        # User should have access to the runtime profile and exec summary
        self.__run_stmt_and_verify_profile_access(
            "select * from tpch.customer_view", True, False)
        self.__run_stmt_and_verify_profile_access(
            "select * from tpch.customer_view", True, True)

        # Clean up
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "drop view if exists tpch.customer_view"
        execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
        --authorization_policy_file=%s\
        --authorized_proxy_user_config=foo=bar;hue=%s\
        --abort_on_failed_audit_event=false\
        --audit_event_log_dir=%s" % (AUTH_POLICY_FILE, getuser(), AUDIT_LOG_DIR))
    def test_user_impersonation(self):
        """End-to-end user impersonation + authorization test"""
        self.__test_impersonation()

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        "--server_name=server1\
        --authorization_policy_file=%s\
        --authorized_proxy_user_config=hue=bar\
        --authorized_proxy_group_config=foo=bar;hue=%s\
        --abort_on_failed_audit_event=false\
        --audit_event_log_dir=%s" % (AUTH_POLICY_FILE, ','.join(get_groups()),
                                     AUDIT_LOG_DIR))
    def test_group_impersonation(self):
        """End-to-end group impersonation + authorization test"""
        self.__test_impersonation()

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
        --authorization_policy_file=%s\
        --authorized_proxy_user_config=foo=bar\
        --authorized_proxy_group_config=foo=bar\
        --abort_on_failed_audit_event=false\
        --audit_event_log_dir=%s" % (AUTH_POLICY_FILE, AUDIT_LOG_DIR))
    def test_no_matching_user_and_group_impersonation(self):
        """Checks that opening a delegated session is rejected when neither the user
        nor the group proxy configuration matches."""
        open_session_req = TCLIService.TOpenSessionReq()
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        open_session_req.configuration['impala.doas.user'] = '******'
        resp = self.hs2_client.OpenSession(open_session_req)
        assert 'User \'hue\' is not authorized to delegate to \'abc\'' in str(resp)

    def __test_impersonation(self):
        """End-to-end impersonation + authorization test. Expects authorization to be
        configured before running this test"""
        # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to
        # import the module within this test function, rather than as a top-level
        # import. This way the tests in that module will not get pulled when executing
        # this test suite. The fix is to split the utility code out of the TestHS2
        # class and support HS2 as a first class citizen in our test framework.
        from tests.hs2.test_hs2 import TestHS2
        open_session_req = TCLIService.TOpenSessionReq()
        # Connected user is 'hue'
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        # Delegated user is the current user
        open_session_req.configuration['impala.doas.user'] = getuser()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Try to query a table we are not authorized to access.
        self.session_handle = resp.sessionHandle
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch_seq.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
        assert 'User \'%s\' does not have privileges to access' % getuser() in\
            str(execute_statement_resp)

        assert self.__wait_for_audit_record(user=getuser(), impersonator='hue'),\
            'No matching audit event recorded in time window'

        # Now try the same operation on a table we are authorized to access.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

        # Verify the correct user information is in the runtime profile
        query_id = operation_id_to_query_id(
            execute_statement_resp.operationHandle.operationId)
        profile_page = self.cluster.impalads[0].service.read_query_profile_page(query_id)
        self.__verify_profile_user_fields(profile_page, effective_user=getuser(),
                                          delegated_user=getuser(),
                                          connected_user='******')

        # Try a user we are not authorized to delegate to.
        open_session_req.configuration['impala.doas.user'] = '******'
        resp = self.hs2_client.OpenSession(open_session_req)
        assert 'User \'hue\' is not authorized to delegate to \'some_user\'' in str(resp)

        # Create a new session which does not have a do_as_user.
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Run a simple query, which should succeed.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = resp.sessionHandle
        execute_statement_req.statement = "select 1"
        execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

        # Verify the correct user information is in the runtime profile. Since there
        # is no do_as_user the Delegated User field should be empty.
        query_id = operation_id_to_query_id(
            execute_statement_resp.operationHandle.operationId)
        profile_page = self.cluster.impalads[0].service.read_query_profile_page(query_id)
        self.__verify_profile_user_fields(profile_page, effective_user='******',
                                          delegated_user='', connected_user='******')

        # Reset to None so that teardown() does not close the socket a second time.
        self.socket.close()
        self.socket = None

    def __verify_profile_user_fields(self, profile_str, effective_user, connected_user,
                                     delegated_user):
        """Verifies the given runtime profile string contains the specified values for
        User, Connected User, and Delegated User"""
        assert '\n User: %s\n' % effective_user in profile_str
        assert '\n Connected User: %s\n' % connected_user in profile_str
        assert '\n Delegated User: %s\n' % delegated_user in profile_str

    def __wait_for_audit_record(self, user, impersonator, timeout_secs=30):
        """Waits until an audit log record is found that contains the given user and
        impersonator, or until the timeout is reached. Returns True if a matching
        record was found and False on timeout.
        """
        # The audit event might not show up immediately (the audit logs are flushed to
        # disk on regular intervals), so poll the audit event logs until a matching
        # record is found.
        start_time = time()
        while time() - start_time < timeout_secs:
            for audit_file_name in os.listdir(self.AUDIT_LOG_DIR):
                if self.__find_matching_audit_record(audit_file_name, user,
                                                     impersonator):
                    return True
            sleep(1)
        return False

    def __find_matching_audit_record(self, audit_file_name, user, impersonator):
        """Returns True if the named file in AUDIT_LOG_DIR has a line whose JSON
        record matches 'user' and 'impersonator', False otherwise. Each line is
        parsed as one JSON object; the entry under its smallest key is inspected."""
        with open(os.path.join(self.AUDIT_LOG_DIR, audit_file_name)) as audit_log_file:
            # Stream the file line by line instead of loading it with readlines().
            for line in audit_log_file:
                json_dict = json.loads(line)
                if not json_dict:
                    continue
                event = json_dict[min(json_dict)]
                if event['user'] == user and event['impersonator'] == impersonator:
                    return True
        return False

    def __run_stmt_and_verify_profile_access(self, stmt, has_access, close_operation):
        """Runs 'stmt' and retrieves the runtime profile and exec summary. If
        'has_access' is true, it verifies that both requests succeed and that the
        profile contains a plan; otherwise it verifies that both responses carry the
        "not authorized to access the runtime profile or execution summary" error.
        If 'close_operation' is true, make sure the operation is closed before
        retrieving the profile and exec summary."""
        from tests.hs2.test_hs2 import TestHS2
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = stmt
        execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

        if close_operation:
            close_operation_req = TCLIService.TCloseOperationReq()
            close_operation_req.operationHandle = execute_statement_resp.operationHandle
            TestHS2.check_response(self.hs2_client.CloseOperation(close_operation_req))

        get_profile_req = ImpalaHiveServer2Service.TGetRuntimeProfileReq()
        get_profile_req.operationHandle = execute_statement_resp.operationHandle
        get_profile_req.sessionHandle = self.session_handle
        get_profile_resp = self.hs2_client.GetRuntimeProfile(get_profile_req)

        if has_access:
            TestHS2.check_response(get_profile_resp)
            assert "Plan: " in get_profile_resp.profile
        else:
            assert "User %s is not authorized to access the runtime profile or "\
                "execution summary." % (getuser()) in str(get_profile_resp)

        exec_summary_req = ImpalaHiveServer2Service.TGetExecSummaryReq()
        exec_summary_req.operationHandle = execute_statement_resp.operationHandle
        exec_summary_req.sessionHandle = self.session_handle
        exec_summary_resp = self.hs2_client.GetExecSummary(exec_summary_req)

        if has_access:
            TestHS2.check_response(exec_summary_resp)
        else:
            assert "User %s is not authorized to access the runtime profile or "\
                "execution summary." % (getuser()) in str(exec_summary_resp)
class HS2TestSuite(ImpalaTestSuite):
    """Base class for tests that drive impalad through the HiveServer2 Thrift API.

    setup() opens a buffered, binary-protocol Thrift connection to
    IMPALAD_HS2_HOST_PORT; the remaining methods are helpers for checking
    responses, fetching results and polling operation state.
    """

    # Names of the typed value fields probed on a column of a columnar result set.
    HS2_V6_COLUMN_TYPES = ['boolVal', 'stringVal', 'byteVal', 'i16Val', 'i32Val',
                           'i64Val', 'doubleVal', 'binaryVal']

    def setup(self):
        """Opens the HS2 client connection."""
        host, port = IMPALAD_HS2_HOST_PORT.split(":")
        self.socket = TSocket(host, port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

    def teardown(self):
        """Closes the socket if one is open."""
        if self.socket:
            self.socket.close()

    @staticmethod
    def check_response(response,
                       expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
                       expected_error_prefix=None):
        """Asserts that 'response' carries 'expected_status_code'.

        When a non-success status is expected and 'expected_error_prefix' is given,
        also asserts that the error message starts with that prefix.
        """
        assert response.status.statusCode == expected_status_code
        if expected_status_code != TCLIService.TStatusCode.SUCCESS_STATUS \
                and expected_error_prefix is not None:
            assert response.status.errorMessage.startswith(expected_error_prefix)

    @staticmethod
    def check_invalid_session(response):
        """Checks that the HS2 API response is the correct response if the session is
        invalid, i.e. the session doesn't exist or the secret is invalid."""
        HS2TestSuite.check_response(response, TCLIService.TStatusCode.ERROR_STATUS,
                                    "Invalid session id:")

    @staticmethod
    def check_invalid_query(response, expect_legacy_err=False):
        """Checks that the HS2 API response is the correct response if the query is
        invalid, i.e. the query doesn't exist, doesn't match the session provided, or
        the secret is invalid. """
        if expect_legacy_err:
            # Some operations return non-standard errors like "Query id ... not found".
            expected_err = "Query id"
        else:
            # We should standardise on this error message.
            expected_err = "Invalid query handle:"
        HS2TestSuite.check_response(response, TCLIService.TStatusCode.ERROR_STATUS,
                                    expected_err)

    @staticmethod
    def check_profile_access_denied(response, user):
        """Checks that the HS2 API response is the correct response if the user is
        not authorised to access the query's profile."""
        HS2TestSuite.check_response(
            response, TCLIService.TStatusCode.ERROR_STATUS,
            "User {0} is not authorized to access the runtime "
            "profile or execution summary".format(user))

    def close(self, op_handle):
        """Closes the operation 'op_handle' and asserts that the close succeeded."""
        close_op_req = TCLIService.TCloseOperationReq()
        close_op_req.operationHandle = op_handle
        close_op_resp = self.hs2_client.CloseOperation(close_op_req)
        HS2TestSuite.check_response(close_op_resp)

    @staticmethod
    def get_num_rows(result_set):
        """Returns the number of rows held by 'result_set'.

        Uses len(result_set.rows) when no columns are set; otherwise uses the
        length of the first populated typed value list of the first column.
        """
        # rows will always be set, so the only way to tell if we should use it is to
        # see if any columns are set
        if not result_set.columns:
            return len(result_set.rows)
        for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
            typed_col = getattr(result_set.columns[0], col_type)
            if typed_col is not None:
                return len(typed_col.values)
        assert False, "First column has none of the known typed value fields set"

    def fetch(self, fetch_results_req):
        """Wrapper around ImpalaHiveServer2Service.FetchResults(fetch_results_req) that
        issues the given fetch request until the TCLIService.TStatusCode transitions
        from STILL_EXECUTING_STATUS to SUCCESS_STATUS. If a fetch response contains the
        STILL_EXECUTING_STATUS then rows are not yet available for consumption (e.g.
        the query is still running and has not produced any rows yet). This status may
        be returned to the client if the FETCH_ROWS_TIMEOUT_MS is hit. Any status other
        than success ends the loop and fails the success assertion."""
        while True:
            fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
            if fetch_results_resp.status.statusCode != \
                    TCLIService.TStatusCode.STILL_EXECUTING_STATUS:
                break
        HS2TestSuite.check_response(fetch_results_resp)
        return fetch_results_resp

    def fetch_at_most(self, handle, orientation, size, expected_num_rows=None):
        """Fetches at most size number of rows from the query identified by the given
        operation handle. Uses the given fetch orientation. Asserts that the fetch
        returns a success status, and that the number of rows returned is equal to
        given expected_num_rows (if given). It is only safe for expected_num_rows to
        be 0 or 1: Impala does not guarantee that a larger result set will be returned
        in one go. Use fetch_until() for repeated fetches. Returns the fetch
        response."""
        assert expected_num_rows is None or expected_num_rows in (0, 1)
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = size
        fetch_results_resp = self.fetch(fetch_results_req)
        if expected_num_rows is not None:
            assert self.get_num_rows(fetch_results_resp.results) == expected_num_rows
        return fetch_results_resp

    def fetch_until(self, handle, orientation, size, expected_num_rows=None):
        """Fetches rows from the given query handle until 'expected_num_rows' rows
        have been received, by repeatedly issuing fetches of at most
        (size - num rows already fetched) rows. The first fetch uses the given
        orientation; follow-up fetches use FETCH_NEXT. If 'expected_num_rows' is
        None, it defaults to 'size', so that the effect is to both ask for and expect
        the same number of rows. Asserts that every follow-up fetch returns at least
        one row and that exactly 'expected_num_rows' rows were fetched in total.
        Returns nothing."""
        assert expected_num_rows is None or (size >= expected_num_rows)
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = size
        fetch_results_resp = self.fetch(fetch_results_req)
        num_rows_fetched = self.get_num_rows(fetch_results_resp.results)
        if expected_num_rows is None:
            expected_num_rows = size
        while num_rows_fetched < expected_num_rows:
            # Always try to fetch at most 'size'
            fetch_results_req.maxRows = size - num_rows_fetched
            fetch_results_req.orientation = TCLIService.TFetchOrientation.FETCH_NEXT
            fetch_results_resp = self.fetch(fetch_results_req)
            last_fetch_size = self.get_num_rows(fetch_results_resp.results)
            # An empty fetch would otherwise make this loop spin forever.
            assert last_fetch_size > 0
            num_rows_fetched += last_fetch_size
        assert num_rows_fetched == expected_num_rows

    def fetch_fail(self, handle, orientation, expected_error_prefix):
        """Attempts to fetch rows from the query identified by the given operation
        handle. Asserts that the fetch returns an error with an error message
        starting with the given expected_error_prefix. Returns the fetch response."""
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = 100
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp,
                                    TCLIService.TStatusCode.ERROR_STATUS,
                                    expected_error_prefix)
        return fetch_results_resp

    def result_metadata(self, handle):
        """Gets the schema for the query identified by the handle; asserts success
        and returns the metadata response."""
        req = TCLIService.TGetResultSetMetadataReq()
        req.operationHandle = handle
        resp = self.hs2_client.GetResultSetMetadata(req)
        HS2TestSuite.check_response(resp)
        return resp

    def column_results_to_string(self, columns):
        """Quick-and-dirty way to get a readable string to compare the output of a
        columnar-oriented query to its expected output. Returns a tuple
        (num_rows, formatted) where 'formatted' has one line per row with the values
        joined by ", " and NULLs rendered as "NULL"."""
        num_rows = 0
        # Determine the number of rows by finding the type of the first column
        for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
            typed_col = getattr(columns[0], col_type)
            if typed_col is not None:
                num_rows = len(typed_col.values)
                break

        lines = []
        for i in range(num_rows):
            row = []
            for c in columns:
                for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
                    typed_col = getattr(c, col_type)
                    if typed_col is not None:
                        # 'nulls' is read as a bitmap: bit (i % 8) of byte (i // 8)
                        # flags row i as NULL. Floor division keeps the index an int
                        # on Python 3 as well.
                        null_byte = typed_col.nulls[i // 8]
                        # Indexing yields a 1-char str on Python 2 and an int when
                        # 'nulls' is a Python 3 bytes object.
                        indicator = null_byte if isinstance(null_byte, int) \
                            else ord(null_byte)
                        if indicator & (1 << (i % 8)):
                            row.append("NULL")
                        else:
                            row.append(str(typed_col.values[i]))
                        break
            lines.append(", ".join(row) + "\n")
        return (num_rows, "".join(lines))

    def get_operation_status(self, operation_handle):
        """Executes GetOperationStatus with the given operation handle and returns the
        TGetOperationStatusResp"""
        get_operation_status_req = TCLIService.TGetOperationStatusReq()
        get_operation_status_req.operationHandle = operation_handle
        return self.hs2_client.GetOperationStatus(get_operation_status_req)

    def wait_for_operation_state(self, operation_handle, expected_state,
                                 timeout=10, interval=1):
        """Waits for the operation to reach expected_state by polling
        GetOperationStatus every interval seconds, returning the
        TGetOperationStatusResp, or raising an assertion after timeout seconds."""
        # Tracked separately so the failure message works even if no poll happened.
        last_state = None
        start_time = time()
        while time() - start_time < timeout:
            get_operation_status_resp = self.get_operation_status(operation_handle)
            HS2TestSuite.check_response(get_operation_status_resp)
            last_state = get_operation_status_resp.operationState
            # Compare by value: identity ('is') on ints is only accidentally true.
            if last_state == expected_state:
                return get_operation_status_resp
            sleep(interval)
        assert False, ('Did not reach expected operation state %s in time, actual '
                       'state was %s' % (expected_state, last_state))

    def wait_for_admission_control(self, operation_handle, timeout=10):
        """Waits for the admission control processing of the query to complete by
        polling GetOperationStatus every 0.05 seconds, returning the
        TGetOperationStatusResp once the operation state lies strictly between
        INITIALIZED_STATE and PENDING_STATE, or raising an assertion after timeout
        seconds."""
        # Tracked separately so the failure message works even if no poll happened.
        last_state = None
        start_time = time()
        while time() - start_time < timeout:
            get_operation_status_resp = self.get_operation_status(operation_handle)
            HS2TestSuite.check_response(get_operation_status_resp)
            last_state = get_operation_status_resp.operationState
            if TCLIService.TOperationState.INITIALIZED_STATE < last_state < \
                    TCLIService.TOperationState.PENDING_STATE:
                return get_operation_status_resp
            sleep(0.05)
        assert False, ('Did not complete admission control processing in time, current '
                       'operation state of query: %s' % (last_state))

    def execute_statement(self, statement, conf_overlay=None,
                          expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
                          expected_error_prefix=None):
        """Executes statement in the session self.session_handle, passing
        conf_overlay as the request's confOverlay when given, and checks if the
        response meets the expectations. If so, it returns the response."""
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = statement
        if conf_overlay:
            execute_statement_req.confOverlay = conf_overlay
        execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
        HS2TestSuite.check_response(execute_statement_resp, expected_status_code,
                                    expected_error_prefix)
        return execute_statement_resp
class ClientEx:
    """Small convenience wrapper around the node's Thrift API client.

    Offers wallet balance lookup, plain coin transfers and smart-contract
    deployment. Transactions are serialized with ``struct.pack`` and signed
    with the caller's ed25519 private key before being submitted.
    """

    def __init__(self, address):
        """Open a connection to the node.

        :param address: indexable pair; ``address[0]`` is the host and
            ``address[1]`` the port.
        """
        self.tr = TSocket(address[0], address[1])
        self.protocol = TBinaryProtocol(self.tr)
        self.client = Client(self.protocol)
        self.tr.open()

    def close(self):
        """Close the underlying socket."""
        self.tr.close()

    def wallet_balance_get(self, pub_key_bytes):
        """Return the node's ``WalletBalanceGet`` response for the key."""
        return self.client.WalletBalanceGet(pub_key_bytes)

    def __fee(self, value):
        """Encode a fee as ``sign * 32768 + exp * 1024 + frac``.

        ``value`` is normalized to ``mantissa * 10 ** expi`` with the
        mantissa below 1.0; ``exp`` is ``expi + 18`` and ``frac`` is the
        mantissa scaled by 1024 and rounded.

        :param value: fee amount as a number.
        :returns: the encoded integer, or ``-1`` (after printing a
            diagnostic) when the biased exponent falls outside [0, 28].
        """
        # The sign must be captured before abs(); the previous code assigned
        # 1 to `value` here, so negative fees were encoded as a fee of 1.0.
        sign = 1 if value < 0.0 else 0
        value = abs(value)
        expf = 0
        if value != 0.0:
            expf = math.log10(value)
        # Round the exponent half away from zero.
        if expf >= 0:
            expf = expf + .5
        else:
            expf = expf - .5
        expi = int(expf)
        value /= math.pow(10, expi)
        if value >= 1.0:
            # Keep the mantissa strictly below 1.0.
            value *= 0.1
            expi = expi + 1
        exp = expi + 18
        if exp < 0 or exp > 28:
            print('exponent value {0} out of range [0, 28]'.format(exp))
            return -1
        frac = round(value * 1024)
        return sign * 32768 + exp * 1024 + frac

    def transfer_coins(self, integral, fraction, fee, keys):
        """Build, sign and submit a plain transfer; print the node's reply."""
        res = self.client.TransactionFlow(
            self.create_transaction(integral, fraction, fee, keys))
        print(res)

    def create_transaction(self, integral, fraction, fee, keys):
        """Build and sign a transfer from ``keys.public_key_bytes`` to
        ``keys.target_public_key_bytes``.

        :param integral: integral part of the amount.
        :param fraction: fractional part of the amount.
        :param fee: fee, encoded with ``__fee``.
        :param keys: object exposing ``public_key_bytes``,
            ``target_public_key_bytes`` and ``private_key_bytes``.
        :returns: the populated and signed ``Transaction``.
        """
        tr = Transaction()
        # The inner id is one past the last id the node has seen.
        tr.id = self.client.WalletTransactionsCountGet(
            keys.public_key_bytes).lastTransactionInnerId + 1
        tr.source = keys.public_key_bytes
        tr.target = keys.target_public_key_bytes
        tr.amount = Amount()
        tr.amount.integral = integral
        tr.amount.fraction = fraction
        tr.currency = 1
        tr.fee = AmountCommission()
        tr.fee.commission = self.__fee(fee)

        # NOTE(review): 'h' is a signed 16-bit field, so a commission with
        # the sign bit set (negative fee) is rejected by pack().
        serial_transaction = pack(
            '=6s32s32slqhbb',                          # '=' - without alignment
            bytearray(tr.id.to_bytes(6, 'little')),    # 6s  - 6 byte InnerID
            tr.source,                                 # 32s - 32 byte source public key
            tr.target,                                 # 32s - 32 byte target public key
            tr.amount.integral,                        # l   - 4 byte integer
            tr.amount.fraction,                        # q   - 8 byte integer
            tr.fee.commission,                         # h   - 2 byte integer
            tr.currency,                               # b   - 1 byte integer
            0                                          # b   - 1 byte userfield_num
        )

        signing_key = ed25519.SigningKey(keys.private_key_bytes)
        tr.signature = signing_key.sign(serial_transaction)
        return tr

    def deploy_smart_contract(self, code, fee, keys):
        """Build, sign and submit a deployment; print the node's reply."""
        res = self.client.TransactionFlow(
            self.create_transaction_with_smart_contract(code, fee, keys))
        print(res)

    def create_transaction_with_smart_contract(self, code, fee, keys):
        """Build and sign a transaction that deploys a smart contract.

        :param code: contract source; an empty string selects a built-in
            sample contract.
        :param fee: fee, encoded with ``__fee``.
        :param keys: object exposing ``public_key_bytes``,
            ``target_public_key_bytes`` and ``private_key_bytes``.
        :returns: the signed ``Transaction``, or the string
            ``'compile error'`` when the node reports a non-zero compile
            status.
        """
        if code == "":
            code = 'import com.credits.scapi.annotations.*; import com.credits.scapi.v0.*; public class ' \
                   'MySmartContract extends SmartContract { public MySmartContract() {} public String hello2(String ' \
                   'say) { return \"Hello\" + say; } }'

        tr = Transaction()
        tr.id = self.client.WalletTransactionsCountGet(
            keys.public_key_bytes).lastTransactionInnerId + 1
        tr.source = keys.public_key_bytes
        tr.target = keys.target_public_key_bytes
        tr.amount = Amount()
        tr.amount.integral = 0
        tr.amount.fraction = 0
        tr.currency = 1
        tr.fee = AmountCommission()
        tr.fee.commission = self.__fee(fee)

        serial_transaction = pack(
            '=6s32s32slqhbb',                          # '=' - without alignment
            bytearray(tr.id.to_bytes(6, 'little')),    # 6s  - 6 byte InnerID
            tr.source,                                 # 32s - 32 byte source public key
            tr.target,                                 # 32s - 32 byte target public key
            tr.amount.integral,                        # l   - 4 byte integer
            tr.amount.fraction,                        # q   - 8 byte integer
            tr.fee.commission,                         # h   - 2 byte integer
            tr.currency,                               # b   - 1 byte integer
            1                                          # b   - 1 byte userfield_num
        )

        # Hash input for the contract address: inner id followed by the
        # byte code of every compiled object.
        target = pack('=6s', bytearray(tr.id.to_bytes(6, 'little')))
        byte_code = self.client.SmartContractCompile(code)
        if byte_code.status.code == 0:
            for bco in byte_code.byteCodeObjects:
                target = target + bco.byteCode
        else:
            # Same lower-case attribute path as the status check above; the
            # previous `Status.Message` spelling did not match it.
            print(byte_code.status.message)
            return 'compile error'

        tr.smartContract = SmartContractInvocation()
        tr.smartContract.smartContractDeploy = SmartContractDeploy()
        tr.smartContract.smartContractDeploy.sourceCode = code
        tr.smartContract.ForgetNewState = False
        # NOTE(review): the signed bytes above carry the original target key
        # while tr.target becomes a hex string here - confirm against the
        # node's expectations.
        tr.target = hashlib.blake2s(target).hexdigest()

        # Hand-assembled user field holding the source and compiled objects.
        uf = bytearray(b'\x11\x00\x01\x00\x00\x00\x00\x015\x00\x02\x12\x00\x00\x00\x00\x15\x00\x03\x11\x00\x00\x00\x00\x02\x00\x04\x00\x12\x00\x05\x11\x00\x01')
        uf = uf + pack('=6s', self.reverse(len(code)))
        uf = uf + bytearray(code.encode())
        uf = uf + bytearray(b'\x15\x00\x02\x12')
        uf = uf + self.reverse(len(byte_code.byteCodeObjects))
        deployed_objects = []
        for bco in byte_code.byteCodeObjects:
            uf = uf + b'1101'
            uf = uf + self.reverse(len(bco.name))
            uf = uf + bytearray(bco.name.encode())
            uf = uf + b'1102'
            uf = uf + self.reverse(len(bco.byteCode))
            uf = uf + bco.byteCode
            nbco = ByteCodeObject()
            nbco.name = bco.name
            nbco.byteCode = bco.byteCode
            deployed_objects.append(nbco)
            # NOTE(review): presumably the terminator of this object's
            # record; loop nesting was reconstructed - confirm.
            uf = uf + b'\x00'
        # Attach every compiled object; assigning a one-element list inside
        # the loop kept only the last one while all were serialized above.
        tr.smartContract.smartContractDeploy.byteCodeObjects = deployed_objects
        uf = uf + b'\x11\x00\x03\x00\x00\x00\x00\x08\x00\x04\x00\x00\x00\x00\x00'
        uf = uf + b'\x00'

        serial_transaction = serial_transaction + self.reverse(len(uf))
        serial_transaction = serial_transaction + uf

        signing_key = ed25519.SigningKey(keys.private_key_bytes)
        tr.signature = signing_key.sign(serial_transaction)
        return tr

    def reverse(self, a):
        """Return integer ``a`` as a 6-byte big-endian ``bytearray``
        (little-endian bytes, reversed)."""
        a = a.to_bytes(6, 'little')
        a = bytearray(a)
        a.reverse()
        return a
class HS2TestSuite(ImpalaTestSuite):
  """Base suite with helpers for driving an impalad through the HiveServer2 Thrift
  API: fetching results, inspecting columnar result sets, polling operation state and
  executing statements."""

  # Attribute names probed on a columnar result column; the helpers below use the first
  # one that is not None.
  HS2_V6_COLUMN_TYPES = ['boolVal', 'stringVal', 'byteVal', 'i16Val', 'i32Val', 'i64Val',
                         'doubleVal', 'binaryVal']

  def setup(self):
    """Opens a buffered binary-protocol connection to IMPALAD_HS2_HOST_PORT."""
    host, port = IMPALAD_HS2_HOST_PORT.split(":")
    self.socket = TSocket(host, port)
    self.transport = TBufferedTransport(self.socket)
    self.transport.open()
    self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
    self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

  def teardown(self):
    """Closes the socket if one is still set."""
    if self.socket:
      self.socket.close()

  @staticmethod
  def check_response(response,
                     expected_status_code = TCLIService.TStatusCode.SUCCESS_STATUS,
                     expected_error_prefix = None):
    """Asserts that 'response' carries 'expected_status_code'. For non-success codes,
    also asserts that the error message starts with 'expected_error_prefix' when one is
    given."""
    assert response.status.statusCode == expected_status_code
    if expected_status_code != TCLIService.TStatusCode.SUCCESS_STATUS\
       and expected_error_prefix is not None:
      assert response.status.errorMessage.startswith(expected_error_prefix)

  @staticmethod
  def _typed_column(column):
    """Returns the first non-None typed value holder of 'column', probing the attributes
    listed in HS2_V6_COLUMN_TYPES in order, or None if none of them is set."""
    for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
      typed_col = getattr(column, col_type)
      if typed_col is not None:
        return typed_col
    return None

  def close(self, op_handle):
    """Closes the operation 'op_handle' and asserts that the call succeeded."""
    close_op_req = TCLIService.TCloseOperationReq()
    close_op_req.operationHandle = op_handle
    close_op_resp = self.hs2_client.CloseOperation(close_op_req)
    assert close_op_resp.status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS

  def get_num_rows(self, result_set):
    """Returns the number of rows in 'result_set', handling both row-oriented and
    columnar results. Asserts if columns are present but the first one has no typed
    values set."""
    # rows will always be set, so the only way to tell if we should use it is to see if
    # any columns are set
    if result_set.columns is None or len(result_set.columns) == 0:
      return len(result_set.rows)

    typed_col = HS2TestSuite._typed_column(result_set.columns[0])
    assert typed_col is not None, 'First column has no typed values set'
    return len(typed_col.values)

  def fetch_at_most(self, handle, orientation, size, expected_num_rows = None):
    """Fetches at most size number of rows from the query identified by the given
    operation handle. Uses the given fetch orientation. Asserts that the fetch returns a
    success status, and that the number of rows returned is equal to given
    expected_num_rows (if given). It is only safe for expected_num_rows to be 0 or 1:
    Impala does not guarantee that a larger result set will be returned in one go. Use
    fetch_until() for repeated fetches. Returns the fetch response."""
    assert expected_num_rows is None or expected_num_rows in (0, 1)
    fetch_results_req = TCLIService.TFetchResultsReq()
    fetch_results_req.operationHandle = handle
    fetch_results_req.orientation = orientation
    fetch_results_req.maxRows = size
    fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
    HS2TestSuite.check_response(fetch_results_resp)
    if expected_num_rows is not None:
      assert self.get_num_rows(fetch_results_resp.results) == expected_num_rows
    return fetch_results_resp

  def fetch_until(self, handle, orientation, size, expected_num_rows = None):
    """Fetches rows from the given query handle until 'expected_num_rows' rows have been
    received. The first fetch uses the given orientation and asks for 'size' rows; every
    following fetch uses FETCH_NEXT and asks for (size - num rows already fetched).
    Asserts that each follow-up fetch returns at least one row and that exactly
    'expected_num_rows' rows were fetched in total. If 'expected_num_rows' is None, it
    defaults to 'size', so that the effect is to both ask for and expect the same number
    of rows. Returns nothing."""
    assert expected_num_rows is None or (size >= expected_num_rows)
    fetch_results_req = TCLIService.TFetchResultsReq()
    fetch_results_req.operationHandle = handle
    fetch_results_req.orientation = orientation
    fetch_results_req.maxRows = size
    fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
    HS2TestSuite.check_response(fetch_results_resp)
    num_rows_fetched = self.get_num_rows(fetch_results_resp.results)
    if expected_num_rows is None:
      expected_num_rows = size
    while num_rows_fetched < expected_num_rows:
      # Always try to fetch at most 'size'
      fetch_results_req.maxRows = size - num_rows_fetched
      fetch_results_req.orientation = TCLIService.TFetchOrientation.FETCH_NEXT
      fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
      HS2TestSuite.check_response(fetch_results_resp)
      last_fetch_size = self.get_num_rows(fetch_results_resp.results)
      # An empty fetch means the result set ended before the expected row count.
      assert last_fetch_size > 0
      num_rows_fetched += last_fetch_size

    assert num_rows_fetched == expected_num_rows

  def fetch_fail(self, handle, orientation, expected_error_prefix):
    """Attempts to fetch rows from the query identified by the given operation handle.
    Asserts that the fetch returns an error with an error message matching the given
    expected_error_prefix. Returns the fetch response."""
    fetch_results_req = TCLIService.TFetchResultsReq()
    fetch_results_req.operationHandle = handle
    fetch_results_req.orientation = orientation
    fetch_results_req.maxRows = 100
    fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
    HS2TestSuite.check_response(fetch_results_resp, TCLIService.TStatusCode.ERROR_STATUS,
                                expected_error_prefix)
    return fetch_results_resp

  def result_metadata(self, handle):
    """ Gets the schema for the query identified by the handle """
    req = TCLIService.TGetResultSetMetadataReq()
    req.operationHandle = handle
    resp = self.hs2_client.GetResultSetMetadata(req)
    HS2TestSuite.check_response(resp)
    return resp

  def column_results_to_string(self, columns):
    """Quick-and-dirty way to get a readable string to compare the output of a
    columnar-oriented query to its expected output. Returns a (num_rows, formatted)
    tuple where 'formatted' has one comma-separated line per row."""
    # Determine the number of rows from the typed values of the first column
    first_col = HS2TestSuite._typed_column(columns[0])
    num_rows = 0 if first_col is None else len(first_col.values)
    # Columns without any typed values contribute nothing to a row.
    typed_cols = [tc for tc in (HS2TestSuite._typed_column(c) for c in columns)
                  if tc is not None]

    lines = []
    for i in xrange(num_rows):
      row = []
      for typed_col in typed_cols:
        # 'nulls' is read as a bitmap: bit (i % 8) of byte (i // 8) flags row i as NULL.
        indicator = ord(typed_col.nulls[i // 8])
        if indicator & (1 << (i % 8)):
          row.append("NULL")
        else:
          row.append(str(typed_col.values[i]))
      lines.append(", ".join(row) + "\n")
    return (num_rows, "".join(lines))

  def get_operation_status(self, operation_handle):
    """Executes GetOperationStatus with the given operation handle and returns the
    TGetOperationStatusResp"""
    get_operation_status_req = TCLIService.TGetOperationStatusReq()
    get_operation_status_req.operationHandle = operation_handle
    get_operation_status_resp = \
        self.hs2_client.GetOperationStatus(get_operation_status_req)
    return get_operation_status_resp

  def wait_for_operation_state(self, operation_handle, expected_state,
                               timeout = 10, interval = 1):
    """Waits for the operation to reach expected_state by polling GetOperationStatus every
    interval seconds, returning the TGetOperationStatusResp, or raising an assertion after
    timeout seconds."""
    # Tracked separately so the failure message is well-defined even if no poll ran.
    actual_state = None
    start_time = time()
    while (time() - start_time < timeout):
      get_operation_status_resp = self.get_operation_status(operation_handle)
      HS2TestSuite.check_response(get_operation_status_resp)
      actual_state = get_operation_status_resp.operationState
      # Compare by value: identity comparison of integers is not reliable.
      if actual_state == expected_state:
        return get_operation_status_resp
      sleep(interval)
    assert False, 'Did not reach expected operation state %s in time, actual state was ' \
        '%s' % (expected_state, actual_state)

  def wait_for_admission_control(self, operation_handle, timeout = 10):
    """Waits for the admission control processing of the query to complete by polling
    GetOperationStatus every 0.05 seconds until the operation state lies strictly between
    INITIALIZED_STATE and PENDING_STATE. Returns the TGetOperationStatusResp, or raises an
    assertion after timeout seconds."""
    actual_state = None
    start_time = time()
    while (time() - start_time < timeout):
      get_operation_status_resp = self.get_operation_status(operation_handle)
      HS2TestSuite.check_response(get_operation_status_resp)
      actual_state = get_operation_status_resp.operationState
      if TCLIService.TOperationState.INITIALIZED_STATE < \
          actual_state < \
          TCLIService.TOperationState.PENDING_STATE:
        return get_operation_status_resp
      sleep(0.05)
    assert False, 'Did not complete admission control processing in time, current ' \
        'operation state of query: %s' % (actual_state)

  def execute_statement(self, statement, conf_overlay=None,
                        expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
                        expected_error_prefix=None):
    """Executes statement in the session self.session_handle and checks if the response
    meets the expectations. If so, it returns the response."""
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.statement = statement
    if conf_overlay:
      execute_statement_req.confOverlay = conf_overlay
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    HS2TestSuite.check_response(execute_statement_resp, expected_status_code,
                                expected_error_prefix)
    return execute_statement_resp
class ClientEx:
    """Small convenience wrapper around the node's Thrift API client.

    Offers wallet balance lookup, plain coin transfers and smart-contract
    deployment. Transactions are serialized with ``struct.pack`` and signed
    with the caller's ed25519 private key before being submitted.
    """

    def __init__(self, address):
        """Open a connection to the node.

        :param address: indexable pair; ``address[0]`` is the host and
            ``address[1]`` the port.
        """
        self.tr = TSocket(address[0], address[1])
        self.protocol = TBinaryProtocol(self.tr)
        self.client = Client(self.protocol)
        self.tr.open()

    def close(self):
        """Close the underlying socket."""
        self.tr.close()

    def wallet_balance_get(self, pub_key_bytes):
        """Return the node's ``WalletBalanceGet`` response for the key."""
        return self.client.WalletBalanceGet(pub_key_bytes)

    def double_to_fee(self, value):
        """Encode a fee as ``sign * 32768 + (expi + 18) * 1024 + frac``.

        ``value`` is normalized to ``mantissa * 10 ** expi`` with the
        mantissa below 1.0; ``frac`` is the mantissa scaled by 1024 and
        truncated. No range check is applied to the exponent.

        :param value: fee amount as a number.
        :returns: the encoded integer.
        """
        fee_comission = 32768 if value < 0. else 0
        value = math.fabs(value)
        expf = 0. if value == 0. else math.log10(value)
        # Round the exponent half away from zero.
        expi = int(expf + 0.5 if expf >= 0. else expf - 0.5)
        value /= math.pow(10, expi)
        if value >= 1.:
            # Keep the mantissa strictly below 1.0.
            value *= 0.1
            expi += 1
        fee_comission += int(1024 * (expi + 18))
        fee_comission += int(value * 1024)
        return fee_comission

    def __fee(self, value):
        """Encode a fee as ``sign * 32768 + exp * 1024 + frac``.

        Like ``double_to_fee`` but the fraction is rounded rather than
        truncated and the biased exponent is range-checked.

        :param value: fee amount as a number.
        :returns: the encoded integer, or ``-1`` (after printing a
            diagnostic) when the biased exponent falls outside [0, 28].
        """
        # The sign must be captured before abs(); the previous code assigned
        # 1 to `value` here, so negative fees were encoded as a fee of 1.0.
        sign = 1 if value < 0.0 else 0
        value = abs(value)
        expf = 0
        if value != 0.0:
            expf = math.log10(value)
        if expf >= 0:
            expf = expf + .5
        else:
            expf = expf - .5
        expi = int(expf)
        value /= math.pow(10, expi)
        if value >= 1.0:
            value *= 0.1
            expi = expi + 1
        exp = expi + 18
        if exp < 0 or exp > 28:
            print('exponent value {0} out of range [0, 28]'.format(exp))
            return -1
        frac = round(value * 1024)
        return sign * 32768 + exp * 1024 + frac

    def transfer_coins(self, integral, fraction, fee, keys):
        """Build, sign and submit a plain transfer; print the node's reply."""
        res = self.client.TransactionFlow(
            self.create_transaction(integral, fraction, fee, keys))
        print(res)

    def create_transaction(self, integral, fraction, fee, keys):
        """Build and sign a transfer from ``keys.public_key_bytes`` to
        ``keys.target_public_key_bytes``.

        :param integral: integral part of the amount.
        :param fraction: fractional part of the amount.
        :param fee: fee, encoded with ``__fee``.
        :param keys: object exposing ``public_key_bytes``,
            ``target_public_key_bytes`` and ``private_key_bytes``.
        :returns: the populated and signed ``Transaction``.
        """
        tr = Transaction()
        # The inner id is one past the last id the node has seen.
        tr.id = self.client.WalletTransactionsCountGet(
            keys.public_key_bytes).lastTransactionInnerId + 1
        tr.source = keys.public_key_bytes
        tr.target = keys.target_public_key_bytes
        tr.amount = Amount()
        tr.amount.integral = integral
        tr.amount.fraction = fraction
        tr.currency = 1
        tr.fee = AmountCommission()
        tr.fee.commission = self.__fee(fee)

        # NOTE(review): 'h' is a signed 16-bit field, so a commission with
        # the sign bit set (negative fee) is rejected by pack().
        serial_transaction = pack(
            '=6s32s32slqhbb',                          # '=' - without alignment
            bytearray(tr.id.to_bytes(6, 'little')),    # 6s  - 6 byte InnerID
            tr.source,                                 # 32s - 32 byte source public key
            tr.target,                                 # 32s - 32 byte target public key
            tr.amount.integral,                        # l   - 4 byte integer
            tr.amount.fraction,                        # q   - 8 byte integer
            tr.fee.commission,                         # h   - 2 byte integer
            tr.currency,                               # b   - 1 byte integer
            0                                          # b   - 1 byte userfield_num
        )

        signing_key = ed25519.SigningKey(keys.private_key_bytes)
        tr.signature = signing_key.sign(serial_transaction)
        return tr

    def deploy_smart_contract(self, code, fee, keys):
        """Build, sign and submit a deployment; print the node's reply."""
        res = self.client.TransactionFlow(
            self.create_transaction_with_smart_contract(code, fee, keys))
        print(res)

    def createContractAddress(self, source, tId, contract):
        """Return the blake2s digest of ``source`` + ``tId`` + the byte code
        of every object in ``contract.smartContractDeploy.byteCodeObjects``.
        """
        tmpBytes = bytearray()
        tmpBytes.extend(source)
        tmpBytes.extend(tId)
        for a in contract.smartContractDeploy.byteCodeObjects:
            tmpBytes.extend(a.byteCode)
        res = hashlib.blake2s()
        res.update(tmpBytes)
        return res.digest()

    def normalizeCode(self, javaText):
        """Return ``javaText`` with carriage returns and tabs turned into
        spaces, a space inserted before every ``{``, and runs of spaces
        collapsed to a single space."""
        javaText = javaText.replace('\r', ' ').replace('\t', ' ').replace('{', ' {')
        # Collapse runs of spaces. The needle must be a double space: with a
        # single-space needle the loop never terminates on spaced text.
        while '  ' in javaText:
            javaText = javaText.replace('  ', ' ')
        return javaText

    def compile_smart(self, contract_body):
        """Return the node's ``SmartContractCompile`` response for
        ``contract_body``, or ``None`` when no client is set."""
        if self.client is None:
            return None
        res = self.client.SmartContractCompile(contract_body)
        return res

    def create_transaction_with_smart_contract(self, code, fee, keys):
        """Build and sign a transaction that deploys a smart contract.

        The contract source is normalized, compiled by the node, and the
        serialized ``SmartContractInvocation`` is appended to the signed
        bytes as a single user field.

        :param code: contract source; an empty string selects a built-in
            sample contract.
        :param fee: fee, encoded with ``double_to_fee``.
        :param keys: object exposing ``public_key_bytes`` and
            ``private_key_bytes``.
        :returns: the populated and signed ``Transaction``.
        """
        tr = Transaction()
        contract = SmartContractInvocation()
        contract.smartContractDeploy = SmartContractDeploy()

        if code == "":
            code = 'import com.credits.scapi.annotations.*; import com.credits.scapi.v0.*; public class ' \
                   'MySmartContract extends SmartContract { public MySmartContract() {} public String hello2(String ' \
                   'say) { return \"Hello\" + say; } }'

        contractText = self.normalizeCode(code)
        result = self.compile_smart(contractText)
        contract.smartContractDeploy.byteCodeObjects = result.byteCodeObjects
        tr.smartContract = contract
        tr.smartContract.smartContractDeploy.sourceCode = contractText

        tr.source = keys.public_key_bytes
        w = self.client.WalletTransactionsCountGet(tr.source)
        # The same 6-byte little-endian id feeds the transaction id, the
        # contract address hash and the signed bytes.
        lastInnerId = bytearray(
            (w.lastTransactionInnerId + 1).to_bytes(6, 'little'))
        tr.id = int.from_bytes(lastInnerId, byteorder='little', signed=False)
        tr.target = self.createContractAddress(tr.source, lastInnerId, contract)
        tr.amount = Amount()
        tr.amount.integral = 0
        tr.amount.fraction = 0
        tr.balance = Amount()
        tr.balance.integral = 0
        tr.balance.fraction = 0
        tr.currency = 1
        tr.fee = AmountCommission()
        tr.fee.commission = self.double_to_fee(fee)
        tr.userFields = ""
        ufNum1 = bytearray(b'\x01')

        contract.smartContractDeploy.hashState = ""
        contract.smartContractDeploy.tokenStandard = 0
        contract.method = ""
        contract.params = []
        contract.usedContracts = []
        contract.forgetNewState = False

        # Serialize the invocation with the Thrift binary protocol into an
        # in-memory buffer.
        transportOut = TMemoryBuffer()
        protocolOut = TBinaryProtocol(transportOut)
        contract.write(protocolOut)
        scBytes = transportOut.getvalue()

        sMap = '=6s32s32slqhb1s4s' + str(len(scBytes)) + 's'
        serial_transaction_for_sign = pack(
            sMap,                # '=' - without alignment
            lastInnerId,         # 6s  - 6 byte InnerID
            tr.source,           # 32s - 32 byte source public key
            tr.target,           # 32s - 32 byte target public key
            tr.amount.integral,  # l   - 4 byte integer
            tr.amount.fraction,  # q   - 8 byte integer
            tr.fee.commission,   # h   - 2 byte integer
            tr.currency,         # b   - 1 byte integer
            ufNum1,              # 1s  - number of user fields
            bytes(len(scBytes).to_bytes(4, byteorder="little")),  # 4s - length
            scBytes)             # Ns  - serialized invocation

        signing_key = ed25519.SigningKey(keys.private_key_bytes)
        tr.signature = signing_key.sign(serial_transaction_for_sign)
        return tr

    def reverse(self, a):
        """Return integer ``a`` as a 6-byte big-endian ``bytearray``
        (little-endian bytes, reversed)."""
        a = a.to_bytes(6, 'little')
        a = bytearray(a)
        a.reverse()
        return a
class TestAuthorization(CustomClusterTestSuite):
  """Custom-cluster test covering HS2 impersonation together with authorization and
  audit logging."""

  # Directory handed to the cluster via --audit_event_log_dir; removed in teardown().
  AUDIT_LOG_DIR = tempfile.mkdtemp(dir=os.getenv('LOG_DIR'))

  def setup(self):
    """Connects an HS2 Thrift client to IMPALAD_HS2_HOST_PORT."""
    hs2_host, hs2_port = IMPALAD_HS2_HOST_PORT.split(":")
    self.socket = TSocket(hs2_host, hs2_port)
    self.transport = TBufferedTransport(self.socket)
    self.transport.open()
    self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
    self.hs2_client = TCLIService.Client(self.protocol)

  def teardown(self):
    """Closes the socket if it is still set and deletes the audit log directory."""
    if self.socket:
      self.socket.close()
    shutil.rmtree(self.AUDIT_LOG_DIR, ignore_errors=True)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=/test-warehouse/authz-policy.ini\
      --authorized_proxy_user_config=hue=%s\
      --audit_event_log_dir=%s" % (getuser(), AUDIT_LOG_DIR))
  def test_impersonation(self):
    """End-to-end impersonation + authorization test. Expects authorization to be
    configured before running this test"""
    # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
    # the module within this test function, rather than as a top-level import. This way
    # the tests in that module will not get pulled when executing this test suite. The fix
    # is to split the utility code out of the TestHS2 class and support HS2 as a first
    # class citizen in our test framework.
    from tests.hs2.test_hs2 import TestHS2

    def run_statement(statement):
      # Issues 'statement' in the current session and hands back the raw response.
      req = TCLIService.TExecuteStatementReq()
      req.sessionHandle = self.session_handle
      req.statement = statement
      return self.hs2_client.ExecuteStatement(req)

    session_req = TCLIService.TOpenSessionReq()
    session_req.username = '******'
    session_req.configuration = dict()
    session_req.configuration['impala.doas.user'] = getuser()
    session_resp = self.hs2_client.OpenSession(session_req)
    TestHS2.check_response(session_resp)
    self.session_handle = session_resp.sessionHandle

    # A table outside our privileges: the statement must be rejected and audited.
    denied_resp = run_statement("describe tpch_seq.lineitem")
    assert 'User \'%s\' does not have privileges to access' % getuser() in\
        str(denied_resp)

    assert self.__wait_for_audit_record(user=getuser(), impersonator='hue'),\
        'No matching audit event recorded in time window'

    # The same operation on a table we may access must succeed.
    allowed_resp = run_statement("describe tpch.lineitem")
    TestHS2.check_response(allowed_resp)

    # Impersonating a user we are not a proxy for must be refused.
    session_req.configuration['impala.doas.user'] = '******'
    session_resp = self.hs2_client.OpenSession(session_req)
    assert 'User \'hue\' is not authorized to impersonate \'some_user\'' in str(
        session_resp)

    self.socket.close()
    self.socket = None

  def __wait_for_audit_record(self, user, impersonator, timeout_secs=30):
    """Polls the audit log directory once per second until some file holds a record
    for the given user and impersonator. Returns True on a match and False once
    timeout_secs have elapsed."""
    # Audit logs are flushed to disk on regular intervals, so the event may lag
    # behind the statement that caused it.
    deadline_start = time()
    while time() - deadline_start < timeout_secs:
      if any(self.__find_matching_audit_record(name, user, impersonator)
             for name in os.listdir(self.AUDIT_LOG_DIR)):
        return True
      sleep(1)
    return False

  def __find_matching_audit_record(self, audit_file_name, user, impersonator):
    """Returns True if any line of the named audit file, parsed as JSON, has an entry
    under its smallest key whose 'user' and 'impersonator' match the arguments."""
    log_path = os.path.join(self.AUDIT_LOG_DIR, audit_file_name)
    with open(log_path) as audit_log_file:
      for line in audit_log_file.readlines():
        events = json.loads(line)
        if len(events) == 0:
          continue
        record = events[min(events)]
        if record['user'] == user and record['impersonator'] == impersonator:
          return True
    return False
class TestAuthorization(CustomClusterTestSuite): AUDIT_LOG_DIR = tempfile.mkdtemp(dir=os.getenv('LOG_DIR')) def setup(self): host, port = (self.cluster.impalads[0].service.hostname, self.cluster.impalads[0].service.hs2_port) self.socket = TSocket(host, port) self.transport = TBufferedTransport(self.socket) self.transport.open() self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport) self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol) def teardown(self): if self.socket: self.socket.close() shutil.rmtree(self.AUDIT_LOG_DIR, ignore_errors=True) @pytest.mark.execute_serially @CustomClusterTestSuite.with_args("--server_name=server1\ --authorization_policy_file=%s\ --authorization_policy_provider_class=%s" %\ (AUTH_POLICY_FILE, "org.apache.sentry.provider.file.LocalGroupResourceAuthorizationProvider")) def test_custom_authorization_provider(self): from tests.hs2.test_hs2 import TestHS2 open_session_req = TCLIService.TOpenSessionReq() # User is 'test_user' (defined in the authorization policy file) open_session_req.username = '******' open_session_req.configuration = dict() resp = self.hs2_client.OpenSession(open_session_req) TestHS2.check_response(resp) # Try to query a table we are not authorized to access. self.session_handle = resp.sessionHandle execute_statement_req = TCLIService.TExecuteStatementReq() execute_statement_req.sessionHandle = self.session_handle execute_statement_req.statement = "describe tpch_seq.lineitem" execute_statement_resp = self.hs2_client.ExecuteStatement( execute_statement_req) assert 'User \'%s\' does not have privileges to access' % 'test_user' in\ str(execute_statement_resp) # Now try the same operation on a table we are authorized to access. 
execute_statement_req = TCLIService.TExecuteStatementReq() execute_statement_req.sessionHandle = self.session_handle execute_statement_req.statement = "describe tpch.lineitem" execute_statement_resp = self.hs2_client.ExecuteStatement( execute_statement_req) TestHS2.check_response(execute_statement_resp) @pytest.mark.execute_serially @CustomClusterTestSuite.with_args("--server_name=server1\ --authorization_policy_file=%s\ --authorized_proxy_user_config=hue=%s" % (AUTH_POLICY_FILE, getuser())) def test_access_runtime_profile(self): from tests.hs2.test_hs2 import TestHS2 open_session_req = TCLIService.TOpenSessionReq() open_session_req.username = getuser() open_session_req.configuration = dict() resp = self.hs2_client.OpenSession(open_session_req) TestHS2.check_response(resp) # Current user can't access view's underlying tables self.session_handle = resp.sessionHandle execute_statement_req = TCLIService.TExecuteStatementReq() execute_statement_req.sessionHandle = self.session_handle execute_statement_req.statement = "explain select * from functional.complex_view" execute_statement_resp = self.hs2_client.ExecuteStatement( execute_statement_req) assert 'User \'%s\' does not have privileges to EXPLAIN' % getuser() in\ str(execute_statement_resp) # User should not have access to the runtime profile self.__run_stmt_and_verify_profile_access( "select * from functional.complex_view", False, False) self.__run_stmt_and_verify_profile_access( "select * from functional.complex_view", False, True) # Repeat as a delegated user open_session_req.username = '******' open_session_req.configuration = dict() # Delegated user is the current user open_session_req.configuration['impala.doas.user'] = getuser() resp = self.hs2_client.OpenSession(open_session_req) TestHS2.check_response(resp) self.session_handle = resp.sessionHandle # User should not have access to the runtime profile self.__run_stmt_and_verify_profile_access( "select * from functional.complex_view", False, False) 
self.__run_stmt_and_verify_profile_access( "select * from functional.complex_view", False, True) # Create a view for which the user has access to the underlying tables. open_session_req.username = getuser() open_session_req.configuration = dict() resp = self.hs2_client.OpenSession(open_session_req) TestHS2.check_response(resp) self.session_handle = resp.sessionHandle execute_statement_req = TCLIService.TExecuteStatementReq() execute_statement_req.sessionHandle = self.session_handle execute_statement_req.statement = """create view if not exists tpch.customer_view as select * from tpch.customer limit 1""" execute_statement_resp = self.hs2_client.ExecuteStatement( execute_statement_req) TestHS2.check_response(execute_statement_resp) # User should be able to run EXPLAIN execute_statement_req = TCLIService.TExecuteStatementReq() execute_statement_req.sessionHandle = self.session_handle execute_statement_req.statement = """explain select * from tpch.customer_view""" execute_statement_resp = self.hs2_client.ExecuteStatement( execute_statement_req) TestHS2.check_response(execute_statement_resp) # User should have access to the runtime profile and exec summary self.__run_stmt_and_verify_profile_access( "select * from tpch.customer_view", True, False) self.__run_stmt_and_verify_profile_access( "select * from tpch.customer_view", True, True) # Repeat as a delegated user open_session_req.username = '******' open_session_req.configuration = dict() # Delegated user is the current user open_session_req.configuration['impala.doas.user'] = getuser() resp = self.hs2_client.OpenSession(open_session_req) TestHS2.check_response(resp) self.session_handle = resp.sessionHandle # User should have access to the runtime profile and exec summary self.__run_stmt_and_verify_profile_access( "select * from tpch.customer_view", True, False) self.__run_stmt_and_verify_profile_access( "select * from tpch.customer_view", True, True) # Clean up execute_statement_req = 
TCLIService.TExecuteStatementReq() execute_statement_req.sessionHandle = self.session_handle execute_statement_req.statement = "drop view if exists tpch.customer_view" execute_statement_resp = self.hs2_client.ExecuteStatement( execute_statement_req) TestHS2.check_response(execute_statement_resp) @pytest.mark.execute_serially @CustomClusterTestSuite.with_args("--server_name=server1\ --authorization_policy_file=%s\ --authorized_proxy_user_config=foo=bar;hue=%s\ --abort_on_failed_audit_event=false\ --audit_event_log_dir=%s" % (AUTH_POLICY_FILE, getuser(), AUDIT_LOG_DIR)) def test_user_impersonation(self): """End-to-end user impersonation + authorization test""" self.__test_impersonation() @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( "--server_name=server1\ --authorization_policy_file=%s\ --authorized_proxy_user_config=hue=bar\ --authorized_proxy_group_config=foo=bar;hue=%s\ --abort_on_failed_audit_event=false\ --audit_event_log_dir=%s" % (AUTH_POLICY_FILE, grp.getgrgid(os.getgid()).gr_name, AUDIT_LOG_DIR)) def test_group_impersonation(self): """End-to-end group impersonation + authorization test""" self.__test_impersonation() @pytest.mark.execute_serially @CustomClusterTestSuite.with_args("--server_name=server1\ --authorization_policy_file=%s\ --authorized_proxy_user_config=foo=bar\ --authorized_proxy_group_config=foo=bar\ --abort_on_failed_audit_event=false\ --audit_event_log_dir=%s" % (AUTH_POLICY_FILE, AUDIT_LOG_DIR)) def test_no_matching_user_and_group_impersonation(self): open_session_req = TCLIService.TOpenSessionReq() open_session_req.username = '******' open_session_req.configuration = dict() open_session_req.configuration['impala.doas.user'] = '******' resp = self.hs2_client.OpenSession(open_session_req) assert 'User \'hue\' is not authorized to delegate to \'abc\'' in str( resp) def __test_impersonation(self): """End-to-end impersonation + authorization test. 
Expects authorization to be configured before running this test""" # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import # the module within this test function, rather than as a top-level import. This way # the tests in that module will not get pulled when executing this test suite. The fix # is to split the utility code out of the TestHS2 class and support HS2 as a first # class citizen in our test framework. from tests.hs2.test_hs2 import TestHS2 open_session_req = TCLIService.TOpenSessionReq() # Connected user is 'hue' open_session_req.username = '******' open_session_req.configuration = dict() # Delegated user is the current user open_session_req.configuration['impala.doas.user'] = getuser() resp = self.hs2_client.OpenSession(open_session_req) TestHS2.check_response(resp) # Try to query a table we are not authorized to access. self.session_handle = resp.sessionHandle execute_statement_req = TCLIService.TExecuteStatementReq() execute_statement_req.sessionHandle = self.session_handle execute_statement_req.statement = "describe tpch_seq.lineitem" execute_statement_resp = self.hs2_client.ExecuteStatement( execute_statement_req) assert 'User \'%s\' does not have privileges to access' % getuser() in\ str(execute_statement_resp) assert self.__wait_for_audit_record(user=getuser(), impersonator='hue'),\ 'No matching audit event recorded in time window' # Now try the same operation on a table we are authorized to access. 
execute_statement_req = TCLIService.TExecuteStatementReq() execute_statement_req.sessionHandle = self.session_handle execute_statement_req.statement = "describe tpch.lineitem" execute_statement_resp = self.hs2_client.ExecuteStatement( execute_statement_req) TestHS2.check_response(execute_statement_resp) # Verify the correct user information is in the runtime profile query_id = operation_id_to_query_id( execute_statement_resp.operationHandle.operationId) profile_page = self.cluster.impalads[ 0].service.read_query_profile_page(query_id) self.__verify_profile_user_fields(profile_page, effective_user=getuser(), delegated_user=getuser(), connected_user='******') # Try to user we are not authorized to delegate to. open_session_req.configuration['impala.doas.user'] = '******' resp = self.hs2_client.OpenSession(open_session_req) assert 'User \'hue\' is not authorized to delegate to \'some_user\'' in str( resp) # Create a new session which does not have a do_as_user. open_session_req.username = '******' open_session_req.configuration = dict() resp = self.hs2_client.OpenSession(open_session_req) TestHS2.check_response(resp) # Run a simple query, which should succeed. execute_statement_req = TCLIService.TExecuteStatementReq() execute_statement_req.sessionHandle = resp.sessionHandle execute_statement_req.statement = "select 1" execute_statement_resp = self.hs2_client.ExecuteStatement( execute_statement_req) TestHS2.check_response(execute_statement_resp) # Verify the correct user information is in the runtime profile. Since there is # no do_as_user the Delegated User field should be empty. 
query_id = operation_id_to_query_id( execute_statement_resp.operationHandle.operationId) profile_page = self.cluster.impalads[ 0].service.read_query_profile_page(query_id) self.__verify_profile_user_fields(profile_page, effective_user='******', delegated_user='', connected_user='******') self.socket.close() self.socket = None def __verify_profile_user_fields(self, profile_str, effective_user, connected_user, delegated_user): """Verifies the given runtime profile string contains the specified values for User, Connected User, and Delegated User""" assert '\n User: %s\n' % effective_user in profile_str assert '\n Connected User: %s\n' % connected_user in profile_str assert '\n Delegated User: %s\n' % delegated_user in profile_str def __wait_for_audit_record(self, user, impersonator, timeout_secs=30): """Waits until an audit log record is found that contains the given user and impersonator, or until the timeout is reached. """ # The audit event might not show up immediately (the audit logs are flushed to disk # on regular intervals), so poll the audit event logs until a matching record is # found. start_time = time() while time() - start_time < timeout_secs: for audit_file_name in os.listdir(self.AUDIT_LOG_DIR): if self.__find_matching_audit_record(audit_file_name, user, impersonator): return True sleep(1) return False def __find_matching_audit_record(self, audit_file_name, user, impersonator): with open(os.path.join(self.AUDIT_LOG_DIR, audit_file_name)) as audit_log_file: for line in audit_log_file.readlines(): json_dict = json.loads(line) if len(json_dict) == 0: continue if json_dict[min(json_dict)]['user'] == user and\ json_dict[min(json_dict)]['impersonator'] == impersonator: return True return False def __run_stmt_and_verify_profile_access(self, stmt, has_access, close_operation): """Runs 'stmt' and retrieves the runtime profile and exec summary. If 'has_access' is true, it verifies that no runtime profile or exec summary are returned. 
If 'close_operation' is true, make sure the operation is closed before retrieving the profile and exec summary.""" from tests.hs2.test_hs2 import TestHS2 execute_statement_req = TCLIService.TExecuteStatementReq() execute_statement_req.sessionHandle = self.session_handle execute_statement_req.statement = stmt execute_statement_resp = self.hs2_client.ExecuteStatement( execute_statement_req) TestHS2.check_response(execute_statement_resp) if close_operation: close_operation_req = TCLIService.TCloseOperationReq() close_operation_req.operationHandle = execute_statement_resp.operationHandle TestHS2.check_response( self.hs2_client.CloseOperation(close_operation_req)) get_profile_req = ImpalaHiveServer2Service.TGetRuntimeProfileReq() get_profile_req.operationHandle = execute_statement_resp.operationHandle get_profile_req.sessionHandle = self.session_handle get_profile_resp = self.hs2_client.GetRuntimeProfile(get_profile_req) if has_access: TestHS2.check_response(get_profile_resp) assert "Plan: " in get_profile_resp.profile else: assert "User %s is not authorized to access the runtime profile or "\ "execution summary." % (getuser()) in str(get_profile_resp) exec_summary_req = ImpalaHiveServer2Service.TGetExecSummaryReq() exec_summary_req.operationHandle = execute_statement_resp.operationHandle exec_summary_req.sessionHandle = self.session_handle exec_summary_resp = self.hs2_client.GetExecSummary(exec_summary_req) if has_access: TestHS2.check_response(exec_summary_resp) else: assert "User %s is not authorized to access the runtime profile or "\ "execution summary." 
% (getuser()) in str(exec_summary_resp) @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--server_name=server1 --sentry_config=" + SENTRY_CONFIG_FILE, catalogd_args="--sentry_config=" + SENTRY_CONFIG_FILE, impala_log_dir=tempfile.mkdtemp(prefix="test_deprecated_none_", dir=os.getenv("LOG_DIR"))) def test_deprecated_flag_doesnt_show(self): assert_no_files_in_dir_contain( self.impala_log_dir, "authorization_policy_file " + "flag is deprecated. Object Ownership feature is not supported") @pytest.mark.execute_serially @CustomClusterTestSuite.with_args("--server_name=server1\ --authorization_policy_file=%s\ --authorization_policy_provider_class=%s" % ( AUTH_POLICY_FILE, "org.apache.sentry.provider.file.LocalGroupResourceAuthorizationProvider" ), impala_log_dir=tempfile.mkdtemp( prefix="test_deprecated_", dir=os.getenv("LOG_DIR"))) def test_deprecated_flags(self): assert_file_in_dir_contains( self.impala_log_dir, "authorization_policy_file flag" + " is deprecated. 
Object Ownership feature is not supported") @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE, catalogd_args="--sentry_config=%s" % SENTRY_CONFIG_FILE, impala_log_dir=tempfile.mkdtemp(prefix="test_catalog_restart_", dir=os.getenv("LOG_DIR"))) def test_catalog_restart(self, unique_role): """IMPALA-7713: Tests that a catalogd restart when authorization is enabled should reset the previous privileges stored in impalad's catalog to avoid stale privilege data in the impalad's catalog.""" def assert_privileges(): result = self.client.execute("show grant role %s_foo" % unique_role) TestAuthorization._check_privileges( result, [["database", "functional", "", "", "", "all", "false"]]) result = self.client.execute("show grant role %s_bar" % unique_role) TestAuthorization._check_privileges( result, [["database", "functional_kudu", "", "", "", "all", "false"]]) result = self.client.execute("show grant role %s_baz" % unique_role) TestAuthorization._check_privileges( result, [["database", "functional_avro", "", "", "", "all", "false"]]) self.role_cleanup(unique_role) try: self.client.execute("create role %s_foo" % unique_role) self.client.execute("create role %s_bar" % unique_role) self.client.execute("create role %s_baz" % unique_role) self.client.execute( "grant all on database functional to role %s_foo" % unique_role) self.client.execute( "grant all on database functional_kudu to role %s_bar" % unique_role) self.client.execute( "grant all on database functional_avro to role %s_baz" % unique_role) assert_privileges() self._start_impala_cluster([ "--catalogd_args=--sentry_config=%s" % SENTRY_CONFIG_FILE, "--restart_catalogd_only" ]) assert_privileges() finally: self.role_cleanup(unique_role) def role_cleanup(self, role_name_match): """Cleans up any roles that match the given role name.""" for role_name in self.client.execute("show roles").data: if role_name_match in role_name: 
self.client.execute("drop role %s" % role_name) @staticmethod def _check_privileges(result, expected): def columns(row): cols = row.split("\t") return cols[0:len(cols) - 1] assert map(columns, result.data) == expected @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE, catalogd_args="--sentry_config=%s" % SENTRY_CONFIG_FILE, impala_log_dir=tempfile.mkdtemp(prefix="test_catalog_restart_", dir=os.getenv("LOG_DIR"))) def test_catalog_object(self, unique_role): """IMPALA-7721: Tests /catalog_object web API for principal and privilege""" self.role_cleanup(unique_role) try: self.client.execute("create role %s" % unique_role) self.client.execute( "grant select on database functional to role %s" % unique_role) for service in [ self.cluster.catalogd.service, self.cluster.get_first_impalad().service ]: obj_dump = service.get_catalog_object_dump( "PRINCIPAL", "%s.ROLE" % unique_role) assert "catalog_version" in obj_dump # Get the privilege associated with that principal ID. principal_id = re.search(r"principal_id \(i32\) = (\d+)", obj_dump) assert principal_id is not None obj_dump = service.get_catalog_object_dump( "PRIVILEGE", urllib.quote( "server=server1->db=functional->action=select->grantoption=false.%s.ROLE" % principal_id.group(1))) assert "catalog_version" in obj_dump # Get the principal that does not exist. obj_dump = service.get_catalog_object_dump( "PRINCIPAL", "doesnotexist.ROLE") assert "CatalogException" in obj_dump # Get the privilege that does not exist. 
obj_dump = service.get_catalog_object_dump( "PRIVILEGE", urllib.quote( "server=server1->db=doesntexist->action=select->grantoption=false.%s.ROLE" % principal_id.group(1))) assert "CatalogException" in obj_dump finally: self.role_cleanup(unique_role) @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE, catalogd_args= "--sentry_config=%s --sentry_catalog_polling_frequency_s=3600" % SENTRY_CONFIG_FILE, impala_log_dir=tempfile.mkdtemp( prefix="test_invalidate_metadata_sentry_unavailable_", dir=os.getenv("LOG_DIR"))) def test_invalidate_metadata_sentry_unavailable(self, unique_role): """IMPALA-7824: Tests that running INVALIDATE METADATA when Sentry is unavailable should not cause Impala to hang.""" self.role_cleanup(unique_role) try: group_name = grp.getgrnam(getuser()).gr_name self.client.execute("create role %s" % unique_role) self.client.execute("grant all on server to role %s" % unique_role) self.client.execute("grant role %s to group `%s`" % (unique_role, group_name)) self._stop_sentry_service() # Calling INVALIDATE METADATA when Sentry is unavailable should return an error. result = self.execute_query_expect_failure(self.client, "invalidate metadata") result_str = str(result) assert "MESSAGE: CatalogException: Error refreshing authorization policy:" \ in result_str assert "CAUSED BY: ImpalaRuntimeException: Error refreshing authorization policy." \ " Sentry is unavailable. Ensure Sentry is up:" in result_str self._start_sentry_service(SENTRY_CONFIG_FILE) # Calling INVALIDATE METADATA after Sentry is up should not return an error. 
self.execute_query_expect_success(self.client, "invalidate metadata") finally: self.role_cleanup(unique_role) @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE, catalogd_args= "--sentry_config=%s --sentry_catalog_polling_frequency_s=3600 " % SENTRY_CONFIG_FILE, impala_log_dir=tempfile.mkdtemp(prefix="test_refresh_authorization_", dir=os.getenv("LOG_DIR"))) def test_refresh_authorization(self, unique_role): """Tests refresh authorization statement by adding and removing roles and privileges externally. The long Sentry polling is used so that any authorization metadata updated externally does not get polled by Impala in order to test an an explicit call to refresh authorization statement.""" group_name = grp.getgrnam(getuser()).gr_name self.role_cleanup(unique_role) for sync_ddl in [1, 0]: query_options = {'sync_ddl': sync_ddl} clients = [] if sync_ddl: # When sync_ddl is True, we want to ensure the changes are propagated to all # coordinators. for impalad in self.cluster.impalads: clients.append(impalad.service.create_beeswax_client()) else: clients.append(self.client) try: self.client.execute("create role %s" % unique_role) self.client.execute("grant role %s to group `%s`" % (unique_role, group_name)) self.client.execute("grant refresh on server to %s" % unique_role) self.validate_refresh_authorization_roles( unique_role, query_options, clients) self.validate_refresh_authorization_privileges( unique_role, query_options, clients) finally: self.role_cleanup(unique_role) def validate_refresh_authorization_roles(self, unique_role, query_options, clients): """This method tests refresh authorization statement by adding and removing roles externally.""" try: # Create two roles inside Impala. self.client.execute("create role %s_internal1" % unique_role) self.client.execute("create role %s_internal2" % unique_role) # Drop an existing role (_internal1) outside Impala. 
role = "%s_internal1" % unique_role subprocess.check_call([ "/bin/bash", "-c", "%s/bin/sentryShell --conf %s/sentry-site.xml -dr -r %s" % (os.getenv("SENTRY_HOME"), os.getenv("SENTRY_CONF_DIR"), role) ], stdout=sys.stdout, stderr=sys.stderr) result = self.execute_query_expect_success(self.client, "show roles") assert any(role in x for x in result.data) self.execute_query_expect_success(self.client, "refresh authorization", query_options=query_options) for client in clients: result = self.execute_query_expect_success( client, "show roles") assert not any(role in x for x in result.data) # Add a new role outside Impala. role = "%s_external" % unique_role subprocess.check_call([ "/bin/bash", "-c", "%s/bin/sentryShell --conf %s/sentry-site.xml -cr -r %s" % (os.getenv("SENTRY_HOME"), os.getenv("SENTRY_CONF_DIR"), role) ], stdout=sys.stdout, stderr=sys.stderr) result = self.execute_query_expect_success(self.client, "show roles") assert not any(role in x for x in result.data) self.execute_query_expect_success(self.client, "refresh authorization", query_options=query_options) for client in clients: result = self.execute_query_expect_success( client, "show roles") assert any(role in x for x in result.data) finally: for suffix in ["internal1", "internal2", "external"]: self.role_cleanup("%s_%s" % (unique_role, suffix)) def validate_refresh_authorization_privileges(self, unique_role, query_options, clients): """This method tests refresh authorization statement by adding and removing privileges externally.""" # Grant select privilege outside Impala. subprocess.check_call([ "/bin/bash", "-c", "%s/bin/sentryShell --conf %s/sentry-site.xml -gpr -p " "'server=server1->db=functional->table=alltypes->action=select' -r %s" % (os.getenv("SENTRY_HOME"), os.getenv("SENTRY_CONF_DIR"), unique_role) ], stdout=sys.stdout, stderr=sys.stderr) # Before refresh authorization, there should only be one refresh privilege. 
result = self.execute_query_expect_success( self.client, "show grant role %s" % unique_role) assert len(result.data) == 1 assert any("refresh" in x for x in result.data) for client in clients: self.execute_query_expect_failure( client, "select * from functional.alltypes limit 1") self.execute_query_expect_success(self.client, "refresh authorization", query_options=query_options) for client in clients: # Ensure select privilege was granted after refresh authorization. result = self.execute_query_expect_success( client, "show grant role %s" % unique_role) assert len(result.data) == 2 assert any("select" in x for x in result.data) assert any("refresh" in x for x in result.data) self.execute_query_expect_success( client, "select * from functional.alltypes limit 1")
def main():
    """Register a temporary log filter on the server and print matching logs.

    Arguments are read from the module-level ``parser``:

    * ``args.exps``: filter expressions of the form ``<field><op><value>``;
      expressions that cannot be parsed are silently ignored.
    * ``args.fields``: fields to print for each log; when empty the raw log
      is printed.
    * ``args.timefields``: fields rendered as ISO timestamps.
    * ``args.formats``: ``<field>:<type>`` entries, where type is ``int`` or
      ``float``; such fields are printed unquoted.
    * ``args.collection``, ``args.host``, ``args.port``: filter target and
      server address.

    The filter is registered under a random ``tail_...`` name, logs are
    pulled until the cursor returns an empty value or the user presses
    Ctrl-C, and the filter is then unregistered. The transport is always
    closed, including when registration fails or an error is raised.
    """
    args = parser.parse_args()
    FORMATS = ("int", "float")

    exps = []
    for text in args.exps:
        parsed = _parse_filter_expression(text)
        if parsed is not None:
            exps.append(FilterExpression(*parsed))

    fields = {field: 1 for field in args.fields}

    formats = {}
    for spec in args.formats:
        pieces = spec.split(":")
        # Ignore entries that are not exactly "<field>:<known type>".
        if len(pieces) == 2 and pieces[1] in FORMATS:
            formats[pieces[0]] = pieces[1]

    name = "tail_" + "".join(
        random.choice(string.digits + string.ascii_letters) for _ in range(16))
    log_filter = Filter(args.collection, name, exps=exps, fields=fields,
                        formats=formats, expried_time=5)

    transport = TBufferedTransport(TSocket(args.host, args.port))
    protocol = TBinaryProtocolAccelerated(transport)
    client = Client(protocol)
    transport.open()
    try:
        result = client.register_filter(log_filter)
        if result.result != 0:
            print("register error", name, result.msg)
            # SystemExit propagates through the outer finally, so the
            # transport is still closed.
            exit()
        print("register", name, log_filter)

        try:
            cursor = client.pull(name)
            while True:
                log = cursor.next()
                if not log:
                    break
                if not args.fields:
                    print(log.encode("utf-8"))
                    continue
                try:
                    record = json.loads(log)
                except (TypeError, ValueError):
                    record = None
                # Lines that are not JSON objects are printed verbatim.
                if not isinstance(record, dict):
                    print(log.encode("utf-8"))
                    continue
                line = _format_record(record, args.fields, args.timefields,
                                      formats)
                print(line.encode("utf-8"))
        except KeyboardInterrupt:
            pass
        finally:
            result = client.unregister_filter(name)
            print("unregister", name, result.msg)
    finally:
        transport.close()


# Operators are tried in this order and the first one found wins, so the
# two-character comparisons must come before ">" and "<".
_FILTER_OPS = ("==", "!=", ">=", "<=", ">", "<", "=regexp")
_INT_RE = re.compile(r"^\d+$")
_FLOAT_RE = re.compile(r"^\d+?\.\d+$")


def _parse_filter_expression(text):
    """Parse ``<field><op><value>`` into ``(field, op, vtype, value)``.

    Returns None when the text contains no known operator, when the first
    operator found occurs more than once, or when a ``=regexp`` value is not
    a parenthesised, compilable pattern. Quoted values have their quotes
    removed and are typed "string"; unquoted values are typed "float", "int"
    or "string" depending on their shape. An empty value is a plain string.
    """
    for op in _FILTER_OPS:
        if op not in text:
            continue
        parts = text.split(op)
        if len(parts) != 2:
            return None
        field, raw = parts

        if op == "=regexp":
            if not (raw.startswith("(") and raw.endswith(")")):
                return None
            pattern = raw[1:-1]
            try:
                re.compile(pattern)
            except (re.error, OverflowError):
                return None
            return field, "regexp", "string", pattern

        if (raw.startswith('"') and raw.endswith('"')) or (
                raw.startswith("'") and raw.endswith("'")):
            return field, op, "string", raw[1:-1]
        if _FLOAT_RE.match(raw):
            return field, op, "float", raw
        if _INT_RE.match(raw):
            return field, op, "int", raw
        return field, op, "string", raw
    return None


def _format_record(record, fields, timefields, formats):
    """Render the selected *fields* of a decoded log *record* as one line.

    Fields in *timefields* are shown as ISO timestamps, or unchanged when
    the value is a string that is not an integer. Fields with an "int" or
    "float" entry in *formats* are shown unquoted. All other fields are
    wrapped in single quotes. Values are separated by single spaces.
    """
    rendered = []
    for field in fields:
        if field in timefields:
            ts = record.get(field, 0)
            try:
                ts = int(ts)
            except (TypeError, ValueError, OverflowError):
                pass  # Keep the raw value; it is handled below.
            if isinstance(ts, str):
                rendered.append(ts)
            else:
                rendered.append(
                    datetime.datetime.fromtimestamp(ts).isoformat())
        elif field in formats and formats[field] in ("int", "float"):
            rendered.append(str(record.get(field, 0)))
        else:
            rendered.append("'%s'" % record.get(field, ""))
    return " ".join(rendered)