Beispiel #1
0
class HS2TestSuite(ImpalaTestSuite):
    """Shared helpers for tests that talk to impalad through the HiveServer2 Thrift API."""

    def setup(self):
        """Open a buffered, binary-protocol HS2 connection to IMPALAD_HS2_HOST_PORT."""
        hs2_host, hs2_port = IMPALAD_HS2_HOST_PORT.split(":")
        self.socket = TSocket(hs2_host, hs2_port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = TCLIService.Client(self.protocol)

    def teardown(self):
        """Close the socket created by setup() if it is still set."""
        sock = self.socket
        if not sock:
            return
        sock.close()

    @staticmethod
    def check_response(
            response,
            expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
            expected_error_prefix=None):
        """Assert that 'response' carries the expected status code.

        When an error status is expected and 'expected_error_prefix' is given, the
        error message must additionally start with that prefix."""
        status = response.status
        assert status.statusCode == expected_status_code
        success = TCLIService.TStatusCode.SUCCESS_STATUS
        if expected_status_code == success or expected_error_prefix is None:
            return
        assert status.errorMessage.startswith(expected_error_prefix)

    def close(self, op_handle):
        """Close the operation behind 'op_handle' and assert that the call succeeded."""
        request = TCLIService.TCloseOperationReq()
        request.operationHandle = op_handle
        response = self.hs2_client.CloseOperation(request)
        assert response.status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS

    def fetch(self, handle, orientation, size, expected_num_rows=None):
        """Fetch at most 'size' rows from the operation 'handle' using 'orientation'.

        Asserts a success status and that the number of returned rows equals
        'expected_num_rows' if one was given, otherwise 'size'. Returns the fetch
        response."""
        request = TCLIService.TFetchResultsReq()
        request.operationHandle = handle
        request.orientation = orientation
        request.maxRows = size
        response = self.hs2_client.FetchResults(request)
        HS2TestSuite.check_response(response)
        wanted_rows = size if expected_num_rows is None else expected_num_rows
        assert len(response.results.rows) == wanted_rows
        return response

    def fetch_fail(self, handle, orientation, expected_error_prefix):
        """Attempt a fetch (maxRows=100) that is expected to fail.

        Asserts an error status whose message starts with 'expected_error_prefix'
        (when that prefix is not None) and returns the fetch response."""
        request = TCLIService.TFetchResultsReq()
        request.operationHandle = handle
        request.orientation = orientation
        request.maxRows = 100
        response = self.hs2_client.FetchResults(request)
        HS2TestSuite.check_response(
            response, TCLIService.TStatusCode.ERROR_STATUS, expected_error_prefix)
        return response
Beispiel #2
0
class HS2TestSuite(ImpalaTestSuite):
  """Common plumbing for test suites that exercise impalad's HiveServer2 interface."""

  def setup(self):
    """Build the Thrift socket/transport/protocol stack and the HS2 client."""
    hs2_host, hs2_port = IMPALAD_HS2_HOST_PORT.split(":")
    self.socket = TSocket(hs2_host, hs2_port)
    self.transport = TBufferedTransport(self.socket)
    self.transport.open()
    self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
    self.hs2_client = TCLIService.Client(self.protocol)

  def teardown(self):
    """Close the socket from setup(); does nothing when self.socket is falsy."""
    sock = self.socket
    if not sock:
      return
    sock.close()

  @staticmethod
  def check_response(response,
                     expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
                     expected_error_prefix=None):
    """Asserts that the response has the expected status code. If an error status is
    expected and a prefix was supplied, also asserts that the error message starts
    with that prefix."""
    status = response.status
    assert status.statusCode == expected_status_code
    expecting_success = expected_status_code == TCLIService.TStatusCode.SUCCESS_STATUS
    if expecting_success or expected_error_prefix is None:
      return
    assert status.errorMessage.startswith(expected_error_prefix)

  def close(self, op_handle):
    """Closes the given operation and asserts that the close succeeded."""
    request = TCLIService.TCloseOperationReq()
    request.operationHandle = op_handle
    response = self.hs2_client.CloseOperation(request)
    assert response.status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS

  def fetch(self, handle, orientation, size, expected_num_rows=None):
    """Fetches at most 'size' rows from the query identified by the given operation
    handle, using the given fetch orientation. Asserts that the fetch succeeds and that
    the number of rows returned equals expected_num_rows if one was given, or size
    otherwise. Returns the fetch response."""
    request = TCLIService.TFetchResultsReq()
    request.operationHandle = handle
    request.orientation = orientation
    request.maxRows = size
    response = self.hs2_client.FetchResults(request)
    HS2TestSuite.check_response(response)
    wanted_rows = size if expected_num_rows is None else expected_num_rows
    assert len(response.results.rows) == wanted_rows
    return response

  def fetch_fail(self, handle, orientation, expected_error_prefix):
    """Tries to fetch up to 100 rows from the query identified by the given operation
    handle and asserts that the fetch returns an error status whose message starts with
    expected_error_prefix (when that prefix is not None). Returns the fetch response."""
    request = TCLIService.TFetchResultsReq()
    request.operationHandle = handle
    request.orientation = orientation
    request.maxRows = 100
    response = self.hs2_client.FetchResults(request)
    HS2TestSuite.check_response(
        response, TCLIService.TStatusCode.ERROR_STATUS, expected_error_prefix)
    return response
Beispiel #3
0
    def test_isOpen_checks_for_readability(self):
        """Check TSocket.isOpen() before connecting, during use and after a peer close.

        The same sequence is repeated for a blocking socket, a non-blocking socket
        and a socket with a timeout.
        """
        # https://docs.python.org/3/library/socket.html#notes-on-socket-timeouts
        # https://docs.python.org/3/library/socket.html#socket.socket.settimeout
        timeouts = [
            None,  # blocking mode
            0,  # non-blocking mode
            1.0,  # timeout mode
        ]

        for timeout in timeouts:
            acc = ServerAcceptor(TServerSocket(port=0))
            acc.start()

            sock = TSocket(host="localhost", port=acc.port)
            self.assertFalse(sock.isOpen())
            sock.open()
            sock.setTimeout(timeout)

            # the socket shows as open immediately after connecting
            self.assertTrue(sock.isOpen())

            # and remains open during usage
            sock.write(b"hello")
            self.assertTrue(sock.isOpen())
            while True:
                try:
                    sock.read(5)
                except TTransportException as exc:
                    # The exception may carry no inner socket error at all; look the
                    # errno up defensively so that case re-raises the original
                    # transport error instead of failing with an AttributeError.
                    inner = getattr(exc, "inner", None)
                    if getattr(inner, "errno", None) == errno.EAGAIN:
                        # try again when we're in non-blocking mode
                        continue
                    raise
                break
            self.assertTrue(sock.isOpen())

            # once the server side closes, it no longer shows open
            acc.client.close(
            )  # this also blocks until the other thread is done
            acc.close()
            self.assertFalse(sock.isOpen())

            sock.close()
Beispiel #4
0
class TestAuthorization(CustomClusterTestSuite):
  """Custom-cluster tests for authorization and impersonation over HiveServer2."""

  # Created once, when the class body is evaluated, because the with_args() decorator
  # of test_impersonation needs the path at decoration time.
  # NOTE(review): teardown() removes this directory after every test although it is
  # only created once here; presumably impalad re-creates it on startup - confirm.
  AUDIT_LOG_DIR = tempfile.mkdtemp(dir=os.getenv('LOG_DIR'))

  def setup(self):
    """Opens a buffered binary-protocol HS2 connection to IMPALAD_HS2_HOST_PORT."""
    host, port = IMPALAD_HS2_HOST_PORT.split(":")
    self.socket = TSocket(host, port)
    self.transport = TBufferedTransport(self.socket)
    self.transport.open()
    self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
    self.hs2_client = TCLIService.Client(self.protocol)

  def teardown(self):
    """Closes the socket (if still set) and removes the audit log directory."""
    if self.socket:
      self.socket.close()
    shutil.rmtree(self.AUDIT_LOG_DIR, ignore_errors=True)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=/test-warehouse/authz-policy.ini\
      --authorization_policy_provider_class=%s" %
      "org.apache.sentry.provider.file.LocalGroupResourceAuthorizationProvider")
  def test_custom_authorization_provider(self):
    """Checks that authorization is enforced when a custom policy provider class is
    configured: 'test_user' is denied on tpch_seq.lineitem and allowed on
    tpch.lineitem."""
    from tests.hs2.test_hs2 import TestHS2
    open_session_req = TCLIService.TOpenSessionReq()
    # User is 'test_user' (defined in the authorization policy file). The name must
    # match the one expected in the privilege error asserted below.
    open_session_req.username = 'test_user'
    open_session_req.configuration = dict()
    resp = self.hs2_client.OpenSession(open_session_req)
    TestHS2.check_response(resp)

    # Try to query a table we are not authorized to access.
    self.session_handle = resp.sessionHandle
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.statement = "describe tpch_seq.lineitem"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    assert 'User \'%s\' does not have privileges to access' % 'test_user' in\
        str(execute_statement_resp)

    # Now try the same operation on a table we are authorized to access.
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.statement = "describe tpch.lineitem"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    TestHS2.check_response(execute_statement_resp)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=/test-warehouse/authz-policy.ini\
      --authorized_proxy_user_config=hue=%s\
      --audit_event_log_dir=%s" % (getuser(), AUDIT_LOG_DIR))
  def test_impersonation(self):
    """End-to-end impersonation + authorization test. Expects authorization to be
    configured before running this test"""
    # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
    # the module within this test function, rather than as a top-level import. This way
    # the tests in that module will not get pulled when executing this test suite. The fix
    # is to split the utility code out of the TestHS2 class and support HS2 as a first
    # class citizen in our test framework.
    from tests.hs2.test_hs2 import TestHS2
    open_session_req = TCLIService.TOpenSessionReq()
    # Connected user is 'hue' (the proxy user configured via
    # --authorized_proxy_user_config above)
    open_session_req.username = 'hue'
    open_session_req.configuration = dict()
    # Delegated user is the current user
    open_session_req.configuration['impala.doas.user'] = getuser()
    resp = self.hs2_client.OpenSession(open_session_req)
    TestHS2.check_response(resp)

    # Try to query a table we are not authorized to access.
    self.session_handle = resp.sessionHandle
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.statement = "describe tpch_seq.lineitem"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    assert 'User \'%s\' does not have privileges to access' % getuser() in\
        str(execute_statement_resp)

    assert self.__wait_for_audit_record(user=getuser(), impersonator='hue'),\
        'No matching audit event recorded in time window'

    # Now try the same operation on a table we are authorized to access.
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.statement = "describe tpch.lineitem"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)

    TestHS2.check_response(execute_statement_resp)

    # Verify the correct user information is in the runtime profile
    query_id = operation_id_to_query_id(
        execute_statement_resp.operationHandle.operationId)
    profile_page = self.cluster.impalads[0].service.read_query_profile_page(query_id)
    self.__verify_profile_user_fields(profile_page, effective_user=getuser(),
        delegated_user=getuser(), connected_user='hue')

    # Try a user we are not authorized to delegate to.
    open_session_req.configuration['impala.doas.user'] = 'some_user'
    resp = self.hs2_client.OpenSession(open_session_req)
    assert 'User \'hue\' is not authorized to delegate to \'some_user\'' in str(resp)

    # Create a new session which does not have a do_as_user.
    open_session_req.username = 'hue'
    open_session_req.configuration = dict()
    resp = self.hs2_client.OpenSession(open_session_req)
    TestHS2.check_response(resp)

    # Run a simple query, which should succeed.
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = resp.sessionHandle
    execute_statement_req.statement = "select 1"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    TestHS2.check_response(execute_statement_resp)

    # Verify the correct user information is in the runtime profile. Since there is
    # no do_as_user the Delegated User field should be empty.
    query_id = operation_id_to_query_id(
        execute_statement_resp.operationHandle.operationId)
    profile_page = self.cluster.impalads[0].service.read_query_profile_page(query_id)
    self.__verify_profile_user_fields(profile_page, effective_user='hue',
        delegated_user='', connected_user='hue')

    # Close the connection here and clear the attribute so teardown() skips it.
    self.socket.close()
    self.socket = None

  def __verify_profile_user_fields(self, profile_str, effective_user, connected_user,
      delegated_user):
    """Verifies the given runtime profile string contains the specified values for
    User, Connected User, and Delegated User"""
    assert '\n    User: %s\n' % effective_user in profile_str
    assert '\n    Connected User: %s\n' % connected_user in profile_str
    assert '\n    Delegated User: %s\n' % delegated_user in profile_str

  def __wait_for_audit_record(self, user, impersonator, timeout_secs=30):
    """Waits until an audit log record is found that contains the given user and
    impersonator, or until the timeout is reached. Returns True if a matching record
    was found and False on timeout.
    """
    # The audit event might not show up immediately (the audit logs are flushed to disk
    # on regular intervals), so poll the audit event logs until a matching record is
    # found.
    start_time = time()
    while time() - start_time < timeout_secs:
      for audit_file_name in os.listdir(self.AUDIT_LOG_DIR):
        if self.__find_matching_audit_record(audit_file_name, user, impersonator):
          return True
      sleep(1)
    return False

  def __find_matching_audit_record(self, audit_file_name, user, impersonator):
    """Returns True if the named file in AUDIT_LOG_DIR contains a record whose 'user'
    and 'impersonator' fields equal the given values. Every line of the file is parsed
    as one JSON object; only the entry under the smallest key of that object is
    inspected."""
    with open(os.path.join(self.AUDIT_LOG_DIR, audit_file_name)) as audit_log_file:
      # Iterate the file lazily instead of loading every line up front.
      for line in audit_log_file:
        json_dict = json.loads(line)
        if not json_dict:
          continue
        record = json_dict[min(json_dict)]
        if record['user'] == user and record['impersonator'] == impersonator:
          return True
    return False
class HS2TestSuite(ImpalaTestSuite):
    """Shared helpers for HiveServer2 tests that also need a scratch database."""

    # This DB will be created/dropped for every HS2TestSuite subclass. Make the name unique
    # so different test suites don't clobber each other's DBs. The [2:] is to remove the
    # "0." from the random floating-point number.
    TEST_DB = 'hs2_db' + str(random.random())[2:]

    # Names of the typed value attributes that get_num_rows() probes on a column.
    HS2_V6_COLUMN_TYPES = [
        'boolVal', 'stringVal', 'byteVal', 'i16Val', 'i32Val', 'i64Val',
        'doubleVal', 'binaryVal'
    ]

    def setup(self):
        """Drop any leftover TEST_DB, connect the HS2 client and create TEST_DB."""
        self.cleanup_db(self.TEST_DB)
        host, port = IMPALAD_HS2_HOST_PORT.split(":")
        self.socket = TSocket(host, port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)
        self.client.execute("create database %s" % self.TEST_DB)

    def teardown(self):
        """Drop TEST_DB and close the socket if it is still set."""
        self.cleanup_db(self.TEST_DB)
        if self.socket:
            self.socket.close()

    @staticmethod
    def check_response(
            response,
            expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
            expected_error_prefix=None):
        """Assert that 'response' has the expected status code.

        When an error status is expected and 'expected_error_prefix' is given, the
        error message must also start with that prefix."""
        assert response.status.statusCode == expected_status_code
        if expected_status_code != TCLIService.TStatusCode.SUCCESS_STATUS\
           and expected_error_prefix is not None:
            assert response.status.errorMessage.startswith(
                expected_error_prefix)

    def close(self, op_handle):
        """Close the operation behind 'op_handle' and assert that the call succeeded."""
        close_op_req = TCLIService.TCloseOperationReq()
        close_op_req.operationHandle = op_handle
        close_op_resp = self.hs2_client.CloseOperation(close_op_req)
        assert close_op_resp.status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS

    def get_num_rows(self, result_set):
        """Return the number of rows held by 'result_set'.

        Uses len(result_set.rows) when no columns are set; otherwise returns the
        number of values in the first column. Raises AssertionError if the first
        column has none of the HS2_V6_COLUMN_TYPES attributes set."""
        # rows will always be set, so the only way to tell if we should use it is to see if
        # any columns are set
        if not result_set.columns:
            return len(result_set.rows)

        first_column = result_set.columns[0]
        for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
            typed_col = getattr(first_column, col_type)
            if typed_col is not None:
                return len(typed_col.values)

        raise AssertionError(
            "first column has none of the HS2_V6_COLUMN_TYPES values set")

    def fetch(self, handle, orientation, size, expected_num_rows=None):
        """Fetches at most size number of rows from the query identified by the given
        operation handle. Uses the given fetch orientation. Asserts that the fetch returns
        a success status, and that the number of rows returned is equal to size, or
        equal to the given expected_num_rows (if one was given). Returns the fetch
        response."""
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = size
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp)
        num_rows = size
        if expected_num_rows is not None:
            num_rows = expected_num_rows
        assert self.get_num_rows(fetch_results_resp.results) == num_rows
        return fetch_results_resp

    def fetch_until(self, handle, orientation, size):
        """Tries to fetch exactly 'size' rows from the given query handle, with the given
        fetch orientation. If fewer rows than 'size' are returned by the first fetch,
        repeated fetches are issued for the remaining rows until 'size' rows have been
        fetched in total. A follow-up fetch that returns 0 rows fails the assertion.
        Returns nothing."""
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = size
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp)
        num_rows_fetched = self.get_num_rows(fetch_results_resp.results)
        while num_rows_fetched < size:
            # Only ask for what is still missing so the total cannot exceed 'size'.
            fetch_results_req.maxRows = size - num_rows_fetched
            fetch_results_resp = self.hs2_client.FetchResults(
                fetch_results_req)
            HS2TestSuite.check_response(fetch_results_resp)
            last_fetch_size = self.get_num_rows(fetch_results_resp.results)
            # An empty fetch would make this loop spin forever; fail instead.
            assert last_fetch_size > 0
            num_rows_fetched += last_fetch_size

        assert num_rows_fetched == size

    def fetch_fail(self, handle, orientation, expected_error_prefix):
        """Attempts to fetch rows from the query identified by the given operation handle.
        Asserts that the fetch returns an error status and, when expected_error_prefix is
        not None, that the error message starts with it. Returns the fetch response."""
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = 100
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp,
                                    TCLIService.TStatusCode.ERROR_STATUS,
                                    expected_error_prefix)
        return fetch_results_resp
Beispiel #6
0
# Polls the fight status once per second over Thrift and casts a spell whenever it is
# player1's turn. The loop never ends on its own; it stops on an exception or interrupt.
try:
    # Make socket
    transport = TSocket("213.100.51.33", 9090)

    # Buffering is critical. Raw sockets are very slow
    transport = TBufferedTransport(transport)

    # Wrap in a protocol
    protocol = TBinaryProtocol(transport)

    # Create a client to use the protocol encoder
    client = Client(protocol)

    # Connect!
    transport.open()
    try:
        while True:
            status = client.getStatus("auth1", "fight1")
            # Wrap in a one-element tuple so a tuple-valued status cannot break
            # the %-formatting.
            print("Status update:\n%r" % (status,))

            if status.currentTurn == "player1":
                print("It's your turn, casting spell!\n")
                client.castSpell("auth1", "fight1", 1, "player2")
            print("---------------------")
            time.sleep(1)
    finally:
        # Close! The loop above never ends normally, so this is the only place where
        # the connection is reliably released (on errors and on Ctrl-C).
        transport.close()
except Thrift.TException as e:
    print(e.message)
Beispiel #7
0
class TestHS2(ImpalaTestSuite):
  """Tests of basic HiveServer2 session and operation handling in impalad."""

  def setup(self):
    """Connects a Thrift HS2 client to the address in IMPALAD_HS2_HOST_PORT."""
    hs2_host, hs2_port = IMPALAD_HS2_HOST_PORT.split(":")
    self.socket = TSocket(hs2_host, hs2_port)
    self.transport = TBufferedTransport(self.socket)
    self.transport.open()
    self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
    self.hs2_client = TCLIService.Client(self.protocol)

  def teardown(self):
    """Closes the socket unless a test has already cleared self.socket."""
    sock = self.socket
    if not sock:
      return
    sock.close()

  @staticmethod
  def check_response(response, expected=TCLIService.TStatusCode.SUCCESS_STATUS):
    """Asserts that the status code of the response equals 'expected'."""
    actual = response.status.statusCode
    assert actual == expected

  def test_open_session(self):
    """Check that a session can be opened"""
    request = TCLIService.TOpenSessionReq()
    response = self.hs2_client.OpenSession(request)
    TestHS2.check_response(response)

  def test_close_session(self):
    """Test that an open session can be closed"""
    open_resp = self.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
    TestHS2.check_response(open_resp)

    close_req = TCLIService.TCloseSessionReq()
    close_req.sessionHandle = open_resp.sessionHandle
    close_resp = self.hs2_client.CloseSession(close_req)
    TestHS2.check_response(close_resp)

  def test_double_close_session(self):
    """Test that an already closed session cannot be closed a second time"""
    open_resp = self.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
    TestHS2.check_response(open_resp)

    close_req = TCLIService.TCloseSessionReq()
    close_req.sessionHandle = open_resp.sessionHandle
    first_close = self.hs2_client.CloseSession(close_req)
    TestHS2.check_response(first_close)

    # Double close should be an error
    second_close = self.hs2_client.CloseSession(close_req)
    TestHS2.check_response(second_close, TCLIService.TStatusCode.ERROR_STATUS)

  @needs_session
  def test_execute_select(self):
    """Test that a simple select statement works"""
    statement_req = TCLIService.TExecuteStatementReq()
    statement_req.sessionHandle = self.session_handle
    statement_req.statement = "SELECT COUNT(*) FROM functional.alltypes"
    statement_resp = self.hs2_client.ExecuteStatement(statement_req)
    TestHS2.check_response(statement_resp)

    fetch_req = TCLIService.TFetchResultsReq()
    fetch_req.operationHandle = statement_resp.operationHandle
    fetch_req.maxRows = 100
    fetch_resp = self.hs2_client.FetchResults(fetch_req)
    TestHS2.check_response(fetch_resp)

    results = fetch_resp.results
    assert len(results.rows) == 1
    assert results.startRowOffset == 0

    # A set hasMoreRows flag is reported as an expected failure, not a hard one.
    try:
      assert not fetch_resp.hasMoreRows
    except AssertionError:
      pytest.xfail("IMPALA-558")

  @needs_session
  def test_get_operation_status(self):
    """Tests that GetOperationStatus returns a valid result for a running query"""
    statement_req = TCLIService.TExecuteStatementReq()
    statement_req.sessionHandle = self.session_handle
    statement_req.statement = "SELECT COUNT(*) FROM functional.alltypes"
    statement_resp = self.hs2_client.ExecuteStatement(statement_req)
    TestHS2.check_response(statement_resp)

    status_req = TCLIService.TGetOperationStatusReq()
    status_req.operationHandle = statement_resp.operationHandle

    status_resp = self.hs2_client.GetOperationStatus(status_req)
    TestHS2.check_response(status_resp)

    acceptable_states = (
        TCLIService.TOperationState.INITIALIZED_STATE,
        TCLIService.TOperationState.RUNNING_STATE,
        TCLIService.TOperationState.FINISHED_STATE,
    )
    assert status_resp.operationState in acceptable_states

  @needs_session
  def test_malformed_get_operation_status(self):
    """Tests that a short guid / secret returns an error (regression would be to crash
    impalad)"""
    op_id = TCLIService.THandleIdentifier()
    op_handle = TCLIService.TOperationHandle()
    op_handle.operationId = op_id
    op_id.guid = "short"
    op_id.secret = "short_secret"
    # Make sure the handle really is malformed before sending it.
    assert len(op_id.guid) != 16
    assert len(op_id.secret) != 16
    op_handle.operationType = TCLIService.TOperationType.EXECUTE_STATEMENT
    op_handle.hasResultSet = False

    status_req = TCLIService.TGetOperationStatusReq()
    status_req.operationHandle = op_handle

    status_resp = self.hs2_client.GetOperationStatus(status_req)
    TestHS2.check_response(status_resp, TCLIService.TStatusCode.ERROR_STATUS)
    sizes = (len(op_id.guid), len(op_id.secret))
    err_msg = "(guid size: %d, expected 16, secret size: %d, expected 16)" % sizes
    assert err_msg in status_resp.status.errorMessage

  @pytest.mark.execute_serially
  def test_socket_close_forces_session_close(self):
    """Test that closing the underlying socket forces the associated session to close.
    See IMPALA-564"""
    metric = "impala-server.num-open-hiveserver2-sessions"
    open_resp = self.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
    TestHS2.check_response(open_resp)
    num_sessions = self.impalad_test_service.get_metric_value(metric)

    assert num_sessions > 0

    # Clear the attribute after closing so teardown() does not close it again.
    self.socket.close()
    self.socket = None
    self.impalad_test_service.wait_for_metric_value(metric, num_sessions - 1)

  @pytest.mark.execute_serially
  def test_multiple_sessions(self):
    """Test that multiple sessions on the same socket connection are allowed"""
    metric = "impala-server.num-open-hiveserver2-sessions"
    num_sessions = self.impalad_test_service.get_metric_value(metric)
    seen_handles = []
    for _ in xrange(5):
      open_resp = self.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
      TestHS2.check_response(open_resp)
      # Check that all sessions get different IDs
      assert open_resp.sessionHandle not in seen_handles
      seen_handles.append(open_resp.sessionHandle)

    self.impalad_test_service.wait_for_metric_value(metric, num_sessions + 5)

    # Closing the shared socket should bring the session count back to its start value.
    self.socket.close()
    self.socket = None
    self.impalad_test_service.wait_for_metric_value(metric, num_sessions)

  @needs_session
  def test_get_schemas(self):
    """Test that GetSchemas succeeds, that its results can be fetched and that the
    query profile page describes the operation as a GET_SCHEMAS DDL statement"""
    schemas_req = TCLIService.TGetSchemasReq()
    schemas_req.sessionHandle = self.session_handle
    schemas_resp = self.hs2_client.GetSchemas(schemas_req)
    TestHS2.check_response(schemas_resp)
    fetch_req = TCLIService.TFetchResultsReq()
    fetch_req.operationHandle = schemas_resp.operationHandle
    fetch_req.maxRows = 100
    fetch_resp = self.hs2_client.FetchResults(fetch_req)
    TestHS2.check_response(fetch_resp)
    query_id = operation_id_to_query_id(schemas_resp.operationHandle.operationId)
    profile_page = self.impalad_test_service.read_query_profile_page(query_id)

    # Test fix for IMPALA-619
    assert "Sql Statement: GET_SCHEMAS" in profile_page
    assert "Query Type: DDL" in profile_page
Beispiel #8
0
class HS2TestSuite(ImpalaTestSuite):
    """Test suite base that opens a HiveServer2 Thrift connection and offers helpers for
    fetching results, reading result metadata and polling operation status."""
    # Database name that setup() and teardown() pass to cleanup_db().
    TEST_DB = 'hs2_db'

    # Attribute names probed (via getattr) on a result column to find which typed value
    # holder is populated.
    HS2_V6_COLUMN_TYPES = [
        'boolVal', 'stringVal', 'byteVal', 'i16Val', 'i32Val', 'i64Val',
        'doubleVal', 'binaryVal'
    ]

    def setup(self):
        """Cleans up the test database, then opens a buffered Thrift transport to the
        HS2 endpoint and creates the binary-protocol client used by the tests."""
        self.cleanup_db(self.TEST_DB)
        hs2_host, hs2_port = IMPALAD_HS2_HOST_PORT.split(":")
        sock = TSocket(hs2_host, hs2_port)
        self.socket = sock
        buffered = TBufferedTransport(sock)
        self.transport = buffered
        buffered.open()
        binary_protocol = TBinaryProtocol.TBinaryProtocol(buffered)
        self.protocol = binary_protocol
        self.hs2_client = ImpalaHiveServer2Service.Client(binary_protocol)

    def teardown(self):
        """Cleans up the test database and closes the HS2 socket.

        The socket is closed in a finally clause so that an exception raised while
        cleaning up the database does not leave the connection open."""
        try:
            self.cleanup_db(self.TEST_DB)
        finally:
            if self.socket:
                self.socket.close()

    @staticmethod
    def check_response(
            response,
            expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
            expected_error_prefix=None):
        """Asserts that the response carries the expected status code. When a non-success
        code is expected and an error prefix is supplied, also asserts that the error
        message starts with that prefix."""
        status = response.status
        assert status.statusCode == expected_status_code
        # The error message is only inspected for expected failures with a prefix given.
        if expected_status_code == TCLIService.TStatusCode.SUCCESS_STATUS:
            return
        if expected_error_prefix is None:
            return
        assert status.errorMessage.startswith(expected_error_prefix)

    def close(self, op_handle):
        """Closes the operation identified by op_handle and asserts that the server
        reports a success status."""
        request = TCLIService.TCloseOperationReq()
        request.operationHandle = op_handle
        response = self.hs2_client.CloseOperation(request)
        succeeded = (response.status.statusCode ==
                     TCLIService.TStatusCode.SUCCESS_STATUS)
        assert succeeded

    def get_num_rows(self, result_set):
        """Returns the number of rows held by result_set.

        When no columns are set the count comes from result_set.rows. Otherwise it is the
        number of values in the first populated typed holder of the first column. Fails
        an assertion if the first column has none of the known typed holders set."""
        # rows will always be set, so the only way to tell if we should use it is to see if
        # any columns are set
        if not result_set.columns:
            return len(result_set.rows)

        first_column = result_set.columns[0]
        for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
            typed_col = getattr(first_column, col_type)
            if typed_col is not None:
                return len(typed_col.values)

        assert False, "No typed values set on first column: %r" % (first_column,)

    def fetch_at_most(self, handle, orientation, size, expected_num_rows=None):
        """Fetches at most size number of rows from the query identified by the given
        operation handle. Uses the given fetch orientation. Asserts that the fetch returns a
        success status, and that the number of rows returned is equal to given
        expected_num_rows (if given). It is only safe for expected_num_rows to be 0 or 1:
        Impala does not guarantee that a larger result set will be returned in one go. Use
        fetch_until() for repeated fetches.

        Returns the fetch response."""
        assert expected_num_rows is None or expected_num_rows in (0, 1)
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = size
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp)
        if expected_num_rows is not None:
            actual_num_rows = self.get_num_rows(fetch_results_resp.results)
            assert actual_num_rows == expected_num_rows, \
                "Expected %s rows, fetched %s" % (expected_num_rows, actual_num_rows)
        return fetch_results_resp

    def fetch_until(self, handle, orientation, size, expected_num_rows=None):
        """Fetches from the given query handle until 'expected_num_rows' rows have been
        received, asserting along the way.

        The first fetch uses the given orientation and asks for 'size' rows; every later
        fetch uses FETCH_NEXT and asks for 'size' minus the rows received so far. When
        'expected_num_rows' is None it defaults to 'size'. An assertion fails if a
        follow-up fetch returns no rows or if the final total differs from the expected
        count. Nothing is returned."""
        assert expected_num_rows is None or (size >= expected_num_rows)
        request = TCLIService.TFetchResultsReq()
        request.operationHandle = handle

        def fetch_once(fetch_orientation, max_rows):
            # Issues one FetchResults call on the shared request and returns the number
            # of rows that came back.
            request.orientation = fetch_orientation
            request.maxRows = max_rows
            response = self.hs2_client.FetchResults(request)
            HS2TestSuite.check_response(response)
            return self.get_num_rows(response.results)

        total_fetched = fetch_once(orientation, size)
        target = size if expected_num_rows is None else expected_num_rows
        while total_fetched < target:
            # Always try to fetch at most 'size'
            batch = fetch_once(TCLIService.TFetchOrientation.FETCH_NEXT,
                               size - total_fetched)
            assert batch > 0
            total_fetched += batch

        assert total_fetched == target

    def fetch_fail(self, handle, orientation, expected_error_prefix):
        """Fetches from the given operation handle expecting failure: asserts an error
        status whose message starts with expected_error_prefix, then returns the fetch
        response."""
        request = TCLIService.TFetchResultsReq()
        request.maxRows = 100
        request.orientation = orientation
        request.operationHandle = handle
        response = self.hs2_client.FetchResults(request)
        HS2TestSuite.check_response(
            response,
            expected_status_code=TCLIService.TStatusCode.ERROR_STATUS,
            expected_error_prefix=expected_error_prefix)
        return response

    def result_metadata(self, handle):
        """Returns the GetResultSetMetadata response for the given operation handle after
        asserting that the call succeeded."""
        metadata_req = TCLIService.TGetResultSetMetadataReq()
        metadata_req.operationHandle = handle
        metadata_resp = self.hs2_client.GetResultSetMetadata(metadata_req)
        HS2TestSuite.check_response(metadata_resp)
        return metadata_resp

    def column_results_to_string(self, columns):
        """Quick-and-dirty way to get a readable string to compare the output of a
        columnar-oriented query to its expected output.

        Returns a (num_rows, formatted) tuple. The row count is the number of values in
        the first column; 'formatted' has one line per row with the column values joined
        by ", " and "NULL" wherever the column's null bitmap flags the row. An empty or
        missing column list yields (0, "")."""
        def populated(column):
            # Returns the first typed value holder that is set on 'column', or None.
            for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
                typed_col = getattr(column, col_type)
                if typed_col is not None:
                    return typed_col
            return None

        if not columns:
            return (0, "")

        # Determine the number of rows from the populated holder of the first column.
        first = populated(columns[0])
        num_rows = 0 if first is None else len(first.values)
        if num_rows == 0:
            return (0, "")

        # Resolve every column once instead of once per row. bytearray() gives integer
        # elements for both a Python 2 str and a Python 3 bytes null bitmap.
        resolved = []
        for column in columns:
            typed_col = populated(column)
            if typed_col is not None:
                resolved.append((typed_col.values, bytearray(typed_col.nulls)))

        lines = []
        for i in range(num_rows):
            row = []
            for values, nulls in resolved:
                # Bit (i % 8) of byte (i // 8) marks row i as NULL; floor division keeps
                # the index an integer on Python 3 as well.
                if nulls[i // 8] & (1 << (i % 8)):
                    row.append("NULL")
                else:
                    row.append(str(values[i]))
            lines.append(", ".join(row) + "\n")
        return (num_rows, "".join(lines))

    def get_operation_status(self, operation_handle):
        """Issues GetOperationStatus for operation_handle and hands back the
        TGetOperationStatusResp without checking it."""
        status_req = TCLIService.TGetOperationStatusReq()
        status_req.operationHandle = operation_handle
        return self.hs2_client.GetOperationStatus(status_req)

    def wait_for_operation_state(self, operation_handle, expected_state,
                                 timeout=10, interval=1):
        """Waits for the operation to reach expected_state by polling GetOperationStatus
        every interval seconds, returning the TGetOperationStatusResp, or raising an
        assertion after timeout seconds.

        Each polled response is also checked for a success status."""
        # Remembered for the failure message; stays None if no poll ever happened
        # (for example with a non-positive timeout).
        last_state = None
        start_time = time()
        while time() - start_time < timeout:
            get_operation_status_resp = self.get_operation_status(
                operation_handle)
            HS2TestSuite.check_response(get_operation_status_resp)
            last_state = get_operation_status_resp.operationState
            # Compare by value: an identity check only passes when both sides happen to
            # be the very same object.
            if last_state == expected_state:
                return get_operation_status_resp
            sleep(interval)
        assert False, 'Did not reach expected operation state %s in time, actual state was ' \
            '%s' % (expected_state, last_state)
Beispiel #9
0
class TestAuthorizedProxy(CustomClusterTestSuite):
    def setup(self):
        """Opens a buffered Thrift transport to the HS2 port of the first impalad in the
        cluster and creates the binary-protocol client used by the tests."""
        impalad_service = self.cluster.impalads[0].service
        hs2_host = impalad_service.hostname
        hs2_port = self.cluster.impalads[0].service.hs2_port
        sock = TSocket(hs2_host, hs2_port)
        self.socket = sock
        buffered = TBufferedTransport(sock)
        self.transport = buffered
        buffered.open()
        binary_protocol = TBinaryProtocol.TBinaryProtocol(buffered)
        self.protocol = binary_protocol
        self.hs2_client = ImpalaHiveServer2Service.Client(binary_protocol)

    def teardown(self):
        """Closes the HS2 socket if one is set, then deletes the audit log directory,
        ignoring errors from the deletion."""
        sock = self.socket
        if sock:
            sock.close()
        shutil.rmtree(AUDIT_LOG_DIR, ignore_errors=True)

    def _execute_hs2_stmt(self, statement, verify=True):
        """
        Runs a statement over HS2 in the current session.

        :param statement: the statement to execute
        :param verify: when true, the response is passed to TestHS2.check_response
        :return: the ExecuteStatement response
        """
        from tests.hs2.test_hs2 import TestHS2
        request = TCLIService.TExecuteStatementReq()
        request.sessionHandle = self.session_handle
        request.statement = statement
        response = self.hs2_client.ExecuteStatement(request)
        if not verify:
            return response
        TestHS2.check_response(response)
        return response

    def _open_hs2(self, user, configuration, verify=True):
        """
        Opens an HS2 session.

        :param user: the user name to open the session with
        :param configuration: the session configuration
        :param verify: when true, the response is passed to TestHS2.check_response
        :return: the OpenSession response
        """
        from tests.hs2.test_hs2 import TestHS2
        request = TCLIService.TOpenSessionReq()
        request.username = user
        request.configuration = configuration
        response = self.hs2_client.OpenSession(request)
        if not verify:
            return response
        TestHS2.check_response(response)
        return response

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=foo=bar;hue={1} ".
        format(SENTRY_IMPALAD_ARGS, getuser()),
        catalogd_args=SENTRY_CATALOGD_ARGS)
    def test_authorized_proxy_user_with_sentry(self, unique_role):
        """Runs the HS2 authorized-proxy-user scenario inside the Sentry role fixture."""
        proxy_scenario = self._test_authorized_proxy
        self._test_authorized_proxy_with_sentry(unique_role, proxy_scenario)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=foo=bar;hue={1} ".
        format(RANGER_IMPALAD_ARGS, getuser()),
        catalogd_args=RANGER_CATALOGD_ARGS)
    def test_authorized_proxy_user_with_ranger(self):
        """Tests authorized proxy user with Ranger using HS2."""
        # This test fails due to bumping up the Ranger to a newer version.
        # TODO(fangyu.rao): Fix in a follow up commit.
        pytest.xfail("failed due to bumping up the Ranger to a newer version")
        # Kept so that the test body is back in effect once the xfail above is removed.
        self._test_authorized_proxy_with_ranger(self._test_authorized_proxy)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=hue=bar "
        "--authorized_proxy_group_config=foo=bar;hue={1}".format(
            SENTRY_IMPALAD_ARGS,
            grp.getgrgid(os.getgid()).gr_name),
        catalogd_args=SENTRY_CATALOGD_ARGS)
    def test_authorized_proxy_group_with_sentry(self, unique_role):
        """Runs the HS2 authorized-proxy-group scenario inside the Sentry role fixture."""
        proxy_scenario = self._test_authorized_proxy
        self._test_authorized_proxy_with_sentry(unique_role, proxy_scenario)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=hue=bar "
        "--authorized_proxy_group_config=foo=bar;hue={1}".format(
            RANGER_IMPALAD_ARGS,
            grp.getgrgid(os.getgid()).gr_name),
        catalogd_args=RANGER_CATALOGD_ARGS)
    def test_authorized_proxy_group_with_ranger(self):
        """Tests authorized proxy group with Ranger using HS2."""
        # This test fails due to bumping up the Ranger to a newer version.
        # TODO(fangyu.rao): Fix in a follow up commit.
        pytest.xfail("failed due to bumping up the Ranger to a newer version")
        # Kept so that the test body is back in effect once the xfail above is removed.
        self._test_authorized_proxy_with_ranger(self._test_authorized_proxy)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=foo=bar "
        "--authorized_proxy_group_config=foo=bar".format(SENTRY_IMPALAD_ARGS),
        catalogd_args=SENTRY_CATALOGD_ARGS)
    def test_no_matching_user_and_group_authorized_proxy_with_sentry(self):
        """Runs the shared no-matching-user-and-group delegation check on a cluster
        started with the Sentry arguments given in the decorator."""
        self._test_no_matching_user_and_group_authorized_proxy()

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=foo=bar "
        "--authorized_proxy_group_config=foo=bar".format(RANGER_IMPALAD_ARGS),
        catalogd_args=RANGER_CATALOGD_ARGS)
    def test_no_matching_user_and_group_authorized_proxy_with_ranger(self):
        """Runs the shared no-matching-user-and-group delegation check on a cluster
        started with the Ranger arguments given in the decorator."""
        self._test_no_matching_user_and_group_authorized_proxy()

    def _test_no_matching_user_and_group_authorized_proxy(self):
        """Opens a session as 'hue' that asks to impersonate 'abc' and asserts that the
        response reports the delegation as not authorized."""
        open_session_req = TCLIService.TOpenSessionReq()
        # The connecting user and the delegate must be the names that appear in the
        # error message asserted below.
        open_session_req.username = "hue"
        open_session_req.configuration = dict()
        open_session_req.configuration["impala.doas.user"] = "abc"
        resp = self.hs2_client.OpenSession(open_session_req)
        assert "User 'hue' is not authorized to delegate to 'abc'" in str(resp)

    def _test_authorized_proxy_with_sentry(self, role, test_func):
        """Creates 'role' with all privileges on tpch.lineitem, grants it to the current
        user's groups and calls test_func. Afterwards, in a fresh session opened as the
        current user, grants the role server-wide privileges, grants it to the user's
        group again and drops it."""
        run = self._execute_hs2_stmt

        def own_session():
            # Opens a new session as the current OS user and returns its handle.
            return self._open_hs2(getuser(), dict()).sessionHandle

        try:
            self.session_handle = own_session()
            run("create role {0}".format(role))
            run("grant all on table tpch.lineitem to role {0}".format(role))
            user_group = grp.getgrnam(getuser()).gr_name
            run("grant role {0} to group {1}".format(role, user_group))
            primary_group = grp.getgrgid(os.getgid()).gr_name
            run("grant role {0} to group {1}".format(role, primary_group))
            test_func()
        finally:
            self.session_handle = own_session()
            run("grant all on server to role {0}".format(role))
            cleanup_group = grp.getgrnam(getuser()).gr_name
            run("grant role {0} to group {1}".format(role, cleanup_group))
            run("drop role {0}".format(role))

    def _test_authorized_proxy_with_ranger(self, test_func):
        """As the Ranger admin user, grants the current user all privileges on
        tpch.lineitem and calls test_func. Afterwards, in a fresh admin session, revokes
        the grant again."""
        def admin_session():
            # Opens a new session as the Ranger admin user and returns its handle.
            return self._open_hs2(RANGER_ADMIN_USER, dict()).sessionHandle

        try:
            self.session_handle = admin_session()
            grant_stmt = "grant all on table tpch.lineitem to user {0}".format(
                getuser())
            self._execute_hs2_stmt(grant_stmt)
            test_func()
        finally:
            self.session_handle = admin_session()
            revoke_stmt = "revoke all on table tpch.lineitem from user {0}".format(
                getuser())
            self._execute_hs2_stmt(revoke_stmt)

    def _test_authorized_proxy(self):
        """End-to-end impersonation + authorization test. Expects authorization to be
        configured before running this test.

        Connects as 'hue' delegating to the current user, checks a denied and an allowed
        describe, the audit record and the profile's user fields, then checks that
        delegating to 'some_user' is rejected and that a session without delegation
        reports 'hue' as both effective and connected user."""
        # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
        # the module within this test function, rather than as a top-level import. This way
        # the tests in that module will not get pulled when executing this test suite. The fix
        # is to split the utility code out of the TestHS2 class and support HS2 as a first
        # class citizen in our test framework.
        from tests.hs2.test_hs2 import TestHS2

        # Try to query a table we are not authorized to access.
        self.session_handle = self._open_hs2("hue", {
            "impala.doas.user": getuser()
        }).sessionHandle
        bad_resp = self._execute_hs2_stmt("describe tpch_seq.lineitem", False)
        assert "User '%s' does not have privileges to access" % getuser() in \
               str(bad_resp)

        assert self._wait_for_audit_record(user=getuser(), impersonator="hue"), \
               "No matching audit event recorded in time window"

        # Now try the same operation on a table we are authorized to access.
        good_resp = self._execute_hs2_stmt("describe tpch.lineitem")
        TestHS2.check_response(good_resp)

        # Verify the correct user information is in the runtime profile. The session was
        # opened by 'hue' on behalf of the current user.
        query_id = operation_id_to_query_id(
            good_resp.operationHandle.operationId)
        profile_page = self.cluster.impalads[
            0].service.read_query_profile_page(query_id)
        self._verify_profile_user_fields(profile_page,
                                         effective_user=getuser(),
                                         delegated_user=getuser(),
                                         connected_user="hue")

        # Try to delegate a user we are not authorized to delegate to.
        resp = self._open_hs2("hue", {"impala.doas.user": "some_user"}, False)
        assert "User 'hue' is not authorized to delegate to 'some_user'" in str(
            resp)

        # Create a new session which does not have a do_as_user and run a simple query.
        self.session_handle = self._open_hs2("hue", dict()).sessionHandle
        resp = self._execute_hs2_stmt("select 1")

        # Verify the correct user information is in the runtime profile. Since there is
        # no do_as_user the Delegated User field should be empty.
        query_id = operation_id_to_query_id(resp.operationHandle.operationId)

        profile_page = self.cluster.impalads[
            0].service.read_query_profile_page(query_id)
        self._verify_profile_user_fields(profile_page,
                                         effective_user="hue",
                                         delegated_user="",
                                         connected_user="hue")

    def _verify_profile_user_fields(self, profile_str, effective_user,
                                    connected_user, delegated_user):
        """Asserts that the runtime profile string contains the expected User, Connected
        User and Delegated User lines, in that order of checking."""
        expected_lines = [
            "\n    User: {0}\n".format(effective_user),
            "\n    Connected User: {0}\n".format(connected_user),
            "\n    Delegated User: {0}\n".format(delegated_user),
        ]
        for expected_line in expected_lines:
            assert expected_line in profile_str

    def _wait_for_audit_record(self, user, impersonator, timeout_secs=30):
        """Polls the files in the audit log directory once per second until one of them
        holds a record for the given user and impersonator. Returns True on a match and
        False once timeout_secs have elapsed."""
        # The audit event might not show up immediately (the audit logs are flushed to disk
        # on regular intervals), so keep scanning until a matching record is found.
        began = time.time()
        while time.time() - began < timeout_secs:
            matched = any(
                self._find_matching_audit_record(file_name, user, impersonator)
                for file_name in os.listdir(AUDIT_LOG_DIR))
            if matched:
                return True
            time.sleep(1)
        return False

    def _find_matching_audit_record(self, audit_file_name, user, impersonator):
        """Returns True if the named audit log file has a line whose JSON object, under
        its smallest key, records the given user and impersonator; False otherwise."""
        audit_path = os.path.join(AUDIT_LOG_DIR, audit_file_name)
        with open(audit_path) as audit_log_file:
            for raw_line in audit_log_file:
                record = json.loads(raw_line)
                if len(record) == 0:
                    continue
                event = record[min(record)]
                if event["user"] != user:
                    continue
                if event["impersonator"] == impersonator:
                    return True
        return False
class TestAuthorization(CustomClusterTestSuite):
    def setup(self):
        """Opens a buffered Thrift transport to the HS2 port of the first impalad in the
        cluster and creates the binary-protocol client used by the tests."""
        impalad_service = self.cluster.impalads[0].service
        hs2_host = impalad_service.hostname
        hs2_port = self.cluster.impalads[0].service.hs2_port
        sock = TSocket(hs2_host, hs2_port)
        self.socket = sock
        buffered = TBufferedTransport(sock)
        self.transport = buffered
        buffered.open()
        binary_protocol = TBinaryProtocol.TBinaryProtocol(buffered)
        self.protocol = binary_protocol
        self.hs2_client = ImpalaHiveServer2Service.Client(binary_protocol)

    def teardown(self):
        """Closes the HS2 socket if one is set."""
        sock = self.socket
        if sock:
            sock.close()

    def __execute_hs2_stmt(self, statement, verify=True):
        """
        Runs a statement over HS2 in the current session.

        :param statement: the statement to execute
        :param verify: when true, the response is passed to TestHS2.check_response
        :return: the ExecuteStatement response
        """
        from tests.hs2.test_hs2 import TestHS2
        request = TCLIService.TExecuteStatementReq()
        request.sessionHandle = self.session_handle
        request.statement = statement
        response = self.hs2_client.ExecuteStatement(request)
        if not verify:
            return response
        TestHS2.check_response(response)
        return response

    def __open_hs2(self, user, configuration, verify=True):
        """
        Opens an HS2 session.

        :param user: the user name to open the session with
        :param configuration: the session configuration
        :param verify: when true, the response is passed to TestHS2.check_response
        :return: the OpenSession response
        """
        from tests.hs2.test_hs2 import TestHS2
        request = TCLIService.TOpenSessionReq()
        request.username = user
        request.configuration = configuration
        response = self.hs2_client.OpenSession(request)
        if not verify:
            return response
        TestHS2.check_response(response)
        return response

    @SkipIf.sentry_disabled
    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 "
        "--sentry_config={0} "
        "--authorization_policy_provider_class="
        "org.apache.impala.testutil.TestSentryResourceAuthorizationProvider".
        format(SENTRY_CONFIG_FILE),
        catalogd_args="--sentry_config={0} "
        "--authorization_policy_provider_class="
        "org.apache.impala.testutil.TestSentryResourceAuthorizationProvider".
        format(SENTRY_CONFIG_FILE),
        sentry_config=SENTRY_CONFIG_FILE)
    def test_custom_authorization_provider(self, unique_role):
        """Creates a role with select on tpch.lineitem, grants it to the current user's
        group, then checks that describing tpch_seq.lineitem is denied while describing
        tpch.lineitem succeeds. The role is dropped afterwards."""
        run = self.__execute_hs2_stmt
        try:
            self.session_handle = self.__open_hs2(getuser(), dict()).sessionHandle
            run("create role {0}".format(unique_role))
            user_group = grp.getgrnam(getuser()).gr_name
            run("grant role {0} to group {1}".format(unique_role, user_group))
            run("grant select on table tpch.lineitem to role {0}".format(
                unique_role))

            denied_resp = run("describe tpch_seq.lineitem", False)
            denied_msg = 'User \'%s\' does not have privileges to access' % getuser()
            assert denied_msg in str(denied_resp)
            run("describe tpch.lineitem")
        finally:
            run("drop role {0}".format(unique_role))

    @SkipIf.sentry_disabled
    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 "
        "--authorized_proxy_user_config=hue={0}".format(getuser()),
        catalogd_args="--sentry_config=" + SENTRY_CONFIG_FILE,
        sentry_config=SENTRY_CONFIG_FILE)
    def test_access_runtime_profile(self, unique_role, unique_name):
        """Checks who may read the runtime profile and exec summary of a query.

        Sets up a role and a scratch database, then runs a select over a view whose
        underlying tables the user cannot access (profile access must be denied) and over
        a view whose underlying table the user can access (profile access must be
        granted). Each case is run directly as the current user and as 'hue' delegating
        to the current user, with the operation left open and closed. The finally clause
        drops everything that was created."""
        unique_db = unique_name + "_db"

        try:
            self.session_handle = self.__open_hs2(getuser(),
                                                  dict()).sessionHandle
            self.__execute_hs2_stmt("create role {0}".format(unique_role))
            self.__execute_hs2_stmt(
                "grant create on server to role {0}".format(unique_role))
            self.__execute_hs2_stmt(
                "grant all on database tpch to role {0}".format(unique_role))
            self.__execute_hs2_stmt(
                "grant select on table functional.complex_view to role {0}".
                format(unique_role))
            self.__execute_hs2_stmt("grant role {0} to group {1}".format(
                unique_role,
                grp.getgrnam(getuser()).gr_name))
            # Create db with permissions
            self.__execute_hs2_stmt("create database {0}".format(unique_db))
            self.__execute_hs2_stmt(
                "grant all on database {0} to role {1}".format(
                    unique_db, unique_role))

            # Current user can't access view's underlying tables
            bad_resp = self.__execute_hs2_stmt(
                "explain select * from functional.complex_view", False)
            assert 'User \'%s\' does not have privileges to EXPLAIN' % getuser() in \
                   str(bad_resp)
            # User should not have access to the runtime profile
            # (checked once with the operation left open and once with it closed)
            self.__run_stmt_and_verify_profile_access(
                "select * from functional.complex_view", False, False)
            self.__run_stmt_and_verify_profile_access(
                "select * from functional.complex_view", False, True)

            # Repeat as a delegated user
            self.session_handle = \
                self.__open_hs2('hue', {'impala.doas.user': getuser()}).sessionHandle
            # User should not have access to the runtime profile
            self.__run_stmt_and_verify_profile_access(
                "select * from functional.complex_view", False, False)
            self.__run_stmt_and_verify_profile_access(
                "select * from functional.complex_view", False, True)

            # Create a view for which the user has access to the underlying tables.
            self.session_handle = self.__open_hs2(getuser(),
                                                  dict()).sessionHandle
            self.__execute_hs2_stmt(
                "create view if not exists {0}.customer_view as select * from tpch.customer "
                "limit 1".format(unique_db))

            # User should be able to run EXPLAIN
            self.__execute_hs2_stmt(
                "explain select * from {0}.customer_view".format(unique_db))

            # User should have access to the runtime profile and exec summary
            self.__run_stmt_and_verify_profile_access(
                "select * from {0}.customer_view".format(unique_db), True,
                False)
            self.__run_stmt_and_verify_profile_access(
                "select * from {0}.customer_view".format(unique_db), True,
                True)

            # Repeat as a delegated user
            self.session_handle = \
                self.__open_hs2('hue', {'impala.doas.user': getuser()}).sessionHandle
            # Delegated user is the current user
            self.__run_stmt_and_verify_profile_access(
                "select * from {0}.customer_view".format(unique_db), True,
                False)
            self.__run_stmt_and_verify_profile_access(
                "select * from {0}.customer_view".format(unique_db), True,
                True)
        finally:
            # Cleanup runs in whichever session is current when the try block exits.
            # The role is first granted server-wide privileges, then the objects and
            # finally the role itself are dropped.
            self.__execute_hs2_stmt(
                "grant all on server to role {0}".format(unique_role))
            self.__execute_hs2_stmt(
                "drop view if exists {0}.customer_view".format(unique_db))
            self.__execute_hs2_stmt(
                "drop table if exists {0}.customer".format(unique_db))
            self.__execute_hs2_stmt(
                "drop database if exists {0}".format(unique_db))
            self.__execute_hs2_stmt("drop role {0}".format(unique_role))

    def __run_stmt_and_verify_profile_access(self, stmt, has_access,
                                             close_operation):
        """Runs 'stmt' and then requests its runtime profile and exec summary. With
        'has_access' true, both requests must succeed and the profile must contain a
        plan; otherwise both responses must carry the not-authorized message. If
        'close_operation' is true, the operation is closed before the profile and exec
        summary are requested."""
        from tests.hs2.test_hs2 import TestHS2
        execute_resp = self.__execute_hs2_stmt(stmt, False)

        if close_operation:
            close_req = TCLIService.TCloseOperationReq()
            close_req.operationHandle = execute_resp.operationHandle
            close_resp = self.hs2_client.CloseOperation(close_req)
            TestHS2.check_response(close_resp)

        def assert_denied(response):
            # Asserts that the response text names the current user as unauthorized.
            assert "User %s is not authorized to access the runtime profile or "\
                "execution summary." % (getuser()) in str(response)

        profile_req = ImpalaHiveServer2Service.TGetRuntimeProfileReq()
        profile_req.operationHandle = execute_resp.operationHandle
        profile_req.sessionHandle = self.session_handle
        profile_resp = self.hs2_client.GetRuntimeProfile(profile_req)
        if not has_access:
            assert_denied(profile_resp)
        else:
            TestHS2.check_response(profile_resp)
            assert "Plan: " in profile_resp.profile

        summary_req = ImpalaHiveServer2Service.TGetExecSummaryReq()
        summary_req.operationHandle = execute_resp.operationHandle
        summary_req.sessionHandle = self.session_handle
        summary_resp = self.hs2_client.GetExecSummary(summary_req)
        if not has_access:
            assert_denied(summary_resp)
        else:
            TestHS2.check_response(summary_resp)

    @SkipIf.sentry_disabled
    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=" +
        SENTRY_CONFIG_FILE,
        catalogd_args="--sentry_config=" + SENTRY_CONFIG_FILE,
        impala_log_dir=tempfile.mkdtemp(prefix="test_deprecated_none_",
                                        dir=os.getenv("LOG_DIR")))
    def test_deprecated_flag_doesnt_show(self):
        """Checks, via assert_no_files_in_dir_contain, that the 'Ignoring removed flag
        authorization_policy_file' message is absent from the files in impala_log_dir
        when the cluster is started without that flag."""
        assert_no_files_in_dir_contain(
            self.impala_log_dir, "Ignoring removed flag "
            "authorization_policy_file")

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=ignored_file",
                                      impala_log_dir=tempfile.mkdtemp(
                                          prefix="test_deprecated_",
                                          dir=os.getenv("LOG_DIR")))
    def test_deprecated_flags(self):
        """Checks, via assert_file_in_dir_contains, that the 'Ignoring removed flag
        authorization_policy_file' message shows up in impala_log_dir when the cluster
        is started with --authorization_policy_file."""
        assert_file_in_dir_contains(
            self.impala_log_dir, "Ignoring removed flag "
            "authorization_policy_file")

    @SkipIf.sentry_disabled
    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=%s" %
        SENTRY_CONFIG_FILE,
        catalogd_args="--sentry_config=%s" % SENTRY_CONFIG_FILE,
        impala_log_dir=tempfile.mkdtemp(prefix="test_catalog_restart_",
                                        dir=os.getenv("LOG_DIR")))
    def test_catalog_restart(self, unique_role):
        """IMPALA-7713: a catalogd restart with authorization enabled should reset the
        privileges previously stored in impalad's catalog, so that impalad does not keep
        stale privilege data.

        Creates three roles with one database-level grant each and checks that
        SHOW GRANT ROLE reports the same privileges before and after restarting only
        catalogd."""
        # (SHOW GRANT statement template, database expected in the single result row)
        grant_checks = [
            ("show grant role %s_foo", "functional"),
            ("show grant role %s_bar", "functional_kudu"),
            ("show grant role %s_baz", "functional_avro"),
        ]
        # Statements that set up the roles and grants, in execution order.
        setup_stmts = [
            "create role %s_foo",
            "create role %s_bar",
            "create role %s_baz",
            "grant all on database functional to role %s_foo",
            "grant all on database functional_kudu to role %s_bar",
            "grant all on database functional_avro to role %s_baz",
        ]

        def assert_privileges():
            for show_stmt, db_name in grant_checks:
                shown = self.client.execute(show_stmt % unique_role)
                TestAuthorization._check_privileges(
                    shown, [["database", db_name, "", "", "", "all", "false"]])

        self.role_cleanup(unique_role)
        try:
            for stmt in setup_stmts:
                self.client.execute(stmt % unique_role)

            assert_privileges()
            restart_options = [
                "--catalogd_args=--sentry_config=%s" % SENTRY_CONFIG_FILE,
                "--restart_catalogd_only"
            ]
            self._start_impala_cluster(restart_options)
            # The privileges must be unchanged after the catalogd restart.
            assert_privileges()
        finally:
            self.role_cleanup(unique_role)

    def role_cleanup(self, role_name_match):
        """Drops every role listed by SHOW ROLES whose name contains role_name_match."""
        listed_roles = self.client.execute("show roles").data
        doomed = [name for name in listed_roles if role_name_match in name]
        for name in doomed:
            self.client.execute("drop role %s" % name)

    @staticmethod
    def _check_privileges(result, expected):
        def columns(row):
            cols = row.split("\t")
            return cols[0:len(cols) - 1]

        assert map(columns, result.data) == expected

    @SkipIf.sentry_disabled
    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=%s" %
        SENTRY_CONFIG_FILE,
        catalogd_args="--sentry_config=%s" % SENTRY_CONFIG_FILE,
        impala_log_dir=tempfile.mkdtemp(prefix="test_catalog_restart_",
                                        dir=os.getenv("LOG_DIR")))
    def test_catalog_object(self, unique_role):
        """IMPALA-7721: exercises the /catalog_object web API for principals and
        privileges, on both catalogd and the first impalad.

        For each service, dumps of an existing principal and privilege must contain
        "catalog_version", while dumps of non-existent ones must contain
        "CatalogException"."""
        self.role_cleanup(unique_role)
        try:
            self.client.execute("create role %s" % unique_role)
            self.client.execute(
                "grant select on database functional to role %s" % unique_role)
            services = [
                self.cluster.catalogd.service,
                self.cluster.get_first_impalad().service
            ]
            for service in services:
                principal_dump = service.get_catalog_object_dump(
                    "PRINCIPAL", "%s.ROLE" % unique_role)
                assert "catalog_version" in principal_dump

                # The principal ID is needed to name the privilege objects below.
                id_match = re.search(r"principal_id \(i32\) = (\d+)",
                                     principal_dump)
                assert id_match is not None
                principal_id = id_match.group(1)

                # Privilege that was granted above.
                granted_privilege = urllib.quote(
                    "server=server1->db=functional->action=select->grantoption=false.%s.ROLE"
                    % principal_id)
                privilege_dump = service.get_catalog_object_dump(
                    "PRIVILEGE", granted_privilege)
                assert "catalog_version" in privilege_dump

                # Principal that does not exist.
                missing_principal_dump = service.get_catalog_object_dump(
                    "PRINCIPAL", "doesnotexist.ROLE")
                assert "CatalogException" in missing_principal_dump

                # Privilege that does not exist.
                missing_privilege = urllib.quote(
                    "server=server1->db=doesntexist->action=select->grantoption=false.%s.ROLE"
                    % principal_id)
                missing_privilege_dump = service.get_catalog_object_dump(
                    "PRIVILEGE", missing_privilege)
                assert "CatalogException" in missing_privilege_dump
        finally:
            self.role_cleanup(unique_role)

    @SkipIf.sentry_disabled
    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=%s" %
        SENTRY_CONFIG_FILE,
        catalogd_args=
        "--sentry_config=%s --sentry_catalog_polling_frequency_s=3600" %
        SENTRY_CONFIG_FILE,
        impala_log_dir=tempfile.mkdtemp(
            prefix="test_invalidate_metadata_sentry_unavailable_",
            dir=os.getenv("LOG_DIR")))
    def test_invalidate_metadata_sentry_unavailable(self, unique_role):
        """IMPALA-7824: INVALIDATE METADATA issued while Sentry is unavailable should
        return an error instead of making Impala hang, and should succeed again once
        Sentry has been started."""
        invalidate_stmt = "invalidate metadata"
        # Fragments that must all appear in the failure reported while Sentry is down.
        expected_error_fragments = (
            "MESSAGE: CatalogException: Error refreshing authorization policy:",
            "CAUSED BY: ImpalaRuntimeException: Error refreshing authorization policy."
            " Sentry is unavailable. Ensure Sentry is up:",
        )

        self.role_cleanup(unique_role)
        try:
            user_group = grp.getgrnam(getuser()).gr_name
            self.client.execute("create role %s" % unique_role)
            self.client.execute("grant all on server to role %s" % unique_role)
            self.client.execute("grant role %s to group `%s`" %
                                (unique_role, user_group))

            self._stop_sentry_service()
            # With Sentry stopped the statement is expected to fail.
            failure = self.execute_query_expect_failure(self.client,
                                                        invalidate_stmt)
            failure_text = str(failure)
            for fragment in expected_error_fragments:
                assert fragment in failure_text

            self._start_sentry_service(SENTRY_CONFIG_FILE)
            # With Sentry running again the statement is expected to succeed.
            self.execute_query_expect_success(self.client, invalidate_stmt)
        finally:
            self.role_cleanup(unique_role)

    @SkipIf.sentry_disabled
    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=%s" %
        SENTRY_CONFIG_FILE,
        catalogd_args=
        "--sentry_config=%s --sentry_catalog_polling_frequency_s=3600 " %
        SENTRY_CONFIG_FILE,
        impala_log_dir=tempfile.mkdtemp(prefix="test_refresh_authorization_",
                                        dir=os.getenv("LOG_DIR")))
    def test_refresh_authorization(self, unique_role):
        """Tests the REFRESH AUTHORIZATION statement by adding and removing roles and
        privileges externally. A long Sentry polling interval is used so that
        authorization metadata changed externally is not picked up by polling, which
        leaves the explicit refresh statement as the thing under test.

        The scenario runs twice: with sync_ddl=1, checking through a beeswax client per
        impalad, and with sync_ddl=0, checking only through self.client."""
        user_group = grp.getgrnam(getuser()).gr_name
        self.role_cleanup(unique_role)
        for sync_ddl in [1, 0]:
            query_options = {'sync_ddl': sync_ddl}
            if sync_ddl:
                # With sync_ddl on, the changes should be visible on all coordinators,
                # so verify through every impalad.
                clients = [
                    impalad.service.create_beeswax_client()
                    for impalad in self.cluster.impalads
                ]
            else:
                clients = [self.client]
            try:
                self.client.execute("create role %s" % unique_role)
                self.client.execute("grant role %s to group `%s`" %
                                    (unique_role, user_group))
                self.client.execute("grant refresh on server to %s" %
                                    unique_role)

                self.validate_refresh_authorization_roles(
                    unique_role, query_options, clients)
                self.validate_refresh_authorization_privileges(
                    unique_role, query_options, clients)
            finally:
                self.role_cleanup(unique_role)

    def validate_refresh_authorization_roles(self, unique_role, query_options,
                                             clients):
        """Checks REFRESH AUTHORIZATION against roles dropped and created externally
        through sentryShell.

        A change made through sentryShell must not show up in SHOW ROLES on self.client
        until "refresh authorization" has been run with the given query_options;
        afterwards it must show up on every client in 'clients'."""
        def run_sentry_shell(command_template, role_name):
            # Runs sentryShell via bash, forwarding its output to this process.
            command = command_template % (os.getenv("SENTRY_HOME"),
                                          os.getenv("SENTRY_CONF_DIR"),
                                          role_name)
            subprocess.check_call(["/bin/bash", "-c", command],
                                  stdout=sys.stdout,
                                  stderr=sys.stderr)

        def role_is_listed(client, role_name):
            # True if any SHOW ROLES row seen by 'client' contains role_name.
            shown = self.execute_query_expect_success(client, "show roles")
            return any(role_name in x for x in shown.data)

        def refresh_authorization():
            self.execute_query_expect_success(self.client,
                                              "refresh authorization",
                                              query_options=query_options)

        try:
            # Create two roles inside Impala.
            self.client.execute("create role %s_internal1" % unique_role)
            self.client.execute("create role %s_internal2" % unique_role)

            # Drop an existing role (_internal1) outside Impala.
            dropped_role = "%s_internal1" % unique_role
            run_sentry_shell(
                "%s/bin/sentryShell --conf %s/sentry-site.xml -dr -r %s",
                dropped_role)
            # Impala still lists the role until the refresh.
            assert role_is_listed(self.client, dropped_role)
            refresh_authorization()
            for client in clients:
                assert not role_is_listed(client, dropped_role)

            # Add a new role outside Impala.
            added_role = "%s_external" % unique_role
            run_sentry_shell(
                "%s/bin/sentryShell --conf %s/sentry-site.xml -cr -r %s",
                added_role)
            # Impala does not list the role until the refresh.
            assert not role_is_listed(self.client, added_role)
            refresh_authorization()
            for client in clients:
                assert role_is_listed(client, added_role)
        finally:
            for suffix in ["internal1", "internal2", "external"]:
                self.role_cleanup("%s_%s" % (unique_role, suffix))

    def validate_refresh_authorization_privileges(self, unique_role,
                                                  query_options, clients):
        """Checks REFRESH AUTHORIZATION against a privilege granted externally through
        sentryShell.

        The select privilege on functional.alltypes granted outside Impala must only
        take effect, for every client in 'clients', after "refresh authorization" has
        been run with the given query_options."""
        show_grant_stmt = "show grant role %s" % unique_role
        select_stmt = "select * from functional.alltypes limit 1"

        # Grant select privilege outside Impala.
        grant_command = (
            "%s/bin/sentryShell --conf %s/sentry-site.xml -gpr -p "
            "'server=server1->db=functional->table=alltypes->action=select' -r %s"
            % (os.getenv("SENTRY_HOME"), os.getenv("SENTRY_CONF_DIR"),
               unique_role))
        subprocess.check_call(["/bin/bash", "-c", grant_command],
                              stdout=sys.stdout,
                              stderr=sys.stderr)

        # Before refresh authorization, there should only be one refresh privilege.
        before = self.execute_query_expect_success(self.client, show_grant_stmt)
        assert len(before.data) == 1
        assert any("refresh" in x for x in before.data)

        for client in clients:
            self.execute_query_expect_failure(client, select_stmt)

        self.execute_query_expect_success(self.client,
                                          "refresh authorization",
                                          query_options=query_options)

        for client in clients:
            # Ensure select privilege was granted after refresh authorization.
            after = self.execute_query_expect_success(client, show_grant_stmt)
            assert len(after.data) == 2
            assert any("select" in x for x in after.data)
            assert any("refresh" in x for x in after.data)
            self.execute_query_expect_success(client, select_stmt)

    @staticmethod
    def _verify_show_dbs(result,
                         unique_name,
                         visibility_privileges=PRIVILEGES):
        """Helper for verifying SHOW DATABASES results.

        For every privilege in PRIVILEGES, the entry "db_<unique_name>_<priv>\\t" must be
        in result.data when the privilege is 'all' or is one of visibility_privileges,
        and must not be in result.data otherwise."""
        for privilege in PRIVILEGES:
            # Result lines are in the format of "db_name\tdb_comment"
            db_entry = 'db_%s_%s\t' % (unique_name, privilege)
            expect_visible = (privilege == 'all'
                              or privilege in visibility_privileges)
            if expect_visible:
                assert db_entry in result.data
            else:
                assert db_entry not in result.data

    def _test_sentry_show_stmts_helper(self, unique_role, unique_name,
                                       visibility_privileges):
        """Shared body of the Sentry SHOW DATABASES / SHOW TABLES visibility tests.

        Creates one database and one table per privilege in PRIVILEGES, grants the
        matching privilege on each to unique_role, and grants the role to both the
        current user's group and the 'root' group. It then checks that:
          - the current user sees every database and table it created;
          - user 'root' sees only the objects whose privilege is 'all' or is listed in
            visibility_privileges (tables are only checked for presence).

        unique_role: role name created for, and dropped by, this helper.
        unique_name: prefix used for the database names created here.
        visibility_privileges: privileges expected to make an object visible to the
          other user."""
        unique_db = unique_name + "_db"
        # TODO: can we create and use a temp username instead of using root?
        # The user name must be 'root' so that it maps to another_user_grp, which is
        # the group the role is granted to below; the previous '******' value was a
        # masked placeholder rather than a real user name.
        another_user = 'root'
        another_user_grp = 'root'
        self.role_cleanup(unique_role)
        try:
            self.client.execute("create role %s" % unique_role)
            self.client.execute("grant create on server to role %s" %
                                unique_role)
            self.client.execute("grant drop on server to role %s" %
                                unique_role)
            self.client.execute("grant role %s to group %s" %
                                (unique_role, grp.getgrnam(getuser()).gr_name))

            self.client.execute("drop database if exists %s cascade" %
                                unique_db)
            self.client.execute("create database %s" % unique_db)
            for priv in PRIVILEGES:
                # One database and one table per privilege, each granted to the role.
                self.client.execute("create database db_%s_%s" %
                                    (unique_name, priv))
                self.client.execute(
                    "grant {0} on database db_{1}_{2} to role {3}".format(
                        priv, unique_name, priv, unique_role))
                self.client.execute("create table %s.tbl_%s (i int)" %
                                    (unique_db, priv))
                self.client.execute(
                    "grant {0} on table {1}.tbl_{2} to role {3}".format(
                        priv, unique_db, priv, unique_role))
            self.client.execute("grant role %s to group %s" %
                                (unique_role, another_user_grp))

            # Owner (current user) can still see all the owned databases and tables
            result = self.client.execute("show databases")
            TestAuthorization._verify_show_dbs(result, unique_name)
            result = self.client.execute("show tables in %s" % unique_db)
            assert result.data == ["tbl_%s" % p for p in PRIVILEGES]

            # Check SHOW DATABASES and SHOW TABLES using another username
            # Create another client so we can use another username
            root_impalad_client = self.create_impala_client()
            result = self.execute_query_expect_success(root_impalad_client,
                                                       "show databases",
                                                       user=another_user)
            TestAuthorization._verify_show_dbs(result, unique_name,
                                               visibility_privileges)
            result = self.execute_query_expect_success(root_impalad_client,
                                                       "show tables in %s" %
                                                       unique_db,
                                                       user=another_user)
            # Only show tables with privileges implying any of the visibility privileges
            assert 'tbl_all' in result.data  # ALL can imply to any privilege
            for p in visibility_privileges:
                assert 'tbl_%s' % p in result.data
        finally:
            # Drop everything created above, even if an assertion failed.
            self.client.execute("drop database if exists %s cascade" %
                                unique_db)
            for priv in PRIVILEGES:
                self.client.execute(
                    "drop database if exists db_%s_%s cascade" %
                    (unique_name, priv))
            self.role_cleanup(unique_role)

    @SkipIf.sentry_disabled
    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=%s "
        "--authorized_proxy_user_config=%s=* "
        "--min_privilege_set_for_show_stmts=select" %
        (SENTRY_CONFIG_FILE, getuser()),
        catalogd_args="--sentry_config={0}".format(SENTRY_CONFIG_FILE),
        sentry_config=SENTRY_CONFIG_FILE_OO,  # Enable Sentry Object Ownership
        sentry_log_dir="{0}/test_sentry_show_stmts_with_select".format(
            SENTRY_BASE_LOG_DIR))
    def test_sentry_show_stmts_with_select(self, unique_role, unique_name):
        """Runs the Sentry SHOW-statement helper with 'select' as the only visibility
        privilege."""
        visibility_privileges = ['select']
        self._test_sentry_show_stmts_helper(unique_role, unique_name,
                                            visibility_privileges)

    @SkipIf.sentry_disabled
    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=%s "
        "--authorized_proxy_user_config=%s=* "
        "--min_privilege_set_for_show_stmts=select,insert" %
        (SENTRY_CONFIG_FILE, getuser()),
        catalogd_args="--sentry_config={0}".format(SENTRY_CONFIG_FILE),
        sentry_config=SENTRY_CONFIG_FILE_OO,  # Enable Sentry Object Ownership
        sentry_log_dir="{0}/test_sentry_show_stmts_with_select_insert".format(
            SENTRY_BASE_LOG_DIR))
    def test_sentry_show_stmts_with_select_insert(self, unique_role,
                                                  unique_name):
        """Runs the Sentry SHOW-statement helper with 'select' and 'insert' as the
        visibility privileges."""
        visibility_privileges = ['select', 'insert']
        self._test_sentry_show_stmts_helper(unique_role, unique_name,
                                            visibility_privileges)

    @SkipIf.sentry_disabled
    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=%s "
        "--authorized_proxy_user_config=%s=* "
        "--min_privilege_set_for_show_stmts=any" %
        (SENTRY_CONFIG_FILE, getuser()),
        catalogd_args="--sentry_config={0}".format(SENTRY_CONFIG_FILE),
        sentry_config=SENTRY_CONFIG_FILE_OO,  # Enable Sentry Object Ownership
        sentry_log_dir="{0}/test_sentry_show_stmts_with_any".format(
            SENTRY_BASE_LOG_DIR))
    def test_sentry_show_stmts_with_any(self, unique_role, unique_name):
        """Runs the Sentry SHOW-statement helper with every privilege in PRIVILEGES as
        a visibility privilege."""
        visibility_privileges = PRIVILEGES
        self._test_sentry_show_stmts_helper(unique_role, unique_name,
                                            visibility_privileges)

    def _test_ranger_show_stmts_helper(self, unique_name,
                                       visibility_privileges):
        """Shared body of the Ranger SHOW DATABASES / SHOW TABLES visibility tests.

        Through a separate admin client, creates one database and one table per
        privilege in PRIVILEGES and grants the matching privilege on each to the
        current user. It then checks that the admin client sees all of them, and that
        self.client sees the objects whose privilege is 'all' or is listed in
        visibility_privileges (tables are only checked for presence)."""
        scratch_db = unique_name + "_db"
        admin_client = self.create_impala_client()
        try:
            admin_client.execute("drop database if exists %s cascade" %
                                 scratch_db,
                                 user=ADMIN)
            admin_client.execute("create database %s" % scratch_db, user=ADMIN)
            for privilege in PRIVILEGES:
                # One database and one table per privilege, each granted to the user.
                admin_client.execute("create database db_%s_%s" %
                                     (unique_name, privilege))
                db_grant = "grant {0} on database db_{1}_{2} to user {3}".format(
                    privilege, unique_name, privilege, getuser())
                admin_client.execute(db_grant)
                admin_client.execute("create table %s.tbl_%s (i int)" %
                                     (scratch_db, privilege))
                table_grant = "grant {0} on table {1}.tbl_{2} to user {3}".format(
                    privilege, scratch_db, privilege, getuser())
                admin_client.execute(table_grant)

            # Admin can still see all the databases and tables
            admin_dbs = admin_client.execute("show databases")
            TestAuthorization._verify_show_dbs(admin_dbs, unique_name)
            admin_tables = admin_client.execute("show tables in %s" %
                                                scratch_db)
            assert admin_tables.data == ["tbl_%s" % p for p in PRIVILEGES]

            # Check SHOW DATABASES and SHOW TABLES using another username
            user_dbs = self.client.execute("show databases")
            TestAuthorization._verify_show_dbs(user_dbs, unique_name,
                                               visibility_privileges)
            user_tables = self.client.execute("show tables in %s" % scratch_db)
            # Only show tables with privileges implying any of the visibility privileges
            assert 'tbl_all' in user_tables.data  # ALL can imply to any privilege
            for visible in visibility_privileges:
                assert 'tbl_%s' % visible in user_tables.data
        finally:
            # Drop everything created above, even if an assertion failed.
            admin_client.execute("drop database if exists %s cascade" %
                                 scratch_db)
            for privilege in PRIVILEGES:
                admin_client.execute(
                    "drop database if exists db_%s_%s cascade" %
                    (unique_name, privilege))

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server-name=server1 --ranger_service_type=hive "
        "--ranger_app_id=impala --authorization_provider=ranger "
        "--min_privilege_set_for_show_stmts=select",
        catalogd_args="--server-name=server1 --ranger_service_type=hive "
        "--ranger_app_id=impala --authorization_provider=ranger")
    def test_ranger_show_stmts_with_select(self, unique_name):
        """Runs the Ranger SHOW-statement helper with 'select' as the only visibility
        privilege."""
        visibility_privileges = ['select']
        self._test_ranger_show_stmts_helper(unique_name, visibility_privileges)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server-name=server1 --ranger_service_type=hive "
        "--ranger_app_id=impala --authorization_provider=ranger "
        "--min_privilege_set_for_show_stmts=select,insert",
        catalogd_args="--server-name=server1 --ranger_service_type=hive "
        "--ranger_app_id=impala --authorization_provider=ranger")
    def test_ranger_show_stmts_with_select_insert(self, unique_name):
        """Runs the Ranger SHOW-statement helper with 'select' and 'insert' as the
        visibility privileges."""
        visibility_privileges = ['select', 'insert']
        self._test_ranger_show_stmts_helper(unique_name, visibility_privileges)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server-name=server1 --ranger_service_type=hive "
        "--ranger_app_id=impala --authorization_provider=ranger "
        "--min_privilege_set_for_show_stmts=any",
        catalogd_args="--server-name=server1 --ranger_service_type=hive "
        "--ranger_app_id=impala --authorization_provider=ranger")
    def test_ranger_show_stmts_with_any(self, unique_name):
        """Runs the Ranger SHOW-statement helper with every privilege in PRIVILEGES as
        a visibility privilege."""
        visibility_privileges = PRIVILEGES
        self._test_ranger_show_stmts_helper(unique_name, visibility_privileges)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server-name=server1 --ranger_service_type=hive "
        "--ranger_app_id=impala --authorization_provider=ranger "
        "--num_check_authorization_threads=%d" % (random.randint(2, 128)),
        catalogd_args="--server-name=server1 --ranger_service_type=hive "
        "--ranger_app_id=impala --authorization_provider=ranger")
    def test_num_check_authorization_threads_with_ranger(self, unique_name):
        """Runs the Ranger SHOW-statement helper, with every privilege in PRIVILEGES
        as a visibility privilege, on a cluster started with a random
        --num_check_authorization_threads value."""
        visibility_privileges = PRIVILEGES
        self._test_ranger_show_stmts_helper(unique_name, visibility_privileges)

    @SkipIf.sentry_disabled
    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=%s "
        "--authorized_proxy_user_config=%s=* "
        "--num_check_authorization_threads=%d" %
        (SENTRY_CONFIG_FILE, getuser(), random.randint(2, 128)),
        catalogd_args="--sentry_config={0}".format(SENTRY_CONFIG_FILE),
        sentry_config=SENTRY_CONFIG_FILE_OO,  # Enable Sentry Object Ownership
        sentry_log_dir="{0}/test_num_check_authorization_threads_with_sentry".
        format(SENTRY_BASE_LOG_DIR))
    def test_num_check_authorization_threads_with_sentry(
            self, unique_role, unique_name):
        """Runs the Sentry SHOW-statement helper, with every privilege in PRIVILEGES
        as a visibility privilege, on a cluster started with a random
        --num_check_authorization_threads value."""
        visibility_privileges = PRIVILEGES
        self._test_sentry_show_stmts_helper(unique_role, unique_name,
                                            visibility_privileges)
Beispiel #11
0
class HS2TestSuite(ImpalaTestSuite):
    """Base suite for HiveServer2 tests.

    setup() opens a buffered, binary-protocol thrift connection to the address in
    IMPALAD_HS2_HOST_PORT and creates the TEST_DB database; teardown() drops the
    database and closes the socket. The remaining methods are helpers for closing
    operations and for fetching results with assertions on the row counts."""

    # This DB will be created/dropped for every HS2TestSuite subclass. Make the name unique
    # so different test suites don't clobber each other's DBs. The [2:] is to remove the
    # "0." from the random floating-point number.
    TEST_DB = "hs2_db" + str(random.random())[2:]

    # Attribute names probed, in this order, on the first column of a columnar result
    # set to find the one that carries the values (see get_num_rows()).
    HS2_V6_COLUMN_TYPES = ["boolVal", "stringVal", "byteVal", "i16Val", "i32Val", "i64Val", "doubleVal", "binaryVal"]

    def setup(self):
        """Drops any leftover TEST_DB, connects the HS2 client and creates TEST_DB."""
        self.cleanup_db(self.TEST_DB)
        host, port = IMPALAD_HS2_HOST_PORT.split(":")
        self.socket = TSocket(host, port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)
        self.client.execute("create database %s" % self.TEST_DB)

    def teardown(self):
        """Drops TEST_DB and closes the HS2 socket."""
        self.cleanup_db(self.TEST_DB)
        if self.socket:
            self.socket.close()

    @staticmethod
    def check_response(
        response, expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS, expected_error_prefix=None
    ):
        """Asserts that response.status carries expected_status_code.

        When a non-success status is expected and expected_error_prefix is given, also
        asserts that the response's error message starts with that prefix."""
        assert response.status.statusCode == expected_status_code
        if expected_status_code != TCLIService.TStatusCode.SUCCESS_STATUS and expected_error_prefix is not None:
            assert response.status.errorMessage.startswith(expected_error_prefix)

    def close(self, op_handle):
        """Closes the operation identified by op_handle and asserts a success status."""
        close_op_req = TCLIService.TCloseOperationReq()
        close_op_req.operationHandle = op_handle
        close_op_resp = self.hs2_client.CloseOperation(close_op_req)
        assert close_op_resp.status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS

    def get_num_rows(self, result_set):
        """Returns the number of rows in result_set.

        Uses len(result_set.rows) when no columns are set; otherwise returns the number
        of values in the first column, read from whichever typed attribute in
        HS2_V6_COLUMN_TYPES is set. Fails an assertion if none of them is set."""
        # rows will always be set, so the only way to tell if we should use it is to see if
        # any columns are set
        if result_set.columns is None or len(result_set.columns) == 0:
            return len(result_set.rows)

        first_column = result_set.columns[0]
        for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
            typed_col = getattr(first_column, col_type)
            # Identity check: only an unset (None) attribute should be skipped.
            if typed_col is not None:
                return len(typed_col.values)

        assert False, "no typed column is set on the first result column"

    def __fetch_results(self, handle, orientation, max_rows):
        """Issues one FetchResults call for at most max_rows rows and returns the
        response without checking its status."""
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = max_rows
        return self.hs2_client.FetchResults(fetch_results_req)

    def fetch(self, handle, orientation, size, expected_num_rows=None):
        """Fetches at most size number of rows from the query identified by the given
        operation handle. Uses the given fetch orientation. Asserts that the fetch returns
        a success status, and that the number of rows returned is equal to size, or
        equal to the given expected_num_rows (if one was given). Returns the fetch
        response."""
        fetch_results_resp = self.__fetch_results(handle, orientation, size)
        HS2TestSuite.check_response(fetch_results_resp)
        num_rows = size if expected_num_rows is None else expected_num_rows
        assert self.get_num_rows(fetch_results_resp.results) == num_rows
        return fetch_results_resp

    def fetch_until(self, handle, orientation, size):
        """Fetches exactly 'size' rows from the given query handle, with the given
        fetch orientation. If the first fetch returns fewer rows than 'size', further
        fetches are issued for the remaining rows until 'size' rows have been fetched.
        Asserts that every fetch succeeds and that every follow-up fetch returns at
        least one row."""
        fetch_results_resp = self.__fetch_results(handle, orientation, size)
        HS2TestSuite.check_response(fetch_results_resp)
        num_rows_fetched = self.get_num_rows(fetch_results_resp.results)
        while num_rows_fetched < size:
            # Only ask for the rows that are still missing.
            fetch_results_resp = self.__fetch_results(handle, orientation, size - num_rows_fetched)
            HS2TestSuite.check_response(fetch_results_resp)
            last_fetch_size = self.get_num_rows(fetch_results_resp.results)
            # An empty fetch would loop forever; treat it as a failure.
            assert last_fetch_size > 0
            num_rows_fetched += last_fetch_size

        assert num_rows_fetched == size

    def fetch_fail(self, handle, orientation, expected_error_prefix):
        """Attempts to fetch rows from the query identified by the given operation handle.
        Asserts that the fetch returns an error status whose message starts with the given
        expected_error_prefix. Returns the fetch response."""
        fetch_results_resp = self.__fetch_results(handle, orientation, 100)
        HS2TestSuite.check_response(fetch_results_resp, TCLIService.TStatusCode.ERROR_STATUS, expected_error_prefix)
        return fetch_results_resp
class TestAuthorization(CustomClusterTestSuite):
  AUDIT_LOG_DIR = tempfile.mkdtemp(dir=os.getenv('LOG_DIR'))

  def setup(self):
    """Opens a Thrift connection to the HS2 port of the first impalad in the cluster
    and stores the socket, transport, protocol and client on the test instance."""
    first_impalad = self.cluster.impalads[0].service
    self.socket = TSocket(first_impalad.hostname, first_impalad.hs2_port)
    self.transport = TBufferedTransport(self.socket)
    self.transport.open()
    self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
    self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

  def teardown(self):
    """Closes the HS2 socket opened by setup() and removes the audit log directory,
    ignoring any error raised while deleting it."""
    hs2_socket = self.socket
    if hs2_socket:
      hs2_socket.close()
    shutil.rmtree(self.AUDIT_LOG_DIR, ignore_errors=True)

  def __execute_hs2_stmt(self, statement, verify=True):
    """
    Runs a statement through the HS2 client using the current session handle

    :param statement: the statement text to execute
    :param verify: when true, the response is passed to TestHS2.check_response
    :return: the ExecuteStatement response
    """
    # Imported here rather than at module level so the tests of that module are not
    # pulled in when this suite runs.
    from tests.hs2.test_hs2 import TestHS2
    request = TCLIService.TExecuteStatementReq()
    request.sessionHandle = self.session_handle
    request.statement = statement
    response = self.hs2_client.ExecuteStatement(request)
    if not verify:
      return response
    TestHS2.check_response(response)
    return response

  def __open_hs2(self, user, configuration, verify=True):
    """
    Opens an HS2 session for the given user

    :param user: the username placed in the open-session request
    :param configuration: the session configuration dict placed in the request
    :param verify: when true, the response is passed to TestHS2.check_response
    :return: the OpenSession response
    """
    # Imported here rather than at module level so the tests of that module are not
    # pulled in when this suite runs.
    from tests.hs2.test_hs2 import TestHS2
    request = TCLIService.TOpenSessionReq()
    request.username = user
    request.configuration = configuration
    response = self.hs2_client.OpenSession(request)
    if not verify:
      return response
    TestHS2.check_response(response)
    return response

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args="--server_name=server1 "
                   "--sentry_config={0} "
                   "--authorization_policy_provider_class="
                   "org.apache.impala.service.CustomClusterResourceAuthorizationProvider"
                   .format(SENTRY_CONFIG_FILE),
      catalogd_args="--sentry_config={0} "
                    "--authorization_policy_provider_class="
                    "org.apache.impala.service.CustomClusterResourceAuthorizationProvider"
                    .format(SENTRY_CONFIG_FILE),
      sentry_config=SENTRY_CONFIG_FILE)
  def test_custom_authorization_provider(self, unique_role):
    """Runs a grant/describe round trip on a cluster started with a custom
    authorization policy provider class: a table without a grant must be rejected,
    a table with a select grant must be describable."""
    try:
      open_resp = self.__open_hs2(getuser(), {})
      self.session_handle = open_resp.sessionHandle
      self.__execute_hs2_stmt("create role {0}".format(unique_role))
      user_group = grp.getgrnam(getuser()).gr_name
      self.__execute_hs2_stmt(
          "grant role {0} to group {1}".format(unique_role, user_group))
      self.__execute_hs2_stmt(
          "grant select on table tpch.lineitem to role {0}".format(unique_role))

      # No privilege was granted on tpch_seq, so this describe must be refused.
      denied_resp = self.__execute_hs2_stmt("describe tpch_seq.lineitem", verify=False)
      expected_error = 'User \'%s\' does not have privileges to access' % getuser()
      assert expected_error in str(denied_resp)
      # The table covered by the select grant must be accessible.
      self.__execute_hs2_stmt("describe tpch.lineitem")
    finally:
      self.__execute_hs2_stmt("drop role {0}".format(unique_role))

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args="--server_name=server1 "
                   "--authorized_proxy_user_config=hue={0}".format(getuser()),
      catalogd_args="--sentry_config=" + SENTRY_CONFIG_FILE,
      sentry_config=SENTRY_CONFIG_FILE)
  def test_access_runtime_profile(self, unique_role, unique_name):
    """Checks who may retrieve the runtime profile and exec summary of a query.

    A view whose underlying tables the user cannot access (functional.complex_view)
    must not expose its profile or summary; a view over a table the user can access
    (<unique_db>.customer_view over tpch.customer) must. Each case is run from a
    direct session and from a session opened as 'hue' delegating to the current user,
    with the operation both left open and closed before the profile is requested."""
    unique_db = unique_name + "_db"

    try:
      self.session_handle = self.__open_hs2(getuser(), dict()).sessionHandle
      self.__execute_hs2_stmt("create role {0}".format(unique_role))
      self.__execute_hs2_stmt("grant create on server to role {0}".format(unique_role))
      self.__execute_hs2_stmt("grant all on database tpch to role {0}"
                              .format(unique_role))
      self.__execute_hs2_stmt("grant select on table functional.complex_view to role {0}"
                              .format(unique_role))
      self.__execute_hs2_stmt("grant role {0} to group {1}"
                              .format(unique_role, grp.getgrnam(getuser()).gr_name))
      # Create db with permissions
      self.__execute_hs2_stmt("create database {0}".format(unique_db))
      self.__execute_hs2_stmt("grant all on database {0} to role {1}"
                              .format(unique_db, unique_role))

      # Current user can't access view's underlying tables
      bad_resp = self.__execute_hs2_stmt("explain select * from functional.complex_view",
                                         False)
      assert 'User \'%s\' does not have privileges to EXPLAIN' % getuser() in \
             str(bad_resp)
      # User should not have access to the runtime profile
      # (checked once with the operation left open, once after closing it).
      self.__run_stmt_and_verify_profile_access("select * from functional.complex_view",
                                                False, False)
      self.__run_stmt_and_verify_profile_access("select * from functional.complex_view",
                                                False, True)

      # Repeat as a delegated user
      self.session_handle = \
          self.__open_hs2('hue', {'impala.doas.user': getuser()}).sessionHandle
      # User should not have access to the runtime profile
      self.__run_stmt_and_verify_profile_access("select * from functional.complex_view",
                                                False, False)
      self.__run_stmt_and_verify_profile_access("select * from functional.complex_view",
                                                False, True)

      # Create a view for which the user has access to the underlying tables.
      self.session_handle = self.__open_hs2(getuser(), dict()).sessionHandle
      self.__execute_hs2_stmt(
          "create view if not exists {0}.customer_view as select * from tpch.customer "
          "limit 1".format(unique_db))

      # User should be able to run EXPLAIN
      self.__execute_hs2_stmt("explain select * from {0}.customer_view"
                              .format(unique_db))

      # User should have access to the runtime profile and exec summary
      self.__run_stmt_and_verify_profile_access("select * from {0}.customer_view"
                                                .format(unique_db), True, False)
      self.__run_stmt_and_verify_profile_access("select * from {0}.customer_view"
                                                .format(unique_db), True, True)

      # Repeat as a delegated user
      self.session_handle = \
          self.__open_hs2('hue', {'impala.doas.user': getuser()}).sessionHandle
      # Delegated user is the current user
      self.__run_stmt_and_verify_profile_access("select * from {0}.customer_view"
                                                .format(unique_db), True, False)
      self.__run_stmt_and_verify_profile_access("select * from {0}.customer_view"
                                                .format(unique_db), True, True)
    finally:
      # NOTE(review): the server-wide grant presumably exists so the drops below are
      # authorized - confirm. Cleanup runs on whichever session was opened last.
      self.__execute_hs2_stmt("grant all on server to role {0}".format(unique_role))
      self.__execute_hs2_stmt("drop view if exists {0}.customer_view".format(unique_db))
      self.__execute_hs2_stmt("drop table if exists {0}.customer".format(unique_db))
      self.__execute_hs2_stmt("drop database if exists {0}".format(unique_db))
      self.__execute_hs2_stmt("drop role {0}".format(unique_role))

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args="--server_name=server1 "
                   "--authorized_proxy_user_config=foo=bar;hue={0} "
                   "--abort_on_failed_audit_event=false "
                   "--audit_event_log_dir={1}"
                   .format(getuser(), AUDIT_LOG_DIR),
      catalogd_args="--sentry_config=" + SENTRY_CONFIG_FILE,
      sentry_config=SENTRY_CONFIG_FILE)
  def test_user_impersonation(self, unique_role):
    """End-to-end user impersonation + authorization test.

    The impalad is started with a proxy-user configuration that pairs 'hue' with the
    current OS user and writes audit events to AUDIT_LOG_DIR. The scenario itself is
    implemented in __test_impersonation."""
    self.__test_impersonation(unique_role)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args="--server_name=server1 "
                   "--authorized_proxy_user_config=hue=bar "
                   "--authorized_proxy_group_config=foo=bar;hue={0} "
                   "--abort_on_failed_audit_event=false "
                   "--audit_event_log_dir={1}"
                   .format(grp.getgrgid(os.getgid()).gr_name, AUDIT_LOG_DIR),
      catalogd_args="--sentry_config=" + SENTRY_CONFIG_FILE,
      sentry_config=SENTRY_CONFIG_FILE)
  def test_group_impersonation(self, unique_role):
    """End-to-end group impersonation + authorization test.

    The impalad is started with a proxy-group configuration that pairs 'hue' with the
    name of the current process's group, while the proxy-user configuration only
    pairs 'hue' with 'bar'. Audit events are written to AUDIT_LOG_DIR. The scenario
    itself is implemented in __test_impersonation."""
    self.__test_impersonation(unique_role)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args("--server_name=server1\
        --authorization_policy_file=%s\
        --authorized_proxy_user_config=foo=bar\
        --authorized_proxy_group_config=foo=bar\
        --abort_on_failed_audit_event=false\
        --audit_event_log_dir=%s" % (AUTH_POLICY_FILE, AUDIT_LOG_DIR))
  def test_no_matching_user_and_group_impersonation(self):
    """Opens a session as 'hue' that asks to delegate to 'abc' and expects the server
    to refuse it: both the proxy user and the proxy group configuration passed to the
    impalad only contain the entry foo=bar."""
    open_session_req = TCLIService.TOpenSessionReq()
    # The connecting user and the delegation target must match the names that the
    # assertion below expects to see in the error message.
    open_session_req.username = 'hue'
    open_session_req.configuration = dict()
    open_session_req.configuration['impala.doas.user'] = 'abc'
    resp = self.hs2_client.OpenSession(open_session_req)
    assert 'User \'hue\' is not authorized to delegate to \'abc\'' in str(resp)

  def __test_impersonation(self, role):
    """End-to-end impersonation + authorization test. Expects authorization to be
    configured before running this test.

    Scenario:
      1. As the current user, create 'role', grant it all on tpch.lineitem and grant
         the role to the current user's groups.
      2. As 'hue' delegating to the current user, verify that an unauthorized table
         is rejected and that a matching audit record is written.
      3. Verify that an authorized table works and that the runtime profile reports
         the effective, delegated and connected users correctly.
      4. Verify that 'hue' cannot delegate to 'some_user'.
      5. As 'hue' without delegation, verify the profile shows 'hue' as effective and
         connected user and an empty delegated user.
    The role is dropped in the finally block."""
    # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
    # the module within this test function, rather than as a top-level import. This way
    # the tests in that module will not get pulled when executing this test suite. The fix
    # is to split the utility code out of the TestHS2 class and support HS2 as a first
    # class citizen in our test framework.
    from tests.hs2.test_hs2 import TestHS2

    try:
      self.session_handle = self.__open_hs2(getuser(), dict()).sessionHandle
      self.__execute_hs2_stmt("create role {0}".format(role))
      self.__execute_hs2_stmt("grant all on table tpch.lineitem to role {0}"
                              .format(role))
      # Grant the role to both the group named after the user and the process's group.
      self.__execute_hs2_stmt("grant role {0} to group {1}"
                              .format(role, grp.getgrnam(getuser()).gr_name))
      self.__execute_hs2_stmt("grant role {0} to group {1}"
                              .format(role, grp.getgrgid(os.getgid()).gr_name))

      # Try to query a table we are not authorized to access
      self.session_handle = self.__open_hs2('hue',
                                            {'impala.doas.user': getuser()}).sessionHandle
      bad_resp = self.__execute_hs2_stmt("describe tpch_seq.lineitem", False)
      assert 'User \'%s\' does not have privileges to access' % getuser() in\
          str(bad_resp)

      assert self.__wait_for_audit_record(user=getuser(), impersonator='hue'),\
          'No matching audit event recorded in time window'

      # Now try the same operation on a table we are authorized to access.
      good_resp = self.__execute_hs2_stmt("describe tpch.lineitem")
      TestHS2.check_response(good_resp)

      # Verify the correct user information is in the runtime profile
      query_id = operation_id_to_query_id(
          good_resp.operationHandle.operationId)
      profile_page = self.cluster.impalads[0].service.read_query_profile_page(query_id)
      # The session was opened by 'hue' on behalf of the current user.
      self.__verify_profile_user_fields(profile_page, effective_user=getuser(),
          delegated_user=getuser(), connected_user='hue')

      # Try to delegate to a user we are not authorized to delegate to.
      resp = self.__open_hs2('hue', {'impala.doas.user': 'some_user'}, False)
      assert 'User \'hue\' is not authorized to delegate to \'some_user\'' in str(resp)

      # Create a new session which does not have a do_as_user and run a simple query.
      self.session_handle = self.__open_hs2('hue', dict()).sessionHandle
      resp = self.__execute_hs2_stmt("select 1")

      # Verify the correct user information is in the runtime profile. Since there is
      # no do_as_user the Delegated User field should be empty.
      query_id = operation_id_to_query_id(resp.operationHandle.operationId)

      profile_page = self.cluster.impalads[0].service.read_query_profile_page(query_id)
      self.__verify_profile_user_fields(profile_page, effective_user='hue',
          delegated_user='', connected_user='hue')
    finally:
      # Clean up from a fresh session of the current user.
      # NOTE(review): the server-wide grant and the repeated group grant presumably
      # make sure this user is allowed to drop the role - confirm.
      self.session_handle = self.__open_hs2(getuser(), dict()).sessionHandle
      self.__execute_hs2_stmt("grant all on server to role {0}".format(role))
      self.__execute_hs2_stmt("grant role {0} to group {1}"
                              .format(role, grp.getgrnam(getuser()).gr_name))
      self.__execute_hs2_stmt("drop role {0}".format(role))

  def __verify_profile_user_fields(self, profile_str, effective_user, connected_user,
      delegated_user):
    """Asserts that the runtime profile text contains the expected "User",
    "Connected User" and "Delegated User" lines, each indented by four spaces."""
    expected_lines = (
        ('\n    User: %s\n', effective_user),
        ('\n    Connected User: %s\n', connected_user),
        ('\n    Delegated User: %s\n', delegated_user),
    )
    for line_template, user_name in expected_lines:
      assert line_template % user_name in profile_str

  def __wait_for_audit_record(self, user, impersonator, timeout_secs=30):
    """Polls the files in AUDIT_LOG_DIR once per second until one of them contains
    a record for the given user and impersonator. Returns True as soon as a match is
    found and False once 'timeout_secs' seconds have elapsed without one.
    """
    # Audit logs are flushed to disk at intervals, so a record may show up late.
    # Keep re-reading the directory until a match is found or the time is up.
    began_at = time()
    while time() - began_at < timeout_secs:
      log_file_names = os.listdir(self.AUDIT_LOG_DIR)
      if any(self.__find_matching_audit_record(name, user, impersonator)
             for name in log_file_names):
        return True
      sleep(1)
    return False

  def __find_matching_audit_record(self, audit_file_name, user, impersonator):
    """Scans one audit log file in AUDIT_LOG_DIR for a matching record.

    Every line of the file is parsed as a JSON object. For each non-empty object,
    the entry stored under its smallest key is compared against the given 'user'
    and 'impersonator'. Returns True on the first match, False if none is found."""
    with open(os.path.join(self.AUDIT_LOG_DIR, audit_file_name)) as audit_log_file:
      # Iterate the file lazily instead of loading every line up front.
      for line in audit_log_file:
        json_dict = json.loads(line)
        if not json_dict:
          continue
        # NOTE(review): presumably each line holds a single event keyed by a
        # timestamp-like value - confirm. Only the smallest key's entry is inspected.
        record = json_dict[min(json_dict)]
        if record['user'] == user and record['impersonator'] == impersonator:
          return True
    return False

  def __run_stmt_and_verify_profile_access(self, stmt, has_access, close_operation):
    """Executes 'stmt', then requests its runtime profile and exec summary.

      When 'has_access' is true both requests must succeed and the profile must
      contain "Plan: ". Otherwise both responses must carry the "not authorized"
      message for the current user. When 'close_operation' is true the operation is
      closed before the profile and the summary are requested."""
    from tests.hs2.test_hs2 import TestHS2
    exec_resp = self.__execute_hs2_stmt(stmt, verify=False)
    op_handle = exec_resp.operationHandle

    if close_operation:
      close_req = TCLIService.TCloseOperationReq()
      close_req.operationHandle = op_handle
      TestHS2.check_response(self.hs2_client.CloseOperation(close_req))

    # Runtime profile.
    profile_req = ImpalaHiveServer2Service.TGetRuntimeProfileReq()
    profile_req.operationHandle = op_handle
    profile_req.sessionHandle = self.session_handle
    profile_resp = self.hs2_client.GetRuntimeProfile(profile_req)

    if not has_access:
      assert "User %s is not authorized to access the runtime profile or "\
          "execution summary." % (getuser()) in str(profile_resp)
    else:
      TestHS2.check_response(profile_resp)
      assert "Plan: " in profile_resp.profile

    # Exec summary.
    summary_req = ImpalaHiveServer2Service.TGetExecSummaryReq()
    summary_req.operationHandle = op_handle
    summary_req.sessionHandle = self.session_handle
    summary_resp = self.hs2_client.GetExecSummary(summary_req)

    if not has_access:
      assert "User %s is not authorized to access the runtime profile or "\
          "execution summary." % (getuser()) in str(summary_resp)
    else:
      TestHS2.check_response(summary_resp)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args="--server_name=server1 --sentry_config=" + SENTRY_CONFIG_FILE,
      catalogd_args="--sentry_config=" + SENTRY_CONFIG_FILE,
      impala_log_dir=tempfile.mkdtemp(prefix="test_deprecated_none_",
      dir=os.getenv("LOG_DIR")))
  def test_deprecated_flag_doesnt_show(self):
    """With no authorization_policy_file flag passed, no file in the log directory
    may contain the "removed flag" message."""
    removed_flag_message = "Ignoring removed flag " "authorization_policy_file"
    assert_no_files_in_dir_contain(self.impala_log_dir, removed_flag_message)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=%s" % (AUTH_POLICY_FILE),
      impala_log_dir=tempfile.mkdtemp(prefix="test_deprecated_",
      dir=os.getenv("LOG_DIR")))
  def test_deprecated_flags(self):
    """With the authorization_policy_file flag passed, some file in the log
    directory must contain the "removed flag" message."""
    removed_flag_message = "Ignoring removed flag " "authorization_policy_file"
    assert_file_in_dir_contains(self.impala_log_dir, removed_flag_message)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
    impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE,
    catalogd_args="--sentry_config=%s" % SENTRY_CONFIG_FILE,
    impala_log_dir=tempfile.mkdtemp(prefix="test_catalog_restart_",
                                    dir=os.getenv("LOG_DIR")))
  def test_catalog_restart(self, unique_role):
    """IMPALA-7713: Tests that a catalogd restart when authorization is enabled should
    reset the previous privileges stored in impalad's catalog to avoid stale privilege
    data in the impalad's catalog.

    Three roles (<unique_role>_foo/_bar/_baz) each get "all" on one database. The
    grants are checked before and after restarting only the catalogd."""
    # One (show-grant statement, granted database) pair per role.
    expected_grants = (
        ("show grant role %s_foo", "functional"),
        ("show grant role %s_bar", "functional_kudu"),
        ("show grant role %s_baz", "functional_avro"),
    )

    def assert_privileges():
      for show_stmt, db_name in expected_grants:
        result = self.client.execute(show_stmt % unique_role)
        # Go through the instance rather than the module-level class name, which a
        # later class definition of the same name can rebind.
        self._check_privileges(result, [["database", db_name,
                                         "", "", "", "all", "false"]])

    self.role_cleanup(unique_role)
    try:
      for create_stmt in ("create role %s_foo",
                          "create role %s_bar",
                          "create role %s_baz"):
        self.client.execute(create_stmt % unique_role)
      for grant_stmt in ("grant all on database functional to role %s_foo",
                         "grant all on database functional_kudu to role %s_bar",
                         "grant all on database functional_avro to role %s_baz"):
        self.client.execute(grant_stmt % unique_role)

      assert_privileges()
      self._start_impala_cluster(["--catalogd_args=--sentry_config=%s" %
                                  SENTRY_CONFIG_FILE, "--restart_catalogd_only"])
      # The same privileges must still be reported after the catalogd restart.
      assert_privileges()
    finally:
      self.role_cleanup(unique_role)

  def role_cleanup(self, role_name_match):
    """Drops every role listed by "show roles" whose name contains
    'role_name_match'."""
    listed_roles = self.client.execute("show roles").data
    doomed_roles = [name for name in listed_roles if role_name_match in name]
    for name in doomed_roles:
      self.client.execute("drop role %s" % name)

  @staticmethod
  def _check_privileges(result, expected):
    """Asserts that the rows in 'result.data' match 'expected'.

    Each row is a tab-separated string. Its last column is dropped before comparing.
    NOTE(review): the dropped column is presumably a volatile value such as a
    creation time - confirm. 'expected' is a list of column lists, one per row."""
    def columns(row):
      # Every column except the last one.
      return row.split("\t")[:-1]
    # Build a real list: on Python 3 map() returns an iterator, which never compares
    # equal to a list.
    actual = [columns(row) for row in result.data]
    assert actual == expected

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
    impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE,
    catalogd_args="--sentry_config=%s" % SENTRY_CONFIG_FILE,
    impala_log_dir=tempfile.mkdtemp(prefix="test_catalog_object_",
                                    dir=os.getenv("LOG_DIR")))
  def test_catalog_object(self, unique_role):
    """IMPALA-7721: Tests /catalog_object web API for principal and privilege.

    The same lookups are run against the catalogd and the first impalad: an existing
    principal and privilege must return a dump containing "catalog_version", while
    unknown ones must return a dump containing "CatalogException"."""
    self.role_cleanup(unique_role)
    try:
      self.client.execute("create role %s" % unique_role)
      self.client.execute("grant select on database functional to role %s" % unique_role)
      for service in [self.cluster.catalogd.service,
                      self.cluster.get_first_impalad().service]:
        obj_dump = service.get_catalog_object_dump("PRINCIPAL", "%s.ROLE" % unique_role)
        assert "catalog_version" in obj_dump

        # Get the privilege associated with that principal ID.
        principal_id = re.search(r"principal_id \(i32\) = (\d+)", obj_dump)
        assert principal_id is not None
        # The privilege name contains "->" and "=", so it is URL-quoted.
        obj_dump = service.get_catalog_object_dump("PRIVILEGE", urllib.quote(
            "server=server1->db=functional->action=select->grantoption=false.%s.ROLE" %
            principal_id.group(1)))
        assert "catalog_version" in obj_dump

        # Get the principal that does not exist.
        obj_dump = service.get_catalog_object_dump("PRINCIPAL", "doesnotexist.ROLE")
        assert "CatalogException" in obj_dump

        # Get the privilege that does not exist.
        obj_dump = service.get_catalog_object_dump("PRIVILEGE", urllib.quote(
            "server=server1->db=doesntexist->action=select->grantoption=false.%s.ROLE" %
            principal_id.group(1)))
        assert "CatalogException" in obj_dump
    finally:
      self.role_cleanup(unique_role)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
    impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE,
    catalogd_args="--sentry_config=%s --sentry_catalog_polling_frequency_s=3600" %
                  SENTRY_CONFIG_FILE,
    impala_log_dir=tempfile.mkdtemp(prefix="test_invalidate_metadata_sentry_unavailable_",
                                    dir=os.getenv("LOG_DIR")))
  def test_invalidate_metadata_sentry_unavailable(self, unique_role):
    """IMPALA-7824: Tests that running INVALIDATE METADATA when Sentry is unavailable
    should not cause Impala to hang.

    Sentry is stopped, INVALIDATE METADATA must fail with the expected error, Sentry
    is started again and INVALIDATE METADATA must then succeed. If the test fails
    while Sentry is down, Sentry is started again before the roles are cleaned up."""
    self.role_cleanup(unique_role)
    # True while this test has stopped Sentry and not yet brought it back.
    sentry_stopped = False
    try:
      group_name = grp.getgrnam(getuser()).gr_name
      self.client.execute("create role %s" % unique_role)
      self.client.execute("grant all on server to role %s" % unique_role)
      self.client.execute("grant role %s to group `%s`" % (unique_role, group_name))

      self._stop_sentry_service()
      sentry_stopped = True
      # Calling INVALIDATE METADATA when Sentry is unavailable should return an error.
      result = self.execute_query_expect_failure(self.client, "invalidate metadata")
      result_str = str(result)
      assert "MESSAGE: CatalogException: Error refreshing authorization policy:" \
             in result_str
      assert "CAUSED BY: ImpalaRuntimeException: Error refreshing authorization policy." \
             " Sentry is unavailable. Ensure Sentry is up:" in result_str

      self._start_sentry_service(SENTRY_CONFIG_FILE)
      sentry_stopped = False
      # Calling INVALIDATE METADATA after Sentry is up should not return an error.
      self.execute_query_expect_success(self.client, "invalidate metadata")
    finally:
      # Do not leave Sentry down for the cleanup below or for later tests when an
      # assertion failed between the stop and the start above.
      if sentry_stopped:
        self._start_sentry_service(SENTRY_CONFIG_FILE)
      self.role_cleanup(unique_role)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE,
      catalogd_args="--sentry_config=%s --sentry_catalog_polling_frequency_s=3600 " %
                    SENTRY_CONFIG_FILE,
      impala_log_dir=tempfile.mkdtemp(prefix="test_refresh_authorization_",
                                      dir=os.getenv("LOG_DIR")))
  def test_refresh_authorization(self, unique_role):
    """Tests the refresh authorization statement against roles and privileges that
       are added and removed externally. A long Sentry polling frequency is configured
       so that external changes are not picked up by polling and only become visible
       through an explicit refresh authorization. The scenario runs once with
       sync_ddl enabled and once with it disabled."""
    group_name = grp.getgrnam(getuser()).gr_name
    self.role_cleanup(unique_role)
    for sync_ddl in [1, 0]:
      query_options = {'sync_ddl': sync_ddl}
      if sync_ddl:
        # With sync_ddl the changes must be visible on every coordinator, so verify
        # through a dedicated client per impalad.
        clients = [impalad.service.create_beeswax_client()
                   for impalad in self.cluster.impalads]
      else:
        clients = [self.client]
      try:
        self.client.execute("create role %s" % unique_role)
        self.client.execute("grant role %s to group `%s`" % (unique_role, group_name))
        self.client.execute("grant refresh on server to %s" % unique_role)

        self.validate_refresh_authorization_roles(unique_role, query_options, clients)
        self.validate_refresh_authorization_privileges(
            unique_role, query_options, clients)
      finally:
        self.role_cleanup(unique_role)

  def validate_refresh_authorization_roles(self, unique_role, query_options, clients):
    """Checks the refresh authorization statement against roles that are dropped and
       created through sentryShell, i.e. outside Impala. Before the refresh the
       change must not be visible through self.client. After it, every client in
       'clients' must see it."""
    def sentry_shell(command_template, role_name):
      # Runs a single sentryShell command via bash, forwarding its output.
      subprocess.check_call(
          ["/bin/bash", "-c",
           command_template %
           (os.getenv("SENTRY_HOME"), os.getenv("SENTRY_CONF_DIR"), role_name)],
          stdout=sys.stdout, stderr=sys.stderr)

    def role_is_listed(client, role_name):
      # True if any row returned by "show roles" on 'client' contains 'role_name'.
      listing = self.execute_query_expect_success(client, "show roles")
      return any(role_name in row for row in listing.data)

    def refresh_authorization():
      self.execute_query_expect_success(self.client, "refresh authorization",
                                        query_options=query_options)

    try:
      # Two roles created through Impala.
      self.client.execute("create role %s_internal1" % unique_role)
      self.client.execute("create role %s_internal2" % unique_role)

      # Drop one of them (_internal1) behind Impala's back.
      dropped_role = "%s_internal1" % unique_role
      sentry_shell("%s/bin/sentryShell --conf %s/sentry-site.xml -dr -r %s",
                   dropped_role)
      assert role_is_listed(self.client, dropped_role)
      refresh_authorization()
      for client in clients:
        assert not role_is_listed(client, dropped_role)

      # Create a brand new role behind Impala's back.
      added_role = "%s_external" % unique_role
      sentry_shell("%s/bin/sentryShell --conf %s/sentry-site.xml -cr -r %s",
                   added_role)
      assert not role_is_listed(self.client, added_role)
      refresh_authorization()
      for client in clients:
        assert role_is_listed(client, added_role)
    finally:
      for suffix in ["internal1", "internal2", "external"]:
        self.role_cleanup("%s_%s" % (unique_role, suffix))

  def validate_refresh_authorization_privileges(self, unique_role, query_options,
                                                clients):
    """Checks the refresh authorization statement against a select privilege that is
       granted through sentryShell, i.e. outside Impala. Before the refresh only the
       refresh privilege is visible and selects fail. After it, every client in
       'clients' must see both privileges and be able to select."""
    # Grant select on functional.alltypes behind Impala's back.
    grant_command = (
        "%s/bin/sentryShell --conf %s/sentry-site.xml -gpr -p "
        "'server=server1->db=functional->table=alltypes->action=select' -r %s" %
        (os.getenv("SENTRY_HOME"), os.getenv("SENTRY_CONF_DIR"), unique_role))
    subprocess.check_call(["/bin/bash", "-c", grant_command],
                          stdout=sys.stdout, stderr=sys.stderr)

    # Until the refresh, the role only carries the refresh privilege.
    grants_before = self.execute_query_expect_success(
        self.client, "show grant role %s" % unique_role)
    assert len(grants_before.data) == 1
    assert any("refresh" in row for row in grants_before.data)

    for client in clients:
      self.execute_query_expect_failure(client,
                                        "select * from functional.alltypes limit 1")

    self.execute_query_expect_success(self.client, "refresh authorization",
                                      query_options=query_options)

    for client in clients:
      # The externally granted select privilege must now be visible and effective.
      grants_after = self.execute_query_expect_success(
          client, "show grant role %s" % unique_role)
      assert len(grants_after.data) == 2
      assert any("select" in row for row in grants_after.data)
      assert any("refresh" in row for row in grants_after.data)
      self.execute_query_expect_success(client,
                                        "select * from functional.alltypes limit 1")
class TestAuthorization(CustomClusterTestSuite):
    """Authorization tests that drive impalad through a raw HiveServer2 Thrift client.

    setup() opens one socket per test; each test opens its own HS2 session(s) on it.
    """
    # Passed to impalad as --audit_event_log_dir by test_impersonation. Created when
    # the class body is evaluated and deleted again in teardown().
    AUDIT_LOG_DIR = tempfile.mkdtemp(dir=os.getenv('LOG_DIR'))

    def setup(self):
        """Connects an HS2 Thrift client to the endpoint in IMPALAD_HS2_HOST_PORT."""
        host, port = IMPALAD_HS2_HOST_PORT.split(":")
        self.socket = TSocket(host, port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = TCLIService.Client(self.protocol)

    def teardown(self):
        """Closes the socket (unless a test already did) and removes the audit log dir."""
        if self.socket:
            self.socket.close()
        shutil.rmtree(self.AUDIT_LOG_DIR, ignore_errors=True)

    def __execute_statement(self, session_handle, statement):
        """Runs 'statement' in the given HS2 session and returns the unchecked response."""
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = session_handle
        execute_statement_req.statement = statement
        return self.hs2_client.ExecuteStatement(execute_statement_req)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        "--server_name=server1\
      --authorization_policy_file=/test-warehouse/authz-policy.ini\
      --authorization_policy_provider_class=%s" %
        "org.apache.sentry.provider.file.LocalGroupResourceAuthorizationProvider"
    )
    def test_custom_authorization_provider(self):
        """Checks that 'test_user' is denied access to tpch_seq.lineitem but may
        describe tpch.lineitem when a custom policy provider class is configured."""
        from tests.hs2.test_hs2 import TestHS2
        open_session_req = TCLIService.TOpenSessionReq()
        # User is 'test_user' (defined in the authorization policy file). This must
        # match the user name expected in the error message asserted below.
        open_session_req.username = 'test_user'
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Try to query a table we are not authorized to access.
        self.session_handle = resp.sessionHandle
        execute_statement_resp = self.__execute_statement(
            self.session_handle, "describe tpch_seq.lineitem")
        assert 'User \'%s\' does not have privileges to access' % 'test_user' in\
            str(execute_statement_resp)

        # Now try the same operation on a table we are authorized to access.
        execute_statement_resp = self.__execute_statement(
            self.session_handle, "describe tpch.lineitem")
        TestHS2.check_response(execute_statement_resp)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=/test-warehouse/authz-policy.ini\
      --authorized_proxy_user_config=hue=%s\
      --audit_event_log_dir=%s" % (getuser(), AUDIT_LOG_DIR))
    def test_impersonation(self):
        """End-to-end impersonation + authorization test. Expects authorization to be
        configured before running this test."""
        # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
        # the module within this test function, rather than as a top-level import. This way
        # the tests in that module will not get pulled when executing this test suite. The fix
        # is to split the utility code out of the TestHS2 class and support HS2 as a first
        # class citizen in our test framework.
        from tests.hs2.test_hs2 import TestHS2
        open_session_req = TCLIService.TOpenSessionReq()
        # Connected user is 'hue', the proxy user configured in the impalad arguments.
        open_session_req.username = 'hue'
        open_session_req.configuration = dict()
        # Delegated user is the current user
        open_session_req.configuration['impala.doas.user'] = getuser()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Try to query a table we are not authorized to access.
        self.session_handle = resp.sessionHandle
        execute_statement_resp = self.__execute_statement(
            self.session_handle, "describe tpch_seq.lineitem")
        assert 'User \'%s\' does not have privileges to access' % getuser() in\
            str(execute_statement_resp)

        assert self.__wait_for_audit_record(user=getuser(), impersonator='hue'),\
            'No matching audit event recorded in time window'

        # Now try the same operation on a table we are authorized to access.
        execute_statement_resp = self.__execute_statement(
            self.session_handle, "describe tpch.lineitem")
        TestHS2.check_response(execute_statement_resp)

        # Verify the correct user information is in the runtime profile
        query_id = operation_id_to_query_id(
            execute_statement_resp.operationHandle.operationId)
        profile_page = self.cluster.impalads[
            0].service.read_query_profile_page(query_id)
        self.__verify_profile_user_fields(profile_page,
                                          effective_user=getuser(),
                                          delegated_user=getuser(),
                                          connected_user='hue')

        # Try to delegate to a user we are not authorized to delegate to. The user
        # name has to match the one in the error message asserted below.
        open_session_req.configuration['impala.doas.user'] = 'some_user'
        resp = self.hs2_client.OpenSession(open_session_req)
        assert 'User \'hue\' is not authorized to delegate to \'some_user\'' in str(
            resp)

        # Create a new session which does not have a do_as_user.
        open_session_req.username = 'hue'
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Run a simple query, which should succeed.
        execute_statement_resp = self.__execute_statement(
            resp.sessionHandle, "select 1")
        TestHS2.check_response(execute_statement_resp)

        # Verify the correct user information is in the runtime profile. Since there is
        # no do_as_user the Delegated User field should be empty and the effective user
        # is the connected user.
        query_id = operation_id_to_query_id(
            execute_statement_resp.operationHandle.operationId)
        profile_page = self.cluster.impalads[
            0].service.read_query_profile_page(query_id)
        self.__verify_profile_user_fields(profile_page,
                                          effective_user='hue',
                                          delegated_user='',
                                          connected_user='hue')

        # Close the socket here and clear it so that teardown() does not close it again.
        self.socket.close()
        self.socket = None

    def __verify_profile_user_fields(self, profile_str, effective_user,
                                     connected_user, delegated_user):
        """Verifies the given runtime profile string contains the specified values for
        User, Connected User, and Delegated User."""
        assert '\n    User: %s\n' % effective_user in profile_str
        assert '\n    Connected User: %s\n' % connected_user in profile_str
        assert '\n    Delegated User: %s\n' % delegated_user in profile_str

    def __wait_for_audit_record(self, user, impersonator, timeout_secs=30):
        """Waits until an audit log record is found that contains the given user and
        impersonator, or until the timeout is reached.

        Returns True if a matching record was found in any file under AUDIT_LOG_DIR,
        False if 'timeout_secs' elapsed first.
        """
        # The audit event might not show up immediately (the audit logs are flushed to disk
        # on regular intervals), so poll the audit event logs until a matching record is
        # found.
        start_time = time()
        while time() - start_time < timeout_secs:
            for audit_file_name in os.listdir(self.AUDIT_LOG_DIR):
                if self.__find_matching_audit_record(audit_file_name, user,
                                                     impersonator):
                    return True
            sleep(1)
        return False

    def __find_matching_audit_record(self, audit_file_name, user,
                                     impersonator):
        """Returns True if the named file in AUDIT_LOG_DIR has a record for the given
        user and impersonator.

        Every line is parsed as a JSON object; the entry stored under the smallest key
        is the one whose 'user' and 'impersonator' fields are compared.
        """
        audit_path = os.path.join(self.AUDIT_LOG_DIR, audit_file_name)
        with open(audit_path) as audit_log_file:
            # Iterate the file lazily instead of loading every line up front.
            for line in audit_log_file:
                json_dict = json.loads(line)
                if not json_dict:
                    continue
                record = json_dict[min(json_dict)]
                if record['user'] == user and\
                        record['impersonator'] == impersonator:
                    return True
        return False
Beispiel #14
0
class HS2TestSuite(ImpalaTestSuite):
    """Base class with helpers for tests that use a raw HiveServer2 Thrift client.

    setup() calls cleanup_db(TEST_DB), connects self.hs2_client and creates TEST_DB;
    teardown() calls cleanup_db(TEST_DB) again and closes the socket.
    """
    # This DB will be created/dropped for every HS2TestSuite subclass. Make the name unique
    # so different test suites don't clobber each other's DBs. The [2:] is to remove the
    # "0." from the random floating-point number.
    TEST_DB = 'hs2_db' + str(random.random())[2:]

    # Not used inside this class. Presumably the names of the typed value fields of an
    # HS2 V6 columnar result set -- confirm against the callers.
    HS2_V6_COLUMN_TYPES = [
        'boolVal', 'stringVal', 'byteVal', 'i16Val', 'i32Val', 'i64Val',
        'doubleVal', 'binaryVal'
    ]

    def setup(self):
        """Connects the HS2 client and creates a fresh TEST_DB."""
        self.cleanup_db(self.TEST_DB)
        host, port = IMPALAD_HS2_HOST_PORT.split(":")
        self.socket = TSocket(host, port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)
        self.client.execute("create database %s" % self.TEST_DB)

    def teardown(self):
        """Cleans up TEST_DB and closes the socket, even if the cleanup fails."""
        try:
            self.cleanup_db(self.TEST_DB)
        finally:
            # Close the connection even when cleanup_db() raises, so a failed cleanup
            # does not leak the socket. The original exception still propagates.
            if self.socket:
                self.socket.close()

    @staticmethod
    def check_response(
            response,
            expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
            expected_error_prefix=None):
        """Asserts that 'response' carries 'expected_status_code'.

        When a non-success status is expected and 'expected_error_prefix' is given, the
        error message must additionally start with that prefix.
        """
        assert response.status.statusCode == expected_status_code
        if expected_status_code != TCLIService.TStatusCode.SUCCESS_STATUS\
           and expected_error_prefix is not None:
            assert response.status.errorMessage.startswith(
                expected_error_prefix)

    def close(self, op_handle):
        """Closes the operation identified by 'op_handle' and asserts success."""
        close_op_req = TCLIService.TCloseOperationReq()
        close_op_req.operationHandle = op_handle
        close_op_resp = self.hs2_client.CloseOperation(close_op_req)
        # Same success assertion as before, routed through the shared helper.
        HS2TestSuite.check_response(close_op_resp)

    def fetch(self, handle, orientation, size, expected_num_rows=None):
        """Fetches at most size number of rows from the query identified by the given
        operation handle. Uses the given fetch orientation. Asserts that the fetch returns
        a success status, and that the number of rows returned is equal to size, or
        equal to the given expected_num_rows (if one was given). Returns the fetch
        response."""
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = size
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp)
        num_rows = size if expected_num_rows is None else expected_num_rows
        assert len(fetch_results_resp.results.rows) == num_rows
        return fetch_results_resp

    def fetch_fail(self, handle, orientation, expected_error_prefix):
        """Attempts to fetch rows from the query identified by the given operation handle.
        Asserts that the fetch returns an error with an error message matching the given
        expected_error_prefix. Returns the fetch response."""
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = 100
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp,
                                    TCLIService.TStatusCode.ERROR_STATUS,
                                    expected_error_prefix)
        return fetch_results_resp
Beispiel #15
0
class HS2TestSuite(ImpalaTestSuite):
  """Base class with helpers for tests that use a raw HiveServer2 Thrift client.

  setup() calls cleanup_db(TEST_DB) and connects self.hs2_client; teardown() calls
  cleanup_db(TEST_DB) again and closes the socket.
  """
  TEST_DB = 'hs2_db'

  # Attribute names probed by get_num_rows() on the first column of a result set.
  HS2_V6_COLUMN_TYPES = ['boolVal', 'stringVal', 'byteVal', 'i16Val', 'i32Val', 'i64Val',
                         'doubleVal', 'binaryVal']

  def setup(self):
    """Cleans up TEST_DB and connects the HS2 client."""
    self.cleanup_db(self.TEST_DB)
    host, port = IMPALAD_HS2_HOST_PORT.split(":")
    self.socket = TSocket(host, port)
    self.transport = TBufferedTransport(self.socket)
    self.transport.open()
    self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
    self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

  def teardown(self):
    """Cleans up TEST_DB and closes the socket, even if the cleanup fails."""
    try:
      self.cleanup_db(self.TEST_DB)
    finally:
      # Close the connection even when cleanup_db() raises, so a failed cleanup does
      # not leak the socket. The original exception still propagates.
      if self.socket:
        self.socket.close()

  @staticmethod
  def check_response(response,
                     expected_status_code = TCLIService.TStatusCode.SUCCESS_STATUS,
                     expected_error_prefix = None):
    """Asserts that 'response' carries 'expected_status_code'.

    When a non-success status is expected and 'expected_error_prefix' is given, the
    error message must additionally start with that prefix."""
    assert response.status.statusCode == expected_status_code
    if expected_status_code != TCLIService.TStatusCode.SUCCESS_STATUS\
       and expected_error_prefix is not None:
      assert response.status.errorMessage.startswith(expected_error_prefix)

  def close(self, op_handle):
    """Closes the operation identified by 'op_handle' and asserts success."""
    close_op_req = TCLIService.TCloseOperationReq()
    close_op_req.operationHandle = op_handle
    close_op_resp = self.hs2_client.CloseOperation(close_op_req)
    # Same success assertion as before, routed through the shared helper.
    HS2TestSuite.check_response(close_op_resp)

  def get_num_rows(self, result_set):
    """Returns the number of rows held by 'result_set'.

    If no columns are set, the length of result_set.rows is returned. Otherwise the
    result is the length of the first typed value list (one of HS2_V6_COLUMN_TYPES)
    that is set on the first column."""
    # rows will always be set, so the only way to tell if we should use it is to see if
    # any columns are set
    if result_set.columns is None or len(result_set.columns) == 0:
      return len(result_set.rows)

    first_column = result_set.columns[0]
    for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
      typed_col = getattr(first_column, col_type)
      if typed_col is not None:
        return len(typed_col.values)

    # None of the known typed value lists is set on the first column.
    assert False, "No typed values set on the first result column"

  def fetch_at_most(self, handle, orientation, size, expected_num_rows = None):
    """Fetches at most size number of rows from the query identified by the given
    operation handle. Uses the given fetch orientation. Asserts that the fetch returns a
    success status, and that the number of rows returned is equal to given
    expected_num_rows (if given). It is only safe for expected_num_rows to be 0 or 1:
    Impala does not guarantee that a larger result set will be returned in one go. Use
    fetch_until() for repeated fetches. Returns the fetch response."""
    assert expected_num_rows is None or expected_num_rows in (0, 1)
    fetch_results_req = TCLIService.TFetchResultsReq()
    fetch_results_req.operationHandle = handle
    fetch_results_req.orientation = orientation
    fetch_results_req.maxRows = size
    fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
    HS2TestSuite.check_response(fetch_results_resp)
    if expected_num_rows is not None:
      assert self.get_num_rows(fetch_results_resp.results) == expected_num_rows
    return fetch_results_resp

  def fetch_until(self, handle, orientation, size, expected_num_rows = None):
    """Fetches rows from the given query handle until 'expected_num_rows' rows have
    been received, by repeatedly issuing fetch(size - num rows already fetched) calls.
    The first fetch uses the given orientation, all later ones use FETCH_NEXT. If
    'expected_num_rows' is None it defaults to 'size', so that the effect is to both
    ask for and expect the same number of rows.

    Asserts that every fetch succeeds, that every follow-up fetch returns at least one
    row, and that the total number of rows is exactly 'expected_num_rows'. Nothing is
    returned."""
    assert expected_num_rows is None or (size >= expected_num_rows)
    fetch_results_req = TCLIService.TFetchResultsReq()
    fetch_results_req.operationHandle = handle
    fetch_results_req.orientation = orientation
    fetch_results_req.maxRows = size
    fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
    HS2TestSuite.check_response(fetch_results_resp)
    num_rows_fetched = self.get_num_rows(fetch_results_resp.results)
    if expected_num_rows is None:
      expected_num_rows = size
    while num_rows_fetched < expected_num_rows:
      # Never ask for more than what is still missing to reach 'size' rows in total.
      fetch_results_req.maxRows = size - num_rows_fetched
      fetch_results_req.orientation = TCLIService.TFetchOrientation.FETCH_NEXT
      fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
      HS2TestSuite.check_response(fetch_results_resp)
      last_fetch_size = self.get_num_rows(fetch_results_resp.results)
      # An empty fetch before the expected count is reached is treated as a failure;
      # it also guarantees that this loop terminates.
      assert last_fetch_size > 0
      num_rows_fetched += last_fetch_size

    assert num_rows_fetched == expected_num_rows

  def fetch_fail(self, handle, orientation, expected_error_prefix):
    """Attempts to fetch rows from the query identified by the given operation handle.
    Asserts that the fetch returns an error with an error message matching the given
    expected_error_prefix. Returns the fetch response."""
    fetch_results_req = TCLIService.TFetchResultsReq()
    fetch_results_req.operationHandle = handle
    fetch_results_req.orientation = orientation
    fetch_results_req.maxRows = 100
    fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
    HS2TestSuite.check_response(fetch_results_resp, TCLIService.TStatusCode.ERROR_STATUS,
                                expected_error_prefix)
    return fetch_results_resp

  def result_metadata(self, handle):
    """Gets the schema for the query identified by the handle and asserts success."""
    req = TCLIService.TGetResultSetMetadataReq()
    req.operationHandle = handle
    resp = self.hs2_client.GetResultSetMetadata(req)
    HS2TestSuite.check_response(resp)
    return resp
Beispiel #16
0
class TestHS2(ImpalaTestSuite):
    """HiveServer2 protocol tests that talk to impalad through a raw Thrift client."""

    def setup(self):
        """Opens a buffered, binary-protocol Thrift connection to the HS2 endpoint."""
        host_and_port = IMPALAD_HS2_HOST_PORT.split(":")
        host, port = host_and_port
        sock = TSocket(host, port)
        self.socket = sock
        self.transport = TBufferedTransport(sock)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = TCLIService.Client(self.protocol)

    def teardown(self):
        """Closes the socket unless the test already closed and cleared it."""
        if not self.socket:
            return
        self.socket.close()

    @staticmethod
    def check_response(response,
                       expected=TCLIService.TStatusCode.SUCCESS_STATUS):
        """Asserts that the status code of 'response' equals 'expected'."""
        actual = response.status.statusCode
        assert actual == expected

    def test_open_session(self):
        """A session can be opened."""
        open_resp = self.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
        TestHS2.check_response(open_resp)

    def test_close_session(self):
        """A session that was opened can be closed again."""
        open_resp = self.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
        TestHS2.check_response(open_resp)

        close_req = TCLIService.TCloseSessionReq()
        close_req.sessionHandle = open_resp.sessionHandle
        close_resp = self.hs2_client.CloseSession(close_req)
        TestHS2.check_response(close_resp)

    def test_double_close_session(self):
        """Closing a session that is already closed is reported as an error."""
        open_resp = self.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
        TestHS2.check_response(open_resp)

        close_req = TCLIService.TCloseSessionReq()
        close_req.sessionHandle = open_resp.sessionHandle
        first_close = self.hs2_client.CloseSession(close_req)
        TestHS2.check_response(first_close)

        # The second close of the same handle has to fail.
        second_close = self.hs2_client.CloseSession(close_req)
        TestHS2.check_response(second_close,
                               TCLIService.TStatusCode.ERROR_STATUS)

    @needs_session
    def test_execute_select(self):
        """A simple select statement runs and returns a single row."""
        exec_req = TCLIService.TExecuteStatementReq()
        exec_req.sessionHandle = self.session_handle
        exec_req.statement = "SELECT COUNT(*) FROM functional.alltypes"
        exec_resp = self.hs2_client.ExecuteStatement(exec_req)
        TestHS2.check_response(exec_resp)

        fetch_req = TCLIService.TFetchResultsReq()
        fetch_req.operationHandle = exec_resp.operationHandle
        fetch_req.maxRows = 100
        fetch_resp = self.hs2_client.FetchResults(fetch_req)
        TestHS2.check_response(fetch_resp)

        results = fetch_resp.results
        assert len(results.rows) == 1
        assert results.startRowOffset == 0

        # A set hasMoreRows flag is reported as an expected failure (IMPALA-558).
        try:
            assert not fetch_resp.hasMoreRows
        except AssertionError:
            pytest.xfail("IMPALA-558")

    @needs_session
    def test_get_operation_status(self):
        """GetOperationStatus returns a valid state for a submitted query."""
        exec_req = TCLIService.TExecuteStatementReq()
        exec_req.sessionHandle = self.session_handle
        exec_req.statement = "SELECT COUNT(*) FROM functional.alltypes"
        exec_resp = self.hs2_client.ExecuteStatement(exec_req)
        TestHS2.check_response(exec_resp)

        status_req = TCLIService.TGetOperationStatusReq()
        status_req.operationHandle = exec_resp.operationHandle
        status_resp = self.hs2_client.GetOperationStatus(status_req)
        TestHS2.check_response(status_resp)

        acceptable_states = [
            TCLIService.TOperationState.INITIALIZED_STATE,
            TCLIService.TOperationState.RUNNING_STATE,
            TCLIService.TOperationState.FINISHED_STATE,
        ]
        assert status_resp.operationState in acceptable_states

    @needs_session
    def test_malformed_get_operation_status(self):
        """A short guid / secret yields an error (a regression would crash impalad)."""
        bogus_id = TCLIService.THandleIdentifier()
        bogus_id.guid = "short"
        bogus_id.secret = "short_secret"
        assert len(bogus_id.guid) != 16
        assert len(bogus_id.secret) != 16

        bogus_handle = TCLIService.TOperationHandle()
        bogus_handle.operationId = bogus_id
        bogus_handle.operationType = TCLIService.TOperationType.EXECUTE_STATEMENT
        bogus_handle.hasResultSet = False

        status_req = TCLIService.TGetOperationStatusReq()
        status_req.operationHandle = bogus_handle
        status_resp = self.hs2_client.GetOperationStatus(status_req)
        TestHS2.check_response(status_resp,
                               TCLIService.TStatusCode.ERROR_STATUS)

        sizes = (len(bogus_id.guid), len(bogus_id.secret))
        err_msg = "(guid size: %d, expected 16, secret size: %d, expected 16)" \
            % sizes
        assert err_msg in status_resp.status.errorMessage

    @pytest.mark.execute_serially
    def test_socket_close_forces_session_close(self):
        """Closing the underlying socket closes the session opened on it.
        See IMPALA-564."""
        open_resp = self.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
        TestHS2.check_response(open_resp)

        metric_name = "impala-server.num-open-hiveserver2-sessions"
        num_sessions = self.impalad_test_service.get_metric_value(metric_name)
        assert num_sessions > 0

        # Clear the attribute so that teardown() does not close the socket again.
        self.socket.close()
        self.socket = None
        self.impalad_test_service.wait_for_metric_value(
            metric_name, num_sessions - 1)

    @pytest.mark.execute_serially
    def test_multiple_sessions(self):
        """Several sessions may be opened on one socket connection."""
        metric_name = "impala-server.num-open-hiveserver2-sessions"
        num_sessions = self.impalad_test_service.get_metric_value(metric_name)

        seen_handles = []
        for _ in xrange(5):
            open_resp = self.hs2_client.OpenSession(
                TCLIService.TOpenSessionReq())
            TestHS2.check_response(open_resp)
            # Every session has to get a handle that was not handed out before.
            assert open_resp.sessionHandle not in seen_handles
            seen_handles.append(open_resp.sessionHandle)

        self.impalad_test_service.wait_for_metric_value(
            metric_name, num_sessions + 5)

        # Clear the attribute so that teardown() does not close the socket again.
        self.socket.close()
        self.socket = None
        self.impalad_test_service.wait_for_metric_value(
            metric_name, num_sessions)

    @needs_session
    def test_get_schemas(self):
        """GetSchemas succeeds and is recorded as a DDL query in the profile."""
        schemas_req = TCLIService.TGetSchemasReq()
        schemas_req.sessionHandle = self.session_handle
        schemas_resp = self.hs2_client.GetSchemas(schemas_req)
        TestHS2.check_response(schemas_resp)

        fetch_req = TCLIService.TFetchResultsReq()
        fetch_req.operationHandle = schemas_resp.operationHandle
        fetch_req.maxRows = 100
        fetch_resp = self.hs2_client.FetchResults(fetch_req)
        TestHS2.check_response(fetch_resp)

        query_id = operation_id_to_query_id(
            schemas_resp.operationHandle.operationId)
        profile_page = self.impalad_test_service.read_query_profile_page(
            query_id)

        # Test fix for IMPALA-619
        for expected_text in ("Sql Statement: GET_SCHEMAS", "Query Type: DDL"):
            assert expected_text in profile_page
class TestAuthorization(CustomClusterTestSuite):
  """Impersonation/authorization test that drives impalad through a raw HiveServer2
  Thrift client opened in setup()."""
  # Passed to impalad as --audit_event_log_dir by test_impersonation. Created when the
  # class body is evaluated and deleted again in teardown().
  AUDIT_LOG_DIR = tempfile.mkdtemp(dir=os.getenv('LOG_DIR'))

  def setup(self):
    """Connects an HS2 Thrift client to the endpoint in IMPALAD_HS2_HOST_PORT."""
    host, port = IMPALAD_HS2_HOST_PORT.split(":")
    self.socket = TSocket(host, port)
    self.transport = TBufferedTransport(self.socket)
    self.transport.open()
    self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
    self.hs2_client = TCLIService.Client(self.protocol)

  def teardown(self):
    """Closes the socket (unless the test already did) and removes the audit log dir."""
    if self.socket:
      self.socket.close()
    shutil.rmtree(self.AUDIT_LOG_DIR, ignore_errors=True)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=/test-warehouse/authz-policy.ini\
      --authorized_proxy_user_config=hue=%s\
      --audit_event_log_dir=%s" % (getuser(), AUDIT_LOG_DIR))
  def test_impersonation(self):
    """End-to-end impersonation + authorization test. Expects authorization to be
    configured before running this test."""
    # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
    # the module within this test function, rather than as a top-level import. This way
    # the tests in that module will not get pulled when executing this test suite. The fix
    # is to split the utility code out of the TestHS2 class and support HS2 as a first
    # class citizen in our test framework.
    from tests.hs2.test_hs2 import TestHS2
    open_session_req = TCLIService.TOpenSessionReq()
    # Connect as 'hue', the proxy user configured in the impalad arguments, and
    # impersonate the current user.
    open_session_req.username = 'hue'
    open_session_req.configuration = dict()
    open_session_req.configuration['impala.doas.user'] = getuser()
    resp = self.hs2_client.OpenSession(open_session_req)
    TestHS2.check_response(resp)

    # Try to query a table we are not authorized to access.
    self.session_handle = resp.sessionHandle
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.statement = "describe tpch_seq.lineitem"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    assert 'User \'%s\' does not have privileges to access' % getuser() in\
        str(execute_statement_resp)

    assert self.__wait_for_audit_record(user=getuser(), impersonator='hue'),\
        'No matching audit event recorded in time window'

    # Now try the same operation on a table we are authorized to access.
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.statement = "describe tpch.lineitem"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)

    TestHS2.check_response(execute_statement_resp)

    # Try to impersonate as a user we are not authorized to impersonate. The user name
    # has to match the one in the error message asserted below.
    open_session_req.configuration['impala.doas.user'] = 'some_user'
    resp = self.hs2_client.OpenSession(open_session_req)
    assert 'User \'hue\' is not authorized to impersonate \'some_user\'' in str(resp)

    # Close the socket here and clear it so that teardown() does not close it again.
    self.socket.close()
    self.socket = None

  def __wait_for_audit_record(self, user, impersonator, timeout_secs=30):
    """Waits until an audit log record is found that contains the given user and
    impersonator, or until the timeout is reached.

    Returns True if a matching record was found in any file under AUDIT_LOG_DIR, False
    if 'timeout_secs' elapsed first.
    """
    # The audit event might not show up immediately (the audit logs are flushed to disk
    # on regular intervals), so poll the audit event logs until a matching record is
    # found.
    start_time = time()
    while time() - start_time < timeout_secs:
      for audit_file_name in os.listdir(self.AUDIT_LOG_DIR):
        if self.__find_matching_audit_record(audit_file_name, user, impersonator):
          return True
      sleep(1)
    return False

  def __find_matching_audit_record(self, audit_file_name, user, impersonator):
    """Returns True if the named file in AUDIT_LOG_DIR has a record for the given user
    and impersonator.

    Every line is parsed as a JSON object; the entry stored under the smallest key is
    the one whose 'user' and 'impersonator' fields are compared.
    """
    with open(os.path.join(self.AUDIT_LOG_DIR, audit_file_name)) as audit_log_file:
      # Iterate the file lazily instead of loading every line up front.
      for line in audit_log_file:
        json_dict = json.loads(line)
        if not json_dict:
          continue
        record = json_dict[min(json_dict)]
        if record['user'] == user and record['impersonator'] == impersonator:
          return True
    return False
Beispiel #18
0
class TestAuthorization(CustomClusterTestSuite):
    def setup(self):
        """Opens a buffered Thrift connection to the HS2 port of the first impalad in
        the cluster and creates the HS2 client on top of it."""
        service = self.cluster.impalads[0].service
        self.socket = TSocket(service.hostname, service.hs2_port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

    def teardown(self):
        """Closes the HS2 socket if one is still set on this instance."""
        sock = self.socket
        if sock:
            sock.close()

    def __execute_hs2_stmt(self, statement, verify=True):
        """
        Runs a statement through the HS2 client using the current session handle.

        :param statement: the statement to execute
        :param verify: if true, the response is handed to TestHS2.check_response(),
            which is expected to fail the test on an unsuccessful execution
        :return: the ExecuteStatement response
        """
        from tests.hs2.test_hs2 import TestHS2
        request = TCLIService.TExecuteStatementReq()
        request.sessionHandle = self.session_handle
        request.statement = statement
        response = self.hs2_client.ExecuteStatement(request)
        if verify:
            TestHS2.check_response(response)
        return response

    def __open_hs2(self, user, configuration, verify=True):
        """
        Opens an HS2 session.

        :param user: the user name to put into the open-session request
        :param configuration: the session configuration to send with the request
        :param verify: if true, the response is handed to TestHS2.check_response(),
            which is expected to fail the test when the session could not be opened
        :return: the OpenSession response
        """
        from tests.hs2.test_hs2 import TestHS2
        request = TCLIService.TOpenSessionReq()
        request.username = user
        request.configuration = configuration
        response = self.hs2_client.OpenSession(request)
        if verify:
            TestHS2.check_response(response)
        return response

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        "--server_name=server1\
      --authorization_policy_file=ignored_file",
        impala_log_dir=tempfile.mkdtemp(prefix="test_deprecated_",
                                        dir=os.getenv("LOG_DIR")))
    def test_deprecated_flags(self):
        """Starts the cluster with --authorization_policy_file set and checks that a
        file in the log directory reports the flag as removed and ignored."""
        expected_message = ("Ignoring removed flag "
                            "authorization_policy_file")
        assert_file_in_dir_contains(self.impala_log_dir, expected_message)

    @staticmethod
    def _verify_show_dbs(result,
                         unique_name,
                         visibility_privileges=PRIVILEGES):
        """Checks the output of SHOW DATABASES against the per-privilege databases.

        For every privilege in PRIVILEGES, the entry for db_<unique_name>_<priv> must
        be present in result.data if the privilege is 'all' or is listed in
        'visibility_privileges', and must be absent otherwise.
        """
        for priv in PRIVILEGES:
            # Result lines are in the format of "db_name\tdb_comment"
            entry = 'db_%s_%s\t' % (unique_name, priv)
            expect_visible = priv == 'all' or priv in visibility_privileges
            if expect_visible:
                assert entry in result.data
            else:
                assert entry not in result.data

    def _test_ranger_show_stmts_helper(self, unique_name,
                                       visibility_privileges):
        """Creates one database and one table per privilege in PRIVILEGES, grants that
        privilege on them to the user returned by getuser(), and then checks what
        SHOW DATABASES and SHOW TABLES return for the admin client and for self.client.

        'visibility_privileges' lists the privileges that are expected to make an object
        visible to self.client. All databases created here are dropped again in the
        finally block, whether or not the checks pass.
        """
        scratch_db = unique_name + "_db"
        grantee = getuser()
        admin = self.create_impala_client()
        try:
            admin.execute("drop database if exists %s cascade" % scratch_db,
                          user=ADMIN)
            admin.execute("create database %s" % scratch_db, user=ADMIN)
            for priv in PRIVILEGES:
                setup_statements = (
                    "create database db_%s_%s" % (unique_name, priv),
                    "grant {0} on database db_{1}_{2} to user {3}".format(
                        priv, unique_name, priv, grantee),
                    "create table %s.tbl_%s (i int)" % (scratch_db, priv),
                    "grant {0} on table {1}.tbl_{2} to user {3}".format(
                        priv, scratch_db, priv, grantee),
                )
                for statement in setup_statements:
                    admin.execute(statement)

            # Admin can still see all the databases and tables
            admin_dbs = admin.execute("show databases")
            TestAuthorization._verify_show_dbs(admin_dbs, unique_name)
            admin_tables = admin.execute("show tables in %s" % scratch_db)
            assert admin_tables.data == ["tbl_%s" % p for p in PRIVILEGES]

            # Check SHOW DATABASES and SHOW TABLES using another username
            user_dbs = self.client.execute("show databases")
            TestAuthorization._verify_show_dbs(user_dbs, unique_name,
                                               visibility_privileges)
            user_tables = self.client.execute("show tables in %s" % scratch_db)
            # Only tables whose privilege implies one of the visibility privileges are
            # listed; ALL implies every privilege.
            assert 'tbl_all' in user_tables.data
            for visible_priv in visibility_privileges:
                assert 'tbl_%s' % visible_priv in user_tables.data
        finally:
            admin.execute("drop database if exists %s cascade" % scratch_db)
            for priv in PRIVILEGES:
                admin.execute(
                    "drop database if exists db_%s_%s cascade" %
                    (unique_name, priv))

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args=("--server-name=server1 --ranger_service_type=hive "
                      "--ranger_app_id=impala --authorization_provider=ranger "
                      "--min_privilege_set_for_show_stmts=select"),
        catalogd_args=("--server-name=server1 --ranger_service_type=hive "
                       "--ranger_app_id=impala --authorization_provider=ranger"))
    def test_ranger_show_stmts_with_select(self, unique_name):
        """Runs the SHOW statement checks with
        --min_privilege_set_for_show_stmts=select, expecting 'select' to be the
        visibility privilege."""
        visible = ['select']
        self._test_ranger_show_stmts_helper(unique_name, visible)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args=("--server-name=server1 --ranger_service_type=hive "
                      "--ranger_app_id=impala --authorization_provider=ranger "
                      "--min_privilege_set_for_show_stmts=select,insert"),
        catalogd_args=("--server-name=server1 --ranger_service_type=hive "
                       "--ranger_app_id=impala --authorization_provider=ranger"))
    def test_ranger_show_stmts_with_select_insert(self, unique_name):
        """Runs the SHOW statement checks with
        --min_privilege_set_for_show_stmts=select,insert, expecting 'select' and
        'insert' to be the visibility privileges."""
        visible = ['select', 'insert']
        self._test_ranger_show_stmts_helper(unique_name, visible)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args=("--server-name=server1 --ranger_service_type=hive "
                      "--ranger_app_id=impala --authorization_provider=ranger "
                      "--min_privilege_set_for_show_stmts=any"),
        catalogd_args=("--server-name=server1 --ranger_service_type=hive "
                       "--ranger_app_id=impala --authorization_provider=ranger"))
    def test_ranger_show_stmts_with_any(self, unique_name):
        """Runs the SHOW statement checks with --min_privilege_set_for_show_stmts=any,
        expecting every privilege in PRIVILEGES to grant visibility."""
        visible = PRIVILEGES
        self._test_ranger_show_stmts_helper(unique_name, visible)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args=("--server-name=server1 --ranger_service_type=hive "
                      "--ranger_app_id=impala --authorization_provider=ranger "
                      "--num_check_authorization_threads=%d" % (random.randint(2, 128))),
        catalogd_args=("--server-name=server1 --ranger_service_type=hive "
                       "--ranger_app_id=impala --authorization_provider=ranger"))
    def test_num_check_authorization_threads_with_ranger(self, unique_name):
        """Runs the SHOW statement checks with --num_check_authorization_threads set to
        a value between 2 and 128 that is picked at random when the decorator is
        evaluated; every privilege in PRIVILEGES is expected to grant visibility."""
        visible = PRIVILEGES
        self._test_ranger_show_stmts_helper(unique_name, visible)
Beispiel #19
0
class HS2TestSuite(ImpalaTestSuite):
  # This DB will be created/dropped for every HS2TestSuite subclass. Make the name unique
  # so different test suites don't clobber each other's DBs. The [2:] is to remove the
  # "0." from the random floating-point number.
  TEST_DB = 'hs2_db' + str(random.random())[2:]

  HS2_V6_COLUMN_TYPES = ['boolVal', 'stringVal', 'byteVal', 'i16Val', 'i32Val', 'i64Val',
                         'doubleVal', 'binaryVal']

  def setup(self):
    """Calls cleanup_db() for TEST_DB, opens the Thrift connection to the HS2 endpoint
    named by IMPALAD_HS2_HOST_PORT, creates the HS2 client and finally creates
    TEST_DB through self.client."""
    self.cleanup_db(self.TEST_DB)
    hs2_host, hs2_port = IMPALAD_HS2_HOST_PORT.split(":")
    self.socket = TSocket(hs2_host, hs2_port)
    self.transport = TBufferedTransport(self.socket)
    self.transport.open()
    self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
    self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)
    create_db_stmt = "create database %s" % self.TEST_DB
    self.client.execute(create_db_stmt)

  def teardown(self):
    """Calls cleanup_db() for TEST_DB and then closes the HS2 socket if it is set."""
    self.cleanup_db(self.TEST_DB)
    sock = self.socket
    if sock:
      sock.close()

  @staticmethod
  def check_response(response,
                     expected_status_code = TCLIService.TStatusCode.SUCCESS_STATUS,
                     expected_error_prefix = None):
    """Asserts that 'response' carries 'expected_status_code'. When a non-success status
    is expected and 'expected_error_prefix' is given, also asserts that the error
    message starts with that prefix."""
    status = response.status
    assert status.statusCode == expected_status_code
    expecting_error = expected_status_code != TCLIService.TStatusCode.SUCCESS_STATUS
    if expecting_error and expected_error_prefix is not None:
      assert status.errorMessage.startswith(expected_error_prefix)

  def close(self, op_handle):
    """Closes the operation identified by 'op_handle' and asserts that the response
    reports a success status."""
    request = TCLIService.TCloseOperationReq()
    request.operationHandle = op_handle
    response = self.hs2_client.CloseOperation(request)
    assert response.status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS

  def fetch(self, handle, orientation, size, expected_num_rows = None):
    """Issues a single fetch of at most 'size' rows for the given operation handle,
    using the given fetch orientation. Asserts that the fetch succeeds and that the
    number of rows returned equals 'expected_num_rows' if one was given, or 'size'
    otherwise. Returns the fetch response."""
    request = TCLIService.TFetchResultsReq()
    request.operationHandle = handle
    request.orientation = orientation
    request.maxRows = size
    response = self.hs2_client.FetchResults(request)
    HS2TestSuite.check_response(response)
    wanted_rows = size if expected_num_rows is None else expected_num_rows
    assert len(response.results.rows) == wanted_rows
    return response

  def fetch_fail(self, handle, orientation, expected_error_prefix):
    """Tries to fetch up to 100 rows for the given operation handle and asserts that the
    response has an error status whose message starts with 'expected_error_prefix'.
    Returns the fetch response."""
    request = TCLIService.TFetchResultsReq()
    request.operationHandle = handle
    request.orientation = orientation
    request.maxRows = 100
    response = self.hs2_client.FetchResults(request)
    error_status = TCLIService.TStatusCode.ERROR_STATUS
    HS2TestSuite.check_response(response, error_status, expected_error_prefix)
    return response
Beispiel #20
0
class HS2TestSuite(ImpalaTestSuite):
    TEST_DB = 'hs2_db'

    HS2_V6_COLUMN_TYPES = [
        'boolVal', 'stringVal', 'byteVal', 'i16Val', 'i32Val', 'i64Val',
        'doubleVal', 'binaryVal'
    ]

    def setup(self):
        """Calls cleanup_db() for TEST_DB, then opens the Thrift connection to the HS2
        endpoint named by IMPALAD_HS2_HOST_PORT and creates the HS2 client."""
        self.cleanup_db(self.TEST_DB)
        endpoint = IMPALAD_HS2_HOST_PORT.split(":")
        host, port = endpoint
        self.socket = TSocket(host, port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

    def teardown(self):
        """Calls cleanup_db() for TEST_DB and closes the HS2 socket when it is set."""
        self.cleanup_db(self.TEST_DB)
        open_socket = self.socket
        if open_socket:
            open_socket.close()

    @staticmethod
    def check_response(
            response,
            expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
            expected_error_prefix=None):
        """Asserts that the status code of 'response' equals 'expected_status_code'.
        If an error status is expected and 'expected_error_prefix' is not None, the
        error message must additionally start with that prefix."""
        assert response.status.statusCode == expected_status_code
        success = TCLIService.TStatusCode.SUCCESS_STATUS
        if expected_status_code == success or expected_error_prefix is None:
            return
        error_message = response.status.errorMessage
        assert error_message.startswith(expected_error_prefix)

    def close(self, op_handle):
        """Sends a CloseOperation request for 'op_handle' and asserts that it
        succeeded."""
        close_req = TCLIService.TCloseOperationReq()
        close_req.operationHandle = op_handle
        status = self.hs2_client.CloseOperation(close_req).status
        assert status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS

    def get_num_rows(self, result_set):
        """Returns the number of rows held by 'result_set'.

        If no columns are set, the length of result_set.rows is returned. Otherwise the
        first column is examined and the number of values in the first of its
        HS2_V6_COLUMN_TYPES fields that is set is returned.

        Raises AssertionError if columns are present but none of those typed fields is
        set on the first column.
        """
        # rows will always be set, so the only way to tell if we should use it is to see
        # if any columns are set
        if not result_set.columns:
            return len(result_set.rows)

        first_column = result_set.columns[0]
        for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
            typed_col = getattr(first_column, col_type)
            if typed_col is not None:
                return len(typed_col.values)

        # Raised explicitly (instead of 'assert False') so the failure is still reported
        # when Python runs with assertions disabled.
        raise AssertionError("No typed values are set on the first result column")

    def fetch_at_most(self, handle, orientation, size, expected_num_rows=None):
        """Fetches at most 'size' rows from the query identified by the given operation
        handle, using the given fetch orientation.

        Asserts that the fetch returns a success status and, if 'expected_num_rows' is
        given, that exactly that many rows were returned. It is only safe for
        'expected_num_rows' to be 0 or 1: Impala does not guarantee that a larger result
        set will be returned in one go. Use fetch_until() for repeated fetches.

        Returns the fetch response.
        """
        assert expected_num_rows is None or expected_num_rows in (0, 1)
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = size
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp)
        if expected_num_rows is not None:
            fetched = self.get_num_rows(fetch_results_resp.results)
            assert fetched == expected_num_rows
        return fetch_results_resp

    def fetch_until(self, handle, orientation, size, expected_num_rows=None):
        """Fetches rows from the given operation handle until 'expected_num_rows' rows
        have been received, asserting a success status on every fetch.

        The first fetch uses the given orientation and asks for 'size' rows; every
        following fetch uses FETCH_NEXT and asks only for the rows still missing from
        'size'. If 'expected_num_rows' is None it defaults to 'size', so that the effect
        is to both ask for and expect the same number of rows. A follow-up fetch that
        returns no rows fails an assertion, as does ending up with a different number
        of rows than expected. Nothing is returned."""
        assert expected_num_rows is None or (size >= expected_num_rows)
        request = TCLIService.TFetchResultsReq()
        request.operationHandle = handle
        request.orientation = orientation
        request.maxRows = size
        response = self.hs2_client.FetchResults(request)
        HS2TestSuite.check_response(response)
        total_rows = self.get_num_rows(response.results)
        target_rows = size if expected_num_rows is None else expected_num_rows
        while total_rows < target_rows:
            # Never ask for more than what is still missing from 'size'.
            request.maxRows = size - total_rows
            request.orientation = TCLIService.TFetchOrientation.FETCH_NEXT
            response = self.hs2_client.FetchResults(request)
            HS2TestSuite.check_response(response)
            batch_rows = self.get_num_rows(response.results)
            assert batch_rows > 0
            total_rows += batch_rows

        assert total_rows == target_rows

    def fetch_fail(self, handle, orientation, expected_error_prefix):
        """Requests up to 100 rows for the given operation handle and asserts that the
        fetch fails with an error status whose message starts with
        'expected_error_prefix'. Returns the fetch response."""
        failing_req = TCLIService.TFetchResultsReq()
        failing_req.operationHandle = handle
        failing_req.orientation = orientation
        failing_req.maxRows = 100
        failing_resp = self.hs2_client.FetchResults(failing_req)
        expected_status = TCLIService.TStatusCode.ERROR_STATUS
        HS2TestSuite.check_response(failing_resp, expected_status,
                                    expected_error_prefix)
        return failing_resp

    def result_metadata(self, handle):
        """Requests the result set metadata for the operation identified by 'handle',
        asserts that the request succeeded and returns the response."""
        metadata_req = TCLIService.TGetResultSetMetadataReq()
        metadata_req.operationHandle = handle
        metadata_resp = self.hs2_client.GetResultSetMetadata(metadata_req)
        HS2TestSuite.check_response(metadata_resp)
        return metadata_resp
Beispiel #21
0
class TestAuthorization(CustomClusterTestSuite):
    AUDIT_LOG_DIR = tempfile.mkdtemp(dir=os.getenv('LOG_DIR'))

    def setup(self):
        """Connects to the HS2 port of the first impalad in the cluster over a buffered
        Thrift transport and creates the HS2 client."""
        first_impalad = self.cluster.impalads[0].service
        self.socket = TSocket(first_impalad.hostname, first_impalad.hs2_port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

    def teardown(self):
        """Closes the HS2 socket if it is still set and removes the audit log directory,
        ignoring any errors raised by the removal."""
        sock = self.socket
        if sock:
            sock.close()
        shutil.rmtree(self.AUDIT_LOG_DIR, ignore_errors=True)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=%s\
      --authorization_policy_provider_class=%s" %
        (AUTH_POLICY_FILE,
         "org.apache.sentry.provider.file.LocalGroupResourceAuthorizationProvider"))
    def test_custom_authorization_provider(self):
        """Checks authorization when a custom policy provider class is configured:
        'test_user' must be refused access to tpch_seq.lineitem but must be able to
        describe tpch.lineitem."""
        from tests.hs2.test_hs2 import TestHS2
        # User is 'test_user' (defined in the authorization policy file). The same name
        # is used for the session and for the expected error message below.
        test_user = 'test_user'
        open_session_req = TCLIService.TOpenSessionReq()
        open_session_req.username = test_user
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)
        self.session_handle = resp.sessionHandle

        # Try to query a table we are not authorized to access.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch_seq.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        assert 'User \'%s\' does not have privileges to access' % test_user in\
            str(execute_statement_resp)

        # Now try the same operation on a table we are authorized to access.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=%s\
      --authorized_proxy_user_config=hue=%s" % (AUTH_POLICY_FILE, getuser()))
    def test_access_runtime_profile(self):
        """Checks who may read the runtime profile and exec summary of a query.

        For a view whose underlying tables the current user cannot access, EXPLAIN is
        refused and neither the profile nor the summary is available. For a view over
        tables the user can access, EXPLAIN works and both are available. Each case is
        run both directly as the current user and as 'hue' delegating to the current
        user, and both with the operation still open and already closed.
        """
        from tests.hs2.test_hs2 import TestHS2

        def open_session(username, delegate_to=None):
            # Opens a session, checks it and makes it the current session handle.
            req = TCLIService.TOpenSessionReq()
            req.username = username
            req.configuration = dict()
            if delegate_to is not None:
                req.configuration['impala.doas.user'] = delegate_to
            resp = self.hs2_client.OpenSession(req)
            TestHS2.check_response(resp)
            self.session_handle = resp.sessionHandle

        def execute(statement):
            # Runs 'statement' in the current session and returns the raw response.
            req = TCLIService.TExecuteStatementReq()
            req.sessionHandle = self.session_handle
            req.statement = statement
            return self.hs2_client.ExecuteStatement(req)

        def verify_profile_access(statement, has_access):
            # Checks access with the operation left open first, then closed.
            for close_operation in (False, True):
                self.__run_stmt_and_verify_profile_access(statement, has_access,
                                                          close_operation)

        # Current user can't access view's underlying tables
        open_session(getuser())
        explain_resp = execute("explain select * from functional.complex_view")
        assert 'User \'%s\' does not have privileges to EXPLAIN' % getuser() in\
            str(explain_resp)
        # User should not have access to the runtime profile
        verify_profile_access("select * from functional.complex_view", False)

        # Repeat as a delegated user. 'hue' is the proxy user authorized through
        # --authorized_proxy_user_config; the delegated user is the current user.
        open_session('hue', delegate_to=getuser())
        # User should not have access to the runtime profile
        verify_profile_access("select * from functional.complex_view", False)

        # Create a view for which the user has access to the underlying tables.
        open_session(getuser())
        create_view_resp = execute("""create view if not exists tpch.customer_view as
        select * from tpch.customer limit 1""")
        TestHS2.check_response(create_view_resp)

        # User should be able to run EXPLAIN
        TestHS2.check_response(
            execute("""explain select * from tpch.customer_view"""))

        # User should have access to the runtime profile and exec summary
        verify_profile_access("select * from tpch.customer_view", True)

        # Repeat as a delegated user; the delegated user is the current user.
        open_session('hue', delegate_to=getuser())
        # User should have access to the runtime profile and exec summary
        verify_profile_access("select * from tpch.customer_view", True)

        # Clean up
        TestHS2.check_response(execute("drop view if exists tpch.customer_view"))

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=%s\
      --authorized_proxy_user_config=foo=bar;hue=%s\
      --abort_on_failed_audit_event=false\
      --audit_event_log_dir=%s" % (AUTH_POLICY_FILE, getuser(), AUDIT_LOG_DIR))
    def test_user_impersonation(self):
        """End-to-end user impersonation + authorization test.

        The cluster is started with 'hue' configured as a proxy for the current user
        through --authorized_proxy_user_config and with audit events written to
        AUDIT_LOG_DIR; the actual checks live in __test_impersonation()."""
        self.__test_impersonation()

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        "--server_name=server1\
        --authorization_policy_file=%s\
        --authorized_proxy_user_config=hue=bar\
        --authorized_proxy_group_config=foo=bar;hue=%s\
        --abort_on_failed_audit_event=false\
        --audit_event_log_dir=%s" %
        (AUTH_POLICY_FILE, ','.join(get_groups()), AUDIT_LOG_DIR))
    def test_group_impersonation(self):
        """End-to-end group impersonation + authorization test.

        The cluster is started with 'hue' configured as a proxy for the groups returned
        by get_groups() through --authorized_proxy_group_config, while the user-level
        proxy config only names 'bar'; the actual checks live in
        __test_impersonation()."""
        self.__test_impersonation()

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
        --authorization_policy_file=%s\
        --authorized_proxy_user_config=foo=bar\
        --authorized_proxy_group_config=foo=bar\
        --abort_on_failed_audit_event=false\
        --audit_event_log_dir=%s" % (AUTH_POLICY_FILE, AUDIT_LOG_DIR))
    def test_no_matching_user_and_group_impersonation(self):
        """Checks that opening a session as 'hue' delegating to 'abc' is refused when
        neither the user nor the group proxy configuration mentions 'hue'."""
        open_session_req = TCLIService.TOpenSessionReq()
        # The connected and delegated user names must match the ones quoted in the
        # expected error message below.
        open_session_req.username = 'hue'
        open_session_req.configuration = {'impala.doas.user': 'abc'}
        resp = self.hs2_client.OpenSession(open_session_req)
        assert 'User \'hue\' is not authorized to delegate to \'abc\'' in str(
            resp)

    def __test_impersonation(self):
        """End-to-end impersonation + authorization test. Expects authorization to be
        configured before running this test.

        Connects as 'hue' delegating to the current user and checks, in order: that a
        DESCRIBE on a table the user may not access is refused and shows up in the
        audit log with the right user and impersonator; that a DESCRIBE on an
        accessible table succeeds and its runtime profile names the expected users;
        that delegating to 'some_user' is refused; and that a session without a
        delegated user reports an empty Delegated User. Closes the HS2 socket at the
        end and resets self.socket to None."""
        # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
        # the module within this test function, rather than as a top-level import. This way
        # the tests in that module will not get pulled when executing this test suite. The fix
        # is to split the utility code out of the TestHS2 class and support HS2 as a first
        # class citizen in our test framework.
        from tests.hs2.test_hs2 import TestHS2
        current_user = getuser()

        open_session_req = TCLIService.TOpenSessionReq()
        # Connected user is 'hue'
        open_session_req.username = 'hue'
        # Delegated user is the current user
        open_session_req.configuration = {'impala.doas.user': current_user}
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Try to query a table we are not authorized to access.
        self.session_handle = resp.sessionHandle
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch_seq.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        assert 'User \'%s\' does not have privileges to access' % current_user in\
            str(execute_statement_resp)

        assert self.__wait_for_audit_record(user=current_user, impersonator='hue'),\
            'No matching audit event recorded in time window'

        # Now try the same operation on a table we are authorized to access.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

        # Verify the correct user information is in the runtime profile
        query_id = operation_id_to_query_id(
            execute_statement_resp.operationHandle.operationId)
        impalad_service = self.cluster.impalads[0].service
        profile_page = impalad_service.read_query_profile_page(query_id)
        self.__verify_profile_user_fields(profile_page,
                                          effective_user=current_user,
                                          delegated_user=current_user,
                                          connected_user='hue')

        # Try to delegate to a user we are not authorized to delegate to.
        open_session_req.configuration['impala.doas.user'] = 'some_user'
        resp = self.hs2_client.OpenSession(open_session_req)
        assert 'User \'hue\' is not authorized to delegate to \'some_user\'' in str(
            resp)

        # Create a new session which does not have a do_as_user.
        # NOTE(review): '******' here and in the profile check below looks like a masked
        # literal, presumably 'hue' -- confirm against the original test. It is left
        # unchanged because the three occurrences are consistent with each other.
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Run a simple query, which should succeed.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = resp.sessionHandle
        execute_statement_req.statement = "select 1"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

        # Verify the correct user information is in the runtime profile. Since there is
        # no do_as_user the Delegated User field should be empty.
        query_id = operation_id_to_query_id(
            execute_statement_resp.operationHandle.operationId)
        impalad_service = self.cluster.impalads[0].service
        profile_page = impalad_service.read_query_profile_page(query_id)
        self.__verify_profile_user_fields(profile_page,
                                          effective_user='******',
                                          delegated_user='',
                                          connected_user='******')

        self.socket.close()
        self.socket = None

    def __verify_profile_user_fields(self, profile_str, effective_user,
                                     connected_user, delegated_user):
        """Asserts that the runtime profile text 'profile_str' contains User, Connected
        User and Delegated User lines that carry exactly the given values."""
        expected_lines = (
            '\n    User: %s\n' % effective_user,
            '\n    Connected User: %s\n' % connected_user,
            '\n    Delegated User: %s\n' % delegated_user,
        )
        for expected_line in expected_lines:
            assert expected_line in profile_str

    def __wait_for_audit_record(self, user, impersonator, timeout_secs=30):
        """Polls the audit log directory until a record for 'user' and 'impersonator'
        is found. Returns True as soon as any file in AUDIT_LOG_DIR contains a match
        and False once 'timeout_secs' have elapsed without one.
        """
        # Audit events are flushed to disk on regular intervals rather than immediately,
        # so re-scan every file in the directory once a second until time runs out.
        began = time()
        while time() - began < timeout_secs:
            candidates = os.listdir(self.AUDIT_LOG_DIR)
            if any(self.__find_matching_audit_record(name, user, impersonator)
                   for name in candidates):
                return True
            sleep(1)
        return False

    def __find_matching_audit_record(self, audit_file_name, user,
                                     impersonator):
        """Returns True if the audit log file 'audit_file_name' (a name relative to
        AUDIT_LOG_DIR) contains a record whose 'user' and 'impersonator' fields equal
        the given values, and False otherwise.

        Every line is parsed as one JSON object and the entry stored under its smallest
        key is the one that is inspected. Lines that are not valid JSON (blank lines, or
        a record that is only partially written while the log is being polled) are
        skipped so that the caller can simply look again later.
        """
        audit_path = os.path.join(self.AUDIT_LOG_DIR, audit_file_name)
        with open(audit_path) as audit_log_file:
            # Iterate the file lazily instead of loading it with readlines().
            for line in audit_log_file:
                try:
                    json_dict = json.loads(line)
                except ValueError:
                    # Not (yet) a complete JSON document; keep scanning.
                    continue
                if not json_dict: continue
                record = json_dict[min(json_dict)]
                if record['user'] == user and\
                    record['impersonator'] == impersonator:
                    return True
        return False

    def __run_stmt_and_verify_profile_access(self, stmt, has_access,
                                             close_operation):
        """Runs 'stmt' in the current session and then requests its runtime profile and
        exec summary.

        If 'has_access' is true, both requests must succeed and the profile must
        contain "Plan: ". Otherwise both responses must carry the "not authorized"
        message for the current user. If 'close_operation' is true, the operation is
        closed before the profile and exec summary are requested."""
        from tests.hs2.test_hs2 import TestHS2

        def assert_denied(response):
            # The same refusal text is expected for the profile and the summary.
            assert "User %s is not authorized to access the runtime profile or "\
                "execution summary." % (getuser()) in str(response)

        stmt_req = TCLIService.TExecuteStatementReq()
        stmt_req.sessionHandle = self.session_handle
        stmt_req.statement = stmt
        stmt_resp = self.hs2_client.ExecuteStatement(stmt_req)
        TestHS2.check_response(stmt_resp)
        op_handle = stmt_resp.operationHandle

        if close_operation:
            close_req = TCLIService.TCloseOperationReq()
            close_req.operationHandle = op_handle
            TestHS2.check_response(self.hs2_client.CloseOperation(close_req))

        profile_req = ImpalaHiveServer2Service.TGetRuntimeProfileReq()
        profile_req.operationHandle = op_handle
        profile_req.sessionHandle = self.session_handle
        profile_resp = self.hs2_client.GetRuntimeProfile(profile_req)

        if has_access:
            TestHS2.check_response(profile_resp)
            assert "Plan: " in profile_resp.profile
        else:
            assert_denied(profile_resp)

        summary_req = ImpalaHiveServer2Service.TGetExecSummaryReq()
        summary_req.operationHandle = op_handle
        summary_req.sessionHandle = self.session_handle
        summary_resp = self.hs2_client.GetExecSummary(summary_req)

        if has_access:
            TestHS2.check_response(summary_resp)
        else:
            assert_denied(summary_resp)
Beispiel #22
0
class HS2TestSuite(ImpalaTestSuite):
  HS2_V6_COLUMN_TYPES = ['boolVal', 'stringVal', 'byteVal', 'i16Val', 'i32Val', 'i64Val',
                         'doubleVal', 'binaryVal']

  def setup(self):
    """Opens a Thrift connection to the HS2 endpoint named by IMPALAD_HS2_HOST_PORT.

    The socket, buffered transport, binary protocol and ImpalaHiveServer2Service client
    are stored on the instance."""
    hs2_host, hs2_port = IMPALAD_HS2_HOST_PORT.split(":")
    sock = TSocket(hs2_host, hs2_port)
    self.socket = sock
    buffered = TBufferedTransport(sock)
    self.transport = buffered
    # The transport is opened before the protocol and client are layered on top of it.
    buffered.open()
    self.protocol = TBinaryProtocol.TBinaryProtocol(buffered)
    self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

  def teardown(self):
    """Closes the socket stored on the instance, if it is truthy."""
    if self.socket:
      self.socket.close()

  @staticmethod
  def check_response(response,
                     expected_status_code = TCLIService.TStatusCode.SUCCESS_STATUS,
                     expected_error_prefix = None):
    """Asserts that the status code of 'response' equals 'expected_status_code'.

    When a non-success status is expected and 'expected_error_prefix' is given, also
    asserts that the response's error message starts with that prefix."""
    status = response.status
    assert status.statusCode == expected_status_code
    expecting_success = expected_status_code == TCLIService.TStatusCode.SUCCESS_STATUS
    if expecting_success or expected_error_prefix is None:
      return
    assert status.errorMessage.startswith(expected_error_prefix)

  @staticmethod
  def check_invalid_session(response):
    """Asserts that 'response' is the error returned for an invalid session, i.e. an
    ERROR_STATUS whose message starts with "Invalid session id:" (the session doesn't
    exist or the secret is invalid)."""
    error_status = TCLIService.TStatusCode.ERROR_STATUS
    HS2TestSuite.check_response(response, error_status, "Invalid session id:")

  @staticmethod
  def check_invalid_query(response, expect_legacy_err=False):
    """Asserts that 'response' is the error returned for an invalid query, i.e. the
    query doesn't exist, doesn't match the session provided, or the secret is invalid.

    With 'expect_legacy_err' set, the non-standard "Query id ..." message prefix is
    expected instead of the standard "Invalid query handle:" prefix."""
    # Some operations return non-standard errors like "Query id ... not found"; the
    # standard message is the one we should converge on.
    expected_err = "Query id" if expect_legacy_err else "Invalid query handle:"
    error_status = TCLIService.TStatusCode.ERROR_STATUS
    HS2TestSuite.check_response(response, error_status, expected_err)

  @staticmethod
  def check_profile_access_denied(response, user):
    """Asserts that 'response' is the ERROR_STATUS returned when 'user' is not
    authorised to access the query's runtime profile or execution summary."""
    denied_prefix = ("User {0} is not authorized to access the runtime "
                     "profile or execution summary").format(user)
    error_status = TCLIService.TStatusCode.ERROR_STATUS
    HS2TestSuite.check_response(response, error_status, denied_prefix)

  def close(self, op_handle):
    """Issues CloseOperation for 'op_handle' and asserts that it succeeded."""
    req = TCLIService.TCloseOperationReq()
    req.operationHandle = op_handle
    close_status = self.hs2_client.CloseOperation(req).status
    assert close_status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS

  @staticmethod
  def get_num_rows(result_set):
    """Returns the number of rows held by 'result_set'.

    If no columns are set, the length of 'result_set.rows' is returned. Otherwise the
    count is the number of values in the first typed field (see HS2_V6_COLUMN_TYPES)
    that is set on the first column. Asserts if the first column has none of those
    fields set."""
    # rows will always be set, so the only way to tell if we should use it is to see if
    # any columns are set
    if result_set.columns is None or len(result_set.columns) == 0:
      return len(result_set.rows)

    first_col = result_set.columns[0]
    for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
      typed_col = getattr(first_col, col_type)
      if typed_col is not None:
        return len(typed_col.values)

    assert False, "First column has none of the expected typed fields set"

  def fetch(self, fetch_results_req):
    """Wrapper around ImpalaHiveServer2Service.FetchResults(fetch_results_req) that
    re-issues the request for as long as the response status is
    STILL_EXECUTING_STATUS, i.e. rows are not yet available for consumption (e.g. the
    query is still running and has not produced any rows yet; this status may be
    returned if FETCH_ROWS_TIMEOUT_MS is hit). The first response with any other status
    is asserted to be a success and returned. There is no upper bound on the number of
    retries."""
    still_executing = TCLIService.TStatusCode.STILL_EXECUTING_STATUS
    while True:
      fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
      if fetch_results_resp.status.statusCode != still_executing:
        break
    HS2TestSuite.check_response(fetch_results_resp)
    return fetch_results_resp

  def fetch_at_most(self, handle, orientation, size, expected_num_rows = None):
    """Issues a single fetch of at most 'size' rows from the query identified by
    'handle', using the given fetch orientation, and returns the response. The fetch is
    asserted to succeed; if 'expected_num_rows' is given, the number of rows returned
    is asserted to equal it. Only 0 or 1 are accepted for 'expected_num_rows': Impala
    does not guarantee that a larger result set will be returned in one go. Use
    fetch_until() for repeated fetches."""
    assert expected_num_rows is None or expected_num_rows in (0, 1)
    req = TCLIService.TFetchResultsReq()
    req.operationHandle = handle
    req.orientation = orientation
    req.maxRows = size
    resp = self.fetch(req)
    if expected_num_rows is None:
      return resp
    assert self.get_num_rows(resp.results) == expected_num_rows
    return resp

  def fetch_until(self, handle, orientation, size, expected_num_rows = None):
    """Fetches from the given query handle until 'expected_num_rows' rows have been
    received, defaulting to 'size' when 'expected_num_rows' is None. The first fetch
    uses the given orientation and asks for 'size' rows; each follow-up fetch uses
    FETCH_NEXT and asks for (size - rows fetched so far). Asserts that every follow-up
    fetch returns at least one row and that the final total equals the expected count.
    Nothing is returned."""
    assert expected_num_rows is None or (size >= expected_num_rows)
    req = TCLIService.TFetchResultsReq()
    req.operationHandle = handle
    req.orientation = orientation
    req.maxRows = size
    total_rows = self.get_num_rows(self.fetch(req).results)
    target_rows = size if expected_num_rows is None else expected_num_rows
    while total_rows < target_rows:
      # Never ask for more than what is left of 'size'.
      req.maxRows = size - total_rows
      req.orientation = TCLIService.TFetchOrientation.FETCH_NEXT
      batch_rows = self.get_num_rows(self.fetch(req).results)
      assert batch_rows > 0
      total_rows += batch_rows

    assert total_rows == target_rows

  def fetch_fail(self, handle, orientation, expected_error_prefix):
    """Issues one FetchResults call (maxRows fixed at 100) for the given operation
    handle and asserts that it returns ERROR_STATUS with an error message starting with
    'expected_error_prefix'. Returns the response."""
    req = TCLIService.TFetchResultsReq()
    req.operationHandle = handle
    req.orientation = orientation
    req.maxRows = 100
    resp = self.hs2_client.FetchResults(req)
    error_status = TCLIService.TStatusCode.ERROR_STATUS
    HS2TestSuite.check_response(resp, error_status, expected_error_prefix)
    return resp

  def result_metadata(self, handle):
    """Returns the GetResultSetMetadata response (the schema) for the query identified
    by 'handle', after asserting that the call succeeded."""
    metadata_req = TCLIService.TGetResultSetMetadataReq()
    metadata_req.operationHandle = handle
    metadata_resp = self.hs2_client.GetResultSetMetadata(metadata_req)
    HS2TestSuite.check_response(metadata_resp)
    return metadata_resp

  def column_results_to_string(self, columns):
    """Quick-and-dirty way to get a readable string to compare the output of a
    columnar-oriented query to its expected output.

    Returns a (num_rows, formatted) tuple. 'num_rows' is the number of values in the
    first column's typed field; 'formatted' has one line per row, with the cells joined
    by ", " and "NULL" printed for every cell whose bit is set in the column's 'nulls'
    bitmap. Empty or missing 'columns' yields (0, "")."""
    if not columns:
      return (0, "")

    def typed_column(column):
      # Returns the first typed field that is set on 'column', or None if none is set.
      for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
        typed_col = getattr(column, col_type)
        if typed_col is not None:
          return typed_col
      return None

    # Determine the number of rows from the typed field of the first column.
    first_col = typed_column(columns[0])
    num_rows = len(first_col.values) if first_col is not None else 0

    # Columns without any typed field contribute nothing to a row.
    typed_cols = [tc for tc in map(typed_column, columns) if tc is not None]
    # bytearray indexing yields integers on both Python 2 and Python 3, unlike
    # ord(nulls[i]), which only works when indexing returns a one-character string.
    null_bitmaps = [bytearray(tc.nulls) for tc in typed_cols] if num_rows else []

    lines = []
    for i in range(num_rows):
      row = []
      for typed_col, nulls in zip(typed_cols, null_bitmaps):
        # Bit (i % 8) of byte (i // 8) flags row i as NULL; '//' keeps the index integral.
        if nulls[i // 8] & (1 << (i % 8)):
          row.append("NULL")
        else:
          row.append(str(typed_col.values[i]))
      lines.append(", ".join(row) + "\n")
    return (num_rows, "".join(lines))

  def get_operation_status(self, operation_handle):
    """Issues GetOperationStatus for 'operation_handle' and returns the
    TGetOperationStatusResp as-is (the status is not checked here)."""
    status_req = TCLIService.TGetOperationStatusReq()
    status_req.operationHandle = operation_handle
    return self.hs2_client.GetOperationStatus(status_req)

  def wait_for_operation_state(self, operation_handle, expected_state, \
                               timeout = 10, interval = 1):
    """Waits for the operation to reach expected_state by polling GetOperationStatus every
    interval seconds, returning the TGetOperationStatusResp, or raising an assertion after
    timeout seconds. Every polled response is also asserted to have a success status."""
    start_time = time()
    # Stays None if the loop body never runs (e.g. timeout <= 0), so that the failure
    # message below can still be built.
    get_operation_status_resp = None
    while (time() - start_time < timeout):
      get_operation_status_resp = self.get_operation_status(operation_handle)
      HS2TestSuite.check_response(get_operation_status_resp)
      # Compare by value: 'is' tests object identity, which is not guaranteed to hold
      # for two equal state values.
      if get_operation_status_resp.operationState == expected_state:
        return get_operation_status_resp
      sleep(interval)
    actual_state = None
    if get_operation_status_resp is not None:
      actual_state = get_operation_status_resp.operationState
    assert False, 'Did not reach expected operation state %s in time, actual state was ' \
        '%s' % (expected_state, actual_state)

  def wait_for_admission_control(self, operation_handle, timeout = 10):
    """Waits for the admission control processing of the query to complete by polling
    GetOperationStatus every 0.05 seconds, returning the TGetOperationStatusResp, or
    raising an assertion after timeout seconds. Processing counts as complete once the
    operation state lies strictly between INITIALIZED_STATE and PENDING_STATE."""
    start_time = time()
    # Stays None if the loop body never runs (e.g. timeout <= 0), so that the failure
    # message below can still be built.
    get_operation_status_resp = None
    while (time() - start_time < timeout):
      get_operation_status_resp = self.get_operation_status(operation_handle)
      HS2TestSuite.check_response(get_operation_status_resp)
      if TCLIService.TOperationState.INITIALIZED_STATE < \
          get_operation_status_resp.operationState < \
          TCLIService.TOperationState.PENDING_STATE:
        return get_operation_status_resp
      sleep(0.05)
    current_state = None
    if get_operation_status_resp is not None:
      current_state = get_operation_status_resp.operationState
    assert False, 'Did not complete admission control processing in time, current ' \
        'operation state of query: %s' % (current_state)

  def execute_statement(self, statement, conf_overlay=None,
                        expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
                        expected_error_prefix=None):
    """Runs 'statement' in the session stored in self.session_handle, applying
    'conf_overlay' as the request's confOverlay when it is non-empty. The response is
    checked against the expected status code and error prefix and then returned."""
    req = TCLIService.TExecuteStatementReq()
    req.sessionHandle = self.session_handle
    req.statement = statement
    if conf_overlay:
      req.confOverlay = conf_overlay
    resp = self.hs2_client.ExecuteStatement(req)
    HS2TestSuite.check_response(resp, expected_status_code, expected_error_prefix)
    return resp
Beispiel #23
0
class ClientEx:

    def __init__(self, address):
        """Create the Thrift socket, binary protocol and client, then open the socket.

        address[0] and address[1] are passed to TSocket as its first two arguments
        (presumably host and port -- confirm against the caller).
        """
        endpoint_a, endpoint_b = address[0], address[1]
        self.tr = TSocket(endpoint_a, endpoint_b)
        self.protocol = TBinaryProtocol(self.tr)
        self.client = Client(self.protocol)
        # The socket is opened last, once the protocol and client have been built on it.
        self.tr.open()

    def close(self):
        """Close the underlying socket opened in __init__."""
        self.tr.close()

    def wallet_balance_get(self, pub_key_bytes):
        """Return the result of the client's WalletBalanceGet call for pub_key_bytes."""
        return self.client.WalletBalanceGet(pub_key_bytes)

    def __fee(self, value):
        sign = 0
        if value < 0.0:
            value = 1
        value = abs(value)
        expf = 0
        if value != 0.0:
            expf = math.log10(value)
        if expf >= 0:
            expf = expf + .5
        else:
            expf = expf - .5
        expi = int(expf)
        value /= math.pow(10, expi)
        if value >= 1.0:
            value *= 0.1
            expi = expi + 1
        exp = expi + 18
        if exp < 0 or exp > 28:
            print('exponent value {0} out of range [0, 28]'.format(exp))
            return -1
        frac = round(value * 1024)
        return sign * 32768 + exp * 1024 + frac

    def transfer_coins(self, integral, fraction, fee, keys):
        """Build a signed transfer with create_transaction, submit it through
        TransactionFlow and print the node's response."""
        transaction = self.create_transaction(integral, fraction, fee, keys)
        res = self.client.TransactionFlow(transaction)
        print(res)

    def create_transaction(self, integral, fraction, fee, keys):
        """Build and sign a transfer from keys.public_key_bytes to
        keys.target_public_key_bytes.

        The inner id is the source wallet's lastTransactionInnerId + 1 as reported by
        WalletTransactionsCountGet. The amount is (integral, fraction), the currency is
        fixed to 1 and the fee is encoded with __fee. The signature is the ed25519
        signature, made with keys.private_key_bytes, over the packed fields. Returns the
        Transaction.
        """
        tr = Transaction()
        counters = self.client.WalletTransactionsCountGet(keys.public_key_bytes)
        tr.id = counters.lastTransactionInnerId + 1
        tr.source = keys.public_key_bytes
        tr.target = keys.target_public_key_bytes

        amount = Amount()
        amount.integral = integral
        amount.fraction = fraction
        tr.amount = amount
        tr.currency = 1

        commission = AmountCommission()
        commission.commission = self.__fee(fee)
        tr.fee = commission

        # Layout ('=' means standard sizes, no alignment padding):
        #   6s  inner id, 6 bytes little-endian
        #   32s source public key
        #   32s target public key
        #   l   amount integral (4 bytes)
        #   q   amount fraction (8 bytes)
        #   h   fee commission (2 bytes)
        #   b   currency (1 byte)
        #   b   number of user fields (1 byte), 0 here
        inner_id_bytes = bytearray(tr.id.to_bytes(6, 'little'))
        serial_transaction = pack('=6s32s32slqhbb',
                                  inner_id_bytes,
                                  tr.source,
                                  tr.target,
                                  tr.amount.integral,
                                  tr.amount.fraction,
                                  tr.fee.commission,
                                  tr.currency,
                                  0)

        tr.signature = ed25519.SigningKey(keys.private_key_bytes).sign(serial_transaction)
        return tr

    def deploy_smart_contract(self, code, fee, keys):
        """Build a deployment transaction with create_transaction_with_smart_contract,
        submit it through TransactionFlow and print the node's response."""
        transaction = self.create_transaction_with_smart_contract(code, fee, keys)
        res = self.client.TransactionFlow(transaction)
        print(res)

    def create_transaction_with_smart_contract(self, code, fee, keys):
        """Build and sign a transaction that deploys a smart contract.

        An empty 'code' is replaced by a built-in sample contract. The source is
        compiled through SmartContractCompile; if the returned status code is not 0 the
        status message is printed and the string 'compile error' is returned instead of
        a transaction. Otherwise the target is derived from the blake2s hash of the
        inner id followed by all byte codes, a user-field blob describing the
        deployment is appended to the packed transaction, and the result is signed with
        keys.private_key_bytes. Returns the Transaction.
        """
        if code == "":
            code = 'import com.credits.scapi.annotations.*; import com.credits.scapi.v0.*; public class ' \
                   'MySmartContract extends SmartContract { public MySmartContract() {} public String hello2(String ' \
                   'say) { return \"Hello\" + say; } }'

        tr = Transaction()
        tr.id = self.client.WalletTransactionsCountGet(keys.public_key_bytes).lastTransactionInnerId + 1
        tr.source = keys.public_key_bytes
        tr.target = keys.target_public_key_bytes
        tr.amount = Amount()
        tr.amount.integral = 0
        tr.amount.fraction = 0
        tr.currency = 1

        tr.fee = AmountCommission()
        tr.fee.commission = self.__fee(fee)

        # Layout ('=' means standard sizes, no alignment padding): 6-byte inner id,
        # 32-byte source key, 32-byte target key, 4-byte integral, 8-byte fraction,
        # 2-byte fee, 1-byte currency, 1-byte user-field count (1 here).
        serial_transaction = pack('=6s32s32slqhbb',
                                  bytearray(tr.id.to_bytes(6, 'little')),
                                  tr.source,
                                  tr.target,
                                  tr.amount.integral,
                                  tr.amount.fraction,
                                  tr.fee.commission,
                                  tr.currency,
                                  1)

        target = pack('=6s', bytearray(tr.id.to_bytes(6, 'little')))
        byte_code = self.client.SmartContractCompile(code)
        if byte_code.status.code == 0:
            for bco in byte_code.byteCodeObjects:
                target = target + bco.byteCode
        else:
            # Read the message from the same lower-case 'status' field that is checked
            # above; the message attribute name is presumed to be 'message'.
            print(byte_code.status.message)
            return 'compile error'

        tr.smartContract = SmartContractInvocation()
        tr.smartContract.smartContractDeploy = SmartContractDeploy()
        tr.smartContract.smartContractDeploy.sourceCode = code

        # NOTE(review): attribute is spelled 'ForgetNewState' here; confirm against the
        # field name declared on SmartContractInvocation.
        tr.smartContract.ForgetNewState = False
        # NOTE(review): the packed bytes above were built with the previous tr.target,
        # and this stores a hex string rather than raw digest bytes -- confirm intended.
        tr.target = hashlib.blake2s(target).hexdigest()

        uf = bytearray(b'\x11\x00\x01\x00\x00\x00\x00\x015\x00\x02\x12\x00\x00\x00\x00\x15\x00\x03\x11\x00\x00\x00\x00\x02\x00\x04\x00\x12\x00\x05\x11\x00\x01')

        uf = uf + pack('=6s', self.reverse(len(code)))
        uf = uf + bytearray(code.encode())
        uf = uf + bytearray(b'\x15\x00\x02\x12')
        uf = uf + self.reverse(len(byte_code.byteCodeObjects))

        # Collect every compiled object so that none is dropped from the deployment.
        deployed_objects = []
        for bco in byte_code.byteCodeObjects:
            uf = uf + b'1101'
            uf = uf + self.reverse(len(bco.name))
            uf = uf + bytearray(bco.name.encode())
            uf = uf + b'1102'
            uf = uf + self.reverse(len(bco.byteCode))
            uf = uf + bco.byteCode

            nbco = ByteCodeObject()
            nbco.name = bco.name
            nbco.byteCode = bco.byteCode
            deployed_objects.append(nbco)

            uf = uf + b'\x00'

        # Leave the field untouched when nothing was compiled, as before.
        if deployed_objects:
            tr.smartContract.smartContractDeploy.byteCodeObjects = deployed_objects

        uf = uf + b'\x11\x00\x03\x00\x00\x00\x00\x08\x00\x04\x00\x00\x00\x00\x00'
        uf = uf + b'\x00'

        serial_transaction = serial_transaction + self.reverse(len(uf))
        serial_transaction = serial_transaction + uf

        signing_key = ed25519.SigningKey(keys.private_key_bytes)
        sign = signing_key.sign(serial_transaction)
        tr.signature = sign

        return tr

    def reverse(self, a):
        a = a.to_bytes(6, 'little')
        a = bytearray(a)
        a.reverse()
        return a
class HS2TestSuite(ImpalaTestSuite):
  HS2_V6_COLUMN_TYPES = ['boolVal', 'stringVal', 'byteVal', 'i16Val', 'i32Val', 'i64Val',
                         'doubleVal', 'binaryVal']

  def setup(self):
    """Opens a Thrift connection to the HS2 endpoint named by IMPALAD_HS2_HOST_PORT.

    The socket, buffered transport, binary protocol and ImpalaHiveServer2Service client
    are stored on the instance."""
    hs2_host, hs2_port = IMPALAD_HS2_HOST_PORT.split(":")
    sock = TSocket(hs2_host, hs2_port)
    self.socket = sock
    buffered = TBufferedTransport(sock)
    self.transport = buffered
    # The transport is opened before the protocol and client are layered on top of it.
    buffered.open()
    self.protocol = TBinaryProtocol.TBinaryProtocol(buffered)
    self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

  def teardown(self):
    """Closes the socket stored on the instance, if it is truthy."""
    if self.socket:
      self.socket.close()

  @staticmethod
  def check_response(response,
                     expected_status_code = TCLIService.TStatusCode.SUCCESS_STATUS,
                     expected_error_prefix = None):
    """Asserts that the status code of 'response' equals 'expected_status_code'.

    When a non-success status is expected and 'expected_error_prefix' is given, also
    asserts that the response's error message starts with that prefix."""
    status = response.status
    assert status.statusCode == expected_status_code
    expecting_success = expected_status_code == TCLIService.TStatusCode.SUCCESS_STATUS
    if expecting_success or expected_error_prefix is None:
      return
    assert status.errorMessage.startswith(expected_error_prefix)

  def close(self, op_handle):
    """Issues CloseOperation for 'op_handle' and asserts that it succeeded."""
    req = TCLIService.TCloseOperationReq()
    req.operationHandle = op_handle
    close_status = self.hs2_client.CloseOperation(req).status
    assert close_status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS

  def get_num_rows(self, result_set):
    """Returns the number of rows held by 'result_set'.

    If no columns are set, the length of 'result_set.rows' is returned. Otherwise the
    count is the number of values in the first typed field (see HS2_V6_COLUMN_TYPES)
    that is set on the first column. Asserts if the first column has none of those
    fields set."""
    # rows will always be set, so the only way to tell if we should use it is to see if
    # any columns are set
    if result_set.columns is None or len(result_set.columns) == 0:
      return len(result_set.rows)

    first_col = result_set.columns[0]
    for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
      typed_col = getattr(first_col, col_type)
      if typed_col is not None:
        return len(typed_col.values)

    assert False, "First column has none of the expected typed fields set"

  def fetch_at_most(self, handle, orientation, size, expected_num_rows = None):
    """Issues a single FetchResults call for at most 'size' rows from the query
    identified by 'handle', using the given fetch orientation, and returns the
    response. The fetch is asserted to succeed; if 'expected_num_rows' is given, the
    number of rows returned is asserted to equal it. Only 0 or 1 are accepted for
    'expected_num_rows': Impala does not guarantee that a larger result set will be
    returned in one go. Use fetch_until() for repeated fetches."""
    assert expected_num_rows is None or expected_num_rows in (0, 1)
    req = TCLIService.TFetchResultsReq()
    req.operationHandle = handle
    req.orientation = orientation
    req.maxRows = size
    resp = self.hs2_client.FetchResults(req)
    HS2TestSuite.check_response(resp)
    if expected_num_rows is None:
      return resp
    assert self.get_num_rows(resp.results) == expected_num_rows
    return resp

  def fetch_until(self, handle, orientation, size, expected_num_rows = None):
    """Fetches from the given query handle until 'expected_num_rows' rows have been
    received, defaulting to 'size' when 'expected_num_rows' is None. The first fetch
    uses the given orientation and asks for 'size' rows; each follow-up fetch uses
    FETCH_NEXT and asks for (size - rows fetched so far). Asserts that every fetch
    succeeds, that every follow-up fetch returns at least one row, and that the final
    total equals the expected count. Nothing is returned."""
    assert expected_num_rows is None or (size >= expected_num_rows)
    req = TCLIService.TFetchResultsReq()
    req.operationHandle = handle
    req.orientation = orientation
    req.maxRows = size
    resp = self.hs2_client.FetchResults(req)
    HS2TestSuite.check_response(resp)
    total_rows = self.get_num_rows(resp.results)
    target_rows = size if expected_num_rows is None else expected_num_rows
    while total_rows < target_rows:
      # Never ask for more than what is left of 'size'.
      req.maxRows = size - total_rows
      req.orientation = TCLIService.TFetchOrientation.FETCH_NEXT
      resp = self.hs2_client.FetchResults(req)
      HS2TestSuite.check_response(resp)
      batch_rows = self.get_num_rows(resp.results)
      assert batch_rows > 0
      total_rows += batch_rows

    assert total_rows == target_rows

  def fetch_fail(self, handle, orientation, expected_error_prefix):
    """Issues one FetchResults call (maxRows fixed at 100) for the given operation
    handle and asserts that it returns ERROR_STATUS with an error message starting with
    'expected_error_prefix'. Returns the response."""
    req = TCLIService.TFetchResultsReq()
    req.operationHandle = handle
    req.orientation = orientation
    req.maxRows = 100
    resp = self.hs2_client.FetchResults(req)
    error_status = TCLIService.TStatusCode.ERROR_STATUS
    HS2TestSuite.check_response(resp, error_status, expected_error_prefix)
    return resp

  def result_metadata(self, handle):
    """Returns the GetResultSetMetadata response (the schema) for the query identified
    by 'handle', after asserting that the call succeeded."""
    metadata_req = TCLIService.TGetResultSetMetadataReq()
    metadata_req.operationHandle = handle
    metadata_resp = self.hs2_client.GetResultSetMetadata(metadata_req)
    HS2TestSuite.check_response(metadata_resp)
    return metadata_resp

  def column_results_to_string(self, columns):
    """Quick-and-dirty way to get a readable string to compare the output of a
    columnar-oriented query to its expected output.

    Returns a (num_rows, formatted) tuple. 'num_rows' is the number of values in the
    first column's typed field; 'formatted' has one line per row, with the cells joined
    by ", " and "NULL" printed for every cell whose bit is set in the column's 'nulls'
    bitmap. Empty or missing 'columns' yields (0, "")."""
    if not columns:
      return (0, "")

    def typed_column(column):
      # Returns the first typed field that is set on 'column', or None if none is set.
      for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
        typed_col = getattr(column, col_type)
        if typed_col is not None:
          return typed_col
      return None

    # Determine the number of rows from the typed field of the first column.
    first_col = typed_column(columns[0])
    num_rows = len(first_col.values) if first_col is not None else 0

    # Columns without any typed field contribute nothing to a row.
    typed_cols = [tc for tc in map(typed_column, columns) if tc is not None]
    # bytearray indexing yields integers on both Python 2 and Python 3, unlike
    # ord(nulls[i]), which only works when indexing returns a one-character string.
    null_bitmaps = [bytearray(tc.nulls) for tc in typed_cols] if num_rows else []

    lines = []
    for i in range(num_rows):
      row = []
      for typed_col, nulls in zip(typed_cols, null_bitmaps):
        # Bit (i % 8) of byte (i // 8) flags row i as NULL; '//' keeps the index integral.
        if nulls[i // 8] & (1 << (i % 8)):
          row.append("NULL")
        else:
          row.append(str(typed_col.values[i]))
      lines.append(", ".join(row) + "\n")
    return (num_rows, "".join(lines))

  def get_operation_status(self, operation_handle):
    """Issues GetOperationStatus for 'operation_handle' and returns the
    TGetOperationStatusResp as-is (the status is not checked here)."""
    status_req = TCLIService.TGetOperationStatusReq()
    status_req.operationHandle = operation_handle
    return self.hs2_client.GetOperationStatus(status_req)

  def wait_for_operation_state(self, operation_handle, expected_state, \
                               timeout = 10, interval = 1):
    """Waits for the operation to reach expected_state by polling GetOperationStatus every
    interval seconds, returning the TGetOperationStatusResp, or raising an assertion after
    timeout seconds. Every polled response is also asserted to have a success status."""
    start_time = time()
    # Stays None if the loop body never runs (e.g. timeout <= 0), so that the failure
    # message below can still be built.
    get_operation_status_resp = None
    while (time() - start_time < timeout):
      get_operation_status_resp = self.get_operation_status(operation_handle)
      HS2TestSuite.check_response(get_operation_status_resp)
      # Compare by value: 'is' tests object identity, which is not guaranteed to hold
      # for two equal state values.
      if get_operation_status_resp.operationState == expected_state:
        return get_operation_status_resp
      sleep(interval)
    actual_state = None
    if get_operation_status_resp is not None:
      actual_state = get_operation_status_resp.operationState
    assert False, 'Did not reach expected operation state %s in time, actual state was ' \
        '%s' % (expected_state, actual_state)

  def wait_for_admission_control(self, operation_handle, timeout = 10):
    """Waits for the admission control processing of the query to complete by polling
    GetOperationStatus every 0.05 seconds, returning the TGetOperationStatusResp, or
    raising an assertion after timeout seconds. Processing counts as complete once the
    operation state lies strictly between INITIALIZED_STATE and PENDING_STATE."""
    start_time = time()
    # Stays None if the loop body never runs (e.g. timeout <= 0), so that the failure
    # message below can still be built.
    get_operation_status_resp = None
    while (time() - start_time < timeout):
      get_operation_status_resp = self.get_operation_status(operation_handle)
      HS2TestSuite.check_response(get_operation_status_resp)
      if TCLIService.TOperationState.INITIALIZED_STATE < \
          get_operation_status_resp.operationState < \
          TCLIService.TOperationState.PENDING_STATE:
        return get_operation_status_resp
      sleep(0.05)
    current_state = None
    if get_operation_status_resp is not None:
      current_state = get_operation_status_resp.operationState
    assert False, 'Did not complete admission control processing in time, current ' \
        'operation state of query: %s' % (current_state)

  def execute_statement(self, statement, conf_overlay=None,
                        expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
                        expected_error_prefix=None):
    """Runs 'statement' in the session stored in self.session_handle, applying
    'conf_overlay' as the request's confOverlay when it is non-empty. The response is
    checked against the expected status code and error prefix and then returned."""
    req = TCLIService.TExecuteStatementReq()
    req.sessionHandle = self.session_handle
    req.statement = statement
    if conf_overlay:
      req.confOverlay = conf_overlay
    resp = self.hs2_client.ExecuteStatement(req)
    HS2TestSuite.check_response(resp, expected_status_code, expected_error_prefix)
    return resp
Beispiel #25
0
class ClientEx:
    def __init__(self, address):
        """Create the Thrift socket, binary protocol and client, then open the socket.

        address[0] and address[1] are passed to TSocket as its first two arguments
        (presumably host and port -- confirm against the caller).
        """
        endpoint_a, endpoint_b = address[0], address[1]
        self.tr = TSocket(endpoint_a, endpoint_b)
        self.protocol = TBinaryProtocol(self.tr)
        self.client = Client(self.protocol)
        # The socket is opened last, once the protocol and client have been built on it.
        self.tr.open()

    def close(self):
        """Close the underlying socket opened in __init__."""
        self.tr.close()

    def wallet_balance_get(self, pub_key_bytes):
        """Return the result of the client's WalletBalanceGet call for pub_key_bytes."""
        return self.client.WalletBalanceGet(pub_key_bytes)

    def double_to_fee(self, value):
        fee_comission = 0
        a = True
        if value < 0.:
            fee_comission += 32768
        else:
            fee_comission += (32768 if value < 0. else 0)
            value = math.fabs(value)
            expf = (0. if value == 0. else math.log10(value))
            expi = int(expf + 0.5 if expf >= 0. else expf - 0.5)
            value /= math.pow(10, expi)
            if value >= 1.:
                value *= 0.1
                expi += 1
            fee_comission += int(1024 * (expi + 18))
            fee_comission += int(value * 1024)
        return fee_comission

    def __fee(self, value):
        sign = 0
        if value < 0.0:
            value = 1
        value = abs(value)
        expf = 0
        if value != 0.0:
            expf = math.log10(value)
        if expf >= 0:
            expf = expf + .5
        else:
            expf = expf - .5
        expi = int(expf)
        value /= math.pow(10, expi)
        if value >= 1.0:
            value *= 0.1
            expi = expi + 1
        exp = expi + 18
        if exp < 0 or exp > 28:
            print('exponent value {0} out of range [0, 28]'.format(exp))
            return -1
        frac = round(value * 1024)
        return sign * 32768 + exp * 1024 + frac

    def transfer_coins(self, integral, fraction, fee, keys):
        """Build a signed transfer with create_transaction, submit it through
        TransactionFlow and print the node's response."""
        transaction = self.create_transaction(integral, fraction, fee, keys)
        res = self.client.TransactionFlow(transaction)
        print(res)

    def create_transaction(self, integral, fraction, fee, keys):
        """Build and sign a transfer from keys.public_key_bytes to
        keys.target_public_key_bytes.

        The inner id is the source wallet's lastTransactionInnerId + 1 as reported by
        WalletTransactionsCountGet. The amount is (integral, fraction), the currency is
        fixed to 1 and the fee is encoded with __fee. The signature is the ed25519
        signature, made with keys.private_key_bytes, over the packed fields. Returns the
        Transaction.
        """
        tr = Transaction()
        counters = self.client.WalletTransactionsCountGet(keys.public_key_bytes)
        tr.id = counters.lastTransactionInnerId + 1
        tr.source = keys.public_key_bytes
        tr.target = keys.target_public_key_bytes

        amount = Amount()
        amount.integral = integral
        amount.fraction = fraction
        tr.amount = amount
        tr.currency = 1

        commission = AmountCommission()
        commission.commission = self.__fee(fee)
        tr.fee = commission

        # Layout ('=' means standard sizes, no alignment padding):
        #   6s  inner id, 6 bytes little-endian
        #   32s source public key
        #   32s target public key
        #   l   amount integral (4 bytes)
        #   q   amount fraction (8 bytes)
        #   h   fee commission (2 bytes)
        #   b   currency (1 byte)
        #   b   number of user fields (1 byte), 0 here
        inner_id_bytes = bytearray(tr.id.to_bytes(6, 'little'))
        serial_transaction = pack('=6s32s32slqhbb', inner_id_bytes, tr.source,
                                  tr.target, tr.amount.integral,
                                  tr.amount.fraction, tr.fee.commission,
                                  tr.currency, 0)

        tr.signature = ed25519.SigningKey(
            keys.private_key_bytes).sign(serial_transaction)
        return tr

    def deploy_smart_contract(self, code, fee, keys):
        """Build a deployment transaction with create_transaction_with_smart_contract,
        submit it through TransactionFlow and print the node's response."""
        transaction = self.create_transaction_with_smart_contract(code, fee, keys)
        res = self.client.TransactionFlow(transaction)
        print(res)

    def createContractAddress(self, source, tId, contract):
        tmpBytes = bytearray()
        tmpBytes.extend(source)
        tmpBytes.extend(tId)
        for a in contract.smartContractDeploy.byteCodeObjects:
            tmpBytes.extend(a.byteCode)
        res = hashlib.blake2s()
        res.update(tmpBytes)
        return res.digest()

    def normalizeCode(self, javaText):
        javaText = javaText.replace('\r', ' ').replace('\t',
                                                       ' ').replace('{', ' {')
        while '  ' in javaText:
            javaText = javaText.replace('  ', ' ')
        return javaText

    def compile_smart(self, contract_body):
        if self.client == None:
            return None
        res = self.client.SmartContractCompile(contract_body)
        return res

    def create_transaction_with_smart_contract(self, code, fee, keys):
        """Build and sign a transaction that deploys a smart contract.

        An empty 'code' is replaced by a built-in sample contract. The source is
        normalised with normalizeCode and compiled with compile_smart; the compiled
        byte code objects and the normalised source are stored on the contract. The
        target address comes from createContractAddress (source key, 6-byte inner id
        and byte codes). The signature covers the packed transaction fields followed
        by the Thrift binary serialisation of the contract. Returns the Transaction.

        NOTE(review): compile_smart returns None when no client is set, in which case
        the 'result.byteCodeObjects' access below raises AttributeError.
        """
        tr = Transaction()
        contract = SmartContractInvocation()
        contract.smartContractDeploy = SmartContractDeploy()
        if code == "":
            code = 'import com.credits.scapi.annotations.*; import com.credits.scapi.v0.*; public class ' \
                   'MySmartContract extends SmartContract { public MySmartContract() {} public String hello2(String ' \
                   'say) { return \"Hello\" + say; } }'
        contractText = self.normalizeCode(code)
        result = self.compile_smart(contractText)
        contract.smartContractDeploy.byteCodeObjects = result.byteCodeObjects
        tr.smartContract = contract
        tr.smartContract.smartContractDeploy.sourceCode = contractText
        tr.source = keys.public_key_bytes
        w = self.client.WalletTransactionsCountGet(tr.source)
        # Next inner id as 6 little-endian bytes; tr.id is the same value as an integer.
        lastInnerId = bytearray(
            (w.lastTransactionInnerId + 1).to_bytes(6, 'little'))
        tr.id = int.from_bytes(lastInnerId, byteorder='little', signed=False)
        tr.target = self.createContractAddress(tr.source, lastInnerId,
                                               contract)
        tr.amount = Amount()
        tr.amount.integral = 0
        tr.amount.fraction = 0
        tr.balance = Amount()
        tr.balance.integral = 0
        tr.balance.fraction = 0
        tr.currency = 1
        tr.fee = AmountCommission()
        tr.fee.commission = self.double_to_fee(fee)
        tr.userFields = ""
        # Single byte with value 1, packed below right after the currency field.
        ufNum1 = bytearray(b'\x01')
        contract.smartContractDeploy.hashState = ""
        contract.smartContractDeploy.tokenStandard = 0
        contract.method = ""
        contract.params = []
        contract.usedContracts = []
        contract.forgetNewState = False
        # Serialise the contract with the Thrift binary protocol into an in-memory buffer.
        transportOut = TMemoryBuffer()
        protocolOut = TBinaryProtocol(transportOut)
        contract.write(protocolOut)
        scBytes = transportOut.getvalue()
        sMap = '=6s32s32slqhb1s4s' + str(
            len(scBytes)
        ) + 's'  # the trailing '<len>s' field carries the serialised contract bytes
        serial_transaction_for_sign = pack(
            sMap,  # '=' - standard sizes, no alignment padding
            lastInnerId,  # 6s - 6 byte inner id
            tr.source,  # 32s - 32 byte source public key
            tr.target,  # 32s - 32 byte target public key
            tr.amount.integral,  # l - 4 byte integer
            tr.amount.fraction,  # q - 8 byte integer
            tr.fee.commission,  # h - 2 byte integer
            tr.currency,  # b - 1 byte integer
            ufNum1,  # 1s - the single byte built above
            bytes(len(scBytes).to_bytes(4, byteorder="little")),  # 4s - length of scBytes, little-endian
            scBytes)
        signing_key = ed25519.SigningKey(
            keys.private_key_bytes)  # signing key built from the private key bytes
        tr.signature = signing_key.sign(serial_transaction_for_sign)
        return tr

    def reverse(self, a):
        a = a.to_bytes(6, 'little')
        a = bytearray(a)
        a.reverse()
        return a
Beispiel #26
0
class TestAuthorization(CustomClusterTestSuite):
    """End-to-end HiveServer2 impersonation + authorization test.

    The test talks to the impalad over a raw HS2 Thrift connection and checks
    the audit event log written into AUDIT_LOG_DIR.
    """
    # Directory passed to the impalad via --audit_event_log_dir. It is created
    # when the class body is evaluated and removed again in teardown().
    AUDIT_LOG_DIR = tempfile.mkdtemp(dir=os.getenv('LOG_DIR'))

    def setup(self):
        """Open a Thrift connection to the HS2 endpoint and build a client."""
        host, port = IMPALAD_HS2_HOST_PORT.split(":")
        self.socket = TSocket(host, port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = TCLIService.Client(self.protocol)

    def teardown(self):
        """Close the socket (if still open) and delete the audit log directory."""
        if self.socket:
            self.socket.close()
        shutil.rmtree(self.AUDIT_LOG_DIR, ignore_errors=True)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=/test-warehouse/authz-policy.ini\
      --authorized_proxy_user_config=hue=%s\
      --audit_event_log_dir=%s" % (getuser(), AUDIT_LOG_DIR))
    def test_impersonation(self):
        """End-to-end impersonation + authorization test. Expects authorization to be
        configured before running this test"""
        # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
        # the module within this test function, rather than as a top-level import. This way
        # the tests in that module will not get pulled when executing this test suite. The fix
        # is to split the utility code out of the TestHS2 class and support HS2 as a first
        # class citizen in our test framework.
        from tests.hs2.test_hs2 import TestHS2
        open_session_req = TCLIService.TOpenSessionReq()
        # Connect as 'hue', the proxy user configured through
        # --authorized_proxy_user_config above, and delegate to the current user.
        open_session_req.username = 'hue'
        open_session_req.configuration = dict()
        open_session_req.configuration['impala.doas.user'] = getuser()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Try to query a table we are not authorized to access.
        self.session_handle = resp.sessionHandle
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch_seq.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        assert 'User \'%s\' does not have privileges to access' % getuser() in\
            str(execute_statement_resp)

        assert self.__wait_for_audit_record(user=getuser(), impersonator='hue'),\
            'No matching audit event recorded in time window'

        # Now try the same operation on a table we are authorized to access.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)

        TestHS2.check_response(execute_statement_resp)

        # Try to impersonate as a user we are not authorized to impersonate.
        # The user name must match the one in the expected error message below.
        open_session_req.configuration['impala.doas.user'] = 'some_user'
        resp = self.hs2_client.OpenSession(open_session_req)
        assert 'User \'hue\' is not authorized to impersonate \'some_user\'' in str(
            resp)

        self.socket.close()
        self.socket = None

    def __wait_for_audit_record(self, user, impersonator, timeout_secs=30):
        """Waits until an audit log record is found that contains the given user and
        impersonator, or until the timeout is reached.

        Returns True when a matching record was found, False on timeout.
        """
        # The audit event might not show up immediately (the audit logs are flushed to disk
        # on regular intervals), so poll the audit event logs until a matching record is
        # found.
        start_time = time()
        while time() - start_time < timeout_secs:
            for audit_file_name in os.listdir(self.AUDIT_LOG_DIR):
                if self.__find_matching_audit_record(audit_file_name, user,
                                                     impersonator):
                    return True
            sleep(1)
        return False

    def __find_matching_audit_record(self, audit_file_name, user,
                                     impersonator):
        """Returns True if the given file under AUDIT_LOG_DIR holds a record whose
        'user' and 'impersonator' fields equal the given values.

        Every line of the file is parsed as one JSON document; the entry stored
        under the smallest key of that document is the one inspected.
        """
        audit_log_path = os.path.join(self.AUDIT_LOG_DIR, audit_file_name)
        with open(audit_log_path) as audit_log_file:
            # Iterate the file lazily instead of loading every line up front.
            for line in audit_log_file:
                json_dict = json.loads(line)
                if len(json_dict) == 0:
                    continue
                event = json_dict[min(json_dict)]
                if event['user'] == user and\
                        event['impersonator'] == impersonator:
                    return True
        return False
Beispiel #27
0
class TestAuthorization(CustomClusterTestSuite):
    AUDIT_LOG_DIR = tempfile.mkdtemp(dir=os.getenv('LOG_DIR'))

    def setup(self):
        """Connect an HS2 Thrift client to the first impalad of the cluster.

        Stores the socket, transport, protocol and client on the instance so
        the tests and teardown() can use them.
        """
        hs2_service = self.cluster.impalads[0].service
        self.socket = TSocket(hs2_service.hostname, hs2_service.hs2_port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

    def teardown(self):
        """Close the HS2 socket when one is still set and remove AUDIT_LOG_DIR."""
        open_socket = self.socket
        if open_socket:
            open_socket.close()
        # ignore_errors: the directory may already be gone.
        shutil.rmtree(self.AUDIT_LOG_DIR, ignore_errors=True)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=%s\
      --authorization_policy_provider_class=%s" %
        (AUTH_POLICY_FILE,
         "org.apache.sentry.provider.file.LocalGroupResourceAuthorizationProvider"))
    def test_custom_authorization_provider(self):
        """Checks authorization decisions with a custom policy provider class.

        Opens a session as 'test_user', expects a privilege error for
        tpch_seq.lineitem and a successful response for tpch.lineitem.
        """
        from tests.hs2.test_hs2 import TestHS2
        open_session_req = TCLIService.TOpenSessionReq()
        # User is 'test_user' (defined in the authorization policy file). The
        # name has to match the one in the expected error message below.
        open_session_req.username = 'test_user'
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Try to query a table we are not authorized to access.
        self.session_handle = resp.sessionHandle
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch_seq.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        assert 'User \'%s\' does not have privileges to access' % 'test_user' in\
            str(execute_statement_resp)

        # Now try the same operation on a table we are authorized to access.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=%s\
      --authorized_proxy_user_config=hue=%s" % (AUTH_POLICY_FILE, getuser()))
    def test_access_runtime_profile(self):
        """Checks who may read the runtime profile and exec summary of a query.

        For a view whose underlying tables the current user cannot access, the
        profile and summary must be denied; for a view over accessible tables
        they must be returned. Each case runs once as the current user and once
        with 'hue' delegating to the current user, with the operation left open
        and closed.
        """
        from tests.hs2.test_hs2 import TestHS2

        def open_session(username, delegate_to=None):
            # Opens a session, checks it succeeded and makes it the current one.
            open_session_req = TCLIService.TOpenSessionReq()
            open_session_req.username = username
            open_session_req.configuration = dict()
            if delegate_to is not None:
                open_session_req.configuration['impala.doas.user'] = delegate_to
            resp = self.hs2_client.OpenSession(open_session_req)
            TestHS2.check_response(resp)
            self.session_handle = resp.sessionHandle

        def execute(statement):
            # Runs 'statement' in the current session and returns the raw response.
            execute_statement_req = TCLIService.TExecuteStatementReq()
            execute_statement_req.sessionHandle = self.session_handle
            execute_statement_req.statement = statement
            return self.hs2_client.ExecuteStatement(execute_statement_req)

        def verify_profile_access(statement, has_access):
            # Checks profile access with the operation left open, then closed.
            for close_operation in (False, True):
                self.__run_stmt_and_verify_profile_access(
                    statement, has_access, close_operation)

        open_session(getuser())

        # Current user can't access view's underlying tables
        execute_statement_resp = execute(
            "explain select * from functional.complex_view")
        assert 'User \'%s\' does not have privileges to EXPLAIN' % getuser() in\
            str(execute_statement_resp)
        # User should not have access to the runtime profile
        verify_profile_access("select * from functional.complex_view", False)

        # Repeat as a delegated user: 'hue' is the proxy user configured in
        # --authorized_proxy_user_config, the delegated user is the current user.
        open_session('hue', delegate_to=getuser())
        # User should not have access to the runtime profile
        verify_profile_access("select * from functional.complex_view", False)

        # Create a view for which the user has access to the underlying tables.
        open_session(getuser())
        TestHS2.check_response(execute("""create view if not exists tpch.customer_view as
        select * from tpch.customer limit 1"""))

        # User should be able to run EXPLAIN
        TestHS2.check_response(
            execute("""explain select * from tpch.customer_view"""))

        # User should have access to the runtime profile and exec summary
        verify_profile_access("select * from tpch.customer_view", True)

        # Repeat as a delegated user
        open_session('hue', delegate_to=getuser())
        # User should have access to the runtime profile and exec summary
        verify_profile_access("select * from tpch.customer_view", True)

        # Clean up
        TestHS2.check_response(execute("drop view if exists tpch.customer_view"))

    # The flag string below relies on backslash continuations inside the literal,
    # so the leading whitespace of each continued line is part of the string.
    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=%s\
      --authorized_proxy_user_config=foo=bar;hue=%s\
      --abort_on_failed_audit_event=false\
      --audit_event_log_dir=%s" % (AUTH_POLICY_FILE, getuser(), AUDIT_LOG_DIR))
    def test_user_impersonation(self):
        """End-to-end user impersonation + authorization test.

        Starts the cluster with a per-user proxy configuration
        (--authorized_proxy_user_config) that maps 'hue' to the current user and
        then runs the shared impersonation scenario."""
        self.__test_impersonation()

    # The flag string below relies on backslash continuations inside the literal,
    # so the leading whitespace of each continued line is part of the string.
    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        "--server_name=server1\
        --authorization_policy_file=%s\
        --authorized_proxy_user_config=hue=bar\
        --authorized_proxy_group_config=foo=bar;hue=%s\
        --abort_on_failed_audit_event=false\
        --audit_event_log_dir=%s" %
        (AUTH_POLICY_FILE, grp.getgrgid(os.getgid()).gr_name, AUDIT_LOG_DIR))
    def test_group_impersonation(self):
        """End-to-end group impersonation + authorization test.

        Starts the cluster with a per-group proxy configuration
        (--authorized_proxy_group_config) that maps 'hue' to the name of the
        current process's group and then runs the shared impersonation scenario."""
        self.__test_impersonation()

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
        --authorization_policy_file=%s\
        --authorized_proxy_user_config=foo=bar\
        --authorized_proxy_group_config=foo=bar\
        --abort_on_failed_audit_event=false\
        --audit_event_log_dir=%s" % (AUTH_POLICY_FILE, AUDIT_LOG_DIR))
    def test_no_matching_user_and_group_impersonation(self):
        """Delegation must be rejected when no proxy rule matches the user.

        Neither the user nor the group proxy configuration mentions 'hue', so
        opening a session as 'hue' on behalf of 'abc' is expected to fail.
        """
        open_session_req = TCLIService.TOpenSessionReq()
        # Both names have to match the expected error message below.
        open_session_req.username = 'hue'
        open_session_req.configuration = dict()
        open_session_req.configuration['impala.doas.user'] = 'abc'
        resp = self.hs2_client.OpenSession(open_session_req)
        assert 'User \'hue\' is not authorized to delegate to \'abc\'' in str(
            resp)

    def __test_impersonation(self):
        """End-to-end impersonation + authorization test. Expects authorization to be
        configured before running this test.

        Scenario: 'hue' opens a session on behalf of the current user, a query on
        an unauthorized table is rejected and audited, a query on an authorized
        table succeeds and its profile names the right users, delegation to an
        unauthorized user is rejected, and finally a plain 'hue' session without
        delegation runs a query whose profile has an empty Delegated User."""
        # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
        # the module within this test function, rather than as a top-level import. This way
        # the tests in that module will not get pulled when executing this test suite. The fix
        # is to split the utility code out of the TestHS2 class and support HS2 as a first
        # class citizen in our test framework.
        from tests.hs2.test_hs2 import TestHS2
        open_session_req = TCLIService.TOpenSessionReq()
        # Connected user is 'hue'
        open_session_req.username = 'hue'
        open_session_req.configuration = dict()
        # Delegated user is the current user
        open_session_req.configuration['impala.doas.user'] = getuser()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Try to query a table we are not authorized to access.
        self.session_handle = resp.sessionHandle
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch_seq.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        assert 'User \'%s\' does not have privileges to access' % getuser() in\
            str(execute_statement_resp)

        assert self.__wait_for_audit_record(user=getuser(), impersonator='hue'),\
            'No matching audit event recorded in time window'

        # Now try the same operation on a table we are authorized to access.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)

        TestHS2.check_response(execute_statement_resp)

        # Verify the correct user information is in the runtime profile
        query_id = operation_id_to_query_id(
            execute_statement_resp.operationHandle.operationId)
        profile_page = self.cluster.impalads[
            0].service.read_query_profile_page(query_id)
        self.__verify_profile_user_fields(profile_page,
                                          effective_user=getuser(),
                                          delegated_user=getuser(),
                                          connected_user='hue')

        # Try to delegate to a user we are not authorized to delegate to. The
        # name has to match the expected error message below.
        open_session_req.configuration['impala.doas.user'] = 'some_user'
        resp = self.hs2_client.OpenSession(open_session_req)
        assert 'User \'hue\' is not authorized to delegate to \'some_user\'' in str(
            resp)

        # Create a new session which does not have a do_as_user.
        open_session_req.username = 'hue'
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Run a simple query, which should succeed.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = resp.sessionHandle
        execute_statement_req.statement = "select 1"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

        # Verify the correct user information is in the runtime profile. Since there is
        # no do_as_user the Delegated User field should be empty.
        query_id = operation_id_to_query_id(
            execute_statement_resp.operationHandle.operationId)

        profile_page = self.cluster.impalads[
            0].service.read_query_profile_page(query_id)
        self.__verify_profile_user_fields(profile_page,
                                          effective_user='hue',
                                          delegated_user='',
                                          connected_user='hue')

        self.socket.close()
        self.socket = None

    def __verify_profile_user_fields(self, profile_str, effective_user,
                                     connected_user, delegated_user):
        """Verifies the given runtime profile string contains the specified values for
    User, Connected User, and Delegated User"""
        assert '\n    User: %s\n' % effective_user in profile_str
        assert '\n    Connected User: %s\n' % connected_user in profile_str
        assert '\n    Delegated User: %s\n' % delegated_user in profile_str

    def __wait_for_audit_record(self, user, impersonator, timeout_secs=30):
        """Waits until an audit log record is found that contains the given user and
    impersonator, or until the timeout is reached.
    """
        # The audit event might not show up immediately (the audit logs are flushed to disk
        # on regular intervals), so poll the audit event logs until a matching record is
        # found.
        start_time = time()
        while time() - start_time < timeout_secs:
            for audit_file_name in os.listdir(self.AUDIT_LOG_DIR):
                if self.__find_matching_audit_record(audit_file_name, user,
                                                     impersonator):
                    return True
            sleep(1)
        return False

    def __find_matching_audit_record(self, audit_file_name, user,
                                     impersonator):
        with open(os.path.join(self.AUDIT_LOG_DIR,
                               audit_file_name)) as audit_log_file:
            for line in audit_log_file.readlines():
                json_dict = json.loads(line)
                if len(json_dict) == 0: continue
                if json_dict[min(json_dict)]['user'] == user and\
                    json_dict[min(json_dict)]['impersonator'] == impersonator:
                    return True
        return False

    def __run_stmt_and_verify_profile_access(self, stmt, has_access,
                                             close_operation):
        """Runs 'stmt' and then requests its runtime profile and exec summary.

        If 'has_access' is true, both requests must succeed and the profile must
        contain a plan. Otherwise both responses must carry the "not authorized"
        error for the current user. If 'close_operation' is true, the operation
        is closed before the profile and exec summary are requested."""
        from tests.hs2.test_hs2 import TestHS2

        def verify_access(response):
            # Success is expected with access, the authorization error without.
            if has_access:
                TestHS2.check_response(response)
                return
            assert "User %s is not authorized to access the runtime profile or "\
                "execution summary." % (getuser()) in str(response)

        statement_req = TCLIService.TExecuteStatementReq()
        statement_req.sessionHandle = self.session_handle
        statement_req.statement = stmt
        statement_resp = self.hs2_client.ExecuteStatement(statement_req)
        TestHS2.check_response(statement_resp)
        operation_handle = statement_resp.operationHandle

        if close_operation:
            close_req = TCLIService.TCloseOperationReq()
            close_req.operationHandle = operation_handle
            TestHS2.check_response(self.hs2_client.CloseOperation(close_req))

        profile_req = ImpalaHiveServer2Service.TGetRuntimeProfileReq()
        profile_req.operationHandle = operation_handle
        profile_req.sessionHandle = self.session_handle
        profile_resp = self.hs2_client.GetRuntimeProfile(profile_req)
        verify_access(profile_resp)
        if has_access:
            assert "Plan: " in profile_resp.profile

        summary_req = ImpalaHiveServer2Service.TGetExecSummaryReq()
        summary_req.operationHandle = operation_handle
        summary_req.sessionHandle = self.session_handle
        summary_resp = self.hs2_client.GetExecSummary(summary_req)
        verify_access(summary_resp)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=" + SENTRY_CONFIG_FILE,
        catalogd_args="--sentry_config=" + SENTRY_CONFIG_FILE,
        impala_log_dir=tempfile.mkdtemp(prefix="test_deprecated_none_",
                                        dir=os.getenv("LOG_DIR")))
    def test_deprecated_flag_doesnt_show(self):
        """Checks the logs for the deprecation warning when only Sentry is configured.

        The cluster is started without --authorization_policy_file; the check is
        delegated to assert_no_files_in_dir_contain on the Impala log directory."""
        deprecation_warning = (
            "authorization_policy_file "
            "flag is deprecated. Object Ownership feature is not supported")
        assert_no_files_in_dir_contain(self.impala_log_dir, deprecation_warning)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        "--server_name=server1\
      --authorization_policy_file=%s\
      --authorization_policy_provider_class=%s" %
        (AUTH_POLICY_FILE,
         "org.apache.sentry.provider.file.LocalGroupResourceAuthorizationProvider"),
        impala_log_dir=tempfile.mkdtemp(prefix="test_deprecated_",
                                        dir=os.getenv("LOG_DIR")))
    def test_deprecated_flags(self):
        """Checks the logs for the deprecation warning when a policy file is used.

        The cluster is started with --authorization_policy_file; the check is
        delegated to assert_file_in_dir_contains on the Impala log directory."""
        deprecation_warning = (
            "authorization_policy_file flag"
            " is deprecated. Object Ownership feature is not supported")
        assert_file_in_dir_contains(self.impala_log_dir, deprecation_warning)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=%s" %
        SENTRY_CONFIG_FILE,
        catalogd_args="--sentry_config=%s" % SENTRY_CONFIG_FILE,
        impala_log_dir=tempfile.mkdtemp(prefix="test_catalog_restart_",
                                        dir=os.getenv("LOG_DIR")))
    def test_catalog_restart(self, unique_role):
        """IMPALA-7713: Tests that a catalogd restart when authorization is enabled
        resets the privileges previously stored in impalad's catalog, so that no
        stale privilege data is left behind.

        Three roles are created and granted ALL on one database each; the grants
        are verified before and after restarting only the catalogd."""
        # Statements run in order; '%s' is filled with the unique role prefix.
        setup_statements = [
            "create role %s_foo",
            "create role %s_bar",
            "create role %s_baz",
            "grant all on database functional to role %s_foo",
            "grant all on database functional_kudu to role %s_bar",
            "grant all on database functional_avro to role %s_baz",
        ]
        # (query template, database the role is expected to hold ALL on)
        expected_grants = [
            ("show grant role %s_foo", "functional"),
            ("show grant role %s_bar", "functional_kudu"),
            ("show grant role %s_baz", "functional_avro"),
        ]

        def assert_privileges():
            for grant_query, database in expected_grants:
                result = self.client.execute(grant_query % unique_role)
                TestAuthorization._check_privileges(
                    result,
                    [["database", database, "", "", "", "all", "false"]])

        self.role_cleanup(unique_role)
        try:
            for statement in setup_statements:
                self.client.execute(statement % unique_role)

            assert_privileges()
            restart_args = [
                "--catalogd_args=--sentry_config=%s" % SENTRY_CONFIG_FILE,
                "--restart_catalogd_only",
            ]
            self._start_impala_cluster(restart_args)
            assert_privileges()
        finally:
            self.role_cleanup(unique_role)

    def role_cleanup(self, role_name_match):
        """Drops every role whose name contains 'role_name_match'.

        The candidate roles are the rows returned by "show roles"."""
        existing_roles = self.client.execute("show roles").data
        for existing_role in existing_roles:
            if role_name_match not in existing_role:
                continue
            self.client.execute("drop role %s" % existing_role)

    @staticmethod
    def _check_privileges(result, expected):
        def columns(row):
            cols = row.split("\t")
            return cols[0:len(cols) - 1]

        assert map(columns, result.data) == expected

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=%s" %
        SENTRY_CONFIG_FILE,
        catalogd_args="--sentry_config=%s" % SENTRY_CONFIG_FILE,
        impala_log_dir=tempfile.mkdtemp(prefix="test_catalog_restart_",
                                        dir=os.getenv("LOG_DIR")))
    def test_catalog_object(self, unique_role):
        """IMPALA-7721: Tests /catalog_object web API for principal and privilege.

        For both the catalogd and the first impalad, dumps of an existing
        principal and privilege must contain a catalog version, while dumps of
        unknown ones must report a CatalogException."""
        # '%s' is filled with the principal id parsed from the principal dump.
        existing_privilege = (
            "server=server1->db=functional->action=select->grantoption=false.%s.ROLE")
        missing_privilege = (
            "server=server1->db=doesntexist->action=select->grantoption=false.%s.ROLE")

        self.role_cleanup(unique_role)
        try:
            self.client.execute("create role %s" % unique_role)
            self.client.execute(
                "grant select on database functional to role %s" % unique_role)
            services = [
                self.cluster.catalogd.service,
                self.cluster.get_first_impalad().service,
            ]
            for service in services:
                principal_dump = service.get_catalog_object_dump(
                    "PRINCIPAL", "%s.ROLE" % unique_role)
                assert "catalog_version" in principal_dump

                # Get the privilege associated with that principal ID.
                id_match = re.search(r"principal_id \(i32\) = (\d+)",
                                     principal_dump)
                assert id_match is not None
                principal_id = id_match.group(1)
                privilege_dump = service.get_catalog_object_dump(
                    "PRIVILEGE",
                    urllib.quote(existing_privilege % principal_id))
                assert "catalog_version" in privilege_dump

                # Get the principal that does not exist.
                unknown_principal_dump = service.get_catalog_object_dump(
                    "PRINCIPAL", "doesnotexist.ROLE")
                assert "CatalogException" in unknown_principal_dump

                # Get the privilege that does not exist.
                unknown_privilege_dump = service.get_catalog_object_dump(
                    "PRIVILEGE",
                    urllib.quote(missing_privilege % principal_id))
                assert "CatalogException" in unknown_privilege_dump
        finally:
            self.role_cleanup(unique_role)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=%s" %
        SENTRY_CONFIG_FILE,
        catalogd_args=
        "--sentry_config=%s --sentry_catalog_polling_frequency_s=3600" %
        SENTRY_CONFIG_FILE,
        impala_log_dir=tempfile.mkdtemp(
            prefix="test_invalidate_metadata_sentry_unavailable_",
            dir=os.getenv("LOG_DIR")))
    def test_invalidate_metadata_sentry_unavailable(self, unique_role):
        """IMPALA-7824: Tests that running INVALIDATE METADATA when Sentry is unavailable
        should not cause Impala to hang.

        With Sentry stopped the statement must fail with an authorization
        refresh error; once Sentry is started again it must succeed."""
        self.role_cleanup(unique_role)
        try:
            group_name = grp.getgrnam(getuser()).gr_name
            self.client.execute("create role %s" % unique_role)
            self.client.execute("grant all on server to role %s" % unique_role)
            self.client.execute("grant role %s to group `%s`" %
                                (unique_role, group_name))

            self._stop_sentry_service()
            try:
                # Calling INVALIDATE METADATA when Sentry is unavailable should return an
                # error.
                result = self.execute_query_expect_failure(
                    self.client, "invalidate metadata")
                result_str = str(result)
                assert "MESSAGE: CatalogException: Error refreshing authorization policy:" \
                       in result_str
                assert "CAUSED BY: ImpalaRuntimeException: Error refreshing authorization policy." \
                       " Sentry is unavailable. Ensure Sentry is up:" in result_str
            finally:
                # Restart Sentry even when a check above fails, so the role
                # cleanup below does not run against a stopped service.
                self._start_sentry_service(SENTRY_CONFIG_FILE)

            # Calling INVALIDATE METADATA after Sentry is up should not return an error.
            self.execute_query_expect_success(self.client,
                                              "invalidate metadata")
        finally:
            self.role_cleanup(unique_role)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=%s" %
        SENTRY_CONFIG_FILE,
        catalogd_args=
        "--sentry_config=%s --sentry_catalog_polling_frequency_s=3600 " %
        SENTRY_CONFIG_FILE,
        impala_log_dir=tempfile.mkdtemp(prefix="test_refresh_authorization_",
                                        dir=os.getenv("LOG_DIR")))
    def test_refresh_authorization(self, unique_role):
        """Tests the refresh authorization statement by adding and removing roles and
        privileges externally.

        A long Sentry polling interval is configured so that externally made
        changes are not picked up by polling and only become visible through an
        explicit refresh authorization statement. The scenario runs once with
        sync_ddl enabled and once with it disabled."""
        group_name = grp.getgrnam(getuser()).gr_name
        self.role_cleanup(unique_role)
        for sync_ddl in [1, 0]:
            query_options = {'sync_ddl': sync_ddl}
            if sync_ddl:
                # When sync_ddl is True, we want to ensure the changes are propagated to
                # all coordinators, so verify through a client per impalad.
                clients = [
                    impalad.service.create_beeswax_client()
                    for impalad in self.cluster.impalads
                ]
            else:
                clients = [self.client]
            try:
                self.client.execute("create role %s" % unique_role)
                self.client.execute("grant role %s to group `%s`" %
                                    (unique_role, group_name))
                self.client.execute("grant refresh on server to %s" %
                                    unique_role)

                self.validate_refresh_authorization_roles(
                    unique_role, query_options, clients)
                self.validate_refresh_authorization_privileges(
                    unique_role, query_options, clients)
            finally:
                self.role_cleanup(unique_role)

    def validate_refresh_authorization_roles(self, unique_role, query_options,
                                             clients):
        """Checks that refresh authorization picks up roles that were dropped or
        created externally through sentryShell.

        Args:
            unique_role: prefix used to build the names of the roles under test.
            query_options: options passed to each "refresh authorization" query.
            clients: connections on which the refreshed role list is verified.
        """

        def run_sentry_shell(command_template, role_name):
            # Runs sentryShell through bash, forwarding its output to ours.
            shell_command = command_template % (
                os.getenv("SENTRY_HOME"), os.getenv("SENTRY_CONF_DIR"),
                role_name)
            subprocess.check_call(["/bin/bash", "-c", shell_command],
                                  stdout=sys.stdout,
                                  stderr=sys.stderr)

        def role_listed(client, role_name):
            # True if any row returned by "show roles" contains role_name.
            rows = self.execute_query_expect_success(client,
                                                     "show roles").data
            return any(role_name in row for row in rows)

        def refresh_authorization():
            self.execute_query_expect_success(self.client,
                                              "refresh authorization",
                                              query_options=query_options)

        dropped_role = "%s_internal1" % unique_role
        created_role = "%s_external" % unique_role
        try:
            # Create two roles inside Impala.
            self.client.execute("create role %s_internal1" % unique_role)
            self.client.execute("create role %s_internal2" % unique_role)

            # Drop an existing role (_internal1) outside Impala. Until the
            # refresh, Impala must still list it; afterwards no client may.
            run_sentry_shell(
                "%s/bin/sentryShell --conf %s/sentry-site.xml -dr -r %s",
                dropped_role)
            assert role_listed(self.client, dropped_role)
            refresh_authorization()
            for client in clients:
                assert not role_listed(client, dropped_role)

            # Add a new role outside Impala. It must stay invisible until the
            # refresh and then show up on every client.
            run_sentry_shell(
                "%s/bin/sentryShell --conf %s/sentry-site.xml -cr -r %s",
                created_role)
            assert not role_listed(self.client, created_role)
            refresh_authorization()
            for client in clients:
                assert role_listed(client, created_role)
        finally:
            for suffix in ("internal1", "internal2", "external"):
                self.role_cleanup("%s_%s" % (unique_role, suffix))

    def validate_refresh_authorization_privileges(self, unique_role,
                                                  query_options, clients):
        """Checks that refresh authorization picks up a privilege that was granted
        externally through sentryShell.

        Args:
            unique_role: role that receives the externally granted privilege.
            query_options: options passed to the "refresh authorization" query.
            clients: connections on which the effect of the refresh is verified.
        """
        show_grants = "show grant role %s" % unique_role
        select_probe = "select * from functional.alltypes limit 1"

        # Grant select privilege outside Impala.
        grant_command = (
            "%s/bin/sentryShell --conf %s/sentry-site.xml -gpr -p "
            "'server=server1->db=functional->table=alltypes->action=select' -r %s"
        ) % (os.getenv("SENTRY_HOME"), os.getenv("SENTRY_CONF_DIR"),
             unique_role)
        subprocess.check_call(["/bin/bash", "-c", grant_command],
                              stdout=sys.stdout,
                              stderr=sys.stderr)

        # Before refresh authorization, there should only be one refresh
        # privilege, and the select must be rejected on every client.
        grants = self.execute_query_expect_success(self.client,
                                                   show_grants).data
        assert len(grants) == 1
        assert any("refresh" in row for row in grants)

        for client in clients:
            self.execute_query_expect_failure(client, select_probe)

        self.execute_query_expect_success(self.client,
                                          "refresh authorization",
                                          query_options=query_options)

        # Ensure select privilege was granted after refresh authorization.
        for client in clients:
            grants = self.execute_query_expect_success(client,
                                                       show_grants).data
            assert len(grants) == 2
            assert any("select" in row for row in grants)
            assert any("refresh" in row for row in grants)
            self.execute_query_expect_success(client, select_probe)
Beispiel #28
0
# Operators recognised in a filter expression, in the order they are tried.
# The first operator found anywhere in the expression text wins.
_FILTER_OPERATORS = ("==", "!=", ">=", "<=", ">", "<", "=regexp")
# Value types accepted on the right-hand side of a "field:type" format spec.
_FORMAT_TYPES = ("int", "float")
# Raw strings, so "\d" and "\." are regex escapes rather than (invalid)
# string escapes.
_INT_PATTERN = r"^\d+$"
_FLOAT_PATTERN = r"^\d+?\.\d+$"


def _parse_filter_expression(text):
    """Parse one "<field><op><value>" string.

    Returns a (field, op, value_type, value) tuple, or None when the text
    cannot be used: no known operator, the operator splits the text into
    something other than two parts, or an "=regexp" operand that is not a
    parenthesised, compilable regular expression.
    """
    for op in _FILTER_OPERATORS:
        if op not in text:
            continue
        parts = text.split(op)
        if len(parts) != 2:
            return None
        field, operand = parts

        if op == "=regexp":
            # startswith/endswith (instead of indexing) keeps an empty operand
            # such as "field=regexp" from raising IndexError.
            if not (operand.startswith("(") and operand.endswith(")")):
                return None
            pattern = operand[1:-1]
            try:
                re.compile(pattern)
            except re.error:
                return None
            return field, "regexp", "string", pattern

        double_quoted = operand.startswith('"') and operand.endswith('"')
        single_quoted = operand.startswith("'") and operand.endswith("'")
        if double_quoted or single_quoted:
            return field, op, "string", operand[1:-1]
        if re.match(_FLOAT_PATTERN, operand):
            return field, op, "float", operand
        if re.match(_INT_PATTERN, operand):
            return field, op, "int", operand
        return field, op, "string", operand
    return None


def _parse_formats(specs):
    """Build a {field: type} dict from "field:type" specs, skipping bad ones."""
    formats = {}
    for spec in specs:
        parts = spec.split(":")
        if len(parts) == 2 and parts[1] in _FORMAT_TYPES:
            formats[parts[0]] = parts[1]
    return formats


def _render_field(record, field, timefields, formats):
    """Return the printable text for one field of a decoded log record."""
    if field in timefields:
        ts = record.get(field, 0)
        try:
            ts = int(ts)
        except (TypeError, ValueError):
            # Not numeric: a string is printed verbatim below.
            pass
        if isinstance(ts, str):
            return ts
        return datetime.datetime.fromtimestamp(ts).isoformat()
    if formats.get(field) in _FORMAT_TYPES:
        return str(record.get(field, 0))
    return "'%s'" % record.get(field, "")


def main():
    """Register a temporary log filter, print matching logs, then unregister.

    Command-line arguments are read from the module-level ``parser``. Filter
    expressions and format specs that cannot be parsed are silently skipped.
    Logs are pulled until the cursor returns a falsy value or the user
    interrupts with Ctrl-C; the filter is unregistered and the transport closed
    in either case. Exits with status 1 if the filter cannot be registered.
    """
    args = parser.parse_args()

    exps = []
    for raw_exp in args.exps:
        parsed = _parse_filter_expression(raw_exp)
        if parsed is not None:
            exps.append(FilterExpression(*parsed))

    fields = dict.fromkeys(args.fields, 1)
    formats = _parse_formats(args.formats)

    name = "tail_" + "".join(
        random.choice(string.digits + string.ascii_letters)
        for _ in range(16))
    # "expried_time" (sic) is the keyword the original call passes to Filter.
    log_filter = Filter(args.collection,
                        name,
                        exps=exps,
                        fields=fields,
                        formats=formats,
                        expried_time=5)

    transport = TSocket(args.host, args.port)
    transport = TBufferedTransport(transport)
    protocol = TBinaryProtocolAccelerated(transport)
    client = Client(protocol)
    transport.open()

    result = client.register_filter(log_filter)
    if result.result != 0:
        print("register error", name, result.msg)
        # Release the connection and report the failure through the exit code.
        transport.close()
        raise SystemExit(1)

    print("register", name, log_filter)
    try:
        cursor = client.pull(name)
        while True:
            log = cursor.next()
            if not log:
                break
            if not args.fields:
                print(log.encode("utf-8"))
                continue

            try:
                record = json.loads(log)
            except (TypeError, ValueError):
                # Not JSON: print the raw line. Only decode errors are caught
                # so that Ctrl-C still reaches the handler below.
                print(log.encode("utf-8"))
                continue

            flogs = [
                _render_field(record, field, args.timefields, formats)
                for field in args.fields
            ]
            print(" ".join(flogs).encode("utf-8"))
    except KeyboardInterrupt:
        pass
    finally:
        try:
            result = client.unregister_filter(name)
            print("unregister", name, result.msg)
        finally:
            # Close the connection even if unregistering failed.
            transport.close()