Ejemplo n.º 1
0
    def __init__(self, host, port=10000, username=None, database='default', configuration=None):
        """Open a HiveServer2 session over a SASL ``PLAIN`` transport.

        Builds the Thrift socket/transport/protocol stack, opens the transport,
        issues ``OpenSession`` and finally runs a ``USE`` statement for
        ``database``. If any step after the stack is built raises, the
        transport is closed and the exception is re-raised.

        :param host: Host handed to the Thrift ``TSocket``
        :param port: Port handed to the Thrift ``TSocket``
        :param username: SASL user name; a falsy value falls back to ``getpass.getuser()``
        :param database: Database selected once the session is open
        :param configuration: Sent as ``configuration`` in the open-session request;
            a falsy value is replaced by an empty dict
        """
        sock = thrift.transport.TSocket.TSocket(host, port)
        if not username:
            username = getpass.getuser()
        if not configuration:
            configuration = {}

        def plain_sasl_client():
            # Called by the SASL transport; builds a fresh client each time.
            client = sasl.Client()
            attributes = (
                (b'username', username.encode('latin-1')),
                # Password doesn't matter in PLAIN mode, just needs to be nonempty.
                (b'password', b'x'),
            )
            for key, value in attributes:
                client.setAttr(key, value)
            client.init()
            return client

        # PLAIN corresponds to hive.server2.authentication=NONE in hive-site.xml
        self._transport = thrift_sasl.TSaslClientTransport(plain_sasl_client, b'PLAIN', sock)
        self._client = TCLIService.Client(
            thrift.protocol.TBinaryProtocol.TBinaryProtocol(self._transport))

        try:
            self._transport.open()
            version = ttypes.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1
            response = self._client.OpenSession(
                ttypes.TOpenSessionReq(client_protocol=version, configuration=configuration))
            _check_status(response)
            # NOTE(review): these asserts are stripped when Python runs with -O.
            assert response.sessionHandle is not None, "Expected a session from OpenSession"
            self._sessionHandle = response.sessionHandle
            assert response.serverProtocolVersion == version, \
                "Unable to handle protocol version {}".format(response.serverProtocolVersion)
            with contextlib.closing(self.cursor()) as cur:
                cur.execute('USE `{}`'.format(database))
        except:
            # Deliberately bare: whatever went wrong, release the transport
            # and let the original exception propagate.
            self._transport.close()
            raise
Ejemplo n.º 2
0
    def __create_binary_transport(host, username, password,
                                  kerberos_service_name, port, auth):
        """Build (without opening) a Thrift transport for the given auth mode.

        NOTE(review): there is no ``self`` parameter, so this is presumably used
        as a static helper -- confirm against the enclosing class.

        :param host: Host for the Thrift ``TSocket``; also set as the SASL ``host`` attribute
        :param username: User name sent in SASL ``PLAIN`` mode
        :param password: Required for ``LDAP``/``CUSTOM``, must be ``None`` otherwise
        :param kerberos_service_name: SASL ``service`` attribute in ``KERBEROS`` mode
        :param port: Port for the socket; ``None`` means 10000
        :param auth: One of ``NONE``, ``NOSASL``, ``LDAP``, ``KERBEROS``, ``CUSTOM``;
            ``None`` means ``NONE``
        :return: A buffered transport for ``NOSASL``, a SASL client transport otherwise
        :raises ValueError: If ``password`` presence does not match the auth mode
        :raises NotImplementedError: For any other ``auth`` value
        """
        port = 10000 if port is None else port
        auth = 'NONE' if auth is None else auth

        password_expected = auth in ('LDAP', 'CUSTOM')
        if (password is None) == password_expected:
            raise ValueError(
                "Password should be set if and only if in LDAP or CUSTOM mode; "
                "Remove password or use one of those modes")

        sock = thrift.transport.TSocket.TSocket(host, port)

        if auth == 'NOSASL':
            # NOSASL corresponds to hive.server2.authentication=NOSASL in hive-site.xml
            return thrift.transport.TTransport.TBufferedTransport(sock)

        if auth not in ('LDAP', 'KERBEROS', 'NONE', 'CUSTOM'):
            # All HS2 config options:
            # https://cwiki.apache.org/confluence/display/Hive/Setting+Up+HiveServer2#SettingUpHiveServer2-Configuration
            # PAM currently left to end user via thrift_transport option.
            raise NotImplementedError(
                "Only NONE, NOSASL, LDAP, KERBEROS, CUSTOM "
                "authentication are supported, got {}".format(auth))

        # Defer import so package dependency is optional
        import sasl
        import thrift_sasl

        # KERBEROS mode in hive.server2.authentication is GSSAPI in sasl library
        mechanism = 'GSSAPI' if auth == 'KERBEROS' else 'PLAIN'
        if mechanism == 'PLAIN' and password is None:
            # Password doesn't matter in NONE mode, just needs to be nonempty.
            password = '******'

        def make_sasl_client():
            client = sasl.Client()
            client.setAttr('host', host)
            if mechanism == 'GSSAPI':
                attributes = (('service', kerberos_service_name),)
            elif mechanism == 'PLAIN':
                attributes = (('username', username), ('password', password))
            else:
                raise AssertionError
            for key, value in attributes:
                client.setAttr(key, value)
            client.init()
            return client

        return thrift_sasl.TSaslClientTransport(make_sasl_client, mechanism, sock)
Ejemplo n.º 3
0
def transport_factory(host, port, username, password, **kwargs):
    """
    Create a thrift_sasl client transport (SASL ``PLAIN``) for use with
    Hive/PyHive. The transport is returned unopened.

    :param host: str - host name
    :param port: int/str - port number - hive default is 10000
    :param username: str - user name given to the SASL client
    :param password: str - password given to the SASL client
    :param kwargs: optional
        use_ssl: when truthy a ``TSSLSocket`` is used instead of a plain
            ``TSocket``; default is False
        socket_kwargs: dict passed as keyword arguments to ``TSSLSocket``. When
            empty or missing, the SSL socket is created with
            ``cert_reqs=ssl.CERT_NONE``
        use_sasl: when truthy use the ``sasl`` library, otherwise
            ``PureSASLClient`` from ``sasl_compat``; default is False
        kerberos_service_name: read and forwarded as the SASL service name;
            the mechanism itself is fixed to ``PLAIN`` in this function
    :return: ``thrift_sasl.TSaslClientTransport`` wrapping the chosen socket
    """
    mechanism = 'PLAIN'
    wants_ssl = kwargs.get('use_ssl', False)
    ssl_options = kwargs.get('socket_kwargs', {})
    service_name = kwargs.get('kerberos_service_name', None)
    wants_sasl_lib = kwargs.get('use_sasl', False)

    if not wants_ssl:
        sock = TSocket(host, port)  # basic socket
    elif ssl_options:
        sock = TSSLSocket(host, port, **ssl_options)
    else:
        sock = TSSLSocket(host, port, cert_reqs=ssl.CERT_NONE)

    if wants_sasl_lib:
        import sasl

        def make_client():
            client = sasl.Client()
            client.setAttr('host', host)
            if mechanism == 'GSSAPI':
                attributes = (('service', service_name),)
            elif mechanism == 'PLAIN':
                attributes = (('username', username), ('password', password))
            else:
                raise AssertionError
            for key, value in attributes:
                client.setAttr(key, value)
            client.init()
            return client

    else:
        from sasl_compat import PureSASLClient

        def make_client():
            return PureSASLClient(
                host,
                username=username,
                password=password,
                service=service_name,
                mechanism=mechanism,
            )

    return thrift_sasl.TSaslClientTransport(make_client, mechanism, sock)
Ejemplo n.º 4
0
    def __init__(self, host, port=10000, username=None, database='default', auth='NONE',
                 configuration=None):
        """Connect to HiveServer2

        Builds the transport for the requested auth mode, opens it, opens a
        session and selects ``database``. The transport is closed again if any
        of the post-construction steps raises.

        :param host: Host handed to the Thrift ``TSocket``
        :param port: Port handed to the Thrift ``TSocket``
        :param username: SASL user name; a falsy value falls back to ``getpass.getuser()``
        :param database: Database selected with ``USE`` once the session is open
        :param auth: The value of hive.server2.authentication used by HiveServer2
            (only ``NONE`` and ``NOSASL`` are handled here)
        :param configuration: Sent as ``configuration`` in the open-session request;
            a falsy value is replaced by an empty dict
        :raises NotImplementedError: If ``auth`` is neither ``NONE`` nor ``NOSASL``
        """
        sock = thrift.transport.TSocket.TSocket(host, port)
        if not username:
            username = getpass.getuser()
        if not configuration:
            configuration = {}

        def plain_sasl_client():
            # Only used for auth == 'NONE'; builds a fresh client per call.
            client = sasl.Client()
            attributes = (
                (b'username', username.encode('latin-1')),
                # Password doesn't matter in NONE mode, just needs to be nonempty.
                (b'password', b'x'),
            )
            for key, value in attributes:
                client.setAttr(key, value)
            client.init()
            return client

        if auth == 'NOSASL':
            # NOSASL corresponds to hive.server2.authentication=NOSASL in hive-site.xml
            self._transport = thrift.transport.TTransport.TBufferedTransport(sock)
        elif auth == 'NONE':
            # PLAIN corresponds to hive.server2.authentication=NONE in hive-site.xml
            self._transport = thrift_sasl.TSaslClientTransport(
                plain_sasl_client, b'PLAIN', sock)
        else:
            raise NotImplementedError(
                "Only NONE & NOSASL authentication are supported, got {}".format(auth))

        self._client = TCLIService.Client(
            thrift.protocol.TBinaryProtocol.TBinaryProtocol(self._transport))
        # oldest version that still contains features we care about
        # "V6 uses binary type for binary payload (was string) and uses columnar result set"
        wanted_version = ttypes.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6

        try:
            self._transport.open()
            response = self._client.OpenSession(ttypes.TOpenSessionReq(
                client_protocol=wanted_version,
                configuration=configuration,
            ))
            _check_status(response)
            # NOTE(review): these asserts are stripped when Python runs with -O.
            assert response.sessionHandle is not None, "Expected a session from OpenSession"
            self._sessionHandle = response.sessionHandle
            assert response.serverProtocolVersion == wanted_version, \
                "Unable to handle protocol version {}".format(response.serverProtocolVersion)
            with contextlib.closing(self.cursor()) as cur:
                cur.execute('USE `{}`'.format(database))
        except:
            # Deliberately bare: always release the transport, then re-raise.
            self._transport.close()
            raise
Ejemplo n.º 5
0
    def test_custom_transport(self):
        """Connect through a caller-built SASL PLAIN transport and read ``one_row``.

        The transport targets ``localhost:10000`` and is handed to
        ``hive.connect`` via ``thrift_transport``; the query result is expected
        to be exactly ``[(1,)]``.
        """
        def make_client():
            client = sasl.Client()
            for key, value in (('host', 'localhost'),
                               ('username', 'test_username'),
                               ('password', 'x')):
                client.setAttr(key, value)
            client.init()
            return client

        sock = thrift.transport.TSocket.TSocket('localhost', 10000)
        custom_transport = thrift_sasl.TSaslClientTransport(make_client, 'PLAIN', sock)
        connection = hive.connect(thrift_transport=custom_transport)
        with contextlib.closing(connection), \
                contextlib.closing(connection.cursor()) as cur:
            cur.execute('SELECT * FROM one_row')
            self.assertEqual(cur.fetchall(), [(1,)])
Ejemplo n.º 6
0
def build_ssl_transport(host,
                        port,
                        username,
                        auth,
                        kerberos_service_name,
                        password=None):
    """Build (without opening) a Thrift transport to HiveServer2 over an SSL socket.

    :param host: Host for the ``TSSLSocket``; also set as the SASL ``host`` attribute
    :param port: Port for the socket; ``None`` means 10000
    :param username: User name sent in SASL ``PLAIN`` mode
    :param auth: One of ``NONE``, ``NOSASL``, ``LDAP``, ``KERBEROS``, ``CUSTOM``;
        ``None`` means ``NONE``
    :param kerberos_service_name: SASL ``service`` attribute in ``KERBEROS`` mode
    :param password: Password for SASL ``PLAIN`` mode; when ``None`` a
        placeholder is used because the value only needs to be nonempty
    :return: A buffered transport for ``NOSASL``, a SASL client transport otherwise
    :raises NotImplementedError: If ``auth`` is not one of the supported modes
        (previously this case silently returned ``None``)
    """
    if port is None:
        port = 10000
    if auth is None:
        auth = 'NONE'

    # Fail loudly on an unknown mode instead of handing the caller a None
    # transport that only blows up later, far from the real cause.
    if auth not in ('NOSASL', 'LDAP', 'KERBEROS', 'NONE', 'CUSTOM'):
        raise NotImplementedError(
            "Only NONE, NOSASL, LDAP, KERBEROS, CUSTOM "
            "authentication are supported, got {}".format(auth))

    # NOTE(review): cert_reqs=ssl.CERT_NONE asks for no certificate validation
    # of the server -- confirm this is acceptable for the deployment.
    socket = TSSLSocket(host, port, cert_reqs=ssl.CERT_NONE)

    if auth == 'NOSASL':
        # NOSASL corresponds to hive.server2.authentication=NOSASL
        # in hive-site.xml
        return thrift.transport.TTransport.TBufferedTransport(socket)

    # ``sasl`` and ``thrift_sasl`` are not imported in this function; they must
    # already be in scope where it is defined.
    if auth == 'KERBEROS':
        # KERBEROS mode in hive.server2.authentication is GSSAPI
        # in sasl library
        sasl_auth = 'GSSAPI'
    else:
        sasl_auth = 'PLAIN'
        if password is None:
            # Password doesn't matter in NONE mode, just needs
            # to be nonempty.
            password = '******'

    def sasl_factory():
        # Invoked by the SASL transport to obtain a configured client.
        sasl_client = sasl.Client()
        sasl_client.setAttr('host', host)
        if sasl_auth == 'GSSAPI':
            sasl_client.setAttr('service', kerberos_service_name)
        elif sasl_auth == 'PLAIN':
            sasl_client.setAttr('username', username)
            sasl_client.setAttr('password', password)
        else:
            raise AssertionError
        sasl_client.init()
        return sasl_client

    return thrift_sasl.TSaslClientTransport(sasl_factory, sasl_auth, socket)
Ejemplo n.º 7
0
    def __init__(self,
                 host=None,
                 port=None,
                 username=None,
                 database='default',
                 auth=None,
                 configuration=None,
                 kerberos_service_name=None,
                 password=None,
                 thrift_transport=None):
        """Connect to HiveServer2

        Argument combinations are validated first. The transport is then either
        taken from ``thrift_transport`` or built from ``host``/``port``/``auth``.
        Finally the transport is opened, a session is opened and ``database`` is
        selected; on any failure in that last phase the transport is closed and
        the exception re-raised.

        :param host: What host HiveServer2 runs on
        :param port: What port HiveServer2 runs on. Defaults to 10000.
        :param username: User name for SASL ``PLAIN`` and the open-session
            request; a falsy value falls back to ``getpass.getuser()``
        :param database: Database selected with ``USE`` once the session is open
        :param auth: The value of hive.server2.authentication used by HiveServer2.
            Defaults to ``NONE``.
        :param configuration: A dictionary of Hive settings (functionally same as the `set` command)
        :param kerberos_service_name: Use with auth='KERBEROS' only
        :param password: Use with auth='LDAP' or auth='CUSTOM' only
        :param thrift_transport: A ``TTransportBase`` for custom advanced usage.
            Incompatible with host, port, auth, kerberos_service_name, and password.
        :raises ValueError: On an inconsistent combination of arguments
        :raises NotImplementedError: On an unsupported ``auth`` value

        The way to support LDAP and GSSAPI is originated from cloudera/Impyla:
        https://github.com/cloudera/impyla/blob/255b07ed973d47a3395214ed92d35ec0615ebf62
        /impala/_thrift_api.py#L152-L160
        """
        if not username:
            username = getpass.getuser()
        if not configuration:
            configuration = {}

        password_expected = auth in ('LDAP', 'CUSTOM')
        if (password is None) == password_expected:
            raise ValueError(
                "Password should be set if and only if in LDAP or CUSTOM mode; "
                "Remove password or use one of those modes")
        if (kerberos_service_name is None) == (auth == 'KERBEROS'):
            raise ValueError(
                "kerberos_service_name should be set if and only if in KERBEROS mode"
            )

        if thrift_transport is not None:
            # A ready-made transport excludes every argument used to build one.
            conflicting = (host, port, auth, kerberos_service_name, password)
            if any(value is not None for value in conflicting):
                raise ValueError(
                    "thrift_transport cannot be used with "
                    "host/port/auth/kerberos_service_name/password")
            self._transport = thrift_transport
        else:
            port = 10000 if port is None else port
            auth = 'NONE' if auth is None else auth
            sock = thrift.transport.TSocket.TSocket(host, port)
            if auth == 'NOSASL':
                # NOSASL corresponds to hive.server2.authentication=NOSASL in hive-site.xml
                self._transport = thrift.transport.TTransport.TBufferedTransport(sock)
            elif auth in ('LDAP', 'KERBEROS', 'NONE', 'CUSTOM'):
                # Defer import so package dependency is optional
                import sasl
                import thrift_sasl

                # KERBEROS mode in hive.server2.authentication is GSSAPI in sasl library
                mechanism = 'GSSAPI' if auth == 'KERBEROS' else 'PLAIN'
                if mechanism == 'PLAIN' and password is None:
                    # Password doesn't matter in NONE mode, just needs to be nonempty.
                    password = '******'

                def make_sasl_client():
                    client = sasl.Client()
                    client.setAttr('host', host)
                    if mechanism == 'GSSAPI':
                        attributes = (('service', kerberos_service_name),)
                    elif mechanism == 'PLAIN':
                        attributes = (('username', username), ('password', password))
                    else:
                        raise AssertionError
                    for key, value in attributes:
                        client.setAttr(key, value)
                    client.init()
                    return client

                self._transport = thrift_sasl.TSaslClientTransport(
                    make_sasl_client, mechanism, sock)
            else:
                # All HS2 config options:
                # https://cwiki.apache.org/confluence/display/Hive/Setting+Up+HiveServer2#SettingUpHiveServer2-Configuration
                # PAM currently left to end user via thrift_transport option.
                raise NotImplementedError(
                    "Only NONE, NOSASL, LDAP, KERBEROS, CUSTOM "
                    "authentication are supported, got {}".format(auth))

        self._client = TCLIService.Client(
            thrift.protocol.TBinaryProtocol.TBinaryProtocol(self._transport))
        # oldest version that still contains features we care about
        # "V6 uses binary type for binary payload (was string) and uses columnar result set"
        wanted_version = ttypes.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6

        try:
            self._transport.open()
            response = self._client.OpenSession(ttypes.TOpenSessionReq(
                client_protocol=wanted_version,
                configuration=configuration,
                username=username,
            ))
            _check_status(response)
            # NOTE(review): these asserts are stripped when Python runs with -O.
            assert response.sessionHandle is not None, "Expected a session from OpenSession"
            self._sessionHandle = response.sessionHandle
            assert response.serverProtocolVersion == wanted_version, \
                "Unable to handle protocol version {}".format(response.serverProtocolVersion)
            with contextlib.closing(self.cursor()) as cur:
                cur.execute('USE `{}`'.format(database))
        except:
            # Deliberately bare: always release the transport, then re-raise.
            self._transport.close()
            raise
Ejemplo n.º 8
0
    def __init__(self,
                 host,
                 port=10000,
                 username=None,
                 database='default',
                 auth='NONE',
                 configuration=None,
                 kerberos_service_name=None,
                 password=None):
        """Connect to HiveServer2

        Validates the argument combination, builds the transport for ``auth``,
        then opens the transport, opens a session and selects ``database``. The
        transport is closed again if any step of that last phase raises.

        :param host: Host for the Thrift ``TSocket``; also set as the SASL ``host`` attribute
        :param port: Port for the Thrift ``TSocket``
        :param username: SASL ``PLAIN`` user name; a falsy value falls back to
            ``getpass.getuser()``
        :param database: Database selected with ``USE`` once the session is open
        :param auth: The value of hive.server2.authentication used by HiveServer2
        :param configuration: A dictionary of Hive settings (functionally same as the `set` command)
        :param kerberos_service_name: Use with auth='KERBEROS' only
        :param password: Use with auth='LDAP' only
        :raises ValueError: On an inconsistent combination of arguments
        :raises NotImplementedError: On an unsupported ``auth`` value

        The way to support LDAP and GSSAPI is originated from cloudera/Impyla:
        https://github.com/cloudera/impyla/blob/255b07ed973d47a3395214ed92d35ec0615ebf62
        /impala/_thrift_api.py#L152-L160
        """
        sock = thrift.transport.TSocket.TSocket(host, port)
        if not username:
            username = getpass.getuser()
        if not configuration:
            configuration = {}

        if (password is None) == (auth == 'LDAP'):
            raise ValueError(
                "password should be set if and only if in LDAP mode")
        if (kerberos_service_name is None) == (auth == 'KERBEROS'):
            raise ValueError(
                "kerberos_service_name should be set if and only if in KERBEROS mode"
            )

        if auth == 'NOSASL':
            # NOSASL corresponds to hive.server2.authentication=NOSASL in hive-site.xml
            self._transport = thrift.transport.TTransport.TBufferedTransport(sock)
        elif auth in ('LDAP', 'KERBEROS', 'NONE'):
            # KERBEROS mode in hive.server2.authentication is GSSAPI in sasl library
            mechanism = 'GSSAPI' if auth == 'KERBEROS' else 'PLAIN'
            if mechanism == 'PLAIN' and password is None:
                # Password doesn't matter in NONE mode, just needs to be nonempty.
                password = b'x'

            def make_sasl_client():
                client = sasl.Client()
                client.setAttr(b'host', host)
                if mechanism == 'GSSAPI':
                    attributes = ((b'service', kerberos_service_name),)
                elif mechanism == 'PLAIN':
                    attributes = (
                        (b'username', username.encode('latin-1')),
                        (b'password', password),
                    )
                else:
                    raise AssertionError
                for key, value in attributes:
                    client.setAttr(key, value)
                client.init()
                return client

            self._transport = thrift_sasl.TSaslClientTransport(
                make_sasl_client, mechanism, sock)
        else:
            raise NotImplementedError(
                "Only NONE, NOSASL, LDAP, KERBEROS "
                "authentication are supported, got {}".format(auth))

        self._client = TCLIService.Client(
            thrift.protocol.TBinaryProtocol.TBinaryProtocol(self._transport))
        # oldest version that still contains features we care about
        # "V6 uses binary type for binary payload (was string) and uses columnar result set"
        wanted_version = ttypes.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6

        try:
            self._transport.open()
            response = self._client.OpenSession(ttypes.TOpenSessionReq(
                client_protocol=wanted_version,
                configuration=configuration,
            ))
            _check_status(response)
            # NOTE(review): these asserts are stripped when Python runs with -O.
            assert response.sessionHandle is not None, "Expected a session from OpenSession"
            self._sessionHandle = response.sessionHandle
            assert response.serverProtocolVersion == wanted_version, \
                "Unable to handle protocol version {}".format(response.serverProtocolVersion)
            with contextlib.closing(self.cursor()) as cur:
                cur.execute('USE `{}`'.format(database))
        except:
            # Deliberately bare: always release the transport, then re-raise.
            self._transport.close()
            raise
Ejemplo n.º 9
0
    def __init__(self, host=None, port=None, username=None, database='default', auth=None,
                 configuration=None, kerberos_service_name=None, password=None,
                 thrift_transport=None):
        """Build the transport, initialise the parent client and open a session.

        Argument combinations are validated first. The transport is then either
        taken from ``thrift_transport`` or built from ``host``/``port``/``auth``.
        After the parent initialiser has been given the binary protocol, the
        transport is opened, a session is opened and ``database`` is selected;
        on any failure in that phase the transport is closed and the exception
        re-raised.

        :param host: Host for the Thrift ``TSocket``; also set as the SASL ``host`` attribute
        :param port: Port for the socket; a falsy value means 10000
        :param username: Sent in SASL ``PLAIN`` mode and in the open-session
            request exactly as given (no default is substituted here)
        :param database: Database selected with ``USE`` once the session is open
        :param auth: One of ``NONE``, ``NOSASL``, ``LDAP``, ``KERBEROS``, ``CUSTOM``;
            a falsy value means ``NONE``
        :param configuration: Sent as ``configuration`` in the open-session request;
            a falsy value is replaced by an empty dict
        :param kerberos_service_name: Use with auth='KERBEROS' only
        :param password: Use with auth='LDAP' or auth='CUSTOM' only
        :param thrift_transport: Pre-built transport; incompatible with host,
            port, auth, kerberos_service_name, and password
        :raises ValueError: On an inconsistent combination of arguments
        :raises NotImplementedError: On an unsupported ``auth`` value
        """
        self.logger = logging.getLogger(__name__)

        if not configuration:
            configuration = {}

        password_expected = auth in ('LDAP', 'CUSTOM')
        if (password is None) == password_expected:
            raise ValueError("Password should be set if and only if in LDAP or CUSTOM mode; "
                             "Remove password or use one of those modes")
        if (kerberos_service_name is None) == (auth == 'KERBEROS'):
            raise ValueError("kerberos_service_name should be set if and only if in KERBEROS mode")

        if thrift_transport is not None:
            # A ready-made transport excludes every argument used to build one.
            conflicting = (host, port, auth, kerberos_service_name, password)
            if any(value is not None for value in conflicting):
                raise ValueError("thrift_transport cannot be used with "
                                 "host/port/auth/kerberos_service_name/password")
            self._transport = thrift_transport
        else:
            if not port:
                port = 10000
            if not auth:
                auth = 'NONE'
            sock = TSocket.TSocket(host, port)
            if auth == 'NOSASL':
                # NOSASL corresponds to hive.server2.authentication=NOSASL in hive-site.xml
                self._transport = TTransport.TBufferedTransport(sock)
            elif auth in ('LDAP', 'KERBEROS', 'NONE', 'CUSTOM'):
                # Defer import so package dependency is optional
                import sasl
                import thrift_sasl

                # KERBEROS mode in hive.server2.authentication is GSSAPI in sasl library
                mechanism = 'GSSAPI' if auth == 'KERBEROS' else 'PLAIN'
                if mechanism == 'PLAIN' and password is None:
                    # Password doesn't matter in NONE mode, just needs to be nonempty.
                    password = '******'

                def make_sasl_client():
                    client = sasl.Client()
                    client.setAttr('host', host)
                    if mechanism == 'GSSAPI':
                        attributes = (('service', kerberos_service_name),)
                    elif mechanism == 'PLAIN':
                        attributes = (('username', username), ('password', password))
                    else:
                        raise AssertionError
                    for key, value in attributes:
                        client.setAttr(key, value)
                    client.init()
                    return client

                self._transport = thrift_sasl.TSaslClientTransport(
                    make_sasl_client, mechanism, sock)
            else:
                # All HS2 config options:
                # https://cwiki.apache.org/confluence/display/Hive/Setting+Up+HiveServer2#SettingUpHiveServer2-Configuration
                # PAM currently left to end user via thrift_transport option.
                raise NotImplementedError(
                    "Only NONE, NOSASL, LDAP, KERBEROS, CUSTOM "
                    "authentication are supported, got {}".format(auth))

        super(HS2Client, self).__init__(TBinaryProtocol.TBinaryProtocol(self._transport))
        # oldest version that still contains features we care about
        # "V6 uses binary type for binary payload (was string) and uses columnar result set"
        wanted_version = ttypes.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6

        try:
            # NOTE(review): self._oprot is presumably set by the parent
            # initialiser above -- confirm against the base class.
            self._oprot.trans.open()
            self.__isOpened = True
            response = self.OpenSession(ttypes.TOpenSessionReq(
                client_protocol=wanted_version,
                configuration=configuration,
                username=username,
            ))
            _check_status(response)
            # NOTE(review): these asserts are stripped when Python runs with -O.
            assert response.sessionHandle is not None, "Expected a session from OpenSession"
            self._sessionHandle = response.sessionHandle
            assert response.serverProtocolVersion == wanted_version, \
                "Unable to handle protocol version {}".format(response.serverProtocolVersion)
            with self.cursor() as cur:
                cur.execute('USE `{}`'.format(database))
        except:
            # Deliberately bare: always release the transport, then re-raise.
            self._oprot.trans.close()
            raise
Ejemplo n.º 10
0
    def __init__(self,
                 host=None,
                 port=None,
                 username=None,
                 database='default',
                 auth=None,
                 configuration=None,
                 kerberos_service_name=None,
                 password=None,
                 thrift_transport=None,
                 service_mode='binary',
                 http_path=None,
                 is_zookeeper=False,
                 zookeeper_name_space='hiveserver2',
                 keytab_file=None,
                 krb_conf=None):
        """Connect to HiveServer2

        :param host: What host HiveServer2 runs on
        :param port: What port HiveServer2 runs on. Defaults to 10000.
        :param auth: The value of hive.server2.authentication used by HiveServer2.
            Defaults to ``NONE``.
        :param configuration: A dictionary of Hive settings (functionally same as the `set` command)
        :param kerberos_service_name: Use with auth='KERBEROS' only
        :param password: Use with auth='LDAP' or auth='CUSTOM' only
        :param thrift_transport: A ``TTransportBase`` for custom advanced usage.
            Incompatible with host, port, auth, kerberos_service_name, and password.
        :param service_mode: Set thrift transport mode ('http' or 'binary')
        :param http_path: Use with service_mode='http' only
        :param is_zookeeper: Set whether or not zookeeper method
        :param zookeeper_name_space: Use with service_mode='http' and is_zookeeper='true' only
        :param keytab_file: Use with service_mode='http' and auth='KERBEROS' only
        :param krb_conf: pycquery_krb.common.conf.KerberosConf instance. Use with service_mode='http' and auth='KERBEROS' only

        The way to support LDAP and GSSAPI is originated from cloudera/Impyla:
        https://github.com/cloudera/impyla/blob/255b07ed973d47a3395214ed92d35ec0615ebf62
        /impala/_thrift_api.py#L152-L160
        """
        self._opened = False
        self.auth = auth
        self.kerberos_service_name = kerberos_service_name
        self.username = username or getpass.getuser()
        self.password = password
        self.service_mode = service_mode
        self.keytab_file = keytab_file
        self.auth_lock = threading.Lock()
        self.realm = None
        self.kdc = None
        self.kerb_client = None
        self.krb_conf = krb_conf
        self.expired_time = 0
        configuration = configuration or {}
        last_exception = None

        # if (password is not None) != (auth in ('LDAP', 'CUSTOM')):
        #     raise ValueError("Password should be set if and only if in LDAP or CUSTOM mode; "
        #                      "Remove password or use one of those modes")
        if auth == 'KERBEROS':
            if kerberos_service_name is None:
                raise ValueError(
                    "kerberos_service_name must be set in KERBEROS mode")

            if krb_conf is None:
                raise ValueError("krb_conf must be set in KERBEROS mode")

            p = self.username.split('@')
            self.username = p[0]
            if len(p) > 1:
                self.realm = p[1]
            else:
                self.realm = krb_conf.lib_defaults.default_realm

            if self.realm is None:
                raise ValueError(
                    "Kerberos realm must be specified at username or krb5.conf in KERBEROS mode"
                )

            conf_realm = krb_conf.find_realm(self.realm)
            if conf_realm is None:
                raise ValueError("No matching realm in krb5.conf")
            if len(conf_realm.kdc) == 0:
                raise ValueError(
                    "No kdc information in {} realm of krb5.conf".format(
                        self.realm))

            # use the first kdc in the list for implementation simpleness
            p = conf_realm.kdc[0].split(':')
            self.kdc = {"host": p[0], "port": p[1]}

        if thrift_transport is not None:
            has_incompatible_arg = (host is not None or port is not None
                                    or auth is not None
                                    or kerberos_service_name is not None
                                    or password is not None)
            if has_incompatible_arg:
                raise ValueError(
                    "thrift_transport cannot be used with "
                    "host/port/auth/kerberos_service_name/password")

        if is_zookeeper:
            # It randomly shuffles node information stored in zookeeper.
            remaining_nodes = self._get_hiveserver2_info_with_zookeeper(
                host, port, zookeeper_name_space)
            random.shuffle(remaining_nodes)
        else:
            # Direct access to host and port if not zookeeper.
            remaining_nodes = [{'host': host, 'port': port}]

        # Access nodes sequentially and if they fail, access other nodes.
        while len(remaining_nodes) > 0:
            node = remaining_nodes.pop()
            self.host = node['host']
            self.port = node['port']
            if thrift_transport is not None:
                self._transport = thrift_transport
            elif service_mode == 'binary':
                if self.port is None:
                    self.port = 10000
                if self.auth is None:
                    self.auth = 'NONE'
                socket = thrift.transport.TSocket.TSocket(self.host, self.port)
                if auth == 'NOSASL':
                    # NOSASL corresponds to hive.server2.authentication=NOSASL in hive-site.xml
                    self._transport = thrift.transport.TTransport.TBufferedTransport(
                        socket)
                elif self.auth in ('LDAP', 'KERBEROS', 'NONE', 'CUSTOM',
                                   'NOSASL'):
                    # Defer import so package dependency is optional
                    import sasl
                    import thrift_sasl

                    if self.auth == 'KERBEROS':
                        # KERBEROS mode in hive.server2.authentication is GSSAPI in sasl library
                        sasl_auth = 'GSSAPI'
                    else:
                        sasl_auth = 'PLAIN'
                        if self.password is None:
                            # Password doesn't matter in NONE mode, just needs to be nonempty.
                            self.password = '******'

                    def sasl_factory():
                        sasl_client = sasl.Client()
                        sasl_client.setAttr('host', self.host)
                        if sasl_auth == 'GSSAPI':
                            sasl_client.setAttr('service',
                                                kerberos_service_name)
                        elif sasl_auth == 'PLAIN':
                            sasl_client.setAttr('username', username)
                            sasl_client.setAttr('password', password)
                        else:
                            raise AssertionError
                        sasl_client.init()
                        return sasl_client

                    self._transport = thrift_sasl.TSaslClientTransport(
                        sasl_factory, sasl_auth, socket)

                else:
                    # All HS2 config options:
                    # https://cwiki.apache.org/confluence/display/Hive/Setting+Up+HiveServer2#SettingUpHiveServer2-Configuration
                    # PAM currently left to end user via thrift_transport option.
                    raise NotImplementedError(
                        "Only NONE, NOSASL, LDAP, KERBEROS, CUSTOM "
                        "authentication are supported with binary mode, got {}"
                        .format(auth))

            elif service_mode == 'http':
                if self.auth is None:
                    self.auth = 'NONE'

                if self.auth in ('NONE', 'LDAP', 'KERBEROS', 'CUSTOM'):
                    if self.password is None:
                        self.password = '******'
                    self._transport = thrift.transport.THttpClient.THttpClient(
                        "http://{}:{}/{}".format(self.host, self.port,
                                                 http_path))
                    if auth == 'KERBEROS':
                        self.set_auth_setting()
                    else:
                        auth_header = HTTPBasicAuth(username, self.password)
                        self._transport.setCustomHeaders(
                            auth_header(Request()).headers)
                else:
                    raise NotImplementedError(
                        "Only NONE, NOSASL, LDAP, KERBEROS, CUSTOM "
                        "authentication is supported with http mode, got {}".
                        format(auth))
            else:
                raise NotImplementedError(
                    "Only binary, http are supported for the transport mode, "
                    "got {}".format(service_mode))

            protocol = thrift.protocol.TBinaryProtocol.TBinaryProtocol(
                self._transport)
            self._client = TCLIService.Client(protocol)
            # oldest version that still contains features we care about
            # "V6 uses binary type for binary payload (was string) and uses columnar result set"
            protocol_version = ttypes.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6

            try:
                self._transport.open()
                open_session_req = ttypes.TOpenSessionReq(
                    client_protocol=protocol_version,
                    configuration=configuration,
                    username=username,
                )
                response = self._client.OpenSession(open_session_req)
                _check_status(response)
                assert response.sessionHandle is not None, "Expected a session from OpenSession"
                self._sessionHandle = response.sessionHandle
                assert response.serverProtocolVersion == protocol_version, \
                    "Unable to handle protocol version {}".format(response.serverProtocolVersion)
                self._opened = True
                with contextlib.closing(self.cursor()) as cursor:
                    cursor.execute('USE `{}`'.format(database))
                atexit.register(self.close)
            except Exception as ex:
                import traceback
                # If this node cannot be reached, the loop goes on to try the remaining nodes.
                _logger.warning('Failed to connect to %s:%s. (message = %s)' %
                                (self.host, self.port, 'Error opening session'
                                 if isinstance(ex, EOFError) else ex))
                last_exception = ex
                self.close()
            else:
                # The session was opened successfully on this node, so stop trying others.
                _logger.info('Connected to %s:%s' % (self.host, self.port))
                return

        # No node could be connected to: re-raise the last error received.
        raise last_exception