def __init__(self, host, port=10000, username=None, database='default', configuration=None):
    """Connect to HiveServer2 over SASL PLAIN and switch to ``database``.

    :param host: Host the thrift socket connects to
    :param port: Port the thrift socket connects to
    :param username: Name sent as the SASL username; ``getpass.getuser()``
        is used when this is falsy
    :param database: Database selected with a ``USE`` statement once the
        session is open
    :param configuration: Dictionary sent as the OpenSession configuration;
        an empty dictionary is used when this is falsy
    """
    socket = thrift.transport.TSocket.TSocket(host, port)
    if not username:
        username = getpass.getuser()
    if not configuration:
        configuration = {}

    def build_sasl_client():
        client = sasl.Client()
        client.setAttr(b'username', username.encode('latin-1'))
        # Password doesn't matter in PLAIN mode, just needs to be nonempty.
        client.setAttr(b'password', b'x')
        client.init()
        return client

    # PLAIN corresponds to hive.server2.authentication=NONE in hive-site.xml
    self._transport = thrift_sasl.TSaslClientTransport(build_sasl_client, b'PLAIN', socket)
    self._client = TCLIService.Client(
        thrift.protocol.TBinaryProtocol.TBinaryProtocol(self._transport))

    try:
        self._transport.open()
        wanted_version = ttypes.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1
        request = ttypes.TOpenSessionReq(
            client_protocol=wanted_version,
            configuration=configuration,
        )
        response = self._client.OpenSession(request)
        _check_status(response)
        assert response.sessionHandle is not None, "Expected a session from OpenSession"
        self._sessionHandle = response.sessionHandle
        assert response.serverProtocolVersion == wanted_version, \
            "Unable to handle protocol version {}".format(response.serverProtocolVersion)
        # Select the requested database, always closing the cursor afterwards.
        cursor = self.cursor()
        try:
            cursor.execute('USE `{}`'.format(database))
        finally:
            cursor.close()
    except:  # bare on purpose: close the transport on any failure, then re-raise
        self._transport.close()
        raise
def __create_binary_transport(host, username, password, kerberos_service_name, port, auth):
    """Build (without opening) a binary-mode thrift transport for HiveServer2.

    :param host: Host for the thrift socket; also set as the SASL ``host`` attribute
    :param username: SASL username, used for the PLAIN mechanism
    :param password: SASL password; must be given exactly when ``auth`` is
        ``LDAP`` or ``CUSTOM``
    :param kerberos_service_name: SASL ``service`` attribute, used for GSSAPI
    :param port: Socket port; ``None`` means 10000
    :param auth: Value of hive.server2.authentication; ``None`` means ``NONE``
    :return: A ``TBufferedTransport`` for ``NOSASL``, otherwise a
        ``TSaslClientTransport``
    :raises ValueError: if ``password`` is inconsistent with ``auth``
    :raises NotImplementedError: if ``auth`` is not a supported mode
    """
    port = 10000 if port is None else port
    auth = 'NONE' if auth is None else auth

    password_given = password is not None
    password_expected = auth in ('LDAP', 'CUSTOM')
    if password_given != password_expected:
        raise ValueError(
            "Password should be set if and only if in LDAP or CUSTOM mode; "
            "Remove password or use one of those modes")

    socket = thrift.transport.TSocket.TSocket(host, port)

    if auth == 'NOSASL':
        # NOSASL corresponds to hive.server2.authentication=NOSASL in hive-site.xml
        return thrift.transport.TTransport.TBufferedTransport(socket)

    if auth not in ('LDAP', 'KERBEROS', 'NONE', 'CUSTOM'):
        # All HS2 config options:
        # https://cwiki.apache.org/confluence/display/Hive/Setting+Up+HiveServer2#SettingUpHiveServer2-Configuration
        # PAM currently left to end user via thrift_transport option.
        raise NotImplementedError(
            "Only NONE, NOSASL, LDAP, KERBEROS, CUSTOM "
            "authentication are supported, got {}".format(auth))

    # Defer import so package dependency is optional
    import sasl
    import thrift_sasl

    # KERBEROS mode in hive.server2.authentication is GSSAPI in sasl library
    sasl_auth = 'GSSAPI' if auth == 'KERBEROS' else 'PLAIN'
    if sasl_auth == 'PLAIN' and password is None:
        # Password doesn't matter in NONE mode, just needs to be nonempty.
        password = '******'

    def sasl_factory():
        client = sasl.Client()
        client.setAttr('host', host)
        if sasl_auth == 'PLAIN':
            client.setAttr('username', username)
            client.setAttr('password', password)
        elif sasl_auth == 'GSSAPI':
            client.setAttr('service', kerberos_service_name)
        else:
            raise AssertionError
        client.init()
        return client

    return thrift_sasl.TSaslClientTransport(sasl_factory, sasl_auth, socket)
def transport_factory(host, port, username, password, **kwargs):
    """
    Creates a thrift_sasl transport for use with HIVE/PyHive.

    The SASL mechanism is fixed at ``PLAIN``. The returned transport is not
    opened here. Only pass this object to PyHive connection.

    :param host: str - host name
    :param port: int/str - port number - hive default is 10000
    :param username: str - SASL username
    :param password: str - SASL password
    :param kwargs: optional
        use_ssl=True wraps the connection in a TSSLSocket (default False);
            without socket_kwargs the socket is built with cert_reqs=ssl.CERT_NONE
        socket_kwargs={}, custom kwargs passed to the SSL socket; when non-empty
            they are used instead of the cert_reqs default
        kerberos_service_name=service name handed to the SASL client
        use_sasl=True selects the sasl library instead of PureSASL (default False)
    :return: thrift_sasl.TSaslClientTransport
    """
    sasl_auth = 'PLAIN'
    use_ssl = kwargs.get('use_ssl', False)
    socket_kwargs = kwargs.get('socket_kwargs', {})
    kerberos_service_name = kwargs.get('kerberos_service_name', None)
    use_sasl = kwargs.get('use_sasl', False)

    if not use_ssl:
        socket = TSocket(host, port)  # basic socket
    elif socket_kwargs:
        socket = TSSLSocket(host, port, **socket_kwargs)
    else:
        socket = TSSLSocket(host, port, cert_reqs=ssl.CERT_NONE)

    if use_sasl:
        import sasl

        def sasl_factory():
            client = sasl.Client()
            client.setAttr('host', host)
            if sasl_auth == 'PLAIN':
                client.setAttr('username', username)
                client.setAttr('password', password)
            elif sasl_auth == 'GSSAPI':
                client.setAttr('service', kerberos_service_name)
            else:
                raise AssertionError
            client.init()
            return client
    else:
        from sasl_compat import PureSASLClient

        def sasl_factory():
            return PureSASLClient(
                host,
                username=username,
                password=password,
                service=kerberos_service_name,
                mechanism=sasl_auth,
            )

    return thrift_sasl.TSaslClientTransport(sasl_factory, sasl_auth, socket)
def __init__(self, host, port=10000, username=None, database='default', auth='NONE',
             configuration=None):
    """Connect to HiveServer2

    :param host: Host the thrift socket connects to
    :param port: Port the thrift socket connects to
    :param username: SASL username for ``NONE`` mode; ``getpass.getuser()``
        is used when this is falsy
    :param database: Database selected with ``USE`` once the session is open
    :param auth: The value of hive.server2.authentication used by HiveServer2;
        only ``NONE`` and ``NOSASL`` are accepted
    :param configuration: Dictionary sent as the OpenSession configuration
    :raises NotImplementedError: for any other ``auth`` value
    """
    socket = thrift.transport.TSocket.TSocket(host, port)
    if not username:
        username = getpass.getuser()
    if not configuration:
        configuration = {}

    if auth == 'NONE':
        def build_sasl_client():
            client = sasl.Client()
            client.setAttr(b'username', username.encode('latin-1'))
            # Password doesn't matter in NONE mode, just needs to be nonempty.
            client.setAttr(b'password', b'x')
            client.init()
            return client

        # PLAIN corresponds to hive.server2.authentication=NONE in hive-site.xml
        self._transport = thrift_sasl.TSaslClientTransport(build_sasl_client, b'PLAIN', socket)
    elif auth == 'NOSASL':
        # NOSASL corresponds to hive.server2.authentication=NOSASL in hive-site.xml
        self._transport = thrift.transport.TTransport.TBufferedTransport(socket)
    else:
        raise NotImplementedError(
            "Only NONE & NOSASL authentication are supported, got {}".format(auth))

    self._client = TCLIService.Client(
        thrift.protocol.TBinaryProtocol.TBinaryProtocol(self._transport))
    # oldest version that still contains features we care about
    # "V6 uses binary type for binary payload (was string) and uses columnar result set"
    protocol_version = ttypes.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6

    try:
        self._transport.open()
        request = ttypes.TOpenSessionReq(
            client_protocol=protocol_version,
            configuration=configuration,
        )
        response = self._client.OpenSession(request)
        _check_status(response)
        assert response.sessionHandle is not None, "Expected a session from OpenSession"
        self._sessionHandle = response.sessionHandle
        assert response.serverProtocolVersion == protocol_version, \
            "Unable to handle protocol version {}".format(response.serverProtocolVersion)
        # Select the requested database, always closing the cursor afterwards.
        cursor = self.cursor()
        try:
            cursor.execute('USE `{}`'.format(database))
        finally:
            cursor.close()
    except:  # bare on purpose: close the transport on any failure, then re-raise
        self._transport.close()
        raise
def test_custom_transport(self):
    """A caller-built SASL PLAIN transport can be passed to ``hive.connect``."""
    socket = thrift.transport.TSocket.TSocket('localhost', 10000)

    def build_sasl_client():
        client = sasl.Client()
        attributes = (
            ('host', 'localhost'),
            ('username', 'test_username'),
            ('password', 'x'),
        )
        for key, value in attributes:
            client.setAttr(key, value)
        client.init()
        return client

    transport = thrift_sasl.TSaslClientTransport(build_sasl_client, 'PLAIN', socket)
    conn = hive.connect(thrift_transport=transport)
    with contextlib.closing(conn), contextlib.closing(conn.cursor()) as cursor:
        cursor.execute('SELECT * FROM one_row')
        rows = cursor.fetchall()
        self.assertEqual(rows, [(1,)])
def build_ssl_transport(host, port, username, auth, kerberos_service_name, password=None):
    """Build (without opening) a thrift transport over an SSL socket.

    :param host: Host for the SSL socket; also set as the SASL ``host`` attribute
    :param port: Socket port; ``None`` means 10000
    :param username: SASL username, used for the PLAIN mechanism
    :param auth: Value of hive.server2.authentication; ``None`` means ``NONE``
    :param kerberos_service_name: SASL ``service`` attribute, used for GSSAPI
    :param password: SASL password for the PLAIN mechanism; a placeholder is
        substituted when it is ``None``
    :return: A ``TBufferedTransport`` for ``NOSASL``, otherwise a
        ``TSaslClientTransport``
    :raises NotImplementedError: if ``auth`` is not one of NONE, NOSASL, LDAP,
        KERBEROS or CUSTOM
    """
    if port is None:
        port = 10000
    if auth is None:
        auth = 'NONE'

    # Reject unsupported modes up front, before any socket object is built,
    # instead of silently handing ``None`` back to the caller.
    if auth not in ('NOSASL', 'LDAP', 'KERBEROS', 'NONE', 'CUSTOM'):
        raise NotImplementedError(
            "Only NONE, NOSASL, LDAP, KERBEROS, CUSTOM "
            "authentication are supported, got {}".format(auth))

    # SECURITY NOTE: cert_reqs=ssl.CERT_NONE disables verification of the
    # server certificate; the link is encrypted but the peer is not authenticated.
    socket = TSSLSocket(host, port, cert_reqs=ssl.CERT_NONE)

    if auth == 'NOSASL':
        # NOSASL corresponds to hive.server2.authentication=NOSASL
        # in hive-site.xml
        return thrift.transport.TTransport.TBufferedTransport(socket)

    if auth == 'KERBEROS':
        # KERBEROS mode in hive.server2.authentication is GSSAPI
        # in sasl library
        sasl_auth = 'GSSAPI'
    else:
        sasl_auth = 'PLAIN'
        if password is None:
            # Password doesn't matter in NONE mode, just needs
            # to be nonempty.
            password = '******'

    def sasl_factory():
        sasl_client = sasl.Client()
        sasl_client.setAttr('host', host)
        if sasl_auth == 'GSSAPI':
            sasl_client.setAttr('service', kerberos_service_name)
        elif sasl_auth == 'PLAIN':
            sasl_client.setAttr('username', username)
            sasl_client.setAttr('password', password)
        else:
            raise AssertionError
        sasl_client.init()
        return sasl_client

    return thrift_sasl.TSaslClientTransport(sasl_factory, sasl_auth, socket)
def __init__(self, host=None, port=None, username=None, database='default', auth=None,
             configuration=None, kerberos_service_name=None, password=None,
             thrift_transport=None):
    """Connect to HiveServer2

    :param host: What host HiveServer2 runs on
    :param port: What port HiveServer2 runs on. Defaults to 10000.
    :param username: Name sent to SASL PLAIN and in the OpenSession request;
        ``getpass.getuser()`` is used when this is falsy
    :param database: Database selected with ``USE`` once the session is open
    :param auth: The value of hive.server2.authentication used by HiveServer2.
        Defaults to ``NONE``.
    :param configuration: A dictionary of Hive settings (functionally same as the `set` command)
    :param kerberos_service_name: Use with auth='KERBEROS' only
    :param password: Use with auth='LDAP' or auth='CUSTOM' only
    :param thrift_transport: A ``TTransportBase`` for custom advanced usage.
        Incompatible with host, port, auth, kerberos_service_name, and password.
    :raises ValueError: if password / kerberos_service_name do not match ``auth``,
        or if ``thrift_transport`` is combined with an incompatible argument
    :raises NotImplementedError: if ``auth`` is not a supported mode

    The way to support LDAP and GSSAPI is originated from cloudera/Impyla:
    https://github.com/cloudera/impyla/blob/255b07ed973d47a3395214ed92d35ec0615ebf62
    /impala/_thrift_api.py#L152-L160
    """
    if not username:
        username = getpass.getuser()
    if not configuration:
        configuration = {}

    password_given = password is not None
    if password_given != (auth in ('LDAP', 'CUSTOM')):
        raise ValueError(
            "Password should be set if and only if in LDAP or CUSTOM mode; "
            "Remove password or use one of those modes")
    service_name_given = kerberos_service_name is not None
    if service_name_given != (auth == 'KERBEROS'):
        raise ValueError(
            "kerberos_service_name should be set if and only if in KERBEROS mode")

    if thrift_transport is not None:
        conflicting = (host, port, auth, kerberos_service_name, password)
        if any(value is not None for value in conflicting):
            raise ValueError(
                "thrift_transport cannot be used with "
                "host/port/auth/kerberos_service_name/password")
        self._transport = thrift_transport
    else:
        port = 10000 if port is None else port
        auth = 'NONE' if auth is None else auth
        socket = thrift.transport.TSocket.TSocket(host, port)

        if auth == 'NOSASL':
            # NOSASL corresponds to hive.server2.authentication=NOSASL in hive-site.xml
            self._transport = thrift.transport.TTransport.TBufferedTransport(socket)
        elif auth in ('LDAP', 'KERBEROS', 'NONE', 'CUSTOM'):
            # Defer import so package dependency is optional
            import sasl
            import thrift_sasl

            # KERBEROS mode in hive.server2.authentication is GSSAPI in sasl library
            sasl_auth = 'GSSAPI' if auth == 'KERBEROS' else 'PLAIN'
            if sasl_auth == 'PLAIN' and password is None:
                # Password doesn't matter in NONE mode, just needs to be nonempty.
                password = '******'

            def sasl_factory():
                client = sasl.Client()
                client.setAttr('host', host)
                if sasl_auth == 'PLAIN':
                    client.setAttr('username', username)
                    client.setAttr('password', password)
                elif sasl_auth == 'GSSAPI':
                    client.setAttr('service', kerberos_service_name)
                else:
                    raise AssertionError
                client.init()
                return client

            self._transport = thrift_sasl.TSaslClientTransport(sasl_factory, sasl_auth, socket)
        else:
            # All HS2 config options:
            # https://cwiki.apache.org/confluence/display/Hive/Setting+Up+HiveServer2#SettingUpHiveServer2-Configuration
            # PAM currently left to end user via thrift_transport option.
            raise NotImplementedError(
                "Only NONE, NOSASL, LDAP, KERBEROS, CUSTOM "
                "authentication are supported, got {}".format(auth))

    self._client = TCLIService.Client(
        thrift.protocol.TBinaryProtocol.TBinaryProtocol(self._transport))
    # oldest version that still contains features we care about
    # "V6 uses binary type for binary payload (was string) and uses columnar result set"
    protocol_version = ttypes.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6

    try:
        self._transport.open()
        request = ttypes.TOpenSessionReq(
            client_protocol=protocol_version,
            configuration=configuration,
            username=username,
        )
        response = self._client.OpenSession(request)
        _check_status(response)
        assert response.sessionHandle is not None, "Expected a session from OpenSession"
        self._sessionHandle = response.sessionHandle
        assert response.serverProtocolVersion == protocol_version, \
            "Unable to handle protocol version {}".format(response.serverProtocolVersion)
        # Select the requested database, always closing the cursor afterwards.
        cursor = self.cursor()
        try:
            cursor.execute('USE `{}`'.format(database))
        finally:
            cursor.close()
    except:  # bare on purpose: close the transport on any failure, then re-raise
        self._transport.close()
        raise
def __init__(self, host, port=10000, username=None, database='default', auth='NONE',
             configuration=None, kerberos_service_name=None, password=None):
    """Connect to HiveServer2

    :param host: Host the thrift socket connects to; also the SASL ``host`` attribute
    :param port: Port the thrift socket connects to
    :param username: SASL PLAIN username; ``getpass.getuser()`` is used when falsy
    :param database: Database selected with ``USE`` once the session is open
    :param auth: The value of hive.server2.authentication used by HiveServer2
    :param configuration: A dictionary of Hive settings (functionally same as the `set` command)
    :param kerberos_service_name: Use with auth='KERBEROS' only
    :param password: Use with auth='LDAP' only
    :raises ValueError: if password / kerberos_service_name do not match ``auth``
    :raises NotImplementedError: if ``auth`` is not NONE, NOSASL, LDAP or KERBEROS

    The way to support LDAP and GSSAPI is originated from cloudera/Impyla:
    https://github.com/cloudera/impyla/blob/255b07ed973d47a3395214ed92d35ec0615ebf62
    /impala/_thrift_api.py#L152-L160
    """
    socket = thrift.transport.TSocket.TSocket(host, port)
    if not username:
        username = getpass.getuser()
    if not configuration:
        configuration = {}

    password_given = password is not None
    if password_given != (auth == 'LDAP'):
        raise ValueError(
            "password should be set if and only if in LDAP mode")
    service_name_given = kerberos_service_name is not None
    if service_name_given != (auth == 'KERBEROS'):
        raise ValueError(
            "kerberos_service_name should be set if and only if in KERBEROS mode")

    if auth == 'NOSASL':
        # NOSASL corresponds to hive.server2.authentication=NOSASL in hive-site.xml
        self._transport = thrift.transport.TTransport.TBufferedTransport(socket)
    elif auth in ('LDAP', 'KERBEROS', 'NONE'):
        # KERBEROS mode in hive.server2.authentication is GSSAPI in sasl library
        sasl_auth = 'GSSAPI' if auth == 'KERBEROS' else 'PLAIN'
        if sasl_auth == 'PLAIN' and password is None:
            # Password doesn't matter in NONE mode, just needs to be nonempty.
            password = b'x'

        def sasl_factory():
            client = sasl.Client()
            client.setAttr(b'host', host)
            if sasl_auth == 'PLAIN':
                client.setAttr(b'username', username.encode('latin-1'))
                client.setAttr(b'password', password)
            elif sasl_auth == 'GSSAPI':
                client.setAttr(b'service', kerberos_service_name)
            else:
                raise AssertionError
            client.init()
            return client

        self._transport = thrift_sasl.TSaslClientTransport(sasl_factory, sasl_auth, socket)
    else:
        raise NotImplementedError(
            "Only NONE, NOSASL, LDAP, KERBEROS "
            "authentication are supported, got {}".format(auth))

    self._client = TCLIService.Client(
        thrift.protocol.TBinaryProtocol.TBinaryProtocol(self._transport))
    # oldest version that still contains features we care about
    # "V6 uses binary type for binary payload (was string) and uses columnar result set"
    protocol_version = ttypes.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6

    try:
        self._transport.open()
        request = ttypes.TOpenSessionReq(
            client_protocol=protocol_version,
            configuration=configuration,
        )
        response = self._client.OpenSession(request)
        _check_status(response)
        assert response.sessionHandle is not None, "Expected a session from OpenSession"
        self._sessionHandle = response.sessionHandle
        assert response.serverProtocolVersion == protocol_version, \
            "Unable to handle protocol version {}".format(response.serverProtocolVersion)
        # Select the requested database, always closing the cursor afterwards.
        cursor = self.cursor()
        try:
            cursor.execute('USE `{}`'.format(database))
        finally:
            cursor.close()
    except:  # bare on purpose: close the transport on any failure, then re-raise
        self._transport.close()
        raise
def __init__(self, host=None, port=None, username=None, database='default', auth=None,
             configuration=None, kerberos_service_name=None, password=None,
             thrift_transport=None):
    """Build the transport, initialise the thrift client and open a session.

    :param host: Host for the thrift socket; also the SASL ``host`` attribute
    :param port: Socket port; a falsy value means 10000
    :param username: Name sent to SASL PLAIN and in the OpenSession request
    :param database: Database selected with ``USE`` once the session is open
    :param auth: Value of hive.server2.authentication; a falsy value means ``NONE``
    :param configuration: Dictionary sent as the OpenSession configuration
    :param kerberos_service_name: Use with auth='KERBEROS' only
    :param password: Use with auth='LDAP' or auth='CUSTOM' only
    :param thrift_transport: Ready-made transport; incompatible with host, port,
        auth, kerberos_service_name, and password
    :raises ValueError: if password / kerberos_service_name do not match ``auth``,
        or if ``thrift_transport`` is combined with an incompatible argument
    :raises NotImplementedError: if ``auth`` is not a supported mode
    """
    self.logger = logging.getLogger(__name__)
    if not configuration:
        configuration = {}

    password_given = password is not None
    if password_given != (auth in ('LDAP', 'CUSTOM')):
        raise ValueError("Password should be set if and only if in LDAP or CUSTOM mode; "
                         "Remove password or use one of those modes")
    service_name_given = kerberos_service_name is not None
    if service_name_given != (auth == 'KERBEROS'):
        raise ValueError("kerberos_service_name should be set if and only if in KERBEROS mode")

    if thrift_transport is not None:
        conflicting = (host, port, auth, kerberos_service_name, password)
        if any(value is not None for value in conflicting):
            raise ValueError("thrift_transport cannot be used with "
                             "host/port/auth/kerberos_service_name/password")
        self._transport = thrift_transport
    else:
        if not port:
            port = 10000
        if not auth:
            auth = 'NONE'
        socket = TSocket.TSocket(host, port)

        if auth == 'NOSASL':
            # NOSASL corresponds to hive.server2.authentication=NOSASL in hive-site.xml
            self._transport = TTransport.TBufferedTransport(socket)
        elif auth in ('LDAP', 'KERBEROS', 'NONE', 'CUSTOM'):
            # Defer import so package dependency is optional
            import sasl
            import thrift_sasl

            # KERBEROS mode in hive.server2.authentication is GSSAPI in sasl library
            sasl_auth = 'GSSAPI' if auth == 'KERBEROS' else 'PLAIN'
            if sasl_auth == 'PLAIN' and password is None:
                # Password doesn't matter in NONE mode, just needs to be nonempty.
                password = '******'

            def sasl_factory():
                client = sasl.Client()
                client.setAttr('host', host)
                if sasl_auth == 'PLAIN':
                    client.setAttr('username', username)
                    client.setAttr('password', password)
                elif sasl_auth == 'GSSAPI':
                    client.setAttr('service', kerberos_service_name)
                else:
                    raise AssertionError
                client.init()
                return client

            self._transport = thrift_sasl.TSaslClientTransport(sasl_factory, sasl_auth, socket)
        else:
            # All HS2 config options:
            # https://cwiki.apache.org/confluence/display/Hive/Setting+Up+HiveServer2#SettingUpHiveServer2-Configuration
            # PAM currently left to end user via thrift_transport option.
            raise NotImplementedError(
                "Only NONE, NOSASL, LDAP, KERBEROS, CUSTOM "
                "authentication are supported, got {}".format(auth))

    super(HS2Client, self).__init__(TBinaryProtocol.TBinaryProtocol(self._transport))
    # oldest version that still contains features we care about
    # "V6 uses binary type for binary payload (was string) and uses columnar result set"
    protocol_version = ttypes.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6

    try:
        self._oprot.trans.open()
        self.__isOpened = True
        request = ttypes.TOpenSessionReq(
            client_protocol=protocol_version,
            configuration=configuration,
            username=username,
        )
        response = self.OpenSession(request)
        _check_status(response)
        assert response.sessionHandle is not None, "Expected a session from OpenSession"
        self._sessionHandle = response.sessionHandle
        assert response.serverProtocolVersion == protocol_version, \
            "Unable to handle protocol version {}".format(response.serverProtocolVersion)
        with self.cursor() as cursor:
            cursor.execute('USE `{}`'.format(database))
    except:  # bare on purpose: close the transport on any failure, then re-raise
        self._oprot.trans.close()
        raise
def __init__(self, host=None, port=None, username=None, database='default', auth=None,
             configuration=None, kerberos_service_name=None, password=None,
             thrift_transport=None, service_mode='binary', http_path=None,
             is_zookeeper=False, zookeeper_name_space='hiveserver2',
             keytab_file=None, krb_conf=None):
    """Connect to HiveServer2

    Candidate nodes are tried one after another until a session opens; the
    error from the last failed node is raised when none succeeds.

    :param host: What host HiveServer2 runs on (passed to the ZooKeeper lookup
        when ``is_zookeeper`` is set)
    :param port: What port HiveServer2 runs on. Defaults to 10000 in binary mode.
    :param username: User name; ``getpass.getuser()`` is used when this is falsy.
        In KERBEROS mode a ``user@REALM`` value is split into user and realm.
    :param database: Database selected with ``USE`` once the session is open
    :param auth: The value of hive.server2.authentication used by HiveServer2.
        Defaults to ``NONE``.
    :param configuration: A dictionary of Hive settings (functionally same as the `set` command)
    :param kerberos_service_name: Required with auth='KERBEROS'
    :param password: Password for SASL PLAIN / HTTP basic auth; a placeholder is
        used when it is ``None``
    :param thrift_transport: A ``TTransportBase`` for custom advanced usage.
        Incompatible with host, port, auth, kerberos_service_name, and password.
    :param service_mode: Set thrift transport mode ('http' or 'binary')
    :param http_path: Use with service_mode='http' only
    :param is_zookeeper: Set whether or not zookeeper method
    :param zookeeper_name_space: Namespace passed to the ZooKeeper lookup when
        ``is_zookeeper`` is set
    :param keytab_file: Stored on the instance as ``self.keytab_file``
    :param krb_conf: pycquery_krb.common.conf.KerberosConf instance.
        Required with auth='KERBEROS'
    :raises ValueError: on missing/invalid KERBEROS settings, or when
        ``thrift_transport`` is combined with an incompatible argument
    :raises NotImplementedError: for an unsupported ``auth`` or ``service_mode``
    :raises RuntimeError: when there is no node to connect to at all

    The way to support LDAP and GSSAPI is originated from cloudera/Impyla:
    https://github.com/cloudera/impyla/blob/255b07ed973d47a3395214ed92d35ec0615ebf62
    /impala/_thrift_api.py#L152-L160
    """
    self._opened = False
    self.auth = auth
    self.kerberos_service_name = kerberos_service_name
    self.username = username or getpass.getuser()
    self.password = password
    self.service_mode = service_mode
    self.keytab_file = keytab_file
    self.auth_lock = threading.Lock()
    self.realm = None
    self.kdc = None
    self.kerb_client = None
    self.krb_conf = krb_conf
    self.expired_time = 0
    configuration = configuration or {}
    last_exception = None

    # NOTE(review): unlike the kerberos settings below, ``password`` is not
    # validated against ``auth`` here (that check was commented out in the
    # original code) -- confirm this is intended.

    if auth == 'KERBEROS':
        if kerberos_service_name is None:
            raise ValueError(
                "kerberos_service_name must be set in KERBEROS mode")
        if krb_conf is None:
            raise ValueError("krb_conf must be set in KERBEROS mode")
        principal_parts = self.username.split('@')
        self.username = principal_parts[0]
        if len(principal_parts) > 1:
            self.realm = principal_parts[1]
        else:
            self.realm = krb_conf.lib_defaults.default_realm
        if self.realm is None:
            raise ValueError(
                "Kerberos realm must be specified at username or krb5.conf in KERBEROS mode"
            )
        conf_realm = krb_conf.find_realm(self.realm)
        if conf_realm is None:
            raise ValueError("No matching realm in krb5.conf")
        if len(conf_realm.kdc) == 0:
            raise ValueError(
                "No kdc information in {} realm of krb5.conf".format(
                    self.realm))
        # use the first kdc in the list for implementation simpleness
        kdc_parts = conf_realm.kdc[0].split(':')
        # A kdc entry may omit the port; fall back to the standard Kerberos
        # port instead of failing with IndexError.
        if len(kdc_parts) > 1 and kdc_parts[1]:
            kdc_port = kdc_parts[1]
        else:
            kdc_port = '88'
        self.kdc = {"host": kdc_parts[0], "port": kdc_port}

    if thrift_transport is not None:
        incompatible = (host, port, auth, kerberos_service_name, password)
        if any(value is not None for value in incompatible):
            raise ValueError(
                "thrift_transport cannot be used with "
                "host/port/auth/kerberos_service_name/password")

    if is_zookeeper:
        # It randomly shuffles node information stored in zookeeper.
        remaining_nodes = self._get_hiveserver2_info_with_zookeeper(
            host, port, zookeeper_name_space)
        random.shuffle(remaining_nodes)
    else:
        # Direct access to host and port if not zookeeper.
        remaining_nodes = [{'host': host, 'port': port}]

    # Access nodes sequentially and if they fail, access other nodes.
    while remaining_nodes:
        node = remaining_nodes.pop()
        self.host = node['host']
        self.port = node['port']

        if thrift_transport is not None:
            self._transport = thrift_transport
        elif service_mode == 'binary':
            if self.port is None:
                self.port = 10000
            if self.auth is None:
                self.auth = 'NONE'
            socket = thrift.transport.TSocket.TSocket(self.host, self.port)
            if self.auth == 'NOSASL':
                # NOSASL corresponds to hive.server2.authentication=NOSASL in hive-site.xml
                self._transport = thrift.transport.TTransport.TBufferedTransport(
                    socket)
            elif self.auth in ('LDAP', 'KERBEROS', 'NONE', 'CUSTOM'):
                # Defer import so package dependency is optional
                import sasl
                import thrift_sasl

                if self.auth == 'KERBEROS':
                    # KERBEROS mode in hive.server2.authentication is GSSAPI in sasl library
                    sasl_auth = 'GSSAPI'
                else:
                    sasl_auth = 'PLAIN'
                    if self.password is None:
                        # Password doesn't matter in NONE mode, just needs to be nonempty.
                        self.password = '******'

                def sasl_factory():
                    sasl_client = sasl.Client()
                    sasl_client.setAttr('host', self.host)
                    if sasl_auth == 'GSSAPI':
                        sasl_client.setAttr('service', kerberos_service_name)
                    elif sasl_auth == 'PLAIN':
                        # Use the defaulted instance values: the raw
                        # ``username``/``password`` arguments may be None.
                        sasl_client.setAttr('username', self.username)
                        sasl_client.setAttr('password', self.password)
                    else:
                        raise AssertionError
                    sasl_client.init()
                    return sasl_client

                self._transport = thrift_sasl.TSaslClientTransport(
                    sasl_factory, sasl_auth, socket)
            else:
                # All HS2 config options:
                # https://cwiki.apache.org/confluence/display/Hive/Setting+Up+HiveServer2#SettingUpHiveServer2-Configuration
                # PAM currently left to end user via thrift_transport option.
                raise NotImplementedError(
                    "Only NONE, NOSASL, LDAP, KERBEROS, CUSTOM "
                    "authentication are supported with binary mode, got {}"
                    .format(self.auth))
        elif service_mode == 'http':
            if self.auth is None:
                self.auth = 'NONE'
            if self.auth in ('NONE', 'LDAP', 'KERBEROS', 'CUSTOM'):
                if self.password is None:
                    self.password = '******'
                self._transport = thrift.transport.THttpClient.THttpClient(
                    "http://{}:{}/{}".format(self.host, self.port, http_path))
                if self.auth == 'KERBEROS':
                    self.set_auth_setting()
                else:
                    # Basic auth must carry the defaulted user name, not a
                    # possibly-None ``username`` argument.
                    auth_header = HTTPBasicAuth(self.username, self.password)
                    self._transport.setCustomHeaders(
                        auth_header(Request()).headers)
            else:
                raise NotImplementedError(
                    "Only NONE, NOSASL, LDAP, KERBEROS, CUSTOM "
                    "authentication is supported with http mode, got {}".
                    format(self.auth))
        else:
            raise NotImplementedError(
                "Only binary, http are supported for the transport mode, "
                "got {}".format(service_mode))

        protocol = thrift.protocol.TBinaryProtocol.TBinaryProtocol(
            self._transport)
        self._client = TCLIService.Client(protocol)
        # oldest version that still contains features we care about
        # "V6 uses binary type for binary payload (was string) and uses columnar result set"
        protocol_version = ttypes.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6

        try:
            self._transport.open()
            # NOTE(review): the request still carries the raw ``username``
            # argument (possibly None) rather than ``self.username`` --
            # confirm which one the server should see.
            open_session_req = ttypes.TOpenSessionReq(
                client_protocol=protocol_version,
                configuration=configuration,
                username=username,
            )
            response = self._client.OpenSession(open_session_req)
            _check_status(response)
            assert response.sessionHandle is not None, "Expected a session from OpenSession"
            self._sessionHandle = response.sessionHandle
            assert response.serverProtocolVersion == protocol_version, \
                "Unable to handle protocol version {}".format(response.serverProtocolVersion)
            self._opened = True
            with contextlib.closing(self.cursor()) as cursor:
                cursor.execute('USE `{}`'.format(database))
            atexit.register(self.close)
        except Exception as ex:
            # If the node fails to access, it will try to reconnect to the remaining node.
            _logger.warning(
                'Failed to connect to %s:%s. (message = %s)',
                self.host, self.port,
                'Error opening session' if isinstance(ex, EOFError) else ex)
            last_exception = ex
            self.close()
        else:
            # If any of the remaining nodes passed to zookeeper is successful, return.
            _logger.info('Connected to %s:%s', self.host, self.port)
            return

    if last_exception is None:
        # The node list was empty, so nothing was attempted; ``raise None``
        # would only produce an unrelated TypeError.
        raise RuntimeError("No HiveServer2 node available to connect to")
    # Return the last error I received.
    raise last_exception