def test_call_session_single(self):
  finish = (
    MAX_NUMBER_OF_SESSIONS.set_for_testing(1),
    CLOSE_SESSIONS.set_for_testing(False)
  )
  try:
    with patch('beeswax.server.hive_server2_lib.thrift_util.get_client') as get_client:
      with patch('beeswax.server.hive_server2_lib.HiveServerClient.open_session') as open_session:
        with patch('beeswax.server.hive_server2_lib.Session.objects.get_session') as get_session:
          open_session.return_value = MagicMock(status_code=0)
          get_session.return_value = None
          fn = MagicMock(attr='test')
          req = MagicMock()
          server_config = get_query_server_config(name='beeswax')
          client = HiveServerClient(server_config, self.user)

          (res, session1) = client.call(fn, req, status=None)
          open_session.assert_called_once()

          # Reuse session from argument
          (res, session2) = client.call(fn, req, status=None, session=session1)
          open_session.assert_called_once()  # open_session should not be called again, because we're reusing the session
          assert_equal(session1, session2)

          # Reuse session from get_session
          get_session.return_value = session1
          (res, session3) = client.call(fn, req, status=None)
          open_session.assert_called_once()  # open_session should not be called again, because we're reusing the session
          assert_equal(session1, session3)
  finally:
    for f in finish:
      f()
def test_call_session_close_idle_limit(self):
  finish = (
    MAX_NUMBER_OF_SESSIONS.set_for_testing(2),
    CLOSE_SESSIONS.set_for_testing(True)
  )
  try:
    with patch('beeswax.server.hive_server2_lib.thrift_util.get_client') as get_client:
      with patch('beeswax.server.hive_server2_lib.HiveServerClient.open_session') as open_session:
        with patch('beeswax.server.hive_server2_lib.Session.objects.get_n_sessions') as get_n_sessions:
          get_n_sessions.return_value = MagicMock(count=MagicMock(return_value=2))
          open_session.return_value = MagicMock(status_code=0)
          fn = MagicMock(return_value=MagicMock(status=MagicMock(statusCode=0)))
          req = MagicMock()
          server_config = get_query_server_config(name='beeswax')
          client = HiveServerClient(server_config, self.user)

          # The session limit (2) is already reached, so the call should fail
          assert_raises(Exception, client.call, fn, req, status=None)

          # One slot is free again, so a new session can be opened
          get_n_sessions.return_value = MagicMock(count=MagicMock(return_value=1))
          (res, session1) = client.call(fn, req, status=None)
          open_session.assert_called_once()
  finally:
    for f in finish:
      f()
def test_call_session_close_idle(self):
  finish = (
    MAX_NUMBER_OF_SESSIONS.set_for_testing(-1),
    CLOSE_SESSIONS.set_for_testing(True)
  )
  try:
    with patch('beeswax.server.hive_server2_lib.thrift_util.get_client') as get_client:
      with patch('beeswax.server.hive_server2_lib.HiveServerClient.open_session') as open_session:
        open_session.return_value = MagicMock(status_code=0)
        fn = MagicMock(return_value=MagicMock(status=MagicMock(statusCode=0)))
        req = MagicMock()
        server_config = get_query_server_config(name='beeswax')
        client = HiveServerClient(server_config, self.user)

        (res, session1) = client.call(fn, req, status=None)
        open_session.assert_called_once()

        # Reuse session from argument
        (res, session2) = client.call(fn, req, status=None, session=session1)
        open_session.assert_called_once()  # open_session should not be called again, because we're reusing the session
        assert_equal(session1, session2)

        # Create new session
        open_session.return_value = MagicMock(status_code=0)
        (res, session3) = client.call(fn, req, status=None)
        assert_equal(open_session.call_count, 2)
        assert_not_equal(session1, session3)
  finally:
    for f in finish:
      f()
def test_call_session_pool_limit(self):
  finish = (
    MAX_NUMBER_OF_SESSIONS.set_for_testing(2),
    CLOSE_SESSIONS.set_for_testing(False)
  )
  try:
    with patch('beeswax.server.hive_server2_lib.thrift_util.get_client') as get_client:
      with patch('beeswax.server.hive_server2_lib.HiveServerClient.open_session') as open_session:
        with patch('beeswax.server.hive_server2_lib.Session.objects.get_tez_session') as get_tez_session:
          get_tez_session.side_effect = Exception('')
          open_session.return_value = MagicMock(status_code=0)
          fn = MagicMock(return_value=MagicMock(status=MagicMock(statusCode=0)))
          req = MagicMock()
          server_config = get_query_server_config(name='beeswax')
          client = HiveServerClient(server_config, self.user)

          # The session pool cannot hand out a session, so the call should fail
          assert_raises(Exception, client.call, fn, req, status=None)
  finally:
    for f in finish:
      f()
def test_call_session_close_idle_managed_queries(self):
  finish = (
    MAX_NUMBER_OF_SESSIONS.set_for_testing(-1),
    CLOSE_SESSIONS.set_for_testing(True)
  )
  try:
    with patch('beeswax.server.hive_server2_lib.thrift_util.get_client') as get_client:
      with patch('beeswax.server.hive_server2_lib.HiveServerClient.open_session') as open_session:
        with patch('beeswax.server.hive_server2_lib.HiveServerClient.close_session') as close_session:
          with patch('beeswax.server.hive_server2_lib.HiveServerTRowSet') as HiveServerTRowSet:
            status = MagicMock(status=MagicMock(statusCode=0))
            status_return = MagicMock(return_value=status)
            get_client.return_value = MagicMock(
              return_value=status,
              GetSchemas=status_return,
              FetchResults=status_return,
              GetResultSetMetadata=status_return,
              CloseOperation=status_return,
              ExecuteStatement=status_return,
              GetTables=status_return,
              GetColumns=status_return
            )
            open_session.return_value = MagicMock(status_code=0)
            server_config = get_query_server_config(name='beeswax')
            client = HiveServerClient(server_config, self.user)

            # Each managed metadata call should open and close exactly one session
            res = client.get_databases()
            assert_equal(open_session.call_count, 1)
            assert_equal(close_session.call_count, 1)

            res = client.get_database(MagicMock())
            assert_equal(open_session.call_count, 2)
            assert_equal(close_session.call_count, 2)

            res = client.get_tables_meta(MagicMock(), MagicMock())
            assert_equal(open_session.call_count, 3)
            assert_equal(close_session.call_count, 3)

            res = client.get_tables(MagicMock(), MagicMock())
            assert_equal(open_session.call_count, 4)
            assert_equal(close_session.call_count, 4)

            res = client.get_table(MagicMock(), MagicMock())
            assert_equal(open_session.call_count, 5)
            assert_equal(close_session.call_count, 5)

            res = client.get_columns(MagicMock(), MagicMock())
            assert_equal(open_session.call_count, 6)
            assert_equal(close_session.call_count, 6)

            res = client.get_partitions(MagicMock(), MagicMock())
            # get_partitions does 2 requests with 1 session each
            assert_equal(open_session.call_count, 8)
            assert_equal(close_session.call_count, 8)
  finally:
    for f in finish:
      f()
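# The tests above all override config flags with set_for_testing(), which
# returns a callable that restores the previous value; the finally blocks then
# invoke those callables. A minimal sketch of the same pattern as a reusable
# context manager (a hypothetical helper, not part of the test suite):
from contextlib import contextmanager

@contextmanager
def config_overrides(*resets):
  # `resets` are the restore callables returned by set_for_testing()
  try:
    yield
  finally:
    for reset in resets:
      reset()

# Hypothetical usage, equivalent to the try/finally blocks above:
#
#   with config_overrides(MAX_NUMBER_OF_SESSIONS.set_for_testing(1), CLOSE_SESSIONS.set_for_testing(False)):
#     ...  # exercise HiveServerClient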
def create_session(self, lang='hive', properties=None):
  application = 'beeswax' if lang == 'hive' or lang == 'llap' else lang

  if has_session_pool():
    session = Session.objects.get_tez_session(self.user, application, MAX_NUMBER_OF_SESSIONS.get())
  elif not has_multiple_sessions():
    session = Session.objects.get_session(self.user, application=application)
  else:
    session = None

  reuse_session = session is not None
  if not reuse_session:
    db = dbms.get(self.user, query_server=get_query_server_config(name=lang, connector=self.interpreter))
    session = db.open_session(self.user)

  response = {
    'type': lang,
    'id': session.id
  }

  if not properties:
    config = None
    if USE_DEFAULT_CONFIGURATION.get():
      config = DefaultConfiguration.objects.get_configuration_for_user(app=lang, user=self.user)

    if config is not None:
      properties = config.properties_list
    else:
      properties = self.get_properties(lang)

  response['properties'] = properties
  response['configuration'] = json.loads(session.properties)
  response['reuse_session'] = reuse_session
  response['session_id'] = ''

  try:
    decoded_guid = session.get_handle().sessionId.guid
    response['session_id'] = unpack_guid(decoded_guid)
  except Exception as e:
    LOG.warning('Failed to decode session handle: %s' % e)

  if lang == 'impala' and session:
    http_addr = _get_impala_server_url(session)
    response['http_addr'] = http_addr

  return response
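# For reference, the response dict assembled above looks roughly like this
# (illustrative values, based only on the assignments in create_session):
#
#   {
#     'type': 'hive',
#     'id': 42,
#     'properties': [...],      # user or default configuration properties
#     'configuration': {...},   # json.loads(session.properties)
#     'reuse_session': False,
#     'session_id': '...',      # unpacked GUID, left '' if decoding fails
#     'http_addr': '...'        # only present for Impala sessions
#   }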
def get_query_server_config(name='beeswax', connector=None):
  if connector and has_connectors():  # TODO: Give empty connector when no connector in use
    LOG.debug("Query via connector %s" % name)
    query_server = get_query_server_config_via_connector(connector)
  else:
    LOG.debug("Query via ini %s" % name)

    if name == "llap":
      activeEndpoint = cache.get('llap')
      if activeEndpoint is None:
        if HIVE_DISCOVERY_LLAP.get():
          LOG.debug("Checking zookeeper for discovering Hive LLAP server endpoint")
          zk = KazooClient(hosts=libzookeeper_conf.ENSEMBLE.get(), read_only=True)
          zk.start()
          if HIVE_DISCOVERY_LLAP_HA.get():
            znode = "{0}/instances".format(HIVE_DISCOVERY_LLAP_ZNODE.get())
            LOG.debug("Setting up Hive LLAP HA with the following node {0}".format(znode))
            if zk.exists(znode):
              hiveservers = zk.get_children(znode)
              if not hiveservers:
                raise PopupException(_('There is no running Hive LLAP server available'))
              LOG.info("Available Hive LLAP servers: {0}".format(hiveservers))
              for server in hiveservers:
                llap_servers = json.loads(zk.get("{0}/{1}".format(znode, server))[0])["internal"][0]
                if llap_servers["api"] == "activeEndpoint":
                  LOG.info("Selecting Hive LLAP server: {0}".format(llap_servers))
                  cache.set(
                    "llap",
                    json.dumps({
                      "host": llap_servers["addresses"][0]["host"],
                      "port": llap_servers["addresses"][0]["port"]
                    }),
                    CACHE_TIMEOUT.get()
                  )
            else:
              LOG.error("Hive LLAP endpoint not found, reverting to config values")
              cache.set("llap", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}), CACHE_TIMEOUT.get())
          else:
            znode = "{0}".format(HIVE_DISCOVERY_LLAP_ZNODE.get())
            LOG.debug("Setting up Hive LLAP with the following node {0}".format(znode))
            if zk.exists(znode):
              hiveservers = zk.get_children(znode)
              for server in hiveservers:
                cache.set(
                  "llap",
                  json.dumps({
                    "host": server.split(';')[0].split('=')[1].split(":")[0],
                    "port": server.split(';')[0].split('=')[1].split(":")[1]
                  })
                )
          zk.stop()
        else:
          LOG.debug("Zookeeper discovery not enabled, reverting to config values")
          cache.set("llap", json.dumps({"host": LLAP_SERVER_HOST.get(), "port": LLAP_SERVER_THRIFT_PORT.get()}), CACHE_TIMEOUT.get())

      activeEndpoint = json.loads(cache.get("llap"))

    elif name != 'hms' and name != 'impala':
      activeEndpoint = cache.get("hiveserver2")
      if activeEndpoint is None:
        if HIVE_DISCOVERY_HS2.get():
          hiveservers = get_zk_hs2()
          LOG.debug("Available Hive Servers: {0}".format(hiveservers))
          if not hiveservers:
            raise PopupException(_('There is no running Hive server available'))
          server_to_use = 0
          LOG.debug("Selected Hive server {0}: {1}".format(server_to_use, hiveservers[server_to_use]))
          cache.set(
            "hiveserver2",
            json.dumps({
              "host": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[0],
              "port": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[1]
            })
          )
        else:
          cache.set("hiveserver2", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}))
      else:
        # Set the hs2 cache in case there is no HS2 discovery
        cache.set("hiveserver2", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}))
        if HIVE_DISCOVERY_HS2.get():
          # Replace the active endpoint if the current HS2 is down
          hiveservers = get_zk_hs2()
          if hiveservers is not None:
            server_to_use = 0
            hs2_host_name = hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[0]
            hs2_in_active_endpoint = hs2_host_name in activeEndpoint
            LOG.debug("Is the current HS2 active {0}".format(hs2_in_active_endpoint))
            if not hs2_in_active_endpoint:
              LOG.error('Current HiveServer is down, working to connect with the next available HiveServer from Zookeeper')
              reset_ha()
              server_to_use = 0
              LOG.debug("Selected HiveServer {0}: {1}".format(server_to_use, hiveservers[server_to_use]))
              cache.set(
                "hiveserver2",
                json.dumps({
                  "host": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[0],
                  "port": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[1]
                })
              )

      activeEndpoint = json.loads(cache.get("hiveserver2"))

    if name == 'impala':
      from impala.dbms import get_query_server_config as impala_query_server_config
      query_server = impala_query_server_config()
    elif name == 'hms':
      kerberos_principal = get_hiveserver2_kerberos_principal(HIVE_SERVER_HOST.get())
      query_server = {
        'server_name': 'hms',
        'server_host': HIVE_METASTORE_HOST.get() if not cluster_config else cluster_config.get('server_host'),
        'server_port': HIVE_METASTORE_PORT.get(),
        'principal': kerberos_principal,
        'transport_mode': 'http' if hiveserver2_transport_mode() == 'HTTP' else 'socket',
        'auth_username': AUTH_USERNAME.get(),
        'auth_password': AUTH_PASSWORD.get(),
        'use_sasl': HIVE_USE_SASL.get()
      }
    else:
      kerberos_principal = get_hiveserver2_kerberos_principal(HIVE_SERVER_HOST.get())
      query_server = {
        'server_name': 'beeswax' if name != 'hplsql' else 'hplsql',
        'server_host': activeEndpoint["host"],
        'server_port': LLAP_SERVER_PORT.get() if name == 'llap' else HIVE_SERVER_PORT.get(),
        'principal': kerberos_principal,
        'http_url': '%(protocol)s://%(host)s:%(port)s/%(end_point)s' % {
          'protocol': 'https' if hiveserver2_use_ssl() else 'http',
          'host': activeEndpoint["host"],
          'port': activeEndpoint["port"],
          'end_point': hiveserver2_thrift_http_path()
        },
        'transport_mode': 'http' if hiveserver2_transport_mode() == 'HTTP' else 'socket',
        'auth_username': AUTH_USERNAME.get(),
        'auth_password': AUTH_PASSWORD.get(),
        'use_sasl': HIVE_USE_SASL.get(),
        'close_sessions': CLOSE_SESSIONS.get(),
        'has_session_pool': has_session_pool(),
        'max_number_of_sessions': MAX_NUMBER_OF_SESSIONS.get()
      }

    if name == 'sparksql':  # Extends Hive as it is very similar
      from spark.conf import SQL_SERVER_HOST as SPARK_SERVER_HOST, SQL_SERVER_PORT as SPARK_SERVER_PORT, USE_SASL as SPARK_USE_SASL

      query_server.update({
        'server_name': 'sparksql',
        'server_host': SPARK_SERVER_HOST.get(),
        'server_port': SPARK_SERVER_PORT.get(),
        'use_sasl': SPARK_USE_SASL.get()
      })

  if not query_server.get('dialect'):
    query_server['dialect'] = query_server['server_name']

  debug_query_server = query_server.copy()
  debug_query_server['auth_password_used'] = bool(debug_query_server.pop('auth_password', None))
  LOG.debug("Query Server: %s" % debug_query_server)

  return query_server
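# The ZooKeeper registry entries consumed by the chained split() calls above
# appear to have the form "serverUri=host:port;version=...;sequence=..." (this
# is what the splits assume). A hypothetical helper, shown only to make that
# parsing explicit, not part of the module:
def _parse_hs2_zk_entry(entry):
  # "serverUri=host:port;..." -> ('host', 'port')
  uri = entry.split(';')[0].split('=')[1]
  host, port = uri.split(':')
  return host, port

# Example (hypothetical entry):
#   _parse_hs2_zk_entry('serverUri=hs2-1.example.com:10001;version=3.1.3;sequence=0000000000')
#   returns ('hs2-1.example.com', '10001')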
def create_session(self, lang='hive', properties=None):
  application = 'beeswax' if lang == 'hive' or lang == 'llap' else lang

  uses_session_pool = has_session_pool()
  uses_multiple_sessions = has_multiple_sessions()

  if lang == 'impala':
    uses_session_pool = False
    uses_multiple_sessions = False

  session = None  # Keep `session` bound even if the lookup below raises
  try:
    if uses_session_pool:
      session = Session.objects.get_tez_session(self.user, application, MAX_NUMBER_OF_SESSIONS.get())
    elif not uses_multiple_sessions:
      session = Session.objects.get_session(self.user, application=application)
  except Exception as e:
    if 'Connection refused' in str(e) or 'Name or service not known' in str(e):
      LOG.exception('Connection being refused or service is not available in either session or in multiple sessions - HA failover')
      reset_ha()

  reuse_session = session is not None
  if not reuse_session:
    db = dbms.get(self.user, query_server=get_query_server_config(name=lang, connector=self.interpreter))
    try:
      session = db.open_session(self.user)
    except Exception as e:
      if 'Connection refused' in str(e) or 'Name or service not known' in str(e):
        LOG.exception('Connection being refused or service is not available when opening a new session - HA failover')
        reset_ha()

  response = {
    'type': lang,
    'id': session.id
  }

  if not properties:
    config = None
    if USE_DEFAULT_CONFIGURATION.get():
      config = DefaultConfiguration.objects.get_configuration_for_user(app=lang, user=self.user)

    if config is not None:
      properties = config.properties_list
    else:
      properties = self.get_properties(lang)

  response['properties'] = properties
  response['configuration'] = json.loads(session.properties)
  response['reuse_session'] = reuse_session
  response['session_id'] = ''

  try:
    decoded_guid = session.get_handle().sessionId.guid
    response['session_id'] = unpack_guid(decoded_guid)
  except Exception as e:
    LOG.warning('Failed to decode session handle: %s' % e)

  if lang == 'impala' and session:
    http_addr = _get_impala_server_url(session)
    response['http_addr'] = http_addr

  return response
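# reset_ha() is called above but not defined in this section. Judging from how
# get_query_server_config() caches the discovered endpoints under the
# "hiveserver2" and "llap" cache keys, a plausible sketch is a helper that
# drops those keys so the next call re-runs ZooKeeper discovery. This is an
# assumption about its behavior, not the actual implementation:
def reset_ha_sketch():
  # Hypothetical: invalidate cached endpoints to force re-discovery
  cache.delete('hiveserver2')
  cache.delete('llap')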
def get_query_server_config(name='beeswax', connector=None):
  if connector and has_connectors():  # TODO: Give empty connector when no connector in use
    query_server = get_query_server_config_via_connector(connector)
  else:
    LOG.debug("Query cluster %s" % name)

    if name == "llap":
      activeEndpoint = cache.get('llap')
      if activeEndpoint is None:
        if HIVE_DISCOVERY_LLAP.get():
          LOG.debug("Checking zookeeper for Hive Server Interactive endpoint")
          zk = KazooClient(hosts=libzookeeper_conf.ENSEMBLE.get(), read_only=True)
          zk.start()
          if HIVE_DISCOVERY_LLAP_HA.get():
            znode = "{0}/instances".format(HIVE_DISCOVERY_LLAP_ZNODE.get())
            LOG.debug("Setting up LLAP with the following node {0}".format(znode))
            if zk.exists(znode):
              hiveservers = zk.get_children(znode)
              for server in hiveservers:
                llap_servers = json.loads(zk.get("{0}/{1}".format(znode, server))[0])["internal"][0]
                if llap_servers["api"] == "activeEndpoint":
                  cache.set(
                    "llap",
                    json.dumps({
                      "host": llap_servers["addresses"][0]["host"],
                      "port": llap_servers["addresses"][0]["port"]
                    }),
                    CACHE_TIMEOUT.get()
                  )
            else:
              LOG.error("LLAP Endpoint not found, reverting to HiveServer2")
              cache.set("llap", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}), CACHE_TIMEOUT.get())
          else:
            znode = "{0}".format(HIVE_DISCOVERY_LLAP_ZNODE.get())
            LOG.debug("Setting up LLAP with the following node {0}".format(znode))
            if zk.exists(znode):
              hiveservers = zk.get_children(znode)
              for server in hiveservers:
                cache.set(
                  "llap",
                  json.dumps({
                    "host": server.split(';')[0].split('=')[1].split(":")[0],
                    "port": server.split(';')[0].split('=')[1].split(":")[1]
                  })
                )
          zk.stop()
        else:
          LOG.debug("Zookeeper Discovery not enabled, reverting to config values")
          cache.set("llap", json.dumps({"host": LLAP_SERVER_HOST.get(), "port": LLAP_SERVER_THRIFT_PORT.get()}), CACHE_TIMEOUT.get())

      activeEndpoint = json.loads(cache.get("llap"))

    elif name != 'hms' and name != 'impala':
      activeEndpoint = cache.get("hiveserver2")
      if activeEndpoint is None:
        if HIVE_DISCOVERY_HS2.get():
          zk = KazooClient(hosts=libzookeeper_conf.ENSEMBLE.get(), read_only=True)
          zk.start()
          znode = HIVE_DISCOVERY_HIVESERVER2_ZNODE.get()
          LOG.info("Setting up Hive with the following node {0}".format(znode))
          if zk.exists(znode):
            hiveservers = zk.get_children(znode)
            server_to_use = 0  # if CONF.HIVE_SPREAD.get() randint(0, len(hiveservers)-1) else 0
            cache.set(
              "hiveserver2",
              json.dumps({
                "host": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[0],
                "port": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[1]
              })
            )
          else:
            cache.set("hiveserver2", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}))
          zk.stop()
        else:
          cache.set("hiveserver2", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}))

      activeEndpoint = json.loads(cache.get("hiveserver2"))

    if name == 'impala':
      from impala.dbms import get_query_server_config as impala_query_server_config
      query_server = impala_query_server_config()
    elif name == 'hms':
      kerberos_principal = hive_site.get_hiveserver2_kerberos_principal(HIVE_SERVER_HOST.get())
      query_server = {
        'server_name': 'hms',
        'server_host': HIVE_METASTORE_HOST.get() if not cluster_config else cluster_config.get('server_host'),
        'server_port': HIVE_METASTORE_PORT.get(),
        'principal': kerberos_principal,
        'transport_mode': 'http' if hive_site.hiveserver2_transport_mode() == 'HTTP' else 'socket',
        'auth_username': AUTH_USERNAME.get(),
        'auth_password': AUTH_PASSWORD.get(),
        'use_sasl': HIVE_USE_SASL.get()
      }
    else:
      kerberos_principal = hive_site.get_hiveserver2_kerberos_principal(HIVE_SERVER_HOST.get())
      query_server = {
        'server_name': 'beeswax',
        'server_host': activeEndpoint["host"],
        'server_port': LLAP_SERVER_PORT.get() if name == 'llap' else HIVE_SERVER_PORT.get(),
        'principal': kerberos_principal,
        'http_url': '%(protocol)s://%(host)s:%(port)s/%(end_point)s' % {
          'protocol': 'https' if hiveserver2_use_ssl() else 'http',
          'host': activeEndpoint["host"],
          'port': activeEndpoint["port"],
          'end_point': hive_site.hiveserver2_thrift_http_path()
        },
        'transport_mode': 'http' if hive_site.hiveserver2_transport_mode() == 'HTTP' else 'socket',
        'auth_username': AUTH_USERNAME.get(),
        'auth_password': AUTH_PASSWORD.get(),
        'use_sasl': HIVE_USE_SASL.get(),
        'close_sessions': CLOSE_SESSIONS.get(),
        'has_session_pool': has_session_pool(),
        'max_number_of_sessions': MAX_NUMBER_OF_SESSIONS.get()
      }

    if name == 'sparksql':  # Extends Hive as very similar
      from spark.conf import SQL_SERVER_HOST as SPARK_SERVER_HOST, SQL_SERVER_PORT as SPARK_SERVER_PORT, USE_SASL as SPARK_USE_SASL

      query_server.update({
        'server_name': 'sparksql',
        'server_host': SPARK_SERVER_HOST.get(),
        'server_port': SPARK_SERVER_PORT.get(),
        'use_sasl': SPARK_USE_SASL.get()
      })

  if not query_server.get('dialect'):
    query_server['dialect'] = query_server['server_name']

  debug_query_server = query_server.copy()
  debug_query_server['auth_password_used'] = bool(debug_query_server.pop('auth_password', None))
  LOG.debug("Query Server: %s" % debug_query_server)

  return query_server
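# Minimal usage sketch, applying to either version of get_query_server_config()
# above and assuming a Django-authenticated `user` object:
#
#   server_config = get_query_server_config(name='beeswax')
#   db = dbms.get(user, query_server=server_config)
#   session = db.open_session(user)
#
# Both versions return a dict with at least 'server_name', 'server_host',
# 'server_port' and 'dialect'; the beeswax branch also carries the
# session-pool settings ('close_sessions', 'has_session_pool',
# 'max_number_of_sessions') exercised by the HiveServerClient tests at the
# top of this section.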