def test_call_session_single(self):
    finish = (
        MAX_NUMBER_OF_SESSIONS.set_for_testing(1),
        CLOSE_SESSIONS.set_for_testing(False)
    )

    try:
      with patch('beeswax.server.hive_server2_lib.thrift_util.get_client') as get_client:
        with patch('beeswax.server.hive_server2_lib.HiveServerClient.open_session') as open_session:
          with patch('beeswax.server.hive_server2_lib.Session.objects.get_session') as get_session:
            open_session.return_value = MagicMock(status_code=0)
            get_session.return_value = None
            fn = MagicMock(attr='test')
            req = MagicMock()
            server_config = get_query_server_config(name='beeswax')

            client = HiveServerClient(server_config, self.user)

            (res, session1) = client.call(fn, req, status=None)
            open_session.assert_called_once()

            # Reuse session from argument
            (res, session2) = client.call(fn, req, status=None, session=session1)
            open_session.assert_called_once() # open_session should not be called again, because we're reusing session
            assert_equal(session1, session2)

            # Reuse session from get_session
            get_session.return_value = session1
            (res, session3) = client.call(fn, req, status=None)
            open_session.assert_called_once() # open_session should not be called again, because we're reusing session
            assert_equal(session1, session3)
    finally:
      for f in finish:
        f()
  def test_call_session_close_idle_limit(self):
    finish = (
        MAX_NUMBER_OF_SESSIONS.set_for_testing(2),
        CLOSE_SESSIONS.set_for_testing(True)
    )

    try:
      with patch('beeswax.server.hive_server2_lib.thrift_util.get_client') as get_client:
        with patch('beeswax.server.hive_server2_lib.HiveServerClient.open_session') as open_session:
          with patch('beeswax.server.hive_server2_lib.Session.objects.get_n_sessions') as get_n_sessions:
            get_n_sessions.return_value = MagicMock(count=MagicMock(return_value=2))
            open_session.return_value = MagicMock(status_code=0)
            fn = MagicMock(return_value=MagicMock(status=MagicMock(statusCode=0)))
            req = MagicMock()
            server_config = get_query_server_config(name='beeswax')

            client = HiveServerClient(server_config, self.user)
            assert_raises(Exception, client.call, fn, req, status=None)

            get_n_sessions.return_value = MagicMock(count=MagicMock(return_value=1))
            (res, session1) = client.call(fn, req, status=None)
            open_session.assert_called_once()
    finally:
      for f in finish:
        f()
  def test_call_session_close_idle(self):
    finish = (
        MAX_NUMBER_OF_SESSIONS.set_for_testing(-1),
        CLOSE_SESSIONS.set_for_testing(True)
    )

    try:
      with patch('beeswax.server.hive_server2_lib.thrift_util.get_client') as get_client:
        with patch('beeswax.server.hive_server2_lib.HiveServerClient.open_session') as open_session:
          open_session.return_value = MagicMock(status_code=0)
          fn = MagicMock(return_value=MagicMock(status=MagicMock(statusCode=0)))
          req = MagicMock()
          server_config = get_query_server_config(name='beeswax')

          client = HiveServerClient(server_config, self.user)

          (res, session1) = client.call(fn, req, status=None)
          open_session.assert_called_once()

          # Reuse session from argument
          (res, session2) = client.call(fn, req, status=None, session=session1)
          open_session.assert_called_once() # open_session should not be called again, because we're reusing session
          assert_equal(session1, session2)

          # Create new session
          open_session.return_value = MagicMock(status_code=0)
          (res, session3) = client.call(fn, req, status=None)
          assert_equal(open_session.call_count, 2)
          assert_not_equal(session1, session3)
    finally:
      for f in finish:
        f()
예제 #4
0
    def test_call_session_pool_limit(self):
        finish = (MAX_NUMBER_OF_SESSIONS.set_for_testing(2),
                  CLOSE_SESSIONS.set_for_testing(False))

        try:
            with patch('beeswax.server.hive_server2_lib.thrift_util.get_client'
                       ) as get_client:
                with patch(
                        'beeswax.server.hive_server2_lib.HiveServerClient.open_session'
                ) as open_session:
                    with patch(
                            'beeswax.server.hive_server2_lib.Session.objects.get_tez_session'
                    ) as get_tez_session:
                        get_tez_session.side_effect = Exception('')
                        open_session.return_value = MagicMock(status_code=0)
                        fn = MagicMock(return_value=MagicMock(status=MagicMock(
                            statusCode=0)))
                        req = MagicMock()
                        server_config = get_query_server_config(name='beeswax')

                        client = HiveServerClient(server_config, self.user)

                        assert_raises(Exception,
                                      client.call,
                                      fn,
                                      req,
                                      status=None)
        finally:
            for f in finish:
                f()
  def test_call_session_close_idle_managed_queries(self):
    finish = (
        MAX_NUMBER_OF_SESSIONS.set_for_testing(-1),
        CLOSE_SESSIONS.set_for_testing(True)
    )

    try:
      with patch('beeswax.server.hive_server2_lib.thrift_util.get_client') as get_client:
        with patch('beeswax.server.hive_server2_lib.HiveServerClient.open_session') as open_session:
          with patch('beeswax.server.hive_server2_lib.HiveServerClient.close_session') as close_session:
            with patch('beeswax.server.hive_server2_lib.HiveServerTRowSet') as HiveServerTRowSet:
              status = MagicMock(status=MagicMock(statusCode=0))
              status_return = MagicMock(return_value=status)
              get_client.return_value = MagicMock(
                  return_value=status, GetSchemas=status_return, FetchResults=status_return, GetResultSetMetadata=status_return,
                  CloseOperation=status_return, ExecuteStatement=status_return, GetTables=status_return, GetColumns=status_return
              )
              open_session.return_value = MagicMock(status_code=0)
              server_config = get_query_server_config(name='beeswax')

              client = HiveServerClient(server_config, self.user)

              res = client.get_databases()
              assert_equal(open_session.call_count, 1)
              assert_equal(close_session.call_count, 1)

              res = client.get_database(MagicMock())
              assert_equal(open_session.call_count, 2)
              assert_equal(close_session.call_count, 2)

              res = client.get_tables_meta(MagicMock(), MagicMock())
              assert_equal(open_session.call_count, 3)
              assert_equal(close_session.call_count, 3)

              res = client.get_tables(MagicMock(), MagicMock())
              assert_equal(open_session.call_count, 4)
              assert_equal(close_session.call_count, 4)

              res = client.get_table(MagicMock(), MagicMock())
              assert_equal(open_session.call_count, 5)
              assert_equal(close_session.call_count, 5)

              res = client.get_columns(MagicMock(), MagicMock())
              assert_equal(open_session.call_count, 6)
              assert_equal(close_session.call_count, 6)

              res = client.get_partitions(MagicMock(), MagicMock()) # get_partitions does 2 requests with 1 session each
              assert_equal(open_session.call_count, 8)
              assert_equal(close_session.call_count, 8)
    finally:
      for f in finish:
        f()
예제 #6
0
    def create_session(self, lang='hive', properties=None):
        application = 'beeswax' if lang == 'hive' or lang == 'llap' else lang

        if has_session_pool():
            session = Session.objects.get_tez_session(
                self.user, application, MAX_NUMBER_OF_SESSIONS.get())
        elif not has_multiple_sessions():
            session = Session.objects.get_session(self.user,
                                                  application=application)
        else:
            session = None

        reuse_session = session is not None
        if not reuse_session:
            db = dbms.get(self.user,
                          query_server=get_query_server_config(
                              name=lang, connector=self.interpreter))
            session = db.open_session(self.user)

        response = {'type': lang, 'id': session.id}

        if not properties:
            config = None
            if USE_DEFAULT_CONFIGURATION.get():
                config = DefaultConfiguration.objects.get_configuration_for_user(
                    app=lang, user=self.user)

            if config is not None:
                properties = config.properties_list
            else:
                properties = self.get_properties(lang)

        response['properties'] = properties
        response['configuration'] = json.loads(session.properties)
        response['reuse_session'] = reuse_session
        response['session_id'] = ''

        try:
            decoded_guid = session.get_handle().sessionId.guid
            response['session_id'] = unpack_guid(decoded_guid)
        except Exception as e:
            LOG.warn('Failed to decode session handle: %s' % e)

        if lang == 'impala' and session:
            http_addr = _get_impala_server_url(session)
            response['http_addr'] = http_addr

        return response
예제 #7
0
def get_query_server_config(name='beeswax', connector=None):
  if connector and has_connectors(): # TODO: Give empty connector when no connector in use
    LOG.debug("Query via connector %s" % name)
    query_server = get_query_server_config_via_connector(connector)
  else:
    LOG.debug("Query via ini %s" % name)
    if name == "llap":
      activeEndpoint = cache.get('llap')
      if activeEndpoint is None:
        if HIVE_DISCOVERY_LLAP.get():
          LOG.debug("Checking zookeeper for discovering Hive LLAP server endpoint")
          zk = KazooClient(hosts=libzookeeper_conf.ENSEMBLE.get(), read_only=True)
          zk.start()
          if HIVE_DISCOVERY_LLAP_HA.get():
            znode = "{0}/instances".format(HIVE_DISCOVERY_LLAP_ZNODE.get())
            LOG.debug("Setting up Hive LLAP HA with the following node {0}".format(znode))
            if zk.exists(znode):
              hiveservers = zk.get_children(znode)
              if not hiveservers:
                raise PopupException(_('There is no running Hive LLAP server available'))
              LOG.info("Available Hive LLAP servers: {0}".format(hiveservers))
              for server in hiveservers:
                llap_servers = json.loads(zk.get("{0}/{1}".format(znode, server))[0])["internal"][0]
                if llap_servers["api"] == "activeEndpoint":
                  LOG.info("Selecting Hive LLAP server: {0}".format(llap_servers))
                  cache.set(
                    "llap",
                    json.dumps({
                        "host": llap_servers["addresses"][0]["host"],
                        "port": llap_servers["addresses"][0]["port"]
                      }),
                      CACHE_TIMEOUT.get()
                  )
            else:
              LOG.error("Hive LLAP endpoint not found, reverting to config values")
              cache.set("llap", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}), CACHE_TIMEOUT.get())
          else:
            znode = "{0}".format(HIVE_DISCOVERY_LLAP_ZNODE.get())
            LOG.debug("Setting up Hive LLAP with the following node {0}".format(znode))
            if zk.exists(znode):
              hiveservers = zk.get_children(znode)
              for server in hiveservers:
                cache.set(
                  "llap",
                  json.dumps({
                    "host": server.split(';')[0].split('=')[1].split(":")[0],
                    "port": server.split(';')[0].split('=')[1].split(":")[1]
                  })
                )
          zk.stop()
        else:
          LOG.debug("Zookeeper discovery not enabled, reverting to config values")
          cache.set("llap", json.dumps({"host": LLAP_SERVER_HOST.get(), "port": LLAP_SERVER_THRIFT_PORT.get()}), CACHE_TIMEOUT.get())

      activeEndpoint = json.loads(cache.get("llap"))

    elif name != 'hms' and name != 'impala':
      activeEndpoint = cache.get("hiveserver2")
      if activeEndpoint is None:
        if HIVE_DISCOVERY_HS2.get():
          hiveservers = get_zk_hs2()
          LOG.debug("Available Hive Servers: {0}".format(hiveservers))
          if not hiveservers:
            raise PopupException(_('There is no running Hive server available'))
          server_to_use = 0
          LOG.debug("Selected Hive server {0}: {1}".format(server_to_use, hiveservers[server_to_use]))
          cache.set(
            "hiveserver2",
            json.dumps({
              "host": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[0],
              "port": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[1]
            })
          )
        else:
          cache.set("hiveserver2", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}))
      else:
        # Setting hs2 cache in-case there is no HS2 discovery
        cache.set("hiveserver2", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}))
        if HIVE_DISCOVERY_HS2.get():
          # Replace ActiveEndpoint if the current HS2 is down
          hiveservers = get_zk_hs2()
          if hiveservers is not None:
            server_to_use = 0
            hs2_host_name = hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[0]
            hs2_in_active_endpoint = hs2_host_name in activeEndpoint
            LOG.debug("Is the current HS2 active {0}".format(hs2_in_active_endpoint))
            if not hs2_in_active_endpoint:
              LOG.error(
                'Current HiveServer is down, working to connect with the next available HiveServer from Zookeeper')
              reset_ha()
              server_to_use = 0
              LOG.debug("Selected HiveServer {0}: {1}".format(server_to_use, hiveservers[server_to_use]))
              cache.set(
                "hiveserver2",
                json.dumps({
                  "host": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[0],
                  "port": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[1]
                })
              )

      activeEndpoint = json.loads(cache.get("hiveserver2"))

    if name == 'impala':
      from impala.dbms import get_query_server_config as impala_query_server_config
      query_server = impala_query_server_config()
    elif name == 'hms':
      kerberos_principal = get_hiveserver2_kerberos_principal(HIVE_SERVER_HOST.get())
      query_server = {
          'server_name': 'hms',
          'server_host': HIVE_METASTORE_HOST.get() if not cluster_config else cluster_config.get('server_host'),
          'server_port': HIVE_METASTORE_PORT.get(),
          'principal': kerberos_principal,
          'transport_mode': 'http' if hiveserver2_transport_mode() == 'HTTP' else 'socket',
          'auth_username': AUTH_USERNAME.get(),
          'auth_password': AUTH_PASSWORD.get(),
          'use_sasl': HIVE_USE_SASL.get()
      }
    else:
      kerberos_principal = get_hiveserver2_kerberos_principal(HIVE_SERVER_HOST.get())
      query_server = {
          'server_name': 'beeswax' if name != 'hplsql' else 'hplsql',
          'server_host': activeEndpoint["host"],
          'server_port': LLAP_SERVER_PORT.get() if name == 'llap' else HIVE_SERVER_PORT.get(),
          'principal': kerberos_principal,
          'http_url': '%(protocol)s://%(host)s:%(port)s/%(end_point)s' % {
              'protocol': 'https' if hiveserver2_use_ssl() else 'http',
              'host': activeEndpoint["host"],
              'port': activeEndpoint["port"],
              'end_point': hiveserver2_thrift_http_path()
            },
          'transport_mode': 'http' if hiveserver2_transport_mode() == 'HTTP' else 'socket',
          'auth_username': AUTH_USERNAME.get(),
          'auth_password': AUTH_PASSWORD.get(),
          'use_sasl': HIVE_USE_SASL.get(),
          'close_sessions': CLOSE_SESSIONS.get(),
          'has_session_pool': has_session_pool(),
          'max_number_of_sessions': MAX_NUMBER_OF_SESSIONS.get()
        }

    if name == 'sparksql':  # Extends Hive as very similar
      from spark.conf import SQL_SERVER_HOST as SPARK_SERVER_HOST, SQL_SERVER_PORT as SPARK_SERVER_PORT, USE_SASL as SPARK_USE_SASL

      query_server.update({
          'server_name': 'sparksql',
          'server_host': SPARK_SERVER_HOST.get(),
          'server_port': SPARK_SERVER_PORT.get(),
          'use_sasl': SPARK_USE_SASL.get()
      })

  if not query_server.get('dialect'):
    query_server['dialect'] = query_server['server_name']

  debug_query_server = query_server.copy()
  debug_query_server['auth_password_used'] = bool(debug_query_server.pop('auth_password', None))
  LOG.debug("Query Server: %s" % debug_query_server)

  return query_server
예제 #8
0
    def create_session(self, lang='hive', properties=None):
        application = 'beeswax' if lang == 'hive' or lang == 'llap' else lang

        uses_session_pool = has_session_pool()
        uses_multiple_sessions = has_multiple_sessions()

        if lang == 'impala':
            uses_session_pool = False
            uses_multiple_sessions = False

        try:
            if uses_session_pool:
                session = Session.objects.get_tez_session(
                    self.user, application, MAX_NUMBER_OF_SESSIONS.get())
            elif not uses_multiple_sessions:
                session = Session.objects.get_session(self.user,
                                                      application=application)
            else:
                session = None
        except Exception as e:
            if 'Connection refused' in str(
                    e) or 'Name or service not known' in str(e):
                LOG.exception(
                    'Connection being refused or service is not available in either session or in multiple sessions'
                    '- HA failover')
                reset_ha()

        reuse_session = session is not None
        if not reuse_session:
            db = dbms.get(self.user,
                          query_server=get_query_server_config(
                              name=lang, connector=self.interpreter))
            try:
                session = db.open_session(self.user)
            except Exception as e:
                if 'Connection refused' in str(
                        e) or 'Name or service not known' in str(e):
                    LOG.exception(
                        'Connection being refused or service is not available in reuse session - HA failover'
                    )
                    reset_ha()

        response = {'type': lang, 'id': session.id}

        if not properties:
            config = None
            if USE_DEFAULT_CONFIGURATION.get():
                config = DefaultConfiguration.objects.get_configuration_for_user(
                    app=lang, user=self.user)

            if config is not None:
                properties = config.properties_list
            else:
                properties = self.get_properties(lang)

        response['properties'] = properties
        response['configuration'] = json.loads(session.properties)
        response['reuse_session'] = reuse_session
        response['session_id'] = ''

        try:
            decoded_guid = session.get_handle().sessionId.guid
            response['session_id'] = unpack_guid(decoded_guid)
        except Exception as e:
            LOG.warning('Failed to decode session handle: %s' % e)

        if lang == 'impala' and session:
            http_addr = _get_impala_server_url(session)
            response['http_addr'] = http_addr

        return response
예제 #9
0
def get_query_server_config(name='beeswax', connector=None):
  if connector and has_connectors(): # TODO: Give empty connector when no connector in use
    query_server = get_query_server_config_via_connector(connector)
  else:
    LOG.debug("Query cluster %s" % name)
    if name == "llap":
      activeEndpoint = cache.get('llap')
      if activeEndpoint is None:
        if HIVE_DISCOVERY_LLAP.get():
          LOG.debug("Checking zookeeper for Hive Server Interactive endpoint")
          zk = KazooClient(hosts=libzookeeper_conf.ENSEMBLE.get(), read_only=True)
          zk.start()
          if HIVE_DISCOVERY_LLAP_HA.get():
            znode = "{0}/instances".format(HIVE_DISCOVERY_LLAP_ZNODE.get())
            LOG.debug("Setting up LLAP with the following node {0}".format(znode))
            if zk.exists(znode):
              hiveservers = zk.get_children(znode)
              for server in hiveservers:
                llap_servers= json.loads(zk.get("{0}/{1}".format(znode, server))[0])["internal"][0]
                if llap_servers["api"] == "activeEndpoint":
                  cache.set("llap", json.dumps({"host": llap_servers["addresses"][0]["host"], "port": llap_servers["addresses"][0]["port"]}), CACHE_TIMEOUT.get())
            else:
              LOG.error("LLAP Endpoint not found, reverting to HiveServer2")
              cache.set("llap", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}), CACHE_TIMEOUT.get())
          else:
            znode = "{0}".format(HIVE_DISCOVERY_LLAP_ZNODE.get())
            LOG.debug("Setting up LLAP with the following node {0}".format(znode))
            if zk.exists(znode):
              hiveservers = zk.get_children(znode)
              for server in hiveservers:
                cache.set("llap", json.dumps({"host": server.split(';')[0].split('=')[1].split(":")[0], "port": server.split(';')[0].split('=')[1].split(":")[1]}))
          zk.stop()
        else:
          LOG.debug("Zookeeper Discovery not enabled, reverting to config values")
          cache.set("llap", json.dumps({"host": LLAP_SERVER_HOST.get(), "port": LLAP_SERVER_THRIFT_PORT.get()}), CACHE_TIMEOUT.get())
      activeEndpoint = json.loads(cache.get("llap"))
    elif name != 'hms' and name != 'impala':
      activeEndpoint = cache.get("hiveserver2")
      if activeEndpoint is None:
        if HIVE_DISCOVERY_HS2.get():
          zk = KazooClient(hosts=libzookeeper_conf.ENSEMBLE.get(), read_only=True)
          zk.start()
          znode = HIVE_DISCOVERY_HIVESERVER2_ZNODE.get()
          LOG.info("Setting up Hive with the following node {0}".format(znode))
          if zk.exists(znode):
            hiveservers = zk.get_children(znode)
            server_to_use = 0 # if CONF.HIVE_SPREAD.get() randint(0, len(hiveservers)-1) else 0
            cache.set("hiveserver2", json.dumps({"host": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[0], "port": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[1]}))
          else:
            cache.set("hiveserver2", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}))
          zk.stop()
        else:
          cache.set("hiveserver2", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}))
      activeEndpoint = json.loads(cache.get("hiveserver2"))

    if name == 'impala':
      from impala.dbms import get_query_server_config as impala_query_server_config
      query_server = impala_query_server_config()
    elif name == 'hms':
      kerberos_principal = hive_site.get_hiveserver2_kerberos_principal(HIVE_SERVER_HOST.get())
      query_server = {
          'server_name': 'hms',
          'server_host': HIVE_METASTORE_HOST.get() if not cluster_config else cluster_config.get('server_host'),
          'server_port': HIVE_METASTORE_PORT.get(),
          'principal': kerberos_principal,
          'transport_mode': 'http' if hive_site.hiveserver2_transport_mode() == 'HTTP' else 'socket',
          'auth_username': AUTH_USERNAME.get(),
          'auth_password': AUTH_PASSWORD.get(),
          'use_sasl': HIVE_USE_SASL.get()
      }
    else:
      kerberos_principal = hive_site.get_hiveserver2_kerberos_principal(HIVE_SERVER_HOST.get())
      query_server = {
          'server_name': 'beeswax',
          'server_host': activeEndpoint["host"],
          'server_port': LLAP_SERVER_PORT.get() if name == 'llap' else HIVE_SERVER_PORT.get(),
          'principal': kerberos_principal,
          'http_url': '%(protocol)s://%(host)s:%(port)s/%(end_point)s' % {
              'protocol': 'https' if hiveserver2_use_ssl() else 'http',
              'host': activeEndpoint["host"],
              'port': activeEndpoint["port"],
              'end_point': hive_site.hiveserver2_thrift_http_path()
            },
          'transport_mode': 'http' if hive_site.hiveserver2_transport_mode() == 'HTTP' else 'socket',
          'auth_username': AUTH_USERNAME.get(),
          'auth_password': AUTH_PASSWORD.get(),
          'use_sasl': HIVE_USE_SASL.get(),
          'close_sessions': CLOSE_SESSIONS.get(),
          'has_session_pool': has_session_pool(),
          'max_number_of_sessions': MAX_NUMBER_OF_SESSIONS.get()
        }

    if name == 'sparksql': # Extends Hive as very similar
      from spark.conf import SQL_SERVER_HOST as SPARK_SERVER_HOST, SQL_SERVER_PORT as SPARK_SERVER_PORT, USE_SASL as SPARK_USE_SASL

      query_server.update({
          'server_name': 'sparksql',
          'server_host': SPARK_SERVER_HOST.get(),
          'server_port': SPARK_SERVER_PORT.get(),
          'use_sasl': SPARK_USE_SASL.get()
      })

  if not query_server.get('dialect'):
    query_server['dialect'] = query_server['server_name']

  debug_query_server = query_server.copy()
  debug_query_server['auth_password_used'] = bool(debug_query_server.pop('auth_password', None))
  LOG.debug("Query Server: %s" % debug_query_server)

  return query_server