Esempio n. 1
0
def query_store_api(request, path=None):
    response = {'status': -1}

    if USE_PROXY.get():
        content_type = 'application/json; charset=UTF-8'
        headers = {'X-Requested-By': 'das', 'Content-Type': content_type}

        client = HttpClient(QUERY_STORE.SERVER_URL.get())
        resource = Resource(client)
        if USE_SASL.get():
            client.set_kerberos_auth()

        try:
            response = resource.invoke(request.method, path,
                                       request.GET.dict(), request.body,
                                       headers)
        except RestException as e:
            ex_response = e.get_parent_ex().response
            response['code'] = ex_response.status_code
            response['message'] = ex_response.reason
            response['content'] = ex_response.text
    else:
        if path == 'api/query/search':
            filters = json.loads(request.body)
            resp = get_api(request.user,
                           interface='queries-hive').apps(filters['search'])
            response = resp['apps']

    return JsonResponse(response)
Esempio n. 2
0
def _query_store_proxy(request, path=None):
    response = {'status': -1}

    headers = {
        'x-do-as': request.user.username,
        'X-Requested-By': 'das',
        'Content-Type': 'application/json; charset=UTF-8'
    }

    client = HttpClient(QUERY_STORE.SERVER_URL.get())
    resource = Resource(client)

    if USE_SASL.get():
        client.set_kerberos_auth()

    try:
        response = resource.invoke(request.method, path, request.GET.dict(),
                                   request.body, headers)
    except RestException as e:
        ex_response = e.get_parent_ex().response

        if ex_response is not None:
            response['code'] = ex_response.status_code
            response['message'] = ex_response.reason
            response['content'] = ex_response.text
        else:
            response['message'] = 'Query store not reachable!'
            response['content'] = e.message

    return response
Esempio n. 3
0
def query_store_download_bundle(request, id=None):
  response = {}

  client = HttpClient(QUERY_STORE.SERVER_URL.get())
  resource = Resource(client)
  if USE_SASL.get():
    client.set_kerberos_auth()

  app = resource.get('api/data-bundle/' + id)

  response = FileResponse((app, 'rb'), content_type='application/octet-stream')
  response['Content-Disposition'] = 'attachment; filename=' + id + '.zip'

  return response
Esempio n. 4
0
def _create_query_store_client(request,
                               content_type='application/json; charset=UTF-8'):
    headers = {
        'x-do-as': request.user.username,
        'X-Requested-By': 'das',
        'Content-Type': content_type
    }

    client = HttpClient(QUERY_STORE.SERVER_URL.get())
    client.set_headers(headers)

    if USE_SASL.get():
        client.set_kerberos_auth()

    return client
Esempio n. 5
0
def get_query_server_config(name='beeswax', connector=None):
  if connector and has_connectors(): # TODO: Give empty connector when no connector in use
    LOG.debug("Query via connector %s" % name)
    query_server = get_query_server_config_via_connector(connector)
  else:
    LOG.debug("Query via ini %s" % name)
    if name == "llap":
      activeEndpoint = cache.get('llap')
      if activeEndpoint is None:
        if HIVE_DISCOVERY_LLAP.get():
          LOG.debug("Checking zookeeper for discovering Hive LLAP server endpoint")
          zk = KazooClient(hosts=libzookeeper_conf.ENSEMBLE.get(), read_only=True)
          zk.start()
          if HIVE_DISCOVERY_LLAP_HA.get():
            znode = "{0}/instances".format(HIVE_DISCOVERY_LLAP_ZNODE.get())
            LOG.debug("Setting up Hive LLAP HA with the following node {0}".format(znode))
            if zk.exists(znode):
              hiveservers = zk.get_children(znode)
              if not hiveservers:
                raise PopupException(_('There is no running Hive LLAP server available'))
              LOG.info("Available Hive LLAP servers: {0}".format(hiveservers))
              for server in hiveservers:
                llap_servers = json.loads(zk.get("{0}/{1}".format(znode, server))[0])["internal"][0]
                if llap_servers["api"] == "activeEndpoint":
                  LOG.info("Selecting Hive LLAP server: {0}".format(llap_servers))
                  cache.set(
                    "llap",
                    json.dumps({
                        "host": llap_servers["addresses"][0]["host"],
                        "port": llap_servers["addresses"][0]["port"]
                      }),
                      CACHE_TIMEOUT.get()
                  )
            else:
              LOG.error("Hive LLAP endpoint not found, reverting to config values")
              cache.set("llap", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}), CACHE_TIMEOUT.get())
          else:
            znode = "{0}".format(HIVE_DISCOVERY_LLAP_ZNODE.get())
            LOG.debug("Setting up Hive LLAP with the following node {0}".format(znode))
            if zk.exists(znode):
              hiveservers = zk.get_children(znode)
              for server in hiveservers:
                cache.set(
                  "llap",
                  json.dumps({
                    "host": server.split(';')[0].split('=')[1].split(":")[0],
                    "port": server.split(';')[0].split('=')[1].split(":")[1]
                  })
                )
          zk.stop()
        else:
          LOG.debug("Zookeeper discovery not enabled, reverting to config values")
          cache.set("llap", json.dumps({"host": LLAP_SERVER_HOST.get(), "port": LLAP_SERVER_THRIFT_PORT.get()}), CACHE_TIMEOUT.get())

      activeEndpoint = json.loads(cache.get("llap"))

    elif name != 'hms' and name != 'impala':
      activeEndpoint = cache.get("hiveserver2")
      if activeEndpoint is None:
        if HIVE_DISCOVERY_HS2.get():
          hiveservers = get_zk_hs2()
          LOG.debug("Available Hive Servers: {0}".format(hiveservers))
          if not hiveservers:
            raise PopupException(_('There is no running Hive server available'))
          server_to_use = 0
          LOG.debug("Selected Hive server {0}: {1}".format(server_to_use, hiveservers[server_to_use]))
          cache.set(
            "hiveserver2",
            json.dumps({
              "host": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[0],
              "port": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[1]
            })
          )
        else:
          cache.set("hiveserver2", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}))
      else:
        # Setting hs2 cache in-case there is no HS2 discovery
        cache.set("hiveserver2", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}))
        if HIVE_DISCOVERY_HS2.get():
          # Replace ActiveEndpoint if the current HS2 is down
          hiveservers = get_zk_hs2()
          if hiveservers is not None:
            server_to_use = 0
            hs2_host_name = hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[0]
            hs2_in_active_endpoint = hs2_host_name in activeEndpoint
            LOG.debug("Is the current HS2 active {0}".format(hs2_in_active_endpoint))
            if not hs2_in_active_endpoint:
              LOG.error(
                'Current HiveServer is down, working to connect with the next available HiveServer from Zookeeper')
              reset_ha()
              server_to_use = 0
              LOG.debug("Selected HiveServer {0}: {1}".format(server_to_use, hiveservers[server_to_use]))
              cache.set(
                "hiveserver2",
                json.dumps({
                  "host": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[0],
                  "port": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[1]
                })
              )

      activeEndpoint = json.loads(cache.get("hiveserver2"))

    if name == 'impala':
      from impala.dbms import get_query_server_config as impala_query_server_config
      query_server = impala_query_server_config()
    elif name == 'hms':
      kerberos_principal = get_hiveserver2_kerberos_principal(HIVE_SERVER_HOST.get())
      query_server = {
          'server_name': 'hms',
          'server_host': HIVE_METASTORE_HOST.get() if not cluster_config else cluster_config.get('server_host'),
          'server_port': HIVE_METASTORE_PORT.get(),
          'principal': kerberos_principal,
          'transport_mode': 'http' if hiveserver2_transport_mode() == 'HTTP' else 'socket',
          'auth_username': AUTH_USERNAME.get(),
          'auth_password': AUTH_PASSWORD.get(),
          'use_sasl': HIVE_USE_SASL.get()
      }
    else:
      kerberos_principal = get_hiveserver2_kerberos_principal(HIVE_SERVER_HOST.get())
      query_server = {
          'server_name': 'beeswax' if name != 'hplsql' else 'hplsql',
          'server_host': activeEndpoint["host"],
          'server_port': LLAP_SERVER_PORT.get() if name == 'llap' else HIVE_SERVER_PORT.get(),
          'principal': kerberos_principal,
          'http_url': '%(protocol)s://%(host)s:%(port)s/%(end_point)s' % {
              'protocol': 'https' if hiveserver2_use_ssl() else 'http',
              'host': activeEndpoint["host"],
              'port': activeEndpoint["port"],
              'end_point': hiveserver2_thrift_http_path()
            },
          'transport_mode': 'http' if hiveserver2_transport_mode() == 'HTTP' else 'socket',
          'auth_username': AUTH_USERNAME.get(),
          'auth_password': AUTH_PASSWORD.get(),
          'use_sasl': HIVE_USE_SASL.get(),
          'close_sessions': CLOSE_SESSIONS.get(),
          'has_session_pool': has_session_pool(),
          'max_number_of_sessions': MAX_NUMBER_OF_SESSIONS.get()
        }

    if name == 'sparksql':  # Extends Hive as very similar
      from spark.conf import SQL_SERVER_HOST as SPARK_SERVER_HOST, SQL_SERVER_PORT as SPARK_SERVER_PORT, USE_SASL as SPARK_USE_SASL

      query_server.update({
          'server_name': 'sparksql',
          'server_host': SPARK_SERVER_HOST.get(),
          'server_port': SPARK_SERVER_PORT.get(),
          'use_sasl': SPARK_USE_SASL.get()
      })

  if not query_server.get('dialect'):
    query_server['dialect'] = query_server['server_name']

  debug_query_server = query_server.copy()
  debug_query_server['auth_password_used'] = bool(debug_query_server.pop('auth_password', None))
  LOG.debug("Query Server: %s" % debug_query_server)

  return query_server
Esempio n. 6
0
def get_query_server_config(name='beeswax', connector=None):
  if connector and has_connectors(): # TODO: Give empty connector when no connector in use
    query_server = get_query_server_config_via_connector(connector)
  else:
    LOG.debug("Query cluster %s" % name)
    if name == "llap":
      activeEndpoint = cache.get('llap')
      if activeEndpoint is None:
        if HIVE_DISCOVERY_LLAP.get():
          LOG.debug("Checking zookeeper for Hive Server Interactive endpoint")
          zk = KazooClient(hosts=libzookeeper_conf.ENSEMBLE.get(), read_only=True)
          zk.start()
          if HIVE_DISCOVERY_LLAP_HA.get():
            znode = "{0}/instances".format(HIVE_DISCOVERY_LLAP_ZNODE.get())
            LOG.debug("Setting up LLAP with the following node {0}".format(znode))
            if zk.exists(znode):
              hiveservers = zk.get_children(znode)
              for server in hiveservers:
                llap_servers= json.loads(zk.get("{0}/{1}".format(znode, server))[0])["internal"][0]
                if llap_servers["api"] == "activeEndpoint":
                  cache.set("llap", json.dumps({"host": llap_servers["addresses"][0]["host"], "port": llap_servers["addresses"][0]["port"]}), CACHE_TIMEOUT.get())
            else:
              LOG.error("LLAP Endpoint not found, reverting to HiveServer2")
              cache.set("llap", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}), CACHE_TIMEOUT.get())
          else:
            znode = "{0}".format(HIVE_DISCOVERY_LLAP_ZNODE.get())
            LOG.debug("Setting up LLAP with the following node {0}".format(znode))
            if zk.exists(znode):
              hiveservers = zk.get_children(znode)
              for server in hiveservers:
                cache.set("llap", json.dumps({"host": server.split(';')[0].split('=')[1].split(":")[0], "port": server.split(';')[0].split('=')[1].split(":")[1]}))
          zk.stop()
        else:
          LOG.debug("Zookeeper Discovery not enabled, reverting to config values")
          cache.set("llap", json.dumps({"host": LLAP_SERVER_HOST.get(), "port": LLAP_SERVER_THRIFT_PORT.get()}), CACHE_TIMEOUT.get())
      activeEndpoint = json.loads(cache.get("llap"))
    elif name != 'hms' and name != 'impala':
      activeEndpoint = cache.get("hiveserver2")
      if activeEndpoint is None:
        if HIVE_DISCOVERY_HS2.get():
          zk = KazooClient(hosts=libzookeeper_conf.ENSEMBLE.get(), read_only=True)
          zk.start()
          znode = HIVE_DISCOVERY_HIVESERVER2_ZNODE.get()
          LOG.info("Setting up Hive with the following node {0}".format(znode))
          if zk.exists(znode):
            hiveservers = zk.get_children(znode)
            server_to_use = 0 # if CONF.HIVE_SPREAD.get() randint(0, len(hiveservers)-1) else 0
            cache.set("hiveserver2", json.dumps({"host": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[0], "port": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[1]}))
          else:
            cache.set("hiveserver2", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}))
          zk.stop()
        else:
          cache.set("hiveserver2", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}))
      activeEndpoint = json.loads(cache.get("hiveserver2"))

    if name == 'impala':
      from impala.dbms import get_query_server_config as impala_query_server_config
      query_server = impala_query_server_config()
    elif name == 'hms':
      kerberos_principal = hive_site.get_hiveserver2_kerberos_principal(HIVE_SERVER_HOST.get())
      query_server = {
          'server_name': 'hms',
          'server_host': HIVE_METASTORE_HOST.get() if not cluster_config else cluster_config.get('server_host'),
          'server_port': HIVE_METASTORE_PORT.get(),
          'principal': kerberos_principal,
          'transport_mode': 'http' if hive_site.hiveserver2_transport_mode() == 'HTTP' else 'socket',
          'auth_username': AUTH_USERNAME.get(),
          'auth_password': AUTH_PASSWORD.get(),
          'use_sasl': HIVE_USE_SASL.get()
      }
    else:
      kerberos_principal = hive_site.get_hiveserver2_kerberos_principal(HIVE_SERVER_HOST.get())
      query_server = {
          'server_name': 'beeswax',
          'server_host': activeEndpoint["host"],
          'server_port': LLAP_SERVER_PORT.get() if name == 'llap' else HIVE_SERVER_PORT.get(),
          'principal': kerberos_principal,
          'http_url': '%(protocol)s://%(host)s:%(port)s/%(end_point)s' % {
              'protocol': 'https' if hiveserver2_use_ssl() else 'http',
              'host': activeEndpoint["host"],
              'port': activeEndpoint["port"],
              'end_point': hive_site.hiveserver2_thrift_http_path()
            },
          'transport_mode': 'http' if hive_site.hiveserver2_transport_mode() == 'HTTP' else 'socket',
          'auth_username': AUTH_USERNAME.get(),
          'auth_password': AUTH_PASSWORD.get(),
          'use_sasl': HIVE_USE_SASL.get()
        }

    if name == 'sparksql': # Extends Hive as very similar
      from spark.conf import SQL_SERVER_HOST as SPARK_SERVER_HOST, SQL_SERVER_PORT as SPARK_SERVER_PORT, USE_SASL as SPARK_USE_SASL

      query_server.update({
          'server_name': 'sparksql',
          'server_host': SPARK_SERVER_HOST.get(),
          'server_port': SPARK_SERVER_PORT.get(),
          'use_sasl': SPARK_USE_SASL.get()
      })

  debug_query_server = query_server.copy()
  debug_query_server['auth_password_used'] = bool(debug_query_server.pop('auth_password', None))
  LOG.debug("Query Server: %s" % debug_query_server)

  return query_server