def kinit(kinit_path_local, keytab_path, principal_name, execute_user=None):
    """
    Run kinit with a keytab for the given principal.

    The command is executed while holding the global Kerberos lock so that
    kinit invocations guarded by that lock do not run concurrently.

    :param kinit_path_local: path of the kinit executable
    :param keytab_path: keytab file passed to kinit with ``-kt``
    :param principal_name: principal to obtain the ticket for
    :param execute_user: user to run the command as; when None the ``user``
                         argument is not passed to Execute at all
    """
    kinitcmd = "{0} -kt {1} {2}; ".format(kinit_path_local, keytab_path, principal_name)
    Logger.info("kinit command: " + kinitcmd + " as user: " + str(execute_user))

    # Acquire the lock only once nothing else can fail before the try block;
    # an error while formatting or logging would otherwise leave it held.
    kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
    kinit_lock.acquire()
    try:
        if execute_user is None:
            Execute(kinitcmd)
        else:
            Execute(kinitcmd, user=execute_user)
    finally:
        kinit_lock.release()
Beispiel #2
0
def check_thrift_port_sasl(address, port, hive_auth="NOSASL", key=None, kinitcmd=None, smokeuser='******',
                           transport_mode="binary", http_endpoint="cliservice", ssl=False, ssl_keystore=None,
                           ssl_password=None, check_command_timeout=30):
  """
  Hive thrift SASL port check

  Builds a beeline JDBC URL from the arguments, runs ``kinitcmd`` first when one is given, and
  then runs beeline against that URL as ``smokeuser``. The exit status of the beeline pipeline
  is negated, so it is non-zero when the output mentions 'Connection refused' or 'Invalid URL'.

  :param address: host of the thrift server, used in the JDBC URL
  :param port: port of the thrift server; a string is converted with int()
  :param hive_auth: when "NOSASL", 'auth=noSasl' is added to the URL
  :param key: value of the 'principal=' URL option; only added when kinitcmd is given
  :param kinitcmd: kinit command to run as smokeuser before the check; skipped when empty
  :param smokeuser: user that runs kinit and beeline
  :param transport_mode: value of 'transportMode='; "http" additionally adds 'httpPath='
  :param http_endpoint: value of 'httpPath=' for the http transport
  :param ssl: enables the SSL URL options when true and both ssl_keystore and ssl_password
              are given; the strings "", "false", "0", "no" and "off" (any case) count as False
  :param ssl_keystore: value of 'sslTrustStore='
  :param ssl_password: value of 'trustStorePassword='
  :param check_command_timeout: timeout passed to Execute for the beeline command
  """

  # check params to be correctly passed, if not - try to cast them
  if isinstance(port, str):
    port = int(port)

  if isinstance(ssl, str):
    # bool() of any non-empty string is True, which turned "false" into an enabled flag;
    # decide from the text instead
    ssl = ssl.strip().lower() not in ("", "false", "0", "no", "off")

  # to pass as beeline argument
  # NOTE(review): ssl_str and several other locals are never read directly below; presumably
  # format() fills the {placeholders} of the URL from this function's local variables, so the
  # local names have to match the placeholders - confirm against the format helper.
  ssl_str = str(ssl).lower()
  beeline_url = ['jdbc:hive2://{address}:{port}/', "transportMode={transport_mode}"]

  # append url according to used transport
  if transport_mode == "http":
    beeline_url.append('httpPath={http_endpoint}')

  # append url according to used auth
  if hive_auth == "NOSASL":
    beeline_url.append('auth=noSasl')

  # append url according to ssl configuration
  if ssl and ssl_keystore is not None and ssl_password is not None:
    beeline_url.extend(['ssl={ssl_str}', 'sslTrustStore={ssl_keystore}', 'trustStorePassword={ssl_password!p}'])

  # append url according to principal and execute kinit
  if kinitcmd:
    beeline_url.append('principal={key}')

    # prevent concurrent kinit
    kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
    kinit_lock.acquire()
    try:
      Execute(kinitcmd, user=smokeuser)
    finally:
      kinit_lock.release()

  # the leading '!' inverts the pipeline status: grep finding one of the error texts
  # makes the whole command fail
  cmd = "! beeline -u '%s' -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL'" % \
        format(";".join(beeline_url))

  Execute(cmd,
    user=smokeuser,
    path=["/bin/", "/usr/bin/", "/usr/lib/hive/bin/", "/usr/sbin/"],
    timeout=check_command_timeout)
def curl_krb_request(tmp_dir, keytab, principal, url, cache_file_prefix,
    krb_exec_search_paths, return_only_http_code, caller_label, user,
    connection_timeout = CONNECTION_TIMEOUT_DEFAULT,
    kinit_timer_ms=DEFAULT_KERBEROS_KINIT_TIMER_MS, method = '',body='',header=''):
  """
  Makes a curl request using the kerberos credentials stored in a calculated cache file. The
  cache file name combines the cache file prefix, the user and a hash of the supplied principal
  and keytab.

  This function will use the klist command to determine if the cache is expired and will perform
  a kinit if necessary. Additionally, it has an internal timer to force a kinit after a
  configurable amount of time. This is to prevent boundary issues where requests hit the edge
  of a ticket's lifetime.

  :param tmp_dir: the directory to use for storing the local kerberos cache for this request.
  :param keytab: the location of the keytab to use when performing a kinit
  :param principal: the principal to use when performing a kinit
  :param url: the URL to request
  :param cache_file_prefix: an identifier used to build the unique cache name for this request.
                            This ensures that multiple requests can use the same cache.
  :param krb_exec_search_paths: the search path to use for invoking kerberos binaries
  :param return_only_http_code: True to return only the HTTP code, False to return GET content
  :param caller_label: an identifier to give context into the caller of this module (used for logging)
  :param user: the user to invoke the curl command as
  :param connection_timeout: connection timeout for curl in seconds (defaults to
                             CONNECTION_TIMEOUT_DEFAULT); the overall curl time limit is
                             this value plus 2 seconds
  :param kinit_timer_ms: if specified, the time (in ms), before forcing a kinit even if the
                         klist cache is still valid.
  :param method: optional HTTP method passed to curl with -X; only used when the response
                 body is requested, and body/header are only sent when a method is given
  :param body: optional request body passed to curl with -d
  :param header: optional request header passed to curl with -H
  :return: a tuple (response, error_msg, elapsed): response is the HTTP code as an int when
           return_only_http_code is set, otherwise the response body, or "" when curl printed
           nothing; error_msg is curl's standard error, or None when it is empty; elapsed is
           the time spent, in seconds
  :raises Fail: re-raised when the curl invocation fails
  """

  import uuid

  # Create the kerberos credentials cache (ccache) file and set it in the environment to use
  # when executing curl. A hash of the combination of the principal and keytab file gives a
  # (relatively) unique cache filename; the prefix and the user are added to the name so that
  # cache files are not shared by multiple users.
  ccache_file_name = _md5("{0}|{1}".format(principal, keytab)).hexdigest()

  curl_krb_cache_path = os.path.join(tmp_dir, "curl_krb_cache")
  if not os.path.exists(curl_krb_cache_path):
    os.makedirs(curl_krb_cache_path)
  # world-writable so that every user can create its cache here; the sticky bit stops users
  # from removing or renaming each other's cache files
  os.chmod(curl_krb_cache_path, 0o1777)

  ccache_file_path = "{0}{1}{2}_{3}_cc_{4}".format(curl_krb_cache_path, os.sep, cache_file_prefix, user, ccache_file_name)
  kerberos_env = {'KRB5CCNAME': ccache_file_path}

  # concurrent kinit's can cause the following error:
  # Internal credentials cache error while storing credentials while getting initial credentials
  kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
  kinit_lock.acquire()
  try:
    # If there are no tickets in the cache or they are expired, perform a kinit, else use what
    # is in the cache
    if krb_exec_search_paths:
      klist_path_local = get_klist_path(krb_exec_search_paths)
    else:
      klist_path_local = get_klist_path()

    # take a look at the last time kinit was run for the specified cache and force a new
    # kinit if it's time; this helps to avoid problems approaching ticket boundary when
    # executing a klist and then a curl
    last_kinit_time = _KINIT_CACHE_TIMES.get(ccache_file_name, 0)
    current_time = int(time.time())
    # time.time() counts seconds while the timer is given in milliseconds; convert before
    # comparing, otherwise the timer practically never expires
    is_kinit_required = current_time - kinit_timer_ms / 1000.0 > last_kinit_time

    # if the time has not expired, double-check that the cache still has a valid ticket
    if not is_kinit_required:
      klist_command = "{0} -s {1}".format(klist_path_local, ccache_file_path)
      is_kinit_required = (shell.call(klist_command, user=user)[0] != 0)

    # if kinit is required, then perform the kinit
    if is_kinit_required:
      if krb_exec_search_paths:
        kinit_path_local = get_kinit_path(krb_exec_search_paths)
      else:
        kinit_path_local = get_kinit_path()

      logger.debug("Enabling Kerberos authentication for %s via GSSAPI using ccache at %s",
        caller_label, ccache_file_path)

      # kinit; there's no need to set a ticket timeout as this will use the default invalidation
      # configured in the krb5.conf - regenerating keytabs will not prevent an existing cache
      # from working correctly
      shell.checked_call("{0} -c {1} -kt {2} {3} > /dev/null".format(kinit_path_local,
        ccache_file_path, keytab, principal), user=user)

      # record kinit time (seconds since the epoch)
      _KINIT_CACHE_TIMES[ccache_file_name] = current_time
    else:
      # no kinit needed, use the cache
      logger.debug("Kerberos authentication for %s via GSSAPI already enabled using ccache at %s.",
        caller_label, ccache_file_path)
  finally:
    kinit_lock.release()

  # check if cookies dir exists, if not then create it
  cookies_dir = os.path.join(tmp_dir, "cookies")

  if not os.path.exists(cookies_dir):
    os.makedirs(cookies_dir)

  # every request gets its own cookie file, removed again once curl has finished
  cookie_file = os.path.join(cookies_dir, str(uuid.uuid4()))

  start_time = time.time()

  # setup timeouts for the request; ensure we use integers since that is what curl needs
  connection_timeout = int(connection_timeout)
  maximum_timeout = connection_timeout + 2

  try:
    curl_command = ['curl', '-L', '-k', '--negotiate', '-u', ':', '-b', cookie_file, '-c', cookie_file]
    timeout_options = ['--connect-timeout', str(connection_timeout), '--max-time', str(maximum_timeout)]

    if return_only_http_code:
      # print only the status code and discard the response body
      curl_command.extend(['-w', '%{http_code}', url] + timeout_options + ['-o', '/dev/null'])
    else:
      # returns response body
      curl_command.extend([url] + timeout_options)
      # header and body are only sent together with an explicit method
      if method:
        if header:
          curl_command.extend(['-H', header])
        curl_command.extend(['-X', method])
        if body:
          curl_command.extend(['-d', body])

    _, curl_stdout, curl_stderr = get_user_call_output(curl_command, user=user, env=kerberos_env)

  except Fail:
    if logger.isEnabledFor(logging.DEBUG):
      logger.exception("Unable to make a curl request for {0}.".format(caller_label))
    raise
  finally:
    if os.path.isfile(cookie_file):
      os.remove(cookie_file)

  # empty quotes evaluates to false
  error_msg = curl_stderr if curl_stderr else None

  # NOTE: despite its name this is the elapsed time in seconds
  time_millis = time.time() - start_time

  # empty quotes evaluates to false
  if curl_stdout:
    if return_only_http_code:
      return (int(curl_stdout), error_msg, time_millis)
    else:
      return (curl_stdout, error_msg, time_millis)

  logger.debug("The curl response for %s is empty; standard error = %s",
    caller_label, str(error_msg))

  return ("", error_msg, time_millis)
def get_check_command(oozie_url, host_name, configurations, parameters,
                      only_kinit):
    """
    Build the command that asks the Oozie server for its status.

    When security is enabled a kinit (optionally guarded by ``klist -s``) is
    executed first, under the global Kerberos lock, into a credentials cache
    whose name contains the id of the current process.

    :param oozie_url: URL passed to ``oozie admin -oozie``
    :param host_name: host name substituted (lower-cased) for ``_HOST`` in the principal
    :param configurations: mapping of configuration key to value
    :param parameters: mapping of script parameter key to value
    :param only_kinit: when true the kinit always runs; otherwise it only runs
                       when ``klist -s`` reports no valid ticket in the cache
    :return: tuple ``(command, kerberos_env, user)``; ``kerberos_env`` is None
             when security is not enabled
    """
    user = configurations[USER_KEY] if USER_KEY in configurations else USER_DEFAULT
    kerberos_env = None

    if is_security_enabled(configurations):
        # start from the defaults
        user_keytab = USER_KEYTAB_DEFAULT
        user_principal = USER_PRINCIPAL_DEFAULT

        # script parameters are consulted first and configurations last, so that
        # the configurations always take precedence
        lookups = (
            (parameters, USER_PRINCIPAL_SCRIPT_PARAM_KEY, USER_KEYTAB_SCRIPT_PARAM_KEY),
            (configurations, USER_PRINCIPAL_KEY, USER_KEYTAB_KEY),
        )
        for source, principal_key, keytab_key in lookups:
            if principal_key in source:
                user_principal = source[principal_key].replace(
                    '_HOST', host_name.lower())
            if keytab_key in source:
                user_keytab = source[keytab_key]

        # The kerberos credentials cache (ccache) file is handed to the commands
        # through the environment
        ccache_file = "{0}{1}oozie_alert_cc_{2}".format(
            Environment.get_instance().tmp_dir, os.sep, os.getpid())
        kerberos_env = {'KRB5CCNAME': ccache_file}

        # Configured Kerberos executable search paths, if any
        kerberos_executable_search_paths = (
            configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
            if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations else None)

        klist_path_local = get_klist_path(kerberos_executable_search_paths)
        kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
        # the placeholders below carry the names of the local variables above
        kinit_part_command = format(
            "{kinit_path_local} -l 5m20s -c {ccache_file} -kt {user_keytab} {user_principal}; "
        )

        # Unless only_kinit is set, test first whether the cache exists and holds
        # non-expired tickets. Tickets are requested with a lifetime of a bit more
        # than 5 minutes to reduce the number of kinits while still recovering
        # quickly when keytabs are regenerated.
        klist_guard = "" if only_kinit else "{0} -s {1} || ".format(
            klist_path_local, ccache_file)
        kinit_command = klist_guard + kinit_part_command

        # prevent concurrent kinit
        kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
        kinit_lock.acquire()
        try:
            Execute(kinit_command, environment=kerberos_env, user=user)
        finally:
            kinit_lock.release()

    # prefer OOZIE_CONF_DIR when it exists, otherwise use the legacy directory
    oozie_config_directory = (OOZIE_CONF_DIR if os.path.exists(OOZIE_CONF_DIR)
                              else OOZIE_CONF_DIR_LEGACY)

    command = "source {0}/oozie-env.sh ; oozie admin -oozie {1} -status".format(
        oozie_config_directory, oozie_url)

    return (command, kerberos_env, user)
def execute(configurations={}, parameters={}, host_name=None):
    """
    Returns a tuple containing the result code and a pre-formatted result label

    Runs the ``hive --service llapstatus`` command as the hive user (after a
    kinit when security is enabled) and maps the reported application state to
    an alert result. Any exception raised on the way is reported as an unknown
    status with the traceback as label.

    Keyword arguments:
    configurations (dictionary): a mapping of configuration key to value
    parameters (dictionary): a mapping of script parameter key to value
    host_name (string): the name of this host where the alert is running
    """

    # Not read directly: it is referenced by name as {LLAP_APP_STATUS_CMD_TIMEOUT}
    # in the command templates passed to format() below.
    LLAP_APP_STATUS_CMD_TIMEOUT = 0

    if configurations is None:
        return ('UNKNOWN',
                ['There were no configurations supplied to the script.'])

    result_code = None

    try:
        security_enabled = False
        if SECURITY_ENABLED_KEY in configurations:
            security_enabled = str(
                configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

        # The timeout is read from the script parameters, so that is also where
        # its presence has to be tested (testing the configurations instead made
        # the lookup fail with a KeyError).
        check_command_timeout = CHECK_COMMAND_TIMEOUT_DEFAULT
        if parameters and CHECK_COMMAND_TIMEOUT_KEY in parameters:
            check_command_timeout = int(parameters[CHECK_COMMAND_TIMEOUT_KEY])

        hive_user = HIVE_USER_DEFAULT
        if HIVE_USER_KEY in configurations:
            hive_user = configurations[HIVE_USER_KEY]

        llap_app_name = LLAP_APP_NAME_DEFAULT
        if LLAP_APP_NAME_KEY in configurations:
            llap_app_name = configurations[LLAP_APP_NAME_KEY]

        if security_enabled:
            if HIVE_PRINCIPAL_KEY in configurations:
                llap_principal = configurations[HIVE_PRINCIPAL_KEY]
            else:
                llap_principal = HIVE_PRINCIPAL_DEFAULT
            llap_principal = llap_principal.replace('_HOST', host_name.lower())

            llap_keytab = HIVE_PRINCIPAL_KEYTAB_DEFAULT
            if HIVE_PRINCIPAL_KEYTAB_KEY in configurations:
                llap_keytab = configurations[HIVE_PRINCIPAL_KEYTAB_KEY]

            # Get the configured Kerberos executable search paths, if any
            if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
                kerberos_executable_search_paths = configurations[
                    KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
            else:
                kerberos_executable_search_paths = None

            kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
            kinitcmd = format(
                "{kinit_path_local} -kt {llap_keytab} {llap_principal}; ")

            # prevent concurrent kinit
            kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
            kinit_lock.acquire()
            try:
                Execute(kinitcmd,
                        user=hive_user,
                        path=[
                            "/bin/", "/usr/bin/", "/usr/lib/hive/bin/",
                            "/usr/sbin/"
                        ],
                        timeout=10)
            finally:
                kinit_lock.release()

        start_time = time.time()
        if STACK_NAME in configurations and STACK_ROOT in configurations:
            stack_root = stack_tools.get_stack_root(configurations[STACK_NAME],
                                                    configurations[STACK_ROOT])

            llap_status_cmd = stack_root + format(
                "/current/hive-server2-hive2/bin/hive --service llapstatus --name {llap_app_name}  --findAppTimeout {LLAP_APP_STATUS_CMD_TIMEOUT}"
            )
        else:
            llap_status_cmd = STACK_ROOT_DEFAULT + format(
                "/current/hive-server2-hive2/bin/hive --service llapstatus --name {llap_app_name} --findAppTimeout {LLAP_APP_STATUS_CMD_TIMEOUT}"
            )

        code, output, error = shell.checked_call(llap_status_cmd,
                                                 user=hive_user,
                                                 stderr=subprocess.PIPE,
                                                 timeout=check_command_timeout,
                                                 logoutput=False)
        # Call for getting JSON
        llap_app_info = make_valid_json(output)

        if llap_app_info is None or 'state' not in llap_app_info:
            # no exception is active at this point, so there is no traceback to
            # report; say what is actually wrong instead
            return (UKNOWN_STATUS_CODE, [
                'Unable to determine the LLAP application state from the llapstatus output.'
            ])

        retrieved_llap_app_state = llap_app_info['state'].upper()
        # label for the state; falls back to the raw state when it is not mapped
        state_label = llap_app_state_dict.get(retrieved_llap_app_state,
                                              retrieved_llap_app_state)

        if retrieved_llap_app_state == 'RUNNING_ALL':
            result_code = OK_RESULT_CODE
            alert_label = OK_MESSAGE.format(state_label,
                                            time.time() - start_time)
        elif retrieved_llap_app_state == 'RUNNING_PARTIAL':
            # share of the desired instances that has to be live for an OK result
            percent_desired_instances_to_be_up = 80

            # the instance counts are usable only when both are reported, the
            # live count is not negative and the desired count is positive
            counts_usable = (
                'liveInstances' in llap_app_info
                and 'desiredInstances' in llap_app_info
                and not (llap_app_info['liveInstances'] < 0
                         or llap_app_info['desiredInstances'] <= 0))

            if not counts_usable:
                result_code = CRITICAL_RESULT_CODE
                alert_label = CRITICAL_MESSAGE_WITH_STATE.format(
                    state_label, time.time() - start_time)
            else:
                live_instances = llap_app_info['liveInstances']
                desired_instances = llap_app_info['desiredInstances']
                percent_instances_up = float(
                    live_instances) / desired_instances * 100

                if percent_instances_up >= percent_desired_instances_to_be_up:
                    result_code = OK_RESULT_CODE
                else:
                    result_code = CRITICAL_RESULT_CODE
                # the same label is used for both outcomes
                alert_label = MESSAGE_WITH_STATE_AND_INSTANCES.format(
                    state_label, time.time() - start_time, live_instances,
                    desired_instances)
        else:
            result_code = CRITICAL_RESULT_CODE
            alert_label = CRITICAL_MESSAGE_WITH_STATE.format(
                state_label, time.time() - start_time)
    except:
        # deliberate catch-all: whatever goes wrong, the alert still has to
        # report a status, with the traceback as its label
        alert_label = traceback.format_exc()
        result_code = UKNOWN_STATUS_CODE
    return (result_code, [alert_label])
Beispiel #6
0
def execute(configurations={}, parameters={}, host_name=None):
    """
    Returns a tuple containing the result code and a pre-formatted result label

    Runs ``show databases;`` through the hive command line against the
    metastore URI that contains this host's name (after a kinit as the smoke
    user when security is enabled). The result is 'OK' when the command
    succeeds, 'CRITICAL' when it fails and 'UNKNOWN' when the check itself
    could not be set up.

    Keyword arguments:
    configurations (dictionary): a mapping of configuration key to value
    parameters (dictionary): a mapping of script parameter key to value
    host_name (string): the name of this host where the alert is running
    """

    if configurations is None:
        return ('UNKNOWN',
                ['There were no configurations supplied to the script.'])

    if HIVE_METASTORE_URIS_KEY not in configurations:
        return ('UNKNOWN',
                ['Hive metastore uris were not supplied to the script.'])

    metastore_uris = configurations[HIVE_METASTORE_URIS_KEY].split(',')

    security_enabled = False
    if SECURITY_ENABLED_KEY in configurations:
        security_enabled = str(
            configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

    check_command_timeout = CHECK_COMMAND_TIMEOUT_DEFAULT
    if CHECK_COMMAND_TIMEOUT_KEY in parameters:
        check_command_timeout = float(parameters[CHECK_COMMAND_TIMEOUT_KEY])

    # defaults
    smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
    smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT
    smokeuser = SMOKEUSER_DEFAULT

    # check script params
    if SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters:
        smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY]

    if SMOKEUSER_SCRIPT_PARAM_KEY in parameters:
        smokeuser = parameters[SMOKEUSER_SCRIPT_PARAM_KEY]

    if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters:
        smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY]

    # check configurations last as they should always take precedence
    if SMOKEUSER_PRINCIPAL_KEY in configurations:
        smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY]

    if SMOKEUSER_KEY in configurations:
        smokeuser = configurations[SMOKEUSER_KEY]

    result_code = None

    try:
        if security_enabled:
            if SMOKEUSER_KEYTAB_KEY in configurations:
                smokeuser_keytab = configurations[SMOKEUSER_KEYTAB_KEY]

            # Get the configured Kerberos executable search paths, if any
            if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
                kerberos_executable_search_paths = configurations[
                    KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
            else:
                kerberos_executable_search_paths = None

            kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
            kinitcmd = format(
                "{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}; "
            )

            # prevent concurrent kinit
            kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
            kinit_lock.acquire()
            try:
                Execute(kinitcmd,
                        user=smokeuser,
                        path=[
                            "/bin/", "/usr/bin/", "/usr/lib/hive/bin/",
                            "/usr/sbin/"
                        ],
                        timeout=10)
            finally:
                kinit_lock.release()

        if host_name is None:
            host_name = socket.getfqdn()

        # pick the URI that mentions this host; when several do, the last one wins
        metastore_uri = None
        for uri in metastore_uris:
            if host_name in uri:
                metastore_uri = uri

        if metastore_uri is None:
            # Without a matching URI the command below cannot be built; report
            # that explicitly instead of failing on the unset variable.
            return ('UNKNOWN', [
                'None of the Hive metastore uris ({0}) contains the host name {1}.'
                .format(configurations[HIVE_METASTORE_URIS_KEY], host_name)
            ])

        conf_dir = HIVE_CONF_DIR_LEGACY
        bin_dir = HIVE_BIN_DIR_LEGACY

        if STACK_ROOT in configurations:
            hive_conf_dir = configurations[STACK_ROOT] + format(
                "/current/hive-metastore/conf/conf.server")
            hive_bin_dir = configurations[STACK_ROOT] + format(
                "/current/hive-metastore/bin")

            # use the stack directories only when the configuration directory exists
            if os.path.exists(hive_conf_dir):
                conf_dir = hive_conf_dir
                bin_dir = hive_bin_dir

        # the placeholders carry the names of the local variables set above
        cmd = format("export HIVE_CONF_DIR='{conf_dir}' ; "
                     "hive --hiveconf hive.metastore.uris={metastore_uri}\
                 --hiveconf hive.metastore.client.connect.retry.delay=1\
                 --hiveconf hive.metastore.failure.retries=1\
                 --hiveconf hive.metastore.connect.retries=1\
                 --hiveconf hive.metastore.client.socket.timeout=14\
                 --hiveconf hive.execution.engine=mr -e 'show databases;'")

        start_time = time.time()

        try:
            Execute(
                cmd,
                user=smokeuser,
                path=["/bin/", "/usr/bin/", "/usr/sbin/", bin_dir],
                timeout=int(check_command_timeout),
                timeout_kill_strategy=TerminateStrategy.KILL_PROCESS_TREE,
            )

            total_time = time.time() - start_time

            result_code = 'OK'
            label = OK_MESSAGE.format(total_time)
        except:
            # any failure of the check command itself means the metastore check failed
            result_code = 'CRITICAL'
            label = CRITICAL_MESSAGE.format(host_name, traceback.format_exc())

    except:
        # deliberate catch-all: a failure while preparing the check is reported
        # as an unknown status rather than raised
        label = traceback.format_exc()
        result_code = 'UNKNOWN'

    return (result_code, [label])
Beispiel #7
0
def curl_krb_request(tmp_dir,
                     keytab,
                     principal,
                     url,
                     cache_file_prefix,
                     krb_exec_search_paths,
                     return_only_http_code,
                     caller_label,
                     user,
                     connection_timeout=CONNECTION_TIMEOUT_DEFAULT,
                     ca_certs=None,
                     kinit_timer_ms=DEFAULT_KERBEROS_KINIT_TIMER_MS,
                     method='',
                     body='',
                     header=''):
    """
    Makes a curl request using the kerberos credentials stored in a calculated cache file. The
    cache file name combines the cache file prefix, the user and a hash of the supplied principal
    and keytab.

    This function will use the klist command to determine if the cache is expired and will perform
    a kinit if necessary. Additionally, it has an internal timer to force a kinit after a
    configurable amount of time. This is to prevent boundary issues where requests hit the edge
    of a ticket's lifetime.

    :param tmp_dir: the directory to use for storing the local kerberos cache for this request.
    :param keytab: the location of the keytab to use when performing a kinit
    :param principal: the principal to use when performing a kinit
    :param url: the URL to request
    :param cache_file_prefix: an identifier used to build the unique cache name for this request.
                              This ensures that multiple requests can use the same cache.
    :param krb_exec_search_paths: the search path to use for invoking kerberos binaries
    :param return_only_http_code: True to return only the HTTP code, False to return GET content
    :param caller_label: an identifier to give context into the caller of this module (used for logging)
    :param user: the user to invoke the curl command as
    :param connection_timeout: connection timeout for curl in seconds (defaults to
                               CONNECTION_TIMEOUT_DEFAULT); the overall curl time limit is
                               this value plus 2 seconds
    :param ca_certs: path to certificates; when None it is looked up in the Ambari agent
                     configuration, and when no path is available curl runs with -k
    :param kinit_timer_ms: if specified, the time (in ms), before forcing a kinit even if the
                           klist cache is still valid.
    :param method: optional HTTP method passed to curl with -X; only used when the response
                   body is requested, and body/header are only sent when a method is given
    :param body: optional request body passed to curl with -d
    :param header: optional request header passed to curl with -H
    :return: a tuple (response, error_msg, elapsed): response is the HTTP code as an int when
             return_only_http_code is set, otherwise the response body, or "" when curl printed
             nothing; error_msg is curl's standard error, or None when it is empty; elapsed is
             the time spent, in seconds
    :raises Fail: re-raised when the curl invocation fails
    """

    import uuid
    # backward compatibility with old code and management packs, etc. All new code need pass ca_certs explicitly
    if ca_certs is None:
        try:
            from ambari_agent.AmbariConfig import AmbariConfig
            ca_certs = AmbariConfig.get_resolved_config(
            ).get_ca_cert_file_path()
        except Exception:
            # best effort only: without a certificate path curl is run with '-k' below
            logger.debug(
                "Unable to resolve the CA certificate path for %s",
                caller_label,
                exc_info=True)

    # Create the kerberos credentials cache (ccache) file and set it in the environment to use
    # when executing curl. A hash of the combination of the principal and keytab file gives a
    # (relatively) unique cache filename; the prefix and the user are added to the name so that
    # cache files are not shared by multiple users.
    ccache_file_name = HASH_ALGORITHM("{0}|{1}".format(principal,
                                                       keytab)).hexdigest()

    curl_krb_cache_path = os.path.join(tmp_dir, "curl_krb_cache")
    if not os.path.exists(curl_krb_cache_path):
        os.makedirs(curl_krb_cache_path)
    # world-writable plus the sticky bit
    os.chmod(curl_krb_cache_path, 0o1777)

    ccache_file_path = "{0}{1}{2}_{3}_cc_{4}".format(curl_krb_cache_path,
                                                     os.sep, cache_file_prefix,
                                                     user, ccache_file_name)
    kerberos_env = {'KRB5CCNAME': ccache_file_path}

    # concurrent kinit's can cause the following error:
    # Internal credentials cache error while storing credentials while getting initial credentials
    kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
    kinit_lock.acquire()
    try:
        # If there are no tickets in the cache or they are expired, perform a kinit, else use what
        # is in the cache
        if krb_exec_search_paths:
            klist_path_local = get_klist_path(krb_exec_search_paths)
        else:
            klist_path_local = get_klist_path()

        # take a look at the last time kinit was run for the specified cache and force a new
        # kinit if it's time; this helps to avoid problems approaching ticket boundary when
        # executing a klist and then a curl
        last_kinit_time = _KINIT_CACHE_TIMES.get(ccache_file_name, 0)
        current_time = int(time.time())
        # time.time() counts seconds while the timer is given in milliseconds; convert before
        # comparing, otherwise the timer practically never expires
        is_kinit_required = (
            current_time - kinit_timer_ms / 1000.0 > last_kinit_time)

        # if the time has not expired, double-check that the cache still has a valid ticket
        if not is_kinit_required:
            klist_command = "{0} -s {1}".format(klist_path_local,
                                                ccache_file_path)
            is_kinit_required = (shell.call(klist_command, user=user)[0] != 0)

        # if kinit is required, then perform the kinit
        if is_kinit_required:
            if krb_exec_search_paths:
                kinit_path_local = get_kinit_path(krb_exec_search_paths)
            else:
                kinit_path_local = get_kinit_path()

            logger.debug(
                "Enabling Kerberos authentication for %s via GSSAPI using ccache at %s",
                caller_label, ccache_file_path)

            # kinit; there's no need to set a ticket timeout as this will use the default invalidation
            # configured in the krb5.conf - regenerating keytabs will not prevent an existing cache
            # from working correctly
            shell.checked_call("{0} -c {1} -kt {2} {3} > /dev/null".format(
                kinit_path_local, ccache_file_path, keytab, principal),
                               user=user)

            # record kinit time (seconds since the epoch)
            _KINIT_CACHE_TIMES[ccache_file_name] = current_time
        else:
            # no kinit needed, use the cache
            logger.debug(
                "Kerberos authentication for %s via GSSAPI already enabled using ccache at %s.",
                caller_label, ccache_file_path)
    finally:
        kinit_lock.release()

    # check if cookies dir exists, if not then create it
    cookies_dir = os.path.join(tmp_dir, "cookies")

    if not os.path.exists(cookies_dir):
        os.makedirs(cookies_dir)

    # every request gets its own cookie file, removed again once curl has finished
    cookie_file = os.path.join(cookies_dir, str(uuid.uuid4()))

    start_time = time.time()

    # setup timeouts for the request; ensure we use integers since that is what curl needs
    connection_timeout = int(connection_timeout)
    maximum_timeout = connection_timeout + 2

    # verify the server certificate when a CA file is known, otherwise skip verification
    ssl_options = ['--cacert', ca_certs] if ca_certs else ['-k']

    try:
        # NOTE(review): admin_username and admin_password are neither parameters nor locals
        # of this function; presumably they are defined at module level - confirm, otherwise
        # this line raises a NameError.
        curl_command = ['curl', '--location-trusted'] + ssl_options + [
            '--negotiate', '-u', admin_username + ':' + admin_password,
            '-b', cookie_file, '-c', cookie_file
        ]
        timeout_options = [
            '--connect-timeout',
            str(connection_timeout), '--max-time',
            str(maximum_timeout)
        ]

        if return_only_http_code:
            # print only the status code and discard the response body
            curl_command.extend(['-w', '%{http_code}', url] +
                                timeout_options + ['-o', '/dev/null'])
        else:
            # returns response body
            curl_command.extend([url] + timeout_options)
            # header and body are only sent together with an explicit method
            if method:
                if header:
                    curl_command.extend(['-H', header])
                curl_command.extend(['-X', method])
                if body:
                    curl_command.extend(['-d', body])

        _, curl_stdout, curl_stderr = get_user_call_output(
            curl_command, user=user, env=kerberos_env)

    except Fail:
        if logger.isEnabledFor(logging.DEBUG):
            logger.exception(
                "Unable to make a curl request for {0}.".format(caller_label))
        raise
    finally:
        if os.path.isfile(cookie_file):
            os.remove(cookie_file)

    # empty quotes evaluates to false
    error_msg = curl_stderr if curl_stderr else None

    # NOTE: despite its name this is the elapsed time in seconds
    time_millis = time.time() - start_time

    # empty quotes evaluates to false
    if curl_stdout:
        if return_only_http_code:
            return (int(curl_stdout), error_msg, time_millis)
        else:
            return (curl_stdout, error_msg, time_millis)

    logger.debug("The curl response for %s is empty; standard error = %s",
                 caller_label, str(error_msg))

    return ("", error_msg, time_millis)
def execute(configurations={}, parameters={}, host_name=None):
    """
    Returns a tuple containing the result code and a pre-formatted result label

    If the external-HBase flag is 'TRUE' the check reports OK immediately.
    Otherwise, when the system-service-launch flag is not 'TRUE', the state is
    derived from the HBase master and region server pid files; when it is
    'TRUE', the script runs kinit (only if security is enabled) and then
    inspects the 'state' reported by `yarn app -status ats-hbase`.
    Any unexpected exception is reported as CRITICAL with the traceback as
    label.

    Keyword arguments:
    configurations (dictionary): a mapping of configuration key to value
    parameters (dictionary): a mapping of script parameter key to value
    host_name (string): the name of this host where the alert is running
    """

    if configurations is None:
        return (UKNOWN_STATUS_CODE,
                ['There were no configurations supplied to the script.'])

    # parameters is only consulted for the command timeout; treat None as empty
    if parameters is None:
        parameters = {}

    result_code = None

    try:
        use_external_hbase = False
        if USE_EXTERNAL_HBASE_KEY in configurations:
            use_external_hbase = str(
                configurations[USE_EXTERNAL_HBASE_KEY]).upper() == 'TRUE'

        if use_external_hbase:
            return (OK_RESULT_CODE, ['use_external_hbase set to true.'])

        is_hbase_system_service_launch = False
        if ATS_HBASE_SYSTEM_SERVICE_LAUNCH_KEY in configurations:
            is_hbase_system_service_launch = str(
                configurations[ATS_HBASE_SYSTEM_SERVICE_LAUNCH_KEY]
            ).upper() == 'TRUE'

        yarn_hbase_user = "******"
        if ATS_HBASE_USER_KEY in configurations:
            yarn_hbase_user = configurations[ATS_HBASE_USER_KEY]

        # Both branches below need the host name (alert label / _HOST
        # substitution), so resolve it once before either of them runs.
        if host_name is None:
            host_name = socket.getfqdn()

        # NOTE: format() is called without arguments, so it presumably resolves
        # the {placeholders} from this function's local variables - keep the
        # local names below in sync with the templates.
        if not is_hbase_system_service_launch:
            if ATS_HBASE_PID_DIR_PREFIX not in configurations:
                return (UKNOWN_STATUS_CODE, [
                    'The yarn_hbase_pid_dir_prefix is a required parameter.'
                ])
            yarn_hbase_pid_dir_prefix = configurations[
                ATS_HBASE_PID_DIR_PREFIX]

            yarn_hbase_pid_dir = format(
                "{yarn_hbase_pid_dir_prefix}/{yarn_hbase_user}")
            master_pid_file = format(
                "{yarn_hbase_pid_dir}/hbase-{yarn_hbase_user}-master.pid")
            rs_pid_file = format(
                "{yarn_hbase_pid_dir}/hbase-{yarn_hbase_user}-regionserver.pid"
            )

            master_process_running = is_monitor_process_live(master_pid_file)
            rs_process_running = is_monitor_process_live(rs_pid_file)

            if master_process_running and rs_process_running:
                alert_state = OK_RESULT_CODE
                alert_label = 'ATS embedded HBase is running on {0}'
            else:
                alert_state = CRITICAL_RESULT_CODE
                alert_label = 'ATS embedded HBase is NOT running on {0}'

            return (alert_state, [alert_label.format(host_name)])

        security_enabled = False
        if SECURITY_ENABLED_KEY in configurations:
            security_enabled = str(
                configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

        # The timeout is read from the script parameters, so that is also
        # where its presence has to be tested.
        check_command_timeout = CHECK_COMMAND_TIMEOUT_DEFAULT
        if CHECK_COMMAND_TIMEOUT_KEY in parameters:
            check_command_timeout = int(parameters[CHECK_COMMAND_TIMEOUT_KEY])

        if security_enabled:
            # The kinit command below needs both values; fail with a readable
            # message instead of an undefined-variable traceback.
            if (ATS_HBASE_PRINCIPAL_KEY not in configurations
                    or ATS_HBASE_PRINCIPAL_KEYTAB_KEY not in configurations):
                return (UKNOWN_STATUS_CODE, [
                    'The ATS HBase principal and keytab are required when security is enabled.'
                ])

            ats_hbase_app_principal = configurations[
                ATS_HBASE_PRINCIPAL_KEY].replace('_HOST', host_name.lower())
            ats_hbase_app_keytab = configurations[
                ATS_HBASE_PRINCIPAL_KEYTAB_KEY]

            # Get the configured Kerberos executable search paths, if any
            kerberos_executable_search_paths = None
            if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
                kerberos_executable_search_paths = configurations[
                    KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]

            kinit_path_local = get_kinit_path(
                kerberos_executable_search_paths)
            kinitcmd = format(
                "{kinit_path_local} -kt {ats_hbase_app_keytab} {ats_hbase_app_principal}; "
            )

            # prevent concurrent kinit
            kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
            kinit_lock.acquire()
            try:
                Execute(kinitcmd,
                        user=yarn_hbase_user,
                        path=["/bin/", "/usr/bin/", "/usr/sbin/"],
                        timeout=10)
            finally:
                kinit_lock.release()

        start_time = time.time()
        ats_hbase_status_cmd = STACK_ROOT_DEFAULT + format(
            "/current/hadoop-yarn-client/bin/yarn app -status ats-hbase")

        code, output, error = shell.checked_call(
            ats_hbase_status_cmd,
            user=yarn_hbase_user,
            stderr=subprocess.PIPE,
            timeout=check_command_timeout,
            logoutput=False)
        if code != 0:
            # No exception is active here, so a traceback would be empty;
            # report what the command itself returned instead.
            return (UKNOWN_STATUS_CODE, [
                'Unable to retrieve the ats-hbase application status (exit code {0}): {1}'
                .format(code, error)
            ])

        # Call for getting JSON
        ats_hbase_app_info = make_valid_json(output)

        if ats_hbase_app_info is None:
            return (CRITICAL_RESULT_CODE, [CRITICAL_MESSAGE])

        if 'state' not in ats_hbase_app_info:
            return (UKNOWN_STATUS_CODE, [
                "The ats-hbase application status does not contain a 'state' field."
            ])

        retrieved_ats_hbase_app_state = ats_hbase_app_info['state'].upper()
        total_time = time.time() - start_time

        if retrieved_ats_hbase_app_state == 'STABLE':
            result_code = OK_RESULT_CODE
            alert_label = OK_MESSAGE.format(retrieved_ats_hbase_app_state,
                                            total_time)
        else:
            result_code = CRITICAL_RESULT_CODE
            alert_label = CRITICAL_MESSAGE_WITH_STATE.format(
                retrieved_ats_hbase_app_state, total_time)
    except Exception:
        alert_label = traceback.format_exc()
        result_code = CRITICAL_RESULT_CODE
    return (result_code, [alert_label])
Beispiel #9
0
def check_thrift_port_sasl(address,
                           port,
                           hive_auth="NOSASL",
                           key=None,
                           kinitcmd=None,
                           smokeuser='******',
                           hive_user='******',
                           transport_mode="binary",
                           http_endpoint="cliservice",
                           ssl=False,
                           ssl_keystore=None,
                           ssl_password=None,
                           check_command_timeout=30,
                           ldap_username="",
                           ldap_password=""):
    """
    Hive thrift SASL port check

    Builds a beeline JDBC url from the arguments, runs kinitcmd first when one
    is given and hive_auth is not "LDAP", and then executes the beeline
    command as smokeuser through Execute. Nothing is returned; failures
    surface as whatever Execute raises.

    A string ssl value is interpreted by its text: '', 'false', '0', 'no' and
    'off' (case-insensitive) disable SSL, any other string enables it.
    """

    # check params to be correctly passed, if not - try to cast them
    if isinstance(port, str):
        port = int(port)

    if isinstance(ssl, str):
        # bool() of any non-empty string is True, so "false" used to switch
        # SSL on; look at the text itself and keep all other values truthy
        ssl = ssl.strip().lower() not in ('', 'false', '0', 'no', 'off')

    # NOTE: format() is called without arguments further down, so it
    # presumably resolves the {placeholders} from the local variables of this
    # function (ssl_str, quoted_ldap_password, ...) - do not rename them.

    # to pass as beeline argument
    ssl_str = str(ssl).lower()
    beeline_url = [
        'jdbc:hive2://{address}:{port}/', "transportMode={transport_mode}"
    ]

    # append url according to used transport
    if transport_mode == "http":
        beeline_url.append('httpPath={http_endpoint}')

    # append url according to used auth
    if hive_auth == "NOSASL":
        beeline_url.append('auth=noSasl')

    credential_str = "-n {hive_user}"

    # append username and password for LDAP
    if hive_auth == "LDAP":
        # password might contain special characters that need to be escaped
        quoted_ldap_password = quote_bash_args(ldap_password)
        credential_str = "-n {ldap_username} -p {quoted_ldap_password!p}"

    # append url according to ssl configuration
    if ssl and ssl_keystore is not None and ssl_password is not None:
        beeline_url.extend([
            'ssl={ssl_str}', 'sslTrustStore={ssl_keystore}',
            'trustStorePassword={ssl_password!p}'
        ])

    # append url according to principal and execute kinit
    if kinitcmd and hive_auth != "LDAP":
        beeline_url.append('principal={key}')

        # prevent concurrent kinit
        kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
        kinit_lock.acquire()
        try:
            Execute(kinitcmd, user=smokeuser)
        finally:
            kinit_lock.release()

    # -n the user to connect as (ignored when using the hive principal in the URL, can be different from the user running the beeline command)
    # -e ';' executes a SQL command of NOOP
    cmd = ("! (beeline -u '%s' %s -e ';' 2>&1 | awk '{print}' | grep -vz -i " + \
           "-e 'Connected to:' -e 'Transaction isolation:' -e 'inactive HS2 instance; use service discovery')") % \
          (format(";".join(beeline_url)), format(credential_str))

    Execute(
        cmd,
        user=smokeuser,
        path=["/bin/", "/usr/bin/", "/usr/lib/hive/bin/", "/usr/sbin/"],
        timeout=check_command_timeout,
        timeout_kill_strategy=TerminateStrategy.KILL_PROCESS_TREE,
    )
def execute(configurations={}, parameters={}, host_name=None):
    """
    Returns a tuple containing the result code and a pre-formatted result label

    Runs beeline against jdbc:hive2://<host>:<port>/default (after a kinit
    when security is enabled). The result is 'OK' when the beeline command
    executes without raising, 'CRITICAL' when Execute raises, and 'UNKNOWN'
    (with the traceback as label) when anything else fails while the check is
    being prepared.

    Keyword arguments:
    configurations (dictionary): a mapping of configuration key to value
    parameters (dictionary): a mapping of script parameter key to value
    host_name (string): the name of this host where the alert is running
    """

    # Validate the input before touching any other state
    if configurations is None:
        return ('UNKNOWN',
                ['There were no configurations supplied to the script.'])

    result_code = None
    try:
        transport_mode = HIVE_SERVER_TRANSPORT_MODE_DEFAULT
        if HIVE_SERVER_TRANSPORT_MODE_KEY in configurations:
            transport_mode = configurations[HIVE_SERVER_TRANSPORT_MODE_KEY]

        # the configured thrift port is only honoured in binary transport mode
        port = THRIFT_PORT_DEFAULT
        if (transport_mode.lower() == 'binary'
                and HIVE_SERVER_THRIFT_PORT_KEY in configurations):
            port = int(configurations[HIVE_SERVER_THRIFT_PORT_KEY])

        security_enabled = False
        if SECURITY_ENABLED_KEY in configurations:
            security_enabled = str(
                configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

        hive_kerberos_keytab = None
        if HIVE_SERVER2_KERBEROS_KEYTAB in configurations:
            hive_kerberos_keytab = configurations[HIVE_SERVER2_KERBEROS_KEYTAB]

        if host_name is None:
            host_name = socket.getfqdn()

        hive_principal = None
        if HIVE_SERVER2_PRINCIPAL_KEY in configurations:
            hive_principal = configurations[HIVE_SERVER2_PRINCIPAL_KEY]
            hive_principal = hive_principal.replace('_HOST', host_name.lower())

        # Get the configured Kerberos executable search paths, if any
        kerberos_executable_search_paths = None
        if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
            kerberos_executable_search_paths = configurations[
                KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]

        kinit_path_local = get_kinit_path(kerberos_executable_search_paths)

        hiveruser = HIVEUSER_DEFAULT

        # NOTE: format() is called without arguments, so it presumably
        # resolves the {placeholders} from the local variables above - keep
        # their names in sync with the templates.
        if security_enabled:
            kinitcmd = format(
                "{kinit_path_local} -kt {hive_kerberos_keytab} {hive_principal}; "
            )
            # prevent concurrent kinit
            kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
            kinit_lock.acquire()
            try:
                Execute(kinitcmd, user=hiveruser)
            finally:
                kinit_lock.release()

            beeline_url = [
                "'jdbc:hive2://{host_name}:{port}/default;principal={hive_principal}'",
                "transportMode={transport_mode}"
            ]
        else:
            beeline_url = [
                "'jdbc:hive2://{host_name}:{port}/default'",
                "transportMode={transport_mode}"
            ]

        # Execute's path entries are directories to search (see the other
        # alerts' "/usr/bin/"-style entries), so hand it spark's bin directory
        # rather than the beeline executable itself.
        spark_home = os.path.join(stack_root, "current", 'spark-client')
        spark_bin_dir = os.path.join(spark_home, "bin")

        # NOTE(review): transportMode is joined with a space and therefore
        # ends up as a separate beeline argument, not as part of the JDBC
        # url - confirm whether ';' was intended.
        cmd = "! beeline -u %s  -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL'" % \
         (format(" ".join(beeline_url)))

        start_time = time.time()
        try:
            Execute(cmd,
                    user=hiveruser,
                    path=[spark_bin_dir],
                    timeout=CHECK_COMMAND_TIMEOUT_DEFAULT)
            total_time = time.time() - start_time
            result_code = 'OK'
            label = OK_MESSAGE.format(total_time, port)
        except Exception:
            result_code = 'CRITICAL'
            label = CRITICAL_MESSAGE.format(host_name, port,
                                            traceback.format_exc())
    except Exception:
        label = traceback.format_exc()
        result_code = 'UNKNOWN'

    return (result_code, [label])
Beispiel #11
0
def check_thrift_port_sasl(address,
                           port,
                           hive_auth="NOSASL",
                           key=None,
                           kinitcmd=None,
                           smokeuser='******',
                           transport_mode="binary",
                           http_endpoint="cliservice",
                           ssl=False,
                           ssl_keystore=None,
                           ssl_password=None,
                           check_command_timeout=30,
                           ldap_username="",
                           ldap_password=""):
    """
    Hive thrift SASL port check

    Builds a beeline JDBC url from the arguments, runs kinitcmd first when one
    is given and hive_auth is not "LDAP", and then executes the beeline
    command as smokeuser through Execute. Nothing is returned; failures
    surface as whatever Execute raises.

    A string ssl value is interpreted by its text: '', 'false', '0', 'no' and
    'off' (case-insensitive) disable SSL, any other string enables it.
    """

    # check params to be correctly passed, if not - try to cast them
    if isinstance(port, str):
        port = int(port)

    if isinstance(ssl, str):
        # bool() of any non-empty string is True, so "false" used to switch
        # SSL on; look at the text itself and keep all other values truthy
        ssl = ssl.strip().lower() not in ('', 'false', '0', 'no', 'off')

    # NOTE: format() is called without arguments further down, so it
    # presumably resolves the {placeholders} from the local variables of this
    # function (ssl_str, ldap_username, ...) - do not rename them.

    # to pass as beeline argument
    ssl_str = str(ssl).lower()
    beeline_url = [
        'jdbc:hive2://{address}:{port}/', "transportMode={transport_mode}"
    ]

    # append url according to used transport
    if transport_mode == "http":
        beeline_url.append('httpPath={http_endpoint}')

    # append url according to used auth
    if hive_auth == "NOSASL":
        beeline_url.append('auth=noSasl')

    credential_str = ""
    # append username and password for LDAP
    if hive_auth == "LDAP":
        credential_str = "-n '{ldap_username}' -p '{ldap_password!p}'"

    # append url according to ssl configuration
    if ssl and ssl_keystore is not None and ssl_password is not None:
        beeline_url.extend([
            'ssl={ssl_str}', 'sslTrustStore={ssl_keystore}',
            'trustStorePassword={ssl_password!p}'
        ])

    # append url according to principal and execute kinit
    if kinitcmd and hive_auth != "LDAP":
        beeline_url.append('principal={key}')

        # prevent concurrent kinit
        kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
        kinit_lock.acquire()
        try:
            Execute(kinitcmd, user=smokeuser)
        finally:
            kinit_lock.release()

    # the leading '!' inverts the pipeline status: the command fails when grep
    # finds one of the error markers in beeline's output
    cmd = "! beeline -u '%s' %s -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL'" % \
          (format(";".join(beeline_url)), format(credential_str))

    Execute(
        cmd,
        user=smokeuser,
        path=["/bin/", "/usr/bin/", "/usr/lib/hive/bin/", "/usr/sbin/"],
        timeout=check_command_timeout,
        timeout_kill_strategy=TerminateStrategy.KILL_PROCESS_TREE,
    )
def execute(configurations={}, parameters={}, host_name=None):
    """
    Probe the Livy server's /sessions endpoint with curl and report the outcome.

    Returns a (result_code, [label]) tuple. The code is 'OK' when the
    curl | grep 200 command executes without raising, 'CRITICAL' when it
    raises, and 'UNKNOWN' when configurations is None or the check itself
    breaks. Errors raised while reading the configuration or running kinit
    are not caught here.

    Keyword arguments:
    configurations (dictionary): a mapping of configuration key to value
    parameters (dictionary): a mapping of script parameter key to value
    host_name (string): the name of this host where the alert is running
    """

    if configurations is None:
        return ('UNKNOWN',
                ['There were no configurations supplied to the script.'])

    LIVY_PORT_DEFAULT = 8999

    if LIVY_SERVER_PORT_KEY in configurations:
        port = int(configurations[LIVY_SERVER_PORT_KEY])
    else:
        port = LIVY_PORT_DEFAULT

    # a configured Livy host takes precedence over the host_name argument;
    # the local fqdn is only the last resort
    if LIVY_SERVER_HOST_KEY in configurations:
        host_name = str(configurations[LIVY_SERVER_HOST_KEY])
    if host_name is None:
        host_name = socket.getfqdn()

    livyuser = configurations[SMOKEUSER_KEY]

    security_enabled = (
        SECURITY_ENABLED_KEY in configurations
        and str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE')

    smokeuser_kerberos_keytab = (configurations[SMOKEUSER_KEYTAB_KEY]
                                 if SMOKEUSER_KEYTAB_KEY in configurations
                                 else None)

    if SMOKEUSER_PRINCIPAL_KEY in configurations:
        smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY].replace(
            '_HOST', host_name.lower())
    else:
        smokeuser_principal = None

    # Get the configured Kerberos executable search paths, if any
    kerberos_executable_search_paths = (
        configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
        if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations else None)

    kinit_path_local = get_kinit_path(kerberos_executable_search_paths)

    # NOTE: format() is called without arguments, so it presumably resolves
    # the {placeholders} from this function's local variables - the names
    # used in the templates must stay as they are.
    if security_enabled:
        kinitcmd = format(
            "{kinit_path_local} -kt {smokeuser_kerberos_keytab} {smokeuser_principal}; "
        )
        # prevent concurrent kinit
        kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
        kinit_lock.acquire()
        try:
            Execute(kinitcmd, user=livyuser)
        finally:
            kinit_lock.release()

    # https is chosen by the mere presence of the key, whatever its value
    if LIVY_SSL_ENABLED_KEY in configurations:
        http_scheme = 'https'
    else:
        http_scheme = 'http'

    result_code = None
    try:
        started_at = time.time()
        try:
            livy2_livyserver_host = str(host_name)

            livy_cmd = format(
                "curl -s -o /dev/null -w'%{{http_code}}' --negotiate -u: -k {http_scheme}://{livy2_livyserver_host}:{port}/sessions | grep 200 "
            )

            Execute(livy_cmd,
                    tries=3,
                    try_sleep=1,
                    logoutput=True,
                    user=livyuser)

            elapsed = time.time() - started_at
            result_code = 'OK'
            label = OK_MESSAGE.format(elapsed, port)
        except:
            # the probe failed: report the traceback in the alert text
            failure_details = traceback.format_exc()
            result_code = 'CRITICAL'
            label = CRITICAL_MESSAGE.format(host_name, port, failure_details)
    except:
        label = traceback.format_exc()
        result_code = 'UNKNOWN'

    return (result_code, [label])
Beispiel #13
0
def execute(configurations={}, parameters={}, host_name=None):
    """
    Returns a tuple containing the result code and a pre-formatted result label

    Looks up this host's port in the configured Hive metastore uris and opens
    a TCP connection to it (after a kinit when security is enabled). The
    result is 'OK' when the connection succeeds, 'CRITICAL' when it does not
    or when no uri mentions the host, and 'UNKNOWN' when the configuration is
    missing or the check itself fails. The connect attempt is bounded by the
    check command timeout and the socket is always closed.

    Keyword arguments:
    configurations (dictionary): a mapping of configuration key to value
    parameters (dictionary): a mapping of script parameter key to value
    host_name (string): the name of this host where the alert is running
    """

    if configurations is None:
        return ('UNKNOWN',
                ['There were no configurations supplied to the script.'])

    if HIVE_METASTORE_URIS_KEY not in configurations:
        return ('UNKNOWN',
                ['Hive metastore uris were not supplied to the script.'])

    # script parameters are optional; treat None like an empty mapping
    if parameters is None:
        parameters = {}

    metastore_uris = configurations[HIVE_METASTORE_URIS_KEY].split(',')

    security_enabled = False
    if SECURITY_ENABLED_KEY in configurations:
        security_enabled = str(
            configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

    check_command_timeout = CHECK_COMMAND_TIMEOUT_DEFAULT
    if CHECK_COMMAND_TIMEOUT_KEY in parameters:
        check_command_timeout = float(parameters[CHECK_COMMAND_TIMEOUT_KEY])

    # defaults
    smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
    smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT
    smokeuser = SMOKEUSER_DEFAULT

    # check script params
    if SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters:
        smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY]

    if SMOKEUSER_SCRIPT_PARAM_KEY in parameters:
        smokeuser = parameters[SMOKEUSER_SCRIPT_PARAM_KEY]

    if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters:
        smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY]

    # check configurations last as they should always take precedence
    if SMOKEUSER_PRINCIPAL_KEY in configurations:
        smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY]

    if SMOKEUSER_KEY in configurations:
        smokeuser = configurations[SMOKEUSER_KEY]

    result_code = None

    try:
        if security_enabled:
            if SMOKEUSER_KEYTAB_KEY in configurations:
                smokeuser_keytab = configurations[SMOKEUSER_KEYTAB_KEY]

            # Get the configured Kerberos executable search paths, if any
            kerberos_executable_search_paths = None
            if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
                kerberos_executable_search_paths = configurations[
                    KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]

            # NOTE: format() is called without arguments, so it presumably
            # resolves the {placeholders} from the local variables of this
            # function - keep their names in sync with the template.
            kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
            kinitcmd = format(
                "{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}; "
            )

            # prevent concurrent kinit
            kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
            kinit_lock.acquire()
            try:
                Execute(kinitcmd,
                        user=smokeuser,
                        path=[
                            "/bin/", "/usr/bin/", "/usr/lib/hive/bin/",
                            "/usr/sbin/"
                        ],
                        timeout=10)
            finally:
                kinit_lock.release()

        if host_name is None:
            host_name = socket.getfqdn()

        # pick the port of the uri that mentions this host (the last one wins
        # when several do)
        port = None
        for uri in metastore_uris:
            if host_name in uri:
                port = urlparse(uri).port

        if port is None:
            # Without a port there is nothing to connect to; say so instead of
            # surfacing the TypeError the socket call would raise.
            return ('CRITICAL', [
                CRITICAL_MESSAGE.format(
                    host_name,
                    'No port for this host could be found in the configured Hive metastore uris.'
                )
            ])

        start_time = time.time()

        sock = None
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            # bound the connect attempt by the configured check timeout
            sock.settimeout(float(check_command_timeout))
            result = sock.connect_ex((host_name, port))

            total_time = time.time() - start_time

            if result == 0:
                result_code = 'OK'
                label = OK_MESSAGE.format(total_time)
            else:
                result_code = 'CRITICAL'
                label = NOT_LISTENING_MESSAGE.format(host_name, port)
        except Exception:
            result_code = 'CRITICAL'
            label = CRITICAL_MESSAGE.format(host_name, traceback.format_exc())
        finally:
            # never leak the probe socket, whatever the outcome
            if sock is not None:
                sock.close()

    except Exception:
        label = traceback.format_exc()
        result_code = 'UNKNOWN'

    return (result_code, [label])