Example #1
0
    def _build_web_query(self, alert_uri):
        """
    Builds a URL out of the URI structure. If the URI is already a URL of
    the form http[s]:// then this will return the URI as the URL; otherwise,
    it will build the URL from the URI structure's elements
    """
        # shortcut if the supplied URI starts with the information needed
        string_uri = str(alert_uri.uri)
        if string_uri.startswith('http://') or string_uri.startswith(
                'https://'):
            return alert_uri.uri

        # start building the URL manually
        host = BaseAlert.get_host_from_url(alert_uri.uri)
        if host is None:
            host = self.host_name

        # maybe slightly realistic
        port = 80
        if alert_uri.is_ssl_enabled is True:
            port = 443

        # extract the port
        try:
            port = int(get_port_from_url(alert_uri.uri))
        except:
            pass

        scheme = 'http'
        if alert_uri.is_ssl_enabled is True:
            scheme = 'https'
        if OSCheck.is_windows_family():
            # on windows 0.0.0.0 is invalid address to connect but on linux it resolved to 127.0.0.1
            host = resolve_address(host)
        return "{0}://{1}:{2}".format(scheme, host, str(port))
Example #2
0
  def _build_web_query(self, alert_uri):
    """
    Builds a URL out of the URI structure. If the URI is already a URL of
    the form http[s]:// then this will return the URI as the URL; otherwise,
    it will build the URL from the URI structure's elements
    """
    # shortcut if the supplied URI starts with the information needed
    string_uri = str(alert_uri.uri)
    if string_uri.startswith('http://') or string_uri.startswith('https://'):
      return alert_uri.uri

    uri_path = None
    if string_uri and string_uri != str(None):
      uri_path = get_path_from_url(string_uri)

    # start building the URL manually
    host = BaseAlert.get_host_from_url(alert_uri.uri)
    if host is None:
      host = self.host_name

    # maybe slightly realistic
    port = 80
    if alert_uri.is_ssl_enabled is True:
      port = 443

    # extract the port
    try:
      port = int(get_port_from_url(alert_uri.uri))
    except:
      pass

    scheme = 'http'
    if alert_uri.is_ssl_enabled is True:
      scheme = 'https'

    if OSCheck.is_windows_family():
      # on windows 0.0.0.0 is invalid address to connect but on linux it resolved to 127.0.0.1
      host = resolve_address(host)

    if uri_path:
      return "{0}://{1}:{2}/{3}".format(scheme, host, str(port), uri_path)
    else:
      return "{0}://{1}:{2}".format(scheme, host, str(port))
Example #3
0
def execute(parameters=None, host_name=None):
    """
    Returns a tuple containing the result code and a pre-formatted result label

    Queries <scheme>://<uri>/ws/v1/node/info and reports OK when the
    nodeInfo/nodeHealthy value of the JSON response is true (compared
    case-insensitively as a string). A failed connection, an unparseable
    response or an unhealthy node is reported as CRITICAL; missing
    parameters are reported as UNKNOWN.

    Keyword arguments:
    parameters (dictionary): a mapping of parameter key to value
    host_name (string): the name of this host where the alert is running
    """
    if parameters is None:
        return (RESULT_CODE_UNKNOWN,
                ['There were no parameters supplied to the script.'])

    http_uri = parameters.get(NODEMANAGER_HTTP_ADDRESS_KEY)
    https_uri = parameters.get(NODEMANAGER_HTTPS_ADDRESS_KEY)
    http_policy = parameters.get(YARN_HTTP_POLICY_KEY, 'HTTP_ONLY')

    # determine the right URI and whether to use SSL; the HTTP address is
    # kept when HTTPS is requested but no HTTPS address was supplied
    scheme = 'http'
    uri = http_uri
    if http_policy == 'HTTPS_ONLY':
        scheme = 'https'

        if https_uri is not None:
            uri = https_uri

    # some yarn-site structures don't have the web ui address
    if uri is None:
        if host_name is None:
            host_name = socket.getfqdn()

        uri = '{0}:{1}'.format(host_name, NODEMANAGER_DEFAULT_PORT)

    if OSCheck.is_windows_family():
        uri_host, uri_port = uri.split(':')
        # on windows 0.0.0.0 is invalid address to connect but on linux it resolved to 127.0.0.1
        uri_host = resolve_address(uri_host)
        uri = '{0}:{1}'.format(uri_host, uri_port)

    try:
        query = "{0}://{1}/ws/v1/node/info".format(scheme, uri)

        # execute the query for the JSON that includes the node health
        url_response = urllib2.urlopen(query)
    except Exception:
        # any failure to reach the endpoint is a CRITICAL alert
        label = CRITICAL_CONNECTION_MESSAGE.format(uri)
        return (RESULT_CODE_CRITICAL, [label])

    # URL response received, parse it
    try:
        json_response = json.loads(url_response.read())

        # convert boolean to string
        node_healthy = str(json_response['nodeInfo']['nodeHealthy'])
    except Exception:
        # a malformed or unexpected payload is reported with the query used
        return (RESULT_CODE_CRITICAL, [query])
    finally:
        # release the connection; a failed close must not alter the result
        try:
            url_response.close()
        except Exception:
            pass

    # proper JSON received, compare against known value
    if node_healthy.lower() == 'true':
        return (RESULT_CODE_OK, [OK_MESSAGE])

    label = CRITICAL_NODEMANAGER_STATUS_MESSAGE.format(node_healthy)
    return (RESULT_CODE_CRITICAL, [label])
def execute(configurations={}, parameters={}, host_name=None):
  """
  Returns a tuple containing the result code and a pre-formatted result label

  Builds the <scheme>://<uri>/ws/v1/node/info query from the supplied
  configurations and fetches it. A kerberized curl request is used when
  security is enabled and both a keytab and a principal are configured;
  urllib2 is used otherwise. An HTTP error is reported as CRITICAL and
  missing configurations are reported as UNKNOWN.

  Keyword arguments:
  configurations (dictionary): a mapping of configuration key to value
  parameters (dictionary): a mapping of script parameter key to value
  host_name (string): the name of this host where the alert is running
  """
  # the dictionary defaults are shared between calls; they are only read here
  if configurations is None:
    return (RESULT_CODE_UNKNOWN, ['There were no configurations supplied to the script.'])

  # default to None so that a missing smoke user cannot leave the name unbound
  smokeuser = configurations.get(SMOKEUSER_KEY)

  security_enabled = False
  if SECURITY_ENABLED_KEY in configurations:
    security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

  kerberos_keytab = configurations.get(KERBEROS_KEYTAB)

  kerberos_principal = configurations.get(KERBEROS_PRINCIPAL)
  if kerberos_principal is not None:
    # the _HOST placeholder needs a real name; fall back to this host's FQDN
    # in the same way the URI fallback below does
    if host_name is None:
      host_name = socket.getfqdn()

    kerberos_principal = kerberos_principal.replace('_HOST', host_name)

  http_uri = configurations.get(NODEMANAGER_HTTP_ADDRESS_KEY)
  https_uri = configurations.get(NODEMANAGER_HTTPS_ADDRESS_KEY)
  http_policy = configurations.get(YARN_HTTP_POLICY_KEY, 'HTTP_ONLY')

  # parse script arguments
  connection_timeout = CONNECTION_TIMEOUT_DEFAULT
  if CONNECTION_TIMEOUT_KEY in parameters:
    connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])

  # determine the right URI and whether to use SSL; the HTTP address is
  # kept when HTTPS is requested but no HTTPS address was supplied
  scheme = 'http'
  uri = http_uri
  if http_policy == 'HTTPS_ONLY':
    scheme = 'https'

    if https_uri is not None:
      uri = https_uri

  # some yarn-site structures don't have the web ui address
  if uri is None:
    if host_name is None:
      host_name = socket.getfqdn()

    uri = '{0}:{1}'.format(host_name, NODEMANAGER_DEFAULT_PORT)

  if OSCheck.is_windows_family():
    uri_host, uri_port = uri.split(':')
    # on windows 0.0.0.0 is invalid address to connect but on linux it resolved to 127.0.0.1
    uri_host = resolve_address(uri_host)
    uri = '{0}:{1}'.format(uri_host, uri_port)

  query = "{0}://{1}/ws/v1/node/info".format(scheme, uri)

  try:
    if kerberos_principal is not None and kerberos_keytab is not None and security_enabled:
      env = Environment.get_instance()

      # curl requires an integer timeout
      curl_connection_timeout = int(connection_timeout)

      url_response, error_msg, time_millis = curl_krb_request(env.tmp_dir, kerberos_keytab, kerberos_principal,
        query, "nm_health_alert", None, False, "NodeManager Health", smokeuser,
        connection_timeout=curl_connection_timeout)

      json_response = json.loads(url_response)
    else:
      # execute the query for the JSON that includes the node health
      url_response = urllib2.urlopen(query, timeout=connection_timeout)
      try:
        json_response = json.loads(url_response.read())
      finally:
        # the body has been consumed, so the connection can be released
        url_response.close()
  except urllib2.HTTPError as httpError:
    label = CRITICAL_HTTP_STATUS_MESSAGE.format(str(httpError.code), query,
      str(httpError))

    return (RESULT_CODE_CRITICAL, [label])

  # NOTE(review): json_response is not evaluated and nothing is returned after
  # a successful request - the remainder of this check appears to be missing
  # here; confirm against the complete script
Example #5
0
  def _collect(self):
    """
    Checks that a TCP connection can be opened to the configured host and port.

    The host and port are parsed from the alert's URI value; the host falls
    back to self.host_name and the port to self.default_port. Returns a tuple
    of the result state and a list of label arguments:
      - (RESULT_UNKNOWN, [message]) when no port can be determined
      - (RESULT_OK or RESULT_WARNING, [seconds, port]) when connected, with
        WARNING once the connect time reaches self.warning_timeout
      - (RESULT_CRITICAL, [reason, host, port]) when the connection fails or
        the connect time reaches self.critical_timeout
    """
    # can be parameterized or static
    # if not parameterized, this will return the static value
    uri_value = self._get_configuration_value(self.uri)

    if uri_value is None:
      uri_value = self.host_name
      logger.debug("[Alert][{0}] Setting the URI to this host since it wasn't specified".format(
        self.get_name()))

    # in some cases, a single property is a comma-separated list like
    # host1:8080,host2:8081,host3:8083
    uri_value_array = uri_value.split(',')
    if len(uri_value_array) > 1:
      for item in uri_value_array:
        if self.host_name in item:
          uri_value = item
          if logger.isEnabledFor(logging.DEBUG):
            logger.debug("[Alert][{0}] Extracted {1} as the host name while parsing the CSV URI {2}".format(
              self.get_name(), uri_value, str(uri_value_array)))
          break

    host = BaseAlert.get_host_from_url(uri_value)
    if host is None:
      host = self.host_name

    try:
      port = int(get_port_from_url(uri_value))
    except Exception:
      # no usable port in the URI; use the default one when there is one
      if self.default_port is None:
        label = 'Unable to determine port from URI {0}'.format(uri_value)
        return (self.RESULT_UNKNOWN, [label])

      port = self.default_port

    if logger.isEnabledFor(logging.DEBUG):
      logger.debug("[Alert][{0}] Checking {1} on port {2}".format(
        self.get_name(), host, str(port)))

    # bound before the try so that the finally clause can always inspect it,
    # even when creating the socket itself fails
    s = None
    try:
      s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
      s.settimeout(self.critical_timeout)

      if OSCheck.is_windows_family():
        # on windows 0.0.0.0 is invalid address to connect but on linux it resolved to 127.0.0.1
        host = resolve_address(host)

      start_time = time.time()
      s.connect((host, port))

      # time.time() reports seconds, so the difference is already in seconds
      # and can be compared directly with the timeouts
      seconds = time.time() - start_time

      # not sure why this happens sometimes, but we don't always get a
      # socket exception if the connect() is > than the critical threshold
      if seconds >= self.critical_timeout:
        return (self.RESULT_CRITICAL, ['Socket Timeout', host, port])

      result = self.RESULT_OK
      if seconds >= self.warning_timeout:
        result = self.RESULT_WARNING

      return (result, [seconds, port])
    except Exception as e:
      return (self.RESULT_CRITICAL, [str(e), host, port])
    finally:
      if s is not None:
        try:
          s.close()
        except Exception:
          # no need to log a close failure
          pass
def execute(configurations={}, parameters={}, host_name=None):
    """
    Returns a tuple containing the result code and a pre-formatted result label

    Builds the <scheme>://<uri>/ws/v1/node/info query from the supplied
    configurations and opens it with the configured connection timeout. An
    HTTP error is reported as CRITICAL and missing configurations are
    reported as UNKNOWN.

    Keyword arguments:
    configurations (dictionary): a mapping of configuration key to value
    parameters (dictionary): a mapping of script parameter key to value
    host_name (string): the name of this host where the alert is running
    """
    # the dictionary defaults are shared between calls; they are only read here
    if configurations is None:
        return (RESULT_CODE_UNKNOWN,
                ['There were no configurations supplied to the script.'])

    http_uri = configurations.get(NODEMANAGER_HTTP_ADDRESS_KEY)
    https_uri = configurations.get(NODEMANAGER_HTTPS_ADDRESS_KEY)
    http_policy = configurations.get(YARN_HTTP_POLICY_KEY, 'HTTP_ONLY')

    # parse script arguments
    connection_timeout = CONNECTION_TIMEOUT_DEFAULT
    if CONNECTION_TIMEOUT_KEY in parameters:
        connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])

    # determine the right URI and whether to use SSL; the HTTP address is
    # kept when HTTPS is requested but no HTTPS address was supplied
    scheme = 'http'
    uri = http_uri
    if http_policy == 'HTTPS_ONLY':
        scheme = 'https'

        if https_uri is not None:
            uri = https_uri

    # some yarn-site structures don't have the web ui address
    if uri is None:
        if host_name is None:
            host_name = socket.getfqdn()

        uri = '{0}:{1}'.format(host_name, NODEMANAGER_DEFAULT_PORT)

    if OSCheck.is_windows_family():
        uri_host, uri_port = uri.split(':')
        # on windows 0.0.0.0 is invalid address to connect but on linux it resolved to 127.0.0.1
        uri_host = resolve_address(uri_host)
        uri = '{0}:{1}'.format(uri_host, uri_port)

    query = "{0}://{1}/ws/v1/node/info".format(scheme, uri)

    try:
        # execute the query for the JSON that includes the node health
        url_response = urllib2.urlopen(query, timeout=connection_timeout)
    except urllib2.HTTPError as httpError:
        label = CRITICAL_HTTP_STATUS_MESSAGE.format(str(httpError.code), query,
                                                    str(httpError))

        return (RESULT_CODE_CRITICAL, [label])

    # NOTE(review): url_response is neither read nor closed and nothing is
    # returned after a successful request - the remainder of this check
    # appears to be missing here; confirm against the complete script
Example #7
0
  def _collect(self):
    """
    Checks that a TCP connection can be opened to the configured host and port.

    The host and port are parsed from the alert's URI value; the port falls
    back to self.default_port. When the host is missing, "localhost" or
    "0.0.0.0", self.host_name is used and self.public_host_name is added as a
    second candidate if it differs. Candidates are tried in order. When
    self.socket_command is set it is sent after connecting, and the reply is
    compared with self.socket_command_response if that is set.

    Returns a tuple of the result state and a list of label arguments:
      - (RESULT_UNKNOWN, [message]) when no port can be determined
      - (RESULT_OK or RESULT_WARNING, [seconds, port]) for the first host
        that answers, with WARNING once the elapsed time reaches
        self.warning_timeout
      - (RESULT_CRITICAL, ['Socket Timeout', host, port]) when the elapsed
        time reaches self.critical_timeout
      - (RESULT_CRITICAL, [first error, first host, port]) when every
        candidate host failed
    """
    # can be parameterized or static
    # if not parameterized, this will return the static value
    uri_value = self._get_configuration_value(self.uri)

    host_not_specified = False
    if uri_value is None:
      host_not_specified = True
      uri_value = self.host_name
      logger.debug("[Alert][{0}] Setting the URI to this host since it wasn't specified".format(
        self.get_name()))

    # in some cases, a single property is a comma-separated list like
    # host1:8080,host2:8081,host3:8083
    uri_value_array = uri_value.split(',')
    if len(uri_value_array) > 1:
      for item in uri_value_array:
        if self.host_name in item:
          uri_value = item
          if logger.isEnabledFor(logging.DEBUG):
            logger.debug("[Alert][{0}] Extracted {1} as the host name while parsing the CSV URI {2}".format(
              self.get_name(), uri_value, str(uri_value_array)))
          break

    host = BaseAlert.get_host_from_url(uri_value)
    if host is None or host in ("localhost", "0.0.0.0"):
      host = self.host_name
      host_not_specified = True

    hosts = [host]
    # If host is not specified in the uri, hence we are using current host name
    # then also add public host name as a fallback.
    if host_not_specified and host.lower() == self.host_name.lower() \
      and self.host_name.lower() != self.public_host_name.lower():
      hosts.append(self.public_host_name)
    if logger.isEnabledFor(logging.DEBUG):
      logger.debug("[Alert][{0}] List of hosts = {1}".format(self.get_name(), hosts))

    try:
      port = int(get_port_from_url(uri_value))
    except Exception:
      # no usable port in the URI; use the default one when there is one
      if self.default_port is None:
        label = 'Unable to determine port from URI {0}'.format(uri_value)
        return (self.RESULT_UNKNOWN, [label])

      port = self.default_port

    # one entry per candidate host that could not be checked
    exceptions = []

    for host in hosts:
      if logger.isEnabledFor(logging.DEBUG):
        logger.debug("[Alert][{0}] Checking {1} on port {2}".format(
          self.get_name(), host, str(port)))

      s = None
      try:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.settimeout(self.critical_timeout)

        if OSCheck.is_windows_family():
          # on windows 0.0.0.0 is invalid address to connect but on linux it resolved to 127.0.0.1
          host = resolve_address(host)

        start_time = time.time()
        s.connect((host, port))
        if self.socket_command is not None:
          s.sendall(self.socket_command)
          data = s.recv(1024)
          if self.socket_command_response is not None and data != self.socket_command_response:
            raise Exception("Expected response {0}, Actual response {1}".format(
              self.socket_command_response, data))

        # time.time() reports seconds, so the difference is already in seconds
        # and can be compared directly with the timeouts
        seconds = time.time() - start_time

        # not sure why this happens sometimes, but we don't always get a
        # socket exception if the connect() is > than the critical threshold
        if seconds >= self.critical_timeout:
          return (self.RESULT_CRITICAL, ['Socket Timeout', host, port])

        result = self.RESULT_OK
        if seconds >= self.warning_timeout:
          result = self.RESULT_WARNING

        return (result, [seconds, port])
      except Exception as e:
        # remember the failure and move on to the next candidate host
        exceptions.append(e)
      finally:
        if s is not None:
          try:
            s.close()
          except Exception:
            # no need to log a close failure
            pass

    # only reached when no candidate host produced a result
    if exceptions:
      return (self.RESULT_CRITICAL, [str(exceptions[0]), hosts[0], port])
def execute(configurations={}, parameters={}, host_name=None):
  """
  Returns a tuple containing the result code and a pre-formatted result label

  Builds the <scheme>://<uri>/ws/v1/node/info query from the supplied
  configurations and opens it with the configured connection timeout. An
  HTTP error is reported as CRITICAL and missing configurations are reported
  as UNKNOWN.

  Keyword arguments:
  configurations (dictionary): a mapping of configuration key to value
  parameters (dictionary): a mapping of script parameter key to value
  host_name (string): the name of this host where the alert is running
  """
  # the dictionary defaults are shared between calls; they are only read here
  if configurations is None:
    return (RESULT_CODE_UNKNOWN, ['There were no configurations supplied to the script.'])

  http_uri = configurations.get(NODEMANAGER_HTTP_ADDRESS_KEY)
  https_uri = configurations.get(NODEMANAGER_HTTPS_ADDRESS_KEY)
  http_policy = configurations.get(YARN_HTTP_POLICY_KEY, 'HTTP_ONLY')

  # parse script arguments
  connection_timeout = CONNECTION_TIMEOUT_DEFAULT
  if CONNECTION_TIMEOUT_KEY in parameters:
    connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])

  # determine the right URI and whether to use SSL; the HTTP address is
  # kept when HTTPS is requested but no HTTPS address was supplied
  scheme = 'http'
  uri = http_uri
  if http_policy == 'HTTPS_ONLY':
    scheme = 'https'

    if https_uri is not None:
      uri = https_uri

  # some yarn-site structures don't have the web ui address
  if uri is None:
    if host_name is None:
      host_name = socket.getfqdn()

    uri = '{0}:{1}'.format(host_name, NODEMANAGER_DEFAULT_PORT)

  if OSCheck.is_windows_family():
    uri_host, uri_port = uri.split(':')
    # on windows 0.0.0.0 is invalid address to connect but on linux it resolved to 127.0.0.1
    uri_host = resolve_address(uri_host)
    uri = '{0}:{1}'.format(uri_host, uri_port)

  query = "{0}://{1}/ws/v1/node/info".format(scheme, uri)

  try:
    # execute the query for the JSON that includes the node health
    url_response = urllib2.urlopen(query, timeout=connection_timeout)
  except urllib2.HTTPError as httpError:
    label = CRITICAL_HTTP_STATUS_MESSAGE.format(str(httpError.code), query,
      str(httpError))

    return (RESULT_CODE_CRITICAL, [label])

  # NOTE(review): url_response is neither read nor closed and nothing is
  # returned after a successful request - the remainder of this check appears
  # to be missing here; confirm against the complete script
def execute(configurations={}, parameters={}, host_name=None):
  """
  Returns a tuple containing the result code and a pre-formatted result label

  Builds the <scheme>://<uri>/ws/v1/node/info query from the supplied
  configurations and fetches it. A kerberized curl request is used when
  security is enabled and both a keytab and a principal are configured;
  urllib2 is used otherwise. An HTTP error is reported as CRITICAL, including
  the formatted traceback, and missing configurations are reported as UNKNOWN.

  Keyword arguments:
  configurations (dictionary): a mapping of configuration key to value
  parameters (dictionary): a mapping of script parameter key to value
  host_name (string): the name of this host where the alert is running
  """
  # the dictionary defaults are shared between calls; they are only read here
  if configurations is None:
    return (RESULT_CODE_UNKNOWN, ['There were no configurations supplied to the script.'])

  # default to None so that a missing smoke user cannot leave the name unbound
  smokeuser = configurations.get(SMOKEUSER_KEY)

  executable_paths = configurations.get(EXECUTABLE_SEARCH_PATHS)

  security_enabled = False
  if SECURITY_ENABLED_KEY in configurations:
    security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

  kerberos_keytab = configurations.get(KERBEROS_KEYTAB)

  kerberos_principal = configurations.get(KERBEROS_PRINCIPAL)
  if kerberos_principal is not None:
    # the _HOST placeholder needs a real name; fall back to this host's FQDN
    # in the same way the URI fallback below does
    if host_name is None:
      host_name = socket.getfqdn()

    kerberos_principal = kerberos_principal.replace('_HOST', host_name)

  http_uri = configurations.get(NODEMANAGER_HTTP_ADDRESS_KEY)
  https_uri = configurations.get(NODEMANAGER_HTTPS_ADDRESS_KEY)
  http_policy = configurations.get(YARN_HTTP_POLICY_KEY, 'HTTP_ONLY')

  # parse script arguments
  connection_timeout = CONNECTION_TIMEOUT_DEFAULT
  if CONNECTION_TIMEOUT_KEY in parameters:
    connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])

  # determine the right URI and whether to use SSL; the HTTP address is
  # kept when HTTPS is requested but no HTTPS address was supplied
  scheme = 'http'
  uri = http_uri
  if http_policy == 'HTTPS_ONLY':
    scheme = 'https'

    if https_uri is not None:
      uri = https_uri

  # some yarn-site structures don't have the web ui address
  if uri is None:
    if host_name is None:
      host_name = socket.getfqdn()

    uri = '{0}:{1}'.format(host_name, NODEMANAGER_DEFAULT_PORT)

  if OSCheck.is_windows_family():
    uri_host, uri_port = uri.split(':')
    # on windows 0.0.0.0 is invalid address to connect but on linux it resolved to 127.0.0.1
    uri_host = resolve_address(uri_host)
    uri = '{0}:{1}'.format(uri_host, uri_port)

  query = "{0}://{1}/ws/v1/node/info".format(scheme, uri)

  try:
    if kerberos_principal is not None and kerberos_keytab is not None and security_enabled:
      env = Environment.get_instance()

      # curl requires an integer timeout
      curl_connection_timeout = int(connection_timeout)

      url_response, error_msg, time_millis = curl_krb_request(env.tmp_dir, kerberos_keytab, kerberos_principal,
        query, "nm_health_alert", executable_paths, False, "NodeManager Health", smokeuser,
        connection_timeout=curl_connection_timeout)

      json_response = json.loads(url_response)
    else:
      # execute the query for the JSON that includes the node health
      url_response = urllib2.urlopen(query, timeout=connection_timeout)
      try:
        json_response = json.loads(url_response.read())
      finally:
        # the body has been consumed, so the connection can be released
        url_response.close()
  except urllib2.HTTPError as httpError:
    label = CRITICAL_HTTP_STATUS_MESSAGE.format(str(httpError.code), query,
      str(httpError), traceback.format_exc())

    return (RESULT_CODE_CRITICAL, [label])

  # NOTE(review): json_response is not evaluated and nothing is returned after
  # a successful request - the remainder of this check appears to be missing
  # here; confirm against the complete script