def _collect(self):
  """
  Collects AMS metric alert data.

  Returns a 2-tuple: element 0 is the result code and element 1 is the
  list of arguments supplied to the reporting text for that result code.
  """
  if self.metric_info is None:
    raise Exception("Could not determine result. Specific metric collector is not defined.")

  if self.uri_property_keys is None:
    raise Exception("Could not determine result. URL(s) were not defined.")

  # resolve the URI lookup keys into the concrete URI to query
  resolved_uri = self._get_uri_from_structure(self.uri_property_keys)

  logger.debug("[Alert][{0}] Calculated metric URI to be {1} (ssl={2})".format(
    self.get_name(), resolved_uri.uri, str(resolved_uri.is_ssl_enabled)))

  # default the host to this agent when the URI does not carry one
  target_host = BaseAlert.get_host_from_url(resolved_uri.uri)
  if target_host is None:
    target_host = self.host_name

  try:
    target_port = int(get_port_from_url(resolved_uri.uri))
  except:
    # fall back to the well-known AMS collector port
    target_port = 6188

  collect_result = None
  value_list = []

  if isinstance(self.metric_info, AmsMetric):
    raw_data_points, http_code = self._load_metric(
      resolved_uri.is_ssl_enabled, target_host, target_port, self.metric_info)

    no_data = not raw_data_points
    good_http = http_code in [200, 307]

    if no_data and not good_http:
      # a bad HTTP response with no data means the metrics are unavailable
      collect_result = self.RESULT_UNKNOWN
      value_list.append('HTTP {0} response (metrics unavailable)'.format(str(http_code)))
    elif no_data and good_http:
      # a good HTTP response that yielded no data means the payload was unusable
      raise Exception("[Alert][{0}] Unable to extract JSON from HTTP response".format(self.get_name()))
    else:
      # reduce the raw points to a value, then to the final computation
      data_points = self.metric_info.calculate_value(raw_data_points)
      compute_result = self.metric_info.calculate_compute(data_points)
      value_list.append(compute_result)

      collect_result = self._get_result(
        value_list[0] if compute_result is None else compute_result)

  logger.debug("[Alert][{0}] Computed result = {1}".format(self.get_name(), str(value_list)))

  return (collect_result, value_list)
def _collect(self):
  """
  Low level function to collect JMX alert data.

  The result is a tuple:
    res[0] = the result code
    res[1] = the list of arguments supplied to the reporting text
  """
  # both the metric collector and the URI keys must have been configured
  if self.metric_info is None:
    raise Exception("Could not determine result. Specific metric collector is not defined.")

  if self.uri_property_keys is None:
    raise Exception("Could not determine result. URL(s) were not defined.")

  # use the URI lookup keys to get a final URI value to query
  alert_uri = self._get_uri_from_structure(self.uri_property_keys)

  logger.debug("[Alert][{0}] Calculated metric URI to be {1} (ssl={2})".format(
    self.get_name(), alert_uri.uri, str(alert_uri.is_ssl_enabled)))

  # fall back to this agent's host when the URI does not carry one
  host = BaseAlert.get_host_from_url(alert_uri.uri)
  if host is None:
    host = self.host_name

  port = 80  # fallback port used when none can be parsed from the URI
  try:
    port = int(get_port_from_url(alert_uri.uri))
  except:
    pass

  collect_result = None
  value_list = []

  if isinstance(self.metric_info, JmxMetric):
    jmx_property_values, http_code = self._load_jmx(alert_uri.is_ssl_enabled, host, port, self.metric_info)

    # NOTE(review): a 200/307 with no values is treated as UNKNOWN here while
    # a non-2xx with no values raises; the AMS variant in this file has these
    # two conditions inverted - confirm which orientation is intended.
    if not jmx_property_values and http_code in [200, 307]:
      collect_result = self.RESULT_UNKNOWN
      value_list.append('HTTP {0} response (metrics unavailable)'.format(str(http_code)))
    elif not jmx_property_values and http_code not in [200, 307]:
      raise Exception("[Alert][{0}] Unable to extract JSON from JMX response".format(self.get_name()))
    else:
      # the raw JMX values come first, then the computed check value is appended
      value_list.extend(jmx_property_values)
      check_value = self.metric_info.calculate(value_list)
      value_list.append(check_value)

      collect_result = self._get_result(value_list[0] if check_value is None else check_value)

  logger.debug("[Alert][{0}] Resolved values = {1}".format(self.get_name(), str(value_list)))

  return (collect_result, value_list)
def _build_web_query(self, alert_uri):
  """
  Turns a URI structure into a concrete URL.

  A URI that already looks like http[s]:// is returned untouched;
  otherwise the URL is assembled from the structure's host, port,
  SSL flag and (optional) path.
  """
  raw_uri = str(alert_uri.uri)

  # already a fully-formed URL; nothing to assemble
  if raw_uri.startswith(('http://', 'https://')):
    return alert_uri.uri

  url_path = get_path_from_url(raw_uri) if raw_uri and raw_uri != str(None) else None

  # assemble the URL piece by piece, defaulting the host to this agent
  web_host = BaseAlert.get_host_from_url(alert_uri.uri)
  if web_host is None:
    web_host = self.host_name

  # scheme default port first, then prefer an explicit port in the URI
  web_port = 443 if alert_uri.is_ssl_enabled is True else 80
  try:
    web_port = int(get_port_from_url(alert_uri.uri))
  except:
    pass

  scheme = 'https' if alert_uri.is_ssl_enabled is True else 'http'

  if OSCheck.is_windows_family():
    # on windows 0.0.0.0 is invalid address to connect but on linux it resolved to 127.0.0.1
    web_host = resolve_address(web_host)

  if url_path:
    return "{0}://{1}:{2}/{3}".format(scheme, web_host, str(web_port), url_path)

  return "{0}://{1}:{2}".format(scheme, web_host, str(web_port))
def _collect(self):
  """
  Low level function to collect JMX alert data; returns a tuple of
  (result code, list of arguments for the reporting text).
  """
  if self.metric_info is None:
    raise Exception("Could not determine result. Specific metric collector is not defined.")

  if self.uri_property_keys is None:
    raise Exception("Could not determine result. URL(s) were not defined.")

  # use the URI lookup keys to get a final URI value to query
  alert_uri = self._get_uri_from_structure(self.uri_property_keys)

  logger.debug("[Alert][{0}] Calculated metric URI to be {1} (ssl={2})".format(
    self.get_name(), alert_uri.uri, str(alert_uri.is_ssl_enabled)))

  # default to this agent's host when the URI has no host component
  host = BaseAlert.get_host_from_url(alert_uri.uri)
  if host is None:
    host = self.host_name

  port = 80  # fallback when no port can be parsed out of the URI
  try:
    port = int(get_port_from_url(alert_uri.uri))
  except:
    pass

  collect_result = None
  value_list = []

  if isinstance(self.metric_info, JmxMetric):
    # raw JMX values first, then the computed check value is appended
    value_list.extend(self._load_jmx(alert_uri.is_ssl_enabled, host, port, self.metric_info))
    check_value = self.metric_info.calculate(value_list)
    value_list.append(check_value)

    # NOTE(review): sibling variants of this method call self._get_result();
    # the double-underscore __get_result here is name-mangled to the defining
    # class - confirm the enclosing class actually defines it.
    collect_result = self.__get_result(value_list[0] if check_value is None else check_value)

  logger.debug("[Alert][{0}] Resolved values = {1}".format(
    self.get_name(), str(value_list)))

  return (collect_result, value_list)
def _build_web_query(self, alert_uri):
  """
  Turns a URI structure into a concrete URL. A URI already of the form
  http[s]:// is returned as-is; otherwise the URL is built from the
  structure's host, SSL flag and port.
  """
  raw_uri = str(alert_uri.uri)

  # short-circuit: the URI is already a usable URL
  if raw_uri.startswith(('http://', 'https://')):
    return alert_uri.uri

  # default the host to this agent when the URI carries none
  web_host = BaseAlert.get_host_from_url(alert_uri.uri)
  if web_host is None:
    web_host = self.host_name

  # scheme default port first, then prefer an explicit port in the URI
  web_port = 443 if alert_uri.is_ssl_enabled is True else 80
  try:
    web_port = int(get_port_from_url(alert_uri.uri))
  except:
    pass

  scheme = 'https' if alert_uri.is_ssl_enabled is True else 'http'

  if OSCheck.is_windows_family():
    # on windows 0.0.0.0 is invalid address to connect but on linux it resolved to 127.0.0.1
    web_host = resolve_address(web_host)

  return "{0}://{1}:{2}".format(scheme, web_host, str(web_port))
# HDFS namenode addressing; dfs_ha_nameservices, dfs_ha_enabled and
# namenode_rpc are defined earlier in this params file.
namenode_address = format('hdfs://{dfs_ha_nameservices}')
namenode_hosts = default("/clusterHostInfo/namenode_host", None)

# the cluster host info value may be either a list or a single host;
# isinstance replaces the non-idiomatic `type(x) is list` check
if isinstance(namenode_hosts, list):
  namenode_host = namenode_hosts[0]
else:
  namenode_host = namenode_hosts

# `is not None` replaces the PEP 8-violating `not x == None`
has_namenode = namenode_host is not None

# defaults used when hdfs-site does not override the addresses
namenode_http_port = "50070"
namenode_rpc_port = "8020"

if has_namenode:
  if 'dfs.namenode.http-address' in config['configurations']['hdfs-site']:
    namenode_http_port = get_port_from_url(config['configurations']['hdfs-site']['dfs.namenode.http-address'])
  # in HA mode the RPC port comes from the resolved namenode_rpc address
  if dfs_ha_enabled and namenode_rpc:
    namenode_rpc_port = get_port_from_url(namenode_rpc)
  else:
    if 'dfs.namenode.rpc-address' in config['configurations']['hdfs-site']:
      namenode_rpc_port = get_port_from_url(config['configurations']['hdfs-site']['dfs.namenode.rpc-address'])

# YARN resource manager host and default ports
rm_hosts = default("/clusterHostInfo/rm_host", None)
if isinstance(rm_hosts, list):
  rm_host = rm_hosts[0]
else:
  rm_host = rm_hosts
has_rm = rm_host is not None

jt_rpc_port = "8050"
rm_port = "8080"
# SQL Anywhere (type 2) JDBC driver locations; hive_lib, tmp_dir,
# jdbc_jar_name and sqla_db_used are defined earlier in this params file.
jdbc_libs_dir = format("{hive_lib}/native/lib64")
lib_dir_available = os.path.exists(jdbc_libs_dir)

if sqla_db_used:
  jars_path_in_archive = format("{tmp_dir}/sqla-client-jdbc/java/*")
  libs_path_in_archive = format("{tmp_dir}/sqla-client-jdbc/native/lib64/*")
  downloaded_custom_connector = format("{tmp_dir}/{jdbc_jar_name}")
  libs_in_hive_lib = format("{jdbc_libs_dir}/*")

# Start, Common Hosts and Ports
ambari_server_hostname = config['ambariLevelParams']['ambari_server_host']

hive_metastore_hosts = default('/clusterHostInfo/hive_metastore_hosts', [])
hive_metastore_host = hive_metastore_hosts[0] if len(hive_metastore_hosts) > 0 else None
# the metastore port is parsed out of the thrift URI in hive.metastore.uris
hive_metastore_port = get_port_from_url(config['configurations']['hive-site']['hive.metastore.uris'])

hive_server_hosts = default("/clusterHostInfo/hive_server_hosts", [])
hive_server_host = hive_server_hosts[0] if len(hive_server_hosts) > 0 else None

hive_server_interactive_hosts = default('/clusterHostInfo/hive_server_interactive_hosts', [])
hive_server_interactive_host = hive_server_interactive_hosts[0] if len(hive_server_interactive_hosts) > 0 else None
# more than one interactive host means HiveServer2 Interactive runs in HA mode
hive_server_interactive_ha = True if len(hive_server_interactive_hosts) > 1 else False
# End, Common Hosts and Ports

# HiveServer2 listens on a different port depending on the transport mode
hive_transport_mode = config['configurations']['hive-site']['hive.server2.transport.mode']
if hive_transport_mode.lower() == "http":
  hive_server_port = config['configurations']['hive-site']['hive.server2.thrift.http.port']
else:
  hive_server_port = default('/configurations/hive-site/hive.server2.thrift.port',"10000")
def _collect(self):
  """
  Low level function to collect alert data.

  The result is a tuple as:
  res[0] = the result code
  res[1] = the list of arguments supplied to the reporting text for the result code
  """
  if self.metric_info is None:
    raise Exception(
      "Could not determine result. Specific metric collector is not defined."
    )

  if self.uri_property_keys is None:
    raise Exception(
      "Could not determine result. URL(s) were not defined.")

  # use the URI lookup keys to get a final URI value to query
  alert_uri = self._get_uri_from_structure(self.uri_property_keys)

  if logger.isEnabledFor(logging.DEBUG):
    logger.debug(
      "[Alert][{0}] Calculated metric URI to be {1} (ssl={2})".
      format(self.get_name(), alert_uri.uri,
             str(alert_uri.is_ssl_enabled)))

  # fall back to this agent's host when the URI carries no host component
  host = inet_utils.get_host_from_url(alert_uri.uri)
  if host is None:
    host = self.host_name

  try:
    port = int(get_port_from_url(alert_uri.uri))
  except:
    # fall back to the well-known AMS collector port
    port = 6188

  collect_result = None
  value_list = []

  if isinstance(self.metric_info, AmsMetric):
    raw_data_points, http_code = self._load_metric(
      alert_uri.is_ssl_enabled, host, port, self.metric_info)

    # no data + bad HTTP code -> metrics unavailable (UNKNOWN);
    # no data despite a good HTTP code -> the JSON payload was unusable
    if not raw_data_points and http_code not in [200, 307]:
      collect_result = self.RESULT_UNKNOWN
      value_list.append(
        'HTTP {0} response (metrics unavailable)'.format(
          str(http_code)))
    elif not raw_data_points and http_code in [200, 307]:
      raise Exception(
        "[Alert][{0}] Unable to extract JSON from HTTP response".
        format(self.get_name()))
    else:
      # reduce the raw points to a value, then to the final computation
      data_points = self.metric_info.calculate_value(raw_data_points)
      compute_result = self.metric_info.calculate_compute(
        data_points)
      value_list.append(compute_result)

      collect_result = self._get_result(
        value_list[0] if compute_result is None else compute_result
      )

  if logger.isEnabledFor(logging.DEBUG):
    logger.debug("[Alert][{0}] Computed result = {1}".format(
      self.get_name(), str(value_list)))

  return (collect_result, value_list)
def _collect(self):
  """
  Checks that a TCP connection to the alert's host/port can be opened
  within the configured thresholds.

  Returns a (result code, argument list) tuple where the arguments feed
  the alert's reporting text.
  """
  # can be parameterized or static
  # if not parameterized, this will return the static value
  uri_value = self._get_configuration_value(self.uri)

  if uri_value is None:
    uri_value = self.host_name
    logger.debug("[Alert][{0}] Setting the URI to this host since it wasn't specified".format(
      self.get_name()))

  # in some cases, a single property is a comma-separated list like
  # host1:8080,host2:8081,host3:8083 - pick the entry naming this host
  uri_value_array = uri_value.split(',')
  if len(uri_value_array) > 1:
    for item in uri_value_array:
      if self.host_name in item:
        uri_value = item
        if logger.isEnabledFor(logging.DEBUG):
          logger.debug("[Alert][{0}] Extracted {1} as the host name while parsing the CSV URI {2}".format(
            self.get_name(), uri_value, str(uri_value_array)))
        break

  host = BaseAlert.get_host_from_url(uri_value)
  if host is None:
    host = self.host_name

  try:
    port = int(get_port_from_url(uri_value))
  except:
    if self.default_port is None:
      label = 'Unable to determine port from URI {0}'.format(uri_value)
      return (self.RESULT_UNKNOWN, [label])

    port = self.default_port

  if logger.isEnabledFor(logging.DEBUG):
    logger.debug("[Alert][{0}] Checking {1} on port {2}".format(
      self.get_name(), host, str(port)))

  # initialize before the try so the finally block can safely test it even
  # when socket creation itself raises (previously a NameError risk)
  s = None
  try:
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.settimeout(self.critical_timeout)

    if OSCheck.is_windows_family():
      # on windows 0.0.0.0 is invalid address to connect but on linux it resolved to 127.0.0.1
      host = resolve_address(host)

    start_time = time.time()
    s.connect((host, port))
    end_time = time.time()

    # time.time() deltas are already seconds; the old code divided by 1000,
    # which made the threshold comparisons below effectively never trigger
    seconds = end_time - start_time

    # not sure why this happens sometimes, but we don't always get a
    # socket exception if the connect() is > than the critical threshold
    if seconds >= self.critical_timeout:
      return (self.RESULT_CRITICAL, ['Socket Timeout', host, port])

    result = self.RESULT_OK
    if seconds >= self.warning_timeout:
      result = self.RESULT_WARNING

    return (result, [seconds, port])
  except Exception as e:
    return (self.RESULT_CRITICAL, [str(e), host, port])
  finally:
    if s is not None:
      try:
        s.close()
      except:
        # no need to log a close failure
        pass
def _collect(self):
  """
  Checks TCP connectivity to the alert's host/port, optionally exchanging
  a probe command/response, and falling back to the agent's public host
  name when the URI did not name a host.

  Returns a (result code, argument list) tuple.
  """
  # can be parameterized or static
  # if not parameterized, this will return the static value
  uri_value = self._get_configuration_value(self.uri)

  host_not_specified = False
  if uri_value is None:
    host_not_specified = True
    uri_value = self.host_name
    logger.debug("[Alert][{0}] Setting the URI to this host since it wasn't specified".format(
      self.get_name()))

  # in some cases, a single property is a comma-separated list like
  # host1:8080,host2:8081,host3:8083 - pick the entry naming this host
  uri_value_array = uri_value.split(',')
  if len(uri_value_array) > 1:
    for item in uri_value_array:
      if self.host_name in item:
        uri_value = item
        if logger.isEnabledFor(logging.DEBUG):
          logger.debug("[Alert][{0}] Extracted {1} as the host name while parsing the CSV URI {2}".format(
            self.get_name(), uri_value, str(uri_value_array)))
        break

  host = BaseAlert.get_host_from_url(uri_value)
  if host is None or host == "localhost" or host == "0.0.0.0":
    host = self.host_name
    host_not_specified = True

  hosts = [host]
  # If host is not specified in the uri, hence we are using current host name
  # then also add public host name as a fallback.
  if host_not_specified and host.lower() == self.host_name.lower() \
    and self.host_name.lower() != self.public_host_name.lower():
    hosts.append(self.public_host_name)
  if logger.isEnabledFor(logging.DEBUG):
    logger.debug("[Alert][{0}] List of hosts = {1}".format(self.get_name(), hosts))

  try:
    port = int(get_port_from_url(uri_value))
  except:
    if self.default_port is None:
      label = 'Unable to determine port from URI {0}'.format(uri_value)
      return (self.RESULT_UNKNOWN, [label])

    port = self.default_port

  exceptions = []

  for host in hosts:
    if logger.isEnabledFor(logging.DEBUG):
      logger.debug("[Alert][{0}] Checking {1} on port {2}".format(
        self.get_name(), host, str(port)))

    s = None
    try:
      s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
      s.settimeout(self.critical_timeout)

      if OSCheck.is_windows_family():
        # on windows 0.0.0.0 is invalid address to connect but on linux it resolved to 127.0.0.1
        host = resolve_address(host)

      start_time = time.time()
      s.connect((host, port))

      # optionally exchange a probe command/response over the socket
      if self.socket_command is not None:
        s.sendall(self.socket_command)
        data = s.recv(1024)
        if self.socket_command_response is not None and data != self.socket_command_response:
          raise Exception("Expected response {0}, Actual response {1}".format(
            self.socket_command_response, data))
      end_time = time.time()

      # time.time() deltas are already seconds; the old divide-by-1000 made
      # the threshold comparisons below effectively never trigger
      seconds = end_time - start_time

      # not sure why this happens sometimes, but we don't always get a
      # socket exception if the connect() is > than the critical threshold
      if seconds >= self.critical_timeout:
        return (self.RESULT_CRITICAL, ['Socket Timeout', host, port])

      result = self.RESULT_OK
      if seconds >= self.warning_timeout:
        result = self.RESULT_WARNING

      return (result, [seconds, port])
    except Exception as e:
      # remember the failure and try the next candidate host
      exceptions.append(e)
    finally:
      if s is not None:
        try:
          s.close()
        except:
          # no need to log a close failure
          pass

  if exceptions:
    return (self.RESULT_CRITICAL, [str(exceptions[0]), hosts[0], port])
check_db_connection_jar = format("/usr/lib/ambari-agent/{check_db_connection_jar_name}")
# JDBC driver classes supported for Hive metastore connectivity checks
hive_jdbc_drivers_list = [
  "com.microsoft.sqlserver.jdbc.SQLServerDriver",
  "com.mysql.jdbc.Driver",
  "org.postgresql.Driver",
  "oracle.jdbc.driver.OracleDriver",
]
downloaded_custom_connector = format("{tmp_dir}/{jdbc_jar_name}")
prepackaged_ojdbc_symlink = format("{hive_lib}/ojdbc6.jar")

templeton_port = config["configurations"]["webhcat-site"]["templeton.port"]

# common
hive_metastore_hosts = config["clusterHostInfo"]["hive_metastore_host"]
hive_metastore_host = hive_metastore_hosts[0]
hive_metastore_port = get_port_from_url(config["configurations"]["hive-site"]["hive.metastore.uris"])  # "9083"
ambari_server_hostname = config["clusterHostInfo"]["ambari_server_host"][0]
hive_server_host = config["clusterHostInfo"]["hive_server_host"][0]
hive_server_hosts = config["clusterHostInfo"]["hive_server_host"]

# HiveServer2 listens on a different port depending on the transport mode
hive_transport_mode = config["configurations"]["hive-site"]["hive.server2.transport.mode"]
if hive_transport_mode.lower() == "http":
  hive_server_port = config["configurations"]["hive-site"]["hive.server2.thrift.http.port"]
else:
  hive_server_port = default("/configurations/hive-site/hive.server2.thrift.port", "10000")

hive_url = format("jdbc:hive2://{hive_server_host}:{hive_server_port}")
# fixed typo: the lookup path previously read "/confiurations/..." which never
# matched, so the configured value was ignored and "cliservice" always won
hive_http_endpoint = default("/configurations/hive-site/hive.server2.thrift.http.path", "cliservice")
hive_server_principal = config["configurations"]["hive-site"]["hive.server2.authentication.kerberos.principal"]
hive_server2_authentication = config["configurations"]["hive-site"]["hive.server2.authentication"]
# params from nifi-toolkit-env nifi_toolkit_java_options = config['configurations']['nifi-toolkit-env'][ 'nifi_toolkit_java_options'] if 'nifi-toolkit-env' in config[ 'configurations'] else '-Xms128m -Xmx256m' nifi_toolkit_conf_dir = '/etc/nifi-toolkit' # autodetect jdk home jdk64_home = config['ambariLevelParams']['java_home'] # autodetect ambari server for metrics if 'metrics_collector_hosts' in config['clusterHostInfo']: metrics_collector_host = str( config['clusterHostInfo']['metrics_collector_hosts'][0]) metrics_collector_port = str( get_port_from_url(config['configurations']['ams-site'] ['timeline.metrics.service.webapp.address'])) else: metrics_collector_host = '' metrics_collector_port = '' # detect zookeeper_quorum zookeeper_port = default('/configurations/zoo.cfg/clientPort', None) # get comma separated list of zookeeper hosts from clusterHostInfo index = 0 zookeeper_quorum = "" zk_hosts_property = 'zookeeper_hosts' if 'zookeeper_hosts' in config[ 'clusterHostInfo'] else "zookeeper_server_hosts" for host in config['clusterHostInfo'][zk_hosts_property]: zookeeper_quorum += host + ":" + str(zookeeper_port) index += 1 if index < len(config['clusterHostInfo'][zk_hosts_property]):
templeton_port = config['configurations']['webhcat-site']['templeton.port']

#constants for type2 jdbc
jdbc_libs_dir = format("{hive_lib}/native/lib64")
lib_dir_available = os.path.exists(jdbc_libs_dir)

if sqla_db_used:
  jars_path_in_archive = format("{tmp_dir}/sqla-client-jdbc/java/*")
  libs_path_in_archive = format("{tmp_dir}/sqla-client-jdbc/native/lib64/*")
  downloaded_custom_connector = format("{tmp_dir}/sqla-client-jdbc.tar.gz")
  libs_in_hive_lib = format("{jdbc_libs_dir}/*")

#common
hive_metastore_hosts = config['clusterHostInfo']['hive_metastore_host']
hive_metastore_host = hive_metastore_hosts[0]
hive_metastore_port = get_port_from_url(config['configurations']['hive-site']['hive.metastore.uris']) #"9083"
ambari_server_hostname = config['clusterHostInfo']['ambari_server_host'][0]
hive_server_host = config['clusterHostInfo']['hive_server_host'][0]
hive_server_hosts = config['clusterHostInfo']['hive_server_host']

# HiveServer2 listens on a different port depending on the transport mode
hive_transport_mode = config['configurations']['hive-site']['hive.server2.transport.mode']
if hive_transport_mode.lower() == "http":
  hive_server_port = config['configurations']['hive-site']['hive.server2.thrift.http.port']
else:
  hive_server_port = default('/configurations/hive-site/hive.server2.thrift.port',"10000")

hive_url = format("jdbc:hive2://{hive_server_host}:{hive_server_port}")
# fixed typo: the lookup path previously read '/confiurations/...' which never
# matched, so the configured value was ignored and "cliservice" always won
hive_http_endpoint = default('/configurations/hive-site/hive.server2.thrift.http.path', "cliservice")
hive_server_principal = config['configurations']['hive-site']['hive.server2.authentication.kerberos.principal']
hive_server2_authentication = config['configurations']['hive-site']['hive.server2.authentication']