def _check_is_all_crash(self):
    """
    Original method of checking whether all services are alive.
    Not used right now.

    Fetches the heartbeats for the configured "check_duration", narrows them
    to the "ip", "handler_name" and "pid" fields via misc.select and
    misc.distinct, then saves two JSON-encoded count tables through
    heartbeatdb.save_handler_counts: one keyed by "<ip>_<handler_name>" and
    one keyed by handler name.

    If any name listed in "required_handlers" has a count of zero, a
    notification email is sent, unless one was already sent less than
    "notification_duration" seconds ago.
    """
    config = self._heart_beat_config

    heartbeats = heartbeatdb.get_heartbeats(config["check_duration"])
    heartbeats = misc.cursor_to_array(heartbeats)
    heartbeats = misc.select(heartbeats, fields=["ip", "handler_name", "pid"])
    heartbeats = misc.distinct(heartbeats)

    handler_counts_per_machine = misc.count(
        heartbeats,
        key=lambda heartbeat: "%s_%s" % (heartbeat["ip"], heartbeat["handler_name"]))
    heartbeatdb.save_handler_counts(
        simplejson.dumps(handler_counts_per_machine),
        type="handler_counts_per_machine")

    handler_counts = misc.count(
        heartbeats,
        key=lambda heartbeat: heartbeat["handler_name"])
    heartbeatdb.save_handler_counts(
        simplejson.dumps(handler_counts),
        type="handler_counts_total")

    # The placeholder is required: without it the logging module cannot
    # merge the extra argument into the message.
    logging.debug("current alive handler counts: %s", handler_counts)

    # Note: currently we will send email if no handler is running.
    # any() is used instead of len(filter(...)) so the check does not depend
    # on filter() returning a list.
    handler_missing = any(
        handler_counts.get(handler_name, 0) == 0
        for handler_name in config["required_handlers"])
    if not handler_missing:
        return

    # Throttle: skip the email when the previous one is still recent.
    min_interval = datetime.timedelta(seconds=config["notification_duration"])
    if (self._last_notification_time is not None and
            datetime.datetime.now() - self._last_notification_time < min_interval):
        return

    email_body = "some handlers are not running:\n %s" % handler_counts_per_machine
    self._send_email(
        config["email_server"],
        config["email_from"],
        config["email_tos"],
        config["email_title"],
        email_body)
    self._last_notification_time = datetime.datetime.now()
    logging.error(
        "heartbeat server detects required handlers are not fully running, "
        "notification email sent: %s",
        handler_counts_per_machine)
def _get_results_by_cache(cache_key, query_func, force, *args):
    """
    Return the results of query_func(*args), reusing a cached copy when fresh.

    cache_key  -- key under which the results are stored in global_db_caches
    query_func -- callable invoked as query_func(*args) on a cache miss; its
                  return value is converted with misc.cursor_to_array
    force      -- when true, ignore any cached entry and always re-query
    *args      -- positional arguments forwarded to query_func

    A cached entry is reused only if it is younger than
    common_settings.db_cache_expiry_duration seconds. Otherwise the query is
    run again and the entry is replaced with the new results and timestamp.
    """
    now = datetime.datetime.utcnow()

    # "in" replaces dict.has_key(), which no longer exists on Python 3 dicts.
    if not force and cache_key in global_db_caches:
        cached = global_db_caches[cache_key]
        max_age = datetime.timedelta(seconds=common_settings.db_cache_expiry_duration)
        if now - cached["last_retrieved_time"] < max_age:
            return cached["results"]

    # Cache miss, expired entry, or forced refresh: query and store again.
    results = misc.cursor_to_array(query_func(*args))
    global_db_caches[cache_key] = {"last_retrieved_time": now, "results": results}
    return results
def _check(self):
    """
    Run the heartbeat check and email whatever self.check_detail reports.

    The heartbeats for the configured "check_duration" are narrowed to the
    "ip", "handler_name" and "pid" fields, counted with self.count_by_name
    and handed to self.check_detail together with the configuration loaded
    from "config_path". When check_detail returns a non-empty result, the
    "text" of every entry is mailed out (one entry per line), the services
    are repaired if "repair_flag" is set, and the notification time is
    recorded.
    """
    settings = self._heart_beat_config

    records = misc.cursor_to_array(
        heartbeatdb.get_heartbeats(settings["check_duration"]))
    records = misc.select(records, fields=["ip", "handler_name", "pid"])
    records = misc.distinct(records, key=str)

    name_count = self.count_by_name(records)
    config = self.load_cfg(settings["config_path"])
    results = self.check_detail(
        config, name_count, config.sections(), settings["detail_flag"])

    # Nothing reported: no email, no repair, notification time untouched.
    if len(results) == 0:
        return

    # One line of text per reported entry, each terminated by a newline.
    report = ''.join(results[key]["text"] + '\n' for key in results)

    self._send_email(
        settings["email_server"],
        settings["email_from"],
        settings["email_tos"],
        settings["email_title"],
        report)

    if settings['repair_flag']:
        self.repair_service(results, config)

    self._last_notification_time = datetime.datetime.now()