Exemple #1
0
    def _check_is_all_crash(self):
        """
        Original method of checking whether all services are alive.
        Not used right now.

        Loads the heartbeats returned by ``heartbeatdb.get_heartbeats`` for the
        configured ``check_duration``, reduces them to distinct
        (ip, handler_name, pid) records, and saves two sets of counts through
        ``heartbeatdb.save_handler_counts``: one keyed by "<ip>_<handler_name>"
        and one keyed by handler name alone.

        If any handler listed in ``required_handlers`` has a count of zero, a
        notification email is sent, but no more often than once per
        ``notification_duration`` seconds.
        """
        config = self._heart_beat_config

        heartbeats = heartbeatdb.get_heartbeats(config["check_duration"])
        heartbeats = misc.cursor_to_array(heartbeats)
        heartbeats = misc.select(heartbeats, fields=["ip", "handler_name", "pid"])
        heartbeats = misc.distinct(heartbeats)

        # NOTE(review): misc.count presumably returns a dict-like mapping of
        # key -> occurrences (it is queried with .get() below) -- confirm.
        handler_counts_per_machine = misc.count(
            heartbeats,
            key=lambda heartbeat: "%s_%s" % (heartbeat["ip"], heartbeat["handler_name"]))
        heartbeatdb.save_handler_counts(
            simplejson.dumps(handler_counts_per_machine),
            type="handler_counts_per_machine")

        handler_counts = misc.count(
            heartbeats,
            key=lambda heartbeat: heartbeat["handler_name"])
        heartbeatdb.save_handler_counts(
            simplejson.dumps(handler_counts),
            type="handler_counts_total")

        # The message needs a placeholder for the extra argument; without one
        # the logging module reports a formatting error instead of the counts.
        logging.debug("current alive handler counts: %s", handler_counts)

        # Note: currently we will send email if no handler is running
        some_handler_missing = any(
            handler_counts.get(handler_name, 0) == 0
            for handler_name in config["required_handlers"])
        if not some_handler_missing:
            return

        # Throttle notifications: skip if one was sent within the last
        # notification_duration seconds.
        if self._last_notification_time is not None:
            elapsed = datetime.datetime.now() - self._last_notification_time
            if elapsed < datetime.timedelta(seconds=config["notification_duration"]):
                return

        email_body = "some handlers are not running:\n %s" % handler_counts_per_machine
        self._send_email(
            config["email_server"],
            config["email_from"],
            config["email_tos"],
            config["email_title"],
            email_body)
        self._last_notification_time = datetime.datetime.now()
        logging.error(
            "heartbeat server detects required handlers are not fully running, "
            "notification email sent: %s",
            handler_counts_per_machine)
Exemple #2
0
def _get_results_by_cache(cache_key, query_func, force, *args):
    """Return query results, served from ``global_db_caches`` while still fresh.

    Args:
        cache_key: key identifying the entry in ``global_db_caches``.
        query_func: callable invoked as ``query_func(*args)`` on a cache miss;
            its result is passed through ``misc.cursor_to_array``.
        force: when true, skip the cache lookup and always re-run the query.
        *args: positional arguments forwarded to ``query_func``.

    Returns:
        The cached results if an entry exists that is younger than
        ``common_settings.db_cache_expiry_duration`` seconds (and ``force`` is
        false); otherwise the freshly queried results, which are also stored
        in the cache with the current UTC time.
    """
    now = datetime.datetime.utcnow()

    # `in` replaces dict.has_key(), which no longer exists on Python 3.
    if not force and cache_key in global_db_caches:
        entry = global_db_caches[cache_key]
        max_age = datetime.timedelta(seconds=common_settings.db_cache_expiry_duration)
        if now - entry["last_retrieved_time"] < max_age:
            return entry["results"]

    # Cache miss, expired entry, or forced refresh: query and store.
    results = misc.cursor_to_array(query_func(*args))
    global_db_caches[cache_key] = {"last_retrieved_time": now, "results": results}
    return results
Exemple #3
0
    def _check(self):
        """Check recent heartbeats against the loaded config and report problems.

        Loads the heartbeats returned by ``heartbeatdb.get_heartbeats`` for the
        configured ``check_duration``, reduces them to distinct
        (ip, handler_name, pid) records, and passes the per-name counts together
        with the config loaded from ``config_path`` to ``self.check_detail``.

        When ``check_detail`` returns a non-empty result, the "text" of every
        entry is emailed (one per line), ``self.repair_service`` is invoked if
        ``repair_flag`` is set, and ``_last_notification_time`` is updated.
        """
        settings = self._heart_beat_config

        heartbeats = heartbeatdb.get_heartbeats(settings["check_duration"])
        heartbeats = misc.cursor_to_array(heartbeats)
        heartbeats = misc.select(heartbeats, fields=["ip", "handler_name", "pid"])
        heartbeats = misc.distinct(heartbeats, key=str)

        name_count = self.count_by_name(heartbeats)
        config = self.load_cfg(settings["config_path"])
        results = self.check_detail(
            config, name_count, config.sections(), settings["detail_flag"])

        # Nothing to report: no email, no repair, timestamp left untouched.
        if not results:
            return

        # NOTE(review): results is indexed by the items it yields on iteration,
        # so it is presumably a dict keyed by check name -- confirm.
        result_str = "".join(results[result]["text"] + "\n" for result in results)

        self._send_email(
            settings["email_server"],
            settings["email_from"],
            settings["email_tos"],
            settings["email_title"],
            result_str)

        if settings['repair_flag']:
            self.repair_service(results, config)

        self._last_notification_time = datetime.datetime.now()