def test_initialize_of_coordinator(self):
        """Check that one configuration message makes the coordinator open an
        SSH connection to both servers and restart the agent on each of them.
        """
        coordinator, mock_os_operator, mock_consumer = self.__get_mock_objects(
        )

        # Replace the heartbeat routine with a mock that just returns None.
        coordinator._MonitorCoordinator__process_heartbeat = MagicMock(
            return_value=None)

        # Run the coordinator loop over a single configuration message.
        messages = self.__get_mock_msg_list([self.config_msg])
        coordinator._MonitorCoordinator__coordinating_monitors(messages)

        # An SSH connection should have been attempted for each of the two
        # servers, using the default SSH credentials.
        expected_connections = [
            call(ANY, ANY, host, Mc.get_ssh_default_user(),
                 Mc.get_ssh_default_password())
            for host in (self.server1, self.server2)
        ]
        Mu.open_ssh_connection.assert_has_calls(expected_connections)

        # The agent should have been restarted for server ids 1 and 2, with
        # the check intervals taken from the configuration message.
        interval_keys = (self.mem_interval, self.cpu_interval,
                         self.disk_interval, self.instance_interval)
        expected_restarts = [
            call(ANY, server_id, "/usr/sap", Mc.get_agent_path(),
                 *[self.config_msg[key] for key in interval_keys])
            for server_id in (1, 2)
        ]
        mock_os_operator.restart_agent.assert_has_calls(expected_restarts)
 def __init__(self):
     """Create the OS operator, load the server list and record which agent
     files this object works with.

     Sets ``servers`` (result of ``__get_servers`` for a DAO built from the
     configured HANA connection settings), ``path`` (the configured agent
     path with the length of ``'agent.py'`` cut off its end -- presumably the
     directory holding the agent script) and ``files`` (agent-related file
     names).
     """
     self.__os_operator = LinuxOperator()

     # Read the HANA connection settings and build the DAO from them.
     hana_server = Mc.get_hana_server()
     hana_port = Mc.get_hana_port()
     hana_user = Mc.get_hana_user()
     hana_password = Mc.get_hana_password()
     dao = HANAMonitorDAO(hana_server, hana_port, hana_user, hana_password)
     self.servers = self.__get_servers(dao)

     # TODO: consider moving the values below into the configuration.
     script_name_length = len('agent.py')
     agent_path = Mc.get_agent_path()
     self.path = agent_path[:-script_name_length]
     self.files = [
         'agent.py',
         'util.py',
         'errors.py',
         'config/configuration.ini',
         'config/logging.ini',
     ]
Beispiel #3
0
    def __restart_agent_via_server_id(self, server_id):
        """
        Restart the agent of the given server, unless a restart was already
        triggered within the configured heartbeat restart interval.
        :param server_id: id of the server whose agent should be restarted
        """
        last_restart = self.__heartbeat_agent_restart_info.get(
            server_id, datetime.min)
        now = datetime.now()
        elapsed_seconds = (now - last_restart).total_seconds()
        restart_due = (
            elapsed_seconds >= self.__heartbeat_restart_agent_interval)

        if not restart_due:
            # Too soon since the previous restart attempt: only log it.
            time_format = "%Y-%m-%d %H:%M:%S"
            Mu.log_info(self.__logger, (
                "heartbeat failed for {0}, but did not try to restart agent due to the "
                "configured operation interval time ({1}). (pre: {2}, cur: {3})"
            ).format(server_id, self.__heartbeat_restart_agent_interval,
                     last_restart.strftime(time_format),
                     now.strftime(time_format)))
            return

        # Collect every configured server entry carrying the requested id
        # before any restart is attempted.
        matching_servers = []
        for candidate in self.__configs.get(Mc.DB_CONFIGURATION_SERVER, []):
            if candidate[Mc.FIELD_SERVER_ID] == server_id:
                matching_servers.append(candidate)

        for server in matching_servers:
            # Record the restart time first: restarting the agent takes more
            # than 2 minutes when the server is not responding.
            self.__heartbeat_agent_restart_info[server_id] = datetime.now()

            full_name = server[Mc.FIELD_SERVER_FULL_NAME]
            Mu.log_info(self.__logger,
                        "Restarting agent on {0}.".format(full_name))
            self.__restart_agent(
                server[Mc.FIELD_SERVER_FULL_NAME],
                server[Mc.FIELD_SERVER_ID],
                server[Mc.FIELD_MOUNT_POINT],
                Mc.get_agent_path(),
                self.__configs.get("CHECK_INTERVAL_MEM_INT", 60),
                self.__configs.get("CHECK_INTERVAL_CPU_INT", 300),
                self.__configs.get("CHECK_INTERVAL_DISK_INT", 3600),
                self.__configs.get("CHECK_INTERVAL_INSTANCE_INT", 300))
            Mu.log_info(
                self.__logger,
                "Restarting agent on {0} is finished.".format(
                    server[Mc.FIELD_SERVER_FULL_NAME]))
Beispiel #4
0
    def __coordinating_monitors(self, consumer):
        """
        Consume configuration messages and coordinate (start/stop/restart)
        all the agents accordingly. Also starts the heartbeat thread once the
        configuration check passes and the heartbeat flag is not yet set.
        :param consumer: kafka consumer; each message exposes its payload as
                         ``value``
        """
        Mu.log_debug(self.__logger,
                     "Coordinator is listening on topic for configurations.")
        for message in consumer:
            # Errors are logged per message so that one bad message does not
            # stop the loop.
            try:
                Mu.log_debug(self.__logger, "New configs are coming...")

                configs_changed = self.__update_configs(message.value)
                if configs_changed:
                    # Current design: any configuration change restarts the
                    # agents of all configured servers.
                    configured_servers = self.__configs.get(
                        Mc.DB_CONFIGURATION_SERVER, [])
                    for server in configured_servers:
                        restart_args = (
                            server[Mc.FIELD_SERVER_FULL_NAME],
                            server[Mc.FIELD_SERVER_ID],
                            server[Mc.FIELD_MOUNT_POINT],
                            Mc.get_agent_path(),
                            self.__configs.get("CHECK_INTERVAL_MEM_INT", 60),
                            self.__configs.get("CHECK_INTERVAL_CPU_INT", 300),
                            self.__configs.get("CHECK_INTERVAL_DISK_INT",
                                               3600),
                            self.__configs.get("CHECK_INTERVAL_INSTANCE_INT",
                                               300),
                        )
                        self.__restart_agent(*restart_args)

                configuration_ok = self.__check_configuration()
                if configuration_ok and not self.__heartbeat_flag:
                    # Set the flag first so the heartbeat thread is started
                    # only once.
                    self.__heartbeat_flag = True
                    threading.Thread(
                        target=self.__process_heartbeat).start()
            except Exception as ex:
                warning_text = (
                    "Error occurred when coordinating the monitors, Err: {0}".
                    format(ex))
                Mu.log_warning_exc(self.__logger, warning_text)