Example no. 1
0
 def __kill_all_processes(self):
     """Kill all processes that are still listed as running.

     Sends one ``kill -9 <pid> <pid> ...`` command per worker over SSH,
     covering every PID recorded for that worker, so only a single SSH
     round-trip per worker is needed.
     """
     with self.__running_processes_lock:
         if not self.__running_processes:
             return
         for worker, processes in self.__running_processes.items():
             # A worker with no recorded PIDs would yield a bare
             # "kill -9", which fails remotely -- skip it.
             if not processes:
                 continue
             kill_cmd = "kill -9" + "".join(
                 " {}".format(pid) for pid in processes)
             ssh_tools.worker_ssh(worker, kill_cmd)
Example no. 2
0
 def __kill_all_processes(self):
     """Kill all processes that are still listed as running.

     For each worker that still has PIDs recorded, a single
     ``kill -9 <pid> <pid> ...`` command is issued over SSH.
     """
     with self.__running_processes_lock:
         if not self.__running_processes:
             return
         for worker, processes in self.__running_processes.items():
             # Skip workers without PIDs: a bare "kill -9" would be
             # rejected by the remote shell.
             if not processes:
                 continue
             # One command listing all PIDs keeps this to a single
             # SSH round-trip per worker.
             kill_cmd = "kill -9" + "".join(
                 " {}".format(pid) for pid in processes)
             ssh_tools.worker_ssh(worker, kill_cmd)
Example no. 3
0
    def __worker_get_pids_from_file(worker, path):
        """Return the PIDs listed in a file on the specified worker node.

        The PID file is first rotated (renamed to ``<path>.0``) so that a
        later call does not report the same PIDs again; the rotated file is
        then read back over SSH.

        Args:
            worker: Worker node to read the file on.
            path: Path of the file containing whitespace-separated PIDs.

        Returns:
            A list of integer PIDs read from the rotated file.

        Raises:
            subprocess.CalledProcessError: Presumably raised by
                ``ssh_tools.worker_ssh`` when the remote command fails,
                e.g. when the rotated file does not exist (callers catch
                this).
        """
        rotated = "{0}.0".format(path)

        # Rotate the pid file, if it exists; the remote redirection
        # discards mv's error output.  (The original code passed
        # worker.hn() as an unused format argument; that dead
        # computation has been removed.)
        ssh_tools.worker_ssh(
            worker, "mv {0} {1} &> /dev/null".format(path, rotated))

        # Fetch the rotated file's content.
        content = ssh_tools.worker_ssh(worker, "cat {0}".format(rotated))

        return [int(pid) for pid in content.split()]
Example no. 4
0
    def __worker_get_pids_from_file(worker, path):
        """Return the PIDs listed in a file on the specified worker node.

        Rotates the PID file to ``<path>.0`` first so previously collected
        PIDs are not reported twice, then reads the rotated file via SSH.

        Args:
            worker: Worker node to read the file on.
            path: File containing whitespace-separated PIDs.

        Returns:
            A list of integer PIDs from the rotated file.

        Raises:
            subprocess.CalledProcessError: Presumably raised by
                ``ssh_tools.worker_ssh`` on remote failure, e.g. when the
                file does not exist yet (callers catch this).
        """
        rotated = "{0}.0".format(path)

        # Rotate the pid file, if it exists.  mv's output is discarded on
        # the remote side.  Note: the unused worker.hn() format argument
        # of the original code has been dropped.
        ssh_tools.worker_ssh(
            worker, "mv {0} {1} &> /dev/null".format(path, rotated))

        # Read back the rotated file's content.
        content = ssh_tools.worker_ssh(worker, "cat {0}".format(rotated))

        return [int(pid) for pid in content.split()]
Example no. 5
0
    def run(self):
        """Poll all workers for finished processes until stopped.

        Every ``self.__interval`` seconds, each worker's ``pids_successful``
        and ``pids_failed`` files are read (and rotated).  Finished PIDs are
        removed from ``self.__running_processes`` and their
        ``call_terminated`` callbacks are invoked; for each failed process
        the remote logfile is fetched and logged first.
        """
        self.__stop.clear()

        # One bookkeeping dict per worker: pid -> process object.
        for worker in network_emulator.NetworkEmulator.get_instance(
        ).cluster.worker:
            self.__running_processes[worker] = dict()

        # is_set() replaces the deprecated isSet() alias (same behavior).
        while not self.__stop.is_set():
            for worker in network_emulator.NetworkEmulator.get_instance(
            ).cluster.worker:
                successful_processes = []
                try:
                    successful_processes = self.__worker_get_pids_from_file(
                        worker,
                        os.path.join(
                            configuration.get_worker_working_directory(),
                            "pids_successful"))
                    logger.debug("Successful processes {!s}".format(
                        successful_processes))
                except subprocess.CalledProcessError:
                    # This is possible if the pids_successful file does not
                    # exist yet; treat it as "no successful processes".
                    pass

                failed_processes = []
                try:
                    failed_processes = self.__worker_get_pids_from_file(
                        worker,
                        os.path.join(
                            configuration.get_worker_working_directory(),
                            "pids_failed"))
                    logger.debug(
                        "Failed processes {!s}".format(failed_processes))
                except subprocess.CalledProcessError:
                    # This is possible if the pids_failed file does not
                    # exist yet; treat it as "no failed processes".
                    pass

                # For every failed process, retrieve and log the process's
                # output from the worker.
                for pid in failed_processes:
                    try:
                        cat_cmd = "cat {0}".format(
                            os.path.join(
                                configuration.get_worker_working_directory(),
                                "processes", str(pid)))
                        logfile_content = ssh_tools.worker_ssh(worker, cat_cmd)
                        logfile_formatted = utils.indent(logfile_content, 2)

                        logger.error(
                            "Process with PID {0} failed:\n{1}".format(
                                pid, logfile_formatted))
                    # "as err" replaces the Python-2-only "except E, err:".
                    except subprocess.CalledProcessError as err:
                        logger.error(
                            "Failed to retrieve logfile for process with PID %i"
                            % pid)
                        # A missing logfile is not allowed, as every
                        # daemonized process writes to a logfile.
                        raise err

                # Post-process successful and failed processes under the lock.
                with self.__running_processes_lock:
                    # All successful transmissions.
                    for pid in successful_processes:
                        if pid in self.__running_processes[worker]:
                            self.__running_processes[worker][
                                pid].call_terminated(
                                    process.Process.SUCCESSFUL)
                            del self.__running_processes[worker][pid]
                        else:
                            logger.error(
                                "PID of successful transmission not found")

                    # All unsuccessful transmissions.
                    for pid in failed_processes:
                        if pid in self.__running_processes[worker]:
                            self.__running_processes[worker][
                                pid].call_terminated(process.Process.FAILED)
                            del self.__running_processes[worker][pid]

            time.sleep(self.__interval)
Example no. 6
0
    def run(self):
        """Poll all workers for finished processes until stopped.

        Each polling round (every ``self.__interval`` seconds) reads and
        rotates the ``pids_successful`` and ``pids_failed`` files on every
        worker, logs the remote logfile of each failed process, invokes the
        ``call_terminated`` callback for each finished PID, and drops the
        PID from ``self.__running_processes``.
        """
        self.__stop.clear()

        # One bookkeeping dict per worker: pid -> process object.
        for worker in network_emulator.NetworkEmulator.get_instance().cluster.worker:
            self.__running_processes[worker] = dict()

        # is_set() replaces the deprecated isSet() alias (same behavior).
        while not self.__stop.is_set():
            for worker in network_emulator.NetworkEmulator.get_instance().cluster.worker:
                successful_processes = []
                try:
                    successful_processes = self.__worker_get_pids_from_file(
                        worker,
                        os.path.join(configuration.get_worker_working_directory(), "pids_successful"))
                    logger.debug("Successful processes {!s}".format(successful_processes))
                except subprocess.CalledProcessError:
                    # This is possible if the pids_successful file does not
                    # exist yet; treat it as "no successful processes".
                    pass

                failed_processes = []
                try:
                    failed_processes = self.__worker_get_pids_from_file(
                        worker,
                        os.path.join(configuration.get_worker_working_directory(), "pids_failed"))
                    logger.debug("Failed processes {!s}".format(failed_processes))
                except subprocess.CalledProcessError:
                    # This is possible if the pids_failed file does not
                    # exist yet; treat it as "no failed processes".
                    pass

                # For every failed process, retrieve and log the process's
                # output from the worker.
                for pid in failed_processes:
                    try:
                        cat_cmd = "cat {0}".format(
                            os.path.join(configuration.get_worker_working_directory(),
                                         "processes", str(pid)))
                        logfile_content = ssh_tools.worker_ssh(worker, cat_cmd)
                        logfile_formatted = utils.indent(logfile_content, 2)

                        logger.error("Process with PID {0} failed:\n{1}".format(
                            pid, logfile_formatted))
                    # "as err" replaces the Python-2-only "except E, err:".
                    except subprocess.CalledProcessError as err:
                        logger.error("Failed to retrieve logfile for process with PID %i" % pid)
                        # A missing logfile is not allowed, as every
                        # daemonized process writes to a logfile.
                        raise err

                # Post-process successful and failed processes under the lock.
                with self.__running_processes_lock:
                    # All successful transmissions.
                    for pid in successful_processes:
                        if pid in self.__running_processes[worker]:
                            self.__running_processes[worker][pid].call_terminated(
                                process.Process.SUCCESSFUL)
                            del self.__running_processes[worker][pid]
                        else:
                            logger.error("PID of successful transmission not found")

                    # All unsuccessful transmissions.
                    for pid in failed_processes:
                        if pid in self.__running_processes[worker]:
                            self.__running_processes[worker][pid].call_terminated(
                                process.Process.FAILED)
                            del self.__running_processes[worker][pid]

            time.sleep(self.__interval)