Example #1
from typing import Iterator


# Cluster, ProcessInfo and is_valid_process are defined elsewhere in the project.
def get_processes_by_hostname(cluster: Cluster, hostname: str) -> Iterator[ProcessInfo]:
    for (_, process) in cluster.get_processes(hostname=hostname):
        if is_valid_process(process):
            yield process
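
A minimal usage sketch for the generator above: the working-directory path and hostname are placeholders, constructing Cluster from a directory mirrors Example #2 below, and the pid attribute is assumed from the monitoring code there.

# Hypothetical usage; the path, hostname and the pid attribute are assumptions.
cluster = Cluster("/tmp/cluster-workdir")
for process in get_processes_by_hostname(cluster, "node-01"):
    print(process.pid)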
Example #2
import dataclasses
import functools
import logging
import time
from multiprocessing import Pool
from pathlib import Path
from typing import List


# Cluster, ClusterInfo, Process, StartProcessArgs, CLUSTER_FILENAME and the
# helper functions used below are defined elsewhere in the project.
class ClusterHelper:
    def __init__(self, cluster_info: ClusterInfo, workdir: Path):
        self.cluster_info = cluster_info
        self.workdir = workdir
        self.workdir.mkdir(exist_ok=True, parents=True)

        self.cluster = Cluster(str(self.workdir))

    @property
    def active_nodes(self) -> List[str]:
        return list(self.cluster.nodes.keys())

    @property
    def processes(self) -> List[Process]:
        processes = []
        for node in self.cluster.nodes.values():
            processes += node.processes
        return processes

    def commit(self):
        # Persist the current cluster state to disk under CLUSTER_FILENAME.
        with open(self.workdir / CLUSTER_FILENAME, "w") as f:
            self.cluster.serialize(f)

    def stop(self, use_sigint=False):
        start = time.time()

        # kill_fn is a project-level helper; bind the signal choice before handing it to Cluster.kill.
        fn = functools.partial(kill_fn, use_sigint)
        self.cluster.kill(fn)
        logging.debug(f"Cluster killed in {time.time() - start:.2f} seconds")

    def start_processes(self, processes: List[StartProcessArgs]):
        def prepare_workdir(workdir: Path) -> Path:
            # Fall back to the helper's own workdir when the process does not specify one.
            workdir = workdir or self.workdir
            workdir.mkdir(parents=True, exist_ok=True)
            return workdir.absolute()

        pool_args = [
            dataclasses.replace(args, workdir=prepare_workdir(args.workdir))
            for args in processes
        ]

        logging.debug(f"Starting cluster processes: {pool_args}")

        for process in pool_args:
            logging.debug(f"Command: {' '.join(process.args)}")
        spawned = []
        # A single process is started directly; multiple processes are spawned in parallel via a Pool.
        if len(pool_args) == 1:
            spawned.append(start_process_pool(pool_args[0]))
        else:
            with Pool() as pool:
                spawned.extend(pool.map(start_process_pool, pool_args))

        for (process, args) in zip(spawned, pool_args):
            self.cluster.add(process=process, key=args.name, **args.metadata)

    def start_monitoring(self, nodes: List[str], observe_processes=False):
        if not self.cluster_info.monitor_nodes:
            return

        init_cmd = []
        pyenv = get_pyenv_from_env()
        if pyenv:
            init_cmd += [f"source {pyenv}/bin/activate"]
        else:
            logging.warning(
                "No Python virtualenv detected. Monitoring will probably not work."
            )

        nodes = sorted(set(nodes))
        workdir = self.workdir / "monitoring"
        processes = []
        for node in nodes:
            args = [
                "python",
                str(MONITOR_SCRIPT_PATH),
                str(node_monitoring_trace(self.workdir, node)),
            ]
            if observe_processes:
                node_processes = self.cluster.get_processes(hostname=node)
                pids = [str(process.pid) for (_, process) in node_processes]
                args += ["--observe-pids", ",".join(pids)]
            process = StartProcessArgs(
                args=args,
                hostname=node,
                name="monitor",
                workdir=workdir,
                init_cmd=init_cmd,
            )
            processes.append(process)
        self.start_processes(processes)
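
A hypothetical driver for the class above could look like the sketch below. The ClusterInfo constructor, the exact StartProcessArgs field set and defaults (including metadata), and the paths and hostnames are assumptions inferred from the attribute accesses in the listing, not part of the original code.

# Hypothetical driver; ClusterInfo(monitor_nodes=...) and the StartProcessArgs
# fields below are inferred from the attribute accesses in the class above.
helper = ClusterHelper(ClusterInfo(monitor_nodes=True), Path("/tmp/cluster"))
helper.start_processes([
    StartProcessArgs(
        args=["python", "worker.py"],
        hostname="node-01",
        name="worker",
        workdir=None,  # falls back to the helper's workdir
        init_cmd=[],
        metadata={},
    )
])
helper.start_monitoring(helper.active_nodes, observe_processes=True)
helper.commit()
helper.stop(use_sigint=True)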