def run_remote_worker(worker_id, worker_path, id):
    """Run worker on remote machine.

    Loads the worker configuration, copies the recorded environment
    variables into ``os.environ``, changes into the recorded working
    directory, registers the backend named by ``BACKEND_TYPE`` and launches
    the worker through the GPU or NPU entry point. Every sub-process id
    returned by the launcher is cleaned up before returning.

    :param worker_id: worker identifier, also used in the log file name
    :param worker_path: directory passed to the config loader and used as log path
    :param id: identifier forwarded unchanged to the config loader and launcher
    :return: always 0
    """
    from zeus.common.utils import init_log
    init_log(level="info",
             log_file=".temp_{}.log".format(worker_id),
             log_path=worker_path)

    config = _load_config(worker_id, worker_path, id)
    env = config["env"]
    os.environ["LD_LIBRARY_PATH"] = env["LD_LIBRARY_PATH"]
    os.environ["PWD"] = env["PWD"]
    os.chdir(os.environ["PWD"])
    zeus.register_zeus(os.environ['BACKEND_TYPE'].lower())

    # Default to "nothing to clean up" so that an unsupported device type no
    # longer fails with UnboundLocalError in the cleanup loop below.
    sub_pid_list = []
    if zeus.is_gpu_device():
        sub_pid_list = call_in_gpu(config, id, worker_id, worker_path)
    elif zeus.is_npu_device():
        # The NPU path takes these additional variables from the config.
        os.environ["PYTHONPATH"] = env["PYTHONPATH"]
        os.environ["PATH"] = env["PATH"]
        os.environ["ASCEND_OPP_PATH"] = env["ASCEND_OPP_PATH"]
        sub_pid_list = call_in_npu(config, id, worker_id, worker_path)
    else:
        logging.warning("No GPU or NPU device detected, worker %s was not launched.", worker_id)

    logging.info("DistributedWorker finished!")
    for sub_pid in sub_pid_list:
        kill_proc_tree(pid=sub_pid)
    logging.info("DistributedWorker subprocess cleaned!")
    return 0
def train_process(self):
    """Run the evaluation workflow of this worker and broadcast the result.

    Sets up a per-worker log file, loads the model, builds the dataloader in
    ``'test'`` mode, calls ``self.valid()`` and hands the returned
    performance to ``self._broadcast``.
    """
    log_level = General.logger.level
    log_name = "device_evaluator_{}.log".format(self.worker_id)
    init_log(level=log_level, log_file=log_name, log_path=self.local_log_path)

    logging.info("start Davinci or mobile evaluate process")
    self.load_model()
    self.valid_loader = self._init_dataloader(mode='test')

    result = self.valid()
    logging.info("Evaluator result in Davinci/bolt: {}".format(result))
    self._broadcast(result)

    logging.info("finished Davinci or mobile evaluate for id {}".format(self.worker_id))
def _init_env(cfg_path):
    """Initialise the configuration and environment parameters.

    Loads the user config from ``cfg_path``, populates ``General`` from its
    ``general`` section, sets up logging, resolves the cluster arguments and
    finally selects the backend.

    :param cfg_path: config file path
    """
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)

    UserConfig().load(cfg_path)

    # Populate the general settings first; the log setup below reads them.
    general_section = UserConfig().data.get("general")
    General.from_json(general_section, skip_check=False)
    init_log(level=General.logger.level, log_path=TaskOps().local_log_path)

    # Fall back to local cluster arguments when env_args() returns nothing.
    cluster_args = env_args() or init_local_cluster_args()

    PipelineConfig.steps = UserConfig().data.pipeline
    General.env = cluster_args
    set_backend(General.backend, General.device_category)