def user_define_engine(engine_yaml):
    """Load a user-defined engine config and resolve its trainer.

    The YAML file is loaded with ``envs.load_yaml`` and exported through
    ``envs.set_runtime_environs``.  The ``engine.file`` global setting is then
    read back: its directory is appended to ``sys.path`` and its file name,
    without extension, is passed to ``envs.lazy_instance_by_fliename`` together
    with the class name ``"UserDefineTraining"``.

    Args:
        engine_yaml: Path of the engine YAML file.

    Returns:
        The value returned by ``envs.lazy_instance_by_fliename``.
    """
    envs.set_runtime_environs(envs.load_yaml(engine_yaml))

    engine_file = envs.get_global_env("engine.file")
    module_dir, module_file = os.path.split(engine_file)
    module_name, _ext = os.path.splitext(module_file)

    # Make the directory holding the user's trainer module importable.
    sys.path.append(module_dir)
    return envs.lazy_instance_by_fliename(module_name, "UserDefineTraining")
def set_runtime_envs(cluster_envs, engine_yaml):
    """Export the cluster settings and print the trainer runtime variables.

    Args:
        cluster_envs: Mapping handed to ``envs.set_runtime_environs``; ``None``
            is treated as an empty dict.
        engine_yaml: Not used by this implementation.
    """
    envs.set_runtime_environs({} if cluster_envs is None else cluster_envs)

    # Only process environment variables under the trainer prefix are shown.
    trainer_envs = {
        name: value
        for name, value in os.environ.items()
        if name.startswith("train.trainer.")
    }
    print(envs.pretty_print_envs(trainer_envs, ("Runtime Envs", "Value")))
def master():
    """Prepare the submit environment and build the cluster engine.

    The backend YAML named by ``args.backend`` is loaded and flattened with
    ``"_"`` as separator.  The engine role (``"MASTER"``), the model config
    path and a freshly created temporary directory are added to the flattened
    mapping, which is then exported with ``envs.set_runtime_environs`` and
    printed.

    Returns:
        ``ClusterEngine(None, args.model)``.
    """
    from paddlerec.core.engine.cluster.cluster import ClusterEngine

    backend_config = envs.load_yaml(args.backend)
    submit_envs = envs.flatten_environs(backend_config, "_")

    submit_envs["engine_role"] = "MASTER"
    submit_envs["engine_run_config"] = args.model
    submit_envs["engine_temp_path"] = tempfile.mkdtemp()
    envs.set_runtime_environs(submit_envs)

    table = envs.pretty_print_envs(submit_envs, ("Submit Envs", "Value"))
    print(table)

    return ClusterEngine(None, args.model)
def user_define_engine(engine_yaml):
    """Parse a user-defined engine YAML and resolve its trainer.

    The file is read and parsed with ``yaml.load`` using ``yaml.FullLoader``;
    the parsed config must not be ``None``.  The config is exported through
    ``envs.set_runtime_environs``, after which the ``engine.file`` global
    setting is read back: its directory is appended to ``sys.path`` and its
    file name, without extension, is passed to
    ``envs.lazy_instance_by_fliename`` together with the class name
    ``"UserDefineTraining"``.

    Args:
        engine_yaml: Path of the engine YAML file.

    Returns:
        The value returned by ``envs.lazy_instance_by_fliename``.

    Raises:
        AssertionError: If the YAML file parses to ``None``.
    """
    # NOTE(review): parsed with yaml.FullLoader -- confirm engine_yaml always
    # comes from a trusted source.
    with open(engine_yaml, 'r') as stream:
        config = yaml.load(stream.read(), Loader=yaml.FullLoader)
    assert config is not None

    envs.set_runtime_environs(config)

    engine_file = envs.get_global_env("engine.file")
    module_dir, module_file = os.path.split(engine_file)
    module_name, _ext = os.path.splitext(module_file)

    # Make the directory holding the user's trainer module importable.
    sys.path.append(module_dir)
    return envs.lazy_instance_by_fliename(module_name, "UserDefineTraining")
def master():
    """Build the MASTER-role cluster engine for the current runner mode.

    Reads the ``runner.<mode>.*`` settings of ``args.model`` to determine the
    fleet mode (default ``"ps"``) and device (default ``"cpu"``), selects the
    phases that apply to the current mode, and derives the largest
    ``thread_num`` among them.  These values are merged into the flattened
    backend config from ``args.backend`` and exported with
    ``envs.set_runtime_environs``.

    Returns:
        ``ClusterEngine(None, args.model)``.

    Raises:
        ValueError: If the fleet mode is ``COLLECTIVE`` while the device is
            not ``GPU``, or if no phase is selected for the current mode
            (previously an opaque ``max()`` / iteration error).
    """
    from paddlerec.core.engine.cluster.cluster import ClusterEngine

    # Get fleet_mode & device
    run_extras = get_all_inters_from_yaml(args.model, ["runner."])
    mode = envs.get_runtime_environ("mode")
    fleet_class = ".".join(["runner", mode, "fleet_mode"])
    device_class = ".".join(["runner", mode, "device"])
    fleet_mode = run_extras.get(fleet_class, "ps")
    device = run_extras.get(device_class, "cpu")
    device = device.upper()
    fleet_mode = fleet_mode.upper()

    if fleet_mode == "COLLECTIVE" and device != "GPU":
        raise ValueError("COLLECTIVE can not be used without GPU")

    # Get Thread nums
    model_envs = envs.load_yaml(args.model)
    phases_class = ".".join(["runner", mode, "phases"])
    phase_names = run_extras.get(phases_class)
    # A config without a "phase" entry is treated as having no phases.
    all_phases = model_envs.get("phase") or []
    if phase_names is None:
        # No explicit selection for this mode: every phase takes part.
        phases = all_phases
    else:
        phases = [
            phase for phase in all_phases if phase["name"] in phase_names
        ]

    if not phases:
        # Fail with an actionable message instead of letting max() complain
        # about an empty sequence.
        raise ValueError(
            "no phase selected for mode {!r}: check 'phase' and {!r} in {}".
            format(mode, phases_class, args.model))
    max_thread_num = max(int(phase["thread_num"]) for phase in phases)

    backend_envs = envs.load_yaml(args.backend)
    flattens = envs.flatten_environs(backend_envs, "_")
    flattens["engine_role"] = "MASTER"
    flattens["engine_mode"] = envs.get_runtime_environ("mode")
    flattens["engine_run_config"] = args.model
    flattens["max_thread_num"] = max_thread_num
    flattens["fleet_mode"] = fleet_mode
    flattens["device"] = device
    flattens["backend_yaml"] = args.backend
    envs.set_runtime_environs(flattens)

    launch = ClusterEngine(None, args.model)
    return launch
def set_runtime_envs(cluster_envs, engine_yaml):
    """Export cluster and trainer settings, then print the trainer variables.

    The ``train.trainer.`` entries of ``engine_yaml`` are fetched with
    ``get_inters_from_yaml``.  When they contain ``train.trainer.threads`` and
    ``cluster_envs`` already has a ``CPU_NUM`` entry, that entry is overwritten
    in place with the configured thread count.  Both mappings are then exported
    through ``envs.set_runtime_environs``.

    Args:
        cluster_envs: Mapping of cluster settings; ``None`` is treated as an
            empty dict.  May be modified in place (``CPU_NUM``).
        engine_yaml: Engine YAML passed to ``get_inters_from_yaml``.
    """
    threads_key = "train.trainer.threads"
    if cluster_envs is None:
        cluster_envs = {}

    engine_extras = get_inters_from_yaml(engine_yaml, "train.trainer.")
    if threads_key in engine_extras and "CPU_NUM" in cluster_envs:
        cluster_envs["CPU_NUM"] = engine_extras[threads_key]

    for settings in (cluster_envs, engine_extras):
        envs.set_runtime_environs(settings)

    # Only process environment variables under the trainer prefix are shown.
    trainer_envs = {
        name: value
        for name, value in os.environ.items()
        if name.startswith("train.trainer.")
    }
    print(envs.pretty_print_envs(trainer_envs, ("Runtime Envs", "Value")))
def master():
    """Prepare the submit environment and build the cluster engine.

    The backend YAML named by ``args.backend`` is parsed with ``yaml.load``
    (``yaml.FullLoader``) and flattened with ``"_"`` as separator.  The engine
    role (``"MASTER"``), the model config path and a freshly created temporary
    directory are added, ``update_workspace`` is applied to the mapping, and
    the result is exported with ``envs.set_runtime_environs`` and printed.

    Returns:
        ``ClusterEngine(None, args.model)``.
    """
    from paddlerec.core.engine.cluster.cluster import ClusterEngine

    with open(args.backend, 'r') as stream:
        backend_config = yaml.load(stream.read(), Loader=yaml.FullLoader)

    submit_envs = envs.flatten_environs(backend_config, "_")
    submit_envs["engine_role"] = "MASTER"
    submit_envs["engine_run_config"] = args.model
    submit_envs["engine_temp_path"] = tempfile.mkdtemp()
    update_workspace(submit_envs)

    envs.set_runtime_environs(submit_envs)
    table = envs.pretty_print_envs(submit_envs,
                                   ("Submit Runtime Envs", "Value"))
    print(table)

    return ClusterEngine(None, args.model)
dir = envs.paddlerec_adapter(model) path = os.path.join(dir, "config.yaml") else: if not os.path.isfile(model): raise IOError("model config: {} invalid".format(model)) path = model return path if __name__ == "__main__": parser = argparse.ArgumentParser(description='paddle-rec run') parser.add_argument("-m", "--model", type=str) parser.add_argument("-b", "--backend", type=str, default=None) abs_dir = os.path.dirname(os.path.abspath(__file__)) envs.set_runtime_environs({"PACKAGE_BASE": abs_dir}) args = parser.parse_args() args.model = get_abs_model(args.model) if not validation.yaml_validation(args.model): sys.exit(-1) engine_registry() running_config = get_all_inters_from_yaml(args.model, ["mode", "runner."]) modes = get_modes(running_config) for mode in modes: envs.set_runtime_environs({"mode": mode}) which_engine = get_engine(args, running_config, mode) engine = which_engine(args)
dir = envs.paddlerec_adapter(model) path = os.path.join(dir, "config.yaml") else: if not os.path.isfile(model): raise IOError("model config: {} invalid".format(model)) path = model return path if __name__ == "__main__": parser = argparse.ArgumentParser(description='paddle-rec run') parser.add_argument("-m", "--model", type=str) parser.add_argument("-b", "--backend", type=str, default=None) abs_dir = os.path.dirname(os.path.abspath(__file__)) envs.set_runtime_environs({"PACKAGE_BASE": abs_dir}) args = parser.parse_args() args.model = get_abs_model(args.model) if not validation.yaml_validation(args.model): sys.exit(-1) engine_registry() running_config = get_all_inters_from_yaml(args.model, ["workspace", "mode", "runner."]) modes = get_modes(running_config) for mode in modes: envs.set_runtime_environs({ "mode": mode,
def env_set(self):
    """Export ``self.cluster_env`` and print its flattened form.

    The mapping is passed to ``envs.set_runtime_environs``, then flattened with
    ``envs.flatten_environs`` and printed as a table with the headers
    ``"Cluster Envs"`` and ``"Value"``.
    """
    envs.set_runtime_environs(self.cluster_env)

    flat_cluster_env = envs.flatten_environs(self.cluster_env)
    table = envs.pretty_print_envs(flat_cluster_env, ("Cluster Envs", "Value"))
    print(table)