def create_experiment(
    self,
    config: Union[str, pathlib.Path, Dict],
    model_dir: str,
) -> experiment.ExperimentReference:
    """Create an experiment from a config and a model directory.

    Args:
        config: the experiment configuration, either as a dict or as a path
            (str or pathlib.Path) to a YAML file containing it.
        model_dir: directory whose files are uploaded as the model definition.

    Returns:
        An ExperimentReference pointing at the newly created experiment.
    """
    check.is_instance(
        config, (str, pathlib.Path, dict), "config parameter must be dictionary or path"
    )
    if isinstance(config, str):
        with open(config) as f:
            experiment_config = yaml.safe_load(f)
    elif isinstance(config, pathlib.Path):
        with config.open() as f:
            experiment_config = yaml.safe_load(f)
    else:
        # Fix: the original tested isinstance(config, Dict) against the
        # typing.Dict alias, which is deprecated/unreliable for runtime checks
        # and left experiment_config unbound if it failed.  check.is_instance
        # above already guarantees config is a plain dict here.
        experiment_config = config

    model_context = _path_to_files(pathlib.Path(model_dir))

    experiment_request = V1CreateExperimentRequest(
        model_definition=model_context,
        config=yaml.safe_dump(experiment_config),
    )
    experiment_response = self._internal.determined_create_experiment(experiment_request)
    return experiment.ExperimentReference(
        experiment_response.experiment.id,
        self._session._master,
        self._experiments,
    )
def parse_config(
    config_file: Optional[IO],
    entrypoint: Optional[List[str]],
    overrides: Iterable[str],
    volumes: Iterable[str],
) -> Dict[str, Any]:
    """Build a task config dict from a YAML file plus CLI overrides and volume specs."""
    config: Dict[str, Any] = {}
    if config_file:
        with config_file:
            config = util.safe_load_yaml_with_exceptions(config_file)

    for override in overrides:
        if "=" not in override:
            raise ValueError(
                "Could not read configuration option '{}'\n\n"
                "Expecting:\n{}".format(override, CONFIG_DESC)
            )
        key, raw = override.split("=", maxsplit=1)
        # yaml.safe_load() casts each value to the type YAML would use,
        # e.g., "4" -> 4.  A comma-separated value becomes a list.
        value: Any
        if "," in raw:
            value = [yaml.safe_load(part) for part in raw.split(",")]
        else:
            value = yaml.safe_load(raw)
        # Some configuration keys always take list values; wrap scalars.
        if key in _CONFIG_PATHS_COERCE_TO_LIST:
            value = [value]
        # TODO(#2703): Consider using full JSONPath spec instead of dot notation.
        config = _set_nested_config(config, key.split("."), value)

    for volume in volumes:
        if ":" not in volume:
            raise ValueError(
                "Could not read volume option '{}'\n\n"
                "Expecting:\n{}".format(volume, VOLUME_DESC)
            )
        host_path, container_path = volume.split(":", maxsplit=1)
        config.setdefault("bind_mounts", []).append(
            {"host_path": host_path, "container_path": container_path}
        )

    # Fall back to the CLI entrypoint only when the config itself sets none.
    if not config.get("entrypoint") and entrypoint:
        config["entrypoint"] = entrypoint
    return config
def format_base64_as_yaml(source: str) -> str:
    """Decode a base64-encoded YAML document and re-serialize it in block style."""
    decoded = base64.b64decode(source)
    formatted = yaml.safe_dump(yaml.safe_load(decoded), default_flow_style=False)
    # safe_dump should always return a str; guard against surprises anyway.
    if not isinstance(formatted, str):
        raise AssertionError("cannot format base64 string to yaml")
    return formatted
def test_non_root_experiment(clean_auth: None, tmp_path: pathlib.Path) -> None:
    """Run a short no-op experiment as an unprivileged user (nobody/nogroup)."""
    user = create_linked_user(65534, "nobody", 65534, "nogroup")
    with logged_in_user(user):
        with open(conf.fixtures_path("no_op/model_def.py")) as f:
            model_def_content = f.read()

        with open(conf.fixtures_path("no_op/single-one-short-step.yaml")) as f:
            config = yaml.safe_load(f)

        # Use a user-owned path to ensure shared_fs uses the container_path and not host_path.
        with non_tmp_shared_fs_path() as host_path:
            config["checkpoint_storage"] = {
                "type": "shared_fs",
                "host_path": host_path,
            }

            # Call `det --version` in a startup hook to ensure that det is on the PATH.
            tree_files = {
                "startup-hook.sh": "det --version || exit 77",
                "const.yaml": yaml.dump(config),  # type: ignore
                "model_def.py": model_def_content,
            }
            with FileTree(tmp_path, tree_files) as tree:
                exp.run_basic_test(str(tree.joinpath("const.yaml")), str(tree), None)
def test_schemas(test_case: str) -> None:
    """Run the single named case from a ``file::case_name`` test id."""
    cases_file, case_name = test_case.split("::", 1)
    with open(os.path.join(CASES_ROOT, cases_file)) as f:
        cases = yaml.safe_load(f)

    # Locate the requested case within the file of test cases; an IndexError
    # here means the id named a case that does not exist.
    matching = [entry for entry in cases if entry["name"] == case_name][0]
    Case(**matching).run()
def _parse_config_file_or_exit(config_file: io.FileIO) -> Dict:
    """Load an experiment config from an open file, exiting the process if invalid."""
    experiment_config = yaml.safe_load(config_file.read())
    config_file.close()
    # Reject empty documents and YAML that does not parse to a mapping.
    if not isinstance(experiment_config, dict) or not experiment_config:
        print("Error: invalid experiment config file {}".format(config_file.name))
        sys.exit(1)
    return experiment_config
def all_cases() -> Iterator["str"]:
    """Yield every schema test case as a ``relative_path::case_name`` id."""
    for root, _, files in os.walk(CASES_ROOT):
        for filename in files:
            if not filename.endswith(".yaml"):
                continue
            path = os.path.join(root, filename)
            display_path = os.path.relpath(path, CASES_ROOT)
            with open(path) as f:
                cases = yaml.safe_load(f)
            for case in cases:
                yield display_path + "::" + case["name"]
def preview_search(args: Namespace) -> None:
    # Load the experiment config and ask the master to simulate the searcher,
    # then print a per-trial breakdown of the operations it would schedule.
    experiment_config = yaml.safe_load(args.config_file.read())
    args.config_file.close()
    if "searcher" not in experiment_config:
        print("Experiment configuration must have 'searcher' section")
        sys.exit(1)
    # Dry-run endpoint: nothing is created on the master.
    r = api.post(args.master, "searcher/preview", body=experiment_config)
    j = r.json()

    def to_full_name(kind: str) -> str:
        # Operation codes are a count plus a unit suffix (R=records,
        # B=batches, E=epochs), or the literal "V" for a validation step.
        if kind[-1] == "R":
            return "train {} records".format(kind[:-1])
        if kind[-1] == "B":
            return "train {} batch(es)".format(kind[:-1])
        if kind[-1] == "E":
            return "train {} epoch(s)".format(kind[:-1])
        elif kind == "V":
            return "validation"
        else:
            raise ValueError("unexpected kind: {}".format(kind))

    def render_sequence(sequence: List[str]) -> str:
        # Run-length encode the operation sequence, e.g. ["B","B","V"] ->
        # "2 x train ... batch(es), 1 x validation".
        if not sequence:
            return "N/A"
        instructions = []
        current = sequence[0]
        count = 0
        for k in sequence:
            if k != current:
                instructions.append("{} x {}".format(count, to_full_name(current)))
                current = k
                count = 1
            else:
                count += 1
        # Flush the final run, which the loop above never emits.
        instructions.append("{} x {}".format(count, to_full_name(current)))
        return ", ".join(instructions)

    headers = ["Trials", "Breakdown"]
    values = [
        (count, render_sequence(operations.split()))
        for operations, count in j["results"].items()
    ]

    print(colored("Using search configuration:", "green"))
    # NOTE(review): yaml.YAML() implies this module is ruamel.yaml — confirm
    # against the file's imports.  Used here for indented, readable output.
    yml = yaml.YAML()
    yml.indent(mapping=2, sequence=4, offset=2)
    yml.dump(experiment_config["searcher"], sys.stdout)
    print()
    print(
        "This search will create a total of {} trial(s).".format(
            sum(j["results"].values())
        )
    )
    print(tabulate.tabulate(values, headers, tablefmt="presto"), flush=False)
def parse_config_overrides(config: Dict[str, Any], overrides: Iterable[str]) -> None:
    """Apply ``key=value`` override strings to ``config``.

    NOTE(review): this function returns None, so callers only see the result
    if _set_nested_config mutates ``config`` in place; the rebinding of the
    local ``config`` name below is otherwise discarded — confirm that
    helper's semantics.
    """
    for override in overrides:
        if "=" not in override:
            raise ValueError(
                "Could not read configuration option '{}'\n\n"
                "Expecting:\n{}".format(override, CONFIG_DESC)
            )
        key, raw = override.split("=", maxsplit=1)
        # yaml.safe_load() casts each value to the type YAML would use,
        # e.g., "4" -> 4.  A comma-separated value becomes a list.
        value: Any
        if "," in raw:
            value = [yaml.safe_load(part) for part in raw.split(",")]
        else:
            value = yaml.safe_load(raw)
        # Certain configuration keys always take list values; wrap scalars.
        if key in _CONFIG_PATHS_COERCE_TO_LIST:
            value = [value]
        # TODO(#2703): Consider using full JSONPath spec instead of dot notation.
        config = _set_nested_config(config, key.split("."), value)
def safe_load_yaml_with_exceptions(yaml_file: Union[io.FileIO, IO[Any]]) -> Any:
    """Parse a YAML file with ruamel.yaml.safe_load, exiting cleanly on bad input.

    On success, returns the parsed document.  On a YAML error or warning
    (e.g. a DuplicateKeyError for a repeated mapping key), prints a short
    message of the form::

        Error: invalid experiment config file 'example.yaml'.
        DuplicateKeyError: found duplicate key ... in "example.yaml", line N, column M

    and exits, so the user never sees a traceback through our internal APIs.
    """
    try:
        return yaml.safe_load(yaml_file)
    except (
        yaml.error.MarkedYAMLWarning,
        yaml.error.MarkedYAMLError,
        yaml.error.MarkedYAMLFutureWarning,
    ) as e:
        print(
            f"Error: invalid experiment config file {yaml_file.name!r}.\n"
            f"{e.__class__.__name__}: {e.problem}\n{e.problem_mark}"
        )
        sys.exit(1)
def _parse_config(field: Any) -> Any:
    """Pretty-print a base64-encoded config field as block-style YAML."""
    decoded = base64.b64decode(field)
    return yaml.safe_dump(yaml.safe_load(decoded), default_flow_style=False)
def agent_up(
    master_host: str,
    master_port: int,
    agent_config_path: Optional[Path],
    agent_name: str,
    agent_label: Optional[str],
    agent_resource_pool: Optional[str],
    image_repo_prefix: Optional[str],
    version: Optional[str],
    gpu: bool,
    autorestart: bool,
    cluster_name: str,
    labels: Optional[Dict] = None,
) -> None:
    """Start a Determined agent container pointed at the given master.

    CLI flags take precedence; options not given as flags fall back to the
    agent config file (if any).  Blocks until the master is reachable, then
    launches the agent as a detached host-network docker container.
    """
    agent_conf = {}
    # The agent needs the host docker socket so it can launch task containers.
    volumes = ["/var/run/docker.sock:/var/run/docker.sock"]
    if agent_config_path is not None:
        with agent_config_path.open() as f:
            agent_conf = yaml.safe_load(f)
        # Mount the same config file into the container at the expected path.
        volumes += [f"{os.path.abspath(agent_config_path)}:/etc/determined/agent.yaml"]

    # Fallback on agent config for options not specified as flags.
    environment = {}
    if agent_name == AGENT_NAME_DEFAULT:
        agent_name = agent_conf.get("agent_id", agent_name)
    else:
        environment["DET_AGENT_ID"] = agent_name
    environment["DET_MASTER_PORT"] = str(master_port)
    if master_port == MASTER_PORT_DEFAULT:
        if "master_port" in agent_conf:
            # The config file's port wins over the default; drop the env
            # override so the container reads it from the mounted config.
            del environment["DET_MASTER_PORT"]
            master_port = agent_conf["master_port"]
    if agent_label is not None:
        environment["DET_LABEL"] = agent_label
    if agent_resource_pool is not None:
        environment["DET_RESOURCE_POOL"] = agent_resource_pool
    if image_repo_prefix is None:
        image_repo_prefix = "determinedai"
    if version is None:
        version = determined.__version__

    # Don't start the agent until the master is actually accepting connections.
    _wait_for_master(master_host, master_port, cluster_name)

    if master_host == "localhost":
        # Inside the container, "localhost" would refer to the container
        # itself, so substitute an address reachable from within it.
        master_host = get_proxy_addr()
    environment["DET_MASTER_HOST"] = master_host

    image = f"{image_repo_prefix}/determined-agent:{version}"
    init = True
    mounts = []  # type: List[str]
    if labels is None:
        labels = {}
    labels["ai.determined.type"] = "agent"

    restart_policy = {"Name": "unless-stopped"} if autorestart else None
    device_requests = [GPU_DEVICE_REQUEST] if gpu else None

    docker_client = docker.from_env()

    print(f"Starting {agent_name}")
    docker_client.containers.run(
        image=image,
        environment=environment,
        init=init,
        mounts=mounts,
        volumes=volumes,
        network_mode="host",
        name=agent_name,
        detach=True,
        labels=labels,
        restart_policy=restart_policy,
        device_requests=device_requests,
    )
def master_up(
    port: int,
    master_config_path: Optional[Path],
    # Annotation improved: the body guards on `storage_host_path is not None`.
    storage_host_path: Optional[Path],
    master_name: str,
    image_repo_prefix: Optional[str],
    version: Optional[str],
    db_password: str,
    delete_db: bool,
    autorestart: bool,
    cluster_name: str,
    auto_work_dir: Optional[Path],
) -> None:
    """Launch a local Determined master (and db) via docker-compose.

    Builds a master config — either from the provided file or from built-in
    defaults — applies storage/work-dir overrides, tears down any existing
    deployment, brings the stack up, and waits for the master to respond.
    """
    command = ["up", "-d"]
    if image_repo_prefix is None:
        image_repo_prefix = "determinedai"
    if version is None:
        version = determined.__version__
    if autorestart:
        restart_policy = "unless-stopped"
    else:
        restart_policy = "no"

    # Variables consumed by the docker-compose file.
    env = {
        "INTEGRATIONS_HOST_PORT": str(port),
        "DET_DB_PASSWORD": db_password,
        "IMAGE_REPO_PREFIX": image_repo_prefix,
        "DET_VERSION": version,
        "DET_RESTART_POLICY": restart_policy,
    }

    # Some cli flags for det deploy local will cause us to write a temporary master.yaml.
    master_conf = {}
    make_temp_conf = False

    if master_config_path is not None:
        with master_config_path.open() as f:
            master_conf = yaml.safe_load(f)
    else:
        # These defaults come from master/packaging/master.yaml (except for host_path).
        master_conf = {
            "db": {
                "user": "******",
                "host": "determined-db",
                "port": 5432,
                "name": "determined",
            },
            "checkpoint_storage": {
                "type": "shared_fs",
                "host_path": appdirs.user_data_dir("determined"),
                "save_experiment_best": 0,
                "save_trial_best": 1,
                "save_trial_latest": 1,
            },
        }
        make_temp_conf = True

    if storage_host_path is not None:
        # Flag override: point checkpoint storage at the requested host dir.
        master_conf["checkpoint_storage"] = {
            "type": "shared_fs",
            "host_path": str(storage_host_path.resolve()),
        }
        make_temp_conf = True

    if auto_work_dir is not None:
        # Set the task working directory and bind-mount it at the same path.
        work_dir = str(auto_work_dir.resolve())
        master_conf.setdefault("task_container_defaults", {})["work_dir"] = work_dir
        master_conf["task_container_defaults"].setdefault("bind_mounts", []).append(
            {"host_path": work_dir, "container_path": work_dir}
        )
        make_temp_conf = True

    # Ensure checkpoint storage directory exists.
    final_storage_host_path = master_conf.get("checkpoint_storage", {}).get("host_path")
    if final_storage_host_path is not None:
        final_storage_host_path = Path(final_storage_host_path)
        if not final_storage_host_path.exists():
            final_storage_host_path.mkdir(parents=True)

    if make_temp_conf:
        # Persist the synthesized config so the container can mount it.
        fd, temp_path = tempfile.mkstemp(prefix="det-deploy-local-master-config-")
        with open(fd, "w") as f:
            yaml.dump(master_conf, f)
        master_config_path = Path(temp_path)

    # This is always true by now, but mypy needs help.
    assert master_config_path is not None
    env["DET_MASTER_CONFIG"] = str(master_config_path.resolve())

    # Tear down any previous deployment (optionally dropping the db) first.
    master_down(master_name, delete_db)
    docker_compose(command, master_name, env)
    _wait_for_master("localhost", port, cluster_name)
def load_config(config_path: str) -> Any:
    """Read and parse the YAML file at ``config_path``."""
    with open(config_path) as f:
        return yaml.safe_load(f)
def set_template(args: Namespace) -> None:
    """Upload the given template file to the master under the given name."""
    with args.template_file:
        body = yaml.safe_load(args.template_file)
    api.put(args.master, path="templates/" + args.template_name, body=body)
    print(colored("Set template {}".format(args.template_name), "green"))
def test_schemas(test_case: str) -> None:
    """Run every case in the file named by a ``file::case_name`` test id."""
    cases_file, _ = test_case.split("::", 1)
    with open(os.path.join(CASES_ROOT, cases_file)) as f:
        loaded_cases = yaml.safe_load(f)

    for entry in loaded_cases:
        Case(**entry).run()