Ejemplo n.º 1
0
    def create_experiment(
        self,
        config: Union[str, pathlib.Path, Dict],
        model_dir: str,
    ) -> experiment.ExperimentReference:
        """Create a new experiment from a config and a model directory.

        Arguments:
            config: the experiment configuration — either a dict, or a path
                (``str`` or ``pathlib.Path``) to a YAML file to load.
            model_dir: directory whose files are uploaded as the model
                definition.

        Returns:
            experiment.ExperimentReference: a reference to the created
            experiment.
        """
        check.is_instance(config, (str, pathlib.Path, dict),
                          "config parameter must be dictionary or path")
        if isinstance(config, str):
            with open(config) as f:
                experiment_config = yaml.safe_load(f)
        elif isinstance(config, pathlib.Path):
            with config.open() as f:
                experiment_config = yaml.safe_load(f)
        else:
            # check.is_instance above already guarantees this is a dict.
            # (The original branch tested isinstance(config, typing.Dict);
            # isinstance against typing aliases is deprecated, and an elif
            # chain with no else could leave experiment_config unbound.)
            experiment_config = config

        # Package every file under model_dir to ship with the request.
        model_context = _path_to_files(pathlib.Path(model_dir))

        experiment_request = V1CreateExperimentRequest(
            model_definition=model_context,
            config=yaml.safe_dump(experiment_config),
        )
        experiment_response = self._internal.determined_create_experiment(
            experiment_request)
        return experiment.ExperimentReference(
            experiment_response.experiment.id,
            self._session._master,
            self._experiments,
        )
Ejemplo n.º 2
0
def parse_config(
    config_file: Optional[IO],
    entrypoint: Optional[List[str]],
    overrides: Iterable[str],
    volumes: Iterable[str],
) -> Dict[str, Any]:
    """Build a task config dict from a YAML file plus command-line arguments.

    The config file (if any) is loaded first; ``overrides`` of the form
    ``dot.path=value`` are applied on top, ``volumes`` of the form
    ``host:container`` are appended as bind mounts, and ``entrypoint`` is
    used only when the config does not already define one.
    """
    config = {}  # type: Dict[str, Any]
    if config_file:
        with config_file:
            config = util.safe_load_yaml_with_exceptions(config_file)

    for override in overrides:
        if "=" not in override:
            raise ValueError(
                "Could not read configuration option '{}'\n\n"
                "Expecting:\n{}".format(override, CONFIG_DESC)
            )

        key, value = override.split("=", maxsplit=1)  # type: Tuple[str, Any]

        # yaml.safe_load() casts each value to the type YAML would infer
        # (e.g., "4" -> 4); comma-separated values become a list.
        if "," not in value:
            value = yaml.safe_load(value)
            # Certain keys always hold lists; wrap a lone value for them.
            if key in _CONFIG_PATHS_COERCE_TO_LIST:
                value = [value]
        else:
            value = [yaml.safe_load(v) for v in value.split(",")]

        # TODO(#2703): Consider using full JSONPath spec instead of dot
        # notation.
        config = _set_nested_config(config, key.split("."), value)

    for volume in volumes:
        if ":" not in volume:
            raise ValueError(
                "Could not read volume option '{}'\n\n"
                "Expecting:\n{}".format(volume, VOLUME_DESC)
            )

        host_path, container_path = volume.split(":", maxsplit=1)
        config.setdefault("bind_mounts", []).append(
            {"host_path": host_path, "container_path": container_path}
        )

    # Fall back to the command-line entrypoint only when the config has not
    # already defined one.
    if entrypoint and not config.get("entrypoint"):
        config["entrypoint"] = entrypoint

    return config
Ejemplo n.º 3
0
def format_base64_as_yaml(source: str) -> str:
    """Decode a base64-encoded YAML document and re-serialize it block-style."""
    decoded = base64.b64decode(source)
    formatted = yaml.safe_dump(yaml.safe_load(decoded),
                               default_flow_style=False)
    if isinstance(formatted, str):
        return formatted
    # safe_dump should always produce text; anything else is a logic error.
    raise AssertionError("cannot format base64 string to yaml")
Ejemplo n.º 4
0
def test_non_root_experiment(clean_auth: None, tmp_path: pathlib.Path) -> None:
    """End-to-end check that an experiment runs as a non-root linked user.

    Links the determined user to the unprivileged nobody/nogroup identity
    (uid/gid 65534) and runs a short no_op experiment under it.
    """
    user = create_linked_user(65534, "nobody", 65534, "nogroup")

    with logged_in_user(user):
        # Read the fixture model definition and experiment config up front.
        with open(conf.fixtures_path("no_op/model_def.py")) as f:
            model_def_content = f.read()

        with open(conf.fixtures_path("no_op/single-one-short-step.yaml")) as f:
            config = yaml.safe_load(f)

        # Use a user-owned path to ensure shared_fs uses the container_path and not host_path.
        with non_tmp_shared_fs_path() as host_path:
            config["checkpoint_storage"] = {
                "type": "shared_fs",
                "host_path": host_path,
            }

            # Call `det --version` in a startup hook to ensure that det is on the PATH.
            # (exit 77 marks the PATH failure distinctly from other errors.)
            with FileTree(
                    tmp_path,
                {
                    "startup-hook.sh": "det --version || exit 77",
                    "const.yaml": yaml.dump(config),  # type: ignore
                    "model_def.py": model_def_content,
                },
            ) as tree:
                exp.run_basic_test(str(tree.joinpath("const.yaml")), str(tree),
                                   None)
Ejemplo n.º 5
0
def test_schemas(test_case: str) -> None:
    """Run one named schema test case, given as "cases_file::case_name"."""
    cases_file, case_name = test_case.split("::", 1)
    with open(os.path.join(CASES_ROOT, cases_file)) as f:
        loaded_cases = yaml.safe_load(f)
    # Locate the named case; indexing [0] raises IndexError if it is missing.
    matching = [c for c in loaded_cases if c["name"] == case_name]
    Case(**matching[0]).run()
Ejemplo n.º 6
0
def _parse_config_file_or_exit(config_file: io.FileIO) -> Dict:
    """Parse a YAML experiment config from an open file, exiting on bad input.

    Closes the file after reading; prints an error and exits with status 1
    when the parsed document is empty or not a mapping.
    """
    parsed = yaml.safe_load(config_file.read())
    config_file.close()
    if not parsed or not isinstance(parsed, dict):
        print("Error: invalid experiment config file {}".format(
            config_file.name))
        sys.exit(1)
    return parsed
Ejemplo n.º 7
0
def all_cases() -> Iterator["str"]:
    """Yield every schema test case as "relative/path.yaml::case_name"."""
    for dirpath, _, filenames in os.walk(CASES_ROOT):
        for filename in filenames:
            if not filename.endswith(".yaml"):
                continue
            full_path = os.path.join(dirpath, filename)
            with open(full_path) as f:
                loaded_cases = yaml.safe_load(f)
            # The display path is per-file; compute it once outside the loop.
            rel_path = os.path.relpath(full_path, CASES_ROOT)
            for case in loaded_cases:
                yield rel_path + "::" + case["name"]
Ejemplo n.º 8
0
def preview_search(args: Namespace) -> None:
    """CLI handler: show what trials/workloads a searcher config would create.

    Reads the experiment config from args.config_file, posts it to the
    master's searcher/preview endpoint, then prints the searcher section and
    a table summarizing each simulated trial's workload sequence.
    """
    experiment_config = yaml.safe_load(args.config_file.read())
    args.config_file.close()

    if "searcher" not in experiment_config:
        print("Experiment configuration must have 'searcher' section")
        sys.exit(1)
    r = api.post(args.master, "searcher/preview", body=experiment_config)
    j = r.json()

    def to_full_name(kind: str) -> str:
        """Expand a one-letter workload code (suffix R/B/E or "V") to text."""
        if kind[-1] == "R":
            return "train {} records".format(kind[:-1])
        if kind[-1] == "B":
            return "train {} batch(es)".format(kind[:-1])
        if kind[-1] == "E":
            return "train {} epoch(s)".format(kind[:-1])
        elif kind == "V":
            return "validation"
        else:
            raise ValueError("unexpected kind: {}".format(kind))

    def render_sequence(sequence: List[str]) -> str:
        """Run-length encode a workload sequence, e.g. "3 x train ..., 1 x validation"."""
        if not sequence:
            return "N/A"
        instructions = []
        current = sequence[0]
        count = 0
        for k in sequence:
            if k != current:
                instructions.append("{} x {}".format(count,
                                                     to_full_name(current)))
                current = k
                count = 1
            else:
                count += 1
        # Flush the final run.
        instructions.append("{} x {}".format(count, to_full_name(current)))
        return ", ".join(instructions)

    headers = ["Trials", "Breakdown"]
    # j["results"] maps a space-separated workload string to a trial count.
    values = [(count, render_sequence(operations.split()))
              for operations, count in j["results"].items()]

    print(colored("Using search configuration:", "green"))
    # ruamel round-trip dump preserves readable indentation for the echo.
    yml = yaml.YAML()
    yml.indent(mapping=2, sequence=4, offset=2)
    yml.dump(experiment_config["searcher"], sys.stdout)
    print()
    print("This search will create a total of {} trial(s).".format(
        sum(j["results"].values())))
    print(tabulate.tabulate(values, headers, tablefmt="presto"), flush=False)
Ejemplo n.º 9
0
def parse_config_overrides(config: Dict[str, Any],
                           overrides: Iterable[str]) -> None:
    """Apply "dot.path=value" override strings to ``config``.

    Raises:
        ValueError: if an override string does not contain "=".

    NOTE(review): the loop rebinds the local name ``config`` from
    ``_set_nested_config`` while this function returns None, so callers only
    see the overrides if ``_set_nested_config`` mutates the mapping it is
    given and returns it — TODO confirm.
    """
    for config_arg in overrides:
        if "=" not in config_arg:
            raise ValueError("Could not read configuration option '{}'\n\n"
                             "Expecting:\n{}".format(config_arg, CONFIG_DESC))

        key, value = config_arg.split("=", maxsplit=1)  # type: Tuple[str, Any]

        # Separate values if a comma exists. Use yaml.safe_load() to cast
        # the value(s) to the type YAML would use, e.g., "4" -> 4.
        if "," in value:
            value = [yaml.safe_load(v) for v in value.split(",")]
        else:
            value = yaml.safe_load(value)

            # Certain configurations keys are expected to have list values.
            # Convert a single value to a singleton list if needed.
            if key in _CONFIG_PATHS_COERCE_TO_LIST:
                value = [value]

        # TODO(#2703): Consider using full JSONPath spec instead of dot
        # notation.
        config = _set_nested_config(config, key.split("."), value)
Ejemplo n.º 10
0
def safe_load_yaml_with_exceptions(yaml_file: Union[io.FileIO, IO[Any]]) -> Any:
    """Attempts to use ruamel.yaml.safe_load on the specified file. If successful, returns
    the output. If not, formats a ruamel.yaml Exception so that the user does not see a traceback
    of our internal APIs.

    ---------------------------------------------------------------------------------------------
    DuplicateKeyError Example:
    Input:
        Traceback (most recent call last):
        ...
        ruamel.yaml.constructor.DuplicateKeyError: while constructing a mapping
        in "<unicode string>", line 1, column 1:
            description: constrained_adaptiv ...
            ^ (line: 1)
        found duplicate key "checkpoint_storage" with value "{}" (original value: "{}")
        in "<unicode string>", line 7, column 1:
            checkpoint_storage:
            ^ (line: 7)
        To suppress this check see:
            http://yaml.readthedocs.io/en/latest/api.html#duplicate-keys
        Duplicate keys will become an error in future releases, and are errors
        by default when using the new API.
        Failed to create experiment
    Output:
        Error: invalid experiment config file constrained_adaptive.yaml.
        DuplicateKeyError: found duplicate key "learning_rate" with value "0.022"
        (original value: "0.025")
        in "constrained_adaptive.yaml", line 23, column 3
    ---------------------------------------------------------------------------------------------
    """
    # All ruamel parse problems we want to translate carry a mark/problem.
    marked_yaml_problems = (
        yaml.error.MarkedYAMLWarning,
        yaml.error.MarkedYAMLError,
        yaml.error.MarkedYAMLFutureWarning,
    )
    try:
        return yaml.safe_load(yaml_file)
    except marked_yaml_problems as e:
        # Show a short, user-facing summary instead of an internal traceback.
        print(
            f"Error: invalid experiment config file {yaml_file.name!r}.\n"
            f"{e.__class__.__name__}: {e.problem}\n{e.problem_mark}"
        )
        sys.exit(1)
Ejemplo n.º 11
0
def _parse_config(field: Any) -> Any:
    """Decode a base64-encoded config field and pretty-print it as block YAML."""
    decoded = yaml.safe_load(base64.b64decode(field))
    return yaml.safe_dump(decoded, default_flow_style=False)
Ejemplo n.º 12
0
def agent_up(
    master_host: str,
    master_port: int,
    agent_config_path: Optional[Path],
    agent_name: str,
    agent_label: Optional[str],
    agent_resource_pool: Optional[str],
    image_repo_prefix: Optional[str],
    version: Optional[str],
    gpu: bool,
    autorestart: bool,
    cluster_name: str,
    labels: Optional[Dict] = None,
) -> None:
    """Start a determined-agent docker container connected to a master.

    Command-line flags take precedence; for flags left at their defaults the
    values from the agent config file (if given) are used instead. The
    container always runs with host networking and the host docker socket
    mounted so the agent can launch task containers.
    """
    agent_conf = {}
    # The docker socket mount lets the agent start sibling containers.
    volumes = ["/var/run/docker.sock:/var/run/docker.sock"]
    if agent_config_path is not None:
        with agent_config_path.open() as f:
            agent_conf = yaml.safe_load(f)
        # Mount the same config file into the container for the agent itself.
        volumes += [f"{os.path.abspath(agent_config_path)}:/etc/determined/agent.yaml"]

    # Fallback on agent config for options not specified as flags.
    environment = {}
    if agent_name == AGENT_NAME_DEFAULT:
        agent_name = agent_conf.get("agent_id", agent_name)
    else:
        environment["DET_AGENT_ID"] = agent_name
    environment["DET_MASTER_PORT"] = str(master_port)
    if master_port == MASTER_PORT_DEFAULT:
        # Flag left at default: defer to the config file's master_port and
        # drop the env var so it does not shadow the config value.
        if "master_port" in agent_conf:
            del environment["DET_MASTER_PORT"]
            master_port = agent_conf["master_port"]

    if agent_label is not None:
        environment["DET_LABEL"] = agent_label
    if agent_resource_pool is not None:
        environment["DET_RESOURCE_POOL"] = agent_resource_pool
    if image_repo_prefix is None:
        image_repo_prefix = "determinedai"
    if version is None:
        version = determined.__version__

    # Block until the master responds before starting the agent.
    _wait_for_master(master_host, master_port, cluster_name)

    if master_host == "localhost":
        # Inside the container, "localhost" would be the container itself;
        # substitute the docker proxy address instead.
        master_host = get_proxy_addr()
    environment["DET_MASTER_HOST"] = master_host

    image = f"{image_repo_prefix}/determined-agent:{version}"
    init = True
    mounts = []  # type: List[str]
    if labels is None:
        labels = {}
    # Tag the container so other tooling can find determined agents.
    labels["ai.determined.type"] = "agent"

    restart_policy = {"Name": "unless-stopped"} if autorestart else None
    device_requests = [GPU_DEVICE_REQUEST] if gpu else None

    docker_client = docker.from_env()

    print(f"Starting {agent_name}")
    docker_client.containers.run(
        image=image,
        environment=environment,
        init=init,
        mounts=mounts,
        volumes=volumes,
        network_mode="host",
        name=agent_name,
        detach=True,
        labels=labels,
        restart_policy=restart_policy,
        device_requests=device_requests,
    )
Ejemplo n.º 13
0
def master_up(
    port: int,
    master_config_path: Optional[Path],
    storage_host_path: Path,
    master_name: str,
    image_repo_prefix: Optional[str],
    version: Optional[str],
    db_password: str,
    delete_db: bool,
    autorestart: bool,
    cluster_name: str,
    auto_work_dir: Optional[Path],
) -> None:
    """Bring up a local determined master (and db) via docker-compose.

    Builds a master.yaml (from master_config_path, or built-in defaults) and
    applies CLI-driven edits (checkpoint storage path, auto work dir). When
    any edit is made, the merged config is written to a temp file that is
    passed to compose. Tears down any existing stack first, then waits for
    the new master to come up.
    """
    command = ["up", "-d"]
    if image_repo_prefix is None:
        image_repo_prefix = "determinedai"
    if version is None:
        version = determined.__version__
    if autorestart:
        restart_policy = "unless-stopped"
    else:
        restart_policy = "no"

    # Values consumed by the docker-compose file via environment.
    env = {
        "INTEGRATIONS_HOST_PORT": str(port),
        "DET_DB_PASSWORD": db_password,
        "IMAGE_REPO_PREFIX": image_repo_prefix,
        "DET_VERSION": version,
        "DET_RESTART_POLICY": restart_policy,
    }

    # Some cli flags for det deploy local will cause us to write a temporary master.yaml.
    master_conf = {}
    make_temp_conf = False

    if master_config_path is not None:
        with master_config_path.open() as f:
            master_conf = yaml.safe_load(f)
    else:
        # These defaults come from master/packaging/master.yaml (except for host_path).
        master_conf = {
            "db": {
                "user": "******",
                "host": "determined-db",
                "port": 5432,
                "name": "determined",
            },
            "checkpoint_storage": {
                "type": "shared_fs",
                "host_path": appdirs.user_data_dir("determined"),
                "save_experiment_best": 0,
                "save_trial_best": 1,
                "save_trial_latest": 1,
            },
        }
        make_temp_conf = True

    # CLI storage path overrides whatever the config (or defaults) said.
    if storage_host_path is not None:
        master_conf["checkpoint_storage"] = {
            "type": "shared_fs",
            "host_path": str(storage_host_path.resolve()),
        }
        make_temp_conf = True

    if auto_work_dir is not None:
        # Make the requested directory both the task working dir and a
        # bind mount so tasks see the same path as the host.
        work_dir = str(auto_work_dir.resolve())
        master_conf.setdefault("task_container_defaults", {})["work_dir"] = work_dir
        master_conf["task_container_defaults"].setdefault("bind_mounts", []).append(
            {"host_path": work_dir, "container_path": work_dir}
        )
        make_temp_conf = True

    # Ensure checkpoint storage directory exists.
    final_storage_host_path = master_conf.get("checkpoint_storage", {}).get("host_path")
    if final_storage_host_path is not None:
        final_storage_host_path = Path(final_storage_host_path)
        if not final_storage_host_path.exists():
            final_storage_host_path.mkdir(parents=True)

    if make_temp_conf:
        # NOTE(review): the mkstemp file is never removed — it must outlive
        # this call since compose reads it, but nothing cleans it up later;
        # presumably acceptable for a dev tool — TODO confirm.
        fd, temp_path = tempfile.mkstemp(prefix="det-deploy-local-master-config-")
        with open(fd, "w") as f:
            yaml.dump(master_conf, f)
        master_config_path = Path(temp_path)

    # This is always true by now, but mypy needs help.
    assert master_config_path is not None

    env["DET_MASTER_CONFIG"] = str(master_config_path.resolve())

    # Tear down any existing stack (optionally dropping the db) first.
    master_down(master_name, delete_db)
    docker_compose(command, master_name, env)
    _wait_for_master("localhost", port, cluster_name)
Ejemplo n.º 14
0
def load_config(config_path: str) -> Any:
    """Read and parse the YAML document at config_path."""
    with open(config_path) as stream:
        return yaml.safe_load(stream)
Ejemplo n.º 15
0
def set_template(args: Namespace) -> None:
    """CLI handler: upload a task template (YAML file) to the master."""
    with args.template_file as template_file:
        template_body = yaml.safe_load(template_file)
        api.put(args.master, path="templates/" + args.template_name, body=template_body)
        print(colored("Set template {}".format(args.template_name), "green"))
Ejemplo n.º 16
0
def test_schemas(test_case: str) -> None:
    """Run every schema case from the file named by "cases_file::name"."""
    # Only the file part is used here; the split still validates the format.
    cases_file, case_name = test_case.split("::", 1)
    cases_path = os.path.join(CASES_ROOT, cases_file)
    with open(cases_path) as f:
        loaded_cases = yaml.safe_load(f)
    for one_case in loaded_cases:
        Case(**one_case).run()