Example #1
0
def test_create_test_mode() -> None:
    """Exercise `det experiment create --test-mode` on a good and a broken experiment."""
    test_mode_create = ("experiment", "create", "--test-mode")

    # A valid experiment must pass and report success on stdout.
    passing_cmd = [
        "det",
        "-m",
        conf.make_master_url(),
        *test_mode_create,
        conf.fixtures_path("mnist_pytorch/adaptive_short.yaml"),
        conf.official_examples_path("mnist_pytorch"),
    ]
    stdout = subprocess.check_output(passing_cmd, universal_newlines=True)
    assert "Model definition test succeeded" in stdout

    # The trial_error fixture introduces an error into the trial
    # implementation, so the CLI is expected to exit non-zero.
    failing_cmd = [
        "det",
        "-m",
        conf.make_master_url(),
        *test_mode_create,
        conf.fixtures_path("trial_error/const.yaml"),
        conf.fixtures_path("trial_error"),
    ]
    with pytest.raises(subprocess.CalledProcessError):
        subprocess.check_call(failing_cmd)
Example #2
0
def test_experiment_delete() -> None:
    """Delete a finished experiment with DET_ADMIN set and check `describe` then fails."""
    whoami_cmd = ["det", "-m", conf.make_master_url(), "user", "whoami"]
    subprocess.check_call(whoami_cmd)

    exp_id = exp.run_basic_test(
        conf.fixtures_path("no_op/single.yaml"),
        conf.fixtures_path("no_op"),
        1,
    )
    exp_arg = str(exp_id)

    delete_cmd = [
        "det",
        "-m",
        conf.make_master_url(),
        "experiment",
        "delete",
        exp_arg,
        "--yes",
    ]
    # The delete is issued with DET_ADMIN=1 layered over the current environment.
    admin_env = dict(os.environ, DET_ADMIN="1")
    subprocess.check_call(delete_cmd, env=admin_env)

    # Once deleted, the experiment is no longer in the database, so
    # "det experiment describe" must exit non-zero.
    describe_cmd = [
        "det",
        "-m",
        conf.make_master_url(),
        "experiment",
        "describe",
        exp_arg,
    ]
    with pytest.raises(subprocess.CalledProcessError):
        subprocess.check_call(describe_cmd)
Example #3
0
def test_labels() -> None:
    """Add and then remove an experiment label, checking `describe` output each time."""
    exp_id = exp.create_experiment(
        conf.fixtures_path("no_op/single-one-short-step.yaml"),
        conf.fixtures_path("no_op"),
        None,
    )
    exp_arg = str(exp_id)
    dummy_label = "__det_test_dummy_label__"

    def edit_label(action):
        # Runs `det e label <action> <experiment> <label>`.
        subprocess.check_call(
            ["det", "-m", conf.make_master_url(), "e", "label", action, exp_arg, dummy_label]
        )

    def describe_text():
        # Returns the decoded stdout of `det e describe <experiment>`.
        raw = subprocess.check_output(
            ["det", "-m", conf.make_master_url(), "e", "describe", exp_arg]
        )
        return raw.decode()

    # After adding, the label must appear in the description.
    edit_label("add")
    assert dummy_label in describe_text()

    # After removing, the label must be gone again.
    edit_label("remove")
    assert dummy_label not in describe_text()
Example #4
0
def run_list_cli_tests(experiment_id: int) -> None:
    """
    Invoke the list-related `det experiment` subcommands for an experiment.

    Each command is run with `subprocess.check_call`, so a non-zero exit
    status from the CLI raises `subprocess.CalledProcessError`.
    """
    exp_arg = str(experiment_id)
    list_invocations = (
        ["list-trials", exp_arg],
        ["list-checkpoints", exp_arg],
        ["list-checkpoints", "--best", str(1), exp_arg],
    )

    for subcommand_args in list_invocations:
        subprocess.check_call(
            ["det", "-m", conf.make_master_url(), "experiment"] + subcommand_args
        )
Example #5
0
def change_experiment_state(experiment_id: int, new_state: str) -> None:
    """
    PATCH the experiment's `state` field on the master.

    Asserts that the master answers with a "no content" status; the response
    text is used as the assertion message otherwise.
    """
    auth.initialize_session(conf.make_master_url(), try_reauth=True)

    merge_patch_headers = {"Content-Type": "application/merge-patch+json"}
    state_patch = {"state": new_state}
    response = api.patch(
        conf.make_master_url(),
        "experiments/{}".format(experiment_id),
        headers=merge_patch_headers,
        body=state_patch,
    )

    assert response.status_code == requests.codes.no_content, response.text
Example #6
0
def get_num_running_commands() -> int:
    """Return how many entries of the master's "commands" listing are in state RUNNING."""
    auth.initialize_session(conf.make_master_url(), try_reauth=True)
    response = api.get(conf.make_master_url(), "commands")
    assert response.status_code == requests.codes.ok, response.text

    running = 0
    for command_info in response.json().values():
        if command_info["state"] == "RUNNING":
            running += 1
    return running
Example #7
0
def cluster_slots() -> Dict[str, Any]:
    """
    cluster_slots returns a dict of slots that each agent has.

    The master's "agents" endpoint is queried after (re)initializing the
    session; the call asserts that the response status is OK.

    :return:  Dict[AgentID, List[Slot]]
    """
    auth.initialize_session(conf.make_master_url(), try_reauth=True)
    r = api.get(conf.make_master_url(), "agents")
    assert r.status_code == requests.codes.ok, r.text
    # Named `agents` rather than `json` so the stdlib module name is not
    # shadowed inside this function.
    agents = r.json()  # type: Dict[str, Any]
    # Materialize each agent's slots as a real list, as the docstring
    # promises; a bare dict view cannot be indexed or sliced by callers.
    return {agent["id"]: list(agent["slots"].values()) for agent in agents.values()}
Example #8
0
def test_experiment_archive_unarchive() -> None:
    """Verify the archive/unarchive CLI commands and their effect on `describe --json`."""
    exp_id = exp.create_experiment(
        conf.fixtures_path("no_op/single.yaml"),
        conf.fixtures_path("no_op"),
        ["--paused"],
    )
    exp_arg = str(exp_id)

    describe_cmd = [
        "det",
        "-m",
        conf.make_master_url(),
        "experiment",
        "describe",
        "--json",
        exp_arg,
    ]

    def archived_flag():
        # Describe must return exactly one entry; report its "archived" field.
        described = json.loads(subprocess.check_output(describe_cmd))
        assert len(described) == 1
        return described[0]["archived"]

    def experiment_cmd(action):
        # Builds `det -m <master> experiment <action> <experiment>`.
        return ["det", "-m", conf.make_master_url(), "experiment", action, exp_arg]

    # A freshly created experiment starts out unarchived.
    assert not archived_flag()

    # Archiving a non-terminal experiment must fail; cancel it afterwards so
    # that it reaches a terminal state.
    with pytest.raises(subprocess.CalledProcessError):
        subprocess.check_call(experiment_cmd("archive"))
    subprocess.check_call(experiment_cmd("cancel"))

    # Archiving now succeeds and is visible in the description...
    subprocess.check_call(experiment_cmd("archive"))
    assert archived_flag()

    # ...and unarchiving reverts it.
    subprocess.check_call(experiment_cmd("unarchive"))
    assert not archived_flag()
Example #9
0
def test_configs(tmp_path: Path) -> None:
    """
    Run a command with a config file that sets an environment variable and
    check, from inside the command, that the variable has the expected value.

    :param tmp_path: directory in which the temporary config file is created.
    """
    config_yaml = """
resources:
  slots: 1
environment:
  environment_variables:
   - TEST=TEST
"""
    # The script must import `sys` itself; without it the mismatch branch
    # would die with NameError at `sys.exit(1)` instead of exiting cleanly
    # with status 1.
    check_script = """
import os
import sys
test = os.environ["TEST"]
if test != "TEST":
    print("{} != {}".format(test, "TEST"))
    sys.exit(1)
"""
    with FileTree(tmp_path, {"config.yaml": config_yaml}) as tree:
        config_path = tree.joinpath("config.yaml")
        _run_and_verify_exit_code_zero([
            "det",
            "-m",
            conf.make_master_url(),
            "cmd",
            "run",
            "--config-file",
            str(config_path),
            "python",
            "-c",
            check_script,
        ])
Example #10
0
def activate_experiment(experiment_id: int) -> None:
    """Run `det experiment activate` for the experiment; raises if the CLI exits non-zero."""
    subprocess.check_call([
        "det",
        "-m",
        conf.make_master_url(),
        "experiment",
        "activate",
        str(experiment_id),
    ])
Example #11
0
def maybe_create_experiment(
        config_file: str,
        model_def_file: str,
        create_args: Optional[List[str]] = None
) -> subprocess.CompletedProcess:
    """
    Run `det experiment create` and return the completed process unchecked.

    The CLI is run with DET_DEBUG=true and with stdout and stderr captured
    as text. The exit status is not inspected here; callers examine the
    returned CompletedProcess themselves.

    :param config_file: experiment configuration path passed to the CLI.
    :param model_def_file: model definition path passed to the CLI.
    :param create_args: optional extra CLI arguments, appended after the paths.
    """
    create_cmd = ["det", "-m", conf.make_master_url(), "experiment", "create"]
    create_cmd.append(config_file)
    create_cmd.append(model_def_file)
    if create_args is not None:
        create_cmd.extend(create_args)

    debug_env = dict(os.environ, DET_DEBUG="true")

    return subprocess.run(
        create_cmd,
        env=debug_env,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
Example #12
0
def test_basic_workflows(tmp_path: Path) -> None:
    """Run simple `det cmd run` invocations with and without a context directory."""
    hello_files = {"hello.py": "print('hello world')"}

    def cmd_run(*args):
        # Builds `det -m <master> cmd run <args...>`.
        return ["det", "-m", conf.make_master_url(), "cmd", "run", *args]

    # A plain script shipped through --context.
    with FileTree(tmp_path, dict(hello_files)) as tree:
        _run_and_verify_exit_code_zero(
            cmd_run("--context", str(tree), "python", "hello.py"))

    # The same script, invoked through a symlink inside the context.
    with FileTree(tmp_path, dict(hello_files)) as tree:
        symlink = tree.joinpath("hello-link.py")
        symlink.symlink_to(tree.joinpath("hello.py"))
        _run_and_verify_exit_code_zero(
            cmd_run("--context", str(tree), "python", "hello-link.py"))

    # No context at all: inline code only.
    _run_and_verify_exit_code_zero(
        cmd_run("python", "-c", "print('hello world')"))

    # A context path that does not exist must make the CLI fail.
    missing_context_cmd = cmd_run(
        "--context", "non-existent-path-here", "python", "hello.py")
    with pytest.raises(subprocess.CalledProcessError):
        _run_and_return_real_exit_status(missing_context_cmd)
Example #13
0
def experiment_has_active_workload(experiment_id: int) -> bool:
    """
    Report whether any task listed by the master belongs to the experiment
    and currently has at least one container.

    A task is matched by the text "Experiment <id>" occurring in its name.
    """
    tasks = api.get(conf.make_master_url(), "tasks").json()
    name_fragment = "Experiment {}".format(experiment_id)
    return any(
        name_fragment in task["name"] and len(task["containers"]) > 0
        for task in tasks.values()
    )
Example #14
0
def get_command_config(command_type: str, id: str) -> str:
    """
    Return the stdout of `det <command_type> config <id>`.

    :param command_type: one of "command", "notebook" or "shell".
    :param id: identifier handed to the CLI. The name shadows the builtin but
        is kept so that keyword callers continue to work.
    :return: the CLI's captured stdout as text.
    :raises AssertionError: if `command_type` is not supported or the CLI
        exits with a non-zero status.
    """
    assert command_type in ["command", "notebook", "shell"]
    command = ["det", "-m", conf.make_master_url(), command_type, "config", id]
    completed_process = subprocess.run(command,
                                       universal_newlines=True,
                                       stdout=subprocess.PIPE)
    # Include the captured stdout in the failure message; it would otherwise
    # be swallowed by the pipe and lost when the assertion fires.
    assert completed_process.returncode == 0, "\nstdout:\n{}".format(
        completed_process.stdout)
    return str(completed_process.stdout)
Example #15
0
def test_exit_code_reporting() -> None:
    """
    Sanity-check the test helpers: running `false` through `det cmd run`
    must make `_run_and_verify_exit_code_zero` raise AssertionError, i.e. a
    failed command is not reported as successful.
    """
    failing_cmd = ["det", "-m", conf.make_master_url(), "cmd", "run", "false"]
    with pytest.raises(AssertionError):
        _run_and_verify_exit_code_zero(failing_cmd)
Example #16
0
def test_large_uploads(tmp_path: Path) -> None:
    """
    Check how `det cmd run --context` copes with a large file in the context.

    With the large file present the CLI is expected to fail (presumably
    because the context exceeds an upload size limit -- confirm against the
    CLI); when a `.detignore` entry matching the file is present the same
    command must succeed.

    :param tmp_path: directory in which the context trees are created.
    """

    def write_large_file(path):
        # Seek to 120 MiB and write a single character so the file's size is
        # just over 120 MiB without pushing that much data through Python.
        # The context manager closes the handle even if a write fails.
        with path.open(mode="w") as f:
            f.seek(1024 * 1024 * 120)
            f.write("\0")

    def context_run_cmd(context_dir):
        return [
            "det",
            "-m",
            conf.make_master_url(),
            "cmd",
            "run",
            "--context",
            str(context_dir),
            "python",
            "hello.py",
        ]

    # Large file included in the context: the command must fail.
    with pytest.raises(subprocess.CalledProcessError):
        with FileTree(tmp_path, {"hello.py": "print('hello world')"}) as tree:
            write_large_file(tree.joinpath("large-file.bin"))
            _run_and_return_real_exit_status(context_run_cmd(tree))

    # Same layout, but "*.bin" is listed in .detignore: the command must pass.
    with FileTree(tmp_path, {
            "hello.py": "print('hello world')",
            ".detignore": "*.bin"
    }) as tree:
        write_large_file(tree.joinpath("large-file.bin"))
        _run_and_verify_exit_code_zero(context_run_cmd(tree))
Example #17
0
def test_iris() -> None:
    """Train the iris_tf_keras example for 2 steps, then load and summarize its top checkpoint."""
    example_dir_config = conf.official_examples_path("iris_tf_keras/const.yaml")
    iris_config = conf.set_max_steps(conf.load_config(example_dir_config), 2)

    experiment_id = exp.run_basic_test_with_temp_config(
        iris_config,
        conf.official_examples_path("iris_tf_keras"),
        1,
    )

    client = Determined(conf.make_master_url())
    checkpoint = client.get_experiment(experiment_id).top_checkpoint()
    loaded_model = checkpoint.load()
    loaded_model.summary()
Example #18
0
def test_pytorch_load() -> None:
    """Run the mnist_pytorch example and check its top checkpoint loads as a torch module on CPU."""
    pytorch_config = conf.load_config(
        conf.fixtures_path("mnist_pytorch/const-pytorch11.yaml"))

    exp_id = exp.run_basic_test_with_temp_config(
        pytorch_config,
        conf.official_examples_path("mnist_pytorch"),
        1,
    )

    client = Determined(conf.make_master_url())
    checkpoint = client.get_experiment(exp_id).top_checkpoint()
    loaded = checkpoint.load(map_location=torch.device("cpu"))
    assert isinstance(loaded, torch.nn.Module)
Example #19
0
def maybe_set_template(template_name: str, template_file: str) -> subprocess.CompletedProcess:
    """
    Run `det template set` and return the completed process unchecked.

    `template_file` is resolved relative to the directory containing this
    module. Stdout is captured as text; the exit status is left for the
    caller to inspect.
    """
    here = os.path.dirname(__file__)
    template_path = os.path.join(here, template_file)
    set_cmd = ["det", "-m", conf.make_master_url(), "template", "set"]
    set_cmd += [template_name, template_path]
    return subprocess.run(set_cmd, stdout=subprocess.PIPE, universal_newlines=True)
Example #20
0
def test_mnist_estimator_load() -> None:
    """Run the mnist_estimator example and check the first trial's top checkpoint loads as AutoTrackable."""
    estimator_config = conf.set_tf1_image(
        conf.load_config(conf.fixtures_path("mnist_estimator/single.yaml")))

    exp_id = exp.run_basic_test_with_temp_config(
        estimator_config,
        conf.official_examples_path("mnist_estimator"),
        1,
    )

    first_trial = exp.experiment_trials(exp_id)[0]
    client = Determined(conf.make_master_url())
    checkpoint = client.get_trial(first_trial.id).top_checkpoint()
    loaded = checkpoint.load()
    assert isinstance(loaded, AutoTrackable)
Example #21
0
def test_pytorch_cifar10_const() -> None:
    """Train cifar10_cnn_pytorch for 2 steps and check the latest checkpoint loads as a torch module on CPU."""
    cifar_config = conf.set_max_steps(
        conf.load_config(
            conf.official_examples_path("cifar10_cnn_pytorch/const.yaml")),
        2,
    )

    exp_id = exp.run_basic_test_with_temp_config(
        cifar_config,
        conf.official_examples_path("cifar10_cnn_pytorch"),
        1,
    )
    first_trial = exp.experiment_trials(exp_id)[0]

    client = Determined(conf.make_master_url())
    checkpoint = client.get_trial(first_trial.id).select_checkpoint(latest=True)
    loaded = checkpoint.load(map_location=torch.device("cpu"))
    assert isinstance(loaded, torch.nn.Module)
Example #22
0
def run_describe_cli_tests(experiment_id: int) -> None:
    """
    Run `det experiment describe --outdir` for an experiment, once without
    and once with `--metrics`, and check the expected CSV files are written.

    A non-zero CLI exit status raises `subprocess.CalledProcessError`; a
    missing output file raises AssertionError.
    """
    expected_csvs = ("experiments.csv", "steps.csv", "trials.csv")

    # First pass: plain describe. Second pass: describe with metrics.
    for extra_flags in ([], ["--metrics"]):
        with tempfile.TemporaryDirectory() as outdir:
            describe_cmd = [
                "det",
                "-m",
                conf.make_master_url(),
                "experiment",
                "describe",
                str(experiment_id),
            ]
            describe_cmd += extra_flags
            describe_cmd += ["--outdir", outdir]
            subprocess.check_call(describe_cmd)

            for csv_name in expected_csvs:
                assert os.path.exists(os.path.join(outdir, csv_name))
Example #23
0
def test_absolute_bind_mount(tmp_path: Path) -> None:
    """Check absolute bind mounts given via `--volume` alone and combined with a config file."""
    def cmd_run(*args):
        # Builds `det -m <master> cmd run <args...>`.
        return ["det", "-m", conf.make_master_url(), "cmd", "run", *args]

    # Mount requested purely on the command line.
    _run_and_verify_exit_code_zero(
        cmd_run("--volume", "/bin:/foo-bar", "ls", "/foo-bar"))

    bind_mount_yaml = """
bind_mounts:
- host_path: /bin
  container_path: /foo-bar
"""
    # One mount from the config file plus a second one from --volume; both
    # container paths must be listable.
    with FileTree(tmp_path, {"config.yaml": bind_mount_yaml}) as tree:
        config_path = tree.joinpath("config.yaml")
        _run_and_verify_exit_code_zero(
            cmd_run(
                "--volume",
                "/bin:/foo-bar2",
                "--config-file",
                str(config_path),
                "ls",
                "/foo-bar",
                "/foo-bar2",
            ))
Example #24
0
def test_end_to_end_adaptive() -> None:
    """
    Run the adaptive mnist_pytorch fixture end to end, then check the best
    validation accuracy and the ordering returned by `top_n_checkpoints`.
    """
    exp_id = exp.run_basic_test(
        conf.fixtures_path("mnist_pytorch/adaptive_short.yaml"),
        conf.official_examples_path("mnist_pytorch"),
        None,
    )

    # Check that validation accuracy looks sane (more than 93% on MNIST).
    trials = exp.experiment_trials(exp_id)
    best = None
    for trial in trials:
        assert len(trial.steps)
        last_step = trial.steps[-1]
        accuracy = last_step.validation.metrics["validation_metrics"][
            "accuracy"]
        # Compare against None explicitly: a legitimate accuracy of 0.0 is
        # falsy and must not be mistaken for "no value recorded yet".
        if best is None or accuracy > best:
            best = accuracy

    assert best is not None
    assert best > 0.93

    # Check that ExperimentReference returns a sorted order of top checkpoints
    # without gaps. The top 2 checkpoints should be the first 2 of the top k
    # checkpoints if sorting is stable.
    exp_ref = Determined(conf.make_master_url()).get_experiment(exp_id)

    top_2 = exp_ref.top_n_checkpoints(2)
    top_k = exp_ref.top_n_checkpoints(len(trials))

    top_2_uuids = [c.uuid for c in top_2]
    top_k_uuids = [c.uuid for c in top_k]

    assert top_2_uuids == top_k_uuids[:2]

    # Check that metrics are truly in sorted order.
    metrics = [
        c.validation.metrics["validation_metrics"]["validation_loss"]
        for c in top_k
    ]

    assert metrics == sorted(metrics)

    # Check that changing smaller is better reverses the checkpoint ordering.
    top_k_reversed = exp_ref.top_n_checkpoints(len(trials),
                                               sort_by="validation_loss",
                                               smaller_is_better=False)
    top_k_reversed_uuids = [c.uuid for c in top_k_reversed]

    assert top_k_uuids == top_k_reversed_uuids[::-1]
Example #25
0
def get_command_config(command_type: str, id: str) -> str:
    """
    Return the stdout of `det <command_type> config <id>`.

    The CLI runs with DET_DEBUG=true and with stdout and stderr captured as
    text; both streams are included in the assertion message when the exit
    status is non-zero.

    :param command_type: one of "command", "notebook" or "shell".
    :param id: identifier handed to the CLI.
    """
    assert command_type in ["command", "notebook", "shell"]

    config_cmd = ["det", "-m", conf.make_master_url(), command_type, "config", id]
    debug_env = dict(os.environ, DET_DEBUG="true")
    result = subprocess.run(
        config_cmd,
        env=debug_env,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )

    failure_report = "\nstdout:\n{} \nstderr:\n{}".format(result.stdout, result.stderr)
    assert result.returncode == 0, failure_report
    return str(result.stdout)
Example #26
0
def maybe_create_native_experiment(context_dir: str,
                                   command: List[str]) -> Optional[int]:
    """
    Launch `command` in `context_dir` with DET_MASTER set and scan its stdout
    for a "Created experiment <id>" line.

    :return: the parsed experiment id, or None if stdout ends without a match.
    """
    created_re = re.compile(r"Created experiment (\d+)\n")

    child_env = dict(os.environ)
    child_env["DET_MASTER"] = conf.make_master_url()

    proc = subprocess.Popen(command,
                            cwd=context_dir,
                            env=child_env,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE)
    with proc:
        for raw_line in proc.stdout:
            found = created_re.search(raw_line.decode())
            if found is None:
                continue
            # Stop at the first match; leaving the `with` block cleans up
            # the process's pipes.
            return int(found.group(1))

    return None
Example #27
0
def maybe_create_experiment(implementation: NativeImplementation) -> typing.Optional[int]:
    """
    Launch the implementation's command with its configuration passed as a
    JSON `--config` argument, and scan stdout for "Created experiment <id>".

    The process runs in `implementation.cwd` with DET_MASTER set.

    :return: the parsed experiment id, or None if stdout ends without a match.
    """
    logging.debug(implementation)

    created_re = re.compile(r"Created experiment (\d+)\n")

    child_env = dict(os.environ)
    child_env["DET_MASTER"] = conf.make_master_url()

    config_json = json.dumps(implementation.configuration)
    full_command = implementation.command + ["--config", config_json]

    proc = subprocess.Popen(
        full_command,
        cwd=implementation.cwd,
        env=child_env,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
    )
    with proc:
        for raw_line in proc.stdout:
            found = created_re.search(raw_line.decode())
            if found is None:
                continue
            return int(found.group(1))

    return None
Example #28
0
def test_image_pull_after_remove() -> None:
    """
    Remove the alpine:3.10 image from the local Docker daemon (if present)
    and verify that a command using that image still runs successfully,
    i.e. that the image gets pulled again with auth.
    """
    docker_client = docker.from_env()
    try:
        docker_client.images.remove("alpine:3.10")
    except docker.errors.ImageNotFound:
        # The image was not present locally, so there is nothing to remove.
        pass

    run_cmd = ["det", "-m", conf.make_master_url(), "cmd", "run"]
    run_cmd += ["--config", "environment.image=alpine:3.10"]
    run_cmd.append("sleep 3; echo hello world")
    _run_and_verify_exit_code_zero(run_cmd)
Example #29
0
def maybe_create_experiment(
        config_file: str,
        model_def_file: str,
        create_args: Optional[List[str]] = None
) -> subprocess.CompletedProcess:
    """
    Run `det experiment create` and return the completed process unchecked.

    Stdout is captured as text; the exit status is left for the caller to
    inspect.

    :param config_file: experiment configuration path passed to the CLI.
    :param model_def_file: model definition path passed to the CLI.
    :param create_args: optional extra CLI arguments, appended after the paths.
    """
    create_cmd = ["det", "-m", conf.make_master_url(), "experiment", "create"]
    create_cmd.append(config_file)
    create_cmd.append(model_def_file)
    if create_args is not None:
        create_cmd.extend(create_args)

    return subprocess.run(
        create_cmd,
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
Example #30
0
def query() -> api.GraphQLQuery:
    """Initialize the session against the master and return a new GraphQL query object for it."""
    auth.initialize_session(conf.make_master_url(), try_reauth=True)
    master_url = conf.make_master_url()
    return api.GraphQLQuery(master_url)