예제 #1
0
def test_write_load_project_spec(temp_workdir):
    """Round-trip a project spec: write version 1234, then load it back."""
    with open("replicate.yaml", "w") as config_file:
        config_file.write("repository: file://.replicate/")

    expected_version = 1234
    proj = Project()
    proj._write_project_spec(version=expected_version)

    loaded_spec = proj._load_project_spec()
    assert loaded_spec.version == expected_version
예제 #2
0
def test_load_project_spec(temp_workdir):
    """Load a hand-written repository.json and compare it to a ProjectSpec."""
    with open("replicate.yaml", "w") as config_file:
        config_file.write("repository: file://.replicate/")

    spec_json = """{
  "version": 1234
}"""
    os.mkdir(".replicate")
    with open(".replicate/repository.json", "w") as spec_file:
        spec_file.write(spec_json)

    proj = Project()
    loaded_spec = proj._load_project_spec()
    assert loaded_spec == ProjectSpec(version=1234)
예제 #3
0
def test_write_project_spec(temp_workdir):
    """Write a project spec and check the exact JSON stored on disk."""
    with open("replicate.yaml", "w") as config_file:
        config_file.write("repository: file://.replicate/")

    proj = Project()
    proj._write_project_spec(version=1234)

    expected_json = """{
  "version": 1234
}"""
    with open(".replicate/repository.json") as spec_file:
        written_json = spec_file.read()
    assert written_json == expected_json
예제 #4
0
    def test_exceptional_values(self, temp_workdir):
        """Checkpoint NaN, -inf, +inf and None metrics and read them back."""
        project = Project()

        with open("replicate.yaml", "w") as config_file:
            config_file.write("repository: file://.replicate/")

        experiment = project.experiments.create(disable_heartbeat=True)

        # One checkpoint per special value, in the order asserted below.
        special_values = (float("nan"), float("-inf"), float("+inf"), None)
        for special in special_values:
            experiment.checkpoint(
                path=None,
                metrics={"accuracy": special},
                primary_metric=("accuracy", "maximize"),
            )

        reloaded = project.experiments.get(experiment.id)
        accuracies = [
            reloaded.checkpoints[index].metrics["accuracy"]
            for index in range(len(special_values))
        ]

        assert math.isnan(accuracies[0])
        assert math.isinf(accuracies[1])
        assert accuracies[1] < 0
        assert math.isinf(accuracies[2])
        assert accuracies[2] > 0
        assert accuracies[3] is None
예제 #5
0
    def test_open(self, temp_workdir):
        """Read files saved by an experiment and a checkpoint back via open()."""
        project = Project()
        with open("foo.txt", "w") as foo_file:
            foo_file.write("foo")

        with open("replicate.yaml", "w") as config_file:
            config_file.write("repository: file://.replicate/")

        exp = project.experiments.create(
            path=".", params={"foo": "bar"}, disable_heartbeat=True
        )
        with open("bar.txt", "w") as bar_file:
            bar_file.write("bar")
        chk = exp.checkpoint(path="bar.txt", metrics={"accuracy": "awesome"})

        archive = os.path.join(".replicate/checkpoints", chk.id + ".tar.gz")

        def archive_exists():
            return os.path.exists(archive)

        wait(archive_exists, timeout_seconds=5, sleep_seconds=0.01)
        time.sleep(0.1)  # wait to finish writing

        expected_contents = (("foo.txt", "foo"), ("bar.txt", "bar"))

        # The checkpoint object returned directly by exp.checkpoint()
        for filename, text in expected_contents:
            assert chk.open(filename).read().decode() == text

        # The same checkpoint, re-read through project.experiments.list()
        listed_exp = project.experiments.list()[0]
        listed_chk = listed_exp.checkpoints[0]
        for filename, text in expected_contents:
            assert listed_chk.open(filename).read().decode() == text
예제 #6
0
    def test_delete(self, temp_workdir):
        """Deleting an experiment removes its archives and metadata.

        Creates an experiment with one checkpoint, waits for the checkpoint
        archive to appear, checks the exact set of paths under ``.replicate``,
        then deletes the experiment and checks that only the project spec and
        the now-empty directories are left.
        """
        project = Project()

        with open("replicate.yaml", "w") as f:
            f.write("repository: file://.replicate/")

        with open("foo.txt", "w") as f:
            f.write("hello")

        experiment = project.experiments.create(path=".",
                                                params={"foo": "bar"},
                                                disable_heartbeat=True)
        with open("model.txt", "w") as f:
            f.write("i'm a model")
        chk = experiment.checkpoint(path="model.txt",
                                    metrics={"accuracy": "awesome"})

        def get_paths():
            # Repository-relative POSIX paths: relative_to() strips only the
            # leading ".replicate" component and as_posix() keeps the result
            # independent of the platform's path separator.
            return {
                p.relative_to(".replicate").as_posix()
                for p in Path(".replicate").rglob("*")
            }

        chk_tar_path = os.path.join(".replicate/checkpoints",
                                    chk.id + ".tar.gz")
        wait(
            lambda: os.path.exists(chk_tar_path),
            timeout_seconds=5,
            sleep_seconds=0.01,
        )

        paths = get_paths()
        expected = {
            "repository.json",
            "metadata/experiments/{}.json".format(experiment.id),
            "experiments",
            "checkpoints/{}.tar.gz".format(chk.id),
            "metadata",
            "metadata/experiments",
            "experiments/{}.tar.gz".format(experiment.id),
            "checkpoints",
        }
        assert paths == expected

        experiment.delete()

        paths = get_paths()
        expected = {
            "repository.json",  # we're not deleting the project spec
            "experiments",
            "metadata",
            "metadata/experiments",
            "checkpoints",
        }
        assert paths == expected
예제 #7
0
def test_load_corrupted_project_spec(temp_workdir):
    """Loading a malformed or incomplete spec raises CorruptedProjectSpec."""
    with open("replicate.yaml", "w") as config_file:
        config_file.write("repository: file://.replicate/")

    project = Project()
    os.mkdir(".replicate")

    # First payload is not valid JSON; second is valid but has no "version".
    invalid_json = """{
  "version": asdf
}"""
    missing_version = """{
  "foo": "bar"
}"""

    for corrupted in (invalid_json, missing_version):
        with open(".replicate/repository.json", "w") as spec_file:
            spec_file.write(corrupted)

        with pytest.raises(CorruptedProjectSpec):
            project._load_project_spec()
예제 #8
0
    def test_checkpoints(self, temp_workdir):
        """Checkpoints are recorded on the experiment in creation order."""
        project = Project()

        with open("replicate.yaml", "w") as config_file:
            config_file.write("repository: file://.replicate/")

        experiment = project.experiments.create(
            path=None, params={"foo": "bar"}, disable_heartbeat=True
        )
        first = experiment.checkpoint(path=None, metrics={"accuracy": "ok"})
        second = experiment.checkpoint(path=None, metrics={"accuracy": "super"})

        assert len(experiment.checkpoints) == 2
        recorded_ids = [
            experiment.checkpoints[0].id,
            experiment.checkpoints[1].id,
        ]
        assert recorded_ids == [first.id, second.id]
예제 #9
0
    def test_checkpoint_auto_increments_step(self, temp_workdir):
        """Steps count up from 0 and continue after an explicit step value."""
        project = Project()

        with open("replicate.yaml", "w") as config_file:
            config_file.write("repository: file://.replicate/")

        experiment = project.experiments.create(
            path=None, params={"foo": "bar"}, disable_heartbeat=True
        )

        # Keyword arguments for each checkpoint call, in order; only the
        # third call passes an explicit step.
        call_kwargs = ({}, {}, {"step": 10}, {})
        created = [experiment.checkpoint(**kwargs) for kwargs in call_kwargs]

        assert created[0].step == 0
        assert created[1].step == 1
        assert created[2].step == 10
        assert created[3].step == 11
예제 #10
0
    def test_list_project_options(self, has_repository, has_directory,
                                  has_config, should_error, temp_workdir):
        """List experiments under each mix of repository/directory/config.

        When ``should_error`` is set, listing must raise ValueError or
        ConfigNotFoundError; otherwise it returns an empty ExperimentList.
        """
        if has_repository:
            repo = "file://.replicate/"
        else:
            repo = None

        if has_directory:
            directory = "."
        else:
            directory = None

        if has_config:
            with open("replicate.yaml", "w") as config_file:
                config_file.write("repository: file://.replicate/")

        project = Project(repository=repo, directory=directory)

        if not should_error:
            listed = project.experiments.list()
            assert isinstance(listed, ExperimentList)
            assert len(listed) == 0
            return

        with pytest.raises((ValueError, ConfigNotFoundError)):
            project.experiments.list()
예제 #11
0
    def test_create_project_options(self, has_repository, has_directory,
                                    has_config, should_error, temp_workdir):
        """Create an experiment under each mix of repository/directory/config.

        The created object is expected to be a BrokenExperiment when
        ``should_error`` is set and an Experiment otherwise.
        """
        repo = None
        if has_repository:
            repo = "file://.replicate/"

        directory = None
        if has_directory:
            directory = "."

        if should_error:
            expected_type = BrokenExperiment
        else:
            expected_type = Experiment

        if has_config:
            with open("replicate.yaml", "w") as config_file:
                config_file.write("repository: file://.replicate/")

        project = Project(repository=repo, directory=directory)
        created = project.experiments.create(path=".")

        assert isinstance(created, expected_type)

        if should_error:
            return

        # to avoid writing heartbeats that sometimes cause
        # TemporaryDirectory cleanup to fail
        created.stop()
예제 #12
0
    def test_best_none(self, temp_workdir):
        """best() is None when the primary metric is only ever None or NaN."""
        project = Project()

        with open("replicate.yaml", "w") as config_file:
            config_file.write("repository: file://.replicate/")

        experiment = project.experiments.create(disable_heartbeat=True)

        for accuracy in (None, float("nan")):
            experiment.checkpoint(
                path=None,
                metrics={"accuracy": accuracy},
                primary_metric=("accuracy", "maximize"),
            )

        best_checkpoint = experiment.best()
        assert best_checkpoint is None
예제 #13
0
    def test_delete(self, temp_workdir):
        """Deleting an experiment removes its archives and metadata.

        Creates an experiment with one checkpoint, checks the exact set of
        paths under ``.replicate``, then deletes the experiment and checks
        that only the now-empty directories are left.
        """
        project = Project()

        with open("replicate.yaml", "w") as f:
            f.write("repository: file://.replicate/")

        with open("foo.txt", "w") as f:
            f.write("hello")

        experiment = project.experiments.create(
            path=".", params={"foo": "bar"}, disable_heartbeat=True
        )
        with open("model.txt", "w") as f:
            f.write("i'm a model")
        chk = experiment.checkpoint(path="model.txt", metrics={"accuracy": "awesome"})

        def get_paths():
            # Repository-relative POSIX paths: relative_to() strips only the
            # leading ".replicate" component and as_posix() keeps the result
            # independent of the platform's path separator.
            return {
                p.relative_to(".replicate").as_posix()
                for p in Path(".replicate").rglob("*")
            }

        paths = get_paths()
        expected = {
            "metadata/experiments/{}.json".format(experiment.id),
            "experiments",
            "checkpoints/{}.tar.gz".format(chk.id),
            "metadata",
            "metadata/experiments",
            "experiments/{}.tar.gz".format(experiment.id),
            "checkpoints",
        }
        assert paths == expected

        experiment.delete()

        paths = get_paths()
        expected = {"experiments", "metadata", "metadata/experiments", "checkpoints"}
        assert paths == expected
예제 #14
0
    def test_create_project_options(self, has_repository, has_directory,
                                    has_config, exception, temp_workdir):
        """Create an experiment under each mix of repository/directory/config.

        When ``exception`` is truthy, creation must raise it; otherwise the
        experiment is created and stopped again.
        """
        repo = None
        if has_repository:
            repo = "file://.replicate/"

        directory = None
        if has_directory:
            directory = "."

        if has_config:
            with open("replicate.yaml", "w") as config_file:
                config_file.write("repository: file://.replicate/")

        project = Project(repository=repo, directory=directory)

        if not exception:
            created = project.experiments.create(path=".")
            # to avoid writing heartbeats that sometimes cause
            # TemporaryDirectory cleanup to fail
            created.stop()
            return

        with pytest.raises(exception):
            project.experiments.create(path=".")
예제 #15
0
    def test_list(self, temp_workdir):
        """List two experiments and check their order, ids and checkpoints."""
        project = Project()

        with open("replicate.yaml", "w") as config_file:
            config_file.write("repository: file://.replicate/")

        first = project.experiments.create(
            path=None, params={"foo": "bar"}, disable_heartbeat=True
        )
        first.checkpoint(path=None, metrics={"accuracy": "wicked"})
        second = project.experiments.create(
            path=None, params={"foo": "baz"}, disable_heartbeat=True
        )

        listed = project.experiments.list()
        assert len(listed) == 2

        listed_first = listed[0]
        assert listed_first.id == first.id
        assert len(listed_first.checkpoints) == 1
        assert listed_first.checkpoints[0].metrics == {"accuracy": "wicked"}

        listed_second = listed[1]
        assert listed_second.id == second.id
예제 #16
0
    def test_refresh(self, temp_workdir):
        """A second handle only sees new checkpoints after refresh()."""
        project = Project()

        with open("replicate.yaml", "w") as config_file:
            config_file.write("repository: file://.replicate/")

        writer = project.experiments.create(
            params={"foo": "bar"}, disable_heartbeat=True
        )
        writer.checkpoint(metrics={"accuracy": 0})

        reader = project.experiments.get(writer.id)
        assert len(reader.checkpoints) == 1

        # A checkpoint added through the first handle must not show up on the
        # second handle until it is refreshed.
        writer.checkpoint(metrics={"accuracy": 1})
        assert len(reader.checkpoints) == 1

        reader.refresh()
        assert len(reader.checkpoints) == 2
        newest = reader.checkpoints[-1]
        assert newest.metrics["accuracy"] == 1
예제 #17
0
    def test_get(self, temp_workdir):
        """Fetch experiments by full id and by id prefix; unknown ids raise."""
        project = Project()

        with open("replicate.yaml", "w") as config_file:
            config_file.write("repository: file://.replicate/")

        first = project.experiments.create(
            path=None, params={"foo": "bar"}, disable_heartbeat=True
        )
        first.checkpoint(path=None, metrics={"accuracy": "wicked"})
        second = project.experiments.create(
            path=None, params={"foo": "baz"}, disable_heartbeat=True
        )

        fetched = project.experiments.get(first.id)
        assert fetched.created == first.created
        assert len(fetched.checkpoints) == 1
        assert fetched.checkpoints[0].metrics == {"accuracy": "wicked"}

        # get by prefix
        id_prefix = second.id[:7]
        fetched_by_prefix = project.experiments.get(id_prefix)
        assert fetched_by_prefix.created == second.created

        with pytest.raises(DoesNotExistError):
            project.experiments.get("doesnotexist")
예제 #18
0
    def test_open(self, temp_workdir):
        """Read files saved by an experiment and a checkpoint back via open()."""
        project = Project()
        with open("foo.txt", "w") as foo_file:
            foo_file.write("foo")

        with open("replicate.yaml", "w") as config_file:
            config_file.write("repository: file://.replicate/")

        exp = project.experiments.create(
            path=".", params={"foo": "bar"}, disable_heartbeat=True
        )
        with open("bar.txt", "w") as bar_file:
            bar_file.write("bar")
        chk = exp.checkpoint(path="bar.txt", metrics={"accuracy": "awesome"})

        expected_contents = (("foo.txt", "foo"), ("bar.txt", "bar"))

        # The checkpoint object returned directly by exp.checkpoint()
        for filename, text in expected_contents:
            assert chk.open(filename).read().decode() == text

        # The same checkpoint, re-read through project.experiments.list()
        listed_exp = project.experiments.list()[0]
        listed_chk = listed_exp.checkpoints[0]
        for filename, text in expected_contents:
            assert listed_chk.open(filename).read().decode() == text
예제 #19
0
import tempfile

from replicate.project import Project
from replicate.repository import repository_for_url

# Command-line interface: two positional arguments, `bucket` and
# `bucket_prime`.
parser = argparse.ArgumentParser(
    description=
    "Create two projects: one with lots of metadata, and another which is the same but with a few new projects and checkpoints to test incremental updates"
)
parser.add_argument("bucket")
# NOTE(review): `bucket_prime` is parsed but not used in the visible part of
# this script; presumably it is consumed further down. Confirm.
parser.add_argument("bucket_prime")
args = parser.parse_args()

# Build the project inside a temporary directory that is removed when the
# `with` block exits.
with tempfile.TemporaryDirectory() as project_dir:
    print("Creating project...")
    project = Project(dir=project_dir)
    # 1000 experiments with 100 checkpoints each, all created with path=None.
    for i in range(1000):
        # Progress output for every tenth experiment.
        if i % 10 == 0:
            print("Experiment", i)
        experiment = project.experiments.create(path=None,
                                                params={"foo": "bar"},
                                                quiet=True,
                                                disable_heartbeat=True)
        for j in range(100):
            experiment.checkpoint(path=None,
                                  metrics={"loss": 0.00001},
                                  quiet=True)

    print("Uploading to bucket...")
    # Presumably copies the project's .replicate/ directory to the root of the
    # repository at args.bucket. TODO confirm put_path's argument semantics.
    repository = repository_for_url(args.bucket)
    repository.put_path(os.path.join(project_dir, ".replicate/"), "")
예제 #20
0
            metrics={"loss": loss.item(), "accuracy": acc},
            primary_metric=("loss", "minimize"),
        )

    experiment.stop()


# Command-line interface: a single positional `repository` argument.
parser = argparse.ArgumentParser(
    description=(
        "Create a project with a bunch of realistic-ish data "
        "to test `replicate ls` output and things"
    )
)
parser.add_argument("repository")
args = parser.parse_args()

with tempfile.TemporaryDirectory() as project_dir:
    print("Creating project...")
    project = Project(directory=".", repository=args.repository)
    train(project, learning_rate=0.01, num_epochs=10)
    train(project, learning_rate=0.05, num_epochs=10)
    train(project, learning_rate=0.01, num_epochs=100)
    train(project, learning_rate=0.05, num_epochs=100)
    train(project, learning_rate=0.001, num_epochs=100)
    train(project, learning_rate=0.1, num_epochs=100)
    train(project, learning_rate=0.01, num_epochs=100, hidden_layer_size=30)
    train(project, learning_rate=0.01, num_epochs=100, hidden_layer_size=10)
    train(project, learning_rate=0.01, num_epochs=50, model="bartnet")
    train(project, learning_rate=0.01, num_epochs=50, dropout_rate=0.5, model="homnet")
    train(project, learning_rate=0.01, num_epochs=50, dropout_rate=0.3, model="homnet")
    train(
        project,
        learning_rate=0.01,
        num_epochs=50,
예제 #21
0
    def test_checkout(self, temp_workdir, tmpdir_factory):
        """Check out checkpoints into fresh directories and verify the files.

        Covers a checkpoint with both experiment and checkpoint paths (as
        returned and as re-listed), a checkpoint with no paths at all (must
        raise DoesNotExist), an experiment without a path, and a checkpoint
        without a path.
        """

        def wait_for_archive(kind, object_id):
            # Poll until the archive file exists, then pause briefly so the
            # writer can finish.
            archive = os.path.join(".replicate/" + kind, object_id + ".tar.gz")
            wait(
                lambda: os.path.exists(archive),
                timeout_seconds=5,
                sleep_seconds=0.01,
            )
            time.sleep(0.1)  # wait to finish writing

        def checkout_to_new_dir(checkpoint):
            # Check the checkpoint out into a newly created temp directory.
            out_dir = tmpdir_factory.mktemp("checkout")
            checkpoint.checkout(output_directory=str(out_dir))
            return out_dir

        def assert_file_text(file_path, text):
            with open(file_path) as checked_out:
                assert checked_out.read() == text

        project = Project()
        with open("foo.txt", "w") as foo_file:
            foo_file.write("foo")

        with open("replicate.yaml", "w") as config_file:
            config_file.write("repository: file://.replicate/")

        exp = project.experiments.create(
            path=".", params={"foo": "bar"}, disable_heartbeat=True
        )
        with open("bar.txt", "w") as bar_file:
            bar_file.write("bar")
        chk = exp.checkpoint(path="bar.txt", metrics={"accuracy": "awesome"})
        wait_for_archive("checkpoints", chk.id)

        # test with already existing checkpoint
        out_dir = checkout_to_new_dir(chk)
        assert_file_text(out_dir / "foo.txt", "foo")
        assert_file_text(out_dir / "bar.txt", "bar")

        # test with checkpoint from replicate.experiments.list()
        exp = project.experiments.list()[0]
        chk = exp.checkpoints[0]
        out_dir = checkout_to_new_dir(chk)
        assert_file_text(out_dir / "foo.txt", "foo")
        assert_file_text(out_dir / "bar.txt", "bar")

        # test with no paths
        exp = project.experiments.create(
            params={"foo": "bar"}, disable_heartbeat=True
        )
        chk = exp.checkpoint(metrics={"accuracy": "awesome"})
        out_dir = tmpdir_factory.mktemp("checkout")
        with pytest.raises(DoesNotExist):
            chk.checkout(output_directory=str(out_dir))

        # test experiment with no path
        exp = project.experiments.create(
            params={"foo": "bar"}, disable_heartbeat=True
        )
        chk = exp.checkpoint(path="bar.txt", metrics={"accuracy": "awesome"})
        wait_for_archive("checkpoints", chk.id)

        out_dir = checkout_to_new_dir(chk)
        assert not os.path.exists(out_dir / "foo.txt")
        assert_file_text(out_dir / "bar.txt", "bar")

        # test checkpoint with no path
        exp = project.experiments.create(
            path="foo.txt", params={"foo": "bar"}, disable_heartbeat=True
        )
        chk = exp.checkpoint(metrics={"accuracy": "awesome"})
        wait_for_archive("experiments", exp.id)

        out_dir = checkout_to_new_dir(chk)
        assert not os.path.exists(out_dir / "bar.txt")
        assert_file_text(out_dir / "foo.txt", "foo")
예제 #22
0
def test_empty_experiment_from_pb():
    """Converting the empty experiment protobuf gives the empty experiment."""
    source_pb = empty_experiment_pb()
    proj = Project()
    wanted = empty_experiment(proj)

    converted = pb_convert.experiment_from_pb(proj, source_pb)
    assert converted == wanted
예제 #23
0
def test_empty_experiment_to_pb():
    """Converting the empty experiment gives the empty experiment protobuf."""
    proj = Project()
    source_exp = empty_experiment(proj)
    wanted_pb = empty_experiment_pb()

    converted_pb = pb_convert.experiment_to_pb(source_exp)
    assert converted_pb == wanted_pb
예제 #24
0
def test_load_missing_project_spec(temp_workdir):
    """Loading the project spec returns None when none has been written."""
    with open("replicate.yaml", "w") as config_file:
        config_file.write("repository: file://.replicate/")

    proj = Project()
    loaded_spec = proj._load_project_spec()
    assert loaded_spec is None
예제 #25
0
    def test_checkout(self, temp_workdir, tmpdir_factory):
        """Check out checkpoints into fresh directories and verify the files.

        Covers a checkpoint with both experiment and checkpoint paths (as
        returned and as re-listed), a checkpoint with no paths at all (must
        raise DoesNotExistError), an experiment without a path, and a
        checkpoint without a path.
        """

        def checkout_to_new_dir(checkpoint):
            # Check the checkpoint out into a newly created temp directory.
            out_dir = tmpdir_factory.mktemp("checkout")
            checkpoint.checkout(output_directory=str(out_dir))
            return out_dir

        def assert_file_text(file_path, text):
            with open(file_path) as checked_out:
                assert checked_out.read() == text

        project = Project()
        with open("foo.txt", "w") as foo_file:
            foo_file.write("foo")

        with open("replicate.yaml", "w") as config_file:
            config_file.write("repository: file://.replicate/")

        exp = project.experiments.create(
            path=".", params={"foo": "bar"}, disable_heartbeat=True
        )
        with open("bar.txt", "w") as bar_file:
            bar_file.write("bar")
        chk = exp.checkpoint(path="bar.txt", metrics={"accuracy": "awesome"})

        # test with already existing checkpoint
        out_dir = checkout_to_new_dir(chk)
        assert_file_text(out_dir / "foo.txt", "foo")
        assert_file_text(out_dir / "bar.txt", "bar")

        # test with checkpoint from replicate.experiments.list()
        exp = project.experiments.list()[0]
        chk = exp.checkpoints[0]
        out_dir = checkout_to_new_dir(chk)
        assert_file_text(out_dir / "foo.txt", "foo")
        assert_file_text(out_dir / "bar.txt", "bar")

        # test with no paths
        exp = project.experiments.create(
            params={"foo": "bar"}, disable_heartbeat=True
        )
        chk = exp.checkpoint(metrics={"accuracy": "awesome"})
        out_dir = tmpdir_factory.mktemp("checkout")
        with pytest.raises(DoesNotExistError):
            chk.checkout(output_directory=str(out_dir))

        # test experiment with no path
        exp = project.experiments.create(
            params={"foo": "bar"}, disable_heartbeat=True
        )
        chk = exp.checkpoint(path="bar.txt", metrics={"accuracy": "awesome"})
        out_dir = checkout_to_new_dir(chk)
        assert not os.path.exists(out_dir / "foo.txt")
        assert_file_text(out_dir / "bar.txt", "bar")

        # test checkpoint with no path
        exp = project.experiments.create(
            path="foo.txt", params={"foo": "bar"}, disable_heartbeat=True
        )
        chk = exp.checkpoint(metrics={"accuracy": "awesome"})
        out_dir = checkout_to_new_dir(chk)
        assert not os.path.exists(out_dir / "bar.txt")
        assert_file_text(out_dir / "foo.txt", "foo")