class TestObservations:
    """Tests for logging observations on an experiment run and reading them back."""

    # Fixture data: each randomly generated key maps to the sequence of values
    # logged under it. The three sequences are produced by utils.gen_str,
    # utils.gen_int, and utils.gen_float respectively.
    observations = {
        utils.gen_str(): [utils.gen_str(), utils.gen_str()],
        utils.gen_str(): [utils.gen_int(), utils.gen_int()],
        utils.gen_str(): [utils.gen_float(), utils.gen_float()],
    }

    def test_single(self, experiment_run):
        """Log every value one call at a time, then read back per key and in bulk."""
        for name, series in six.viewitems(self.observations):
            for item in series:
                experiment_run.log_observation(name, item)

        # a key that was never logged must raise KeyError
        with pytest.raises(KeyError):
            experiment_run.get_observation(utils.gen_str())

        # each retrieved entry is a pair; only its first element is compared
        for name, series in six.viewitems(self.observations):
            logged = experiment_run.get_observation(name)
            assert [value for value, _ in logged] == series

        retrieved = {}
        for name, logged in experiment_run.get_observations().items():
            retrieved[name] = [value for value, _ in logged]
        assert retrieved == self.observations

    def test_log_collection(self, experiment_run):
        """Logging a list or a dict as an observation must raise TypeError."""
        # single fn: list first, then dict
        for make_collection in (utils.gen_list, utils.gen_dict):
            with pytest.raises(TypeError):
                experiment_run.log_observation(utils.gen_str(),
                                               make_collection())
    def test_log_collection(self, experiment_run):
        """Logging a list or a dict as a metric must raise TypeError.

        Both the single-metric function and the batch function are checked
        for each collection type.
        """
        # list first, then dict
        for make_collection in (utils.gen_list, utils.gen_dict):
            with pytest.raises(TypeError):  # single fn
                experiment_run.log_metric(utils.gen_str(), make_collection())

            with pytest.raises(TypeError):  # batch fn
                experiment_run.log_metrics(
                    {utils.gen_str(): make_collection()})
    def test_atomic(self, experiment_run):
        """Check that a batch is rejected as a whole when a single key conflicts.

        After ``self.metrics`` is logged, every batch that pairs one
        already-logged key with one fresh key must raise ValueError, and the
        stored metrics must still equal ``self.metrics`` afterwards.
        """
        experiment_run.log_metrics(self.metrics)

        for logged_key, logged_val in six.viewitems(self.metrics):
            with pytest.raises(ValueError):
                conflicting_batch = {
                    logged_key: logged_val,
                    utils.gen_str(): utils.gen_str(),
                }
                experiment_run.log_metrics(conflicting_batch)

        stored = experiment_run.get_metrics()
        assert stored == self.metrics
Beispiel #4
0
    def test_creation_from_scratch(self, client):
        """Create a version of a "local" dataset from this file's path.

        The resulting version must be typed as PATH and carry a truthy id.
        """
        local_dataset = client.set_dataset(name=utils.gen_str(), type="local")

        new_version = local_dataset.create_version(__file__)
        expected_type = _DatasetService.DatasetTypeEnum.PATH
        assert new_version.dataset_type == expected_type
        assert new_version.id
Beispiel #5
0
    def run_fake_experiment(client):
        """Drive one experiment run through log/get calls with throwaway data.

        Obtains a run from ``client.set_experiment_run()`` and then logs and
        reads back an attribute, three hyperparameters, three observations
        under a single key, a metric, and an artifact. All values returned by
        the ``get_*`` calls are discarded.
        """
        fake_run = client.set_experiment_run()

        fake_run.log_attribute("is_test", True)
        fake_run.get_attribute("is_test")

        hyperparams = {
            'C': utils.gen_float(),
            'solver': utils.gen_str(),
            'max_iter': utils.gen_int(),
        }
        fake_run.log_hyperparameters(hyperparams)
        for hyperparam_name in ("C", "solver", "max_iter"):
            fake_run.get_hyperparameter(hyperparam_name)

        # three values accumulate under the same observation key
        for _ in range(3):
            fake_run.log_observation("rand_val", utils.gen_float())
        fake_run.get_observation("rand_val")

        fake_run.log_metric("val_acc", utils.gen_float())
        fake_run.get_metric("val_acc")

        # the run object itself is used as the artifact payload
        fake_run.log_artifact("self", fake_run)
        fake_run.get_artifact("self")
Beispiel #6
0
    def test_s3_dataset_version_creation(self, client, s3_bucket):
        """Create an S3 dataset version and expect exactly two dataset parts."""
        dataset_name = "s3-" + utils.gen_str()
        s3_dataset = client.create_s3_dataset(dataset_name)
        s3_version = client.create_s3_dataset_version(s3_dataset, s3_bucket)

        part_infos = s3_version.dataset_version_info.dataset_part_infos
        assert len(part_infos) == 2
Beispiel #7
0
    def test_big_query_dataset_version_creation(self, client, big_query_job):
        """Create a BigQuery dataset version and check the query it records.

        ``big_query_job`` is indexed positionally: element 0 is passed as
        ``job_id``, element 1 as ``location``, and element 2 is the value the
        version's ``query`` is compared against.
        """
        dataset_name = "bq-" + utils.gen_str()
        bq_dataset = client.create_big_query_dataset(dataset_name)
        job_id = big_query_job[0]
        location = big_query_job[1]
        bq_version = client.create_big_query_dataset_version(
            bq_dataset, job_id=job_id, location=location)

        recorded_query = bq_version.dataset_version_info.query
        assert recorded_query == big_query_job[2]
Beispiel #8
0
 def test_s3_dataset_creation(self, client):
     """Create an "s3" dataset and check that it is typed as PATH.

     The test is skipped rather than failed when botocore raises
     ``ClientError`` anywhere in the body.
     """
     try:
         dataset_name = "s3-" + utils.gen_str()
         s3_dataset = client.set_dataset(dataset_name, type="s3")
         actual_type = s3_dataset.dataset_type
         assert actual_type == _DatasetService.DatasetTypeEnum.PATH
     except botocore.exceptions.ClientError:
         pytest.skip("insufficient AWS credentials")
Beispiel #9
0
    def test_find_datasets_client_api(self, client):
        """Create two datasets and look them up through ``client.find_datasets``.

        Covers an unfiltered search plus filtering by tags, by name, by
        dataset ids, and by dataset ids combined with a name.
        """
        name1 = utils.gen_str()
        tag_a = "test1-" + name1
        tag_b = "test2-" + name1
        query_dataset = client.set_dataset(
            name=name1,
            type="big query",
            tags=[tag_a, tag_b])
        assert (query_dataset.dataset_type
                == _DatasetService.DatasetTypeEnum.QUERY)
        assert query_dataset.id

        name2 = utils.gen_str()
        path_dataset = client.set_dataset(
            name=name2, type="s3", tags=["test1"])
        assert (path_dataset.dataset_type
                == _DatasetService.DatasetTypeEnum.PATH)
        assert path_dataset.id

        # TODO: update once RAW is supported. The intended extension is to
        # create a third dataset with type="raw", assert that its
        # dataset_type is _DatasetService.DatasetTypeEnum.RAW and that it has
        # an id, and then assert that client.find_datasets() returns exactly
        # the three datasets in creation order.

        # Unfiltered search: at least 2 because they were just created.
        # Needs to be updated.
        found = client.find_datasets()
        assert len(found) >= 2

        # by tags
        found = client.find_datasets(tags=[tag_a, tag_b])
        assert len(found) == 1
        assert found[0].id == query_dataset.id

        # by name
        found = client.find_datasets(name=name1)
        assert len(found) == 1
        assert found[0].id == query_dataset.id

        # by ids
        found = client.find_datasets(
            dataset_ids=[query_dataset.id, path_dataset.id])
        assert len(found) == 2

        # by ids combined with a name
        found = client.find_datasets(
            dataset_ids=[query_dataset.id, path_dataset.id],
            name=name1)
        assert len(found) == 1
        assert found[0].id == query_dataset.id
Beispiel #10
0
    def test_store_plt(self, experiment_run):
        """Log the ``plt`` module as an image and compare the stored pixel data.

        The retrieved image's data must equal the data of the image that
        ``self.matplotlib_to_pil(plt)`` produces.
        """
        image_key = utils.gen_str()
        xs, ys = np.random.random((2, 10))
        plt.scatter(xs, ys)

        experiment_run.log_image(image_key, plt)
        stored_pixels = np.asarray(
            experiment_run.get_image(image_key).getdata())
        expected_pixels = np.asarray(self.matplotlib_to_pil(plt).getdata())
        assert np.array_equal(stored_pixels, expected_pixels)
Beispiel #11
0
 def test_creation_from_scratch(self, client):
     """Construct a PATH ``Dataset`` directly from the client's connection and config.

     The new dataset must report the PATH type and carry a truthy id.
     """
     path_type = _DatasetService.DatasetTypeEnum.PATH
     new_dataset = Dataset(
         client._conn,
         client._conf,
         name=utils.gen_str(),
         dataset_type=path_type,
     )
     assert new_dataset.dataset_type == _DatasetService.DatasetTypeEnum.PATH
     assert new_dataset.id
Beispiel #12
0
    def test_empty(self, experiment_run):
        """Logging an empty ``six.BytesIO`` stream as an artifact must raise ValueError."""
        artifact_key = utils.gen_str()
        empty_stream = six.BytesIO()

        with pytest.raises(ValueError):
            experiment_run.log_artifact(artifact_key, empty_stream)
Beispiel #13
0
    def test_filesystem_dataset_version_creation(self, client):
        """Create a version of a "local" dataset from a directory of three files.

        The directory comes from ``self.create_dir_with_files(num_files=3)``,
        and the resulting version must list exactly three dataset parts. The
        directory is removed whether the test passes or fails.
        """
        dir_name, _ = self.create_dir_with_files(num_files=3)
        try:
            name = utils.gen_str()
            dataset = client.set_dataset("fs-" + name, type="local")
            dataset_version = dataset.create_version(dir_name)

            assert len(
                dataset_version.dataset_version_info.dataset_part_infos) == 3
        finally:
            # clean up even when a call above raises or the assertion fails,
            # so a failing run does not leave the directory behind
            shutil.rmtree(dir_name)
Beispiel #14
0
    def test_get_all_datasets_client_api(self, client):
        """Create one dataset each of type QUERY, PATH, and RAW via the client.

        Each created dataset must report the requested type and carry a
        truthy id.
        """
        for type_name in ("QUERY", "PATH", "RAW"):
            name = utils.gen_str()
            created = client.create_dataset(
                name=name,
                dataset_type=getattr(_DatasetService.DatasetTypeEnum,
                                     type_name))
            assert created.dataset_type == getattr(
                _DatasetService.DatasetTypeEnum, type_name)
            assert created.id
Beispiel #15
0
    def test_store_fig(self, experiment_run):
        """Log a figure from ``plt.subplots()`` as an image and compare pixel data.

        The retrieved image's data must equal the data of the image that
        ``self.matplotlib_to_pil(fig)`` produces.
        """
        image_key = utils.gen_str()
        fig, ax = plt.subplots()
        xs, ys = np.random.random((2, 10))
        ax.scatter(xs, ys)

        experiment_run.log_image(image_key, fig)
        stored_pixels = np.asarray(
            experiment_run.get_image(image_key).getdata())
        expected_pixels = np.asarray(self.matplotlib_to_pil(fig).getdata())
        assert np.array_equal(stored_pixels, expected_pixels)
Beispiel #16
0
    def test_creation_by_id_client_api(self, client):
        """Create an "s3" dataset, then fetch it again through ``set_dataset(id=...)``.

        The refetched dataset must have the same id and name as the original.
        """
        original = client.set_dataset(name=utils.gen_str(), type="s3")
        assert original.dataset_type == _DatasetService.DatasetTypeEnum.PATH
        assert original.id

        refetched = client.set_dataset(id=original.id)
        assert original.id == refetched.id
        assert original.name == refetched.name
Beispiel #17
0
    def test_log_dataset_version(self, client, experiment_run, s3_bucket):
        """Log an S3 dataset version on a run under 'train' and read back its id.

        ``experiment_run.get_dataset('train')`` is unpacked as a pair whose
        second element must equal the logged version's id.
        """
        dataset_name = "s3-" + utils.gen_str()
        s3_dataset = client.create_s3_dataset(dataset_name)
        assert s3_dataset.dataset_type == _DatasetService.DatasetTypeEnum.PATH

        s3_version = client.create_s3_dataset_version(s3_dataset, s3_bucket)
        experiment_run.log_dataset_version('train', s3_version)

        _, linked_version_id = experiment_run.get_dataset('train')
        assert linked_version_id == s3_version.id
Beispiel #18
0
    def test_s3_dataset_version_creation(self, client, s3_bucket):
        """Create a version of an "s3" dataset and expect at least one dataset part.

        The test is skipped rather than failed when botocore raises
        ``ClientError`` anywhere in the body.
        """
        try:
            dataset_name = "s3-" + utils.gen_str()
            s3_dataset = client.set_dataset(dataset_name, type="s3")
            s3_version = s3_dataset.create_version(s3_bucket)

            part_infos = s3_version.dataset_version_info.dataset_part_infos
            assert len(part_infos) >= 1
        except botocore.exceptions.ClientError:
            pytest.skip("insufficient AWS credentials")
Beispiel #19
0
    def test_creation_by_id(self, client):
        """Create a dataset version, then fetch it again by id through the client.

        The version is created from this file's path on a "local" dataset;
        the refetched version must have the same id.
        """
        local_dataset = client.set_dataset(name=utils.gen_str(), type="local")

        original = local_dataset.create_version(__file__)
        assert original.dataset_type == _DatasetService.DatasetTypeEnum.PATH
        assert original.id

        refetched = client.get_dataset_version(id=original.id)
        assert original.id == refetched.id
Beispiel #20
0
    def test_creation_from_scratch(self, client):
        """Create a PATH dataset and a PATH dataset version through the client.

        The version is created with a default-constructed
        ``PathDatasetVersionInfo`` and must report the PATH type and carry a
        truthy id.
        """
        dataset_name = utils.gen_str()
        path_dataset = client.create_dataset(
            name=dataset_name,
            dataset_type=_DatasetService.DatasetTypeEnum.PATH,
        )

        new_version = client.create_dataset_version(
            dataset=path_dataset,
            dataset_version_info=_DatasetVersionService.PathDatasetVersionInfo(),
            dataset_type=_DatasetService.DatasetTypeEnum.PATH,
        )
        assert new_version.dataset_type == _DatasetService.DatasetTypeEnum.PATH
        assert new_version.id
Beispiel #21
0
    def test_creation_by_id(self, client):
        """Construct a PATH ``Dataset``, then construct it again from its id.

        Both objects are built directly from the client's connection and
        config; the second is given only ``_dataset_id`` and must report the
        same id as the first.
        """
        original = Dataset(
            client._conn,
            client._conf,
            name=utils.gen_str(),
            dataset_type=_DatasetService.DatasetTypeEnum.PATH,
        )
        assert original.dataset_type == _DatasetService.DatasetTypeEnum.PATH
        assert original.id

        refetched = Dataset(
            client._conn,
            client._conf,
            _dataset_id=original.id,
        )
        assert original.id == refetched.id
Beispiel #22
0
    def test_get_dataset_client_api(self, client):
        """Create a PATH dataset, then fetch it again with ``client.get_dataset``.

        The refetched dataset must have the same id and name as the original.
        """
        dataset_name = utils.gen_str()
        original = client.create_dataset(
            name=dataset_name,
            dataset_type=_DatasetService.DatasetTypeEnum.PATH,
        )
        assert original.dataset_type == _DatasetService.DatasetTypeEnum.PATH
        assert original.id

        refetched = client.get_dataset(id=original.id)
        assert original.id == refetched.id
        assert original.name == refetched.name
Beispiel #23
0
 def test_creation_from_scratch(self, client):
     """Construct a PATH ``Dataset`` and a ``DatasetVersion`` for it directly.

     Both objects are built from the client's connection and config. The
     version is given a default-constructed ``PathDatasetVersionInfo`` and
     must report the PATH type and carry a truthy id.
     """
     parent_dataset = Dataset(
         client._conn,
         client._conf,
         name=utils.gen_str(),
         dataset_type=_DatasetService.DatasetTypeEnum.PATH,
     )
     new_version = DatasetVersion(
         client._conn,
         client._conf,
         dataset_id=parent_dataset.id,
         dataset_version_info=_DatasetVersionService.PathDatasetVersionInfo(),
         dataset_type=_DatasetService.DatasetTypeEnum.PATH,
     )
     assert new_version.dataset_type == _DatasetService.DatasetTypeEnum.PATH
     assert new_version.id
    def test_batch(self, experiment_run):
        """Log ``self.hyperparameters`` in one batch and read them back.

        Checks that an unlogged key raises KeyError, that each key returns
        its logged value, and that the bulk getter returns the whole mapping.
        """
        experiment_run.log_hyperparameters(self.hyperparameters)

        # a key that was never logged must raise KeyError
        with pytest.raises(KeyError):
            experiment_run.get_hyperparameter(utils.gen_str())

        for name, expected in six.viewitems(self.hyperparameters):
            actual = experiment_run.get_hyperparameter(name)
            assert actual == expected

        all_logged = experiment_run.get_hyperparameters()
        assert all_logged == self.hyperparameters
Beispiel #25
0
    def test_get(self, experiment_run):
        """Log three dict artifacts, read each back, and check a missing key.

        Each artifact is a dict built from six generated key/value string
        pairs. Retrieval must return an equal object, and an unlogged key
        must raise KeyError.
        """
        artifacts = {}
        for _ in range(3):
            artifact_key = utils.gen_str()
            artifacts[artifact_key] = {
                utils.gen_str(): utils.gen_str()
                for _ in range(6)
            }

        # log everything first, then verify, then probe a missing key
        for artifact_key, payload in six.viewitems(artifacts):
            experiment_run.log_artifact(artifact_key, payload)
        for artifact_key, payload in six.viewitems(artifacts):
            retrieved = experiment_run.get_artifact(artifact_key)
            assert retrieved == payload
        with pytest.raises(KeyError):
            experiment_run.get_artifact(utils.gen_str())
    def test_batch(self, experiment_run):
        """Log ``self.metrics`` in one batch and read them back.

        Checks that an unlogged key raises KeyError, that each key returns
        its logged value, and that the bulk getter returns the whole mapping.
        """
        experiment_run.log_metrics(self.metrics)

        # a key that was never logged must raise KeyError
        with pytest.raises(KeyError):
            experiment_run.get_metric(utils.gen_str())

        for name, expected in six.viewitems(self.metrics):
            actual = experiment_run.get_metric(name)
            assert actual == expected

        all_logged = experiment_run.get_metrics()
        assert all_logged == self.metrics
Beispiel #27
0
    def test_rdbms_version_creation(self, client):
        """Create a version of a "postgres" dataset and check its recorded info.

        The version info must echo the query, expose the connection string
        as ``data_source_uri``, and report the given number of records.
        """
        query = "SELECT * FROM ner-table"
        connection_str = "localhost:6543"
        record_count = 100

        dataset_name = "pg-" + utils.gen_str()
        pg_dataset = client.set_dataset(dataset_name, type="postgres")
        pg_version = pg_dataset.create_version(
            query=query,
            db_connection_str=connection_str,
            num_records=record_count,
        )

        assert pg_version.dataset_version_info.query == query
        assert pg_version.dataset_version_info.data_source_uri == connection_str
        assert pg_version.dataset_version_info.num_records == record_count
    def test_single(self, experiment_run):
        """Log ``self.attributes`` one call at a time and read them back.

        Checks that an unlogged key raises KeyError, that each key returns
        its logged value, and that the bulk getter returns the whole mapping.
        """
        for name, value in six.viewitems(self.attributes):
            experiment_run.log_attribute(name, value)

        # a key that was never logged must raise KeyError
        with pytest.raises(KeyError):
            experiment_run.get_attribute(utils.gen_str())

        for name, expected in six.viewitems(self.attributes):
            actual = experiment_run.get_attribute(name)
            assert actual == expected

        all_logged = experiment_run.get_attributes()
        assert all_logged == self.attributes
    def test_log_collection(self, experiment_run):
        """Lists and dicts can be logged as attributes and read back unchanged.

        Each collection type is logged once through the single-attribute
        function and once through the batch function, under a fresh key.
        """
        # list first, then dict
        for make_collection in (utils.gen_list, utils.gen_dict):
            # single fn
            attr_key, collection = utils.gen_str(), make_collection()
            experiment_run.log_attribute(attr_key, collection)
            assert experiment_run.get_attribute(attr_key) == collection

            # batch fn
            attr_key, collection = utils.gen_str(), make_collection()
            experiment_run.log_attributes({attr_key: collection})
            assert experiment_run.get_attribute(attr_key) == collection
Beispiel #30
0
    def test_store_pil(self, experiment_run):
        """Log a PIL image and check that the retrieved pixel data matches.

        The image is a white 64x64 RGB canvas with one black arc drawn at a
        random position and with random start and end angles.
        """
        image_key = utils.gen_str()
        canvas = PIL.Image.new('RGB', (64, 64), 'white')

        # bounding box: first two coordinates below 32, last two in [32, 64)
        low_corner = np.random.randint(32, size=(2))
        high_corner = np.random.randint(32, 64, size=(2))
        bounding_box = np.r_[low_corner, high_corner].tolist()
        start_angle = np.random.randint(360)
        end_angle = np.random.randint(360)
        PIL.ImageDraw.Draw(canvas).arc(
            bounding_box, start_angle, end_angle, 'black')

        experiment_run.log_image(image_key, canvas)
        stored_pixels = np.asarray(
            experiment_run.get_image(image_key).getdata())
        expected_pixels = np.asarray(canvas.getdata())
        assert (np.array_equal(stored_pixels, expected_pixels))