def test_retrieve(self, requests_mock, runner, as_pandas):
    """Check that ``retrieve`` round-trips a data set to JSON or, when the
    ``--pandas`` flag is given, exports it to CSV."""

    # Build a data set with a known entry id and mock the API response.
    data_set = create_data_set("data-set-1")
    data_set.entries[0].id = 1
    mock_get_data_set(requests_mock, data_set)

    output_path = "dataset.csv" if as_pandas else "dataset.json"

    arguments = ["retrieve", "--id", data_set.id, "--output", output_path]
    if as_pandas:
        arguments.append("--pandas")

    result = runner.invoke(dataset_cli, arguments)
    if result.exit_code != 0:
        raise result.exception

    if as_pandas:
        # CSV output: only the number of rows is easily comparable.
        frame = pandas.read_csv(output_path)
        assert len(frame) == len(data_set.entries)
    else:
        # JSON output should round-trip to an identical model.
        rest_data_set = DataSet.parse_file(output_path)
        assert rest_data_set.json().replace("\n", "") == data_set.json()
def test_retrieve_data_sets(self, benchmark, requests_mock):
    """Ensure the benchmark test set is retrieved from the (mocked) API and
    written to disk as a collection file."""

    from nonbonded.library.models.datasets import DataSetCollection

    # Mock the data set to retrieve.
    expected_data_set = create_data_set("data-set-1", 1)
    mock_get_data_set(requests_mock, expected_data_set)

    with temporary_cd():
        BenchmarkInputFactory._retrieve_data_sets(benchmark, None)

        # The factory should have serialized the retrieved set.
        assert os.path.isfile("test-set-collection.json")

        stored_collection = DataSetCollection.parse_file(
            "test-set-collection.json"
        )
        assert stored_collection.data_sets[0].json() == expected_data_set.json()
def test_generate_evaluator_target(self, requests_mock):
    """Verify that generating an evaluator target writes both a training set
    and an options file to the working directory."""

    data_set = create_data_set("data-set-1")
    mock_get_data_set(requests_mock, data_set)

    target = create_evaluator_target("evaluator-target-1", [data_set.id])

    with temporary_cd():
        OptimizationInputFactory._generate_evaluator_target(target, 8000, None)

        # The training set should be serialized in evaluator form.
        assert os.path.isfile("training-set.json")

        evaluator_set = PhysicalPropertyDataSet.from_json("training-set.json")
        assert evaluator_set.json() == data_set.to_evaluator().json()

        assert os.path.isfile("options.json")
def test_study_with_children(requests_mock, monkeypatch):
    """Check that generating inputs for a study creates the expected
    directory layout for its optimization and benchmark children."""

    # Overwrite the child factories so we don't need to provide sensible
    # children and wait for them to be built.
    def mock_generate(model, **_):
        os.makedirs(model.id, exist_ok=True)

    monkeypatch.setattr(OptimizationInputFactory, "generate", mock_generate)
    monkeypatch.setattr(BenchmarkInputFactory, "generate", mock_generate)

    mock_get_data_set(requests_mock, create_data_set("data-set-1"))

    optimization = create_optimization(
        "project-1",
        "study-1",
        "optimization-1",
        targets=[create_evaluator_target("evaluator-target", ["data-set-1"])],
    )
    benchmark = create_benchmark(
        "project-1",
        "study-1",
        "benchmark-1",
        ["data-set-1"],
        None,
        create_force_field(),
    )

    study = create_study("project-1", "study-1")
    study.optimizations = [optimization]
    study.benchmarks = [benchmark]

    mock_get_study(requests_mock, study)

    with temporary_cd():
        InputFactory.generate(
            study, "test-env", "12:34", "lilac-dask", 8000, 1, False
        )

        # Every child should have received its own sub-directory.
        expected_directories = [
            study.id,
            os.path.join(study.id, "optimizations"),
            os.path.join(study.id, "benchmarks"),
            os.path.join(study.id, "optimizations", optimization.id),
            os.path.join(study.id, "benchmarks", benchmark.id),
        ]
        for expected_directory in expected_directories:
            assert os.path.isdir(expected_directory)
def test_find_or_retrieve_data_sets(
    requests_mock, data_set_ids, data_set_type, expected_data_set_ids, expected_raises
):
    """Check that data sets are taken from the local list when available and
    otherwise retrieved from the (mocked) RESTful API."""

    local_data_sets = [
        create_data_set("data-set-1", 1),
        create_qc_data_set("data-set-1"),
        create_data_set("data-set-2", 2),
    ]
    remote_data_sets = [
        create_qc_data_set("data-set-2"),
        create_data_set("data-set-2", 3),
        create_data_set("data-set-3", 4),
    ]

    # Register each remote set with the mock appropriate to its type.
    for remote_data_set in remote_data_sets:
        mock_function = (
            mock_get_data_set
            if isinstance(remote_data_set, DataSet)
            else mock_get_qc_data_set
        )
        mock_function(requests_mock, remote_data_set)

    with expected_raises:
        found_data_sets = InputFactory._find_or_retrieve_data_sets(
            data_set_ids, data_set_type, local_data_sets
        )

        assert sorted(data_set_ids) == sorted(
            found_data_set.id for found_data_set in found_data_sets
        )
        assert all(
            isinstance(found_data_set, data_set_type)
            for found_data_set in found_data_sets
        )

        # Entry ids identify which copy of each set was selected.
        found_entry_ids = [
            entry.id
            for found_data_set in found_data_sets
            if isinstance(found_data_set, DataSet)
            for entry in found_data_set.entries
        ]
        assert found_entry_ids == expected_data_set_ids