def test_interpolation(self):
    context = Parameters.from_mapping(yaml.safe_load(self.WRITING_REFERENCE))
    loader = YAMLParametersLoader()
    self.assertEqual(
        loader._interpolate(
            Parameters.from_mapping(
                yaml.safe_load(self.MULTIPLE_INTERPOLATION_REFERENCE)),
            context,
        )._data,
        immutabledict([
            ("pear", "raspberry"),
            ("banana", "raspberry"),
            ("apple", "raspberry"),
            ("the_ultimate_fruit", "raspberry"),
        ]),
    )
    self.assertEqual(
        loader._interpolate(
            Parameters.from_mapping(
                yaml.safe_load(
                    self.MULTIPLE_INTERPOLATION_REFERENCE_NEEDING_CONTEXT)),
            context,
        )._data,
        immutabledict([
            ("pear", "raspberry/world"),
            ("banana", "raspberry/world"),
            ("apple", "raspberry/world"),
            ("the_ultimate_fruit", "raspberry/world"),
            # the actual pair ("hello", "world") should not be present
        ]),
    )
    self.assertEqual(
        loader._interpolate(
            Parameters.from_mapping(yaml.safe_load(self.NESTED_INTERPOLATION)),
            context,
        ).as_nested_dicts(),
        {
            "key": 2,
            "key2": "fooo",
            "key3": {
                "lalala": "fooo",
                "meep": 2,
                "list": [1, 2, 3]
            },
        },
    )
    with self.assertRaisesRegex(
            ParameterInterpolationError,
            r"These interpolated parameters form at least one graph cycle that must be fixed: "
            r"\('b', 'c'\)",
    ):
        loader._interpolate(
            Parameters.from_mapping(
                yaml.safe_load('a: "%b%"\nb: "%c%"\nc: "%b%"')),
            context,
        )

def test_optional_creatable_file(self):
    test_dir = Path(tempfile.mkdtemp()).absolute()
    existing_dir_path = test_dir / "existing_directory"
    existing_dir_path.mkdir(parents=True, exist_ok=True)
    non_existing_dir_path = test_dir / "non_existent_directory"
    a_file = existing_dir_path / "a_file"
    a_file.touch()
    non_existing_file = test_dir / "b_file"
    params = Parameters.from_mapping({
        "directory_which_exists": str(existing_dir_path.absolute()),
        "directory_which_does_not_exist": str(non_existing_dir_path.absolute()),
        "a_file": str(a_file.absolute()),
        "non_existing_file": str(non_existing_file.absolute()),
    })
    self.assertEqual(None, params.optional_creatable_file("missing_param"))
    self.assertEqual(
        os.path.realpath(non_existing_file),
        os.path.realpath(params.optional_creatable_file("non_existing_file")),
    )

def test_simple_dax(tmp_path):
    params = Parameters.from_mapping({
        "workflow_name": "Test",
        "workflow_created": "Testing",
        "workflow_log_dir": str(tmp_path / "log"),
        "workflow_directory": str(tmp_path / "working"),
        "site": "saga",
        "namespace": "test",
        "partition": "scavenge",
    })
    workflow_builder = WorkflowBuilder.from_parameters(params)
    assert workflow_builder.name == "Test"
    assert workflow_builder.created_by == "Testing"
    assert (workflow_builder._workflow_directory  # pylint:disable=protected-access
            == tmp_path / "working")
    assert workflow_builder._namespace == "test"  # pylint:disable=protected-access
    assert workflow_builder._default_site == "saga"  # pylint:disable=protected-access
    assert workflow_builder.default_resource_request  # pylint:disable=protected-access
    assert workflow_builder._job_graph is not None  # pylint:disable=protected-access

def test_optional_existing_directory(self):
    test_dir = Path(tempfile.mkdtemp()).absolute()
    existing_dir_path = test_dir / "existing_directory"
    existing_dir_path.mkdir(parents=True, exist_ok=True)
    non_existing_dir_path = test_dir / "non_existent_directory"
    a_file = test_dir / "a_file"
    a_file.touch()
    params = Parameters.from_mapping({
        "directory_which_exists": str(existing_dir_path.absolute()),
        "directory_which_does_not_exist": non_existing_dir_path,
        "a_file": a_file,
    })
    # noinspection PyTypeChecker
    self.assertEqual(
        os.path.realpath(existing_dir_path),
        os.path.realpath(
            params.optional_existing_directory("directory_which_exists")),
    )
    self.assertEqual(None, params.optional_existing_directory("missing_param"))
    with self.assertRaises(ParameterError):
        params.optional_existing_directory("directory_which_does_not_exist")
    with self.assertRaises(ParameterError):
        params.optional_existing_directory("a_file")
    shutil.rmtree(test_dir)

def test_optional_creatable_empty_directory(self):
    test_dir = Path(tempfile.mkdtemp()).absolute()
    existing_dir_path = test_dir / "existing_directory"
    existing_dir_path.mkdir(parents=True, exist_ok=True)
    non_existing_dir_path = test_dir / "non_existent_directory"
    a_file = existing_dir_path / "a_file"
    a_file.touch()
    params = Parameters.from_mapping({
        "directory_which_exists": str(existing_dir_path.absolute()),
        "directory_which_does_not_exist": str(non_existing_dir_path.absolute()),
        "a_file": a_file,
    })
    self.assertEqual(
        None, params.optional_creatable_empty_directory("missing_param"))
    self.assertEqual(
        os.path.realpath(non_existing_dir_path),
        os.path.realpath(
            params.optional_creatable_empty_directory(
                "directory_which_does_not_exist")),
    )
    with self.assertRaises(ParameterError):
        params.optional_creatable_empty_directory("a_file")
    with self.assertRaises(ParameterError):
        params.optional_creatable_empty_directory("directory_which_exists")
    self.assertEqual(
        os.path.realpath(existing_dir_path),
        os.path.realpath(
            params.optional_creatable_empty_directory(
                "directory_which_exists", delete=True)),
    )

def test_optionals_when_present(self):
    params = Parameters.from_mapping({
        "list": [1, 2, 3, ["a", "b", "c"]],
        "boolean": True,
        "float": 0.5,
        "integer": 42,
        "negative_int": -5,
        "namespace": {
            "fred": "meep"
        },
        "string": "foo",
    })
    assert params.optional_arbitrary_list("list") == [1, 2, 3, ["a", "b", "c"]]
    assert params.optional_boolean("boolean")
    assert params.optional_floating_point("float") == 0.5
    assert params.optional_integer("integer") == 42
    assert params.optional_positive_integer("integer") == 42
    with self.assertRaises(ParameterError):
        params.optional_positive_integer("negative_int")
    assert params.optional_namespace("namespace").as_nested_dicts() == {
        "fred": "meep"
    }
    assert params.optional_string("string") == "foo"

def split_key_value_store(
        input_store: KeyValueStore,
        *,
        num_parts: int,
        random_seed: Optional[int] = None) -> Tuple[KeyValueStore, ...]:
    """
    Splits *input_store* into *num_parts* pieces of nearly equal size.

    Some of the resulting key-value stores may be empty.
    """
    if num_parts <= 0:
        raise RuntimeError("Number of parts must be positive")
    split_locator = input_store.locator / "split"
    split_output_dir = directory_for(split_locator)
    param_args = {
        "input": input_store.input_parameters(),
        "num_slices": num_parts,
        "output_dir": split_output_dir,
    }
    # Compare against None explicitly so that 0 is accepted as a valid seed.
    if random_seed is not None:
        param_args["random_seed"] = random_seed
    split_job = run_python_on_parameters(
        split_locator,
        split_entry_point,
        Parameters.from_mapping(param_args),
        depends_on=input_store,
    )
    return tuple(
        ZipKeyValueStore(
            path=split_output_dir / f"{slice_index}.zip",
            depends_on=split_job,
            locator=split_locator / str(slice_index),
        ) for slice_index in range(num_parts))

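# A minimal usage sketch, not part of the library: `my_store` and
# `schedule_shard_job` are hypothetical names, and the surrounding Pegasus
# workflow is assumed to already be initialized (e.g. via
# initialize_vista_pegasus_wrapper, as in the tests below). Each returned
# shard is itself a KeyValueStore that downstream jobs can depend on:
#
#     shards = split_key_value_store(my_store, num_parts=4, random_seed=0)
#     for shard in shards:
#         schedule_shard_job(shard)
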
def downsample(input_store: KeyValueStore,
               *,
               limit: int,
               output_locator: Optional[Locator] = None) -> KeyValueStore:
    """
    Convenience function to run `vistautils.scripts.downsample_key_value_store`
    as a Pegasus job.
    """
    if not output_locator:
        output_locator = input_store.locator / f"downsampled-{limit}"
    output_zip_path = directory_for(output_locator) / "downsampled.zip"
    downsample_job = run_python_on_parameters(
        output_locator,
        downsample_key_value_store,
        Parameters.from_mapping({
            "input": input_store.input_parameters(),
            "output_zip_path": output_zip_path,
            "num_to_sample": limit,
            "random_seed": 0,
        }),
        depends_on=input_store,
    )
    return ZipKeyValueStore(
        path=output_zip_path,
        locator=output_locator,
        depends_on=[input_store.depends_on, downsample_job],
    )

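# A minimal usage sketch (hypothetical `my_store`; assumes an initialized
# workflow). This downsamples to 100 entries under the auto-derived locator
# `<input locator>/downsampled-100`:
#
#     small_store = downsample(my_store, limit=100)
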
def test_dax_with_categories(tmp_path):
    workflow_params = Parameters.from_mapping({
        "workflow_name": "Test",
        "workflow_created": "Testing",
        "workflow_log_dir": str(tmp_path / "log"),
        "workflow_directory": str(tmp_path / "working"),
        "site": "saga",
        "namespace": "test",
        "partition": "gaia",
        "home_dir": str(tmp_path),
    })
    initialize_vista_pegasus_wrapper(workflow_params)

    multiply_job_name = Locator(_parse_parts("jobs/multiply"))
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    multiply_input_file = tmp_path / "raw_nums.txt"
    multiply_params = Parameters.from_mapping({
        "input_file": multiply_input_file,
        "output_file": multiply_output_file,
        "x": 4,
    })
    multiply_job_category = "arithmetic"
    run_python_on_parameters(
        multiply_job_name,
        multiply_by_x_main,
        multiply_params,
        depends_on=[],
        category=multiply_job_category,
    )

    # Check that the multiply job has the appropriate category set in the DAX file
    dax_file = write_workflow_description()
    assert dax_file.exists()
    assert _job_in_dax_has_category(dax_file, multiply_job_name,
                                    multiply_job_category)
    assert not _job_in_dax_has_category(dax_file, multiply_job_name,
                                        "an-arbitrary-category")

def test_not_clearing_ckpts(monkeypatch, tmp_path):
    workflow_params = Parameters.from_mapping({
        "workflow_name": "Test",
        "workflow_created": "Testing",
        "workflow_log_dir": str(tmp_path / "log"),
        "workflow_directory": str(tmp_path / "working"),
        "site": "saga",
        "namespace": "test",
        "partition": "scavenge",
        "home_dir": str(tmp_path),
    })
    initialize_vista_pegasus_wrapper(workflow_params)

    multiply_job_name = Locator(_parse_parts("jobs/multiply"))
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    multiply_input_file = tmp_path / "raw_nums.txt"
    multiply_params = Parameters.from_mapping({
        "input_file": multiply_input_file,
        "output_file": multiply_output_file,
        "x": 4,
    })

    multiply_dir = directory_for(multiply_job_name)
    checkpointed_multiply_file = multiply_dir / "___ckpt"
    checkpointed_multiply_file.touch()
    multiply_output_file.touch()

    run_python_on_parameters(
        multiply_job_name, multiply_by_x_main, multiply_params, depends_on=[])

    # Answer "n" when asked whether the existing checkpoint should be cleared.
    monkeypatch.setattr("builtins.input", lambda _: "n")
    write_workflow_description()

    assert checkpointed_multiply_file.exists()

def test_relative_path_from_yaml_list():
    params = Parameters.from_mapping({"file_list": ["fred/bob.txt", "foo.txt"]})
    assert list(
        params.path_list_from_file(
            "file_list", resolve_relative_to=Path("/hello/world"))) == [
                Path("/hello/world/fred/bob.txt"),
                Path("/hello/world/foo.txt"),
            ]

def test_relative_path_list(tmp_path):
    file_list = tmp_path / "list.txt"
    CharSink.to_file(file_list).write("\n".join(["fred/bob.txt", "foo.txt"]))
    params = Parameters.from_mapping({"file_list": str(file_list)})
    assert list(
        params.path_list_from_file(
            "file_list", resolve_relative_to=Path("/hello/world"))) == [
                Path("/hello/world/fred/bob.txt"),
                Path("/hello/world/foo.txt"),
            ]

def test_environmental_variable_interpolation(self):
    loader = YAMLParametersLoader()
    os.environ["___TEST_PARAMETERS___"] = "foo"
    os.environ["___TEST_CLASHING_PARAM___"] = "bar"
    loaded_params = loader.load_string(ENV_VAR_INTERPOLATION_INPUT)
    reference_params = Parameters.from_mapping(
        yaml.safe_load(ENV_VAR_INTERPOLATION_REFERENCE))
    self.assertEqual(reference_params, loaded_params)

def test_relative_path_map(tmp_path):
    file_map = tmp_path / "map.txt"
    CharSink.to_file(file_map).write("\n".join(
        ["one\tfred/bob.txt", "two\tfoo.txt"]))
    params = Parameters.from_mapping({"file_map": str(file_map)})
    assert dict(
        params.path_map_from_file(
            "file_map", resolve_relative_to=Path("/hello/world"))) == {
                "one": Path("/hello/world/fred/bob.txt"),
                "two": Path("/hello/world/foo.txt"),
            }

def test_sub_namespaces():
    foo_params = Parameters.from_mapping({"foo": "boo"})
    bar_params = Parameters.from_mapping({"bar": "far"})

    result_none = foo_params.sub_namespaces()
    expected_none = immutableset()
    assert result_none == expected_none

    one_params = Parameters.from_mapping({"one_foo": foo_params})
    result_one = one_params.sub_namespaces()
    expected_one = immutableset([foo_params])
    assert result_one == expected_one

    deep_params = Parameters.from_mapping({
        "bariest": bar_params,
        "one": one_params
    })
    result_deep = deep_params.sub_namespaces()
    expected_deep = immutableset([bar_params, one_params])
    assert result_deep == expected_deep

def test_writing_to_yaml(self):
    params = Parameters.from_mapping({
        "hello": "world",
        "moo": {
            "nested_dict": {
                "lalala": "fooo",
                "meep": 2,
                "list": [1, 2, 3]
            }
        },
        "some_path": Path("/hello/world"),
        "path_list": [Path("/meep/lalala"), Path("/moo/cow")],
    })
    string_buffer = CharSink.to_string()
    YAMLParametersWriter().write(params, string_buffer)
    self.assertEqual(TestParameters.WRITING_REFERENCE,
                     string_buffer.last_string_written)
    with self.assertRaisesRegex(
            RuntimeError,
            "bytes and bytearrays are not legal parameter values"):
        YAMLParametersWriter().write(
            Parameters.from_mapping({"illegal": b"bytes"}),
            CharSink.to_nowhere())
    with self.assertRaisesRegex(
            RuntimeError,
            "bytes and bytearrays are not legal parameter values"):
        YAMLParametersWriter().write(
            Parameters.from_mapping({"illegal": bytearray()}),
            CharSink.to_nowhere())
    with self.assertRaisesRegex(
            RuntimeError,
            "Don't know how to serialize out .* as a parameter value"):
        YAMLParametersWriter().write(
            Parameters.from_mapping({"illegal": Parameters}),
            CharSink.to_nowhere())

def test_integer(self):
    params = Parameters.from_mapping({"test_int": 5})
    self.assertEqual(5, params.integer("test_int"))
    self.assertEqual(2, params.integer("not_appearing", default=2))
    with self.assertRaisesRegex(ParameterError,
                                "Invalid value for integer parameter"):
        params.integer("test_int", valid_range=Range.closed(1, 3))
    with self.assertRaisesRegex(ParameterError,
                                "Invalid value for integer parameter"):
        params.integer("not_appearing", default=2,
                       valid_range=Range.closed(10, 20))

def test_split_key_value_store_explicit_split_non_exhaustive_allowed(
        tmp_path: Path):
    output = tmp_path / "foo"
    keys = tmp_path / "foo_keys"
    with keys.open("w") as kf:
        kf.write("key1\nkey2")
    foo_params = Parameters.from_mapping({
        "output_file": str(output),
        "keys_file": str(keys)
    })
    key_value_path = tmp_path / "key_value"
    with KeyValueSink.zip_character_sink(key_value_path) as sink:
        sink.put("key1", "value1")
        sink.put("key2", "value2")
        sink.put("key3", "value3")
        sink.put("key4", "value4")
    input_params = Parameters.from_mapping({
        "type": "zip",
        "path": str(key_value_path)
    })
    final_params = Parameters.from_mapping({
        "input": input_params,
        "explicit_split": Parameters.from_mapping({"foo": foo_params}),
        "must_be_exhaustive": False,
    })
    split_key_value_store.main(final_params)
    reference = {"key1": "value1", "key2": "value2"}
    with KeyValueSource.zip_character_source(output) as source:
        assert set(source.keys()) == set(reference.keys())
        for key, reference_value in reference.items():
            assert source[key] == reference_value

def test_pickled_object_from_file(self):
    temp_dir = Path(tempfile.mkdtemp()).absolute()
    pickled_obj_file = temp_dir / "pickle"
    obj = {"foo": "bar", "thing": "amabob"}
    with pickled_obj_file.open("wb") as bf:
        pickle.dump(obj, bf)
    params = Parameters.from_mapping(
        {"pickled_obj_file": str(pickled_obj_file.absolute())})
    # noinspection PyTypeChecker
    self.assertEqual(obj, params.pickled_object_from_file("pickled_obj_file"))

def test_float(self):
    params = Parameters.from_mapping({"test_float": 5.5})
    self.assertEqual(5.5, params.floating_point("test_float"))
    self.assertEqual(
        5.5, params.floating_point("test_float", valid_range=Range.open(5, 6)))
    with self.assertRaisesRegex(
            ParameterError,
            "For parameter test_float, expected a float in the range \\(0.0..1.0\\) but got 5.5",
    ):
        params.floating_point("test_float", valid_range=Range.open(0.0, 1.0))

def test_namespace_prefix(self):
    assert Parameters.from_mapping({
        "hello": {
            "world": {
                "foo": "bar"
            }
        }
    }).namespace("hello").namespace("world").namespace_prefix == ("hello",
                                                                  "world")
    assert Parameters.empty(
        namespace_prefix=("foo",)).namespace_prefix == ("foo",)
    # test it works even for empty parameters
    assert Parameters.empty().namespace_or_empty("foo").namespace_or_empty(
        "bar").namespace_prefix == ("foo", "bar")

def test_split_key_value_store_explicit_split_non_exhaustive_disallowed(
        tmp_path: Path):
    output = tmp_path / "foo"
    keys = tmp_path / "foo_keys"
    with keys.open("w") as kf:
        kf.write("key1\nkey2")
    foo_params = Parameters.from_mapping({
        "output_file": str(output),
        "keys_file": str(keys)
    })
    key_value_path = tmp_path / "key_value"
    with KeyValueSink.zip_character_sink(key_value_path) as sink:
        sink.put("key1", "value1")
        sink.put("key2", "value2")
        sink.put("key3", "value3")
        sink.put("key4", "value4")
    input_params = Parameters.from_mapping({
        "type": "zip",
        "path": str(key_value_path)
    })
    final_params = Parameters.from_mapping({
        "input": input_params,
        "explicit_split": Parameters.from_mapping({"foo": foo_params}),
    })
    with raises(
            RuntimeError,
            match=(
                "Expected the split to be a partition, but .* were not included in any output split, "
                "including .*. If you did not intend the split to be exhaustive, please specify set "
                "parameter must_be_exhaustive to False"),
    ):
        split_key_value_store.main(final_params)

def test_split_key_value_store_even_split_random_seed(tmp_path: Path):
    output = tmp_path / "foo"
    key_value_path = tmp_path / "key_value"
    with KeyValueSink.zip_character_sink(key_value_path) as sink:
        sink.put("key1", "value1")
        sink.put("key2", "value2")
        sink.put("key3", "value3")
        sink.put("key4", "value4")
    input_params = Parameters.from_mapping({
        "type": "zip",
        "path": str(key_value_path)
    })
    final_params = Parameters.from_mapping({
        "input": input_params,
        "num_slices": 2,
        "random_seed": 2,  # deterministic seed
        "output_dir": str(output),
    })
    split_key_value_store.main(final_params)
    zip1 = {"key1": "value1", "key3": "value3"}
    zip0 = {"key2": "value2", "key4": "value4"}
    for handle in output.glob("*.zip"):
        with KeyValueSource.zip_character_source(handle) as source:
            # 4 keys split evenly into 2 slices = 2 keys per slice
            assert len(source.keys()) == 2
            # Path.stem is a string, so compare against string slice indices
            if handle.stem == "1":
                for key, reference_value in zip1.items():
                    assert source[key] == reference_value
            elif handle.stem == "0":
                for key, reference_value in zip0.items():
                    assert source[key] == reference_value

def test_namespaced_items():
    params = Parameters.from_mapping({
        "hello": "world",
        "foo": {
            "bar": "meep",
            "inner": {
                "not_a_string": 42
            }
        },
    })
    assert set(params.namespaced_items()) == {
        ("hello", "world"),
        ("foo.bar", "meep"),
        ("foo.inner.not_a_string", 42),
    }

def test_downsample_key_value_store(tmp_path: Path):
    key_value_path = tmp_path / "key_value.zip"
    with KeyValueSink.zip_character_sink(key_value_path) as sink:
        sink["key1"] = "value1"
        sink["key2"] = "value2"
        sink["key3"] = "value3"
        sink["key4"] = "value4"
    input_params = Parameters.from_mapping({
        "type": "zip",
        "path": str(key_value_path)
    })
    output = tmp_path / "output.zip"
    main_params = Parameters.from_mapping({
        "input": input_params,
        "num_to_sample": 2,
        "output_zip_path": str(output)
    })
    downsample_key_value_store.main(main_params)
    reference = [
        ("key1", "value1"),
        ("key2", "value2"),
        ("key3", "value3"),
        ("key4", "value4"),
    ]
    with KeyValueSource.zip_character_source(output) as source:
        contents = source.items()
        assert next(contents) in reference
        assert next(contents) in reference
        with raises(StopIteration):
            next(contents)

def test_enum_members():
    class TestEnum(Enum):
        boo = 1
        far = 2
        bat = 3

    params = Parameters.from_mapping({"member": "boo", "invalid": "cat"})
    success = params.enum("member", TestEnum)
    assert success == TestEnum.boo
    default = params.enum("not_there", TestEnum, default=TestEnum.far)
    assert default == TestEnum.far
    with pytest.raises(ParameterError,
                       match="For parameter .*, .* could not be found in .*"):
        params.enum("invalid", TestEnum)

def test_assert_exactly_one_present():
    params = Parameters.from_mapping({"foo": "bar", "moo": "cow"})
    params.assert_exactly_one_present(["foo", "foo2"])
    with pytest.raises(
            ParameterError,
            match="At most one of .* can be specified "
            "but these were specified: .*",
    ):
        params.assert_exactly_one_present(["foo", "moo"])
    with pytest.raises(
            ParameterError,
            match="Exactly one of the parameters .* "
            "should be specified, but none were",
    ):
        params.assert_exactly_one_present(["not-here"])

def test_string(self):
    params = Parameters.from_mapping({"hello": "world"})
    self.assertEqual("world", params.string("hello"))
    self.assertEqual(
        "world", params.string("hello", valid_options=("world", "Mars")))
    # The doubled "In in" below matches the library's error message verbatim.
    with self.assertRaisesRegex(
            ParameterError,
            "Parameter foo not found. In in root context available parameters "
            "are \\['hello'\\], available namespaces are \\[\\]",
    ):
        params.string("foo")
    with self.assertRaisesRegex(
            ParameterError,
            "The value world for the parameter hello is not one of the "
            "valid options \\('Earth', 'Mars'\\)",
    ):
        params.string("hello", valid_options=("Earth", "Mars"))

def join_to_key_value_zip(key_value_zips_to_join: Iterable[ZipKeyValueStore],
                          *,
                          output_locator: Locator) -> ZipKeyValueStore:
    """
    Joins *key_value_zips_to_join* into a single zip-backed key-value store
    as a Pegasus job.
    """
    key_value_zips_to_join = tuple(key_value_zips_to_join)
    output_zip_path = directory_for(output_locator) / "joined.zip"
    join_job = run_python_on_parameters(
        output_locator,
        join_key_value_stores,
        Parameters.from_mapping({
            "input_store_list_file": [p.path for p in key_value_zips_to_join],
            "output": {
                "type": "zip",
                "path": output_zip_path
            },
        }),
        depends_on=key_value_zips_to_join,
    )
    return ZipKeyValueStore(
        path=output_zip_path, locator=output_locator, depends_on=join_job)

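# A minimal usage sketch (hypothetical `shards`, e.g. the tuple returned by
# split_key_value_store above; assumes an initialized workflow). This fans
# the per-shard outputs back into one store:
#
#     joined = join_to_key_value_zip(
#         shards, output_locator=Locator(_parse_parts("jobs/joined")))
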
def test_composed_key_value_transform(tmp_path):
    kvs = {"doc1": 5, "doc2": 10}

    def add1(values, **kwargs):  # pylint:disable=unused-argument
        return {key: val + 1 for key, val in values.items()}

    def subtract2(values, **kwargs):  # pylint:disable=unused-argument
        return {key: val - 2 for key, val in values.items()}

    composed_transforms = compose_key_value_store_transforms(
        transforms=[add1, subtract2])

    params = Parameters.from_mapping({
        "workflow_name": "Test",
        "workflow_created": "Testing",
        "workflow_log_dir": str(tmp_path / "log"),
        "workflow_directory": str(tmp_path / "working"),
        "site": "saga",
        "namespace": "test",
        "partition": "gaia",
        "home_dir": str(tmp_path),
    })
    initialize_vista_pegasus_wrapper(params)

    transformed_kvs = transform_key_value_store(
        kvs, composed_transforms, output_locator=Locator([]), parallelism=1)

    expected_kvs = {"doc1": 4, "doc2": 9}
    assert expected_kvs == transformed_kvs