def test_outcome():
    projectroot = Path(__file__).parent / "test_project"
    projectpath = str(projectroot.absolute())
    if projectpath not in sys.path:
        sys.path.insert(0, projectpath)

    # Clear the runtime directory and cd into it
    clean_project(projectroot)
    os.makedirs(projectroot / "data", exist_ok=True)
    os.chdir(projectroot)

    # Define tasks which produce different outcomes
    from tasks import Polar
    task_succeed = Polar(x=1, y=0, reason="pytest")
    task_undefined = Polar(x=0, y=0, reason="pytest")
    task_succeed.run()
    task_undefined.run()

    from smttask.view import RecordStoreView
    RecordStoreView.default_project_dir = projectpath
    recordlist = RecordStoreView().list
    # Most recent records come first
    assert "undefined" in recordlist[0].outcome
    assert "undefined" not in recordlist[1].outcome
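
# The path/cleanup boilerplate above recurs in nearly every test below; a
# minimal sketch of how it could be factored into a pytest fixture. The
# fixture name `project` is hypothetical; `clean_project` and the directory
# layout are the ones used throughout this module.
@pytest.fixture
def project():
    projectroot = Path(__file__).parent / "test_project"
    projectpath = str(projectroot.absolute())
    if projectpath not in sys.path:
        sys.path.insert(0, projectpath)
    clean_project(projectroot)
    os.makedirs(projectroot / "data", exist_ok=True)
    os.chdir(projectroot)
    return projectroot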
def test_unpure_digest():
    # Clear the runtime directory and cd into it
    clean_project(projectroot)
    os.makedirs(datadir, exist_ok=True)
    os.chdir(projectroot)

    # Create some dummy files for the task to find
    for c in "abc":
        with open(datadir / f"{c}.dat", 'w') as f:
            f.write(c * 10)

    @UnpureMemoizedTask
    def ListDir(root: str) -> List[str]:
        return sorted(os.listdir(root))

    # Create three tasks pointing to the same directory but with different
    # str arguments (otherwise smttask recognizes that they are identical
    # and creates only one Task)
    task1 = ListDir(root=str(datadir))
    task2 = ListDir(root=str(datadir / '..') + '/' + '/'.join(datadir.parts[-1:]))
    task3 = ListDir(root=str(datadir / '../..') + '/' + '/'.join(datadir.parts[-2:]))

    assert task1.run() == [f"{c}.dat" for c in "abc"]
    assert task1.digest == task2.digest
    # At this point task3.digest is still undetermined because it has not
    # been run, but task1 and task2 have run and fixed their digests

    # Add some dummy files – this should change the output of the
    # UnpureTask, and therefore its digest
    for c in "de":
        with open(datadir / f"{c}.dat", 'w') as f:
            f.write(c * 10)

    assert task1.run() == [f"{c}.dat" for c in "abc"]    # Result unchanged
    assert task2.run() == [f"{c}.dat" for c in "abc"]    # Result unchanged
    assert task3.run() == [f"{c}.dat" for c in "abcde"]  # Reflects updated data
    # Tasks created before the new files locked the old list into their digest
    assert task1.digest != task3.digest
    assert task1.digest == task2.digest  # Still the same

    # Forcing a task to recompute will update its digest
    with pytest.warns(UserWarning):  # Warns that the digest has changed
        task2.run(recompute=True)
    assert task2.digest == task3.digest
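
# Sanity check for the three `root` spellings above: they are distinct
# strings but resolve to the same directory (standard pathlib behaviour),
# which is why the test must vary the string form to obtain three Tasks.
def test_unpure_digest_root_spellings():
    roots = [str(datadir),
             str(datadir / '..') + '/' + '/'.join(datadir.parts[-1:]),
             str(datadir / '../..') + '/' + '/'.join(datadir.parts[-2:])]
    assert len({Path(r).resolve() for r in roots}) == 1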
def wip_test_pydantic_input():
    """
    A failing test for Issue #2:
    "When serializing a Pydantic model, use its own json encoders."
    """
    projectroot = Path(__file__).parent / "test_project"
    projectpath = str(projectroot.absolute())
    if projectpath not in sys.path:
        sys.path.insert(0, projectpath)

    # Clear the runtime directory and cd into it
    clean_project(projectroot)
    os.makedirs(projectroot / "data", exist_ok=True)
    os.chdir(projectroot)

    # Define some dummy tasks
    from tasks import PydanticCounter, CountingWithPydanticObject
    py_count = PydanticCounter(counter=3)
    task = CountingWithPydanticObject(n=10, pobj=py_count)
    task2 = smttask.Task.from_desc(task.desc.json())
    assert task2.run() == 13
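
# For context on Issue #2, a self-contained illustration of the Pydantic
# (v1 API) `json_encoders` mechanism the fix should honour. `_Angle` and
# `_Model` are hypothetical names, unrelated to the tasks above.
from pydantic import BaseModel

class _Angle:
    def __init__(self, deg):
        self.deg = deg

class _Model(BaseModel):
    a: _Angle
    class Config:
        arbitrary_types_allowed = True
        json_encoders = {_Angle: lambda angle: angle.deg}

# _Model(a=_Angle(90)).json() == '{"a": 90}'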
def test_create_task(caplog):
    projectroot = Path(__file__).parent / "test_project"
    projectpath = str(projectroot.absolute())
    if projectpath not in sys.path:
        sys.path.insert(0, projectpath)

    # Clear the runtime directory and cd into it
    clean_project(projectroot)
    os.makedirs(projectroot / "data", exist_ok=True)
    os.chdir(projectroot)

    from smttask import Create, Task, NotComputed, config
    from data_types import Point
    from mackelab_toolbox.typing import safe_packages
    safe_packages.add("data_types")

    # Define some dummy tasks
    # Note that we can create `Create` tasks directly in the run file
    tasks = [Create(Point)(x=i * 0.3, y=1 - i * 0.3) for i in range(3)]
    task_digests = ['ed85744d5a', '127d88b74d', '97fa31904f']

    # Delete any leftover cache
    for task in tasks:
        task.clear()

    # Run the tasks
    with caplog.at_level(logging.DEBUG, logger=tasks[0].logger.name):
        caplog.clear()
        for i, (task, digest) in enumerate(zip(tasks, task_digests)):
            point = task.run(cache=False)  # cache=False, so the result is not memoized
            assert task.hashed_digest == digest
            assert task.unhashed_digests == {}
            assert task.digest == digest
            assert caplog.records[0].msg == "Running task in memory."
            assert point.x == i * 0.3
            assert point.y == 1 - i * 0.3
            assert task._run_result is NotComputed  # Not cached

    # Run the tasks again
    with caplog.at_level(logging.DEBUG, logger=tasks[0].logger.name):
        caplog.clear()
        for i, task in enumerate(tasks):
            point = task.run(cache=True)
            assert caplog.records[0].msg == "Running task in memory."
            assert len(task._run_result) == 1
            assert task._run_result.obj is point

    # Run the tasks a 3rd time
    # They should be reloaded from memory
    with caplog.at_level(logging.DEBUG, logger=tasks[0].logger.name):
        caplog.clear()
        for i, task in enumerate(tasks):
            point = task.run(cache=True)
            assert caplog.records[0].msg == "Loading memoized result."
            assert len(task._run_result) == 1
            assert task._run_result.obj is point

    # Test deserialization
    new_task = Task.from_desc(task.desc.json())
    # Task recognizes that it is being constructed with the same arguments,
    # and simply returns the preexisting instance
    assert new_task is task
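
# The final `new_task is task` assertion relies on tasks being memoized by
# their creation arguments. A generic sketch of that pattern (illustration
# only — not smttask's actual implementation):
class _MemoizedByArgs:
    _instances = {}
    def __new__(cls, **kwargs):
        key = (cls, tuple(sorted(kwargs.items())))
        if key not in _MemoizedByArgs._instances:
            _MemoizedByArgs._instances[key] = super().__new__(cls)
        return _MemoizedByArgs._instances[key]

# _MemoizedByArgs(x=1) is _MemoizedByArgs(x=1)  -> True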
def test_recorded_task(caplog):
    # OPTIMIZATION/TIMING: Running 3 tasks takes ~30 seconds
    # (everything else in this test takes < 100 ms)
    projectroot = Path(__file__).parent / "test_project"
    projectpath = str(projectroot.absolute())
    if projectpath not in sys.path:
        sys.path.insert(0, projectpath)

    # Clear the runtime directory and cd into it
    clean_project(projectroot)
    os.makedirs(projectroot / "data", exist_ok=True)
    os.chdir(projectroot)

    # Define some dummy tasks
    from smttask import Task
    from tasks import Square_x
    tasks = [Square_x(x=x, reason="pytest") for x in (1.1, 2.1, 5)]
    task_digests = ['7ad6c9eb99', '2eb601a664', '1a247b2f98']

    # Delete any leftover cache
    for task in tasks:
        task.clear()

    # Run the tasks
    with caplog.at_level(logging.DEBUG, logger=tasks[0].logger.name):
        caplog.clear()
        for task in tasks:
            task.run(cache=False)  # cache=False to test reloading from disk below
            assert caplog.records[0].msg == "No previously saved result was found; running task."

    # Assert that the outputs were produced at the expected locations
    assert set(os.listdir(projectroot / "data")) == {"run_dump", "Square_x"}
    for task, digest in zip(tasks, task_digests):
        assert task.hashed_digest == digest
        assert task.unhashed_digests == {}
        assert task.digest == digest
        assert os.path.exists(projectroot / f"data/Square_x/{digest}_.json")
        assert os.path.islink(projectroot / f"data/Square_x/{digest}_.json")
        assert os.path.exists(projectroot / f"data/run_dump/Square_x/{digest}_.json")
        assert os.path.isfile(projectroot / f"data/run_dump/Square_x/{digest}_.json")

    # Run the tasks again
    # They should be reloaded from disk
    with caplog.at_level(logging.DEBUG, logger=tasks[0].logger.name):
        for task in tasks:
            caplog.clear()
            task.run(cache=True)  # cache=True => now saved in memory
            assert caplog.records[0].msg == "Loading result of previous run from disk."

    # Run the tasks a 3rd time
    # They should be reloaded from memory
    with caplog.at_level(logging.DEBUG, logger=tasks[0].logger.name):
        for task in tasks:
            caplog.clear()
            task.run()
            assert caplog.records[0].msg == "Loading memoized result."

    # Assert that status tags are saved
    # TODO: Test every possible tag value. Will require tasks which fail
    #       after each update of `status`
    project = load_project()
    for label in project.get_labels():
        record = project.get_record(label)
        assert record.tags == {'_finished_'}

    # Test deserialization
    new_task = Task.from_desc(task.desc.json())
    # Task recognizes that it is being constructed with the same arguments,
    # and simply returns the preexisting instance
    assert new_task is task
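
# The three Square_x runs dominate test_recorded_task's runtime (~10 s each);
# a sketch of a parametrized variant (standard pytest) that lets each
# (x, digest) pair run and be debugged individually. Prefixed with '_' so it
# is not collected; rename to enable.
@pytest.mark.parametrize("x, digest", [(1.1, '7ad6c9eb99'),
                                       (2.1, '2eb601a664'),
                                       (5,   '1a247b2f98')])
def _recorded_task_single_sketch(x, digest):
    from tasks import Square_x  # requires the sys.path setup done in the tests
    task = Square_x(x=x, reason="pytest")
    task.clear()
    task.run(cache=False)
    assert task.digest == digest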
def test_iterative_task(caplog):
    projectroot = Path(__file__).parent / "test_project"
    projectpath = str(projectroot.absolute())
    if projectpath not in sys.path:
        sys.path.insert(0, projectpath)

    # Clear the runtime directory and cd into it
    clean_project(projectroot)
    os.makedirs(projectroot / "data", exist_ok=True)
    os.chdir(projectroot)

    # Define some dummy tasks
    from smttask import Task
    from tasks import PowSeq
    tasks = {1: PowSeq(start_n=1, n=1, a=3, p=3, reason="pytest"),
             2: PowSeq(start_n=1, n=2, a=3, p=3, reason="pytest"),
             3: PowSeq(start_n=1, n=3, a=3, p=3, reason="pytest"),
             4: PowSeq(start_n=1, n=4, a=3, p=3, reason="pytest")}
    hashed_digest = "b2c7aa835f"

    # Delete any leftover cache
    for task in tasks.values():
        task.clear()

    with caplog.at_level(logging.DEBUG, logger=tasks[1].logger.name):
        # Compute n=2 from scratch
        caplog.clear()
        n = 2
        result = tasks[n].run(cache=False)
        assert caplog.records[0].msg == "No previously saved result was found; running task."
        assert result[0] == n
        assert result[1] == 3**3
        for nm in ['a', 'n']:
            fname = f"{hashed_digest}__n_{n}_{nm}.json"
            assert os.path.exists(projectroot / f"data/PowSeq/{fname}")
            assert os.path.islink(projectroot / f"data/PowSeq/{fname}")
            assert os.path.exists(projectroot / f"data/run_dump/PowSeq/{fname}")
            assert os.path.isfile(projectroot / f"data/run_dump/PowSeq/{fname}")
        with open(projectroot / f"data/PowSeq/{hashed_digest}__n_{n}_a.json") as f:
            a = int(f.read())
        assert a == 3**3

        # Reload n=2 from disk
        caplog.clear()
        n = 2
        result = tasks[n].run(cache=False)
        assert caplog.records[0].msg == "Found output from a previous run matching these parameters."
        assert caplog.records[1].msg == "Loading result of previous run from disk."

        # Compute n=4, starting from n=2 reloaded from disk
        caplog.clear()
        n = 4
        result = tasks[n].run(cache=False)
        assert caplog.records[0].msg == "Found output from a previous run matching these parameters but with only 2 iterations."
        assert caplog.records[1].msg == "Loading result of previous run from disk."
        assert caplog.records[2].msg == "Continuing from a previous partial result."
        assert result[0] == n
        assert result[1] == ((3**3)**3)**3
        for nm in ['a', 'n']:
            fname = f"{hashed_digest}__n_{n}_{nm}.json"
            assert os.path.exists(projectroot / f"data/PowSeq/{fname}")
            assert os.path.islink(projectroot / f"data/PowSeq/{fname}")
            assert os.path.exists(projectroot / f"data/run_dump/PowSeq/{fname}")
            assert os.path.isfile(projectroot / f"data/run_dump/PowSeq/{fname}")
        with open(projectroot / f"data/PowSeq/{hashed_digest}__n_{n}_a.json") as f:
            a = int(f.read())
        assert a == ((3**3)**3)**3

        # Reload n=4 from disk
        caplog.clear()
        n = 4
        result = tasks[n].run(cache=False)
        assert caplog.records[0].msg == "Found output from a previous run matching these parameters."
        assert caplog.records[1].msg == "Loading result of previous run from disk."

        # Compute n=1 from scratch
        caplog.clear()
        n = 1
        result = tasks[n].run(cache=False)
        assert caplog.records[0].msg == "No previously saved result was found; running task."
        assert result[0] == n
        assert result[1] == 3
        for nm in ['a', 'n']:
            fname = f"{hashed_digest}__n_{n}_{nm}.json"
            assert os.path.exists(projectroot / f"data/PowSeq/{fname}")
            assert os.path.islink(projectroot / f"data/PowSeq/{fname}")
            assert os.path.exists(projectroot / f"data/run_dump/PowSeq/{fname}")
            assert os.path.isfile(projectroot / f"data/run_dump/PowSeq/{fname}")
        with open(projectroot / f"data/PowSeq/{hashed_digest}__n_{n}_a.json") as f:
            a = int(f.read())
        assert a == 3

        # Compute n=3, starting from n=2 reloaded from disk
        caplog.clear()
        n = 3
        result = tasks[n].run(cache=False)
        assert caplog.records[0].msg == "Found output from a previous run matching these parameters but with only 2 iterations."
        assert caplog.records[1].msg == "Loading result of previous run from disk."
        assert caplog.records[2].msg == "Continuing from a previous partial result."
        assert result[0] == n
        assert result[1] == (3**3)**3
        for nm in ['a', 'n']:
            fname = f"{hashed_digest}__n_{n}_{nm}.json"
            assert os.path.exists(projectroot / f"data/PowSeq/{fname}")
            assert os.path.islink(projectroot / f"data/PowSeq/{fname}")
            assert os.path.exists(projectroot / f"data/run_dump/PowSeq/{fname}")
            assert os.path.isfile(projectroot / f"data/run_dump/PowSeq/{fname}")
        with open(projectroot / f"data/PowSeq/{hashed_digest}__n_{n}_a.json") as f:
            a = int(f.read())
        assert a == (3**3)**3

    # Test deserialization
    new_task = Task.from_desc(tasks[n].desc.json())
    # Task recognizes that it is being constructed with the same arguments,
    # and simply returns the preexisting instance
    assert new_task is tasks[n]
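
# Reference for the values asserted in test_iterative_task: from those
# values, the PowSeq task is assumed to iterate a -> a**p from start_n up
# to n. Documentation sketch only; not used by the tests.
def _powseq_reference(a, p, start_n, n):
    for _ in range(start_n, n):
        a = a**p
    return a

# _powseq_reference(3, 3, 1, 1) == 3
# _powseq_reference(3, 3, 1, 4) == ((3**3)**3)**3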
def test_multiple_output_task(caplog):
    projectroot = Path(__file__).parent / "test_project"
    projectpath = str(projectroot.absolute())
    if projectpath not in sys.path:
        sys.path.insert(0, projectpath)

    # Clear the runtime directory and cd into it
    clean_project(projectroot)
    os.makedirs(projectroot / "data", exist_ok=True)
    os.chdir(projectroot)

    # Define some dummy tasks
    from tasks import SquareAndCube_x
    tasks = [SquareAndCube_x(reason="pytest", x=x, pmax=5) for x in (1.1, 2.1, 5)]
    task_digests = ['860feb44ee', '4b754dd53d', 'fcde864238']

    # Delete any leftover cache
    for task in tasks:
        task.clear()

    # Run the tasks
    with caplog.at_level(logging.DEBUG, logger=tasks[0].logger.name):
        for task in tasks:
            caplog.clear()
            result = task.run(cache=False)  # cache=False to test reloading from disk below
            assert caplog.records[0].msg == "No previously saved result was found; running task."
    # `result` is now the output of the last task, for which x = 5
    x = 5.
    assert result == (x**2, x**3, (x**4, x**5))
    assert isinstance(result[2], tuple)

    # Assert that the outputs were produced at the expected locations
    assert set(os.listdir(projectroot / "data")) == {"run_dump", "SquareAndCube_x"}
    for task, digest in zip(tasks, task_digests):
        assert task.hashed_digest == digest
        assert task.unhashed_digests == {}
        assert task.digest == digest
        for nm in ["sqr", "cube", "4", "5"]:
            # Symlinks in the input datastore…
            assert os.path.exists(projectroot / f"data/SquareAndCube_x/{digest}_{nm}.json")
            assert os.path.islink(projectroot / f"data/SquareAndCube_x/{digest}_{nm}.json")
            # …pointing to the actual files in the run dump
            assert os.path.exists(projectroot / f"data/run_dump/SquareAndCube_x/{digest}_{nm}.json")
            assert os.path.isfile(projectroot / f"data/run_dump/SquareAndCube_x/{digest}_{nm}.json")

    # Run the tasks again
    # They should be reloaded from disk
    with caplog.at_level(logging.DEBUG, logger=tasks[0].logger.name):
        for task in tasks:
            caplog.clear()
            result = task.run(cache=True)  # cache=True => now saved in memory
            assert caplog.records[0].msg == "Loading result of previous run from disk."
    assert result == (x**2, x**3, (x**4, x**5))  # Still the x=5 result

    # Run the tasks a 3rd time
    # They should be reloaded from memory
    with caplog.at_level(logging.DEBUG, logger=tasks[0].logger.name):
        for task in tasks:
            caplog.clear()
            task.run()
            assert caplog.records[0].msg == "Loading memoized result."
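
# The path assertions above hard-code the output filenames; they can also be
# derived from the task itself, as test_rebuild_input_datastore does below
# via `task.outputpaths`. A sketch of an equivalent check (helper name
# hypothetical):
def _assert_outputs_on_disk(task, outroot, inroot):
    for relpath in task.outputpaths.values():
        assert (outroot / relpath).is_file()    # actual file in the output store
        assert (inroot / relpath).is_symlink()  # symlink in the input datastore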
def test_ui_run_mp():
    # Add the test directory to the import search path.
    # Use the PYTHONPATH environment variable, because that is seen by the
    # subprocesses spawned by multiprocessing.Pool
    projectroot = testroot / "test_project"
    projectpath = str(projectroot.absolute())
    if projectpath not in sys.path:
        # sys.path.insert(0, projectpath)
        PYTHONPATH = os.getenv("PYTHONPATH", "")
        if PYTHONPATH:
            PYTHONPATH = os.pathsep + PYTHONPATH
        os.environ["PYTHONPATH"] = projectpath + PYTHONPATH

    # Clear the runtime directory and cd into it
    clean_project(projectroot)
    os.makedirs(projectroot / "data", exist_ok=True)
    os.chdir(projectroot)

    from tasks import Square_x, Failing, Orbit
    import subprocess

    def run_cli(*args):
        # The CLI must run in a subprocess so that it sees PYTHONPATH;
        # os.environ (set above) is inherited by default
        subprocess.run(("smttask", "run") + args, check=True)

    def n_records():
        # Count records the same way test_recorded_task inspects them
        return len(list(load_project().get_labels()))

    os.makedirs('tasklist', exist_ok=True)
    Square_x(x=3, reason="smttask run test").save('tasklist')
    Square_x(x=4, reason="smttask run test").save('tasklist')

    # Running with --no-record does not create a record and does not delete
    # the taskdesc file
    n0 = n_records()
    run_cli("tasklist/Square_x__661eaf10bc.taskdesc.json", "--no-record")
    assert n_records() == n0  # No saved record
    assert os.path.exists(projectroot / "tasklist/Square_x__661eaf10bc.taskdesc.json")

    # Running with --leave creates a record and does not delete the taskdesc file
    run_cli("tasklist/Square_x__661eaf10bc.taskdesc.json", "--leave")
    assert n_records() == n0 + 1  # New record
    assert os.path.exists(projectroot / "tasklist/Square_x__661eaf10bc.taskdesc.json")

    # Running with defaults creates a record and deletes the taskdesc file
    run_cli("tasklist/Square_x__661eaf10bc.taskdesc.json")
    assert n_records() == n0 + 2  # New record
    assert not os.path.exists(projectroot / "tasklist/Square_x__661eaf10bc.taskdesc.json")

    # Add the tasks back
    Square_x(x=3, reason="smttask run test").save('tasklist')
    Square_x(x=4, reason="smttask run test").save('tasklist')

    # With a glob pattern, both tasks are executed
    run_cli(*(str(p) for p in Path("tasklist").glob("Square_x__*.taskdesc.json")), "-v")
    assert n_records() == n0 + 4  # 2 new records
    assert not os.path.exists(projectroot / "tasklist/Square_x__661eaf10bc.taskdesc.json")
    assert not os.path.exists(projectroot / "tasklist/Square_x__e8d9eedb55.taskdesc.json")

    # Tasks using tqdm work and use SMTTASK_PROCESS_NUM to distinguish their
    # progress bars
    # => values around n=30000000 take about 15 s to run, appropriate for
    #    testing KeyboardInterrupt by hand
    Orbit(start_n=0, n=30000213, x=1.2, y=2,    reason="smttask run test (tqdm)").save('tasklist')
    Orbit(start_n=0, n=30000215, x=3.3, y=40.1, reason="smttask run test (tqdm)").save('tasklist')
    Orbit(start_n=0, n=30000217, x=3.3, y=40.1, reason="smttask run test (tqdm)").save('tasklist')
    Orbit(start_n=0, n=30000219, x=3.3, y=40.1, reason="smttask run test (tqdm)").save('tasklist')
    n1 = n_records()
    # Interrupt by hand (Ctrl-C) once the first two tasks have completed:
    # only those two runs should have been recorded
    run_cli(*(str(p) for p in Path("tasklist").glob("Orbit__*.taskdesc.json")),
            "-v", "--cores", "2")
    assert n_records() == n1 + 2  # 2 new records

    # Failures
    # TODO: Run the failing task and check that the failure is reported and
    #       recorded appropriately
    Failing(x=3, reason="smttask run test (failure)").save('tasklist')

    # (Leftover scratch for exercising a tqdm progress bar by hand)
    # from tqdm import tqdm
    # a = 3
    # for n in tqdm(range(3, 20)):
    #     a = a**2
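
# The KeyboardInterrupt scenario in test_ui_run_mp above is exercised by
# hand (Ctrl-C). A sketch of how it could be automated with POSIX signals;
# the helper name and `delay` parameter are hypothetical:
def _run_and_interrupt(cmd, delay):
    import signal, subprocess, time
    proc = subprocess.Popen(cmd)
    time.sleep(delay)                 # let some of the tasks finish first
    proc.send_signal(signal.SIGINT)   # arrives in the child as KeyboardInterrupt
    return proc.wait()

# e.g. _run_and_interrupt(["smttask", "run", <taskdesc files>, "--cores", "2"], delay=40)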
def test_pure_functions():
    # Add test directory to import search path
    projectroot = testroot / "test_project"
    projectpath = str(projectroot.absolute())
    if projectpath not in sys.path:
        sys.path.insert(0, projectpath)

    # Clear the runtime directory and cd into it
    clean_project(projectroot)
    os.makedirs(projectroot / "data", exist_ok=True)
    os.chdir(projectroot)

    from tasks import AddPureFunctions

    def f1(x):
        return x + 1

    def f2(p):
        return 1.5**p

    def g1(x, a):
        return x + a

    def g2(x, p):
        return x**p

    # Test function arithmetic (see CompositePureFunction)
    xlst = (-1.2, 0.5, 3)
    pure_f1 = PureFunction(f1)
    pure_f2 = PureFunction(f2)
    with pytest.raises(TypeError):
        # Fails because f2 is not wrapped in PureFunction
        h = pure_f1 + f2
    # Test all ops, including reversed versions
    h = pure_f1 + pure_f2
    assert [h(x) for x in xlst] == [f1(x) + f2(x) for x in xlst]
    h = pure_f1 + 5
    assert [h(x) for x in xlst] == [f1(x) + 5 for x in xlst]
    h = 9.2 + pure_f1
    assert [h(x) for x in xlst] == [9.2 + f1(x) for x in xlst]
    h = pure_f1 - pure_f2
    assert [h(x) for x in xlst] == [f1(x) - f2(x) for x in xlst]
    h = pure_f1 - 5
    assert [h(x) for x in xlst] == [f1(x) - 5 for x in xlst]
    h = 9.2 - pure_f1
    assert [h(x) for x in xlst] == [9.2 - f1(x) for x in xlst]
    h = pure_f1 * pure_f2
    assert [h(x) for x in xlst] == [f1(x) * f2(x) for x in xlst]
    h = pure_f1 * 5
    assert [h(x) for x in xlst] == [f1(x) * 5 for x in xlst]
    h = 9.2 * pure_f1
    assert [h(x) for x in xlst] == [9.2 * f1(x) for x in xlst]
    h = pure_f1 / pure_f2
    assert [h(x) for x in xlst] == [f1(x) / f2(x) for x in xlst]
    h = pure_f1 / 5
    assert [h(x) for x in xlst] == [f1(x) / 5 for x in xlst]
    h = 9.2 / pure_f1
    assert [h(x) for x in xlst] == [9.2 / f1(x) for x in xlst]

    task1 = AddPureFunctions(f1=f1,
                             f2=f2,
                             g1=functools.partial(g1, a=1),
                             g2=functools.partial(g2, x=1.5),
                             f3=h)
    assert task1.digest == 'b3f5fddcf8'
    assert task1.desc.json() == '{"taskname": "AddPureFunctions", "module": "tasks", "inputs": {"digest": "b3f5fddcf8", "hashed_digest": "b3f5fddcf8", "unhashed_digests": {}, "f1": "def f1(x):\\n return (x + 1)", "f2": "def f2(p):\\n return (1.5 ** p)", "g1": ["PartialPureFunction", "def g1(x, a):\\n return (x + a)", {"a": 1}], "g2": ["PartialPureFunction", "def g2(x, p):\\n return (x ** p)", {"x": 1.5}], "f3": ["CompositePureFunction", "truediv", [9.2, "def f1(x):\\n return (x + 1)"]]}, "reason": null}'
    task1.run()

    # Check that serialize → deserialize works
    from mackelab_toolbox.serialize import config as serialize_config
    serialize_config.trust_all_inputs = True
    task2 = smttask.Task.from_desc(task1.desc.json())
    assert task1.run()(0.5, 2) == task2.run()(0.5, 2)

    output = task1.Outputs.parse_result(task1.run(), _task=task1)
    assert output.json() == '{"": "def h(x, p):\\n return ((((f1(x) + f2(p)) + g1(x)) + g2(p=p)) + f3(x))"}'
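
# Self-contained reference for the composite function checked at the end of
# test_pure_functions (local names mirror that test); it documents the
# expected arithmetic only and is not used by the test suite:
def _add_pure_functions_reference(x, p):
    f1 = lambda x: x + 1
    f2 = lambda p: 1.5**p
    g1 = lambda x, a: x + a
    g2 = lambda x, p: x**p
    f3 = lambda x: 9.2 / f1(x)  # final value of `h` in the test
    return f1(x) + f2(p) + g1(x, a=1) + g2(x=1.5, p=p) + f3(x)

# task1.run()(0.5, 2) is expected to equal _add_pure_functions_reference(0.5, 2)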
def test_rebuild_input_datastore():
    # TODO: Test that more recent tasks overwrite older ones
    # TODO: Explicitly test behaviour when a record's output no longer
    #       exists (e.g. was deleted)
    from smttask import config
    from smttask.view import RecordStoreView
    from smttask.utils import compute_input_symlinks
    import shutil

    projectroot = Path(__file__).parent / "test_project"
    projectpath = str(projectroot.absolute())
    if projectpath not in sys.path:
        sys.path.insert(0, projectpath)

    # Clear the runtime directory and cd into it
    clean_project(projectroot)
    os.makedirs(projectroot / "data", exist_ok=True)
    os.chdir(projectroot)

    from tasks import Square_x, SquareAndCube_x

    # Create and run some tasks. Include tasks with both single & multiple outputs
    tasks = [Square_x(x=x, reason="pytest") for x in (1.1, 2.1, 5)]
    tasks += [SquareAndCube_x(x=x, pmax=5, reason="pytest") for x in (1.1, 2.1, 5)]
    for task in tasks:
        task.run()

    # Assert that the outputs were produced at the expected locations
    outroot = Path(config.project.data_store.root)
    inroot = Path(config.project.input_datastore.root)
    for task in tasks:
        for relpath in task.outputpaths.values():
            assert (outroot / relpath).exists()
            assert (outroot / relpath).is_file()
            assert not (outroot / relpath).is_symlink()
            assert (inroot / relpath).exists()
            assert (inroot / relpath).is_symlink()
            assert (outroot / relpath).resolve() == (inroot / relpath).resolve()

    # Delete the input data store, but leave the output data store intact
    for path in inroot.iterdir():
        # NOTE: With Python 3.9+, we could use Path.is_relative_to.
        # That would avoid this clunky pattern, where raising an exception
        # is the normal code path.
        try:
            outroot.relative_to(path)
        except ValueError:
            # `path` is not part of the path to outroot – delete it
            if path.is_dir():
                shutil.rmtree(path)
            else:
                path.unlink()

    # Assert that the links in the input datastore no longer exist
    for task in tasks:
        for relpath in task.outputpaths.values():
            assert (outroot / relpath).exists()
            assert (outroot / relpath).is_file()
            assert not (inroot / relpath).exists()

    # Rebuild the input data store
    recordlist = RecordStoreView()
    recordlist.rebuild_input_datastore(compute_input_symlinks)

    # Assert that the correct links were added back to the input data store
    for task in tasks:
        for relpath in task.outputpaths.values():
            assert (outroot / relpath).exists()
            assert (outroot / relpath).is_file()
            assert (inroot / relpath).exists()
            assert (inroot / relpath).is_symlink()
            assert (outroot / relpath).resolve() == (inroot / relpath).resolve()
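
# The deletion loop in test_rebuild_input_datastore notes that Python 3.9+
# offers Path.is_relative_to; a behaviourally equivalent sketch of that loop
# under the 3.9+ assumption:
def _clear_input_datastore(inroot, outroot):
    import shutil
    for path in inroot.iterdir():
        if not outroot.is_relative_to(path):  # keep ancestors of outroot
            if path.is_dir():
                shutil.rmtree(path)
            else:
                path.unlink()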