Code example #1
File: test_metadata.py  Project: alcrene/smttask
def test_outcome():

    projectroot = Path(__file__).parent / "test_project"
    projectpath = str(projectroot.absolute())
    if str(projectpath) not in sys.path:
        sys.path.insert(0, projectpath)

    # Clear the runtime directory and cd into it
    clean_project(projectroot)
    os.makedirs(projectroot / "data", exist_ok=True)
    os.chdir(projectroot)

    # Define a task which takes different outcomes
    from tasks import Polar
    task_succeed = Polar(x=1, y=0, reason="pytest")
    task_undefined = Polar(x=0, y=0, reason="pytest")

    task_succeed.run()
    task_undefined.run()

    from smttask.view import RecordStoreView
    RecordStoreView.default_project_dir = projectpath
    recordlist = RecordStoreView().list
    # Most recent records come first
    assert "undefined" in recordlist[0].outcome
    assert "undefined" not in recordlist[1].outcome
Code example #2
def test_unpure_digest():
    # Clear the runtime directory and cd into it
    clean_project(projectroot)
    os.makedirs(datadir, exist_ok=True)
    os.chdir(projectroot)

    # Create some dummy files for the task to find
    for c in "abc":
        with open(datadir / f"{c}.dat", 'w') as f:
            f.write(c * 10)

    @UnpureMemoizedTask
    def ListDir(root: str) -> List[str]:
        return sorted(os.listdir(root))

    # Create three tasks pointing to the same directory but with different
    # str arguments (otherwise smttask recognizes that they are identical
    # and creates only one Task)
    task1 = ListDir(root=str(datadir))
    task2 = ListDir(root=str(datadir / '..') + '/' +
                    '/'.join(datadir.parts[-1:]))
    task3 = ListDir(root=str(datadir / '../..') + '/' +
                    '/'.join(datadir.parts[-2:]))
    assert task1.run() == [f"{c}.dat" for c in "abc"]
    assert task1.digest == task2.digest
    # At this point task3.digest is still undetermined because it has not been
    # run, but task1 and task2 have run and fixed their digests

    # Add some dummy files – this should change the output of the
    # UnpureTask, and therefore its digest
    for c in "de":
        with open(datadir / f"{c}.dat", 'w') as f:
            f.write(c * 10)

    assert task1.run() == [f"{c}.dat" for c in "abc"]  # Result unchanged
    assert task2.run() == [f"{c}.dat" for c in "abc"]  # Result unchanged
    assert task3.run() == [f"{c}.dat"
                           for c in "abcde"]  # Reflects updated data

    # Tasks that first ran before the new files were added have the old
    # listing locked into their digest
    assert task1.digest != task3.digest
    assert task1.digest == task2.digest  # Still the same

    # Forcing a task to recompute will update its digest
    with pytest.warns(UserWarning):
        task2.run(recompute=True)  # User warning that digest has changed
    assert task2.digest == task3.digest
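
This snippet also relies on module-level imports and path constants (`testroot`, `projectroot`, `datadir`, `load_project`, ...) that the extraction dropped. Judging from usage across the examples, the test module's header is presumably along these lines (a sketch; exact contents are an assumption):

import functools
import logging
import os
import sys
from pathlib import Path
from typing import List

import pytest
import smttask
from smttask import UnpureMemoizedTask     # decorator used in this example
from sumatra.projects import load_project  # used by test_recorded_task below

testroot = Path(__file__).parent           # directory containing the test file
projectroot = testroot / "test_project"    # dummy Sumatra project the tests run in
datadir = projectroot / "data"             # directory the ListDir task enumerates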
Code example #3
def wip_test_pydantic_input():
    """
    A failing test for Issue #2:
    "When serializing a Pydantic model, use its own json encoders."
    """
    projectroot = Path(__file__).parent / "test_project"
    projectpath = str(projectroot.absolute())
    if str(projectpath) not in sys.path:
        sys.path.insert(0, projectpath)

    # Clear the runtime directory and cd into it
    clean_project(projectroot)
    os.makedirs(projectroot / "data", exist_ok=True)
    os.chdir(projectroot)

    # Define some dummy tasks
    from tasks import PydanticCounter, CountingWithPydanticObject
    py_count = PydanticCounter(counter=3)
    task = CountingWithPydanticObject(n=10, pobj=py_count)
    task2 = smttask.Task.from_desc(task.desc.json())
    assert task2.run() == 13
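
`PydanticCounter` and `CountingWithPydanticObject` live in the test project's `tasks.py`, which is not shown. Given the asserted result (10 + 3 = 13) and the issue under test, their shape is plausibly something like this (hypothetical sketch; Pydantic v1 `json_encoders` shown because that is what the issue refers to, and the `RecordedTask` decorator name is assumed):

from pydantic import BaseModel
from smttask import RecordedTask  # decorator name assumed

class PydanticCounter(BaseModel):
    counter: int

    class Config:
        # Issue #2: encoders declared here should be honoured when
        # smttask serializes the task description to JSON.
        json_encoders = {}

@RecordedTask
def CountingWithPydanticObject(n: int, pobj: PydanticCounter) -> int:
    return n + pobj.counter  # 10 + 3 == 13, matching the test's assertion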
Code example #4
def test_create_task(caplog):

    projectroot = Path(__file__).parent / "test_project"
    projectpath = str(projectroot.absolute())
    if str(projectpath) not in sys.path:
        sys.path.insert(0, projectpath)

    # Clear the runtime directory and cd into it
    clean_project(projectroot)
    os.makedirs(projectroot / "data", exist_ok=True)
    os.chdir(projectroot)

    from smttask import Create, Task, NotComputed, config
    from data_types import Point
    from mackelab_toolbox.typing import safe_packages
    safe_packages.add("data_types")

    # Define some dummy tasks
    # Note that we can create `Create` tasks directly in the run file
    tasks = [Create(Point)(x=i * 0.3, y=1 - i * 0.3) for i in range(3)]
    task_digests = ['ed85744d5a', '127d88b74d', '97fa31904f']

    # Delete any leftover cache
    for task in tasks:
        task.clear()

    # Run the tasks
    with caplog.at_level(logging.DEBUG, logger=tasks[0].logger.name):
        caplog.clear()
        for i, (task, digest) in enumerate(zip(tasks, task_digests)):
            point = task.run(cache=False)  # cache=False to test reloading from disk below
            assert task.hashed_digest == digest
            assert task.unhashed_digests == {}
            assert task.digest == digest
            assert caplog.records[0].msg == "Running task in memory."
            assert point.x == i * 0.3
            assert point.y == 1 - i * 0.3
            assert task._run_result is NotComputed  # Not cached

    # Run tasks again
    with caplog.at_level(logging.DEBUG, logger=tasks[0].logger.name):
        caplog.clear()
        for i, task in enumerate(tasks):
            point = task.run(cache=True)
            assert caplog.records[0].msg == "Running task in memory."
            assert len(task._run_result) == 1
            assert task._run_result.obj is point

    # Run tasks a 3rd time
    # They should be reloaded from memory
    with caplog.at_level(logging.DEBUG, logger=tasks[0].logger.name):
        caplog.clear()
        for i, task in enumerate(tasks):
            point = task.run(cache=True)
            assert caplog.records[0].msg == "Loading memoized result."
            assert len(task._run_result) == 1
            assert task._run_result.obj is point

    # Test deserialization
    new_task = Task.from_desc(task.desc.json())
    # Task recognizes that it is being constructed with the same arguments, and simply returns the preexisting instance
    assert new_task is task
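
`Point` is imported from the test project's `data_types` module. Only `x` and `y` are exercised, so a definition like the following would suffice (hypothetical reconstruction; shown as a Pydantic model, which also explains why the module must be whitelisted via `safe_packages` for deserialization):

# data_types.py (hypothetical)
from pydantic import BaseModel

class Point(BaseModel):
    x: float
    y: float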
Code example #5
def test_recorded_task(caplog):
    # OPTIMIZATION/TIMING: Running 3 tasks takes ~30 seconds
    #   (everything else in this test takes < 100ms)

    projectroot = Path(__file__).parent / "test_project"
    projectpath = str(projectroot.absolute())
    if str(projectpath) not in sys.path:
        sys.path.insert(0, projectpath)

    # Clear the runtime directory and cd into it
    clean_project(projectroot)
    os.makedirs(projectroot / "data", exist_ok=True)
    os.chdir(projectroot)

    # Define some dummy tasks
    from smttask import Task
    from tasks import Square_x
    tasks = [Square_x(x=x, reason="pytest") for x in (1.1, 2.1, 5)]
    task_digests = ['7ad6c9eb99', '2eb601a664', '1a247b2f98']

    # Delete any leftover cache
    for task in tasks:
        task.clear()

    # Run the tasks
    with caplog.at_level(logging.DEBUG, logger=tasks[0].logger.name):
        caplog.clear()
        for task in tasks:
            task.run(cache=False)  # cache=False to test reloading from disk below
            assert caplog.records[0].msg == "No previously saved result was found; running task."

    # Assert that the outputs were produced at the expected locations
    assert set(os.listdir(projectroot / "data")) == {"run_dump", "Square_x"}
    for task, digest in zip(tasks, task_digests):
        assert task.hashed_digest == digest
        assert task.unhashed_digests == {}
        assert task.digest == digest
        assert os.path.exists(projectroot / f"data/Square_x/{digest}_.json")
        assert os.path.islink(projectroot / f"data/Square_x/{digest}_.json")
        assert os.path.exists(projectroot /
                              f"data/run_dump/Square_x/{digest}_.json")
        assert os.path.isfile(projectroot /
                              f"data/run_dump/Square_x/{digest}_.json")

    # Run the tasks again
    # They should be reloaded from disk
    with caplog.at_level(logging.DEBUG, logger=tasks[0].logger.name):
        for task in tasks:
            caplog.clear()
            task.run(cache=True)  # cache=True => now saved in memory
            assert caplog.records[0].msg == "Loading result of previous run from disk."

    # Run the tasks a 3rd time
    # They should be reloaded from memory
    with caplog.at_level(logging.DEBUG, logger=tasks[0].logger.name):
        for task in tasks:
            caplog.clear()
            task.run()  # no cache arg needed; result was memoized by the cache=True run above
            assert caplog.records[0].msg == "Loading memoized result."

    # Assert that status tags are saved
    # TODO: Test every possible tag value. Will require tasks which fail after each update of `status`
    project = load_project()
    for label in project.get_labels():
        record = project.get_record(label)
        assert record.tags == {'_finished_'}

    # Test deserialization
    new_task = Task.from_desc(task.desc.json())
    # Task recognizes that it is being constructed with the same arguments, and simply returns the preexisting instance
    assert new_task is task
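
The recurring `Square_x` task comes from the test project's `tasks.py`. A minimal definition consistent with its usage, mirroring the decorator style of example #2 (an assumption; the decorator name is taken from smttask's API, and the timing note above implies the real task also does roughly 10 s of work per run):

import time
from smttask import RecordedTask  # decorator name assumed

@RecordedTask
def Square_x(x: float) -> float:
    time.sleep(10)  # stand-in for the ~10 s per run implied by the timing note
    return x**2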
Code example #6
def test_iterative_task(caplog):

    projectroot = Path(__file__).parent / "test_project"
    projectpath = str(projectroot.absolute())
    if str(projectpath) not in sys.path:
        sys.path.insert(0, projectpath)

    # Clear the runtime directory and cd into it
    clean_project(projectroot)
    os.makedirs(projectroot / "data", exist_ok=True)
    os.chdir(projectroot)

    # Define some dummy tasks
    from smttask import Task
    from tasks import PowSeq
    tasks = {
        1: PowSeq(start_n=1, n=1, a=3, p=3, reason="pytest"),
        2: PowSeq(start_n=1, n=2, a=3, p=3, reason="pytest"),
        3: PowSeq(start_n=1, n=3, a=3, p=3, reason="pytest"),
        4: PowSeq(start_n=1, n=4, a=3, p=3, reason="pytest")
    }
    hashed_digest = "b2c7aa835f"

    # Delete any leftover cache
    for task in tasks.values():
        task.clear()

    with caplog.at_level(logging.DEBUG, logger=tasks[1].logger.name):
        caplog.clear()
        # Compute n=2 from scratch
        n = 2
        result = tasks[n].run(cache=False)
        assert caplog.records[0].msg == "No previously saved result was found; running task."
        assert result[0] == n
        assert result[1] == 3**3
        for nm in ['a', 'n']:
            assert os.path.exists(
                projectroot / f"data/PowSeq/{hashed_digest}__n_{n}_{nm}.json")
            assert os.path.islink(
                projectroot / f"data/PowSeq/{hashed_digest}__n_{n}_{nm}.json")
            assert os.path.exists(
                projectroot /
                f"data/run_dump/PowSeq/{hashed_digest}__n_{n}_{nm}.json")
            assert os.path.isfile(
                projectroot /
                f"data/run_dump/PowSeq/{hashed_digest}__n_{n}_{nm}.json")
        with open(projectroot /
                  f"data/PowSeq/{hashed_digest}__n_{n}_a.json") as f:
            a = int(f.read())
        assert a == 3**3

        # Reload n=2 from disk
        caplog.clear()
        n = 2
        result = tasks[n].run(cache=False)
        assert caplog.records[0].msg == "Found output from a previous run matching these parameters."
        assert caplog.records[1].msg == "Loading result of previous run from disk."

        # Compute n=4, starting from n=2 reloaded from disk
        caplog.clear()
        n = 4
        result = tasks[n].run(cache=False)
        assert caplog.records[0].msg == "Found output from a previous run matching these parameters but with only 2 iterations."
        assert caplog.records[1].msg == "Loading result of previous run from disk."
        assert caplog.records[2].msg == "Continuing from a previous partial result."
        assert result[0] == n
        assert result[1] == ((3**3)**3)**3
        for nm in ['a', 'n']:
            assert os.path.exists(
                projectroot / f"data/PowSeq/{hashed_digest}__n_{n}_{nm}.json")
            assert os.path.islink(
                projectroot / f"data/PowSeq/{hashed_digest}__n_{n}_{nm}.json")
            assert os.path.exists(
                projectroot /
                f"data/run_dump/PowSeq/{hashed_digest}__n_{n}_{nm}.json")
            assert os.path.isfile(
                projectroot /
                f"data/run_dump/PowSeq/{hashed_digest}__n_{n}_{nm}.json")
        with open(projectroot /
                  f"data/PowSeq/{hashed_digest}__n_{n}_a.json") as f:
            a = int(f.read())
        assert a == ((3**3)**3)**3

        # Reload n=4 from disk
        caplog.clear()
        n = 4
        result = tasks[n].run(cache=False)
        assert caplog.records[0].msg == "Found output from a previous run matching these parameters."
        assert caplog.records[1].msg == "Loading result of previous run from disk."

        # Compute n=1 from scratch
        caplog.clear()
        n = 1
        result = tasks[n].run(cache=False)
        assert caplog.records[0].msg == "No previously saved result was found; running task."
        assert result[0] == n
        assert result[1] == 3
        for nm in ['a', 'n']:
            assert os.path.exists(
                projectroot / f"data/PowSeq/{hashed_digest}__n_{n}_{nm}.json")
            assert os.path.islink(
                projectroot / f"data/PowSeq/{hashed_digest}__n_{n}_{nm}.json")
            assert os.path.exists(
                projectroot /
                f"data/run_dump/PowSeq/{hashed_digest}__n_{n}_{nm}.json")
            assert os.path.isfile(
                projectroot /
                f"data/run_dump/PowSeq/{hashed_digest}__n_{n}_{nm}.json")
        with open(projectroot /
                  f"data/PowSeq/{hashed_digest}__n_{n}_a.json") as f:
            a = int(f.read())
        assert a == 3

        # Compute n=3, starting from n=2 reloaded from disk
        caplog.clear()
        n = 3
        result = tasks[n].run(cache=False)
        assert caplog.records[0].msg == "Found output from a previous run matching these parameters but with only 2 iterations."
        assert caplog.records[1].msg == "Loading result of previous run from disk."
        assert caplog.records[2].msg == "Continuing from a previous partial result."
        assert result[0] == n
        assert result[1] == (3**3)**3
        for nm in ['a', 'n']:
            assert os.path.exists(
                projectroot / f"data/PowSeq/{hashed_digest}__n_{n}_{nm}.json")
            assert os.path.islink(
                projectroot / f"data/PowSeq/{hashed_digest}__n_{n}_{nm}.json")
            assert os.path.exists(
                projectroot /
                f"data/run_dump/PowSeq/{hashed_digest}__n_{n}_{nm}.json")
            assert os.path.isfile(
                projectroot /
                f"data/run_dump/PowSeq/{hashed_digest}__n_{n}_{nm}.json")
        with open(projectroot /
                  f"data/PowSeq/{hashed_digest}__n_{n}_a.json") as f:
            a = int(f.read())
        assert a == (3**3)**3

    # Test deserialization
    new_task = Task.from_desc(task.desc.json())
    # Task recognizes that it is being constructed with the same arguments, and simply returns the preexisting instance
    assert new_task is task
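
The values asserted above are plain iterated exponentiation with a=3, p=3: each iteration maps a to a**p. A standalone arithmetic check, independent of smttask (the task presumably returns the pair (n, a)):

def pow_seq(a: int, p: int, n: int, start_n: int = 1) -> int:
    # One a -> a**p step per iteration, from start_n up to n
    for _ in range(start_n, n):
        a = a**p
    return a

assert pow_seq(3, 3, n=1) == 3               # no iterations
assert pow_seq(3, 3, n=2) == 3**3            # one step
assert pow_seq(3, 3, n=3) == (3**3)**3       # two steps
assert pow_seq(3, 3, n=4) == ((3**3)**3)**3  # three steps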
Code example #7
def test_multiple_output_task(caplog):

    projectroot = Path(__file__).parent / "test_project"
    projectpath = str(projectroot.absolute())
    if str(projectpath) not in sys.path:
        sys.path.insert(0, projectpath)

    # Clear the runtime directory and cd into it
    clean_project(projectroot)
    os.makedirs(projectroot / "data", exist_ok=True)
    os.chdir(projectroot)

    # Define some dummy tasks
    from tasks import SquareAndCube_x
    tasks = [
        SquareAndCube_x(reason="pytest", x=x, pmax=5) for x in (1.1, 2.1, 5)
    ]
    task_digests = ['860feb44ee', '4b754dd53d', 'fcde864238']

    # Delete any leftover cache
    for task in tasks:
        task.clear()

    # Run the tasks
    with caplog.at_level(logging.DEBUG, logger=tasks[0].logger.name):
        for task in tasks:
            caplog.clear()
            result = task.run(cache=False)  # cache=False to test reloading from disk below
            assert caplog.records[0].msg == "No previously saved result was found; running task."
    x = 5.  # the last task's argument; `result` holds that task's output
    assert result == (x**2, x**3, (x**4, x**5))
    assert isinstance(result[2], tuple)

    # Assert that the outputs were produced at the expected locations
    assert set(os.listdir(projectroot / "data")) == {"run_dump", "SquareAndCube_x"}
    for task, digest in zip(tasks, task_digests):
        assert task.hashed_digest == digest
        assert task.unhashed_digests == {}
        assert task.digest == digest
        assert os.path.exists(projectroot /
                              f"data/SquareAndCube_x/{digest}_sqr.json")
        assert os.path.islink(projectroot /
                              f"data/SquareAndCube_x/{digest}_sqr.json")
        assert os.path.exists(projectroot /
                              f"data/SquareAndCube_x/{digest}_cube.json")
        assert os.path.islink(projectroot /
                              f"data/SquareAndCube_x/{digest}_cube.json")
        assert os.path.exists(projectroot /
                              f"data/SquareAndCube_x/{digest}_4.json")
        assert os.path.islink(projectroot /
                              f"data/SquareAndCube_x/{digest}_4.json")
        assert os.path.exists(projectroot /
                              f"data/SquareAndCube_x/{digest}_5.json")
        assert os.path.islink(projectroot /
                              f"data/SquareAndCube_x/{digest}_5.json")
        assert os.path.exists(
            projectroot / f"data/run_dump/SquareAndCube_x/{digest}_sqr.json")
        assert os.path.isfile(
            projectroot / f"data/run_dump/SquareAndCube_x/{digest}_sqr.json")
        assert os.path.exists(
            projectroot / f"data/run_dump/SquareAndCube_x/{digest}_cube.json")
        assert os.path.isfile(
            projectroot / f"data/run_dump/SquareAndCube_x/{digest}_cube.json")
        assert os.path.exists(projectroot /
                              f"data/run_dump/SquareAndCube_x/{digest}_4.json")
        assert os.path.isfile(projectroot /
                              f"data/run_dump/SquareAndCube_x/{digest}_4.json")
        assert os.path.exists(projectroot /
                              f"data/run_dump/SquareAndCube_x/{digest}_5.json")
        assert os.path.isfile(projectroot /
                              f"data/run_dump/SquareAndCube_x/{digest}_5.json")

    # Run the tasks again
    # They should be reloaded from disk
    with caplog.at_level(logging.DEBUG, logger=tasks[0].logger.name):
        for task in tasks:
            caplog.clear()
            result = task.run(cache=True)  # cache=True => now saved in memory
            assert caplog.records[0].msg == "Loading result of previous run from disk."
    assert result == (x**2, x**3, (x**4, x**5))

    # Run the tasks a 3rd time
    # They should be reloaded from memory
    with caplog.at_level(logging.DEBUG, logger=tasks[0].logger.name):
        for task in tasks:
            caplog.clear()
            task.run()  # no cache arg needed; result was memoized by the cache=True run above
            assert caplog.records[0].msg == "Loading memoized result."
Code example #8
File: wip_test_ui.py  Project: alcrene/smttask
def test_ui_run_mp():
    # Add test directory to import search path
    # Use PYTHONPATH environment variable, because that will be seen by
    # subprocesses spawned by multiprocessing.Pool
    projectroot = testroot/"test_project"
    projectpath = str(projectroot.absolute())
    if str(projectpath) not in sys.path:
        # sys.path.insert(0, projectpath)
        PYTHONPATH = os.getenv("PYTHONPATH", "")
        if PYTHONPATH:
            PYTHONPATH = os.pathsep + PYTHONPATH
        os.environ["PYTHONPATH"] = projectpath + PYTHONPATH
    # Clear the runtime directory and cd into it
    clean_project(projectroot)
    os.makedirs(projectroot/"data", exist_ok=True)
    os.chdir(projectroot)

    from tasks import Square_x, Failing, Orbit
    os.makedirs('tasklist', exist_ok=True)

    Square_x(x=3, reason="smttask run test").save('tasklist')
    Square_x(x=4, reason="smttask run test").save('tasklist')
    # WIP notes (originally a %%bash notebook cell); run the commands below
    # from the project root, then check the stated expectations.
    # Running with --no-record does not create a record and does not delete
    # the taskdesc file:
    #     PYTHONPATH="$projectpath" smttask run tasklist/Square_x__661eaf10bc.taskdesc.json --no-record
    # TODO assert: no saved record
    assert os.path.exists(projectroot/"tasklist/Square_x__661eaf10bc.taskdesc.json")
    # Running with --leave creates a record and does not delete the taskdesc file:
    #     PYTHONPATH="$projectpath" smttask run tasklist/Square_x__661eaf10bc.taskdesc.json --leave
    # TODO assert: new record
    assert os.path.exists(projectroot/"tasklist/Square_x__661eaf10bc.taskdesc.json")
    # Running with defaults creates a record and deletes the taskdesc file:
    #     PYTHONPATH="$projectpath" smttask run tasklist/Square_x__661eaf10bc.taskdesc.json
    # TODO assert: new record
    assert not os.path.exists(projectroot/"tasklist/Square_x__661eaf10bc.taskdesc.json")

    # Add tasks back
    Square_x(x=3, reason="smttask run test").save('tasklist')
    Square_x(x=4, reason="smttask run test").save('tasklist')

    # Both tasks are executed:
    #     PYTHONPATH="$projectpath" smttask run tasklist/Square_x__*.taskdesc.json -v
    # TODO assert: 2 new records
    assert not os.path.exists(projectroot/"tasklist/Square_x__661eaf10bc.taskdesc.json")
    assert not os.path.exists(projectroot/"tasklist/Square_x__e8d9eedb55.taskdesc.json")

    # Tasks using tqdm work and use SMTTASK_PROCESS_NUM to distinguish progress bars
    # => values around n=30000000 take about 15s to run, appropriate for testing KeyboardInterrupt
    Orbit(start_n=0, n=30000213, x=1.2, y=2, reason="smttask run test (tqdm)").save('tasklist')
    Orbit(start_n=0, n=30000215, x=3.3, y=40.1, reason="smttask run test (tqdm)").save('tasklist')
    Orbit(start_n=0, n=30000217, x=3.3, y=40.1, reason="smttask run test (tqdm)").save('tasklist')
    Orbit(start_n=0, n=30000219, x=3.3, y=40.1, reason="smttask run test (tqdm)").save('tasklist')
    PYTHONPATH="$HOME/usr/local/python/smttask/tests/test_project" smttask run tasklist/Orbit__*.taskdesc.json -v --cores 2
    assert [2 new records]

    # Failures
    Failing(x=3, reason="smttask run test (failure)").save('tasklist')

    # WIP scratch: manual check of tqdm progress-bar behaviour
    from tqdm import tqdm
    a = 3
    for n in tqdm(range(3, 20)):
        a = a**2

    from time import sleep  # WIP leftover
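
To make the shell scratch above executable from pytest, each command could be driven through `subprocess` (a sketch; `run_smttask_cli` is a hypothetical helper, and the CLI arguments are taken verbatim from the notes):

import os
import subprocess

def run_smttask_cli(*args: str, projectpath: str) -> subprocess.CompletedProcess:
    # Invoke the smttask CLI with the test project prepended to PYTHONPATH,
    # so multiprocessing workers can import the `tasks` module.
    env = dict(os.environ)
    env["PYTHONPATH"] = projectpath + os.pathsep + env.get("PYTHONPATH", "")
    return subprocess.run(["smttask", "run", *args],
                          env=env, check=True, capture_output=True)

# e.g. the --no-record invocation from the notes above:
# run_smttask_cli("tasklist/Square_x__661eaf10bc.taskdesc.json",
#                 "--no-record", projectpath=projectpath)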
Code example #9
File: test_typing.py  Project: alcrene/smttask
def test_pure_functions():

    # Add test directory to import search path
    projectroot = testroot / "test_project"
    projectpath = str(projectroot.absolute())
    if str(projectpath) not in sys.path:
        sys.path.insert(0, projectpath)
    # Clear the runtime directory and cd into it
    clean_project(projectroot)
    os.makedirs(projectroot / "data", exist_ok=True)
    os.chdir(projectroot)

    from tasks import AddPureFunctions

    def f1(x):
        return x + 1

    def f2(p):
        return 1.5**p

    def g1(x, a):
        return x + a

    def g2(x, p):
        return x**p

    # Test Function arithmetic (see CompositePureFunction)
    xlst = (-1.2, 0.5, 3)
    pure_f1 = PureFunction(f1)
    pure_f2 = PureFunction(f2)
    with pytest.raises(TypeError):
        # Fails because f2 is not pure
        h = pure_f1 + f2
    # Test all ops, including reversed versions
    h = pure_f1 + pure_f2
    assert [h(x) for x in xlst] == [f1(x) + f2(x) for x in xlst]
    h = pure_f1 + 5
    assert [h(x) for x in xlst] == [f1(x) + 5 for x in xlst]
    h = 9.2 + pure_f1
    assert [h(x) for x in xlst] == [9.2 + f1(x) for x in xlst]
    h = pure_f1 - pure_f2
    assert [h(x) for x in xlst] == [f1(x) - f2(x) for x in xlst]
    h = pure_f1 - 5
    assert [h(x) for x in xlst] == [f1(x) - 5 for x in xlst]
    h = 9.2 - pure_f1
    assert [h(x) for x in xlst] == [9.2 - f1(x) for x in xlst]
    h = pure_f1 * pure_f2
    assert [h(x) for x in xlst] == [f1(x) * f2(x) for x in xlst]
    h = pure_f1 * 5
    assert [h(x) for x in xlst] == [f1(x) * 5 for x in xlst]
    h = 9.2 * pure_f1
    assert [h(x) for x in xlst] == [9.2 * f1(x) for x in xlst]
    h = pure_f1 / pure_f2
    assert [h(x) for x in xlst] == [f1(x) / f2(x) for x in xlst]
    h = pure_f1 / 5
    assert [h(x) for x in xlst] == [f1(x) / 5 for x in xlst]
    h = 9.2 / pure_f1
    assert [h(x) for x in xlst] == [9.2 / f1(x) for x in xlst]

    task1 = AddPureFunctions(f1=f1,
                             f2=f2,
                             g1=functools.partial(g1, a=1),
                             g2=functools.partial(g2, x=1.5),
                             f3=h)

    assert task1.digest == 'b3f5fddcf8'
    assert task1.desc.json() == '{"taskname": "AddPureFunctions", "module": "tasks", "inputs": {"digest": "b3f5fddcf8", "hashed_digest": "b3f5fddcf8", "unhashed_digests": {}, "f1": "def f1(x):\\n    return (x + 1)", "f2": "def f2(p):\\n    return (1.5 ** p)", "g1": ["PartialPureFunction", "def g1(x, a):\\n    return (x + a)", {"a": 1}], "g2": ["PartialPureFunction", "def g2(x, p):\\n    return (x ** p)", {"x": 1.5}], "f3": ["CompositePureFunction", "truediv", [9.2, "def f1(x):\\n    return (x + 1)"]]}, "reason": null}'

    task1.run()

    # Check that serialize->deserialize works
    from mackelab_toolbox.serialize import config as serialize_config
    serialize_config.trust_all_inputs = True
    task2 = smttask.Task.from_desc(task1.desc.json())
    assert task1.run()(0.5, 2) == task2.run()(0.5, 2)

    output = task1.Outputs.parse_result(task1.run(), _task=task1)

    assert output.json() == '{"": "def h(x, p):\\n    return ((((f1(x) + f2(p)) + g1(x)) + g2(p=p)) + f3(x))"}'
Code example #10
def test_rebuild_input_datastore():
    # TODO: Test that more recent tasks overwrite older ones
    # TODO: Explicitly test behaviour when record output no longer exists (e.g. was deleted)
    from smttask import config
    from smttask.view import RecordStoreView
    from smttask.utils import compute_input_symlinks
    import shutil

    projectroot = Path(__file__).parent / "test_project"
    projectpath = str(projectroot.absolute())
    if str(projectpath) not in sys.path:
        sys.path.insert(0, projectpath)

    # Clear the runtime directory and cd into it
    clean_project(projectroot)
    os.makedirs(projectroot / "data", exist_ok=True)
    os.chdir(projectroot)
    from tasks import Square_x, SquareAndCube_x

    # Create and run some tasks. Include tasks with both single & multiple outputs
    tasks = [Square_x(x=x, reason="pytest") for x in (1.1, 2.1, 5)]
    tasks += [
        SquareAndCube_x(x=x, pmax=5, reason="pytest") for x in (1.1, 2.1, 5)
    ]
    for task in tasks:
        task.run()

    # Assert that the outputs were produced at the expected locations
    outroot = Path(config.project.data_store.root)
    inroot = Path(config.project.input_datastore.root)
    for task in tasks:
        for relpath in task.outputpaths.values():
            assert (outroot / relpath).exists()
            assert (outroot / relpath).is_file()
            assert not (outroot / relpath).is_symlink()
            assert (inroot / relpath).exists()
            assert (inroot / relpath).is_symlink()
            assert (outroot / relpath).resolve() == (inroot /
                                                     relpath).resolve()

    # Delete the input data store, but leave the output data store intact
    for path in inroot.iterdir():
        # NOTE: With Python 3.9+, we could use Path.is_relative_to
        #       This would avoid this clunky pattern where raising an exception
        #       is the normal code path
        try:
            outroot.relative_to(path)
        except ValueError:
            # path not part of path to outroot - delete
            if path.is_dir():
                shutil.rmtree(path)
            else:
                path.unlink()

    # Assert that the links in the input datastore no longer exist
    for task in tasks:
        for relpath in task.outputpaths.values():
            assert (outroot / relpath).exists()
            assert (outroot / relpath).is_file()
            assert not (inroot / relpath).exists()

    # Rebuild the input data store
    recordlist = RecordStoreView()
    recordlist.rebuild_input_datastore(compute_input_symlinks)

    # Assert that the correct links were added back to the input data store
    outroot = Path(config.project.data_store.root)
    inroot = Path(config.project.input_datastore.root)
    for task in tasks:
        for relpath in task.outputpaths.values():
            assert (outroot / relpath).exists()
            assert (outroot / relpath).is_file()
            assert (inroot / relpath).exists()
            assert (inroot / relpath).is_symlink()
            assert (outroot / relpath).resolve() == (inroot /
                                                     relpath).resolve()
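
As the comment in the deletion loop notes, Python 3.9's `Path.is_relative_to` removes the exception-as-control-flow pattern. The equivalent 3.9+ version, wrapped as a standalone sketch (`clear_input_store` is a hypothetical name):

import shutil
from pathlib import Path

def clear_input_store(inroot: Path, outroot: Path):
    # Delete everything in the input datastore except ancestors of the
    # output datastore (same logic as the try/except loop above).
    for path in inroot.iterdir():
        if not outroot.is_relative_to(path):
            if path.is_dir():
                shutil.rmtree(path)
            else:
                path.unlink()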