def test_locator():
    job = Locator(_parse_parts("job"))
    example = Locator(_parse_parts("example/path"))
    combined = example / job
    combined_from_string = example / "job"

    assert repr(combined) == repr(combined_from_string)
    with pytest.raises(RuntimeError):
        _ = combined / 90

    # Confirm we can't create a locator with an equals sign in the name
    with pytest.raises(ValueError):
        _ = Locator(_parse_parts("x=20"))
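
The _parse_parts helper used throughout these tests is never shown. Judging from calls such as _parse_parts("example/path"), it most likely just splits a slash-delimited string into the segments Locator expects; a minimal sketch under that assumption (the real helper may differ):

from typing import List


def _parse_parts(path: str) -> List[str]:
    # Hypothetical reconstruction: split a slash-delimited locator string
    # into the parts Locator is assumed to accept.
    return path.split("/")
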
def test_dax_with_categories(tmp_path):
    workflow_params = Parameters.from_mapping({
        "workflow_name": "Test",
        "workflow_created": "Testing",
        "workflow_log_dir": str(tmp_path / "log"),
        "workflow_directory": str(tmp_path / "working"),
        "site": "saga",
        "namespace": "test",
        "partition": "gaia",
        "home_dir": str(tmp_path),
    })
    initialize_vista_pegasus_wrapper(workflow_params)

    multiply_job_name = Locator(_parse_parts("jobs/multiply"))
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    multiply_input_file = tmp_path / "raw_nums.txt"
    multiply_params = Parameters.from_mapping({
        "input_file": multiply_input_file,
        "output_file": multiply_output_file,
        "x": 4
    })
    multiply_job_category = "arithmetic"

    run_python_on_parameters(
        multiply_job_name,
        multiply_by_x_main,
        multiply_params,
        depends_on=[],
        category=multiply_job_category,
    )

    # Check that the multiply job has the appropriate category set in the DAX file
    dax_file = write_workflow_description()
    assert dax_file.exists()

    assert _job_in_dax_has_category(dax_file, multiply_job_name,
                                    multiply_job_category)
    assert not _job_in_dax_has_category(dax_file, multiply_job_name,
                                        "an-arbitrary-category")
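
_job_in_dax_has_category, used by the assertions above, is also assumed rather than shown. A hedged sketch of what it might do, assuming the YAML workflow format that the exclude-nodes test further below parses, job names formed by joining the locator parts with underscores, and the category recorded as a dagman profile on the job entry (the real helper and the exact profile key may differ):

from yaml import SafeLoader, load


def _job_in_dax_has_category(dax_file, job_locator, category) -> bool:
    # Hypothetical sketch: parse the YAML DAX and look for the category among
    # the job's dagman profiles.
    with dax_file.open("r") as dax:
        workflow = load(dax, Loader=SafeLoader)
    job_name = "_".join(str(job_locator).split("/"))  # assumed naming scheme
    for job in workflow.get("jobs", []):
        if job.get("name") == job_name:
            dagman_profiles = job.get("profiles", {}).get("dagman", {})
            return category in dagman_profiles.values()
    return False
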
def test_not_clearing_ckpts(monkeypatch, tmp_path):

    workflow_params = Parameters.from_mapping({
        "workflow_name": "Test",
        "workflow_created": "Testing",
        "workflow_log_dir": str(tmp_path / "log"),
        "workflow_directory": str(tmp_path / "working"),
        "site": "saga",
        "namespace": "test",
        "partition": "scavenge",
        "home_dir": str(tmp_path),
    })

    initialize_vista_pegasus_wrapper(workflow_params)

    multiply_job_name = Locator(_parse_parts("jobs/multiply"))
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    multiply_input_file = tmp_path / "raw_nums.txt"
    multiply_params = Parameters.from_mapping({
        "input_file": multiply_input_file,
        "output_file": multiply_output_file,
        "x": 4
    })

    multiple_dir = directory_for(multiply_job_name)

    checkpointed_multiply_file = multiple_dir / "___ckpt"
    checkpointed_multiply_file.touch()
    multiply_output_file.touch()

    run_python_on_parameters(multiply_job_name,
                             multiply_by_x_main,
                             multiply_params,
                             depends_on=[])
    monkeypatch.setattr("builtins.input", lambda _: "n")
    write_workflow_description()
    assert checkpointed_multiply_file.exists()
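
The monkeypatch of builtins.input above works because write_workflow_description is expected to ask on stdin whether existing ___ckpt files should be cleared; answering "n", as this test does, leaves them in place. A hedged sketch of that interaction, not the wrapper's actual implementation:

def _maybe_clear_checkpoints(checkpoint_files) -> None:
    # Hypothetical: prompt once and only delete checkpoints on an explicit "y".
    answer = input("Remove existing checkpoint files? [y/n] ")
    if answer.strip().lower() == "y":
        for ckpt in checkpoint_files:
            ckpt.unlink()
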
def test_dax_with_job_on_saga(tmp_path):
    workflow_params = Parameters.from_mapping({
        "workflow_name": "Test",
        "workflow_created": "Testing",
        "workflow_log_dir": str(tmp_path / "log"),
        "workflow_directory": str(tmp_path / "working"),
        "site": "saga",
        "namespace": "test",
        "partition": "scavenge",
    })
    slurm_params = Parameters.from_mapping({
        "partition": "scavenge",
        "num_cpus": 1,
        "num_gpus": 0,
        "memory": "4G"
    })
    multiply_input_file = tmp_path / "raw_nums.txt"
    random = Random()
    random.seed(0)
    nums = immutableset(int(random.random() * 100) for _ in range(0, 25))
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    sorted_output_file = tmp_path / "sorted_nums.txt"
    with multiply_input_file.open("w") as mult_file:
        mult_file.writelines(f"{num}\n" for num in nums)
    multiply_params = Parameters.from_mapping({
        "input_file": multiply_input_file,
        "output_file": multiply_output_file,
        "x": 4
    })
    sort_params = Parameters.from_mapping({
        "input_file": multiply_output_file,
        "output_file": sorted_output_file
    })

    resources = SlurmResourceRequest.from_parameters(slurm_params)
    workflow_builder = WorkflowBuilder.from_parameters(workflow_params)

    multiply_job_name = Locator(_parse_parts("jobs/multiply"))
    multiply_artifact = ValueArtifact(
        multiply_output_file,
        depends_on=workflow_builder.run_python_on_parameters(
            multiply_job_name,
            multiply_by_x_main,
            multiply_params,
            depends_on=[]),
        locator=Locator("multiply"),
    )
    multiple_dir = workflow_builder.directory_for(multiply_job_name)
    assert (multiple_dir / "___run.sh").exists()
    assert (multiple_dir / "____params.params").exists()

    sort_job_name = Locator(_parse_parts("jobs/sort"))
    sort_dir = workflow_builder.directory_for(sort_job_name)
    workflow_builder.run_python_on_parameters(
        sort_job_name,
        sort_nums_main,
        sort_params,
        depends_on=[multiply_artifact],
        resource_request=resources,
    )
    assert (sort_dir / "___run.sh").exists()
    assert (sort_dir / "____params.params").exists()

    dax_file_one = workflow_builder.write_dax_to_dir(tmp_path)
    dax_file_two = workflow_builder.write_dax_to_dir()

    assert dax_file_one.exists()
    assert dax_file_two.exists()

    submit_script_one = tmp_path / "submit_script_one.sh"
    submit_script_two = tmp_path / "submit_script_two.sh"
    build_submit_script(
        submit_script_one,
        str(dax_file_one),
        workflow_builder._workflow_directory,  # pylint:disable=protected-access
    )
    build_submit_script(
        submit_script_two,
        str(dax_file_two),
        workflow_builder._workflow_directory,  # pylint:disable=protected-access
    )

    assert submit_script_one.exists()
    assert submit_script_two.exists()

    submit_script_process = subprocess.Popen(
        ["sh", str(submit_script_one)],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding="utf-8",
    )
    stdout, stderr = submit_script_process.communicate()

    print(stdout)
    print(stderr)
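
multiply_by_x_main, the entry point every multiply job runs, is not part of this listing either. A sketch of what it plausibly does, assuming the usual vistautils Parameters accessors (existing_file, creatable_file, integer); the real script may differ:

from vistautils.parameters import Parameters


def multiply_by_x_main(params: Parameters) -> None:
    # Hypothetical reconstruction: multiply each whitespace-separated integer
    # in input_file by x and write the results, one per line, to output_file.
    input_file = params.existing_file("input_file")
    output_file = params.creatable_file("output_file")
    x = params.integer("x")
    nums = [int(token) for token in input_file.read_text().split()]
    output_file.write_text("".join(f"{num * x}\n" for num in nums))
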
def test_category_max_jobs(tmp_path):
    workflow_params = Parameters.from_mapping({
        "workflow_name": "Test",
        "workflow_created": "Testing",
        "workflow_log_dir": str(tmp_path / "log"),
        "workflow_directory": str(tmp_path / "working"),
        "site": "saga",
        "namespace": "test",
        "partition": "scavenge",
    })
    multiply_slurm_params = Parameters.from_mapping({
        "partition": "scavenge",
        "num_cpus": 1,
        "num_gpus": 0,
        "memory": "4G"
    })
    multiply_resources = SlurmResourceRequest.from_parameters(
        multiply_slurm_params)
    workflow_builder = WorkflowBuilder.from_parameters(workflow_params)

    multiply_job_name = Locator(_parse_parts("jobs/multiply"))
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    multiply_input_file = tmp_path / "raw_nums.txt"
    multiply_params = Parameters.from_mapping({
        "input_file": multiply_input_file,
        "output_file": multiply_output_file,
        "x": 4
    })

    multiply_artifact = ValueArtifact(
        multiply_output_file,
        depends_on=workflow_builder.run_python_on_parameters(
            multiply_job_name,
            multiply_by_x_main,
            multiply_params,
            depends_on=[],
            resource_request=multiply_resources,
        ),
        locator=Locator("multiply"),
    )

    sort_slurm_params = Parameters.from_mapping({
        "partition": "ephemeral",
        "num_cpus": 1,
        "num_gpus": 0,
        "memory": "4G"
    })
    sort_resources = SlurmResourceRequest.from_parameters(sort_slurm_params)

    sort_job_name = Locator(_parse_parts("jobs/sort"))
    sorted_output_file = tmp_path / "sorted_nums.txt"
    sort_params = Parameters.from_mapping({
        "input_file": multiply_output_file,
        "output_file": sorted_output_file
    })
    workflow_builder.run_python_on_parameters(
        sort_job_name,
        sort_nums_main,
        sort_params,
        depends_on=[multiply_artifact],
        resource_request=sort_resources,
    )

    workflow_builder.limit_jobs_for_category("scavenge", 1)
    workflow_builder.write_dax_to_dir()

    config = workflow_params.existing_directory(
        "workflow_directory") / "pegasus.conf"
    assert config.exists()

    # Make sure the config contains the appropriate maxjobs lines and no inappropriate maxjobs lines
    with config.open("r") as f:
        lines = f.readlines()
    assert any("dagman.scavenge.maxjobs=1" in line for line in lines)
    assert not any("dagman.ephemeral.maxjobs=" in line for line in lines)
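
For completeness, a matching sketch of sort_nums_main under the same assumptions about the Parameters accessors; again, the real script may differ:

from vistautils.parameters import Parameters


def sort_nums_main(params: Parameters) -> None:
    # Hypothetical reconstruction: sort the integers in input_file numerically
    # and write them, one per line, to output_file.
    input_file = params.existing_file("input_file")
    output_file = params.creatable_file("output_file")
    nums = sorted(int(token) for token in input_file.read_text().split())
    output_file.write_text("".join(f"{num}\n" for num in nums))
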
def test_dax_with_saga_categories(tmp_path):
    workflow_params = Parameters.from_mapping({
        "workflow_name": "Test",
        "workflow_created": "Testing",
        "workflow_log_dir": str(tmp_path / "log"),
        "workflow_directory": str(tmp_path / "working"),
        "site": "saga",
        "namespace": "test",
        "partition": "scavenge",
    })
    multiply_partition = "scavenge"
    multiply_slurm_params = Parameters.from_mapping({
        "partition": multiply_partition,
        "num_cpus": 1,
        "num_gpus": 0,
        "memory": "4G"
    })
    multiply_resources = SlurmResourceRequest.from_parameters(
        multiply_slurm_params)
    workflow_builder = WorkflowBuilder.from_parameters(workflow_params)

    multiply_job_name = Locator(_parse_parts("jobs/multiply"))
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    multiply_input_file = tmp_path / "raw_nums.txt"
    multiply_params = Parameters.from_mapping({
        "input_file": multiply_input_file,
        "output_file": multiply_output_file,
        "x": 4
    })

    multiply_artifact = ValueArtifact(
        multiply_output_file,
        depends_on=workflow_builder.run_python_on_parameters(
            multiply_job_name,
            multiply_by_x_main,
            multiply_params,
            depends_on=[],
            resource_request=multiply_resources,
        ),
        locator=Locator("multiply"),
    )

    sort_partition = "ephemeral"
    sort_slurm_params = Parameters.from_mapping({
        "partition": sort_partition,
        "num_cpus": 1,
        "num_gpus": 0,
        "memory": "4G"
    })
    sort_resources = SlurmResourceRequest.from_parameters(sort_slurm_params)

    sort_job_name = Locator(_parse_parts("jobs/sort"))
    sorted_output_file = tmp_path / "sorted_nums.txt"
    sort_params = Parameters.from_mapping({
        "input_file": multiply_output_file,
        "output_file": sorted_output_file
    })
    workflow_builder.run_python_on_parameters(
        sort_job_name,
        sort_nums_main,
        sort_params,
        depends_on=[multiply_artifact],
        resource_request=sort_resources,
    )

    dax_file = workflow_builder.write_dax_to_dir()
    assert dax_file.exists()

    # Check that the multiply and sort jobs have the appropriate partition-defined categories set in
    # the DAX file
    assert _job_in_dax_has_category(dax_file, multiply_job_name,
                                    multiply_partition)
    assert not _job_in_dax_has_category(dax_file, multiply_job_name,
                                        sort_partition)
    assert _job_in_dax_has_category(dax_file, sort_job_name, sort_partition)
    assert not _job_in_dax_has_category(dax_file, sort_job_name,
                                        multiply_partition)
def test_dax_with_checkpointed_jobs_on_saga(tmp_path):
    workflow_params = Parameters.from_mapping({
        "workflow_name": "Test",
        "workflow_created": "Testing",
        "workflow_log_dir": str(tmp_path / "log"),
        "workflow_directory": str(tmp_path / "working"),
        "site": "saga",
        "namespace": "test",
        "partition": "scavenge",
    })
    slurm_params = Parameters.from_mapping({
        "partition": "scavenge",
        "num_cpus": 1,
        "num_gpus": 0,
        "memory": "4G"
    })
    resources = SlurmResourceRequest.from_parameters(slurm_params)
    workflow_builder = WorkflowBuilder.from_parameters(workflow_params)

    multiply_job_name = Locator(_parse_parts("jobs/multiply"))
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    multiply_input_file = tmp_path / "raw_nums.txt"
    multiply_params = Parameters.from_mapping({
        "input_file": multiply_input_file,
        "output_file": multiply_output_file,
        "x": 4
    })

    multiple_dir = workflow_builder.directory_for(multiply_job_name)

    # Create checkpointed file so that when trying to create the job again,
    # Pegasus just adds the file to the Replica Catalog
    checkpointed_multiply_file = multiple_dir / "___ckpt"
    checkpointed_multiply_file.touch()
    multiply_output_file.touch()

    assert checkpointed_multiply_file.exists()
    assert multiply_output_file.exists()

    multiply_artifact = ValueArtifact(
        multiply_output_file,
        depends_on=workflow_builder.run_python_on_parameters(
            multiply_job_name,
            multiply_by_x_main,
            multiply_params,
            depends_on=[]),
        locator=Locator("multiply"),
    )

    sort_job_name = Locator(_parse_parts("jobs/sort"))
    sorted_output_file = tmp_path / "sorted_nums.txt"
    sort_params = Parameters.from_mapping({
        "input_file": multiply_output_file,
        "output_file": sorted_output_file
    })
    workflow_builder.run_python_on_parameters(
        sort_job_name,
        sort_nums_main,
        sort_params,
        depends_on=[multiply_artifact],
        resource_request=resources,
    )

    replica_catalog = workflow_params.existing_directory(
        "workflow_directory") / "rc.dat"
    assert replica_catalog.exists()

    # Make sure the Replica Catalog is not empty
    assert replica_catalog.stat().st_size > 0
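
The test above only asserts that rc.dat is non-empty. A slightly stronger, still hedged check could search the catalog text for the checkpointed output's name, assuming Pegasus's plain-text replica-catalog format (one logical-to-physical mapping per line), e.g. _replica_catalog_mentions(replica_catalog, multiply_output_file.name):

def _replica_catalog_mentions(replica_catalog, logical_name: str) -> bool:
    # Hypothetical helper: a plain substring search over the text catalog.
    return logical_name in replica_catalog.read_text()
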
def test_dax_with_job_on_saga_with_dict_as_params(tmp_path):
    workflow_params = Parameters.from_mapping({
        "workflow_name": "Test",
        "workflow_created": "Testing",
        "workflow_log_dir": str(tmp_path / "log"),
        "workflow_directory": str(tmp_path / "working"),
        "site": "saga",
        "namespace": "test",
        "partition": "gaia",
        "experiment_name": "fred",
        "home_dir": str(tmp_path),
    })
    slurm_params = Parameters.from_mapping({
        "partition": "gaia",
        "num_cpus": 1,
        "num_gpus": 0,
        "memory": "4G"
    })
    multiply_input_file = tmp_path / "raw_nums.txt"
    random = Random()
    random.seed(0)
    nums = immutableset(int(random.random() * 100) for _ in range(25))
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    sorted_output_file = tmp_path / "sorted_nums.txt"
    add_output_file = tmp_path / "add_nums.txt"
    with multiply_input_file.open("w") as mult_file:
        mult_file.writelines(f"{num}\n" for num in nums)
    multiply_params = {
        "input_file": multiply_input_file,
        "output_file": multiply_output_file,
        "x": 4,
    }

    sort_params = {
        "input_file": multiply_output_file,
        "output_file": sorted_output_file
    }

    add_args = f"{sorted_output_file} {add_output_file} --y 10"

    job_profile = PegasusProfile(namespace="pegasus",
                                 key="transfer.bypass.input.staging",
                                 value="True")
    resources = SlurmResourceRequest.from_parameters(slurm_params)
    initialize_vista_pegasus_wrapper(workflow_params)

    multiply_job_name = Locator(_parse_parts("jobs/multiply"))
    multiply_artifact = ValueArtifact(
        multiply_output_file,
        depends_on=run_python_on_parameters(
            multiply_job_name,
            multiply_by_x_main,
            multiply_params,
            depends_on=[],
            job_profiles=[job_profile],
        ),
        locator=Locator("multiply"),
    )
    multiple_dir = directory_for(multiply_job_name)
    assert (multiple_dir / "___run.sh").exists()
    assert (multiple_dir / "____params.params").exists()

    sort_job_name = Locator(_parse_parts("jobs/sort"))
    sort_dir = directory_for(sort_job_name)
    sort_artifact = run_python_on_parameters(
        sort_job_name,
        sort_nums_main,
        sort_params,
        depends_on=[multiply_artifact],
        resource_request=resources,
        category="add",
    )
    assert (sort_dir / "___run.sh").exists()
    assert (sort_dir / "____params.params").exists()

    add_job_name = Locator(_parse_parts("jobs/add"))
    add_dir = directory_for(add_job_name)
    run_python_on_args(add_job_name,
                       "add_job_main.py",
                       add_args,
                       depends_on=[sort_artifact])
    assert (add_dir / "___run.sh").exists()

    dax_file_one = write_workflow_description(tmp_path)
    dax_file_two = write_workflow_description()

    assert dax_file_one.exists()
    assert dax_file_two.exists()

    submit_script_one = tmp_path / "submit_script_one.sh"
    submit_script_two = tmp_path / "submit_script_two.sh"
    build_submit_script(submit_script_one, str(dax_file_one),
                        experiment_directory())
    build_submit_script(submit_script_two, str(dax_file_two),
                        experiment_directory())

    assert submit_script_one.exists()
    assert submit_script_two.exists()

    site_catalog = workflow_params.existing_directory(
        "workflow_directory") / "sites.yml"
    assert site_catalog.exists()

    replica_catalog = (
        workflow_params.existing_directory("workflow_directory") /
        "replicas.yml")
    assert replica_catalog.exists()

    transformations_catalog = (
        workflow_params.existing_directory("workflow_directory") /
        "transformations.yml")
    assert transformations_catalog.exists()

    properties_file = (
        workflow_params.existing_directory("workflow_directory") /
        "pegasus.properties")
    assert properties_file.exists()
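
run_python_on_args above drives a script named add_job_main.py with the argument string f"{sorted_output_file} {add_output_file} --y 10". A hypothetical sketch of such a script, consistent with that argument string but not taken from the real project:

import argparse
from pathlib import Path


def main() -> None:
    # Hypothetical: add y to every number in the input file.
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", type=Path)
    parser.add_argument("output_file", type=Path)
    parser.add_argument("--y", type=int, default=0)
    args = parser.parse_args()
    nums = [int(token) for token in args.input_file.read_text().split()]
    args.output_file.write_text("".join(f"{num + args.y}\n" for num in nums))


if __name__ == "__main__":
    main()
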
def test_dax_with_job_in_container(tmp_path):
    workflow_params = Parameters.from_mapping({
        "workflow_name": "Test",
        "workflow_created": "Testing",
        "workflow_log_dir": str(tmp_path / "log"),
        "workflow_directory": str(tmp_path / "working"),
        "site": "saga",
        "namespace": "test",
        "partition": "gaia",
        "experiment_name": "fred",
        "home_dir": str(tmp_path),
    })

    slurm_params = Parameters.from_mapping({
        "partition": "gaia",
        "num_cpus": 1,
        "num_gpus": 0,
        "memory": "4G"
    })

    multiply_input_file = tmp_path / "raw_nums.txt"
    random = Random()
    random.seed(0)
    nums = immutableset(int(random.random() * 100) for _ in range(25))
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    sorted_output_file = tmp_path / "sorted_nums.txt"

    with multiply_input_file.open("w") as mult_file:
        mult_file.writelines(f"{num}\n" for num in nums)

    multiply_params = Parameters.from_mapping({
        "input_file": multiply_input_file,
        "output_file": multiply_output_file,
        "x": 4
    })
    sort_params = Parameters.from_mapping({
        "input_file": multiply_output_file,
        "output_file": sorted_output_file
    })

    resources = SlurmResourceRequest.from_parameters(slurm_params)
    initialize_vista_pegasus_wrapper(workflow_params)

    # Add Container
    example_docker = add_container("example_container", "docker",
                                   tmp_path / "docker.img")

    with pytest.raises(ValueError):
        _ = add_container("fake_container", "invalid",
                          tmp_path / "invalid_docker.img")

    multiply_job_name = Locator(_parse_parts("jobs/multiply"))

    multiply_artifact = ValueArtifact(
        multiply_output_file,
        depends_on=run_python_on_parameters(
            multiply_job_name,
            multiply_by_x_main,
            multiply_params,
            depends_on=[],
            container=example_docker,
        ),
        locator=Locator("multiply"),
    )
    multiple_dir = directory_for(multiply_job_name)
    assert (multiple_dir / "___run.sh").exists()
    assert (multiple_dir / "____params.params").exists()

    sort_job_name = Locator(_parse_parts("jobs/sort"))
    sort_dir = directory_for(sort_job_name)
    run_python_on_parameters(
        sort_job_name,
        sort_nums_main,
        sort_params,
        depends_on=[multiply_artifact],
        resource_request=resources,
        container=example_docker,
    )
    assert (sort_dir / "___run.sh").exists()
    assert (sort_dir / "____params.params").exists()

    dax_file_one = write_workflow_description()

    assert dax_file_one.exists()

    site_catalog = workflow_params.existing_directory(
        "workflow_directory") / "sites.yml"
    assert site_catalog.exists()

    replica_catalog = (
        workflow_params.existing_directory("workflow_directory") /
        "replicas.yml")
    assert replica_catalog.exists()

    transformations_catalog = (
        workflow_params.existing_directory("workflow_directory") /
        "transformations.yml")
    assert transformations_catalog.exists()

    properties_file = (
        workflow_params.existing_directory("workflow_directory") /
        "pegasus.properties")
    assert properties_file.exists()
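
Only the "docker" container type and an invalid type are exercised above. Pegasus itself also supports singularity and shifter containers; assuming add_container accepts the same type strings, registering a Singularity image would presumably look like the snippet below (hypothetical and unverified against the wrapper):

# Hypothetical: assumes add_container also accepts the "singularity" type
# string; only "docker" is verified by the test above.
example_singularity = add_container("example_singularity_container",
                                    "singularity", tmp_path / "image.sif")
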
def test_dax_test_exclude_nodes_on_saga(tmp_path):

    sample_exclude = "saga01,saga03,saga21,saga05"
    sample_include = "saga06"

    params = Parameters.from_mapping({
        "workflow_name": "Test",
        "workflow_created": "Testing",
        "workflow_log_dir": str(tmp_path / "log"),
        "workflow_directory": str(tmp_path / "working"),
        "site": "saga",
        "namespace": "test",
        "partition": "gaia",
        "exclude_list": sample_exclude,
        "home_dir": str(tmp_path),
    })
    slurm_params = Parameters.from_mapping({
        "partition": "gaia",
        "num_cpus": 1,
        "num_gpus": 0,
        "memory": "4G"
    })
    multiply_input_file = tmp_path / "raw_nums.txt"
    random = Random()
    random.seed(0)
    nums = immutableset(int(random.random() * 100) for _ in range(0, 25))
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    sorted_output_file = tmp_path / "sorted_nums.txt"
    with multiply_input_file.open("w") as mult_file:
        mult_file.writelines(f"{num}\n" for num in nums)
    multiply_params = Parameters.from_mapping({
        "input_file": multiply_input_file,
        "output_file": multiply_output_file,
        "x": 4
    })

    sort_params = Parameters.from_mapping({
        "input_file": multiply_output_file,
        "output_file": sorted_output_file
    })

    resources = SlurmResourceRequest.from_parameters(
        slurm_params.unify({"run_on_single_node": sample_include}))
    initialize_vista_pegasus_wrapper(params)

    multiply_job_locator = Locator(_parse_parts("jobs/multiply"))
    multiply_artifact = ValueArtifact(
        multiply_output_file,
        depends_on=run_python_on_parameters(multiply_job_locator,
                                            multiply_by_x_main,
                                            multiply_params,
                                            depends_on=[]),
        locator=Locator("multiply"),
    )
    sort_job_locator = Locator(_parse_parts("jobs/sort"))
    run_python_on_parameters(
        sort_job_locator,
        sort_nums_main,
        sort_params,
        depends_on=[multiply_artifact],
        resource_request=resources,
    )

    dax_file = write_workflow_description(tmp_path)
    with dax_file.open("r") as dax:
        dax_yaml = load(dax, Loader=SafeLoader)
    root = dax_yaml["jobs"]

    for item in root:
        if item["type"] == "job":
            if "pegasus" in item["profiles"]:
                if item["name"] == "jobs_multiply":
                    assert (f"--exclude={sample_exclude}"
                            in item["profiles"]["pegasus"]["glite.arguments"])
                elif item["name"] == "jobs_sort":
                    assert "--exclude=" in item["profiles"]["pegasus"][
                        "glite.arguments"]
                    assert (f"--nodelist={sample_include}"
                            in item["profiles"]["pegasus"]["glite.arguments"])
                else:
                    assert False
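
These assertions depend on the workflow-level exclude_list and the per-job run_on_single_node parameter being forwarded to Slurm through the pegasus glite.arguments profile. A hedged sketch of the flag strings being looked for (the wrapper's actual formatting may differ):

from typing import Optional


def _expected_glite_arguments(exclude: Optional[str] = None,
                              nodelist: Optional[str] = None) -> str:
    # Hypothetical: build the sbatch flags the assertions above search for.
    flags = []
    if exclude:
        flags.append(f"--exclude={exclude}")
    if nodelist:
        flags.append(f"--nodelist={nodelist}")
    return " ".join(flags)
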
def test_category_max_jobs(tmp_path):
    workflow_params = Parameters.from_mapping({
        "workflow_name": "Test",
        "workflow_created": "Testing",
        "workflow_log_dir": str(tmp_path / "log"),
        "workflow_directory": str(tmp_path / "working"),
        "site": "saga",
        "namespace": "test",
        "partition": "gaia",
        "home_dir": str(tmp_path),
    })
    multiply_slurm_params = Parameters.from_mapping({
        "partition": "gaia",
        "num_cpus": 1,
        "num_gpus": 0,
        "memory": "4G"
    })
    multiply_resources = SlurmResourceRequest.from_parameters(
        multiply_slurm_params)
    initialize_vista_pegasus_wrapper(workflow_params)

    multiply_job_name = Locator(_parse_parts("jobs/multiply"))
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    multiply_input_file = tmp_path / "raw_nums.txt"
    multiply_params = Parameters.from_mapping({
        "input_file": multiply_input_file,
        "output_file": multiply_output_file,
        "x": 4
    })

    multiply_artifact = ValueArtifact(
        multiply_output_file,
        depends_on=run_python_on_parameters(
            multiply_job_name,
            multiply_by_x_main,
            multiply_params,
            depends_on=[],
            resource_request=multiply_resources,
        ),
        locator=Locator("multiply"),
    )

    sort_slurm_params = Parameters.from_mapping({
        "partition": "ephemeral",
        "num_cpus": 1,
        "num_gpus": 0,
        "memory": "4G",
        "job_time_in_minutes": 120,
    })
    sort_resources = SlurmResourceRequest.from_parameters(sort_slurm_params)

    sort_job_name = Locator(_parse_parts("jobs/sort"))
    sorted_output_file = tmp_path / "sorted_nums.txt"
    sort_params = Parameters.from_mapping({
        "input_file": multiply_output_file,
        "output_file": sorted_output_file
    })
    run_python_on_parameters(
        sort_job_name,
        sort_nums_main,
        sort_params,
        depends_on=[multiply_artifact],
        resource_request=sort_resources,
    )

    limit_jobs_for_category("gaia", 1)
    write_workflow_description()

    site_catalog = workflow_params.existing_directory(
        "workflow_directory") / "sites.yml"
    assert site_catalog.exists()

    replica_catalog = (
        workflow_params.existing_directory("workflow_directory") /
        "replicas.yml")
    assert replica_catalog.exists()

    transformations_catalog = (
        workflow_params.existing_directory("workflow_directory") /
        "transformations.yml")
    assert transformations_catalog.exists()

    properties_file = (
        workflow_params.existing_directory("workflow_directory") /
        "pegasus.properties")
    assert properties_file.exists()

    # Make sure the config contains the appropriate maxjobs lines and no inappropriate maxjobs lines
    with properties_file.open("r") as f:
        lines = f.readlines()
    for line in lines:
        print(line)
    assert any("dagman.gaia.maxjobs = 1" in line for line in lines)
    assert all("dagman.ephemeral.maxjobs =" not in line for line in lines)
def test_dax_with_checkpointed_jobs_on_saga(tmp_path):
    workflow_params = Parameters.from_mapping({
        "workflow_name": "Test",
        "workflow_created": "Testing",
        "workflow_log_dir": str(tmp_path / "log"),
        "workflow_directory": str(tmp_path / "working"),
        "site": "saga",
        "namespace": "test",
        "partition": "gaia",
        "home_dir": str(tmp_path),
    })
    slurm_params = Parameters.from_mapping({
        "partition": "gaia",
        "num_cpus": 1,
        "num_gpus": 0,
        "memory": "4G"
    })
    resources = SlurmResourceRequest.from_parameters(slurm_params)
    initialize_vista_pegasus_wrapper(workflow_params)

    multiply_job_name = Locator(_parse_parts("jobs/multiply"))
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    multiply_input_file = tmp_path / "raw_nums.txt"
    multiply_params = Parameters.from_mapping({
        "input_file": multiply_input_file,
        "output_file": multiply_output_file,
        "x": 4
    })

    multiple_dir = directory_for(multiply_job_name)

    # Create checkpointed file so that when trying to create the job again,
    # Pegasus just adds the file to the Replica Catalog
    checkpointed_multiply_file = multiple_dir / "___ckpt"
    checkpointed_multiply_file.touch()
    multiply_output_file.touch()

    assert checkpointed_multiply_file.exists()
    assert multiply_output_file.exists()

    multiply_artifact = ValueArtifact(
        multiply_output_file,
        depends_on=run_python_on_parameters(multiply_job_name,
                                            multiply_by_x_main,
                                            multiply_params,
                                            depends_on=[]),
        locator=Locator("multiply"),
    )

    sort_job_name = Locator(_parse_parts("jobs/sort"))
    sorted_output_file = tmp_path / "sorted_nums.txt"
    sort_params = Parameters.from_mapping({
        "input_file": multiply_output_file,
        "output_file": sorted_output_file
    })
    run_python_on_parameters(
        sort_job_name,
        sort_nums_main,
        sort_params,
        depends_on=[multiply_artifact],
        resource_request=resources,
    )

    write_workflow_description()

    site_catalog = workflow_params.existing_directory(
        "workflow_directory") / "sites.yml"
    assert site_catalog.exists()

    replica_catalog = (
        workflow_params.existing_directory("workflow_directory") /
        "replicas.yml")
    assert replica_catalog.exists()

    transformations_catalog = (
        workflow_params.existing_directory("workflow_directory") /
        "transformations.yml")
    assert transformations_catalog.exists()

    properties_file = (
        workflow_params.existing_directory("workflow_directory") /
        "pegasus.properties")
    assert properties_file.exists()

    # Make sure the Replica Catalog is not empty
    assert replica_catalog.stat().st_size > 0
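
None of the snippets above include their imports. A hedged reconstruction of the shared import block follows; the standard-library and third-party lines are safe, while the pegasus-wrapper module paths are guesses and therefore left commented out:

import subprocess
from random import Random

import pytest
from immutablecollections import immutableset
from vistautils.parameters import Parameters
from yaml import SafeLoader, load

# Assumed locations inside the ISI vista pegasus-wrapper package (unverified):
# from pegasus_wrapper import (
#     add_container, directory_for, experiment_directory,
#     initialize_vista_pegasus_wrapper, limit_jobs_for_category,
#     run_python_on_args, run_python_on_parameters, write_workflow_description,
# )
# from pegasus_wrapper.locator import Locator, _parse_parts
# from pegasus_wrapper.artifact import ValueArtifact
# from pegasus_wrapper.resource_request import SlurmResourceRequest
# from pegasus_wrapper.workflow import WorkflowBuilder
# build_submit_script, PegasusProfile, multiply_by_x_main and sort_nums_main
# come from the same package and its test support code.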