Beispiel #1
0
def test_create_or_update_jobs(tmp_work_dir):
    """Creating jobs from a JobRequest yields one pending Job with all fields
    populated, and re-submitting the identical request creates nothing new."""
    repo_url = str(Path(__file__).parent.resolve() / "fixtures/git-repo")
    job_request = JobRequest(
        id="123",
        repo_url=repo_url,
        # GIT_DIR=tests/fixtures/git-repo git rev-parse v1
        commit="d1e88b31cbe8f67c58f938adb5ee500d54a69764",
        branch="v1",
        requested_actions=["generate_cohort"],
        cancelled_actions=[],
        workspace="1",
        database_name="dummy",
        original={},
    )
    create_or_update_jobs(job_request)
    old_job = find_one(Job)
    # Table of every field the scheduler is expected to have filled in
    expected_fields = {
        "job_request_id": "123",
        "state": State.PENDING,
        "repo_url": repo_url,
        "commit": "d1e88b31cbe8f67c58f938adb5ee500d54a69764",
        "workspace": "1",
        "action": "generate_cohort",
        "wait_for_job_ids": [],
        "requires_outputs_from": [],
        "run_command": (
            "cohortextractor:latest generate_cohort --expectations-population=1000"
            " --output-dir="
            "."
        ),
        "output_spec": {"highly_sensitive": {"cohort": "input.csv"}},
        "status_message": None,
    }
    for field_name, expected_value in expected_fields.items():
        assert getattr(old_job, field_name) == expected_value
    # Submitting the same JobRequest again must be a no-op
    create_or_update_jobs(job_request)
    assert find_one(Job) == old_job
Beispiel #2
0
def test_adding_job_creates_dependencies(tmp_work_dir):
    """Requesting the final action schedules its entire dependency chain with
    the wait_for_job_ids wired up correctly."""
    create_jobs_with_project_file(
        make_job_request(action="analyse_data"), TEST_PROJECT
    )
    jobs = {
        action: find_one(Job, action=action)
        for action in (
            "analyse_data",
            "prepare_data_1",
            "prepare_data_2",
            "generate_cohort",
        )
    }
    # analyse_data waits on both prepare steps (in no particular order)
    assert set(jobs["analyse_data"].wait_for_job_ids) == {
        jobs["prepare_data_1"].id,
        jobs["prepare_data_2"].id,
    }
    # Each prepare step waits on cohort generation, which itself waits on nothing
    assert jobs["prepare_data_1"].wait_for_job_ids == [jobs["generate_cohort"].id]
    assert jobs["prepare_data_2"].wait_for_job_ids == [jobs["generate_cohort"].id]
    assert jobs["generate_cohort"].wait_for_job_ids == []
Beispiel #3
0
def test_local_run_copes_with_detritus_of_earlier_interrupted_run(
    extraction_tool, tmp_path
):
    # This test simulates the case where an earlier run has been interrupted (for example by the user pressing ctrl-c).
    # In particular we put a couple of jobs in unfinished states, which they could never be left in under normal
    # operation. The correct behaviour of the local run, which this tests for, is for such unfinished jobs to be marked
    # as cancelled on the next run.
    project_dir = tmp_path / "project"
    shutil.copytree(str(FIXTURE_DIR / "full_project"), project_dir)
    # Point the state database inside the copied project so the pre-seeded jobs
    # below are visible to the local run we invoke afterwards.
    config.DATABASE_FILE = project_dir / "metadata" / "db.sqlite"

    project = load_pipeline(project_dir / "project.yaml")
    # The leftover jobs need a parent request to belong to.
    database.insert(SavedJobRequest(id="previous-request", original={}))

    def job(job_id, action, state):
        """Build a Job row for `action` in the given `state`, with its command,
        inputs and outputs taken from the project's action specification."""
        spec = get_action_specification(
            project,
            action,
            using_dummy_data_backend=config.USING_DUMMY_DATA_BACKEND,
        )
        return Job(
            id=job_id,
            job_request_id="previous-request",
            state=state,
            status_message="",
            repo_url=str(project_dir),
            workspace=project_dir.name,
            database_name="a-database",
            action=action,
            wait_for_job_ids=[],
            requires_outputs_from=spec.needs,
            run_command=spec.run,
            output_spec=spec.outputs,
            created_at=int(time.time()),
            updated_at=int(time.time()),
            outputs={},
        )

    # FIXME: consolidate these when databuilder supports more columns in dummy data
    if extraction_tool == "cohortextractor":
        actions = ["generate_cohort", "prepare_data_m_cohortextractor"]
    else:
        actions = ["generate_dataset", "analyse_data_databuilder"]

    # Seed one RUNNING and one PENDING job — states an interrupted run could
    # leave behind but which should never survive a completed run.
    database.insert(job(job_id="123", action=actions[0], state=State.RUNNING))
    database.insert(job(job_id="456", action=actions[1], state=State.PENDING))
    assert local_run.main(project_dir=project_dir, actions=[actions[1]])

    # Both leftover jobs must now be flagged cancelled and moved to FAILED.
    assert database.find_one(Job, id="123").cancelled
    assert database.find_one(Job, id="123").state == State.FAILED
    assert database.find_one(Job, id="456").cancelled
    assert database.find_one(Job, id="456").state == State.FAILED
Beispiel #4
0
def test_cancelled_jobs_are_flagged(tmp_work_dir):
    """Only the actions listed in cancelled_actions get their cancelled flag set."""
    job_request = make_job_request(action="analyse_data")
    create_jobs_with_project_file(job_request, TEST_PROJECT)
    job_request.cancelled_actions = ["prepare_data_1", "prepare_data_2"]
    create_or_update_jobs(job_request)
    # Expected cancelled flag for every job in the dependency chain
    expected_flags = {
        "analyse_data": 0,
        "prepare_data_1": 1,
        "prepare_data_2": 1,
        "generate_cohort": 0,
    }
    for action, flag in expected_flags.items():
        assert find_one(Job, action=action).cancelled == flag
Beispiel #5
0
def test_create_or_update_jobs_with_git_error(tmp_work_dir):
    """An unfetchable commit produces a single FAILED job whose status message
    records the git error and whose execution details are left unset."""
    repo_url = str(Path(__file__).parent.resolve() / "fixtures/git-repo")
    bad_commit = "0" * 40
    job_request = JobRequest(
        id="123",
        repo_url=repo_url,
        commit=bad_commit,
        branch="v1",
        requested_actions=["generate_cohort"],
        cancelled_actions=[],
        workspace="1",
        database_name="dummy",
        original={},
    )
    create_or_update_jobs(job_request)
    failed_job = find_one(Job)
    assert failed_job.job_request_id == "123"
    assert failed_job.state == State.FAILED
    assert failed_job.repo_url == repo_url
    assert failed_job.commit == bad_commit
    assert failed_job.workspace == "1"
    # Job creation failed before any execution details were computed
    for unset_field in (
        "wait_for_job_ids",
        "requires_outputs_from",
        "run_command",
        "output_spec",
    ):
        assert getattr(failed_job, unset_field) is None
    assert failed_job.status_message == (
        f"GitError: Error fetching commit {bad_commit} from {repo_url}"
    )
Beispiel #6
0
def test_existing_active_jobs_are_picked_up_when_checking_dependencies(
        tmp_work_dir):
    """A later request reuses still-active jobs as dependencies instead of
    scheduling duplicates."""
    # First schedule prepare_data_1, which pulls in generate_cohort
    create_jobs_with_project_file(
        make_job_request(action="prepare_data_1"), TEST_PROJECT
    )
    existing_prepare = find_one(Job, action="prepare_data_1")
    cohort_job = find_one(Job, action="generate_cohort")
    assert existing_prepare.wait_for_job_ids == [cohort_job.id]
    # Now schedule a job which has the above jobs as dependencies
    create_jobs_with_project_file(
        make_job_request(action="analyse_data"), TEST_PROJECT
    )
    # The new analysis job should wait on the pre-existing prepare job plus
    # the freshly created second prepare job
    analysis_job = find_one(Job, action="analyse_data")
    new_prepare = find_one(Job, action="prepare_data_2")
    assert set(analysis_job.wait_for_job_ids) == {
        existing_prepare.id,
        new_prepare.id,
    }
    assert new_prepare.wait_for_job_ids == [cohort_job.id]
Beispiel #7
0
def test_existing_cancelled_jobs_are_ignored_up_when_checking_dependencies(
    tmp_work_dir, ):
    """A cancelled job does not satisfy a dependency: a replacement instance is
    spawned and downstream jobs wait on the replacement."""
    create_jobs_with_project_file(
        make_job_request(action="generate_cohort"), TEST_PROJECT
    )
    original_generate = find_one(Job, action="generate_cohort")
    update_where(Job, {"cancelled": True}, id=original_generate.id)

    # Now schedule a job which has the above job as a dependency
    create_jobs_with_project_file(
        make_job_request(action="prepare_data_1"), TEST_PROJECT
    )

    # A fresh, non-cancelled generate_cohort job must exist, and the prepare
    # job must wait on it and nothing else
    prepare_job = find_one(Job, action="prepare_data_1")
    replacement_generate = find_one(Job, action="generate_cohort", cancelled=0)
    assert replacement_generate.id != original_generate.id
    assert prepare_job.wait_for_job_ids == [replacement_generate.id]
Beispiel #8
0
def test_update_excluding_a_field(tmp_work_dir):
    """update() with exclude_fields writes all modified fields except those
    listed, which keep their stored values."""
    record = Job(id="foo123", action="foo", commit="commit-of-glory")
    insert(record)
    record.action = "bar"
    record.commit = "commit-of-doom"
    update(record, exclude_fields=["commit"])
    stored = find_one(Job, id="foo123")
    # action was written through; commit kept its original value
    assert stored.action == "bar"
    assert stored.commit == "commit-of-glory"
Beispiel #9
0
def test_basic_roundtrip(tmp_work_dir):
    """A Job survives an insert/find round trip, including structured fields
    and the `__in` query operator."""
    saved = Job(
        id="foo123",
        job_request_id="bar123",
        state=State.RUNNING,
        output_spec={"hello": [1, 2, 3]},
    )
    insert(saved)
    loaded = find_one(Job, job_request_id__in=["bar123", "baz123"])
    assert (loaded.id, loaded.output_spec) == (saved.id, saved.output_spec)
Beispiel #10
0
def test_find_one_fails_if_there_is_more_than_one_result(tmp_work_dir):
    """find_one() refuses to pick arbitrarily between multiple matching rows."""
    for job_id in ("foo123", "foo456"):
        insert(Job(id=job_id, workspace="the-workspace"))
    with pytest.raises(ValueError):
        find_one(Job, workspace="the-workspace")
Beispiel #11
0
def test_find_one_fails_if_there_are_no_results(tmp_work_dir):
    """find_one() raises rather than returning None when nothing matches."""
    with pytest.raises(ValueError):
        find_one(Job, id="foo123")
Beispiel #12
0
def test_find_one_returns_a_single_value(tmp_work_dir):
    """find_one() returns the matching record itself, not a collection."""
    inserted = Job(id="foo123", workspace="the-workspace")
    insert(inserted)
    assert find_one(Job, id="foo123").workspace == "the-workspace"
Beispiel #13
0
def test_update(tmp_work_dir):
    """update() persists attribute changes made after insert()."""
    record = Job(id="foo123", action="foo")
    insert(record)
    record.action = "bar"
    update(record)
    stored = find_one(Job, id="foo123")
    assert stored.action == "bar"
Beispiel #14
0
def get_flag(name):
    """Look up and return the Flag record whose id matches `name`."""
    flag = find_one(Flag, id=name)
    return flag