Esempio n. 1
0
def test_create_or_update_jobs_with_git_error(tmp_work_dir):
    """
    A job request pinned to a commit that cannot be fetched is stored as a
    FAILED job.

    The stored job keeps the request's identifying fields, leaves the
    scheduling fields unset, and carries the git error as its status message.
    """
    fixture_repo = Path(__file__).parent.resolve() / "fixtures/git-repo"
    repo_url = str(fixture_repo)
    # Forty zeros: shaped like a full SHA-1, but not a commit we expect to find
    bad_commit = 40 * "0"
    expected_message = (
        f"GitError: Error fetching commit {bad_commit} from {repo_url}"
    )

    create_or_update_jobs(
        JobRequest(
            id="123",
            repo_url=repo_url,
            commit=bad_commit,
            branch="v1",
            requested_actions=["generate_cohort"],
            cancelled_actions=[],
            workspace="1",
            database_name="dummy",
            original={},
        )
    )

    job = find_one(Job)
    assert job.job_request_id == "123"
    assert job.state == State.FAILED
    assert job.repo_url == repo_url
    assert job.commit == bad_commit
    assert job.workspace == "1"
    # None of the scheduling details should have been filled in
    for unset_field in (
        "wait_for_job_ids",
        "requires_outputs_from",
        "run_command",
        "output_spec",
    ):
        assert getattr(job, unset_field) is None
    assert job.status_message == expected_message
def test_create_or_update_jobs_with_git_error(tmp_work_dir):
    """
    A job request naming a branch that cannot be resolved is stored as exactly
    one FAILED job.

    With no commit supplied and an unresolvable branch, the stored job should
    have no commit and no scheduling details, and its status message should
    report the ref-resolution error.
    """
    repo_url = str(Path(__file__).parent.resolve() / "fixtures/git-repo")
    job_request = JobRequest(
        id="123",
        repo_url=repo_url,
        commit=None,
        branch="no-such-branch",
        requested_actions=["generate_cohort"],
        workspace="1",
        database_name="dummy",
        original={},
    )
    create_or_update_jobs(job_request)
    jobs = find_where(Job)
    assert len(jobs) == 1
    j = jobs[0]
    assert j.job_request_id == "123"
    assert j.state == State.FAILED
    assert j.repo_url == repo_url
    # Identity checks: these fields must be exactly None, not merely something
    # that compares equal to it
    assert j.commit is None
    assert j.workspace == "1"
    assert j.wait_for_job_ids is None
    assert j.requires_outputs_from is None
    assert j.run_command is None
    assert j.output_spec is None
    assert (
        j.status_message
        == f"GitError: Error resolving ref 'no-such-branch' from {repo_url}"
    )
def test_create_or_update_jobs(tmp_work_dir):
    """
    A valid job request against the fixture repo creates one PENDING job, and
    submitting the same request again creates nothing new.

    The request gives only a branch ("v1"); the stored job is expected to
    carry the commit that branch points to in the fixture repo.
    """
    repo_url = str(Path(__file__).parent.resolve() / "fixtures/git-repo")
    job_request = JobRequest(
        id="123",
        repo_url=repo_url,
        commit=None,
        branch="v1",
        requested_actions=["generate_cohort"],
        workspace="1",
        database_name="dummy",
        original={},
    )
    create_or_update_jobs(job_request)
    jobs = find_where(Job)
    assert len(jobs) == 1
    j = jobs[0]
    assert j.job_request_id == "123"
    assert j.state == State.PENDING
    assert j.repo_url == repo_url
    assert j.commit == "d1e88b31cbe8f67c58f938adb5ee500d54a69764"
    assert j.workspace == "1"
    assert j.action == "generate_cohort"
    assert j.wait_for_job_ids == []
    assert j.requires_outputs_from == []
    assert j.run_command == (
        "cohortextractor:latest generate_cohort --expectations-population=1000"
        " --output-dir=."
    )
    assert j.output_spec == {"highly_sensitive": {"cohort": "input.csv"}}
    # Identity check: a pending job must have no status message at all
    assert j.status_message is None
    # Check no new jobs created from same JobRequest
    create_or_update_jobs(job_request)
    new_jobs = find_where(Job)
    assert jobs == new_jobs
Esempio n. 4
0
def test_job_request_from_remote_format():
    """
    Converting a job-server style payload gives the equivalent JobRequest, with
    the untouched payload kept as `original`.
    """
    workspace_payload = {
        "name": "testing",
        "repo": "https://github.com/opensafely/foo",
        "branch": "master",
        "db": "full",
    }
    payload = {
        "identifier": "123",
        "workspace": workspace_payload,
        "requested_actions": ["generate_cohort"],
        "force_run_dependencies": True,
    }
    wanted = JobRequest(
        id="123",
        repo_url="https://github.com/opensafely/foo",
        commit=None,
        branch="master",
        workspace="testing",
        database_name="full",
        requested_actions=["generate_cohort"],
        force_run_dependencies=True,
        original=payload,
    )

    converted = job_request_from_remote_format(payload)

    assert converted == wanted
Esempio n. 5
0
def job_request_factory(**kwargs):
    """
    Build a JobRequest from JOB_REQUEST_DEFAULTS overlaid with `kwargs`, insert
    a SavedJobRequest holding its id and original payload, and return it.

    If no `id` is supplied, one is generated from 10 random bytes,
    base32-encoded and lower-cased.
    """
    if "id" in kwargs:
        request_id = kwargs["id"]
    else:
        random_bytes = secrets.token_bytes(10)
        request_id = base64.b32encode(random_bytes).decode("ascii").lower()

    # Work on a deep copy so the shared defaults are never modified
    fields = deepcopy(JOB_REQUEST_DEFAULTS)
    fields.update(kwargs, id=request_id)

    request = JobRequest(**fields)
    saved = SavedJobRequest(id=request.id, original=request.original)
    insert(saved)
    return request
Esempio n. 6
0
def create_job_request_and_jobs(project_dir, actions, force_run_dependencies):
    """
    Build a JobRequest for running `actions` locally in `project_dir`, work out
    which new jobs that requires, and pass both to `insert_into_database`.

    Args:
        project_dir: the project directory. It is used with `/` and `.name`,
            so a `pathlib.Path`-like object is expected. It must contain a
            `project.yaml` file.
        actions: names of the actions to run; must not be empty.
        force_run_dependencies: passed straight through to the JobRequest.

    Returns:
        A `(job_request, new_jobs)` tuple.

    Raises:
        ProjectValidationError: if `project_dir` has no `project.yaml`.
        UnknownActionError: if `actions` is empty (and presumably when
            `get_new_jobs_to_run` rejects an action name -- not visible from
            here). The exception is given a `valid_actions` attribute listing
            the names that can be requested.
    """
    job_request = JobRequest(
        id=random_id(),
        repo_url=str(project_dir),
        commit=None,
        requested_actions=actions,
        cancelled_actions=[],
        workspace=project_dir.name,
        database_name="dummy",
        force_run_dependencies=force_run_dependencies,
        # The default behaviour of refusing to run if a dependency has failed
        # makes for an awkward workflow when iterating in development
        force_run_failed=True,
        branch="",
        original={"created_by": getuser()},
    )

    project_file_path = project_dir / "project.yaml"
    if not project_file_path.exists():
        raise ProjectValidationError(
            f"No project.yaml file found in {project_dir}")
    # NOTE: Similar but non-identical logic is implemented for running jobs in
    # production in `jobrunner.create_or_update_jobs.create_jobs`. If you make
    # changes below then consider what, if any, the appropriate corresponding
    # changes might be for production jobs.
    pipeline_config = load_pipeline(project_file_path)
    latest_jobs = calculate_workspace_state(job_request.workspace)

    # On the server out-of-band deletion of an existing output is considered an error, so we ignore that case when
    # scheduling and allow jobs with missing dependencies to fail loudly when they are actually run. However for local
    # running we should allow researchers to delete outputs on disk and automatically rerun the actions that create
    # if they are needed. So here we check whether any files are missing for completed actions and, if so, treat them
    # as though they had not been run -- this will automatically trigger a rerun.
    latest_jobs_with_files_present = [
        job for job in latest_jobs
        if all_output_files_present(project_dir, job)
    ]

    try:
        # Raised inside the `try` so that the empty-actions error also gets
        # annotated with the list of valid actions below
        if not actions:
            raise UnknownActionError("At least one action must be supplied")
        new_jobs = get_new_jobs_to_run(job_request, pipeline_config,
                                       latest_jobs_with_files_present)
    except UnknownActionError as e:
        # Annotate the exception with a list of valid action names so we can
        # show them to the user
        e.valid_actions = [RUN_ALL_COMMAND] + pipeline_config.all_actions
        # Bare `raise` re-raises the exception being handled
        raise
    assert_new_jobs_created(new_jobs, latest_jobs_with_files_present)
    resolve_reusable_action_references(new_jobs)
    insert_into_database(job_request, new_jobs)
    return job_request, new_jobs
def make_job_request(action="generate_cohort", **kwargs):
    """
    Return a JobRequest with a fresh UUID id and fixed sample values,
    requesting the single `action`.

    Any extra keyword arguments are set as attributes on the request after it
    has been constructed, overriding the sample values.
    """
    fields = {
        "id": str(uuid.uuid4()),
        "repo_url": "https://example.com/repo.git",
        "commit": "abcdef0123456789",
        "workspace": "1",
        "database_name": "full",
        "requested_actions": [action],
        "original": {},
    }
    request = JobRequest(**fields)
    # Overrides are applied with setattr rather than through the constructor
    for attr_name in kwargs:
        setattr(request, attr_name, kwargs[attr_name])
    return request
Esempio n. 8
0
def job_request_from_remote_format(job_request):
    """
    Translate a job request dict, in the shape sent by the job-server, into
    our internal JobRequest object.

    The incoming dict is kept unchanged on the result as `original`.
    """
    # Read the identifier before the workspace so that a payload missing both
    # still fails on "identifier" first
    request_id = str(job_request["identifier"])
    workspace = job_request["workspace"]
    return JobRequest(
        id=request_id,
        repo_url=workspace["repo"],
        commit=job_request["sha"],
        branch=workspace["branch"],
        requested_actions=job_request["requested_actions"],
        cancelled_actions=job_request["cancelled_actions"],
        workspace=workspace["name"],
        database_name=workspace["db"],
        force_run_dependencies=job_request["force_run_dependencies"],
        original=job_request,
    )
Esempio n. 9
0
def make_job_request(action=None, actions=None, **kwargs):
    """
    Return a JobRequest with a fresh UUID id and fixed sample values.

    Pass either a single `action` or a list of `actions`, not both; with
    neither, the request asks for "generate_cohort". Any extra keyword
    arguments are set as attributes on the request after it has been
    constructed.
    """
    assert not (actions and action)
    if not actions:
        actions = [action] if action else ["generate_cohort"]

    fields = {
        "id": str(uuid.uuid4()),
        "repo_url": "https://example.com/repo.git",
        "commit": "abcdef0123456789",
        "workspace": "1",
        "database_name": "full",
        "requested_actions": actions,
        "cancelled_actions": [],
        "original": {},
    }
    request = JobRequest(**fields)
    # Overrides are applied with setattr rather than through the constructor
    for attr_name in kwargs:
        setattr(request, attr_name, kwargs[attr_name])
    return request
Esempio n. 10
0
def test_validate_job_request(params, exc_msg, monkeypatch):
    """
    Applying the `params` overrides to an otherwise valid job request must make
    `validate_job_request` raise a JobRequestError matching `exc_msg`.

    `params` and `exc_msg` are presumably supplied by a parametrize decorator
    outside this view.
    """
    monkeypatch.setattr("jobrunner.config.USING_DUMMY_DATA_BACKEND", False)
    tests_dir = Path(__file__).parent.resolve()
    fields = {
        "id": "123",
        "repo_url": str(tests_dir / "fixtures/git-repo"),
        # GIT_DIR=tests/fixtures/git-repo git rev-parse v1
        "commit": "d1e88b31cbe8f67c58f938adb5ee500d54a69764",
        "branch": "v1",
        "requested_actions": ["generate_cohort"],
        "cancelled_actions": [],
        "workspace": "1",
        # note: the db name sent by job-server is 'full'
        "database_name": "full",
        "original": {},
    }
    fields.update(params)
    # Build the request outside the `raises` block so that only validation
    # itself is allowed to raise
    invalid_request = JobRequest(**fields)

    with pytest.raises(JobRequestError, match=exc_msg):
        validate_job_request(invalid_request)
Esempio n. 11
0
import time

from jobrunner.models import Job, JobRequest
from jobrunner import log_utils, local_run


# A fixed instant, in seconds since the Unix epoch (it is passed to
# `datetime.utcfromtimestamp` below)
FROZEN_TIMESTAMP = 1608568119.1467905
# The same instant as a naive-UTC ISO 8601 string
# NOTE(review): `datetime.utcfromtimestamp` is deprecated since Python 3.12;
# consider a timezone-aware replacement if the expected string can be kept
FROZEN_TIMESTRING = datetime.utcfromtimestamp(FROZEN_TIMESTAMP).isoformat()

# Sample objects built once at import time; `test_job` is used by
# `test_formatting_filter` below, and both the job and the request point at
# the same repo URL
repo_url = "https://github.com/opensafely/project"
test_job = Job(id="id", action="action", repo_url=repo_url)
test_request = JobRequest(
    id="request",
    repo_url=repo_url,
    workspace="workspace",
    commit="commit",
    requested_actions=["action"],
    cancelled_actions=[],
    database_name="dummy",
)


def test_formatting_filter():
    record = logging.makeLogRecord({})
    assert log_utils.formatting_filter(record)
    assert record.action == ""

    record = logging.makeLogRecord({"job": test_job})
    assert log_utils.formatting_filter(record)
    assert record.action == "action: "
    assert record.tags == "project=project action=action id=id"