def test_create_or_update_jobs_with_git_error(tmp_work_dir):
    """A JobRequest whose commit cannot be fetched is stored as a FAILED job.

    The stored job should keep the request's identifying fields, leave every
    scheduling-related field unset, and carry the git error in its status
    message.
    """
    fixture_repo = Path(__file__).parent.resolve() / "fixtures/git-repo"
    repo_url = str(fixture_repo)
    # Well-formed 40-character SHA; presumably absent from the fixture repo
    bad_commit = "0" * 40
    expected_message = (
        f"GitError: Error fetching commit {bad_commit} from {repo_url}"
    )

    request = JobRequest(
        id="123",
        repo_url=repo_url,
        commit=bad_commit,
        branch="v1",
        requested_actions=["generate_cohort"],
        cancelled_actions=[],
        workspace="1",
        database_name="dummy",
        original={},
    )
    create_or_update_jobs(request)

    job = find_one(Job)
    assert job.job_request_id == "123"
    assert job.state == State.FAILED
    assert job.repo_url == repo_url
    assert job.commit == bad_commit
    assert job.workspace == "1"
    # None of the fields populated during normal scheduling should be set
    for unset_field in (
        "wait_for_job_ids",
        "requires_outputs_from",
        "run_command",
        "output_spec",
    ):
        assert getattr(job, unset_field) is None
    assert job.status_message == expected_message
def test_create_or_update_jobs_with_git_error(tmp_work_dir):
    """A JobRequest naming an unresolvable branch is stored as one FAILED job.

    The stored job should keep the request's identifying fields, leave the
    commit and every scheduling-related field as None, and carry the git
    error in its status message.
    """
    repo_url = str(Path(__file__).parent.resolve() / "fixtures/git-repo")
    job_request = JobRequest(
        id="123",
        repo_url=repo_url,
        commit=None,
        branch="no-such-branch",
        requested_actions=["generate_cohort"],
        workspace="1",
        database_name="dummy",
        original={},
    )
    create_or_update_jobs(job_request)
    jobs = find_where(Job)
    assert len(jobs) == 1
    j = jobs[0]
    assert j.job_request_id == "123"
    assert j.state == State.FAILED
    assert j.repo_url == repo_url
    # Identity checks: `== None` could be satisfied by an object with a
    # permissive __eq__, whereas `is None` only accepts the None singleton.
    assert j.commit is None
    assert j.workspace == "1"
    assert j.wait_for_job_ids is None
    assert j.requires_outputs_from is None
    assert j.run_command is None
    assert j.output_spec is None
    assert (
        j.status_message
        == f"GitError: Error resolving ref 'no-such-branch' from {repo_url}"
    )
def test_create_or_update_jobs(tmp_work_dir):
    """A valid JobRequest creates one PENDING job, and resubmitting adds none.

    The first half checks the fields of the job created from the fixture
    repo's `v1` branch; the second half resubmits the same JobRequest and
    checks that the set of stored jobs is unchanged.
    """
    repo_url = str(Path(__file__).parent.resolve() / "fixtures/git-repo")
    job_request = JobRequest(
        id="123",
        repo_url=repo_url,
        commit=None,
        branch="v1",
        requested_actions=["generate_cohort"],
        workspace="1",
        database_name="dummy",
        original={},
    )
    create_or_update_jobs(job_request)
    jobs = find_where(Job)
    assert len(jobs) == 1
    j = jobs[0]
    assert j.job_request_id == "123"
    assert j.state == State.PENDING
    assert j.repo_url == repo_url
    # The request supplied no commit, so this is the SHA resolved from "v1"
    assert j.commit == "d1e88b31cbe8f67c58f938adb5ee500d54a69764"
    assert j.workspace == "1"
    assert j.action == "generate_cohort"
    assert j.wait_for_job_ids == []
    assert j.requires_outputs_from == []
    assert j.run_command == (
        "cohortextractor:latest generate_cohort --expectations-population=1000"
        " --output-dir=."
    )
    assert j.output_spec == {"highly_sensitive": {"cohort": "input.csv"}}
    # Identity check: `== None` could be satisfied by an object with a
    # permissive __eq__, whereas `is None` only accepts the None singleton.
    assert j.status_message is None
    # Check no new jobs created from same JobRequest
    create_or_update_jobs(job_request)
    new_jobs = find_where(Job)
    assert jobs == new_jobs
def test_job_request_from_remote_format():
    """The job-server payload is converted into the equivalent JobRequest."""
    workspace_payload = {
        "name": "testing",
        "repo": "https://github.com/opensafely/foo",
        "branch": "master",
        "db": "full",
    }
    remote_job_request = {
        "identifier": "123",
        "workspace": workspace_payload,
        "requested_actions": ["generate_cohort"],
        "force_run_dependencies": True,
    }

    # The full payload is expected to be kept on the result as `original`
    expected_request = JobRequest(
        id="123",
        repo_url="https://github.com/opensafely/foo",
        commit=None,
        branch="master",
        workspace="testing",
        database_name="full",
        requested_actions=["generate_cohort"],
        force_run_dependencies=True,
        original=remote_job_request,
    )

    converted = job_request_from_remote_format(remote_job_request)
    assert converted == expected_request
def job_request_factory(**kwargs):
    """Build a JobRequest from JOB_REQUEST_DEFAULTS with `kwargs` applied on top.

    When no `id` is supplied, one is generated by base32-encoding 10 random
    bytes and lower-casing the result. A SavedJobRequest holding the
    request's id and `original` payload is passed to `insert` before the
    JobRequest is returned.
    """
    overrides = dict(kwargs)
    if "id" not in overrides:
        random_bytes = secrets.token_bytes(10)
        overrides["id"] = base64.b32encode(random_bytes).decode("ascii").lower()

    # Copy the defaults so shared mutable values are never modified in place
    fields = deepcopy(JOB_REQUEST_DEFAULTS)
    fields.update(overrides)

    request = JobRequest(**fields)
    saved = SavedJobRequest(id=request.id, original=request.original)
    insert(saved)
    return request
def create_job_request_and_jobs(project_dir, actions, force_run_dependencies):
    """Create a local JobRequest for `project_dir` and the jobs needed to run it.

    Builds the JobRequest, loads `project.yaml`, works out which jobs need to
    run given the current workspace state, and passes the request and its
    new jobs to `insert_into_database`.

    Returns a `(job_request, new_jobs)` tuple.

    Raises ProjectValidationError when `project_dir` has no `project.yaml`.
    An UnknownActionError (raised here when `actions` is empty, or
    propagated from `get_new_jobs_to_run`) is re-raised with a
    `valid_actions` attribute attached.
    """
    request_fields = {
        "id": random_id(),
        "repo_url": str(project_dir),
        "commit": None,
        "requested_actions": actions,
        "cancelled_actions": [],
        "workspace": project_dir.name,
        "database_name": "dummy",
        "force_run_dependencies": force_run_dependencies,
        # Refusing to run when a dependency has failed is the default, but
        # that makes for an awkward workflow when iterating in development
        "force_run_failed": True,
        "branch": "",
        "original": {"created_by": getuser()},
    }
    job_request = JobRequest(**request_fields)

    project_file_path = project_dir / "project.yaml"
    if not project_file_path.exists():
        raise ProjectValidationError(
            f"No project.yaml file found in {project_dir}")

    # NOTE: Similar but non-identical logic is implemented for running jobs
    # in production in `jobrunner.create_or_update_jobs.create_jobs`. If you
    # change anything below, consider whether a corresponding change is
    # needed for production jobs.
    pipeline_config = load_pipeline(project_file_path)
    latest_jobs = calculate_workspace_state(job_request.workspace)

    # On the server, out-of-band deletion of an existing output is considered
    # an error, so that case is ignored when scheduling and jobs with missing
    # dependencies fail loudly when they are actually run. Locally,
    # researchers should be able to delete outputs on disk and have the
    # actions that create them rerun automatically. So any completed action
    # with missing output files is treated as though it had never run, which
    # triggers a rerun.
    jobs_with_outputs = []
    for candidate in latest_jobs:
        if all_output_files_present(project_dir, candidate):
            jobs_with_outputs.append(candidate)

    try:
        if not actions:
            raise UnknownActionError("At least one action must be supplied")
        new_jobs = get_new_jobs_to_run(
            job_request, pipeline_config, jobs_with_outputs
        )
    except UnknownActionError as error:
        # Attach the list of valid action names so they can be shown to the
        # user
        error.valid_actions = [RUN_ALL_COMMAND] + pipeline_config.all_actions
        raise

    assert_new_jobs_created(new_jobs, jobs_with_outputs)
    resolve_reusable_action_references(new_jobs)
    insert_into_database(job_request, new_jobs)
    return job_request, new_jobs
def make_job_request(action="generate_cohort", **kwargs):
    """Return a JobRequest with fixed test values that requests `action`.

    The id is a fresh UUID4 string. Each entry in `kwargs` is set as an
    attribute on the finished object, overriding the fixed values.
    """
    fields = {
        "id": str(uuid.uuid4()),
        "repo_url": "https://example.com/repo.git",
        "commit": "abcdef0123456789",
        "workspace": "1",
        "database_name": "full",
        "requested_actions": [action],
        "original": {},
    }
    request = JobRequest(**fields)
    # Overrides are applied after construction, as plain attribute sets
    for attr_name, attr_value in kwargs.items():
        setattr(request, attr_name, attr_value)
    return request
def job_request_from_remote_format(job_request):
    """
    Translate a job request payload received from the job-server into our
    internal JobRequest representation.

    The payload is kept unmodified on the result as `original`.
    """
    # Keys are read in the same order the JobRequest fields are filled in,
    # so a malformed payload fails on the same missing key as before.
    request_id = str(job_request["identifier"])
    workspace = job_request["workspace"]
    repo_url = workspace["repo"]
    commit = job_request["sha"]
    branch = workspace["branch"]
    requested_actions = job_request["requested_actions"]
    cancelled_actions = job_request["cancelled_actions"]
    workspace_name = workspace["name"]
    database_name = workspace["db"]
    force_run_dependencies = job_request["force_run_dependencies"]

    return JobRequest(
        id=request_id,
        repo_url=repo_url,
        commit=commit,
        branch=branch,
        requested_actions=requested_actions,
        cancelled_actions=cancelled_actions,
        workspace=workspace_name,
        database_name=database_name,
        force_run_dependencies=force_run_dependencies,
        original=job_request,
    )
def make_job_request(action=None, actions=None, **kwargs):
    """Return a JobRequest with fixed test values for the given action(s).

    Pass either a single `action` or a list of `actions`, not both. With
    neither, the request asks for "generate_cohort". The id is a fresh UUID4
    string. Each entry in `kwargs` is set as an attribute on the finished
    object, overriding the fixed values.
    """
    assert not (actions and action)
    if not actions:
        actions = [action] if action else ["generate_cohort"]

    fields = {
        "id": str(uuid.uuid4()),
        "repo_url": "https://example.com/repo.git",
        "commit": "abcdef0123456789",
        "workspace": "1",
        "database_name": "full",
        "requested_actions": actions,
        "cancelled_actions": [],
        "original": {},
    }
    request = JobRequest(**fields)
    # Overrides are applied after construction, as plain attribute sets
    for attr_name, attr_value in kwargs.items():
        setattr(request, attr_name, attr_value)
    return request
def test_validate_job_request(params, exc_msg, monkeypatch):
    """An otherwise-valid JobRequest altered by `params` is rejected.

    `params` overrides fields of a baseline request; validation must raise
    JobRequestError with a message matching `exc_msg`.
    """
    monkeypatch.setattr("jobrunner.config.USING_DUMMY_DATA_BACKEND", False)
    fixture_repo = Path(__file__).parent.resolve() / "fixtures/git-repo"

    request_fields = {
        "id": "123",
        "repo_url": str(fixture_repo),
        # GIT_DIR=tests/fixtures/git-repo git rev-parse v1
        "commit": "d1e88b31cbe8f67c58f938adb5ee500d54a69764",
        "branch": "v1",
        "requested_actions": ["generate_cohort"],
        "cancelled_actions": [],
        "workspace": "1",
        # note: the db name sent by job-server is 'full'
        "database_name": "full",
        "original": {},
    }
    request_fields.update(params)
    invalid_request = JobRequest(**request_fields)

    with pytest.raises(JobRequestError, match=exc_msg):
        validate_job_request(invalid_request)
import time

from jobrunner.models import Job, JobRequest
from jobrunner import log_utils, local_run

# NOTE(review): `datetime` and `logging` are used below but are not imported
# in the visible lines -- presumably imported earlier in the file; confirm.
FROZEN_TIMESTAMP = 1608568119.1467905
FROZEN_TIMESTRING = datetime.utcfromtimestamp(FROZEN_TIMESTAMP).isoformat()

# Shared fixtures for the log formatting tests
repo_url = "https://github.com/opensafely/project"
test_job = Job(id="id", action="action", repo_url=repo_url)
test_request = JobRequest(
    id="request",
    repo_url=repo_url,
    workspace="workspace",
    commit="commit",
    requested_actions=["action"],
    cancelled_actions=[],
    database_name="dummy",
)


def test_formatting_filter():
    """formatting_filter sets `action` and `tags` on log records.

    A record with no job attached gets an empty `action`; a record carrying
    a job gets an action prefix and a tags string built from the job.
    """
    bare_record = logging.makeLogRecord({})
    assert log_utils.formatting_filter(bare_record)
    assert bare_record.action == ""

    job_record = logging.makeLogRecord({"job": test_job})
    assert log_utils.formatting_filter(job_record)
    assert job_record.action == "action: "
    assert job_record.tags == "project=project action=action id=id"