def test_gets_one_job(db):
    """A lone successful job comes back with its action and state intact."""
    job_factory(
        workspace="the-workspace", action="the-action", state=State.SUCCEEDED
    )

    job = only(calculate_workspace_state("the-workspace"))

    assert (job.action, job.state) == ("the-action", State.SUCCEEDED)
def test_gets_a_job_for_each_action(db):
    """Each distinct action in the workspace contributes exactly one job."""
    for action in ("action1", "action2"):
        job_factory(workspace="the-workspace", action=action)

    jobs = calculate_workspace_state("the-workspace")

    assert len(jobs) == 2
    assert {"action1", "action2"} <= {job.action for job in jobs}
def list_outputs_from_action(workspace, action):
    """Return the output files recorded for *action* in *workspace*.

    Yields an empty list when the action has never been run before.
    """
    outputs = (
        job.output_files
        for job in calculate_workspace_state(workspace)
        if job.action == action
    )
    return next(outputs, [])
def create_job_request_and_jobs(project_dir, actions, force_run_dependencies):
    """Build a JobRequest for *actions*, work out which new jobs it implies,
    persist them, and return ``(job_request, new_jobs)``.

    Args:
        project_dir: Path to the local project checkout; its name is used as
            the workspace name and it must contain a ``project.yaml``.
        actions: Requested action names; must be non-empty.
        force_run_dependencies: Passed through to the JobRequest.

    Raises:
        ProjectValidationError: if *project_dir* has no ``project.yaml``.
        UnknownActionError: if *actions* is empty or names an unknown action;
            the exception is annotated with ``valid_actions`` for display.
    """
    job_request = JobRequest(
        id=random_id(),
        repo_url=str(project_dir),
        commit=None,
        requested_actions=actions,
        cancelled_actions=[],
        workspace=project_dir.name,
        database_name="dummy",
        force_run_dependencies=force_run_dependencies,
        # The default behaviour of refusing to run if a dependency has failed
        # makes for an awkward workflow when iterating in development
        force_run_failed=True,
        branch="",
        original={"created_by": getuser()},
    )
    project_file_path = project_dir / "project.yaml"
    if not project_file_path.exists():
        raise ProjectValidationError(f"No project.yaml file found in {project_dir}")
    # NOTE: Similar but non-identical logic is implemented for running jobs in
    # production in `jobrunner.create_or_update_jobs.create_jobs`. If you make
    # changes below then consider what, if any, the appropriate corresponding
    # changes might be for production jobs.
    pipeline_config = load_pipeline(project_file_path)
    latest_jobs = calculate_workspace_state(job_request.workspace)
    # On the server out-of-band deletion of an existing output is considered an
    # error, so we ignore that case when scheduling and allow jobs with missing
    # dependencies to fail loudly when they are actually run. However for local
    # running we should allow researchers to delete outputs on disk and
    # automatically rerun the actions that create them if they are needed. So
    # here we check whether any files are missing for completed actions and, if
    # so, treat them as though they had not been run -- this will automatically
    # trigger a rerun.
    latest_jobs_with_files_present = [
        job for job in latest_jobs if all_output_files_present(project_dir, job)
    ]
    try:
        if not actions:
            raise UnknownActionError("At least one action must be supplied")
        new_jobs = get_new_jobs_to_run(
            job_request, pipeline_config, latest_jobs_with_files_present
        )
    except UnknownActionError as e:
        # Annotate the exception with a list of valid action names so we can
        # show them to the user
        e.valid_actions = [RUN_ALL_COMMAND] + pipeline_config.all_actions
        # Bare `raise` re-raises the active exception with its original
        # traceback and without adding a redundant re-raise frame
        raise
    assert_new_jobs_created(new_jobs, latest_jobs_with_files_present)
    resolve_reusable_action_references(new_jobs)
    insert_into_database(job_request, new_jobs)
    return job_request, new_jobs
def test_gets_the_latest_job_for_an_action(db):
    """When an action has run twice, only the newest job is reported."""
    for created_at, state in [(1000, State.FAILED), (2000, State.SUCCEEDED)]:
        job_factory(
            workspace="the-workspace",
            action="the-action",
            created_at=created_at,
            state=state,
        )

    job = only(calculate_workspace_state("the-workspace"))

    assert job.state == State.SUCCEEDED
def test_ignores_cancelled_jobs(db):
    """A newer but cancelled job must not shadow an older non-cancelled one."""
    common = {"workspace": "the-workspace", "action": "the-action"}
    job_factory(created_at=1000, state=State.FAILED, **common)
    job_factory(created_at=2000, state=State.SUCCEEDED, cancelled=True, **common)

    job = only(calculate_workspace_state("the-workspace"))

    assert job.state == State.FAILED
def get_latest_jobs_for_actions_in_project(workspace, pipeline_config):
    """Return the workspace's latest jobs, keeping only actions the project defines."""
    # Hoist the membership collection out of the comprehension for O(1) lookups.
    known_actions = set(pipeline_config.all_actions)
    return [
        job
        for job in calculate_workspace_state(workspace)
        if job.action in known_actions
    ]
def test_doesnt_include_dummy_error_jobs(db):
    """Placeholder `__error__` jobs are filtered out of the workspace state."""
    job_factory(workspace="the-workspace", action="__error__")

    assert not calculate_workspace_state("the-workspace")