def test_delete_non_retained_job_pipeline_runs_on_job_run_retain_all(
    test_app, client, celery, pipeline, abortable_async_res, monkeypatch, mocker
):
    # Multiple parameters so that the job consists of multiple pipeline
    # runs.
    job_spec = create_job_spec(
        pipeline.project.uuid, pipeline.uuid, parameters=[{}, {}, {}]
    )
    job_uuid = client.post("/api/jobs/", json=job_spec).get_json()["uuid"]
    client.put(
        f"/api/jobs/{job_uuid}",
        json={"confirm_draft": True, "cron_schedule": "* * * * *"},
    )

    # Trigger a job run.
    with test_app.app_context():
        with TwoPhaseExecutor(db.session) as tpe:
            namespace_jobs.RunJob(tpe).transaction(job_uuid)

    # Mark the pipeline runs of this job run as done.
    pipeline_runs = client.get(
        f"/api/jobs/{job_uuid}/pipeline_runs"
    ).get_json()["pipeline_runs"]
    for run in pipeline_runs:
        client.put(
            f'/api/jobs/{job_uuid}/{run["uuid"]}',
            json={
                "status": "SUCCESS",
                "finished_time": datetime.datetime.now().isoformat(),
            },
        )
    pipeline_runs = client.get(
        f"/api/jobs/{job_uuid}/pipeline_runs"
    ).get_json()["pipeline_runs"]
    for run in pipeline_runs:
        assert run["status"] == "SUCCESS"

    # Trigger another job run.
    with test_app.app_context():
        with TwoPhaseExecutor(db.session) as tpe:
            namespace_jobs.RunJob(tpe).transaction(job_uuid)

    # The previously existing pipeline runs should still be there.
    pipeline_runs = client.get(
        f"/api/jobs/{job_uuid}/pipeline_runs"
    ).get_json()["pipeline_runs"]
    assert len(pipeline_runs) == 6
    assert all(
        task[1]["name"] != "app.core.tasks.delete_job_pipeline_run_directories"
        for task in celery.tasks
    )

def discoverFSDeletedProjects():
    """Cleanup projects that were deleted from the filesystem."""

    project_paths = [
        entry.name
        for entry in os.scandir(app.config["PROJECTS_DIR"])
        if entry.is_dir()
    ]

    fs_removed_projects = Project.query.filter(
        Project.path.notin_(project_paths),
        # This way we do not try to delete a project twice if it is
        # already being deleted, and we avoid considering a project
        # that is being initialized as deleted from the filesystem.
        Project.status.in_(["READY"]),
    ).all()

    # Use a TwoPhaseExecutor for each project so that issues in one
    # project do not hinder the deletion of others.
    for proj_uuid in [project.uuid for project in fs_removed_projects]:
        try:
            with TwoPhaseExecutor(db.session) as tpe:
                DeleteProject(tpe).transaction(proj_uuid)
        except Exception as e:
            current_app.logger.error(
                "Error during project deletion (discovery) of "
                f"{proj_uuid}: {e}."
            )

def discoverFSCreatedProjects():
    """Detect projects that were added through the file system."""

    # Detect new projects by finding directories that were not
    # registered in the db as projects.
    existing_project_paths = [project.path for project in Project.query.all()]
    project_paths = [
        entry.name
        for entry in os.scandir(app.config["PROJECTS_DIR"])
        if entry.is_dir()
    ]
    new_project_paths = set(project_paths) - set(existing_project_paths)

    # Use a TwoPhaseExecutor for each project so that issues in one
    # project do not hinder the discovery of others.
    for new_project_path in new_project_paths:
        try:
            with TwoPhaseExecutor(db.session) as tpe:
                CreateProject(tpe).transaction(new_project_path)
        except Exception as e:
            current_app.logger.error(
                "Error during project initialization (discovery) of "
                f"{new_project_path}: {e}."
            )

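# Tiny standalone illustration (placeholder paths, not part of the functions
# above) of the discovery rule they rely on: a directory on disk with no
# matching Project.path in the db is treated as a new project, while a READY
# project whose directory is gone is treated as deleted from the filesystem.
existing_project_paths = ["proj-a", "proj-b"]
project_paths = ["proj-a", "proj-c"]

new_project_paths = set(project_paths) - set(existing_project_paths)
removed_project_paths = set(existing_project_paths) - set(project_paths)

assert new_project_paths == {"proj-c"}
assert removed_project_paths == {"proj-b"}
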
def pipelines_get(project_uuid):
    try:
        with TwoPhaseExecutor(db.session) as tpe:
            SyncProjectPipelinesDBState(tpe).transaction(project_uuid)
    except Exception as e:
        msg = (
            "Error during project pipelines synchronization of "
            f"{project_uuid}: {str(e)}."
        )
        return jsonify({"message": msg}), 500

    pipelines = Pipeline.query.filter(Pipeline.project_uuid == project_uuid).all()

    pipelines_augmented = []
    for pipeline in pipelines:
        pipeline_augmented = {
            "uuid": pipeline.uuid,
            "path": pipeline.path,
        }

        pipeline_json = get_pipeline_json(pipeline.uuid, pipeline.project_uuid)
        if pipeline_json is not None:
            pipeline_augmented["name"] = pipeline_json["name"]
        else:
            pipeline_augmented["name"] = "Warning: pipeline file was not found."

        pipelines_augmented.append(pipeline_augmented)

    json_string = json.dumps({"success": True, "result": pipelines_augmented})
    return json_string, 200, {"content-type": "application/json"}

def projects_get():
    discoverFSDeletedProjects()
    discoverFSCreatedProjects()

    # Projects that are in an INITIALIZING or DELETING state won't
    # be shown until ready.
    projects = projects_schema.dump(Project.query.filter_by(status="READY").all())

    for project in projects:
        # Discover both pipelines of newly initialized projects and
        # manually initialized pipelines of existing projects. Use a
        # TwoPhaseExecutor for each project so that issues in one
        # project do not hinder the pipeline synchronization of
        # others.
        try:
            with TwoPhaseExecutor(db.session) as tpe:
                SyncProjectPipelinesDBState(tpe).transaction(project["uuid"])
        except Exception as e:
            current_app.logger.error(
                "Error during project pipelines synchronization of "
                f'{project["path"]}: {e}.'
            )

        counts = project_entity_counts(project["uuid"])
        project.update(counts)

    return jsonify(projects)

def post(self):
    """Launches an interactive session."""
    post_data = request.get_json()

    try:
        with TwoPhaseExecutor(db.session) as tpe:
            CreateInteractiveSession(tpe).transaction(
                post_data["project_uuid"],
                post_data["pipeline_uuid"],
                post_data["pipeline_path"],
                post_data["project_dir"],
                post_data["host_userdir"],
            )
    except JupyterBuildInProgressException:
        return {"message": "JupyterBuildInProgress"}, 423
    except Exception as e:
        current_app.logger.error(e)
        return {"message": str(e)}, 500

    isess = models.InteractiveSession.query.filter_by(
        project_uuid=post_data["project_uuid"],
        pipeline_uuid=post_data["pipeline_uuid"],
    ).one_or_none()

    return marshal(isess.as_dict(), schema.session), 201

def pipelines_delete(project_uuid, pipeline_uuid):
    try:
        with TwoPhaseExecutor(db.session) as tpe:
            DeletePipeline(tpe).transaction(project_uuid, pipeline_uuid)
    except Exception as e:
        return {"message": str(e)}, 500

    return jsonify({"success": True})

def delete(self, job_uuid):
    try:
        with TwoPhaseExecutor(db.session) as tpe:
            DeleteJob(tpe).transaction(job_uuid)
    except Exception as e:
        msg = f"Error during job deletion: {e}"
        return {"message": msg}, 500

    return jsonify({"message": "Job deletion was successful."})

def post(self):
    try:
        with TwoPhaseExecutor(db.session) as tpe:
            task = ImportGitProject(tpe).transaction(
                request.json["url"], request.json.get("project_name")
            )
    except Exception as e:
        return jsonify({"message": str(e)}), 500

    return background_task_schema.dump(task)

def test_delete_non_retained_job_pipeline_runs_on_job_run_update_retain_n(
    max_retained_pipeline_runs,
    test_app,
    client,
    celery,
    pipeline,
    abortable_async_res,
    monkeypatch,
    mocker,
):
    # Multiple parameters so that the job consists of multiple pipeline
    # runs.
    job_spec = create_job_spec(
        pipeline.project.uuid,
        pipeline.uuid,
        parameters=[{}, {}, {}],
        max_retained_pipeline_runs=max_retained_pipeline_runs,
    )
    job_uuid = client.post("/api/jobs/", json=job_spec).get_json()["uuid"]
    client.put(
        f"/api/jobs/{job_uuid}",
        json={"confirm_draft": True, "cron_schedule": "* * * * *"},
    )

    # Trigger a job run.
    with test_app.app_context():
        with TwoPhaseExecutor(db.session) as tpe:
            namespace_jobs.RunJob(tpe).transaction(job_uuid)

    # Mark the pipeline runs of this job run as done.
    pipeline_runs = client.get(
        f"/api/jobs/{job_uuid}/pipeline_runs"
    ).get_json()["pipeline_runs"]
    for run in pipeline_runs:
        client.put(
            f'/api/jobs/{job_uuid}/{run["uuid"]}',
            json={
                "status": "SUCCESS",
                "finished_time": datetime.datetime.now().isoformat(),
            },
        )

    expected_deleted_runs_n = max(0, 3 - max_retained_pipeline_runs)
    pipeline_runs.sort(key=lambda x: x["pipeline_run_index"])
    expected_deleted_run_uuids = set(
        run["uuid"] for run in pipeline_runs[:expected_deleted_runs_n]
    )
    deleted_run_uuids = set(
        uuid
        for task in celery.tasks
        if task[1]["name"] == "app.core.tasks.delete_job_pipeline_run_directories"
        for uuid in task[1]["kwargs"]["pipeline_run_uuids"]
    )
    assert expected_deleted_run_uuids == deleted_run_uuids

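# Standalone worked example (not part of the test suite above) of the retention
# arithmetic the assertions rely on: each job run produces 3 pipeline runs, so
# with max_retained_pipeline_runs smaller than 3, the oldest runs (ordered by
# pipeline_run_index) are the ones expected to be scheduled for deletion.
def _expected_deleted_runs(total_runs: int, max_retained: int) -> int:
    return max(0, total_runs - max_retained)


assert _expected_deleted_runs(3, 1) == 2  # retain 1 of 3 -> delete the 2 oldest
assert _expected_deleted_runs(3, 3) == 0  # retaining all -> nothing is deleted
assert _expected_deleted_runs(3, 5) == 0  # retention above the total -> nothing
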
def _handle_recurring_scheduler_job(
    job_type: str, interval: int, handle_func: Callable, app: Flask
) -> None:
    try:
        with app.app_context():
            with TwoPhaseExecutor(db.session) as tpe:
                _HandleRecurringSchedulerJob(tpe).transaction(
                    job_type, interval, handle_func, app
                )
    except sqlalchemy.exc.IntegrityError:
        logger.debug(f"SchedulerJob with type {job_type} already exists.")
    except Exception:
        logger.error(f"Failed to run job with type: {job_type}.")

def projects_delete():
    try:
        with TwoPhaseExecutor(db.session) as tpe:
            DeleteProject(tpe).transaction(request.json["project_uuid"])
    except Exception as e:
        return (
            jsonify({"message": f"Failed to delete the project. Error: {e}"}),
            500,
        )

    return jsonify({"message": "Project deleted."})

def delete(self, project_uuid, environment_uuid):
    """Removes an environment image given project and env uuids.

    Will stop any run or job making use of this environment.
    """
    try:
        with TwoPhaseExecutor(db.session) as tpe:
            DeleteImage(tpe).transaction(project_uuid, environment_uuid)
    except Exception as e:
        return {"message": str(e)}, 500

    return {"message": "Environment image was successfully deleted."}, 200

def delete(self, run_uuid):
    """Stops a pipeline run given its UUID."""
    try:
        with TwoPhaseExecutor(db.session) as tpe:
            could_abort = AbortPipelineRun(tpe).transaction(run_uuid)
    except Exception as e:
        return {"message": str(e)}, 500

    if could_abort:
        return {"message": "Run termination was successful."}, 200
    else:
        return {"message": "Run does not exist or is not running."}, 400

def pipelines_create(project_uuid):
    pipeline_path = request.json["pipeline_path"]
    pipeline_name = request.json["name"]

    try:
        with TwoPhaseExecutor(db.session) as tpe:
            CreatePipeline(tpe).transaction(project_uuid, pipeline_name, pipeline_path)
    except Exception as e:
        return jsonify({"message": str(e)}), 409

    return jsonify({"success": True})

def delete(self, project_uuid, pipeline_uuid):
    """Delete a pipeline.

    Any session, run, or job related to the pipeline is stopped and
    removed from the db.
    """
    try:
        with TwoPhaseExecutor(db.session) as tpe:
            DeletePipeline(tpe).transaction(project_uuid, pipeline_uuid)
    except Exception as e:
        return {"message": str(e)}, 500

    return {"message": "Pipeline deletion was successful."}, 200

def put(self, project_uuid, pipeline_uuid):
    """Restarts the memory-server of the session."""
    try:
        with TwoPhaseExecutor(db.session) as tpe:
            could_restart = RestartMemoryServer(tpe).transaction(
                project_uuid, pipeline_uuid
            )
    except Exception as e:
        return {"message": str(e)}, 500

    if not could_restart:
        return {"message": "SessionNotRunning"}, 500

    return {"message": "Session restart was successful."}, 200

def delete(self, project_uuid):
    """Delete a project.

    Any session, run, or job related to the project is stopped and
    removed from the db. Environment images are removed.
    """
    try:
        with TwoPhaseExecutor(db.session) as tpe:
            DeleteProject(tpe).transaction(project_uuid)
    except Exception as e:
        return {"message": str(e)}, 500

    return {"message": "Project deletion was successful."}, 200

def delete(self, project_uuid, pipeline_uuid):
    """Shuts down a session."""
    try:
        with TwoPhaseExecutor(db.session) as tpe:
            could_shutdown = StopInteractiveSession(tpe).transaction(
                project_uuid, pipeline_uuid
            )
    except Exception as e:
        return {"message": str(e)}, 500

    if could_shutdown:
        return {"message": "Session shutdown was successful."}, 200
    else:
        return {"message": "Session not found."}, 400

def delete(self, project_uuid, environment_uuid):
    """Delete an environment.

    Any session, run, job, or environment build related to the
    environment will be aborted, and environment images will be
    removed.
    """
    try:
        with TwoPhaseExecutor(db.session) as tpe:
            DeleteEnvironment(tpe).transaction(project_uuid, environment_uuid)
    except Exception as e:
        return {"message": str(e)}, 500

    return {"message": "Environment deletion was successful."}, 200

def post(self):
    """Starts a new (interactive) pipeline run."""
    post_data = request.get_json()
    post_data["run_config"]["run_endpoint"] = "runs"

    try:
        with TwoPhaseExecutor(db.session) as tpe:
            run = CreateInteractiveRun(tpe).transaction(
                post_data["project_uuid"],
                post_data["run_config"],
                pipeline=construct_pipeline(**post_data),
            )
    except Exception as e:
        return {"message": str(e)}, 500

    return marshal(run, schema.interactive_run), 201

def post(self):
    """Queues a list of environment builds.

    Only unique requests are considered: a request containing
    duplicate environment_image_build_requests will produce only one
    environment build per unique environment_image_build_request.

    Note that requesting an environment_image_build for an environment
    (identified by project_uuid, environment_uuid, project_path) will
    REVOKE/ABORT any other active (queued or actually started)
    environment build for that environment. This implies that only one
    environment build can be active (queued or actually started) for a
    given environment.
    """

    # Keep only unique requests.
    post_data = request.get_json()
    builds_requests = post_data["environment_image_build_requests"]
    builds_requests = set(
        (req["project_uuid"], req["environment_uuid"], req["project_path"])
        for req in builds_requests
    )
    builds_requests = [
        {
            "project_uuid": req[0],
            "environment_uuid": req[1],
            "project_path": req[2],
        }
        for req in builds_requests
    ]

    defined_builds = []
    failed_requests = []
    # Start a celery task for each unique environment build request.
    for build_request in builds_requests:
        try:
            with TwoPhaseExecutor(db.session) as tpe:
                defined_builds.append(
                    CreateEnvironmentImageBuild(tpe).transaction(build_request)
                )
        except Exception:
            failed_requests.append(build_request)

    return_data = {"environment_image_builds": defined_builds}
    return_code = 200
    if failed_requests:
        return_data["failed_requests"] = failed_requests
        return_code = 500

    return return_data, return_code

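# Minimal standalone sketch (placeholder values, not part of the resource
# above) of the deduplication that endpoint performs on its payload: duplicate
# entries in "environment_image_build_requests" collapse to a single build per
# unique (project_uuid, environment_uuid, project_path) tuple.
payload = {
    "environment_image_build_requests": [
        {"project_uuid": "p-1", "environment_uuid": "e-1", "project_path": "proj"},
        {"project_uuid": "p-1", "environment_uuid": "e-1", "project_path": "proj"},
        {"project_uuid": "p-1", "environment_uuid": "e-2", "project_path": "proj"},
    ]
}
unique_requests = set(
    (req["project_uuid"], req["environment_uuid"], req["project_path"])
    for req in payload["environment_image_build_requests"]
)
assert len(unique_requests) == 2  # only two distinct environments get a build
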
def filemanager_import_project_from_data():
    """Import a project from the data directory.

    A temporary workaround to import uploaded projects correctly.
    """
    name = request.args.get("name")
    if name is None or os.path.sep in name:
        return jsonify({"message": f"Invalid name: {name}"}), 400

    from_path = safe_join("/userdir/data", name)
    to_path = safe_join("/userdir/projects", name)
    os.rename(from_path, to_path)

    # Pick up the project from the fs.
    with TwoPhaseExecutor(db.session) as tpe:
        project_uuid = CreateProject(tpe).transaction(name)

    return {"project_uuid": project_uuid}, 201

def delete(self, job_uuid):
    """Delete a job.

    The job is stopped if it's running, and related entities are then
    removed from the db.
    """
    try:
        with TwoPhaseExecutor(db.session) as tpe:
            could_delete = DeleteJob(tpe).transaction(job_uuid)
    except Exception as e:
        return {"message": str(e)}, 500

    if could_delete:
        return {"message": "Job deletion was successful."}, 200
    else:
        return {"message": "Job does not exist."}, 404

def projects_post():
    try:
        with TwoPhaseExecutor(db.session) as tpe:
            CreateProject(tpe).transaction(request.json["name"])
    except Exception as e:
        # The sql integrity error message can be quite ugly.
        if isinstance(e, sqlalchemy.exc.IntegrityError):
            msg = f'Project "{request.json["name"]}" already exists.'
        else:
            msg = str(e)

        return (
            jsonify({"message": msg}),
            500,
        )

    return jsonify({"message": "Project created."})

def post(self):
    """Queues a Jupyter build."""
    try:
        with TwoPhaseExecutor(db.session) as tpe:
            jupyter_build = CreateJupyterBuild(tpe).transaction()
    except SessionInProgressException:
        return {"message": "SessionInProgressException"}, 500
    except Exception:
        jupyter_build = None

    if jupyter_build is not None:
        return_data = {"jupyter_build": jupyter_build}
        return_code = 200
    else:
        return_data = {}
        return_code = 500

    return (
        marshal(return_data, schema.jupyter_build_request_result),
        return_code,
    )

def delete(self, job_uuid):
    """Stops a job given its UUID.

    This does not delete any corresponding database entries; it
    updates the status of the corresponding objects to "ABORTED".
    """
    try:
        with TwoPhaseExecutor(db.session) as tpe:
            could_abort = AbortJob(tpe).transaction(job_uuid)
    except Exception as e:
        return {"message": str(e)}, 500

    if could_abort:
        return {"message": "Job termination was successful."}, 200
    else:
        return {"message": "Job does not exist or is already completed."}, 404

def pipelines_create(project_uuid):
    pipeline_path = request.json["pipeline_path"]
    pipeline_name = request.json["name"]

    try:
        with TwoPhaseExecutor(db.session) as tpe:
            CreatePipeline(tpe).transaction(project_uuid, pipeline_name, pipeline_path)
    except FileExistsError:
        return (
            jsonify({"message": "A pipeline with the given path already exists."}),
            400,
        )
    except Exception as e:
        return jsonify({"message": str(e)}), 409

    return jsonify({"success": True})

def projects_get():
    discoverFSDeletedProjects()
    discoverFSCreatedProjects()

    # Projects that are in an INITIALIZING or DELETING state won't
    # be shown until ready.
    projects = projects_schema.dump(Project.query.filter_by(status="READY").all())

    for project in projects:
        # Discover both pipelines of newly initialized projects and
        # manually initialized pipelines of existing projects. Use a
        # TwoPhaseExecutor for each project so that issues in one
        # project do not hinder the pipeline synchronization of
        # others.
        try:
            with TwoPhaseExecutor(db.session) as tpe:
                SyncProjectPipelinesDBState(tpe).transaction(project["uuid"])
        except Exception as e:
            current_app.logger.error(
                "Error during project pipelines synchronization of "
                f'{project["path"]}: {e}.'
            )

        project["pipeline_count"] = Pipeline.query.filter(
            Pipeline.project_uuid == project["uuid"]
        ).count()
        project["environment_count"] = len(get_environments(project["uuid"]))

        resp = requests.get(
            f'http://{current_app.config["ORCHEST_API_ADDRESS"]}/api/jobs/',
            params={"project_uuid": project["uuid"]},
        )
        data = resp.json()
        if resp.status_code != 200:
            job_count = 0
        else:
            job_count = len(data.get("jobs", []))
        project["job_count"] = job_count

    return jsonify(projects)

def trigger_conditional_jupyter_image_build(app):
    # A build is triggered only if none of the early returns below
    # apply.

    # Check if the Jupyter setup script is non-empty.
    jupyter_setup_script = os.path.join("/userdir", _config.JUPYTER_SETUP_SCRIPT)
    if os.path.isfile(jupyter_setup_script):
        with open(jupyter_setup_script, "r") as file:
            if len(file.read()) == 0:
                app.logger.info(
                    "Empty setup script, no need to trigger a jupyter build."
                )
                return
    else:
        app.logger.info("No setup script, no need to trigger a jupyter build.")
        return

    if utils.get_active_custom_jupyter_images():
        app.logger.info(
            "There are active custom jupyter images, no need to trigger a build."
        )
        return

    if db.session.query(
        db.session.query(JupyterImageBuild)
        .filter(JupyterImageBuild.status.in_(["PENDING", "STARTED"]))
        .exists()
    ).scalar():
        app.logger.info(
            "Ongoing custom jupyter image build, no need to trigger a build."
        )
        return

    # Note: this is not free of race conditions if concurrent API
    # instances restart.
    try:
        app.logger.info("Triggering custom jupyter build.")
        with TwoPhaseExecutor(db.session) as tpe:
            CreateJupyterEnvironmentBuild(tpe).transaction()
    except Exception:
        app.logger.error("Failed to build Jupyter image.")
