def make_job_dir_tree(job_name):
    job_dir = get_path("job_dir", job_name)
    if not os.path.exists(job_dir):
        os.mkdir(job_dir)
    runs_yaml_dir = get_path("runs_yaml_dir", job_name)
    if not os.path.exists(runs_yaml_dir):
        os.mkdir(runs_yaml_dir)
    runs_out_dir = get_path("runs_out_dir", job_name)
    if not os.path.exists(runs_out_dir):
        os.mkdir(runs_out_dir)
    runs_log_dir = get_path("runs_log_dir", job_name)
    if not os.path.exists(runs_log_dir):
        os.mkdir(runs_log_dir)
    job_wf_dir = get_path("job_wf_dir", job_name)
    if not os.path.exists(job_wf_dir):
        os.mkdir(job_wf_dir)

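# A more compact, behavior-equivalent variant of make_job_dir_tree is sketched
# below for illustration only; the helper name _make_job_dir_tree_compact is
# hypothetical and not part of the module. Note that os.makedirs(exist_ok=True)
# also creates missing parent directories, unlike the os.mkdir calls above.
def _make_job_dir_tree_compact(job_name):
    for path_key in ("job_dir", "runs_yaml_dir", "runs_out_dir",
                     "runs_log_dir", "job_wf_dir"):
        # idempotent: an already existing directory is not an error
        os.makedirs(get_path(path_key, job_name), exist_ok=True)
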
def terminate_runs(
    job_name,
    run_names,
    mode="terminate"  # can be one of terminate, reset, or delete
):
    could_not_be_terminated = []
    could_not_be_cleaned = []
    succeeded = []
    runs_info = get_runs_info(job_name, run_names, return_pid=True)
    db_changed = False
    for run_name in runs_info.keys():
        try:
            if isinstance(runs_info[run_name]["time_started"], datetime) and \
                    not isinstance(runs_info[run_name]["time_finished"], datetime):
                if runs_info[run_name]["pid"] != -1:
                    is_killed = kill_proc_tree(runs_info[run_name]["pid"])
                    if not is_killed:
                        could_not_be_terminated.append(run_name)
                        continue
                    cleanup_zombie_process(runs_info[run_name]["pid"])
            if mode == "terminate":
                job_manager.set_exec_ended(job_name, run_name,
                                           status="terminated by user",
                                           time_finished=datetime.now())
            else:
                log_path = get_path("run_log", job_name, run_name)
                if os.path.exists(log_path):
                    os.remove(log_path)
                run_out_dir = get_path("run_out_dir", job_name, run_name)
                if os.path.exists(run_out_dir):
                    rmtree(run_out_dir)
                if mode == "delete":
                    job_manager.delete_run(job_name, run_name)
                    yaml_path = get_path("run_input", job_name, run_name)
                    if os.path.exists(yaml_path):
                        os.remove(yaml_path)
                else:
                    os.mkdir(run_out_dir)
                    job_manager.delete_exec(job_name, run_name)
        except Exception as e:
            print(str(e))
            could_not_be_cleaned.append(run_name)
            continue
        succeeded.append(run_name)
    return succeeded, could_not_be_terminated, could_not_be_cleaned

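# Illustrative usage of terminate_runs; the job and run names are placeholders.
# As implemented above, "terminate" only kills running processes and marks the
# execution, "reset" additionally removes logs and outputs and recreates an
# empty output dir, and "delete" also removes the run record and its input YAML.
def _example_terminate_runs():
    succeeded, not_terminated, not_cleaned = terminate_runs(
        "my_job", ["run_1", "run_2"], mode="reset")
    return succeeded, not_terminated, not_cleaned
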
def delete_job(job_id):
    run_ids = get_run_ids(job_id)
    _, could_not_be_terminated, could_not_be_cleaned = terminate_runs(
        job_id, run_ids, mode="delete")
    if len(could_not_be_terminated) > 0 or len(could_not_be_cleaned) > 0:
        return {
            "status": "failed run termination",
            "could_not_be_terminated": could_not_be_terminated,
            "could_not_be_cleaned": could_not_be_cleaned
        }
    try:
        job_dir = get_path("job_dir", job_id)
        if os.path.exists(job_dir):
            rmtree(job_dir)
        return {"status": "success"}
    except Exception as e:
        return {"status": "failed to remove job dir", "errorMessage": str(e)}

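# Sketch of how a caller might branch on delete_job's return value; "my_job"
# is a placeholder job id.
def _example_delete_job():
    result = delete_job("my_job")
    if result["status"] != "success":
        # either "failed run termination" (with the offending run lists) or
        # "failed to remove job dir" (with an "errorMessage")
        print(result)
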
def get_param_values():
    messages = []
    data = {}
    try:
        data_req = request.get_json()
        access_token = data_req["access_token"]
        login_required(access_token=access_token)
        param_values, configs = gen_form_sheet(
            output_file_path=None,
            template_config_file_path=get_path(
                "job_templ", wf_target=data_req["wf_target"]),
            has_multiple_runs=data_req["batch_mode"],
            run_names=data_req["run_names"],
            param_is_run_specific=data_req["param_modes"],
            show_please_fill=True,
            metadata={"workflow_name": data_req["wf_target"]})
        data = {"param_values": param_values, "configs": configs}
    except AssertionError as e:
        messages.append(handle_known_error(e, return_front_end_message=True))
    except Exception as e:
        messages.append(handle_unknown_error(e, return_front_end_message=True))
    return jsonify({"data": data, "messages": messages})

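# Illustrative shape of the JSON body the handler above expects; all values are
# placeholders (the workflow name, run names, and parameter key are made up):
_example_get_param_values_request = {
    "access_token": "<token>",
    "wf_target": "my_workflow.cwl",
    "batch_mode": True,
    "run_names": ["run_1", "run_2"],
    "param_modes": {"some_param": True},
}
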
def generate_param_form_sheet():
    # generate param form sheet with data sent by the client
    messages = []
    data = {}
    try:
        data_req = request.get_json()
        access_token = data_req["access_token"]
        login_required(access_token=access_token)
        sheet_format = data_req["sheet_format"]
        job_name = data_req["job_name"]
        wf_target = data_req["wf_target"]
        param_modes = data_req["param_modes"]
        run_names = data_req["run_names"]
        batch_mode = data_req["batch_mode"]
        temp_dir = make_temp_dir()  # will stay, needs to be cleaned up
        temp_dir_name = os.path.basename(temp_dir)
        output_file_path = os.path.join(temp_dir, f"{job_name}_inputs.{sheet_format}")
        gen_form_sheet(output_file_path=output_file_path,
                       template_config_file_path=get_path("job_templ", wf_target=wf_target),
                       has_multiple_runs=batch_mode,
                       run_names=run_names,
                       param_is_run_specific=param_modes,
                       show_please_fill=True,
                       metadata={"workflow_name": wf_target})
        data["get_form_sheet_href"] = url_for(
            "get_param_form_sheet",
            job_name=job_name,
            temp_dir_name=temp_dir_name,
            access_token=access_token  ## should be changed
        )
    except AssertionError as e:
        messages.append(handle_known_error(e, return_front_end_message=True))
    except Exception as e:
        messages.append(handle_unknown_error(e, return_front_end_message=True))
    return jsonify({"data": data, "messages": messages})

def get_job_list():
    messages = []
    jobs = []
    try:
        login_required()
        job_ids = get_job_ids()
        # for each dir:
        # - check if form sheet present
        # - if yes:
        #     - read in form sheet metadata
        #     - get list of runs
        for job_id in job_ids:
            job_dir = get_path("job_dir", job_id=job_id)
            try:
                job_param_sheet = get_path("job_param_sheet", job_id=job_id)
            except AssertionError as e:
                continue
            job_param_sheet_metadata = get_job_templ_info(
                "metadata", job_templ_path=job_param_sheet)
            if "workflow_name" not in job_param_sheet_metadata.keys() or \
                    job_param_sheet_metadata["workflow_name"] == "":
                messages.append({
                    "time": get_time_string(),
                    "type": "warning",
                    "text": "No workflow name was specified in the job_param_sheet of job \"" +
                            job_id + "\". Ignoring."
                })
                continue
            wf_target = job_param_sheet_metadata["workflow_name"]
            jobs.append({
                "job_id": job_id,
                "job_abs_path": job_dir,
                "wf_target": wf_target
            })
    except AssertionError as e:
        messages.append(handle_known_error(e, return_front_end_message=True))
    except Exception as e:
        messages.append(
            handle_unknown_error(
                e,
                alt_err_message="An unknown error occurred reading the execution directory",
                return_front_end_message=True))
    # get exec profile names:
    exec_profile_names = list(app.config["EXEC_PROFILES"].keys())
    exec_profile_params = {}
    for exec_profile_name in exec_profile_names:
        exec_profile_params[exec_profile_name] = {
            "max_retries":
                app.config["EXEC_PROFILES"][exec_profile_name]["max_retries"],
            "max_parallel_exec":
                app.config["EXEC_PROFILES"][exec_profile_name]["max_parallel_exec"],
            "allow_user_decrease_max_parallel_exec":
                app.config["EXEC_PROFILES"][exec_profile_name]["allow_user_decrease_max_parallel_exec"],
        }
    return jsonify({
        "data": {
            "exec_profiles": exec_profile_names,
            "exec_profile_params": exec_profile_params,
            "jobs": jobs
        },
        "messages": messages
    })

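# Illustrative shape of the JSON response assembled above; the profile name,
# job id, paths, and numeric values are placeholders:
_example_get_job_list_response = {
    "data": {
        "exec_profiles": ["my_exec_profile"],
        "exec_profile_params": {
            "my_exec_profile": {
                "max_retries": 2,
                "max_parallel_exec": 4,
                "allow_user_decrease_max_parallel_exec": True,
            }
        },
        "jobs": [{
            "job_id": "my_job",
            "job_abs_path": "/path/to/my_job",
            "wf_target": "my_workflow.cwl",
        }],
    },
    "messages": [],
}
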
def create_job(job_name,
               username,
               job_param_sheet=None,
               run_inputs=None,
               wf_target=None,
               validate_uris=True,
               search_paths=False,
               search_subdirs=False,
               search_dir=None,
               sheet_format="xlsx"):
    assert not (
        job_param_sheet is None and (run_inputs is None or wf_target is None)
    ), "You have to either provide a job_param_sheet or a list of run_inputs plus a wf_target document"
    runs_yaml_dir = get_path("runs_yaml_dir", job_name=job_name)
    if wf_target is None:
        job_param_sheet_dest_path = get_path("job_param_sheet",
                                             job_name=job_name,
                                             param_sheet_format=sheet_format)
        copyfile(job_param_sheet, job_param_sheet_dest_path)
        wf_target = get_job_templ_info(
            "metadata", job_templ_path=job_param_sheet_dest_path)["workflow_name"]
    wf_type = get_workflow_type_from_file_ext(wf_target)

    # make directories:
    make_job_dir_tree(job_name)

    # make run yamls:
    if not job_param_sheet is None:
        assert not (
            search_paths and search_dir is None
        ), "search_paths was set to True but no search dir has been defined."
        make_runs(
            sheet_file=job_param_sheet_dest_path,
            wf_type=wf_type,
            output_basename="",
            output_dir=runs_yaml_dir,
            validate_uris=validate_uris,
            search_paths=search_paths,
            search_subdirs=search_subdirs,
            allow_remote_uri=app.config["INPUT_SOURCES"]["URL"],
            allow_local_path=app.config["INPUT_SOURCES"]["local_file_system"],
            input_dir=search_dir)
    else:
        [copy(run_input, runs_yaml_dir) for run_input in run_inputs]

    # get run names from the produced yamls:
    runs_yaml_dir = get_path("runs_yaml_dir", job_name)
    run_yamls = fetch_files_in_dir(dir_path=runs_yaml_dir,
                                   file_exts=["yaml"],
                                   ignore_subdirs=True)
    run_names = [r["file_nameroot"] for r in run_yamls]

    # check if wf_target is an absolute path and exists,
    # else search for it in the wf_target dir:
    if os.path.exists(wf_target):
        wf_target = os.path.abspath(wf_target)
        allowed_dirs = get_allowed_base_dirs(job_name=job_name,
                                             allow_input=True,
                                             allow_upload=False,
                                             allow_download=False)
        assert not check_if_path_in_dirs(wf_target, allowed_dirs) is None, \
            "The provided wf_target file does not exist or you have no permission to access it."
    else:
        wf_target = get_path("wf", wf_target=wf_target)

    # copy wf_target document:
    copyfile(wf_target, get_path("job_wf", job_name=job_name, wf_type=wf_type))

    # make output directories:
    for run_name in run_names:
        run_out_dir = get_path("run_out_dir", job_name, run_name)
        if not os.path.exists(run_out_dir):
            os.mkdir(run_out_dir)

    # add job to database:
    _ = job_manager.create_job(job_name=job_name,
                               username=username,
                               wf_target=wf_target)

    # add runs to database:
    job_manager.create_runs(run_names=run_names, job_name=job_name)

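# Illustrative calls of create_job; the job names, user name, and file paths are
# placeholders. Either a filled-in param sheet is passed (wf_target is then read
# from the sheet's metadata), or prepared run inputs plus an explicit wf_target:
def _example_create_job():
    create_job(job_name="my_job", username="alice",
               job_param_sheet="/path/to/my_job_inputs.xlsx")
    create_job(job_name="my_other_job", username="alice",
               run_inputs=["/path/to/run_1.yaml"],
               wf_target="my_workflow.cwl")
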
def read_run_input(job_name, run_name):
    yaml_path = get_path("run_input", job_name, run_name)
    content, _ = read_file_content(yaml_path)
    return content

def read_run_log(job_name, run_name):
    log_path = get_path("run_log", job_name, run_name)
    if not os.path.isfile(log_path):
        return "Run not started yet."
    content, _ = read_file_content(log_path)
    return content

def exec_runs(job_name,
              run_names,
              exec_profile_name,
              username=None,
              max_parrallel_exec_user_def=None,
              add_exec_info={},
              send_email=True,
              access_token=None):
    if send_email and app.config["SEND_EMAIL"] and not app.config["USE_OIDC"]:
        if not username is None:
            user_email = user_manager.get_user_info(username)["email"]
        else:
            user_email = app.config["DEFAULT_EMAIL"]
    else:
        user_email = None

    # check if runs are already running:
    already_running_runs = job_manager.get_running_runs_names(
        job_name=job_name, run_names=run_names)
    run_names = sorted(list(set(run_names) - set(already_running_runs)))

    # create new exec entries in the database:
    exec_profile = app.config["EXEC_PROFILES"][exec_profile_name]
    if not max_parrallel_exec_user_def is None and \
            exec_profile["allow_user_decrease_max_parallel_exec"] and \
            max_parrallel_exec_user_def < exec_profile["max_parallel_exec"]:
        exec_profile["max_parallel_exec"] = max_parrallel_exec_user_def
    exec_ids = {}
    for run_name in run_names:
        exec_ids[run_name] = job_manager.create_exec(
            job_name=job_name,
            run_name=run_name,
            wf_target=get_path("job_wf", job_name=job_name),
            run_input=get_path("run_input", job_name=job_name, run_name=run_name),
            out_dir=get_path("run_out_dir", job_name=job_name, run_name=run_name),
            global_temp_dir=app.config["TEMP_DIR"],
            log=get_path("run_log", job_name=job_name, run_name=run_name),
            status="submitting",
            err_message="",
            retry_count=0,
            time_started=datetime.now(),
            time_finished=None,  #*
            timeout_limit=None,  #*
            pid=-1,  #*
            username=username,
            exec_profile=exec_profile,
            exec_profile_name=exec_profile_name,
            add_exec_info=add_exec_info,
            user_email=user_email,
            access_token=access_token)

    # start the background processes:
    # each child process is detached from the parent and manages its status
    # in the database autonomously; even if the parent process is
    # terminated / fails, the child process will continue
    started_runs = []
    for run_name in run_names:
        create_background_process([
            python_interpreter,
            os.path.join(basedir, "cwlab_bg_exec.py"),
            app.config["SQLALCHEMY_DATABASE_URI"],
            str(exec_ids[run_name]),
            str(app.config["DEBUG"])
        ], get_path("debug_run_log", job_name=job_name, run_name=run_name))
        started_runs.append(run_name)
    return started_runs, already_running_runs

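# Illustrative invocation of exec_runs; the job, run, profile, and user names
# are placeholders. Each run that is not already running gets its own detached
# cwlab_bg_exec.py process, as wired up above, and is returned in started_runs.
def _example_exec_runs():
    started_runs, already_running_runs = exec_runs(
        job_name="my_job",
        run_names=["run_1", "run_2"],
        exec_profile_name="my_exec_profile",
        username="alice")
    return started_runs, already_running_runs
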
def create_job(job_id,
               job_param_sheet=None,
               run_inputs=None,
               wf_target=None,
               validate_paths=True,
               search_paths=False,
               search_subdirs=False,
               search_dir=None,
               sheet_format="xlsx"):
    assert not (
        job_param_sheet is None and (run_inputs is None or wf_target is None)
    ), "You have to either provide a job_param_sheet or a list of run_inputs plus a wf_target document"
    runs_yaml_dir = get_path("runs_yaml_dir", job_id=job_id)
    if wf_target is None:
        job_param_sheet_dest_path = get_path("job_param_sheet",
                                             job_id=job_id,
                                             param_sheet_format=sheet_format)
        copyfile(job_param_sheet, job_param_sheet_dest_path)
        wf_target = get_job_templ_info(
            "metadata", job_templ_path=job_param_sheet_dest_path)["workflow_name"]
    wf_type = get_workflow_type_from_file_ext(wf_target)

    # make directories:
    make_job_dir_tree(job_id)

    # make run yamls:
    if not job_param_sheet is None:
        assert not (
            search_paths and search_dir is None
        ), "search_paths was set to True but no search dir has been defined."
        make_runs(sheet_file=job_param_sheet_dest_path,
                  wf_type=wf_type,
                  output_basename="",
                  output_dir=runs_yaml_dir,
                  validate_paths=validate_paths,
                  search_paths=search_paths,
                  search_subdirs=search_subdirs,
                  input_dir=search_dir)
    else:
        [copy(run_input, runs_yaml_dir) for run_input in run_inputs]

    # check if wf_target is an absolute path and exists,
    # else search for it in the wf_target dir:
    if os.path.exists(wf_target):
        wf_target = os.path.abspath(wf_target)
        allowed_dirs = get_allowed_base_dirs(job_id=job_id,
                                             allow_input=True,
                                             allow_upload=False,
                                             allow_download=False)
        assert not check_if_path_in_dirs(wf_target, allowed_dirs) is None, \
            "The provided wf_target file does not exist or you have no permission to access it."
    else:
        wf_target = get_path("wf", wf_target=wf_target)

    # copy wf_target document:
    copyfile(wf_target, get_path("job_wf", job_id=job_id, wf_type=wf_type))

    # make output directories:
    run_ids = get_run_ids(job_id)
    for run_id in run_ids:
        run_out_dir = get_path("run_out_dir", job_id, run_id)
        if not os.path.exists(run_out_dir):
            os.mkdir(run_out_dir)
