Code example #1
def make_job_dir_tree(job_name):
    """Create the on-disk directory tree for a job.

    Ensures the job's base directory and its standard sub-directories
    (run input YAMLs, run outputs, run logs, and workflow documents)
    exist. Directories that already exist are left untouched.

    Args:
        job_name: Name of the job whose directory tree is created.
    """
    # The same "resolve path, create if missing" step applies to every
    # directory kind, so iterate instead of repeating the code five times.
    # "job_dir" must come first: the others live inside it.
    dir_kinds = [
        "job_dir",        # job base directory
        "runs_yaml_dir",  # per-run input YAMLs
        "runs_out_dir",   # per-run outputs
        "runs_log_dir",   # per-run logs
        "job_wf_dir",     # workflow documents
    ]
    for dir_kind in dir_kinds:
        dir_path = get_path(dir_kind, job_name)
        if not os.path.exists(dir_path):
            os.mkdir(dir_path)
Code example #2
def terminate_runs(
        job_name,
        run_names,
        mode="terminate"  # can be one of terminate, reset, or delete
):
    """Terminate, reset, or delete the given runs of a job.

    For runs that started but have not finished, the run's process tree
    is killed first. Then, depending on `mode`:
      - "terminate": mark the exec as ended in the database
      - "reset":     remove log and output dir, recreate an empty output
                     dir, and drop the exec record
      - "delete":    remove log, output dir, input YAML, and the run's
                     database record

    Returns:
        Tuple of three lists of run names:
        (succeeded, could_not_be_terminated, could_not_be_cleaned).
    """
    could_not_be_terminated = []
    could_not_be_cleaned = []
    succeeded = []
    runs_info = get_runs_info(job_name, run_names, return_pid=True)
    # Note: the original kept an unused `db_changed` flag here; removed.
    for run_name, run_info in runs_info.items():
        try:
            # Only kill the process if the run started but never finished.
            if isinstance(run_info["time_started"], datetime) and \
                    not isinstance(run_info["time_finished"], datetime):
                if run_info["pid"] != -1:
                    if not kill_proc_tree(run_info["pid"]):
                        could_not_be_terminated.append(run_name)
                        continue
                    cleanup_zombie_process(run_info["pid"])
            if mode == "terminate":
                job_manager.set_exec_ended(job_name,
                                           run_name,
                                           status="terminated by user",
                                           time_finished=datetime.now())
            else:
                # reset/delete: remove the run's log and output directory.
                log_path = get_path("run_log", job_name, run_name)
                if os.path.exists(log_path):
                    os.remove(log_path)
                run_out_dir = get_path("run_out_dir", job_name, run_name)
                if os.path.exists(run_out_dir):
                    rmtree(run_out_dir)

                if mode == "delete":
                    job_manager.delete_run(job_name, run_name)
                    yaml_path = get_path("run_input", job_name, run_name)
                    if os.path.exists(yaml_path):
                        os.remove(yaml_path)
                else:
                    # reset: recreate an empty output dir, drop exec record.
                    os.mkdir(run_out_dir)
                    job_manager.delete_exec(job_name, run_name)
        except Exception as e:
            print(str(e))
            could_not_be_cleaned.append(run_name)
            continue
        succeeded.append(run_name)
    return succeeded, could_not_be_terminated, could_not_be_cleaned
Code example #3 — file: exec.py, project: KerstenBreuer/CWLab
def delete_job(job_id):
    """Remove a job entirely.

    First terminates and deletes all of the job's runs, then removes the
    job directory from disk. Returns a status dict describing the outcome.
    """
    run_ids = get_run_ids(job_id)
    _, could_not_be_terminated, could_not_be_cleaned = terminate_runs(
        job_id, run_ids, mode="delete")
    termination_failed = (
        len(could_not_be_terminated) > 0 or len(could_not_be_cleaned) > 0
    )
    if termination_failed:
        # Do not touch the job dir while runs could not be cleaned up.
        return {
            "status": "failed run termination",
            "could_not_be_terminated": could_not_be_terminated,
            "could_not_be_cleaned": could_not_be_cleaned
        }
    try:
        job_dir = get_path("job_dir", job_id)
        if os.path.exists(job_dir):
            rmtree(job_dir)
    except Exception as e:
        return {"status": "failed to remove job dir", "errorMessage": str(e)}
    return {"status": "success"}
Code example #4 — file: create_job.py, project: krini-project/CWLab
def get_param_values():
    """Return parameter values and configs for a workflow's job template.

    Reads the request JSON, checks the access token, and generates the
    form-sheet data in memory (no output file). Responds with JSON
    containing `data` and any collected `messages`.
    """
    messages = []
    data = {}
    try:
        request_data = request.get_json()
        access_token = request_data["access_token"]
        login_required(access_token=access_token)
        template_path = get_path("job_templ",
                                 wf_target=request_data["wf_target"])
        param_values, configs = gen_form_sheet(
            output_file_path=None,
            template_config_file_path=template_path,
            has_multiple_runs=request_data["batch_mode"],
            run_names=request_data["run_names"],
            param_is_run_specific=request_data["param_modes"],
            show_please_fill=True,
            metadata={"workflow_name": request_data["wf_target"]})
        data = {"param_values": param_values, "configs": configs}
    except AssertionError as e:
        messages.append(handle_known_error(e, return_front_end_message=True))
    except Exception as e:
        messages.append(handle_unknown_error(e, return_front_end_message=True))
    return jsonify({"data": data, "messages": messages})
Code example #5 — file: create_job.py, project: krini-project/CWLab
def generate_param_form_sheet():
    """Generate a parameter form sheet from data sent by the client.

    Writes the sheet into a freshly created temp directory and responds
    with JSON containing a download URL for it.
    """
    messages = []
    data = {}
    try:
        data_req = request.get_json()
        access_token = data_req["access_token"]
        login_required(access_token=access_token)
        sheet_format = data_req["sheet_format"]
        job_name = data_req["job_name"]
        wf_target = data_req["wf_target"]
        param_modes = data_req["param_modes"]
        run_names = data_req["run_names"]
        batch_mode = data_req["batch_mode"]
        # The temp dir outlives this request and needs to be cleaned up
        # by whoever serves the sheet.
        temp_dir = make_temp_dir()
        temp_dir_name = os.path.basename(temp_dir)
        sheet_basename = f"{job_name}_inputs.{sheet_format}"
        output_file_path = os.path.join(temp_dir, sheet_basename)
        gen_form_sheet(
            output_file_path=output_file_path,
            template_config_file_path=get_path("job_templ",
                                               wf_target=wf_target),
            has_multiple_runs=batch_mode,
            run_names=run_names,
            param_is_run_specific=param_modes,
            show_please_fill=True,
            metadata={"workflow_name": wf_target})
        data["get_form_sheet_href"] = url_for(
            "get_param_form_sheet",
            job_name=job_name,
            temp_dir_name=temp_dir_name,
            access_token=access_token  # NOTE(review): token in URL; should be changed
        )
    except AssertionError as e:
        messages.append(handle_known_error(e, return_front_end_message=True))
    except Exception as e:
        messages.append(handle_unknown_error(e, return_front_end_message=True))
    return jsonify({"data": data, "messages": messages})
Code example #6
def get_job_list():
    """List all jobs that have a readable param sheet, plus exec profiles.

    Returns a JSON response with:
      - jobs: one entry per job whose param sheet declares a workflow name
      - exec_profiles / exec_profile_params: configured execution profiles
      - messages: warnings and errors collected along the way
    """
    messages = []
    jobs = []
    try:
        login_required()
        job_ids = get_job_ids()
        # For each job dir:
        #   - skip it if no param sheet is present
        #   - otherwise read the sheet metadata and require a workflow name
        for job_id in job_ids:
            job_dir = get_path("job_dir", job_id=job_id)
            try:
                job_param_sheet = get_path("job_param_sheet", job_id=job_id)
            except AssertionError:
                # No param sheet for this job; skip it silently.
                continue

            job_param_sheet_metadata = get_job_templ_info(
                "metadata", job_templ_path=job_param_sheet)
            # Missing or empty workflow name -> warn and skip the job.
            if job_param_sheet_metadata.get("workflow_name", "") == "":
                messages.append({
                    "time": get_time_string(),
                    "type": "warning",
                    "text":
                    "No workflow name was specified in the job_param_sheet of job \""
                    + job_id + "\". Ignoring."
                })
                continue
            jobs.append({
                "job_id": job_id,
                "job_abs_path": job_dir,
                "wf_target": job_param_sheet_metadata["workflow_name"]
            })
    # Bug fix: the original appended BOTH a known-error and an unknown-error
    # message inside a single `except AssertionError` clause; split into the
    # two separate handlers used by the other endpoints in this module.
    except AssertionError as e:
        messages.append(handle_known_error(e, return_front_end_message=True))
    except Exception as e:
        messages.append(
            handle_unknown_error(
                e,
                alt_err_message=(
                    "An unkown error occured reading the execution directory"),
                return_front_end_message=True))

    # get exec profile names and per-profile parameters:
    exec_profiles = app.config["EXEC_PROFILES"]
    exec_profile_names = list(exec_profiles.keys())
    exec_profile_params = {}
    for profile_name in exec_profile_names:
        profile = exec_profiles[profile_name]
        exec_profile_params[profile_name] = {
            "max_retries": profile["max_retries"],
            "max_parallel_exec": profile["max_parallel_exec"],
            "allow_user_decrease_max_parallel_exec":
            profile["allow_user_decrease_max_parallel_exec"],
        }

    return jsonify({
        "data": {
            "exec_profiles": exec_profile_names,
            "exec_profile_params": exec_profile_params,
            "jobs": jobs
        },
        "messages": messages
    })
Code example #7
def create_job(job_name,
               username,
               job_param_sheet=None,
               run_inputs=None,
               wf_target=None,
               validate_uris=True,
               search_paths=False,
               search_subdirs=False,
               search_dir=None,
               sheet_format="xlsx"):
    """Create a new job: directories, run YAMLs, workflow copy, DB entries.

    Either `job_param_sheet` (a sheet describing all runs) or both
    `run_inputs` (a list of run YAML paths) and `wf_target` (a workflow
    document) must be provided.

    Args:
        job_name: Name of the job to create.
        username: Owner of the job (stored in the database).
        job_param_sheet: Optional path to a filled-in param sheet.
        run_inputs: Optional list of per-run input YAML files.
        wf_target: Workflow document; if None, it is read from the
            param sheet's metadata.
        validate_uris / search_paths / search_subdirs / search_dir:
            Forwarded to make_runs for input validation and path search.
        sheet_format: File extension of the param sheet (default "xlsx").
    """
    assert not (
        job_param_sheet is None and (run_inputs is None or wf_target is None)
    ), "You have to either provide a job_param_sheet or a list of run_inputs plus a wf_target document"

    runs_yaml_dir = get_path("runs_yaml_dir", job_name=job_name)

    # Copy the param sheet into the job dir whenever one is provided.
    # Bug fix: the original only did this when wf_target was None, so
    # passing both a sheet and a wf_target crashed later on an undefined
    # job_param_sheet_dest_path.
    job_param_sheet_dest_path = None
    if job_param_sheet is not None:
        job_param_sheet_dest_path = get_path("job_param_sheet",
                                             job_name=job_name,
                                             param_sheet_format=sheet_format)
        copyfile(job_param_sheet, job_param_sheet_dest_path)
    if wf_target is None:
        wf_target = get_job_templ_info(
            "metadata",
            job_templ_path=job_param_sheet_dest_path)["workflow_name"]
    wf_type = get_workflow_type_from_file_ext(wf_target)

    # make directories:
    make_job_dir_tree(job_name)

    # make run yamls:
    if job_param_sheet is not None:
        assert not (
            search_paths and search_dir is None
        ), "search_paths was set to True but no search dir has been defined."
        make_runs(
            sheet_file=job_param_sheet_dest_path,
            wf_type=wf_type,
            output_basename="",
            output_dir=runs_yaml_dir,
            validate_uris=validate_uris,
            search_paths=search_paths,
            search_subdirs=search_subdirs,
            allow_remote_uri=app.config["INPUT_SOURCES"]["URL"],
            allow_local_path=app.config["INPUT_SOURCES"]["local_file_system"],
            input_dir=search_dir)
    else:
        # Plain loop (not a list comprehension): copy() is called purely
        # for its side effect.
        for run_input in run_inputs:
            copy(run_input, runs_yaml_dir)

    # get run names from the produced yamls:
    run_yamls = fetch_files_in_dir(dir_path=runs_yaml_dir,
                                   file_exts=["yaml"],
                                   ignore_subdirs=True)
    run_names = [r["file_nameroot"] for r in run_yamls]

    # check if wf_target is absolute path and exists, else search for it
    # in the wf_target dir:
    if os.path.exists(wf_target):
        wf_target = os.path.abspath(wf_target)
        allowed_dirs = get_allowed_base_dirs(job_name=job_name,
                                             allow_input=True,
                                             allow_upload=False,
                                             allow_download=False)
        assert not check_if_path_in_dirs(
            wf_target, allowed_dirs
        ) is None, "The provided wf_target file does not exit or you have no permission to access it."
    else:
        wf_target = get_path("wf", wf_target=wf_target)
    # copy wf_target document:
    copyfile(wf_target, get_path("job_wf", job_name=job_name, wf_type=wf_type))

    # make output directories:
    for run_name in run_names:
        run_out_dir = get_path("run_out_dir", job_name, run_name)
        if not os.path.exists(run_out_dir):
            os.mkdir(run_out_dir)

    # add job to database:
    _ = job_manager.create_job(job_name=job_name,
                               username=username,
                               wf_target=wf_target)

    # add runs to database:
    job_manager.create_runs(run_names=run_names, job_name=job_name)
Code example #8
def read_run_input(job_name, run_name):
    """Return the text content of a run's input YAML file."""
    input_path = get_path("run_input", job_name, run_name)
    file_content, _ = read_file_content(input_path)
    return file_content
Code example #9
def read_run_log(job_name, run_name):
    """Return a run's log content, or a placeholder if it has not started."""
    log_path = get_path("run_log", job_name, run_name)
    if os.path.isfile(log_path):
        log_content, _ = read_file_content(log_path)
        return log_content
    return "Run not started yet."
Code example #10
def exec_runs(job_name,
              run_names,
              exec_profile_name,
              username=None,
              max_parrallel_exec_user_def=None,
              add_exec_info=None,
              send_email=True,
              access_token=None):
    """Start the given runs of a job as detached background processes.

    Creates an exec entry in the database for every run that is not
    already running, then spawns one background process per run which
    manages its own status autonomously.

    Returns:
        Tuple (started_runs, already_running_runs) of run-name lists.
    """
    # Bug fix: the default was a mutable `{}`, shared between calls.
    if add_exec_info is None:
        add_exec_info = {}

    # Determine the notification email address (if emailing is enabled):
    if send_email and app.config["SEND_EMAIL"] and not app.config["USE_OIDC"]:
        if username is not None:
            user_email = user_manager.get_user_info(username)["email"]
        else:
            user_email = app.config["DEFAULT_EMAIL"]
    else:
        user_email = None

    # check if runs are already running:
    already_running_runs = job_manager.get_running_runs_names(
        job_name=job_name, run_names=run_names)

    run_names = sorted(set(run_names) - set(already_running_runs))

    # create new exec entry in database:
    # Bug fix: work on a copy of the profile so a user-requested decrease
    # of max_parallel_exec does not mutate the shared dict in app.config
    # (the original persisted the decrease across all later requests).
    exec_profile = dict(app.config["EXEC_PROFILES"][exec_profile_name])
    if max_parrallel_exec_user_def is not None and \
            exec_profile["allow_user_decrease_max_parallel_exec"] and \
            max_parrallel_exec_user_def < exec_profile["max_parallel_exec"]:
        exec_profile["max_parallel_exec"] = max_parrallel_exec_user_def
    exec_ids = {}
    for run_name in run_names:
        exec_ids[run_name] = job_manager.create_exec(
            job_name=job_name,
            run_name=run_name,
            wf_target=get_path("job_wf", job_name=job_name),
            run_input=get_path("run_input",
                               job_name=job_name,
                               run_name=run_name),
            out_dir=get_path("run_out_dir",
                             job_name=job_name,
                             run_name=run_name),
            global_temp_dir=app.config["TEMP_DIR"],
            log=get_path("run_log", job_name=job_name, run_name=run_name),
            status="submitting",
            err_message="",
            retry_count=0,
            time_started=datetime.now(),
            time_finished=None,  # set by the background process
            timeout_limit=None,  # set by the background process
            pid=-1,  # set by the background process
            username=username,
            exec_profile=exec_profile,
            exec_profile_name=exec_profile_name,
            add_exec_info=add_exec_info,
            user_email=user_email,
            access_token=access_token)

    # start the background process:
    # the child process will be detached from the parent
    # and manages its status in the database autonomously;
    # even if the parent process is terminated / fails,
    # the child process will continue
    started_runs = []
    for run_name in run_names:
        create_background_process([
            python_interpreter,
            os.path.join(basedir, "cwlab_bg_exec.py"),
            app.config["SQLALCHEMY_DATABASE_URI"],
            str(exec_ids[run_name]),
            str(app.config["DEBUG"])
        ], get_path("debug_run_log", job_name=job_name, run_name=run_name))
        started_runs.append(run_name)
    return started_runs, already_running_runs
Code example #11 — file: exec.py, project: KerstenBreuer/CWLab
def create_job(job_id,
               job_param_sheet=None,
               run_inputs=None,
               wf_target=None,
               validate_paths=True,
               search_paths=False,
               search_subdirs=False,
               search_dir=None,
               sheet_format="xlsx"):
    """Create a new job: directories, run YAMLs, workflow copy, outputs.

    Either `job_param_sheet` (a sheet describing all runs) or both
    `run_inputs` (a list of run YAML paths) and `wf_target` (a workflow
    document) must be provided.
    """
    assert not (
        job_param_sheet is None and (run_inputs is None or wf_target is None)
    ), "You have to either provide a job_param_sheet or a list of run_inputs plus a wf_target document"

    runs_yaml_dir = get_path("runs_yaml_dir", job_id=job_id)

    # Copy the param sheet into the job dir whenever one is provided.
    # Bug fix: the original only did this when wf_target was None, so
    # passing both a sheet and a wf_target crashed later on an undefined
    # job_param_sheet_dest_path.
    job_param_sheet_dest_path = None
    if job_param_sheet is not None:
        job_param_sheet_dest_path = get_path("job_param_sheet",
                                             job_id=job_id,
                                             param_sheet_format=sheet_format)
        copyfile(job_param_sheet, job_param_sheet_dest_path)
    if wf_target is None:
        wf_target = get_job_templ_info(
            "metadata",
            job_templ_path=job_param_sheet_dest_path)["workflow_name"]
    wf_type = get_workflow_type_from_file_ext(wf_target)

    # make directories:
    make_job_dir_tree(job_id)

    # make run yamls:
    if job_param_sheet is not None:
        assert not (
            search_paths and search_dir is None
        ), "search_paths was set to True but no search dir has been defined."
        make_runs(sheet_file=job_param_sheet_dest_path,
                  wf_type=wf_type,
                  output_basename="",
                  output_dir=runs_yaml_dir,
                  validate_paths=validate_paths,
                  search_paths=search_paths,
                  search_subdirs=search_subdirs,
                  input_dir=search_dir)
    else:
        # Plain loop (not a list comprehension): copy() is called purely
        # for its side effect.
        for run_input in run_inputs:
            copy(run_input, runs_yaml_dir)

    # check if wf_target is absolute path and exists, else search for it
    # in the wf_target dir:
    if os.path.exists(wf_target):
        wf_target = os.path.abspath(wf_target)
        allowed_dirs = get_allowed_base_dirs(job_id=job_id,
                                             allow_input=True,
                                             allow_upload=False,
                                             allow_download=False)
        assert not check_if_path_in_dirs(
            wf_target, allowed_dirs
        ) is None, "The provided wf_target file does not exit or you have no permission to access it."
    else:
        wf_target = get_path("wf", wf_target=wf_target)
    # copy wf_target document:
    copyfile(wf_target, get_path("job_wf", job_id=job_id, wf_type=wf_type))

    # make output directories:
    run_ids = get_run_ids(job_id)
    for run_id in run_ids:
        run_out_dir = get_path("run_out_dir", job_id, run_id)
        if not os.path.exists(run_out_dir):
            os.mkdir(run_out_dir)