Example #1
def parse_arguments() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser()

    msg = "The corresponding attribute to be passed to the job scheduler."
    parser.add_argument("--mem", dest="mem", default="48G", help=msg)
    parser.add_argument("--cores", dest="cores", default=4, help=msg)
    parser.add_argument("--time", dest="time", default="02:00:00", help=msg)
    parser.add_argument("--partition", dest="partition", default="panda", help=msg)
    choices = divvy.ComputingConfiguration().list_compute_packages()
    msg = "`Divvy` compute configuration to be used when submitting the jobs."
    parser.add_argument("--divvy-configuration", dest="compute", choices=choices, help=msg)
    msg = "Whether to do all steps except job submission."
    parser.add_argument(
        "-d", "--dry-run", dest="dry_run", action="store_true", default=False, help=msg,
    )
    msg = "Attribute in sample annotation containing the path to the input files."
    parser.add_argument(
        "--attribute", dest="sample_file_attribute", default="sample_name", help=msg,
    )
    msg = "The parent directory of containting the input data."
    parser.add_argument("--input-dir", dest="input_dir", default="data", help=msg)
    msg = "Parent directory for output files."
    parser.add_argument("--output-dir", dest="output_dir", default="processed", help=msg)
    msg = "CSV file with metadata for all samples."
    parser.add_argument(dest="metadata", help=msg)
    msg = (
        "Whether all samples or only samples marked with a positive value in the `toggle` "
        "column should be processed."
    )
    parser.add_argument("--toggle", dest="toggle", action="store_true", default=False, help=msg)

    return parser
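
A minimal driving sketch for this parser; the metadata path is hypothetical and "slurm" is assumed to be among the configured divvy compute packages:

import divvy

parser = parse_arguments()
# the positional argument is the sample annotation CSV
args = parser.parse_args(["--divvy-configuration", "slurm", "metadata/annotation.csv"])

# activate the requested compute package before writing any submission scripts
compute = divvy.ComputingConfiguration()
compute.activate_package(args.compute)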
Example #2
def set_comp_env():
    global active_settings
    if globs.compute_config is None:
        globs.compute_config = divvy.ComputingConfiguration()
    selected_package = request.args.get('compute', type=str)
    globs.status_check_interval = int(request.args.get('interval', type=int)
                                      or globs.status_check_interval
                                      or POLL_INTERVAL)
    if globs.compute_package is None:
        globs.compute_package = "default"
    if selected_package is not None:
        success = globs.compute_config.clean_start(selected_package)
        if not success:
            msg = "Compute package '{}' cannot be activated".format(selected_package)
            app.logger.warning(msg)
            return jsonify(active_settings=render_template('compute_info.html', active_settings=None, msg=msg))
        globs.compute_package = selected_package
        active_settings = globs.compute_config.get_active_package()
        write_preferences({"status_check_interval": globs.status_check_interval,
                       "compute_package": globs.compute_package})
        return jsonify(active_settings=render_template('compute_info.html', active_settings=active_settings))
    active_settings = globs.compute_config.get_active_package()
    notify_not_set = (COMPUTE_SETTINGS_VARNAME[0]
                      if globs.compute_config.default_config_file == globs.compute_config.config_file
                      else None)
    write_preferences({"status_check_interval": globs.status_check_interval,
                       "compute_package": globs.compute_package})
    return render_template('preferences.html', env_conf_file=globs.compute_config.config_file,
                           compute_packages=globs.compute_config.list_compute_packages(), active_settings=active_settings,
                           compute_package=globs.compute_package, notify_not_set=notify_not_set,
                           default_interval=globs.status_check_interval)
Example #3
def set_comp_env():
    global active_settings
    if globs.compute_config is None:
        globs.compute_config = divvy.ComputingConfiguration()
    selected_package = request.args.get('compute', type=str)
    if globs.currently_selected_package is None:
        globs.currently_selected_package = "default"
    if selected_package is not None:
        success = globs.compute_config.clean_start(selected_package)
        if not success:
            msg = "Compute package '{}' cannot be activated".format(
                selected_package)
            app.logger.warning(msg)
            return jsonify(active_settings=render_template(
                'compute_info.html', active_settings=None, msg=msg))
        globs.currently_selected_package = selected_package
        active_settings = globs.compute_config.get_active_package()
        return jsonify(active_settings=render_template(
            'compute_info.html', active_settings=active_settings))
    active_settings = globs.compute_config.get_active_package()
    notify_not_set = (COMPUTE_SETTINGS_VARNAME[0]
                      if globs.compute_config.default_config_file == globs.compute_config.config_file
                      else None)
    return render_template(
        'set_comp_env.html',
        env_conf_file=globs.compute_config.config_file,
        compute_packages=globs.compute_config.list_compute_packages(),
        active_settings=active_settings,
        currently_selected_package=globs.currently_selected_package,
        notify_not_set=notify_not_set)
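
Outside a Flask view, the package-switching logic in the two examples above reduces to a few divvy calls; a minimal sketch, assuming a "slurm" package is defined in the loaded configuration:

cc = divvy.ComputingConfiguration()
print(cc.list_compute_packages())   # package names available for activation
if cc.clean_start("slurm"):         # reset to a clean state, then activate the package
    # mapping with e.g. submission_template and submission_command
    print(cc.get_active_package())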
Example #4
def main(cli=None) -> int:
    log.info("IMCpipeline runner")
    parser = parse_arguments()
    args, unknown = parser.parse_known_args(cli)
    # the extra arguments will be passed to the pipeline and
    # compounded arguments (mostly the --cellprofiler-exec argument)
    # should be quoted again
    args.cli = ["'" + x + "'" if " " in x else x for x in unknown]

    log.info("Generating project from given CSV annotation.")
    annot = pd.read_csv(args.metadata).set_index(args.sample_file_attribute)
    if args.toggle:
        log.info("Subsampling samples based on the `toggle` column.")
        annot = annot.loc[annot["toggle"].isin([1, "1", True, "TRUE", "True"]), :]

    log.info("Setting compute settings using divvy.")
    compute = divvy.ComputingConfiguration()
    compute.activate_package(args.compute)

    # Now prepare job submission
    jobs = list()
    cli_args = " ".join(args.cli)
    # the '--' separates the extra (nargs) arguments from the positional ones when there are no extra args
    if cli_args == "":
        cli_args = "--"
    for sample, _ in annot.iterrows():
        log.info("Processing sample %s", sample)

        input_dir = pjoin(args.input_dir, sample)
        output_dir = pjoin(args.output_dir, sample)

        cmd = f"imcpipeline {cli_args} -i {input_dir} -o {output_dir}"

        job_name = f"imcpipeline_{sample}"
        output_prefix = pjoin("submission", job_name)
        job_file = output_prefix + ".sh"
        data = {
            "jobname": job_name,
            "logfile": output_prefix + ".log",
            "mem": args.mem,
            "cores": args.cores,
            "time": args.time,
            "partition": args.partition,
            "code": cmd,
        }

        compute.write_script(job_file, data)
        jobs.append(job_file)

    log.info("Submitting jobs.")
    cmd = compute.get_active_package()["submission_command"]

    if not args.dry_run:
        for job in jobs:
            print(cmd, job)
            subprocess.call([cmd, job])

    log.info("Finished with all samples.")
    return 0
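
A hedged way to exercise this runner without touching the scheduler; the annotation path is illustrative, and a "slurm" divvy package is assumed:

# dry run: writes one submission script per sample under ./submission/ but does not call the submission command
main(["--divvy-configuration", "slurm", "--dry-run", "metadata/annotation.csv"])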
Example #5
 def test_activate_package(self):
     dcc = divvy.ComputingConfiguration()
     dcc.activate_package("default")
     t = dcc.compute.submission_template
     t2 = dcc["compute"]["submission_template"]
     assert t == t2
     dcc.activate_package("slurm")
     t = dcc.compute.submission_template
     t2 = dcc["compute"]["submission_template"]
     assert t == t2
Example #6
 def test_write_script_adapters(self, compute, package):
     """Test successful adapter sourcing from various Mapping types"""
     dcc = divvy.ComputingConfiguration()
     dcc.activate_package(package)
     extra_vars = {"compute": compute}
     dcc.write_script("test.sub", extra_vars)
     with open("test.sub", "r") as f:
         contents = f.read()
         assert contents.find("1000") > 0
     os.remove("test.sub")
Example #7
 def test_adapters_overwitten_by_others(self):
     dcc = divvy.ComputingConfiguration()
     dcc.activate_package("singularity_slurm")
     compute = YacAttMap({"mem": 1000})
     extra_vars = [{"compute": compute}, {"MEM": 333}]
     dcc.write_script("test1.sub", extra_vars)
     with open("test1.sub", "r") as f:
         contents = f.read()
         assert not (contents.find("1000") > 0)
         assert contents.find("333") > 0
     os.remove("test1.sub")
Example #8
 def test_write_script(self):
     dcc = divvy.ComputingConfiguration()
     dcc.activate_package("singularity_slurm")
     extra_vars = {
         "singularity_image": "simg",
         "jobname": "jbname",
         "code": "mycode",
     }
     dcc.write_script("test.sub", extra_vars)
     with open("test.sub", "r") as f:
         contents = f.read()
     assert contents.find("mycode") > 0
     assert contents.find("{SINGULARITY_ARGS}") < 0
     os.remove("test.sub")
Example #9
    return path


def _req_input_to_args(req_input):
    """
    Given a list of the required inputs for the build command, create an args
    string

    :param list[str] req_input: input names
    :return str: args string
    """
    return ["--" + x + " <arg_here>" for x in req_input]


subdir_path = _make_sub_dir(args.path, args.genome)
dcc = divvy.ComputingConfiguration()
dcc.activate_package("slurm")
cmd_template = "refgenie build -g {g} -a {a} {req_input_str}"
genome = args.genome
to_remove = ["genome", "path"]

data = vars(args)
for i in to_remove:
    data.pop(i)

for asset in asset_build_packages:
    sub_script = os.path.join(subdir_path, asset + ".sub")
    req_input = asset_build_packages[asset]["required_inputs"]
    if req_input:
        print(
            "{} asset requires additional input in the command ({}), so '{}'"
Example #10
def dcc(request):
    """Provide ComputingConfiguration objects for all files in divcfg repository"""
    return divvy.ComputingConfiguration(filepath=request.param)
Example #11
def empty_dcc():
    """Provide the empty/default ComputingConfiguration object"""
    return divvy.ComputingConfiguration()
Example #12
import os
import glob
import divvy
import pytest

THIS_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(THIS_DIR, "data/divcfg-master")
FILES = glob.glob(DATA_DIR + "/*.yaml")
DCC_ATTRIBUTES = divvy.ComputingConfiguration().keys()


@pytest.fixture
def empty_dcc():
    """Provide the empty/default ComputingConfiguration object"""
    return divvy.ComputingConfiguration()


@pytest.fixture(params=FILES)
def dcc(request):
    """Provide ComputingConfiguration objects for all files in divcfg repository"""
    return divvy.ComputingConfiguration(filepath=request.param)


@pytest.fixture
def mock_env_missing(monkeypatch):
    for env_var in divvy.const.COMPUTE_SETTINGS_VARNAME:
        monkeypatch.delenv(env_var, raising=False)
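
A hypothetical test built on these fixtures could check that every configuration loaded from the divcfg repository exposes the default attribute set, for example:

def test_default_attributes_present(dcc, empty_dcc):
    # each loaded configuration should carry at least the keys of the default one
    for attribute in DCC_ATTRIBUTES:
        assert attribute in dcc
        assert attribute in empty_dcc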
Example #13
def submit_job(
    code,
    job_file,
    log_file=None,
    computing_configuration=None,
    dry_run=False,
    limited_number=False,
    total_job_lim=500,
    refresh_time=10,
    in_between_time=5,
    **kwargs
):
    """
    Submit a job to be run.
    Uses divvy to allow running on a local computer or distributed computing resources.

    Parameters
    ----------
    code : :obj:`str`
        String of command(s) to be run.
    job_file : :obj:`str`
        File to write job ``code`` to.
    log_file : :obj:`str`
        Log file to write job output to.

        Defaults to ``job_file`` with ".log" ending.
    computing_configuration : :obj:`str`
        Name of :class:`divvy` computing configuration to use.

        Defaults to 'default', which runs the job on localhost.
    dry_run : :obj:`bool`
        Whether to skip actually running the job.

        Defaults to :obj:`False`.
    limited_number : :obj:`bool`
        Whether to restrict submission to a maximum number of running jobs.
        Currently only possible if using "slurm".

        Defaults to :obj:`False`.
    total_job_lim : :obj:`int`
        Maximum number of jobs to restrict to.

        Defaults to 500.
    refresh_time : :obj:`int`
        Time in seconds between checks of the number of running jobs.

        Defaults to 10.
    in_between_time : :obj:`int`
        Time in seconds between consecutive job submissions.

        Defaults to 5.
    **kwargs : :obj:`dict`
        Additional keyword arguments are passed to the submission template selected by `computing_configuration`.
        Pass for example: jobname="job", cores=2, mem=8000, partition="longq".
    """
    import time
    import subprocess

    import divvy
    from ngs_toolkit import _CONFIG, _LOGGER

    # reduce level of logging from divvy
    # only for divvy <=0.
    if "logging" in divvy.__dict__.keys():
        divvy.logging.getLogger("divvy").setLevel("ERROR")

    def count_jobs_running(check_cmd="squeue", sep="\n"):
        """
        Count running jobs on a cluster by invoking a command that lists the jobs.
        """
        return len(subprocess.check_output(check_cmd).decode().split(sep))

    def submit_job_if_possible(
        cmd, check_cmd="squeue", total_job_lim=800, refresh_time=10, in_between_time=5
    ):
        submit = count_jobs_running(check_cmd) < total_job_lim
        while not submit:
            time.sleep(refresh_time)
            submit = count_jobs_running(check_cmd) < total_job_lim
        subprocess.call(cmd)
        time.sleep(in_between_time)

    if log_file is None:
        log_file = ".".join(job_file.split(".")[:-1]) + ".log"

    # Get computing configuration from config
    if computing_configuration is None:
        try:
            computing_configuration = _CONFIG["preferences"]["computing_configuration"]
        except KeyError:
            msg = "'computing_configuration' was not given"
            msg += " and default could not be get from config."
            hint = " Pass a value or add one to the section"
            hint += " preferences:computing_configuration'"
            hint += " in the ngs_toolkit config file."
            _LOGGER.error(msg + hint)
            raise

    dcc = divvy.ComputingConfiguration()
    if computing_configuration is not None:
        dcc.activate_package(computing_configuration)

    # Generate job script
    d = {"code": code, "logfile": log_file}
    d.update(kwargs)
    dcc.write_script(job_file, d)

    # Submit job
    if not dry_run:
        scmd = dcc["compute"]["submission_command"]
        cmd = scmd.split(" ") + [job_file]

        # simply submit if not limiting submission to the number of already running jobs
        if not limited_number:
            subprocess.call(cmd)
        else:
            # otherwise, submit only when the number of running jobs is below `total_job_lim`
            # this is currently only possible with slurm (sbatch/squeue)
            if "sbatch" not in scmd:
                subprocess.call(cmd)
            else:
                submit_job_if_possible(
                    cmd,
                    check_cmd="squeue",
                    total_job_lim=total_job_lim,
                    refresh_time=refresh_time,
                    in_between_time=in_between_time,
                )
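
A hedged call sketch for submit_job; the command, paths, and resource values are illustrative, and a "slurm" package is assumed to exist in the active divvy configuration:

submit_job(
    "python process_sample.py --sample S01",  # code to write into the job script
    "submission/process_S01.sh",              # job script produced by divvy's write_script
    computing_configuration="slurm",
    dry_run=True,                              # write the script but do not submit it
    jobname="process_S01", cores=2, mem=8000, partition="longq",
)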