Beispiel #1
0
def symlink_analysis_to_targets(analysis):
    """
    Link a succeeded analysis into the `analyses` directory of its owners.

    Nothing happens unless `analysis["status"]` is `SUCCEEDED`. Otherwise the
    analysis `storage_url` is symlinked under the name
    `<application name>__<application version>__<pk>` (name and version are
    lowercased with spaces replaced by underscores) inside an `analyses`
    directory below the storage url of every entry in `analysis["targets"]`
    and, when set, of `analysis["project_level_analysis"]`.

    Arguments:
        analysis (dict): analysis record; must provide `status`, and for
            succeeded analyses also `storage_url`, `application`, `pk`,
            `targets` and `project_level_analysis`.
    """
    if analysis["status"] != "SUCCEEDED":
        return

    source = analysis["storage_url"]
    application = analysis["application"]
    slugs = [
        application[key].lower().replace(" ", "_") for key in ("name", "version")
    ]
    link_name = "__".join(slugs + [str(analysis["pk"])])

    def link_into(owner, endpoint):
        """Symlink the analysis inside the `analyses` directory of `owner`."""
        # records without a storage url are replaced by whatever
        # update_storage_url returns for that endpoint and primary key
        if not owner["storage_url"]:
            owner = update_storage_url(endpoint, owner["pk"])

        container = join(owner["storage_url"], "analyses")
        os.makedirs(container, exist_ok=True)
        utils.force_symlink(source, join(container, link_name))

    for target in analysis["targets"]:
        link_into(target, "experiments")

    project_level = analysis["project_level_analysis"]
    if project_level:
        link_into(project_level, "projects")
Beispiel #2
0
def test_force_symlink(tmpdir):
    """Check that a symlink is created when the link path does not exist yet."""
    workdir = str(tmpdir)
    target, link = join(workdir, "src"), join(workdir, "dst")

    # the link target must be a real file with some content
    with open(target, "w") as handle:
        handle.write("Not empty.")

    utils.force_symlink(target, link)
    assert os.path.islink(link)
Beispiel #3
0
def symlink_experiment_to_projects(experiment):
    """
    Link the experiment storage directory into each of its projects.

    For every entry of `experiment["projects"]` a symlink named after the
    experiment `system_id` is created inside the `experiments` directory of
    the project storage url, pointing at the experiment `storage_url`.

    Arguments:
        experiment (dict): experiment record with `projects`, `system_id`
            and `storage_url` keys.
    """
    for project in experiment["projects"]:
        # projects without a storage url are replaced by the record
        # returned from update_storage_url
        if not project["storage_url"]:  # pragma: no cover
            project = update_storage_url("projects", project["pk"])

        container = join(project["storage_url"], "experiments")
        link_path = join(container, experiment["system_id"])
        os.makedirs(container, exist_ok=True)
        utils.force_symlink(experiment["storage_url"], link_path)
Beispiel #4
0
def test_force_symlink_overwrite(tmpdir):
    """Check that an existing regular file at the link path gets replaced."""
    workdir = str(tmpdir)
    target, link = join(workdir, "src"), join(workdir, "dst")

    # create both files; the one at the link path must be overwritten
    for path, content in ((target, "Correct."), (link, "Wrong.")):
        with open(path, "w") as handle:
            handle.write(content)

    utils.force_symlink(target, link)
    assert os.path.islink(link)

    # reading through the link must now yield the target content
    with open(link, "r") as handle:
        assert "Correct" in handle.read()
Beispiel #5
0
        def cmd(assembly, symlink, genome_path, dont_index):
            """
            Register an assembly reference genome.

            By default, an attempt to create indexes will be performed.
            """
            # NOTE(review): the decorators of this function are outside this
            # view; presumably it is a click command, in which case the
            # docstring above is the user-facing help text. Detailed notes are
            # therefore kept in comments:
            #
            #   assembly:    identifier handed to the importer; rebound below to
            #                the record returned by `import_data`.
            #   symlink:     forwarded to the importer as `symlink`.
            #   genome_path: source fasta, forwarded as `data_src`.
            #   dont_index:  when truthy the index commands are only printed.
            #
            # The index files are registered in `reference_data` regardless of
            # whether the index commands ran or succeeded.
            from os.path import basename

            assembly = LocalReferenceDataImporter.import_data(
                data_id="genome_fasta",
                symlink=symlink,
                data_src=genome_path,
                identifier=assembly,
                model="assemblies",
                description="Reference Genome Fasta File.",
            )

            genome_fasta = assembly["reference_data"]["genome_fasta"]["url"]
            genome_dir = dirname(genome_fasta)
            commands = [
                ["bwa", "index", genome_fasta],
                ["samtools", "faidx", genome_fasta],
                [
                    "samtools",
                    "dict",
                    genome_fasta,
                    "-a",
                    assembly["name"],
                    "-s",
                    assembly["species"],
                    "-o",
                    genome_fasta + ".dict",
                ],
            ]

            for command in commands:
                if dont_index:
                    click.secho(
                        f"Skipping indexing:\n\n\t{' '.join(command)}", fg="yellow"
                    )
                    continue

                # a failing indexer is reported but does not abort registration
                try:  # pragma: no cover
                    subprocess.check_call(command)
                except subprocess.CalledProcessError:  # pragma: no cover
                    click.secho(
                        f"INDEX FAILED, MUST BE FIXED:\n\n\t{' '.join(command)}",
                        fg="red",
                    )

            # file extensions appended to the fasta path by each index command
            index_extensions = {
                "bwa index": ["amb", "ann", "bwt", "pac", "sa"],
                "samtools faidx": ["fai"],
                "samtools dict": ["dict"],
            }

            for tool, extensions in index_extensions.items():
                for extension in extensions:
                    assembly["reference_data"][f"genome_fasta_{extension}"] = {
                        "url": f"{genome_fasta}.{extension}",
                        "description": f"Index generated by: {tool}",
                    }

            # Alias every file sharing the fasta stem as `<assembly name>.<ext>`.
            # The stem and extension are taken from the file NAME only: splitting
            # the full path on "." breaks as soon as a directory contains a dot.
            fasta_stem = basename(genome_fasta).split(".", 1)[0]
            for path in glob(join(genome_dir, fasta_stem) + "*"):
                extension = basename(path).split(".", 1)[-1]
                dst = join(genome_dir, assembly["name"] + "." + extension)
                if path != dst:
                    utils.force_symlink(path, dst)

            api.patch_instance(
                endpoint="assemblies",
                instance_id=assembly["pk"],
                storage_usage=utils.get_tree_size(assembly["storage_url"]),
                reference_data=assembly["reference_data"],
            )
Beispiel #6
0
 def symlink(src, dst):
     """Force a symlink at `dst` pointing to the resolved real path of `src`."""
     resolved_src = os.path.realpath(src)
     return utils.force_symlink(resolved_src, dst)
Beispiel #7
0
def submit_lsf_array(
    commands, requirements, jobname, extra_args=None, throttle_by=50
):  # pragma: no cover
    """
    Submit a list of bash scripts as a single LSF job array.

    A run directory is created under
    `<BASE_STORAGE_DIRECTORY>/.runs/<user>/<timestamp>` holding, per command,
    the array stdin (`in.N`), the exit command (`exit_cmd.N`) and symlinks
    (`log.N`, `err.N`, `exit.N`) to `head_job.*` files created next to each
    script. Three submissions are then chained with `bsub`:

        ISABL: the array itself, throttled by `throttle_by`.
        EXIT: an array of exit commands depending on `exit(<array job>[*])`.
        CLEAN: `rm -r` of the run directory once the EXIT job has ended.

    Arguments:
        commands (list): of (path to bash script, on exit command) tuples.
        requirements (str): string of LSF requirements.
        jobname (str): lsf array jobname.
        extra_args (str): extra LSF args.
        throttle_by (int): max number of jobs running at same time.

    Returns:
        str: jobid of clean up job.
    """

    def bsub(command_line):
        """Run a bsub command line and return the first `<...>` token printed."""
        stdout = subprocess.check_output(command_line, shell=True).decode("utf-8")
        return re.findall("<(.*?)>", stdout)[0]

    if not extra_args:
        extra_args = ""

    assert system_settings.BASE_STORAGE_DIRECTORY

    root = join(
        system_settings.BASE_STORAGE_DIRECTORY,
        ".runs",
        getuser(),
        datetime.now(system_settings.TIME_ZONE).isoformat(),
    )

    os.makedirs(root, exist_ok=True)
    jobname += f" | rundir: {root}"
    total = len(commands)

    # LSF array indexes are 1-based, so files are numbered from 1
    for number, (script, on_exit) in enumerate(commands, start=1):
        script_dir = abspath(dirname(script))

        with open(join(root, f"in.{number}"), "w") as stdin_file:
            # use random sleep to avoid parallel API hits
            delay = random.uniform(0, 10)
            stdin_file.write(f"sleep {delay:.3} && bash {script}")

        with open(join(root, f"exit_cmd.{number}"), "w") as exit_file:
            exit_file.write(on_exit)

        for kind in ("log", "err", "exit"):
            head_file = join(script_dir, f"head_job.{kind}")

            # create (or truncate) the file before linking it in the run dir
            with open(head_file, "w"):
                pass

            utils.force_symlink(head_file, join(root, f"{kind}.{number}"))

    # submit array of commands
    array_jobid = bsub(
        f"bsub {requirements} {extra_args} "
        f'-J "ISABL | {jobname}[1-{total}]%{throttle_by}" '
        f'-oo "{root}/log.%I" -eo "{root}/err.%I" -i "{root}/in.%I" bash'
    )

    # submit array of exit commands
    exit_jobid = bsub(
        f'bsub -W 15 -J "EXIT | {jobname}[1-{total}]" -ti -o "{root}/exit.%I" '
        f'-w "exit({array_jobid}[*])" -i "{root}/exit_cmd.%I" bash '
    )

    # clean the execution directory
    return bsub(
        f'bsub -J "CLEAN | {jobname}" -w "ended({exit_jobid})" -ti rm -r {root}'
    )