Ejemplo n.º 1
0
def cleanup_workflow(
    configs: dict,
    sample_outdir: Path,
    sample_id: str,
    afterok: list,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
) -> int:
    """Submit the cleanup job for one sample's analysis folder.

    The submitted script has two lines: the MultiQC command (with
    ``sample_outdir`` as both input and output directory) followed by the
    command produced by ``get_cleanup_cmd``.

    Args:
        configs: Run configuration. The keys ``"out"``, ``"singularity"``,
            ``"project"`` and ``"singularity_bind"`` are read here.
        sample_outdir: Output directory of the sample.
        sample_id: Sample identifier; also used in the job name.
        afterok: Forwarded unchanged as ``afterok`` to ``slurm_api.run_job``.
        slurm_api: API object used to submit the job.
        dry_run: Forwarded unchanged to ``slurm_api.run_job``.

    Returns:
        The value returned by ``slurm_api.run_job``.
    """
    analysis_root = configs["out"]

    cleanup_step = get_cleanup_cmd(
        out_dir=analysis_root,
        sample_outdir=sample_outdir,
        sample_id=sample_id,
    )

    container = singularity_base(
        configs["singularity"],
        analysis_root,
        configs["project"],
        configs["singularity_bind"],
    )
    multiqc_step = get_multiqc_cmd(
        singularity=container,
        input_dir=sample_outdir,
        out_dir=sample_outdir,
    )

    # MultiQC comes first in the script, the cleanup command second.
    script = "\n".join((multiqc_step, cleanup_step))

    return slurm_api.run_job(
        name=f"cleanup-{sample_id}",
        command=script,
        afterok=afterok,
        dry_run=dry_run,
    )
Ejemplo n.º 2
0
def preface_predict_workflow(
    configs: dict,
    sample_id: str,
    afterok: int,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
):
    """Submit the preface predict job for one sample.

    Args:
        configs: Run configuration. The keys ``"out"``, ``"singularity"``,
            ``"project"``, ``"singularity_bind"`` and
            ``configs["preface"]["model_dir"]`` are read here.
        sample_id: Sample identifier; also used in the job name.
        afterok: A single job id. It is wrapped in a one-element list before
            being handed to ``slurm_api.run_job``.
        slurm_api: API object used to submit the job.
        dry_run: Forwarded unchanged to ``slurm_api.run_job``.

    Returns:
        The value returned by ``slurm_api.run_job``.
    """
    LOG.info("Running the preface predict workflow")
    analysis_root = configs["out"]

    container = singularity_base(
        configs["singularity"],
        analysis_root,
        configs["project"],
        configs["singularity_bind"],
    )

    model_dir = configs["preface"]["model_dir"]
    predict_step = get_preface_predict_cmd(
        singularity=container,
        out_dir=analysis_root,
        model_dir=model_dir,
        sample_id=sample_id,
    )

    # run_job takes a list of dependencies, so wrap the single job id.
    dependencies = [afterok]

    return slurm_api.run_job(
        name=f"preface_predict-{sample_id}",
        command=predict_step,
        afterok=dependencies,
        dry_run=dry_run,
    )
Ejemplo n.º 3
0
def make_reference(samples: Iterator[dict],
                   configs: dict,
                   slurm_api: SlurmAPI,
                   dry_run: bool = None) -> int:
    """Align every sample and build a reference from the results.

    One alignment job is submitted per sample. A single ``wcxmkref`` job that
    depends on all of them is submitted afterwards, followed by the
    ``pipe_complete`` and ``pipe_fail`` hooks that depend on the reference job.

    Args:
        samples: Sample records; each must provide a ``"sample_id"`` key.
        configs: Run configuration. ``configs["out"]`` must support the ``/``
            operator (presumably a ``Path`` - confirm against the caller).
        slurm_api: API object used to submit jobs. Its ``slurm_settings``
            mapping is modified in place by this function.
        dry_run: When falsy (including the default ``None``) the per-sample
            output directories are created. The value is forwarded unchanged
            to every submitted job.

    Returns:
        The value returned by ``slurm_api.run_job`` for the ``wcxmkref`` job.
    """
    out_dir = configs["out"]
    alignment_jobs = []

    for sample in samples:
        sample_id = sample["sample_id"]
        sample_dir = configs["out"] / sample_id

        # No exist_ok here: an already existing sample directory is an error.
        if not dry_run:
            sample_dir.mkdir(parents=True)

        # Switch to the alignment-specific resources for the alignment job ...
        for setting in ("ntasks", "mem"):
            slurm_api.slurm_settings[setting] = configs["align"][setting]

        alignment_jobs.append(
            align_individual(
                configs=configs,
                sample=sample,
                slurm_api=slurm_api,
                dry_run=dry_run,
            ))

        # ... and go back to the general slurm resources afterwards.
        for setting in ("ntasks", "mem"):
            slurm_api.slurm_settings[setting] = configs["slurm"][setting]

    container = singularity_base(
        configs["singularity"],
        configs["out"],
        configs["project"],
        configs["singularity_bind"],
    )

    reference_step = get_mkref_cmd(
        singularity=container,
        out=str(out_dir),
        testbinsize=configs["wisecondorx"]["testbinsize"],
        prefacebinsize=configs["wisecondorx"]["prefacebinsize"],
    )

    # The reference job waits for every alignment job submitted above.
    reference_job = slurm_api.run_job(
        name="wcxmkref",
        command=reference_step,
        afterok=alignment_jobs,
        dry_run=dry_run,
    )

    # The completion/failure hooks are submitted with a one hour time limit.
    slurm_api.slurm_settings["time"] = "1:00:00"
    pipe_complete(
        configs=configs,
        afterok=reference_job,
        slurm_api=slurm_api,
        dry_run=dry_run,
    )
    pipe_fail(
        configs=configs,
        slurm_api=slurm_api,
        dry_run=dry_run,
        afternotok=reference_job,
    )

    return reference_job
Ejemplo n.º 4
0
def align_and_convert_paired_end(config: dict, fastq: list, out: Path,
                                 sample_id: str) -> str:
    """Build the paired-end alignment and wisecondorX convert commands.

    Args:
        config: Run configuration. The keys ``"singularity"``, ``"out"``,
            ``"project"``, ``"singularity_bind"``, ``"reference"`` and
            ``config["align"]["tmpdir"]`` / ``config["align"]["ntasks"]`` are
            read here.
        fastq: The fastq inputs; index 0 is used for read 1, index 1 for
            read 2.
        out: Output directory; passed on as a string.
        sample_id: Sample identifier.

    Returns:
        The commands in execution order, separated by newlines: align r1,
        align r2, the sampe + bamsormadup pair (joined by ``" \\n "``) and the
        convert command.
    """
    container = singularity_base(
        config["singularity"],
        config["out"],
        config["project"],
        config["singularity_bind"],
    )
    prefix = get_outprefix(out, sample_id)

    # One align command per mate; the read label follows the fastq position.
    align_steps = [
        get_align_command(
            singularity=container,
            sample_id=sample_id,
            tmp_dir=config["align"]["tmpdir"],
            threads=config["align"]["ntasks"],
            reference=config["reference"],
            fastq=fastq[position],
            out_dir=str(out),
            out_prefix=prefix,
            read=label,
        ) for position, label in enumerate(("r1", "r2"))
    ]

    pairing_step = get_sampe_command(
        singularity=container,
        reference=config["reference"],
        threads=config["align"]["ntasks"],
        fastq1=fastq[0],
        fastq2=fastq[1],
        out_prefix=prefix,
    )
    sort_dedup_step = get_bamsormadup_command(
        singularity=container,
        tmp_dir=config["align"]["tmpdir"],
        out_prefix=prefix,
    )
    paired_block = " \n ".join((pairing_step, sort_dedup_step))

    convert_step = get_convert_cmd(singularity=container, out_prefix=prefix)

    return "\n".join([*align_steps, paired_block, convert_step])
Ejemplo n.º 5
0
def wisecondor_x_test(configs: dict, out_dir: Path, sample_id: str) -> str:
    """Build the wisecondorX test, preface and gender commands for a sample.

    Args:
        configs: Run configuration. Besides the singularity related keys, the
            ``"blacklist"``, ``"zscore"``, ``"reftest"`` and ``"refpreface"``
            entries of ``configs["wisecondorx"]`` are read here.
        out_dir: Analysis output directory; the sample prefix is
            ``out_dir / sample_id / sample_id``.
        sample_id: Sample identifier.

    Returns:
        The three commands separated by newlines, in the order test
        prediction, preface prediction, gender.
    """
    sample_prefix = out_dir / sample_id / sample_id
    container = singularity_base(
        configs["singularity"],
        configs["out"],
        configs["project"],
        configs["singularity_bind"],
    )

    wcx_settings = configs["wisecondorx"]

    # Arguments common to both predict invocations.
    predict_kwargs = {
        "singularity": container,
        "out_prefix": sample_prefix,
        "blacklist": wcx_settings["blacklist"],
        "zscore": str(wcx_settings["zscore"]),
    }

    test_step = get_predict_cmd(reference=wcx_settings["reftest"],
                                **predict_kwargs)
    preface_step = get_predict_cmd(reference=wcx_settings["refpreface"],
                                   preface=True,
                                   **predict_kwargs)
    gender_step = get_gender_cmd(
        singularity=container,
        out_prefix=sample_prefix,
        reference=wcx_settings["reftest"],
    )

    return "\n".join((test_step, preface_step, gender_step))
Ejemplo n.º 6
0
def picard_qc(configs: dict, out_dir: Path, sample_id: str) -> str:
    """Build the picard QC pipeline steps for one sample.

    Args:
        configs: Run configuration. Besides the singularity related keys,
            ``configs["reference"]``, ``configs["picard"]["javasettings"]``
            and ``configs["align"]["tmpdir"]`` are read here.
        out_dir: Analysis output directory; the sample prefix is
            ``out_dir / sample_id / sample_id``.
        sample_id: Sample identifier.

    Returns:
        The GC bias, insert size and complexity estimation commands, in that
        order, separated by newlines.
    """
    sample_prefix = out_dir / sample_id / sample_id
    reference = configs["reference"]
    java_opts = configs["picard"]["javasettings"]

    container = singularity_base(
        configs["singularity"],
        configs["out"],
        configs["project"],
        configs["singularity_bind"],
    )

    # Every step receives the prefix and the temp dir as plain strings.
    prefix = str(sample_prefix)
    tmp_dir = str(configs["align"]["tmpdir"])

    steps = [
        get_collect_gc_bias_cmd(
            singularity=container,
            out_prefix=prefix,
            reference=reference,
            javasettings=java_opts,
            tmp_dir=tmp_dir,
        ),
        get_collect_insert_size_cmd(
            singularity=container,
            out_prefix=prefix,
            javasettings=java_opts,
            tmp_dir=tmp_dir,
        ),
        get_estimate_complexity_cmd(
            singularity=container,
            out_prefix=prefix,
            javasettings=java_opts,
            tmp_dir=tmp_dir,
        ),
    ]

    return "\n".join(steps)
Ejemplo n.º 7
0
def amycne_ffy(configs: dict, out_dir: Path, sample_id: str) -> str:
    """Build the fetal fraction estimation steps (tiddit followed by AMYCNE).

    Args:
        configs: Run configuration. Besides the singularity related keys,
            ``configs["reference"]``, ``configs["tiddit"]["binsize"]`` and the
            ``"minq"``, ``"coefficient"`` and ``"intercept"`` entries of
            ``configs["amycne"]`` are read here.
        out_dir: Analysis output directory; sample files live under
            ``out_dir / sample_id``.
        sample_id: Sample identifier.

    Returns:
        The tiddit, GC table and AMYCNE commands, in that order, separated by
        newlines.
    """
    sample_dir = out_dir / sample_id
    prefix = str(sample_dir / sample_id)
    gc_table = str(sample_dir / (sample_id + ".gc.tab"))

    container = singularity_base(
        configs["singularity"],
        configs["out"],
        configs["project"],
        configs["singularity_bind"],
    )

    # Coverage bins computed with tiddit.
    coverage_step = get_tiddit_cmd(
        singularity=container,
        out_prefix=prefix,
        binsize=configs["tiddit"]["binsize"],
    )

    # GC content table, built with the same bin size as the coverage bins.
    gc_step = get_gctab_cmd(
        singularity=container,
        reference=configs["reference"],
        binsize=configs["tiddit"]["binsize"],
        path_gc_tab=gc_table,
    )

    # The "coefficient" setting is what AMYCNE receives as its slope.
    estimate_step = run_amycne_cmd(
        singularity=container,
        out_prefix=prefix,
        path_gc_tab=gc_table,
        minq=configs["amycne"]["minq"],
        slope=configs["amycne"]["coefficient"],
        intercept=configs["amycne"]["intercept"],
    )

    return "\n".join((coverage_step, gc_step, estimate_step))
Ejemplo n.º 8
0
def summarize_workflow(
    configs: dict,
    afterok: list,
    slurm_api: SlurmAPI,
    dry_run: bool = False,
    batch_ref: bool = False,
    two_pass: bool = False,
) -> int:
    """Run the workflow to summarize an analysis

    The submitted script depends on the mode:

    * ``two_pass=True``: summarize into ``<project_id>.1pass.csv`` and then
      build the two pass reference (``batch_ref`` is ignored in this mode).
    * ``two_pass=False, batch_ref=True``: MultiQC, summarize into
      ``<project_id>.2pass.csv`` and then the merge command.
    * ``two_pass=False, batch_ref=False``: MultiQC and summarize into
      ``<project_id>.csv``.

    Args:
        configs: Run configuration. Reads ``"out"`` (must support the ``/``
            operator), ``"project_id"``, ``"sample_sheet"``, the singularity
            related keys, the ``"summary"`` thresholds and, in two pass mode,
            the ``"wisecondorx"`` bin sizes.
        afterok: Forwarded unchanged as ``afterok`` to ``slurm_api.run_job``.
        slurm_api: API object used to submit the job.
        dry_run: Forwarded unchanged to ``slurm_api.run_job``.
        batch_ref: Selects the ``2pass`` output and the merge step when not in
            two pass mode.
        two_pass: Selects the first pass variant described above.

    Returns:
        The value returned by ``slurm_api.run_job``.
    """
    LOG.info("Run the summarize workflow")
    out_dir = configs["out"]
    project_id = configs["project_id"]

    singularity = singularity_base(configs["singularity"], out_dir,
                                   configs["project"],
                                   configs["singularity_bind"])

    # Helper scripts live next to the workflows package, in fluffy/scripts.
    wd = os.path.dirname(os.path.realpath(__file__)).replace(
        "fluffy/workflows", "fluffy/scripts")

    # The three modes only differ in the summary file name and in the steps
    # placed around the (otherwise identical) summarize command.
    if two_pass:
        outfile_suffix = "1pass.csv"
    elif batch_ref:
        outfile_suffix = "2pass.csv"
    else:
        outfile_suffix = "csv"

    steps = []

    if not two_pass:
        steps.append(
            get_multiqc_cmd(singularity=singularity,
                            input_dir=out_dir,
                            out_dir=out_dir))

    summary = configs["summary"]
    steps.append(
        get_summarize_cmd(
            singularity=singularity,
            out_dir=out_dir,
            outfile=out_dir / f"{project_id}.{outfile_suffix}",
            project_id=project_id,
            sample_sheet=configs["sample_sheet"],
            zscore=summary["zscore"],
            mincnv=summary["mincnv"],
            maxgcd=summary["maxGCD"],
            maxatd=summary["maxATD"],
            maxbin2bin=summary["maxbin2bin"],
            maxdup=summary["maxdup"],
            minreads=summary["minreads"]))

    if two_pass:
        steps.append(
            get_two_pass_ref_cmd(singularity, out_dir, project_id, wd,
                                 configs["wisecondorx"]["testbinsize"],
                                 configs["wisecondorx"]["prefacebinsize"]))
    elif batch_ref:
        steps.append(get_merge_cmd(out_dir, project_id, wd))

    # str() keeps the formatting tolerance of the f-string based assembly
    # this replaces, should a command builder return a non-str object.
    command_str = "\n".join(map(str, steps))

    jobid = slurm_api.run_job(
        name="summarize_batch",
        command=command_str,
        afterok=afterok,
        dry_run=dry_run,
    )

    return jobid
Ejemplo n.º 9
0
def analyse_workflow(
    samples: Iterator[dict],
    configs: dict,
    slurm_api: SlurmAPI,
    skip_preface: bool = False,
    dry_run: bool = False,
    batch_ref: bool = True,
) -> int:
    """Run the wisecondor analysis

    Steps:

    1. Submit one alignment job per sample.
    2. If ``batch_ref`` is set: point ``configs["wisecondorx"]["reftest"]`` and
       ``["refpreface"]`` at batch specific reference files, submit a
       ``wcxmkref`` job depending on all alignments, and run a first
       ``run_analysis`` pass with ``two_pass=True``.
    3. Run ``run_analysis`` with ``two_pass=False``.
    4. Submit the ``pipe_complete`` / ``pipe_fail`` hooks depending on the job
       returned by step 3.

    Args:
        samples: Sample records; each must provide a ``"sample_id"`` key. Any
            iterable is accepted, including one-shot iterators/generators.
        configs: Run configuration. ``configs["out"]`` must support the ``/``
            operator. ``configs["wisecondorx"]`` is modified in place when
            ``batch_ref`` is set.
        slurm_api: API object used to submit jobs. Its ``slurm_settings``
            mapping is modified in place.
        skip_preface: Forwarded unchanged to ``run_analysis``.
        dry_run: When falsy the per-sample output directories are created.
            Forwarded unchanged to every submitted job.
        batch_ref: Build and use a reference made from this batch.

    Returns:
        The job id returned by the final ``run_analysis`` call.
    """
    # ``samples`` is walked several times below and handed to run_analysis
    # twice. A one-shot iterator would be exhausted after the first loop and
    # every later pass would silently see no samples, so materialise it once.
    samples = list(samples)

    jobids = []
    sample_jobids = {}

    for sample in samples:
        sample_id = sample["sample_id"]
        sample_jobids[sample_id] = []
        sample_outdir = configs["out"] / sample_id
        # This will fail if dir already exists
        if not dry_run:
            sample_outdir.mkdir(parents=True)

        # NOTE(review): the alignment resources are not reset in this
        # function; presumably run_analysis sets what it needs - confirm.
        slurm_api.slurm_settings["ntasks"] = configs["align"]["ntasks"]
        slurm_api.slurm_settings["mem"] = configs["align"]["mem"]

        align_jobid = align_individual(
            configs=configs,
            sample=sample,
            slurm_api=slurm_api,
            dry_run=dry_run,
        )
        jobids.append(align_jobid)
        sample_jobids[sample_id].append(align_jobid)

    if batch_ref:
        binsize_test = configs["wisecondorx"]["testbinsize"]
        binsize_preface = configs["wisecondorx"]["prefacebinsize"]
        out_dir = configs["out"]

        # The references are named after the output directory itself, i.e.
        # "<out>.wcxref.<binsize>.npz", hence the stripped trailing slash.
        ref_prefix = str(out_dir).rstrip("/")
        configs["wisecondorx"][
            "reftest"] = f"{ref_prefix}.wcxref.{binsize_test}.npz"
        configs["wisecondorx"][
            "refpreface"] = f"{ref_prefix}.wcxref.{binsize_preface}.npz"

        singularity = singularity_base(configs["singularity"], configs["out"],
                                       configs["project"],
                                       configs["singularity_bind"])

        mkref_cmd = get_mkref_cmd(
            singularity=singularity,
            out=str(out_dir),
            testbinsize=configs["wisecondorx"]["testbinsize"],
            prefacebinsize=configs["wisecondorx"]["prefacebinsize"],
        )

        # The reference can only be built once every alignment has finished.
        make_ref_jobid = slurm_api.run_job(
            name="wcxmkref",
            command=mkref_cmd,
            afterok=jobids,
            dry_run=dry_run,
        )

        # Each sample's analysis has to wait for the batch reference ...
        for sample in samples:
            sample_id = sample["sample_id"]
            sample_jobids[sample_id].append(make_ref_jobid)

        first_pass_jobid, jobids, slurm_api = run_analysis(
            samples=samples,
            sample_jobids=sample_jobids,
            configs=configs,
            slurm_api=slurm_api,
            skip_preface=skip_preface,
            dry_run=dry_run,
            batch_ref=batch_ref,
            jobids=jobids,
            two_pass=True)

        # ... and the second pass has to wait for the first pass.
        for sample in samples:
            sample_id = sample["sample_id"]
            sample_jobids[sample_id].append(first_pass_jobid)

    summarize_jobid, jobids, slurm_api = run_analysis(
        samples=samples,
        sample_jobids=sample_jobids,
        configs=configs,
        slurm_api=slurm_api,
        skip_preface=skip_preface,
        dry_run=dry_run,
        batch_ref=batch_ref,
        jobids=jobids,
        two_pass=False)

    slurm_api.print_submitted_jobs()
    # The completion/failure hooks are submitted with a one hour time limit.
    slurm_api.slurm_settings["time"] = "1:00:00"
    pipe_complete(configs=configs,
                  afterok=summarize_jobid,
                  slurm_api=slurm_api,
                  dry_run=dry_run)

    pipe_fail(configs=configs,
              slurm_api=slurm_api,
              dry_run=dry_run,
              afternotok=summarize_jobid)

    return summarize_jobid