Ejemplo n.º 1
0
def _run_on_chrom(chrom, work_bams, names, work_dir, items):
    """Run cn.mops on work BAMs for a specific chromosome.
    """
    local_sitelib = os.path.join(install.get_defaults().get("tooldir", "/usr/local"),
                                 "lib", "R", "site-library")
    batch = sshared.get_cur_batch(items)
    ext = "-%s-cnv" % batch if batch else "-cnv"
    out_file = os.path.join(work_dir, "%s%s-%s.bed" % (os.path.splitext(os.path.basename(work_bams[0]))[0],
                                                       ext, chrom if chrom else "all"))
    if not utils.file_exists(out_file):
        with file_transaction(items[0], out_file) as tx_out_file:
            rcode = "%s-run.R" % os.path.splitext(out_file)[0]
            with open(rcode, "w") as out_handle:
                out_handle.write(_script.format(prep_str=_prep_load_script(work_bams, names, chrom, items),
                                                out_file=tx_out_file,
                                                local_sitelib=local_sitelib))
            rscript = config_utils.get_program("Rscript", items[0]["config"])
            try:
                do.run([rscript, rcode], "cn.mops CNV detection", items[0], log_error=False)
            except subprocess.CalledProcessError, msg:
                # cn.mops errors out if no CNVs found. Just write an empty file.
                if _allowed_cnmops_errorstates(str(msg)):
                    with open(tx_out_file, "w") as out_handle:
                        out_handle.write('track name=empty description="No CNVs found"\n')
                else:
                    logger.exception()
                    raise
Ejemplo n.º 2
0
def _run_lumpy(full_bams, sr_bams, disc_bams, work_dir, items):
    """Run lumpy-sv, using speedseq pipeline.
    """
    batch = sshared.get_cur_batch(items)
    ext = "-%s-svs" % batch if batch else "-svs"
    out_file = os.path.join(
        work_dir, "%s%s.vcf" %
        (os.path.splitext(os.path.basename(items[0]["align_bam"]))[0], ext))
    sv_exclude_bed = sshared.prepare_exclude_file(items, out_file)
    if not utils.file_exists(out_file):
        with file_transaction(items[0], out_file) as tx_out_file:
            with tx_tmpdir(items[0]) as tmpdir:
                full_bams = ",".join(full_bams)
                sr_bams = ",".join(sr_bams)
                disc_bams = ",".join(disc_bams)
                exclude = "-x %s" % sv_exclude_bed if utils.file_exists(
                    sv_exclude_bed) else ""
                ref_file = dd.get_ref_file(items[0])
                # use our bcbio python for runs within lumpyexpress
                curpython_dir = os.path.dirname(sys.executable)
                cmd = (
                    "export PATH={curpython_dir}:$PATH && "
                    "lumpyexpress -v -B {full_bams} -S {sr_bams} -D {disc_bams} "
                    "{exclude} -T {tmpdir} -o {tx_out_file}")
                do.run(cmd.format(**locals()), "lumpyexpress", items[0])
    return vcfutils.sort_by_ref(out_file, items[0]), sv_exclude_bed
Ejemplo n.º 3
0
def _run_delly(bam_files, chrom, ref_file, work_dir, items):
    """Run delly, calling structural variations for the specified type.
    """
    batch = sshared.get_cur_batch(items)
    ext = "-%s-svs" % batch if batch else "-svs"
    out_file = os.path.join(
        work_dir, "%s%s-%s.bcf" %
        (os.path.splitext(os.path.basename(bam_files[0]))[0], ext, chrom))
    final_file = "%s.vcf.gz" % (utils.splitext_plus(out_file)[0])
    cores = min(
        utils.get_in(items[0], ("config", "algorithm", "num_cores"), 1),
        len(bam_files))
    if not utils.file_exists(out_file) and not utils.file_exists(final_file):
        with file_transaction(items[0], out_file) as tx_out_file:
            if sshared.has_variant_regions(items, out_file, chrom):
                exclude = ["-x", _delly_exclude_file(items, out_file, chrom)]
                cmd = ["delly", "call", "-g", ref_file, "-o", tx_out_file
                       ] + exclude + bam_files
                multi_cmd = "export OMP_NUM_THREADS=%s && export LC_ALL=C && " % cores
                try:
                    do.run(multi_cmd + " ".join(cmd),
                           "delly structural variant")
                except subprocess.CalledProcessError as msg:
                    # delly returns an error exit code if there are no variants
                    if "No structural variants found" not in str(msg):
                        raise
    return [_bgzip_and_clean(out_file, items)]
Ejemplo n.º 4
0
def _run_delly(bam_files, chrom, sv_type, ref_file, work_dir, items):
    """Run delly, calling structural variations for the specified type.
    """
    batch = sshared.get_cur_batch(items)
    ext = "-%s-svs" % batch if batch else "-svs"
    out_file = os.path.join(work_dir, "%s%s-%s-%s.vcf"
                            % (os.path.splitext(os.path.basename(bam_files[0]))[0], ext, chrom, sv_type))
    cores = min(utils.get_in(items[0], ("config", "algorithm", "num_cores"), 1),
                len(bam_files))
    if not utils.file_exists(out_file):
        with file_transaction(items[0], out_file) as tx_out_file:
            names = [tz.get_in(["rgnames", "sample"], x) for x in items]
            if not sshared.has_variant_regions(items, out_file, chrom):
                vcfutils.write_empty_vcf(tx_out_file, samples=names)
            else:
                exclude = ["-x", _delly_exclude_file(items, out_file, chrom)]
                map_qual = ["--map-qual", "1"]
                cmd = ["delly", "-t", sv_type, "-g", ref_file, "-o", tx_out_file] + exclude + bam_files
                multi_cmd = "export OMP_NUM_THREADS=%s && export LC_ALL=C && " % cores
                try:
                    do.run(multi_cmd + " ".join(cmd), "delly structural variant")
                    # Delly will write nothing if no variants found
                    if not utils.file_exists(tx_out_file):
                        vcfutils.write_empty_vcf(tx_out_file, samples=names)
                except subprocess.CalledProcessError, msg:
                    # delly returns an error exit code if there are no variants
                    if "No structural variants found" in str(msg):
                        vcfutils.write_empty_vcf(tx_out_file, samples=names)
                    else:
                        raise
Ejemplo n.º 5
0
def _run_lumpy(full_bams, sr_bams, disc_bams, previous_evidence, work_dir, items):
    """Run lumpy-sv, using speedseq pipeline.
    """
    batch = sshared.get_cur_batch(items)
    ext = "-%s-svs" % batch if batch else "-svs"
    out_file = os.path.join(work_dir, "%s%s.vcf"
                            % (os.path.splitext(os.path.basename(items[0]["align_bam"]))[0], ext))
    sv_exclude_bed = sshared.prepare_exclude_file(items, out_file)
    if not utils.file_exists(out_file):
        with file_transaction(items[0], out_file) as tx_out_file:
            with tx_tmpdir(items[0]) as tmpdir:
                full_bams = ",".join(full_bams)
                sr_bams = ",".join(sr_bams)
                disc_bams = ",".join(disc_bams)
                exclude = "-x %s" % sv_exclude_bed if (sv_exclude_bed and utils.file_exists(sv_exclude_bed)) else ""
                ref_file = dd.get_ref_file(items[0])
                depths = []
                for sample, ev_files in previous_evidence.items():
                    for ev_type, ev_file in ev_files.items():
                        if utils.file_exists(ev_file):
                            depths.append("%s:%s" % (sample, ev_file))
                depth_arg = "-d %s" % ",".join(depths) if len(depths) > 0 else ""
                # use our bcbio python for runs within lumpyexpress
                exports = utils.local_path_export()
                cmd = ("{exports}lumpyexpress -v -B {full_bams} -S {sr_bams} -D {disc_bams} "
                       "{exclude} {depth_arg} -T {tmpdir} -o {tx_out_file}")
                do.run(cmd.format(**locals()), "lumpyexpress", items[0])
    return vcfutils.sort_by_ref(out_file, items[0]), sv_exclude_bed
Ejemplo n.º 6
0
def _run_lumpy(full_bams, sr_bams, disc_bams, previous_evidence, work_dir,
               items):
    """Run lumpy-sv, using speedseq pipeline.
    """
    batch = sshared.get_cur_batch(items)
    ext = "-%s-svs" % batch if batch else "-svs"
    out_file = os.path.join(
        work_dir, "%s%s.vcf" %
        (os.path.splitext(os.path.basename(items[0]["align_bam"]))[0], ext))
    sv_exclude_bed = sshared.prepare_exclude_file(items, out_file)
    if not utils.file_exists(out_file):
        with file_transaction(items[0], out_file) as tx_out_file:
            with tx_tmpdir(items[0]) as tmpdir:
                full_bams = ",".join(full_bams)
                sr_bams = ",".join(sr_bams)
                disc_bams = ",".join(disc_bams)
                exclude = "-x %s" % sv_exclude_bed if (
                    sv_exclude_bed
                    and utils.file_exists(sv_exclude_bed)) else ""
                ref_file = dd.get_ref_file(items[0])
                depths = []
                for sample, ev_files in previous_evidence.items():
                    for ev_type, ev_file in ev_files.items():
                        if utils.file_exists(ev_file):
                            depths.append("%s:%s" % (sample, ev_file))
                depth_arg = "-d %s" % ",".join(depths) if len(
                    depths) > 0 else ""
                # use our bcbio python for runs within lumpyexpress
                exports = utils.local_path_export()
                cmd = (
                    "{exports}lumpyexpress -v -B {full_bams} -S {sr_bams} -D {disc_bams} "
                    "{exclude} {depth_arg} -T {tmpdir} -o {tx_out_file}")
                do.run(cmd.format(**locals()), "lumpyexpress", items[0])
    return vcfutils.sort_by_ref(out_file, items[0]), sv_exclude_bed
Ejemplo n.º 7
0
def _run_on_chrom(chrom, work_bams, names, work_dir, items):
    """Run cn.mops on work BAMs for a specific chromosome.
    """
    local_sitelib = utils.R_sitelib()
    batch = sshared.get_cur_batch(items)
    ext = "-%s-cnv" % batch if batch else "-cnv"
    out_file = os.path.join(
        work_dir,
        "%s%s-%s.bed" % (os.path.splitext(os.path.basename(
            work_bams[0]))[0], ext, chrom if chrom else "all"))
    if not utils.file_exists(out_file):
        with file_transaction(items[0], out_file) as tx_out_file:
            rcode = "%s-run.R" % os.path.splitext(out_file)[0]
            with open(rcode, "w") as out_handle:
                out_handle.write(
                    _script.format(prep_str=_prep_load_script(
                        work_bams, names, chrom, items),
                                   out_file=tx_out_file,
                                   local_sitelib=local_sitelib))
            rscript = utils.Rscript_cmd()
            try:
                do.run([rscript, "--vanilla", rcode],
                       "cn.mops CNV detection",
                       items[0],
                       log_error=False)
            except subprocess.CalledProcessError as msg:
                # cn.mops errors out if no CNVs found. Just write an empty file.
                if _allowed_cnmops_errorstates(str(msg)):
                    with open(tx_out_file, "w") as out_handle:
                        out_handle.write(
                            'track name=empty description="No CNVs found"\n')
                else:
                    logger.exception()
                    raise
    return [out_file]
Ejemplo n.º 8
0
def _run_delly(bam_files, chrom, sv_type, ref_file, work_dir, items):
    """Run delly, calling structural variations for the specified type.
    """
    batch = sshared.get_cur_batch(items)
    ext = "-%s-svs" % batch if batch else "-svs"
    out_file = os.path.join(work_dir, "%s%s-%s-%s.bcf"
                            % (os.path.splitext(os.path.basename(bam_files[0]))[0], ext, chrom, sv_type))
    final_file = "%s.vcf.gz" % (utils.splitext_plus(out_file)[0])
    cores = min(utils.get_in(items[0], ("config", "algorithm", "num_cores"), 1),
                len(bam_files))
    if not utils.file_exists(out_file) and not utils.file_exists(final_file):
        with file_transaction(items[0], out_file) as tx_out_file:
            if sshared.has_variant_regions(items, out_file, chrom):
                exclude = ["-x", _delly_exclude_file(items, out_file, chrom)]
                # uses -n to skip small indel detection for speed, not yet optimized:
                # https://github.com/dellytools/delly/issues/36
                cmd = ["delly", "call", "-n", "-t", sv_type, "-g", ref_file, "-o", tx_out_file] + exclude + bam_files
                multi_cmd = "export OMP_NUM_THREADS=%s && export LC_ALL=C && " % cores
                try:
                    do.run(multi_cmd + " ".join(cmd), "delly structural variant")
                except subprocess.CalledProcessError as msg:
                    # delly returns an error exit code if there are no variants
                    if "No structural variants found" not in str(msg):
                        raise
    return [_bgzip_and_clean(out_file, items)]
Ejemplo n.º 9
0
def _run_delly(bam_files, chrom, ref_file, work_dir, items):
    """Run delly, calling structural variations for the specified type.
    """
    batch = sshared.get_cur_batch(items)
    ext = "-%s-svs" % batch if batch else "-svs"
    out_file = os.path.join(work_dir, "%s%s-%s.bcf"
                            % (os.path.splitext(os.path.basename(bam_files[0]))[0], ext, chrom))
    final_file = "%s.vcf.gz" % (utils.splitext_plus(out_file)[0])
    cores = min(utils.get_in(items[0], ("config", "algorithm", "num_cores"), 1),
                len(bam_files))
    if not utils.file_exists(out_file) and not utils.file_exists(final_file):
        with file_transaction(items[0], out_file) as tx_out_file:
            if sshared.has_variant_regions(items, out_file, chrom):
                exclude = ["-x", _delly_exclude_file(items, out_file, chrom)]
                cmd = ["delly", "call", "-g", ref_file, "-o", tx_out_file] + exclude + bam_files
                multi_cmd = "export OMP_NUM_THREADS=%s && export LC_ALL=C && " % cores
                try:
                    do.run(multi_cmd + " ".join(cmd), "delly structural variant")
                except subprocess.CalledProcessError as msg:
                    # Small input samples, write an empty vcf
                    if "Sample has not enough data to estimate library parameters" in str(msg):
                        pass
                    # delly returns an error exit code if there are no variants
                    elif "No structural variants found" not in str(msg):
                        raise
    return [_bgzip_and_clean(out_file, items)]
Ejemplo n.º 10
0
def _run_smoove(full_bams, sr_bams, disc_bams, work_dir, items):
    """Run lumpy-sv using smoove.
    """
    batch = sshared.get_cur_batch(items)
    ext = "-%s-svs" % batch if batch else "-svs"
    name = "%s%s" % (dd.get_sample_name(items[0]), ext)
    out_file = os.path.join(work_dir, "%s-smoove.genotyped.vcf.gz" % name)
    sv_exclude_bed = sshared.prepare_exclude_file(items, out_file)
    old_out_file = os.path.join(work_dir, "%s%s-prep.vcf.gz"
                                % (os.path.splitext(os.path.basename(items[0]["align_bam"]))[0], ext))
    if utils.file_exists(old_out_file):
        return old_out_file, sv_exclude_bed
    if not utils.file_exists(out_file):
        with file_transaction(items[0], out_file) as tx_out_file:
            cores = dd.get_num_cores(items[0])
            out_dir = os.path.dirname(tx_out_file)
            ref_file = dd.get_ref_file(items[0])
            full_bams = " ".join(_prepare_smoove_bams(full_bams, sr_bams, disc_bams, items,
                                                      os.path.dirname(tx_out_file)))
            std_excludes = ["~^GL", "~^HLA", "~_random", "~^chrUn", "~alt", "~decoy"]
            def _is_std_exclude(n):
                clean_excludes = [x.replace("~", "").replace("^", "") for x in std_excludes]
                return any([n.startswith(x) or n.endswith(x) for x in clean_excludes])
            exclude_chrs = [c.name for c in ref.file_contigs(ref_file)
                            if not chromhacks.is_nonalt(c.name) and not _is_std_exclude(c.name)]
            exclude_chrs = "--excludechroms '%s'" % ",".join(std_excludes + exclude_chrs)
            exclude_bed = ("--exclude %s" % sv_exclude_bed) if utils.file_exists(sv_exclude_bed) else ""
            tempdir = os.path.dirname(tx_out_file)
            cmd = ("export TMPDIR={tempdir} && "
                   "smoove call --processes {cores} --genotype --removepr --fasta {ref_file} "
                   "--name {name} --outdir {out_dir} "
                   "{exclude_bed} {exclude_chrs} {full_bams}")
            with utils.chdir(tempdir):
                try:
                    do.run(cmd.format(**locals()), "smoove lumpy calling", items[0])
                except subprocess.CalledProcessError as msg:
                    if _allowed_errors(str(msg)):
                        vcfutils.write_empty_vcf(tx_out_file, config=items[0]["config"],
                                                samples=[dd.get_sample_name(d) for d in items])
                    else:
                        logger.exception()
                        raise
    vcfutils.bgzip_and_index(out_file, items[0]["config"])
    return out_file, sv_exclude_bed
Ejemplo n.º 11
0
def _run_smoove(full_bams, sr_bams, disc_bams, work_dir, items):
    """Run lumpy-sv using smoove.
    """
    batch = sshared.get_cur_batch(items)
    ext = "-%s-svs" % batch if batch else "-svs"
    name = "%s%s" % (dd.get_sample_name(items[0]), ext)
    out_file = os.path.join(work_dir, "%s-smoove.genotyped.vcf.gz" % name)
    sv_exclude_bed = sshared.prepare_exclude_file(items, out_file)
    old_out_file = os.path.join(work_dir, "%s%s-prep.vcf.gz"
                                % (os.path.splitext(os.path.basename(items[0]["align_bam"]))[0], ext))
    if utils.file_exists(old_out_file):
        return old_out_file, sv_exclude_bed
    if not utils.file_exists(out_file):
        with file_transaction(items[0], out_file) as tx_out_file:
            cores = dd.get_num_cores(items[0])
            out_dir = os.path.dirname(tx_out_file)
            ref_file = dd.get_ref_file(items[0])
            full_bams = " ".join(_prepare_smoove_bams(full_bams, sr_bams, disc_bams, items,
                                                      os.path.dirname(tx_out_file)))
            std_excludes = ["~^GL", "~^HLA", "~_random", "~^chrUn", "~alt", "~decoy"]
            def _is_std_exclude(n):
                clean_excludes = [x.replace("~", "").replace("^", "") for x in std_excludes]
                return any([n.startswith(x) or n.endswith(x) for x in clean_excludes])
            exclude_chrs = [c.name for c in ref.file_contigs(ref_file)
                            if not chromhacks.is_nonalt(c.name) and not _is_std_exclude(c.name)]
            exclude_chrs = "--excludechroms '%s'" % ",".join(std_excludes + exclude_chrs)
            exclude_bed = ("--exclude %s" % sv_exclude_bed) if utils.file_exists(sv_exclude_bed) else ""
            tempdir = os.path.dirname(tx_out_file)
            cmd = ("export TMPDIR={tempdir} && "
                   "smoove call --processes {cores} --genotype --removepr --fasta {ref_file} "
                   "--name {name} --outdir {out_dir} "
                   "{exclude_bed} {exclude_chrs} {full_bams}")
            with utils.chdir(tempdir):
                try:
                    do.run(cmd.format(**locals()), "smoove lumpy calling", items[0])
                except subprocess.CalledProcessError as msg:
                    if _allowed_errors(msg):
                        vcfutils.write_empty_vcf(tx_out_file, config=items[0]["config"],
                                                 samples=[dd.get_sample_name(d) for d in items])
                    else:
                        logger.exception()
                        raise
    vcfutils.bgzip_and_index(out_file, items[0]["config"])
    return out_file, sv_exclude_bed
Ejemplo n.º 12
0
def _run_lumpy(full_bams, sr_bams, disc_bams, work_dir, items):
    """Run lumpy-sv, using speedseq pipeline.
    """
    batch = sshared.get_cur_batch(items)
    ext = "-%s-svs" % batch if batch else "-svs"
    out_file = os.path.join(work_dir, "%s%s.sv.bedpe"
                            % (os.path.splitext(os.path.basename(items[0]["align_bam"]))[0], ext))
    sv_exclude_bed = sshared.prepare_exclude_file(items, out_file)
    if not utils.file_exists(out_file):
        with file_transaction(out_file) as tx_out_file:
            with utils.curdir_tmpdir(items[0]) as tmpdir:
                out_base = tx_out_file.replace(".sv.bedpe", "")
                full_bams = ",".join(full_bams)
                sr_bams = ",".join(sr_bams)
                disc_bams = ",".join(disc_bams)
                exclude = "-x %s" % sv_exclude_bed if sv_exclude_bed else ""
                ref_file = dd.get_ref_file(items[0])
                cmd = ("speedseq sv -v -B {full_bams} -S {sr_bams} -D {disc_bams} -R {ref_file} "
                       "{exclude} -A false -T {tmpdir} -o {out_base}")
                do.run(cmd.format(**locals()), "speedseq lumpy", items[0])
    return out_file, sv_exclude_bed
Ejemplo n.º 13
0
def _run_lumpy(full_bams, sr_bams, disc_bams, work_dir, items):
    """Run lumpy-sv, using speedseq pipeline.
    """
    batch = sshared.get_cur_batch(items)
    ext = "-%s-svs" % batch if batch else "-svs"
    out_file = os.path.join(work_dir, "%s%s.sv.bedpe"
                            % (os.path.splitext(os.path.basename(items[0]["align_bam"]))[0], ext))
    sv_exclude_bed = sshared.prepare_exclude_file(items, out_file)
    if not utils.file_exists(out_file):
        with file_transaction(items[0], out_file) as tx_out_file:
            with tx_tmpdir(items[0]) as tmpdir:
                out_base = tx_out_file.replace(".sv.bedpe", "")
                full_bams = ",".join(full_bams)
                sr_bams = ",".join(sr_bams)
                disc_bams = ",".join(disc_bams)
                exclude = "-x %s" % sv_exclude_bed if sv_exclude_bed else ""
                ref_file = dd.get_ref_file(items[0])
                cmd = ("speedseq sv -v -B {full_bams} -S {sr_bams} -D {disc_bams} -R {ref_file} "
                       "{exclude} -A false -T {tmpdir} -o {out_base}")
                do.run(cmd.format(**locals()), "speedseq lumpy", items[0])
    return out_file, sv_exclude_bed
Ejemplo n.º 14
0
def _run_lumpy(full_bams, sr_bams, disc_bams, work_dir, items):
    """Run lumpy-sv, using speedseq pipeline.
    """
    batch = sshared.get_cur_batch(items)
    ext = "-%s-svs" % batch if batch else "-svs"
    out_file = os.path.join(work_dir, "%s%s.vcf"
                            % (os.path.splitext(os.path.basename(items[0]["align_bam"]))[0], ext))
    sv_exclude_bed = sshared.prepare_exclude_file(items, out_file)
    if not utils.file_exists(out_file):
        with file_transaction(items[0], out_file) as tx_out_file:
            with tx_tmpdir(items[0]) as tmpdir:
                full_bams = ",".join(full_bams)
                sr_bams = ",".join(sr_bams)
                disc_bams = ",".join(disc_bams)
                exclude = "-x %s" % sv_exclude_bed if utils.file_exists(sv_exclude_bed) else ""
                ref_file = dd.get_ref_file(items[0])
                # use our bcbio python for runs within lumpyexpress
                curpython_dir = os.path.dirname(sys.executable)
                cmd = ("export PATH={curpython_dir}:$PATH && "
                       "lumpyexpress -v -B {full_bams} -S {sr_bams} -D {disc_bams} "
                       "{exclude} -T {tmpdir} -o {tx_out_file}")
                do.run(cmd.format(**locals()), "lumpyexpress", items[0])
    return vcfutils.sort_by_ref(out_file, items[0]), sv_exclude_bed