Example #1
0
def variantcall_sample(data, region=None, align_bams=None, out_file=None):
    """Genotype one region of a sample; entry point for parallel runs.

    Calling is skipped when ``out_file`` is already present on disk.
    Otherwise the function registered under the configured
    ``variantcaller`` (``"gatk"`` when unset) is looked up in
    ``get_variantcallers()`` and invoked with a ``-raw`` variant of the
    ``out_file`` name. When the ``phasing`` setting is ``"gatk"`` the result
    additionally goes through ``phasing.read_backed_phasing``. The final
    file is handed to ``utils.symlink_plus`` together with ``out_file``.

    Returns a one-element list containing ``data``, updated in place with
    ``vrn_file`` (always) and ``region`` (when a region was supplied).
    """
    have_output = (out_file is not None and os.path.exists(out_file)
                   and os.path.lexists(out_file))
    if not have_output:
        utils.safe_makedir(os.path.dirname(out_file))
        sam_ref = data["sam_ref"]
        config = data["config"]
        available = get_variantcallers()
        run_caller = available[config["algorithm"].get("variantcaller",
                                                       "gatk")]
        if len(align_bams) != 1:
            # Several BAMs: recover the per-sample items that belong to them.
            items = multi.get_orig_items(data)
            assert len(items) == len(align_bams)
        else:
            items = [data]
        raw_base, raw_ext = utils.splitext_plus(out_file)
        raw_target = "%s-raw%s" % (raw_base, raw_ext)
        variation = data["genome_resources"]["variation"]
        called = run_caller(align_bams, items, sam_ref, variation, region,
                            raw_target)
        phasing_mode = data["config"]["algorithm"].get("phasing", False)
        if phasing_mode == "gatk":
            called = phasing.read_backed_phasing(called, align_bams, sam_ref,
                                                 region, config)
        utils.symlink_plus(called, out_file)
    if region:
        data["region"] = region
    data["vrn_file"] = out_file
    return [data]
Example #2
0
def variantcall_sample(data, region=None, out_file=None):
    """Genotype one region of a sample; entry point for parallel runs.

    ``data["work_bam"]`` may be a single path (string) or a collection of
    paths; in the latter case ``data["work_items"]`` supplies the matching
    items. The function registered under the configured ``variantcaller``
    (``"gatk"`` when unset) is invoked with a ``-raw`` variant of the
    ``out_file`` name, the result optionally goes through
    ``phasing.read_backed_phasing`` (``phasing == "gatk"``), and is then
    handed to ``utils.symlink_plus`` together with ``out_file``.

    Returns a one-element list containing ``data`` with ``work_items``
    removed and ``vrn_file`` set to ``out_file``.
    """
    safe_makedir(os.path.dirname(out_file))
    sam_ref = data["sam_ref"]
    config = data["config"]
    available = get_variantcallers()
    run_caller = available[config["algorithm"].get("variantcaller", "gatk")]
    work_bam = data["work_bam"]
    if not isinstance(work_bam, basestring):
        align_bams = work_bam
        items = data["work_items"]
    else:
        align_bams = [work_bam]
        items = [data]
    raw_base, raw_ext = os.path.splitext(out_file)
    raw_target = "%s-raw%s" % (raw_base, raw_ext)
    variation = data["genome_resources"]["variation"]
    called = run_caller(align_bams, items, sam_ref, variation, region,
                        raw_target)
    phasing_mode = data["config"]["algorithm"].get("phasing", False)
    if phasing_mode == "gatk":
        called = phasing.read_backed_phasing(called, align_bams, sam_ref,
                                             region, config)
    utils.symlink_plus(called, out_file)
    # The grouped items are only needed while calling; drop them afterwards.
    data.pop("work_items", None)
    data["vrn_file"] = out_file
    return [data]
Example #3
0
def variantcall_sample(data, region=None, align_bams=None, out_file=None):
    """Genotype one region of a sample; entry point for parallel runs.

    Nothing is recomputed when ``out_file`` is already present on disk.
    Otherwise every input BAM is passed to ``bam.index`` and the function
    registered under the configured ``variantcaller`` is looked up in
    ``get_variantcallers()`` and invoked. Without phasing the caller
    targets ``out_file`` directly; with phasing enabled it targets an
    ``-unphased`` variant of that name, and for ``phasing == "gatk"`` the
    read-backed phasing result is handed to ``utils.symlink_plus`` together
    with ``out_file``.

    Returns a one-element list containing ``data``, updated in place with
    ``vrn_file`` (always) and ``region`` (when a region was supplied).
    """
    have_output = (out_file is not None and os.path.exists(out_file)
                   and os.path.lexists(out_file))
    if not have_output:
        utils.safe_makedir(os.path.dirname(out_file))
        ref_file = dd.get_ref_file(data)
        config = data["config"]
        available = get_variantcallers()
        run_caller = available[config["algorithm"].get("variantcaller")]
        if len(align_bams) != 1:
            # Several BAMs: recover the per-sample items that belong to them.
            items = multi.get_orig_items(data)
            assert len(items) == len(align_bams)
        else:
            items = [data]
        # Fall back to an empty mapping when no variation resources are set.
        assoc_files = tz.get_in(("genome_resources", "variation"), data, {}) or {}
        for bam_file in align_bams:
            bam.index(bam_file, data["config"], check_timestamp=False)
        do_phasing = data["config"]["algorithm"].get("phasing", False)
        if do_phasing:
            call_file = "%s-unphased%s" % utils.splitext_plus(out_file)
        else:
            call_file = out_file
        call_file = run_caller(align_bams, items, ref_file, assoc_files,
                               region, call_file)
        if do_phasing == "gatk":
            call_file = phasing.read_backed_phasing(call_file, align_bams,
                                                    ref_file, region, config)
            utils.symlink_plus(call_file, out_file)
    if region:
        data["region"] = region
    data["vrn_file"] = out_file
    return [data]
Example #4
0
def variantcall_sample(data, region=None, out_file=None):
    """Parallel entry point for doing genotyping of a region of a sample.

    Looks up the function registered under the configured ``variantcaller``
    (``"gatk"`` when unset) in ``get_variantcallers()`` and invokes it with
    a ``-raw`` variant of the ``out_file`` name. When the ``phasing``
    setting is ``"gatk"`` the result is passed through
    ``phasing.read_backed_phasing``. The resulting file, plus its ``.idx``
    companion when one exists, is then symlinked to ``out_file``.

    data -- sample dictionary; reads ``sam_ref``, ``config``, ``work_bam``,
        ``genome_resources`` and, when ``work_bam`` is not a single string,
        ``work_items``.
    region -- passed through unchanged to the caller and phasing functions.
    out_file -- path the final calls are linked to.

    NOTE(review): as shown, this variant has no explicit return and does not
    set ``data["vrn_file"]`` -- confirm whether trailing lines were lost.
    """
    import errno  # local import; only needed for the symlink check below
    safe_makedir(os.path.dirname(out_file))
    sam_ref = data["sam_ref"]
    config = data["config"]
    caller_fns = get_variantcallers()
    caller_fn = caller_fns[config["algorithm"].get("variantcaller", "gatk")]
    if isinstance(data["work_bam"], basestring):
        align_bams = [data["work_bam"]]
        items = [data]
    else:
        align_bams = data["work_bam"]
        items = data["work_items"]
    call_file = "%s-raw%s" % os.path.splitext(out_file)
    call_file = caller_fn(align_bams, items, sam_ref,
                          data["genome_resources"]["variation"],
                          region, call_file)
    if data["config"]["algorithm"].get("phasing", False) == "gatk":
        call_file = phasing.read_backed_phasing(call_file, align_bams, sam_ref, region, config)
    for ext in ["", ".idx"]:
        link_source = call_file + ext
        link_target = out_file + ext
        if not os.path.exists(link_target) and os.path.exists(link_source):
            try:
                os.symlink(link_source, link_target)
            except OSError as err:
                # Tolerate the link having appeared since the existence check
                # above (e.g. created by a concurrent job). Compare the error
                # code rather than the message text, which is platform and
                # locale dependent. Anything else is a real error.
                if err.errno != errno.EEXIST:
                    raise
Example #5
0
def variantcall_sample(data, region=None, align_bams=None, out_file=None):
    """Genotype one region of a sample; entry point for parallel runs.

    Nothing is recomputed when ``out_file`` is already present on disk.
    Otherwise every input BAM is passed to ``bam.index`` and the function
    registered under the configured ``variantcaller`` (``"gatk"`` when
    unset) is invoked. Without phasing the caller targets ``out_file``
    directly; with phasing enabled it targets a ``-raw`` variant of that
    name, and for ``phasing == "gatk"`` the read-backed phasing result is
    handed to ``utils.symlink_plus`` together with ``out_file``.

    Returns a one-element list containing ``data``, updated in place with
    ``vrn_file`` (always) and ``region`` (when a region was supplied).
    """
    output_ready = (out_file is not None and os.path.exists(out_file)
                    and os.path.lexists(out_file))
    if not output_ready:
        utils.safe_makedir(os.path.dirname(out_file))
        sam_ref = data["sam_ref"]
        config = data["config"]
        registry = get_variantcallers()
        run_caller = registry[config["algorithm"].get("variantcaller", "gatk")]
        if len(align_bams) != 1:
            # Several BAMs: recover the per-sample items that belong to them.
            items = multi.get_orig_items(data)
            assert len(items) == len(align_bams)
        else:
            items = [data]
        # Fall back to an empty mapping when no variation resources are set.
        assoc_files = tz.get_in(("genome_resources", "variation"), data, {}) or {}
        for bam_file in align_bams:
            bam.index(bam_file, data["config"], check_timestamp=False)
        do_phasing = data["config"]["algorithm"].get("phasing", False)
        if do_phasing:
            call_file = "%s-raw%s" % utils.splitext_plus(out_file)
        else:
            call_file = out_file
        call_file = run_caller(align_bams, items, sam_ref, assoc_files,
                               region, call_file)
        if do_phasing == "gatk":
            call_file = phasing.read_backed_phasing(call_file, align_bams,
                                                    sam_ref, region, config)
            utils.symlink_plus(call_file, out_file)
    if region:
        data["region"] = region
    data["vrn_file"] = out_file
    return [data]
Example #6
0
def variantcall_sample(data, region=None, out_file=None):
    """Parallel entry point for doing genotyping of a region of a sample.

    Looks up the function registered under the configured ``variantcaller``
    (``"gatk"`` when unset) in ``get_variantcallers()`` and invokes it with
    a ``-raw`` variant of the ``out_file`` name. When the ``phasing``
    setting is ``"gatk"`` the result is passed through
    ``phasing.read_backed_phasing``. The resulting file, plus its ``.idx``
    companion when one exists, is then symlinked to ``out_file``.

    data -- sample dictionary; reads ``sam_ref``, ``config``, ``work_bam``,
        ``genome_resources`` and, when ``work_bam`` is not a single string,
        ``work_items``.
    region -- passed through unchanged to the caller and phasing functions.
    out_file -- path the final calls are linked to.

    NOTE(review): as shown, this variant has no explicit return and does not
    set ``data["vrn_file"]`` -- confirm whether trailing lines were lost.
    """
    import errno  # local import; only needed for the symlink check below
    safe_makedir(os.path.dirname(out_file))
    sam_ref = data["sam_ref"]
    config = data["config"]
    caller_fns = get_variantcallers()
    caller_fn = caller_fns[config["algorithm"].get("variantcaller", "gatk")]
    if isinstance(data["work_bam"], basestring):
        align_bams = [data["work_bam"]]
        items = [data]
    else:
        align_bams = data["work_bam"]
        items = data["work_items"]
    call_file = "%s-raw%s" % os.path.splitext(out_file)
    call_file = caller_fn(align_bams, items, sam_ref,
                          data["genome_resources"]["variation"],
                          region, call_file)
    if data["config"]["algorithm"].get("phasing", False) == "gatk":
        call_file = phasing.read_backed_phasing(call_file, align_bams, sam_ref, region, config)
    for ext in ["", ".idx"]:
        link_source = call_file + ext
        link_target = out_file + ext
        if not os.path.exists(link_target) and os.path.exists(link_source):
            try:
                os.symlink(link_source, link_target)
            except OSError as err:
                # Tolerate the link having appeared since the existence check
                # above (e.g. created by a concurrent job). Compare the error
                # code rather than the message text, which is platform and
                # locale dependent. Anything else is a real error.
                if err.errno != errno.EEXIST:
                    raise
Example #7
0
def variantcall_sample(data, region=None, out_file=None):
    """Run variant calling for a sample region (parallel entry point).

    A string ``data["work_bam"]`` is treated as one BAM with ``data`` as its
    only item; any other value is used as the BAM collection with
    ``data["work_items"]`` as the items. The function registered under the
    configured ``variantcaller`` (``"gatk"`` when unset) is invoked with a
    ``-raw`` variant of the ``out_file`` name; with ``phasing == "gatk"``
    its result goes through ``phasing.read_backed_phasing``. The final file
    is handed to ``utils.symlink_plus`` together with ``out_file``.

    Returns ``[data]`` after removing ``work_items`` and setting
    ``vrn_file`` to ``out_file``.
    """
    safe_makedir(os.path.dirname(out_file))
    sam_ref = data["sam_ref"]
    config = data["config"]
    registry = get_variantcallers()
    genotype = registry[config["algorithm"].get("variantcaller", "gatk")]
    bam_input = data["work_bam"]
    single_bam = isinstance(bam_input, basestring)
    align_bams = [bam_input] if single_bam else bam_input
    items = [data] if single_bam else data["work_items"]
    raw_name = "%s-raw%s" % os.path.splitext(out_file)
    variation = data["genome_resources"]["variation"]
    vrn_out = genotype(align_bams, items, sam_ref, variation, region, raw_name)
    if data["config"]["algorithm"].get("phasing", False) == "gatk":
        vrn_out = phasing.read_backed_phasing(vrn_out, align_bams, sam_ref,
                                              region, config)
    utils.symlink_plus(vrn_out, out_file)
    # The grouped items are only needed while calling; drop them afterwards.
    data.pop("work_items", None)
    data["vrn_file"] = out_file
    return [data]
Example #8
0
def variantcall_sample(data, region=None, out_file=None):
    """Genotype one region of a sample; entry point for parallel runs.

    The function mapped to the configured ``variantcaller`` (``"gatk"``
    when unset) is invoked with a ``-raw`` variant of the ``out_file`` name
    as its last argument; its return value is not used. When the
    ``phasing`` setting is ``"gatk"`` the raw file is passed through
    ``phasing.read_backed_phasing``. If ``out_file`` does not exist yet,
    the resulting file and its ``.idx`` companion are symlinked into place,
    each only when present.

    Returns a one-element list containing ``data`` with ``vrn_file`` set to
    ``out_file``.
    """
    from bcbio.variation import freebayes, cortex, samtools, varscan
    safe_makedir(os.path.dirname(out_file))
    available = {
        "gatk": unified_genotyper,
        "gatk-haplotype": haplotype_caller,
        "freebayes": freebayes.run_freebayes,
        "cortex": cortex.run_cortex,
        "samtools": samtools.run_samtools,
        "varscan": varscan.run_varscan,
    }
    sam_ref = data["sam_ref"]
    config = data["config"]
    run_caller = available[config["algorithm"].get("variantcaller", "gatk")]
    work_bam = data["work_bam"]
    # Normalise a single BAM path into a one-element list.
    align_bams = [work_bam] if isinstance(work_bam, basestring) else work_bam
    raw_base, raw_ext = os.path.splitext(out_file)
    call_file = "%s-raw%s" % (raw_base, raw_ext)
    dbsnp_file = configured_ref_file("dbsnp", config, sam_ref)
    run_caller(align_bams, sam_ref, config, dbsnp_file, region, call_file)
    phasing_mode = data["config"]["algorithm"].get("phasing", False)
    if phasing_mode == "gatk":
        call_file = phasing.read_backed_phasing(call_file, align_bams, sam_ref,
                                                region, config)
    # Linking is all-or-nothing on the main output: skip both extensions
    # when out_file is already there.
    link_exts = [] if os.path.exists(out_file) else ["", ".idx"]
    for ext in link_exts:
        source = call_file + ext
        if os.path.exists(source):
            os.symlink(source, out_file + ext)
    data["vrn_file"] = out_file
    return [data]
Example #9
0
def variantcall_sample(data, region=None, out_file=None):
    """Parallel entry point for doing genotyping of a region of a sample.

    The function mapped to the configured ``variantcaller`` (``"gatk"``
    when unset) is invoked with a ``-raw`` variant of the ``out_file`` name
    as its last argument; its return value is not used. When the
    ``phasing`` setting is ``"gatk"`` the raw file is passed through
    ``phasing.read_backed_phasing``. The resulting file, plus its ``.idx``
    companion when one exists, is then symlinked to ``out_file``.

    data -- sample dictionary; reads ``sam_ref``, ``config``, ``work_bam``
        and, when ``work_bam`` is not a single string, ``work_items``.
    region -- passed through unchanged to the caller and phasing functions.
    out_file -- path the final calls are linked to.

    NOTE(review): as shown, this variant has no explicit return and does not
    set ``data["vrn_file"]`` -- confirm whether trailing lines were lost.
    """
    import errno  # local import; only needed for the symlink check below
    from bcbio.variation import freebayes, cortex, samtools, varscan, mutect
    safe_makedir(os.path.dirname(out_file))
    caller_fns = {"gatk": unified_genotyper,
                  "gatk-haplotype": haplotype_caller,
                  "freebayes": freebayes.run_freebayes,
                  "cortex": cortex.run_cortex,
                  "samtools": samtools.run_samtools,
                  "varscan": varscan.run_varscan,
                  "mutect": mutect.mutect_caller}
    sam_ref = data["sam_ref"]
    config = data["config"]
    caller_fn = caller_fns[config["algorithm"].get("variantcaller", "gatk")]
    if isinstance(data["work_bam"], basestring):
        align_bams = [data["work_bam"]]
        items = [data]
    else:
        align_bams = data["work_bam"]
        items = data["work_items"]
    call_file = "%s-raw%s" % os.path.splitext(out_file)
    caller_fn(align_bams, items, sam_ref,
              configured_vrn_files(config, sam_ref),
              region, call_file)
    if data["config"]["algorithm"].get("phasing", False) == "gatk":
        call_file = phasing.read_backed_phasing(call_file, align_bams, sam_ref, region, config)
    for ext in ["", ".idx"]:
        link_source = call_file + ext
        link_target = out_file + ext
        if not os.path.exists(link_target) and os.path.exists(link_source):
            try:
                os.symlink(link_source, link_target)
            except OSError as err:
                # Tolerate the link having appeared since the existence check
                # above (e.g. created by a concurrent job). Compare the error
                # code rather than the message text, which is platform and
                # locale dependent. Anything else is a real error.
                if err.errno != errno.EEXIST:
                    raise
Example #10
0
def variantcall_sample(data, region=None, out_file=None):
    """Run variant calling for a sample region (parallel entry point).

    Picks the calling function for the configured ``variantcaller``
    (``"gatk"`` when unset) and invokes it with a ``-raw`` variant of the
    ``out_file`` name as its last argument, ignoring the return value. With
    ``phasing == "gatk"`` the raw file goes through
    ``phasing.read_backed_phasing``. When ``out_file`` is missing, the
    resulting file and its ``.idx`` companion are symlinked into place,
    each only when present.

    Returns ``[data]`` with ``vrn_file`` set to ``out_file``.
    """
    from bcbio.variation import freebayes, cortex, samtools, varscan
    safe_makedir(os.path.dirname(out_file))
    registry = {
        "gatk": unified_genotyper,
        "gatk-haplotype": haplotype_caller,
        "freebayes": freebayes.run_freebayes,
        "cortex": cortex.run_cortex,
        "samtools": samtools.run_samtools,
        "varscan": varscan.run_varscan,
    }
    sam_ref = data["sam_ref"]
    config = data["config"]
    genotype = registry[config["algorithm"].get("variantcaller", "gatk")]
    bam_input = data["work_bam"]
    if not isinstance(bam_input, basestring):
        align_bams = bam_input
    else:
        # Normalise a single BAM path into a one-element list.
        align_bams = [bam_input]
    call_file = "%s-raw%s" % os.path.splitext(out_file)
    dbsnp_file = configured_ref_file("dbsnp", config, sam_ref)
    genotype(align_bams, sam_ref, config, dbsnp_file, region, call_file)
    if data["config"]["algorithm"].get("phasing", False) == "gatk":
        call_file = phasing.read_backed_phasing(call_file, align_bams, sam_ref,
                                                region, config)
    # Linking is all-or-nothing on the main output: skip both extensions
    # when out_file is already there.
    pending_exts = ["", ".idx"] if not os.path.exists(out_file) else []
    for ext in pending_exts:
        produced = call_file + ext
        if os.path.exists(produced):
            os.symlink(produced, out_file + ext)
    data["vrn_file"] = out_file
    return [data]
Example #11
0
def finalize_genotyper(call_file, bam_file, ref_file, config):
    """Post-process raw calls: annotate, filter, phase and evaluate.

    Calls produced by ``freebayes`` or ``cortex`` are first passed through
    ``freebayes.postcall_annotate``. The calls then go through
    ``variant_filtration`` and ``phasing.read_backed_phasing``, and the
    phased result is handed to ``_eval_genotyper`` along with the dbSNP
    entry of the configured variation files.

    Returns the value produced by the phasing step.
    """
    resources = configured_vrn_files(config, ref_file)
    caller_name = config["algorithm"].get("variantcaller", "gatk")
    needs_annotation = caller_name in ("freebayes", "cortex")
    if needs_annotation:
        call_file = freebayes.postcall_annotate(call_file, ref_file,
                                                resources, config)
    filtered = variant_filtration(call_file, ref_file, resources, config)
    phased = phasing.read_backed_phasing(filtered, bam_file, ref_file, config)
    _eval_genotyper(phased, ref_file, resources.dbsnp, config)
    return phased
Example #12
0
def finalize_genotyper(call_file, bam_file, ref_file, config):
    """Post-process raw calls: annotate, filter, phase and evaluate.

    Calls produced by ``freebayes``, ``cortex`` or ``samtools`` are first
    passed through ``freebayes.postcall_annotate``. The calls then go
    through ``variant_filtration`` and ``phasing.read_backed_phasing``, and
    the phased result is handed to ``_eval_genotyper`` along with the dbSNP
    entry of the configured variation files.

    Returns the value produced by the phasing step.
    """
    resources = configured_vrn_files(config, ref_file)
    caller_name = config["algorithm"].get("variantcaller", "gatk")
    needs_annotation = caller_name in ("freebayes", "cortex", "samtools")
    if needs_annotation:
        call_file = freebayes.postcall_annotate(call_file, ref_file,
                                                resources, config)
    filtered = variant_filtration(call_file, ref_file, resources, config)
    phased = phasing.read_backed_phasing(filtered, bam_file, ref_file, config)
    _eval_genotyper(phased, ref_file, resources.dbsnp, config)
    return phased
Example #13
0
def variantcall_sample(data, region=None, out_file=None):
    """Parallel entry point for doing genotyping of a region of a sample.

    The function mapped to the configured ``variantcaller`` (``"gatk"``
    when unset) is invoked with a ``-raw`` variant of the ``out_file`` name
    as its last argument; its return value is not used. When the
    ``phasing`` setting is ``"gatk"`` the raw file is passed through
    ``phasing.read_backed_phasing``. The resulting file, plus its ``.idx``
    companion when one exists, is then symlinked to ``out_file``.

    data -- sample dictionary; reads ``sam_ref``, ``config``, ``work_bam``
        and, when ``work_bam`` is not a single string, ``work_items``.
    region -- passed through unchanged to the caller and phasing functions.
    out_file -- path the final calls are linked to.
    """
    import errno  # local import; only needed for the symlink check below
    from bcbio.variation import freebayes, cortex, samtools, varscan, mutect
    safe_makedir(os.path.dirname(out_file))
    caller_fns = {
        "gatk": unified_genotyper,
        "gatk-haplotype": haplotype_caller,
        "freebayes": freebayes.run_freebayes,
        "cortex": cortex.run_cortex,
        "samtools": samtools.run_samtools,
        "varscan": varscan.run_varscan,
        "mutect": mutect.mutect_caller
    }
    sam_ref = data["sam_ref"]
    config = data["config"]
    caller_fn = caller_fns[config["algorithm"].get("variantcaller", "gatk")]
    if isinstance(data["work_bam"], basestring):
        align_bams = [data["work_bam"]]
        items = [data]
    else:
        align_bams = data["work_bam"]
        items = data["work_items"]
    call_file = "%s-raw%s" % os.path.splitext(out_file)
    caller_fn(align_bams, items, sam_ref,
              configured_vrn_files(config, sam_ref), region, call_file)
    if data["config"]["algorithm"].get("phasing", False) == "gatk":
        call_file = phasing.read_backed_phasing(call_file, align_bams, sam_ref,
                                                region, config)
    for ext in ["", ".idx"]:
        link_source = call_file + ext
        link_target = out_file + ext
        if not os.path.exists(link_target) and os.path.exists(link_source):
            try:
                os.symlink(link_source, link_target)
            except OSError as err:
                # Tolerate the link having appeared since the existence check
                # above (e.g. created by a concurrent job). Compare the error
                # code rather than the message text, which is platform and
                # locale dependent. Anything else is a real error.
                if err.errno != errno.EEXIST:
                    raise