def variantcall_sample(data, region=None, align_bams=None, out_file=None):
    """Genotype one region of a sample; entry point for parallel runs.

    Unless ``out_file`` is already present, looks up the caller named by
    ``config["algorithm"]["variantcaller"]`` (``"gatk"`` when unset), runs it
    on ``align_bams``, applies read-backed phasing when
    ``config["algorithm"]["phasing"]`` equals ``"gatk"`` and links the result
    to ``out_file`` with ``utils.symlink_plus``.

    Args:
        data: sample dictionary; ``sam_ref``, ``config`` and
            ``genome_resources``/``variation`` are read from it.
        region: region passed through to the caller; stored under
            ``data["region"]`` when truthy.
        align_bams: sequence of alignment files handed to the caller.  With a
            single entry ``data`` is the only item; otherwise the items come
            from ``multi.get_orig_items(data)``.
        out_file: path the final calls are linked to.

    Returns:
        ``[data]`` with ``data["vrn_file"]`` set to ``out_file``.
    """
    already_done = (out_file is not None
                    and os.path.exists(out_file)
                    and os.path.lexists(out_file))
    if not already_done:
        utils.safe_makedir(os.path.dirname(out_file))
        reference = data["sam_ref"]
        config = data["config"]
        run_caller = get_variantcallers()[
            config["algorithm"].get("variantcaller", "gatk")]
        items = ([data] if len(align_bams) == 1
                 else multi.get_orig_items(data))
        # Trivially true for the single-input case.
        assert len(items) == len(align_bams)
        base, ext = utils.splitext_plus(out_file)
        raw_target = "%s-raw%s" % (base, ext)
        called = run_caller(align_bams, items, reference,
                            data["genome_resources"]["variation"],
                            region, raw_target)
        if data["config"]["algorithm"].get("phasing", False) == "gatk":
            called = phasing.read_backed_phasing(called, align_bams,
                                                 reference, region, config)
        utils.symlink_plus(called, out_file)
    if region:
        data["region"] = region
    data["vrn_file"] = out_file
    return [data]
def variantcall_sample(data, region=None, out_file=None):
    """Genotype one region of a sample; entry point for parallel runs.

    Picks the caller named by ``config["algorithm"]["variantcaller"]``
    (``"gatk"`` when unset), runs it into a ``-raw`` sibling of ``out_file``,
    applies read-backed phasing when ``config["algorithm"]["phasing"]`` equals
    ``"gatk"`` and links the result to ``out_file``.

    Args:
        data: sample dictionary.  ``work_bam`` is either a single path string
            or a collection of inputs; in the latter case ``work_items``
            supplies the matching items.
        region: region passed through to the caller.
        out_file: path the final calls are linked to.

    Returns:
        ``[data]`` with ``vrn_file`` set to ``out_file`` and any
        ``work_items`` entry removed.
    """
    safe_makedir(os.path.dirname(out_file))
    reference = data["sam_ref"]
    config = data["config"]
    run_caller = get_variantcallers()[
        config["algorithm"].get("variantcaller", "gatk")]
    if isinstance(data["work_bam"], basestring):
        align_bams, items = [data["work_bam"]], [data]
    else:
        align_bams, items = data["work_bam"], data["work_items"]
    base, ext = os.path.splitext(out_file)
    called = run_caller(align_bams, items, reference,
                        data["genome_resources"]["variation"],
                        region, "%s-raw%s" % (base, ext))
    if data["config"]["algorithm"].get("phasing", False) == "gatk":
        called = phasing.read_backed_phasing(called, align_bams, reference,
                                             region, config)
    utils.symlink_plus(called, out_file)
    # Drop the per-item payload before handing the sample back.
    data.pop("work_items", None)
    data["vrn_file"] = out_file
    return [data]
def variantcall_sample(data, region=None, align_bams=None, out_file=None):
    """Genotype one region of a sample; entry point for parallel runs.

    Unless ``out_file`` is already present, indexes every input with
    ``bam.index``, runs the caller named by
    ``config["algorithm"]["variantcaller"]`` and, when
    ``config["algorithm"]["phasing"]`` equals ``"gatk"``, applies read-backed
    phasing.  The result is linked to ``out_file``.

    Args:
        data: sample dictionary; ``config`` is read directly, the reference
            via ``dd.get_ref_file`` and associated variation files from
            ``genome_resources``/``variation`` (an empty dict when absent).
        region: region passed through to the caller; stored under
            ``data["region"]`` when truthy.
        align_bams: sequence of alignment files handed to the caller.  With a
            single entry ``data`` is the only item; otherwise the items come
            from ``multi.get_orig_items(data)``.
        out_file: path of the final calls.

    Returns:
        ``[data]`` with ``data["vrn_file"]`` set to ``out_file``.
    """
    already_done = (out_file is not None
                    and os.path.exists(out_file)
                    and os.path.lexists(out_file))
    if not already_done:
        utils.safe_makedir(os.path.dirname(out_file))
        reference = dd.get_ref_file(data)
        config = data["config"]
        run_caller = get_variantcallers()[
            config["algorithm"].get("variantcaller")]
        items = ([data] if len(align_bams) == 1
                 else multi.get_orig_items(data))
        # Trivially true for the single-input case.
        assert len(items) == len(align_bams)
        assoc_files = tz.get_in(("genome_resources", "variation"),
                                data, {}) or {}
        for bam_file in align_bams:
            bam.index(bam_file, data["config"], check_timestamp=False)
        do_phasing = data["config"]["algorithm"].get("phasing", False)
        if do_phasing:
            # Call into an intermediate file so phasing can produce the
            # final output.
            target = "%s-unphased%s" % utils.splitext_plus(out_file)
        else:
            target = out_file
        called = run_caller(align_bams, items, reference, assoc_files,
                            region, target)
        if do_phasing == "gatk":
            called = phasing.read_backed_phasing(called, align_bams,
                                                 reference, region, config)
        # NOTE(review): presumably a no-op when the caller already wrote
        # straight to out_file -- confirm against utils.symlink_plus.
        utils.symlink_plus(called, out_file)
    if region:
        data["region"] = region
    data["vrn_file"] = out_file
    return [data]
def variantcall_sample(data, region=None, out_file=None):
    """Genotype one region of a sample; entry point for parallel runs.

    Picks the caller named by ``config["algorithm"]["variantcaller"]``
    (``"gatk"`` when unset), runs it into a ``-raw`` sibling of ``out_file``,
    applies read-backed phasing when ``config["algorithm"]["phasing"]`` equals
    ``"gatk"`` and symlinks the result (plus its ``.idx`` companion, when
    present) to ``out_file``.

    Args:
        data: sample dictionary.  ``work_bam`` is either a single path string
            or a collection of inputs; in the latter case ``work_items``
            supplies the matching items.
        region: region passed through to the caller.
        out_file: path the final calls are linked to.

    Raises:
        OSError: if creating a symlink fails for any reason other than the
            link already existing.
    """
    import errno

    safe_makedir(os.path.dirname(out_file))
    sam_ref = data["sam_ref"]
    config = data["config"]
    caller_fns = get_variantcallers()
    caller_fn = caller_fns[config["algorithm"].get("variantcaller", "gatk")]
    if isinstance(data["work_bam"], basestring):
        align_bams = [data["work_bam"]]
        items = [data]
    else:
        align_bams = data["work_bam"]
        items = data["work_items"]
    call_file = "%s-raw%s" % os.path.splitext(out_file)
    call_file = caller_fn(align_bams, items, sam_ref,
                          data["genome_resources"]["variation"],
                          region, call_file)
    if data["config"]["algorithm"].get("phasing", False) == "gatk":
        call_file = phasing.read_backed_phasing(call_file, align_bams, sam_ref,
                                                region, config)
    for ext in ["", ".idx"]:
        source, link = call_file + ext, out_file + ext
        if os.path.exists(link) or not os.path.exists(source):
            continue
        try:
            os.symlink(source, link)
        except OSError as exc:
            # The link can appear between the existence check and the call
            # (e.g. a concurrent run); compare the error code rather than
            # the locale-dependent message text.
            if exc.errno != errno.EEXIST:
                raise
    # NOTE(review): as shown, this variant stops after linking and therefore
    # returns None without recording data["vrn_file"] -- confirm whether the
    # tail of the function was lost.
def variantcall_sample(data, region=None, align_bams=None, out_file=None):
    """Genotype one region of a sample; entry point for parallel runs.

    Unless ``out_file`` is already present, indexes every input with
    ``bam.index``, runs the caller named by
    ``config["algorithm"]["variantcaller"]`` (``"gatk"`` when unset) and,
    when ``config["algorithm"]["phasing"]`` equals ``"gatk"``, applies
    read-backed phasing.  The result is linked to ``out_file``.

    Args:
        data: sample dictionary; ``sam_ref`` and ``config`` are read
            directly, associated variation files from
            ``genome_resources``/``variation`` (an empty dict when absent).
        region: region passed through to the caller; stored under
            ``data["region"]`` when truthy.
        align_bams: sequence of alignment files handed to the caller.  With a
            single entry ``data`` is the only item; otherwise the items come
            from ``multi.get_orig_items(data)``.
        out_file: path of the final calls.

    Returns:
        ``[data]`` with ``data["vrn_file"]`` set to ``out_file``.
    """
    already_done = (out_file is not None
                    and os.path.exists(out_file)
                    and os.path.lexists(out_file))
    if not already_done:
        utils.safe_makedir(os.path.dirname(out_file))
        reference = data["sam_ref"]
        config = data["config"]
        run_caller = get_variantcallers()[
            config["algorithm"].get("variantcaller", "gatk")]
        items = ([data] if len(align_bams) == 1
                 else multi.get_orig_items(data))
        # Trivially true for the single-input case.
        assert len(items) == len(align_bams)
        assoc_files = tz.get_in(("genome_resources", "variation"),
                                data, {}) or {}
        for bam_file in align_bams:
            bam.index(bam_file, data["config"], check_timestamp=False)
        do_phasing = data["config"]["algorithm"].get("phasing", False)
        if do_phasing:
            # Call into an intermediate file so phasing can produce the
            # final output.
            target = "%s-raw%s" % utils.splitext_plus(out_file)
        else:
            target = out_file
        called = run_caller(align_bams, items, reference, assoc_files,
                            region, target)
        if do_phasing == "gatk":
            called = phasing.read_backed_phasing(called, align_bams,
                                                 reference, region, config)
        # NOTE(review): presumably a no-op when the caller already wrote
        # straight to out_file -- confirm against utils.symlink_plus.
        utils.symlink_plus(called, out_file)
    if region:
        data["region"] = region
    data["vrn_file"] = out_file
    return [data]
def variantcall_sample(data, region=None, out_file=None):
    """Genotype one region of a sample; entry point for parallel runs.

    Runs the caller named by ``config["algorithm"]["variantcaller"]``
    (``"gatk"`` when unset) into a ``-raw`` sibling of ``out_file``, applies
    read-backed phasing when ``config["algorithm"]["phasing"]`` equals
    ``"gatk"`` and symlinks the result (plus its ``.idx`` companion, when
    present) to ``out_file``.

    Args:
        data: sample dictionary; ``work_bam`` is either a single path string
            or a collection of inputs.
        region: region passed through to the caller.
        out_file: path the final calls are linked to.

    Returns:
        ``[data]`` with ``data["vrn_file"]`` set to ``out_file``.

    Raises:
        KeyError: if the configured variant caller is not in the table.
        OSError: if creating a symlink fails for any reason other than the
            link already existing.
    """
    import errno
    from bcbio.variation import freebayes, cortex, samtools, varscan

    safe_makedir(os.path.dirname(out_file))
    caller_fns = {
        "gatk": unified_genotyper,
        "gatk-haplotype": haplotype_caller,
        "freebayes": freebayes.run_freebayes,
        "cortex": cortex.run_cortex,
        "samtools": samtools.run_samtools,
        "varscan": varscan.run_varscan,
    }
    sam_ref = data["sam_ref"]
    config = data["config"]
    caller_fn = caller_fns[config["algorithm"].get("variantcaller", "gatk")]
    if isinstance(data["work_bam"], basestring):
        align_bams = [data["work_bam"]]
    else:
        align_bams = data["work_bam"]
    call_file = "%s-raw%s" % os.path.splitext(out_file)
    caller_fn(align_bams, sam_ref, config,
              configured_ref_file("dbsnp", config, sam_ref),
              region, call_file)
    if data["config"]["algorithm"].get("phasing", False) == "gatk":
        call_file = phasing.read_backed_phasing(call_file, align_bams, sam_ref,
                                                region, config)
    # Check each extension on its own so a missing index is still linked when
    # the main output already exists.
    for ext in ["", ".idx"]:
        source, link = call_file + ext, out_file + ext
        if os.path.exists(link) or not os.path.exists(source):
            continue
        try:
            os.symlink(source, link)
        except OSError as exc:
            # The link can appear between the existence check and the call
            # (e.g. a concurrent run); anything else is a real failure.
            if exc.errno != errno.EEXIST:
                raise
    data["vrn_file"] = out_file
    return [data]
def variantcall_sample(data, region=None, out_file=None):
    """Genotype one region of a sample; entry point for parallel runs.

    Runs the caller named by ``config["algorithm"]["variantcaller"]``
    (``"gatk"`` when unset) into a ``-raw`` sibling of ``out_file``, applies
    read-backed phasing when ``config["algorithm"]["phasing"]`` equals
    ``"gatk"`` and symlinks the result (plus its ``.idx`` companion, when
    present) to ``out_file``.

    Args:
        data: sample dictionary.  ``work_bam`` is either a single path string
            or a collection of inputs; in the latter case ``work_items``
            supplies the matching items.
        region: region passed through to the caller.
        out_file: path the final calls are linked to.

    Raises:
        KeyError: if the configured variant caller is not in the table.
        OSError: if creating a symlink fails for any reason other than the
            link already existing.
    """
    import errno
    from bcbio.variation import freebayes, cortex, samtools, varscan, mutect

    safe_makedir(os.path.dirname(out_file))
    caller_fns = {
        "gatk": unified_genotyper,
        "gatk-haplotype": haplotype_caller,
        "freebayes": freebayes.run_freebayes,
        "cortex": cortex.run_cortex,
        "samtools": samtools.run_samtools,
        "varscan": varscan.run_varscan,
        "mutect": mutect.mutect_caller,
    }
    sam_ref = data["sam_ref"]
    config = data["config"]
    caller_fn = caller_fns[config["algorithm"].get("variantcaller", "gatk")]
    if isinstance(data["work_bam"], basestring):
        align_bams = [data["work_bam"]]
        items = [data]
    else:
        align_bams = data["work_bam"]
        items = data["work_items"]
    call_file = "%s-raw%s" % os.path.splitext(out_file)
    caller_fn(align_bams, items, sam_ref,
              configured_vrn_files(config, sam_ref), region, call_file)
    if data["config"]["algorithm"].get("phasing", False) == "gatk":
        call_file = phasing.read_backed_phasing(call_file, align_bams, sam_ref,
                                                region, config)
    for ext in ["", ".idx"]:
        source, link = call_file + ext, out_file + ext
        if os.path.exists(link) or not os.path.exists(source):
            continue
        try:
            os.symlink(source, link)
        except OSError as exc:
            # The link can appear between the existence check and the call
            # (e.g. a concurrent run); compare the error code rather than
            # the locale-dependent message text.
            if exc.errno != errno.EEXIST:
                raise
    # NOTE(review): as shown, this variant stops after linking and therefore
    # returns None without recording data["vrn_file"] -- confirm whether the
    # tail of the function was lost.
def variantcall_sample(data, region=None, out_file=None):
    """Genotype one region of a sample; entry point for parallel runs.

    Runs the caller named by ``config["algorithm"]["variantcaller"]``
    (``"gatk"`` when unset) into a ``-raw`` sibling of ``out_file``, applies
    read-backed phasing when ``config["algorithm"]["phasing"]`` equals
    ``"gatk"`` and symlinks the result (plus its ``.idx`` companion, when
    present) to ``out_file``.

    Args:
        data: sample dictionary; ``work_bam`` is either a single path string
            or a collection of inputs.
        region: region passed through to the caller.
        out_file: path the final calls are linked to.

    Returns:
        ``[data]`` with ``data["vrn_file"]`` set to ``out_file``.

    Raises:
        KeyError: if the configured variant caller is not in the table.
        OSError: if creating a symlink fails for any reason other than the
            link already existing.
    """
    import errno
    from bcbio.variation import freebayes, cortex, samtools, varscan

    safe_makedir(os.path.dirname(out_file))
    caller_fns = {
        "gatk": unified_genotyper,
        "gatk-haplotype": haplotype_caller,
        "freebayes": freebayes.run_freebayes,
        "cortex": cortex.run_cortex,
        "samtools": samtools.run_samtools,
        "varscan": varscan.run_varscan,
    }
    sam_ref = data["sam_ref"]
    config = data["config"]
    caller_fn = caller_fns[config["algorithm"].get("variantcaller", "gatk")]
    if isinstance(data["work_bam"], basestring):
        align_bams = [data["work_bam"]]
    else:
        align_bams = data["work_bam"]
    call_file = "%s-raw%s" % os.path.splitext(out_file)
    caller_fn(align_bams, sam_ref, config,
              configured_ref_file("dbsnp", config, sam_ref),
              region, call_file)
    if data["config"]["algorithm"].get("phasing", False) == "gatk":
        call_file = phasing.read_backed_phasing(call_file, align_bams, sam_ref,
                                                region, config)
    # Check each extension on its own so a missing index is still linked when
    # the main output already exists.
    for ext in ["", ".idx"]:
        source, link = call_file + ext, out_file + ext
        if os.path.exists(link) or not os.path.exists(source):
            continue
        try:
            os.symlink(source, link)
        except OSError as exc:
            # The link can appear between the existence check and the call
            # (e.g. a concurrent run); anything else is a real failure.
            if exc.errno != errno.EEXIST:
                raise
    data["vrn_file"] = out_file
    return [data]
def finalize_genotyper(call_file, bam_file, ref_file, config):
    """Post-process raw variant calls: annotate, filter, phase and evaluate.

    Calls produced by ``freebayes`` or ``cortex`` are first passed through
    ``freebayes.postcall_annotate``.  The calls are then run through
    ``variant_filtration`` and ``phasing.read_backed_phasing``, and the
    phased output is handed to ``_eval_genotyper`` together with the
    configured dbSNP file.

    Args:
        call_file: variant calls to finalize.
        bam_file: alignment input given to the phasing step.
        ref_file: reference passed to every step.
        config: configuration; ``config["algorithm"]["variantcaller"]``
            names the caller that produced ``call_file`` (``"gatk"`` when
            unset).

    Returns:
        The value returned by ``phasing.read_backed_phasing``.
    """
    vrn_files = configured_vrn_files(config, ref_file)
    caller_name = config["algorithm"].get("variantcaller", "gatk")
    needs_annotation = caller_name in ("freebayes", "cortex")
    if needs_annotation:
        call_file = freebayes.postcall_annotate(call_file, ref_file,
                                                vrn_files, config)
    filtered = variant_filtration(call_file, ref_file, vrn_files, config)
    phased = phasing.read_backed_phasing(filtered, bam_file, ref_file, config)
    _eval_genotyper(phased, ref_file, vrn_files.dbsnp, config)
    return phased
def finalize_genotyper(call_file, bam_file, ref_file, config):
    """Post-process raw variant calls: annotate, filter, phase and evaluate.

    Calls produced by ``freebayes``, ``cortex`` or ``samtools`` are first
    passed through ``freebayes.postcall_annotate``.  The calls are then run
    through ``variant_filtration`` and ``phasing.read_backed_phasing``, and
    the phased output is handed to ``_eval_genotyper`` together with the
    configured dbSNP file.

    Args:
        call_file: variant calls to finalize.
        bam_file: alignment input given to the phasing step.
        ref_file: reference passed to every step.
        config: configuration; ``config["algorithm"]["variantcaller"]``
            names the caller that produced ``call_file`` (``"gatk"`` when
            unset).

    Returns:
        The value returned by ``phasing.read_backed_phasing``.
    """
    vrn_files = configured_vrn_files(config, ref_file)
    caller_name = config["algorithm"].get("variantcaller", "gatk")
    needs_annotation = caller_name in ("freebayes", "cortex", "samtools")
    if needs_annotation:
        call_file = freebayes.postcall_annotate(call_file, ref_file,
                                                vrn_files, config)
    filtered = variant_filtration(call_file, ref_file, vrn_files, config)
    phased = phasing.read_backed_phasing(filtered, bam_file, ref_file, config)
    _eval_genotyper(phased, ref_file, vrn_files.dbsnp, config)
    return phased
def variantcall_sample(data, region=None, out_file=None):
    """Genotype one region of a sample; entry point for parallel runs.

    Runs the caller named by ``config["algorithm"]["variantcaller"]``
    (``"gatk"`` when unset) into a ``-raw`` sibling of ``out_file``, applies
    read-backed phasing when ``config["algorithm"]["phasing"]`` equals
    ``"gatk"`` and symlinks the result (plus its ``.idx`` companion, when
    present) to ``out_file``.

    Args:
        data: sample dictionary.  ``work_bam`` is either a single path string
            or a collection of inputs; in the latter case ``work_items``
            supplies the matching items.
        region: region passed through to the caller.
        out_file: path the final calls are linked to.

    Raises:
        KeyError: if the configured variant caller is not in the table.
        OSError: if creating a symlink fails for any reason other than the
            link already existing.
    """
    import errno
    from bcbio.variation import freebayes, cortex, samtools, varscan, mutect

    safe_makedir(os.path.dirname(out_file))
    caller_fns = {
        "gatk": unified_genotyper,
        "gatk-haplotype": haplotype_caller,
        "freebayes": freebayes.run_freebayes,
        "cortex": cortex.run_cortex,
        "samtools": samtools.run_samtools,
        "varscan": varscan.run_varscan,
        "mutect": mutect.mutect_caller,
    }
    sam_ref = data["sam_ref"]
    config = data["config"]
    caller_fn = caller_fns[config["algorithm"].get("variantcaller", "gatk")]
    if isinstance(data["work_bam"], basestring):
        align_bams = [data["work_bam"]]
        items = [data]
    else:
        align_bams = data["work_bam"]
        items = data["work_items"]
    call_file = "%s-raw%s" % os.path.splitext(out_file)
    caller_fn(align_bams, items, sam_ref,
              configured_vrn_files(config, sam_ref), region, call_file)
    if data["config"]["algorithm"].get("phasing", False) == "gatk":
        call_file = phasing.read_backed_phasing(call_file, align_bams, sam_ref,
                                                region, config)
    for ext in ["", ".idx"]:
        source, link = call_file + ext, out_file + ext
        if os.path.exists(link) or not os.path.exists(source):
            continue
        try:
            os.symlink(source, link)
        except OSError as exc:
            # The link can appear between the existence check and the call
            # (e.g. a concurrent run); compare the error code rather than
            # the locale-dependent message text.
            if exc.errno != errno.EEXIST:
                raise
    # NOTE(review): as shown, this variant stops after linking and therefore
    # returns None without recording data["vrn_file"] -- confirm whether the
    # tail of the function was lost.