def variantcall_sample(data, region=None, out_file=None):
    """Parallel entry point for doing genotyping of a region of a sample.

    Selects the caller named by ``config["algorithm"]["variantcaller"]``
    (default ``"gatk"``), invokes it with a ``-raw`` variant of ``out_file``
    as its last argument, optionally runs GATK read-backed phasing, and
    finally symlinks the resulting call file (and its ``.idx`` companion,
    when present) to ``out_file``.

    Args:
        data: Sample dictionary. Reads ``"sam_ref"``, ``"config"`` and
            ``"work_bam"``; when ``"work_bam"`` is not a single string it is
            used as-is as the BAM collection and ``"work_items"`` supplies
            the matching items.
        region: Passed through unchanged to the caller and to phasing.
        out_file: Final output path; its directory is created if needed.

    Returns:
        None. Results are left on disk at ``out_file``.

    Raises:
        KeyError: If the configured variant caller is not a known caller.
        OSError: If creating a symlink fails for any reason other than the
            link already existing.
    """
    import errno
    from bcbio.variation import freebayes, cortex, samtools, varscan, mutect
    safe_makedir(os.path.dirname(out_file))
    caller_fns = {
        "gatk": unified_genotyper,
        "gatk-haplotype": haplotype_caller,
        "freebayes": freebayes.run_freebayes,
        "cortex": cortex.run_cortex,
        "samtools": samtools.run_samtools,
        "varscan": varscan.run_varscan,
        "mutect": mutect.mutect_caller,
    }
    sam_ref = data["sam_ref"]
    config = data["config"]
    caller_fn = caller_fns[config["algorithm"].get("variantcaller", "gatk")]
    # A single BAM path means one sample; otherwise the entry carries
    # parallel collections of BAMs and items.
    if isinstance(data["work_bam"], basestring):
        align_bams = [data["work_bam"]]
        items = [data]
    else:
        align_bams = data["work_bam"]
        items = data["work_items"]
    call_file = "%s-raw%s" % os.path.splitext(out_file)
    caller_fn(align_bams, items, sam_ref,
              configured_vrn_files(config, sam_ref), region, call_file)
    if config["algorithm"].get("phasing", False) == "gatk":
        call_file = phasing.read_backed_phasing(call_file, align_bams,
                                                sam_ref, region, config)
    for ext in ["", ".idx"]:
        if not os.path.exists(out_file + ext):
            if os.path.exists(call_file + ext):
                try:
                    os.symlink(call_file + ext, out_file + ext)
                except OSError as exc:
                    # Another process may have created the link between the
                    # existence check and the symlink call. Compare the error
                    # code rather than the (locale-dependent) message text.
                    if exc.errno != errno.EEXIST:
                        raise
def variantcall_sample(data, region=None, out_file=None):
    """Parallel entry point for doing genotyping of a region of a sample.

    Looks up the caller named by ``config["algorithm"]["variantcaller"]``
    (default ``"gatk"``) in the mapping returned by ``get_variantcallers()``,
    invokes it with a ``-raw`` variant of ``out_file`` as its last argument,
    and uses the caller's return value as the call file from then on.
    Optionally runs GATK read-backed phasing, then symlinks the call file
    (and its ``.idx`` companion, when present) to ``out_file``.

    Args:
        data: Sample dictionary. Reads ``"sam_ref"``, ``"config"`` and
            ``"work_bam"``; when ``"work_bam"`` is not a single string it is
            used as-is as the BAM collection and ``"work_items"`` supplies
            the matching items.
        region: Passed through unchanged to the caller and to phasing.
        out_file: Final output path; its directory is created if needed.

    Returns:
        None. Results are left on disk at ``out_file``.

    Raises:
        KeyError: If the configured variant caller is not a known caller.
        OSError: If creating a symlink fails for any reason other than the
            link already existing.
    """
    import errno
    safe_makedir(os.path.dirname(out_file))
    sam_ref = data["sam_ref"]
    config = data["config"]
    caller_fns = get_variantcallers()
    caller_fn = caller_fns[config["algorithm"].get("variantcaller", "gatk")]
    # A single BAM path means one sample; otherwise the entry carries
    # parallel collections of BAMs and items.
    if isinstance(data["work_bam"], basestring):
        align_bams = [data["work_bam"]]
        items = [data]
    else:
        align_bams = data["work_bam"]
        items = data["work_items"]
    call_file = "%s-raw%s" % os.path.splitext(out_file)
    call_file = caller_fn(align_bams, items, sam_ref,
                          configured_vrn_files(config, sam_ref), region,
                          call_file)
    if config["algorithm"].get("phasing", False) == "gatk":
        call_file = phasing.read_backed_phasing(call_file, align_bams,
                                                sam_ref, region, config)
    for ext in ["", ".idx"]:
        if not os.path.exists(out_file + ext):
            if os.path.exists(call_file + ext):
                try:
                    os.symlink(call_file + ext, out_file + ext)
                except OSError as exc:
                    # Another process may have created the link between the
                    # existence check and the symlink call. Compare the error
                    # code rather than the (locale-dependent) message text.
                    if exc.errno != errno.EEXIST:
                        raise
def finalize_genotyper(call_file, ref_file, config):
    """Filter raw variant calls and evaluate the filtered result.

    Resolves the configured variation resource files for ``ref_file``,
    passes ``call_file`` through ``variant_filtration`` and then hands the
    filtered file plus the dbSNP resource to ``_eval_genotyper``.

    Returns the value produced by ``variant_filtration``.
    """
    known_vrns = configured_vrn_files(config, ref_file)
    filtered = variant_filtration(call_file, ref_file, known_vrns, config)
    _eval_genotyper(filtered, ref_file, known_vrns.dbsnp, config)
    return filtered
def finalize_genotyper(call_file, bam_file, ref_file, config):
    """Annotate (when required) and filter a file of variant calls.

    Calls made by any of the callers listed below are first passed through
    ``annotation.annotate_nongatk_vcf`` together with ``bam_file`` and the
    dbSNP resource; every call file is then run through
    ``variant_filtration``.

    Returns the value produced by ``variant_filtration``.
    """
    callers_to_annotate = ("freebayes", "cortex", "samtools",
                           "gatk-haplotype", "varscan")
    known_vrns = configured_vrn_files(config, ref_file)
    caller_name = config["algorithm"].get("variantcaller", "gatk")
    if caller_name in callers_to_annotate:
        call_file = annotation.annotate_nongatk_vcf(
            call_file, bam_file, known_vrns.dbsnp, ref_file, config)
    return variant_filtration(call_file, ref_file, known_vrns, config)
def finalize_genotyper(call_file, bam_file, ref_file, config):
    """Annotate (when required), filter, phase and evaluate variant calls.

    For the ``freebayes`` and ``cortex`` callers the call file is first run
    through ``freebayes.postcall_annotate``. The calls are then filtered
    with ``variant_filtration``, phased with
    ``phasing.read_backed_phasing`` against ``bam_file``, and the phased
    file is passed to ``_eval_genotyper`` along with the dbSNP resource.

    Returns the value produced by ``phasing.read_backed_phasing``.
    """
    callers_to_annotate = ("freebayes", "cortex")
    known_vrns = configured_vrn_files(config, ref_file)
    caller_name = config["algorithm"].get("variantcaller", "gatk")
    if caller_name in callers_to_annotate:
        call_file = freebayes.postcall_annotate(call_file, ref_file,
                                                known_vrns, config)
    filtered = variant_filtration(call_file, ref_file, known_vrns, config)
    phased = phasing.read_backed_phasing(filtered, bam_file, ref_file, config)
    _eval_genotyper(phased, ref_file, known_vrns.dbsnp, config)
    return phased
def finalize_genotyper(call_file, bam_file, ref_file, config):
    """Annotate (when required), filter, phase and evaluate variant calls.

    For the ``freebayes``, ``cortex`` and ``samtools`` callers the call file
    is first run through ``freebayes.postcall_annotate``. The calls are then
    filtered with ``variant_filtration``, phased with
    ``phasing.read_backed_phasing`` against ``bam_file``, and the phased
    file is passed to ``_eval_genotyper`` along with the dbSNP resource.

    Returns the value produced by ``phasing.read_backed_phasing``.
    """
    callers_to_annotate = ("freebayes", "cortex", "samtools")
    known_vrns = configured_vrn_files(config, ref_file)
    caller_name = config["algorithm"].get("variantcaller", "gatk")
    if caller_name in callers_to_annotate:
        call_file = freebayes.postcall_annotate(call_file, ref_file,
                                                known_vrns, config)
    filtered = variant_filtration(call_file, ref_file, known_vrns, config)
    phased = phasing.read_backed_phasing(filtered, bam_file, ref_file, config)
    _eval_genotyper(phased, ref_file, known_vrns.dbsnp, config)
    return phased
def postprocess_variants(data):
    """Post-process a sample's variant calls: filtering, then effects.

    When the sample has both a truthy ``"work_bam"`` and a ``"vrn_file"``,
    the variant file is replaced by the output of ``variant_filtration``
    and then, if ``effects.snpeff_effects`` returns a truthy value, by that
    annotated file. Samples lacking either input are passed through
    untouched.

    Returns ``[[data]]``, with ``data`` updated in place.
    """
    logger.info("Finalizing variant calls: %s" % str(data["name"]))
    # Nothing to do without both an alignment and a set of calls.
    if not (data["work_bam"] and data.get("vrn_file")):
        return [[data]]
    known_vrns = configured_vrn_files(data["config"], data["sam_ref"])
    filtered = variant_filtration(data["vrn_file"], data["sam_ref"],
                                  known_vrns, data["config"])
    data["vrn_file"] = filtered
    logger.info("Calculating variation effects for %s" % str(data["name"]))
    effects_file = effects.snpeff_effects(data)
    if effects_file:
        data["vrn_file"] = effects_file
    return [[data]]
def finalize_genotyper(call_file, bam_file, ref_file, config):
    """Annotate (when required) and filter a file of variant calls.

    Output from the freebayes, cortex, samtools, gatk-haplotype and varscan
    callers is first sent through ``annotation.annotate_nongatk_vcf`` with
    ``bam_file`` and the dbSNP resource. All call files are then filtered
    by ``variant_filtration``.

    Returns the value produced by ``variant_filtration``.
    """
    resources = configured_vrn_files(config, ref_file)
    selected_caller = config["algorithm"].get("variantcaller", "gatk")
    requires_annotation = selected_caller in (
        "freebayes", "cortex", "samtools", "gatk-haplotype", "varscan")
    if requires_annotation:
        call_file = annotation.annotate_nongatk_vcf(
            call_file, bam_file, resources.dbsnp, ref_file, config)
    return variant_filtration(call_file, ref_file, resources, config)
def variantcall_sample(data, region=None, out_file=None):
    """Parallel entry point for doing genotyping of a region of a sample.

    Selects the caller named by ``config["algorithm"]["variantcaller"]``
    (default ``"gatk"``), invokes it with a ``-raw`` variant of ``out_file``
    as its last argument, optionally runs GATK read-backed phasing, and
    finally symlinks the resulting call file (and its ``.idx`` companion,
    when present) to ``out_file``.

    Args:
        data: Sample dictionary. Reads ``"sam_ref"``, ``"config"`` and
            ``"work_bam"``; when ``"work_bam"`` is not a single string it is
            used as-is as the BAM collection and ``"work_items"`` supplies
            the matching items.
        region: Passed through unchanged to the caller and to phasing.
        out_file: Final output path; its directory is created if needed.

    Returns:
        None. Results are left on disk at ``out_file``.

    Raises:
        KeyError: If the configured variant caller is not a known caller.
        OSError: If creating a symlink fails for any reason other than the
            link already existing.
    """
    import errno
    from bcbio.variation import freebayes, cortex, samtools, varscan, mutect
    safe_makedir(os.path.dirname(out_file))
    caller_fns = {
        "gatk": unified_genotyper,
        "gatk-haplotype": haplotype_caller,
        "freebayes": freebayes.run_freebayes,
        "cortex": cortex.run_cortex,
        "samtools": samtools.run_samtools,
        "varscan": varscan.run_varscan,
        "mutect": mutect.mutect_caller,
    }
    sam_ref = data["sam_ref"]
    config = data["config"]
    caller_fn = caller_fns[config["algorithm"].get("variantcaller", "gatk")]
    # A single BAM path means one sample; otherwise the entry carries
    # parallel collections of BAMs and items.
    if isinstance(data["work_bam"], basestring):
        align_bams = [data["work_bam"]]
        items = [data]
    else:
        align_bams = data["work_bam"]
        items = data["work_items"]
    call_file = "%s-raw%s" % os.path.splitext(out_file)
    caller_fn(align_bams, items, sam_ref,
              configured_vrn_files(config, sam_ref), region, call_file)
    if config["algorithm"].get("phasing", False) == "gatk":
        call_file = phasing.read_backed_phasing(call_file, align_bams,
                                                sam_ref, region, config)
    for ext in ["", ".idx"]:
        if not os.path.exists(out_file + ext):
            if os.path.exists(call_file + ext):
                try:
                    os.symlink(call_file + ext, out_file + ext)
                except OSError as exc:
                    # Another process may have created the link between the
                    # existence check and the symlink call. Compare the error
                    # code rather than the (locale-dependent) message text.
                    if exc.errno != errno.EEXIST:
                        raise