def process_fastq(curinfo, ref_index, config, config_file):
    """Prepare, align and summarize the reads described by ``curinfo``.

    Steps, in order:
      1. Build the input fastq with ``_prepare_fastq``.
      2. Optionally trim (``trim_three``) and k-mer filter (``kmer_correct``)
         according to ``config["algorithm"]``.
      3. Collapse reads with ``uniquify_reads`` and align with novoalign.
      4. Convert to a sorted BAM, optionally realign with GATK when
         ``realignment`` is ``"gatk"``, and index with Picard.
      5. Summarize the alignment with ``summarize_at_each_pos``.

    Args:
        curinfo: dict-like sample description; ``description``/``name`` and
            ``ref`` are read from it.
        ref_index: reference index handed to ``novoalign.align``.
        config: configuration dict; reads ``algorithm``, ``dir`` -> ``align``
            and, as a fallback for the reference, ``ref``.
        config_file: unused here; kept so existing callers keep working.

    Returns:
        None.
    """
    do_realignment = config["algorithm"].get("realignment", "")
    do_kmercorrect = config["algorithm"].get("kmer_correct", "")
    trim_three = config["algorithm"].get("trim_three", "")
    picard = broad.runner_from_config(config)
    in_file = _prepare_fastq(curinfo, config)
    if trim_three:
        in_file = trim_fastq(in_file, three=int(trim_three))
    if do_kmercorrect:
        # Reads containing Ns are dropped before the k-mer filter runs.
        in_file = remove_ns(in_file)
        in_file = kmer_filter(in_file, do_kmercorrect, config)
    unique_file, count_file = uniquify_reads(in_file, config)
    align_sam = novoalign.align(unique_file, None, ref_index,
                                os.path.splitext(os.path.basename(in_file))[0],
                                config["dir"]["align"], curinfo)
    name = curinfo.get("description", curinfo.get("name", ""))
    # A per-sample reference takes precedence over the global one.
    ref = curinfo.get("ref", config.get("ref", None))
    align_bam = sam_to_sort_bam(align_sam, ref, unique_file, None,
                                name, name, name, config)
    if do_realignment == "gatk":
        align_bam = gatk_realigner(align_bam, ref, config, deep_coverage=True)
    picard.run_fn("picard_index", align_bam)
    # XXX Finish remainder of processing
    # The range_params / base-calling dispatch that used to follow was
    # unreachable (it sat after an unconditional return) and referenced an
    # undefined name ``bc``; it has been dropped until it is reimplemented.
    summarize_at_each_pos(align_bam, in_file, count_file, name, config)
def process_fastq(curinfo, ref_index, config, config_file):
    """Run the fastq -> sorted, indexed BAM -> per-position summary workflow.

    The input fastq comes from ``_prepare_fastq``. Trimming and k-mer
    filtering are applied only when ``trim_three`` / ``kmer_correct`` are set
    under ``config["algorithm"]``. Reads are then collapsed with
    ``uniquify_reads``, aligned with novoalign, converted to a sorted BAM,
    realigned with GATK when ``realignment`` equals ``"gatk"``, indexed via
    Picard and finally passed to ``summarize_at_each_pos``.

    ``config_file`` is accepted but not used by this function.
    Returns None.
    """
    algorithm = config["algorithm"]
    realign_mode = algorithm.get("realignment", "")
    kmer_setting = algorithm.get("kmer_correct", "")
    three_prime_trim = algorithm.get("trim_three", "")
    picard = broad.runner_from_config(config)

    in_file = _prepare_fastq(curinfo, config)
    if three_prime_trim:
        in_file = trim_fastq(in_file, three=int(three_prime_trim))
    if kmer_setting:
        in_file = kmer_filter(remove_ns(in_file), kmer_setting, config)

    unique_file, count_file = uniquify_reads(in_file, config)
    # The alignment is named after the (possibly filtered) input file.
    file_stem = os.path.splitext(os.path.basename(in_file))[0]
    align_sam = novoalign.align(unique_file, None, ref_index, file_stem,
                                config["dir"]["align"], curinfo)

    name = curinfo.get("description", curinfo.get("name", ""))
    ref = curinfo.get("ref", config.get("ref", None))
    # Every read-group field carries the sample name.
    names = dict.fromkeys(("rg", "sample", "pu", "pl"), name)
    align_bam = sam_to_sort_bam(align_sam, ref, unique_file, None, names,
                                config)
    if realign_mode == "gatk":
        align_bam = gatk_realigner(align_bam, ref, config, deep_coverage=True)
    picard.run_fn("picard_index", align_bam)
    # XXX Finish remainder of processing
    summarize_at_each_pos(align_bam, in_file, count_file, name, config)
def align_and_sort(in_file, exp, config, align_dir):
    """Align ``in_file`` with bowtie and return the path to a sorted BAM.

    Output names are derived from the base name of ``in_file`` inside
    ``align_dir``: ``<stem>.sam``, ``<stem>.bam`` and ``<stem>-sort.bam``.
    Work is skipped when it has already been done: bowtie runs only if the
    SAM file is missing, and the SAM -> sorted BAM conversion runs only if
    the sorted BAM is missing.

    Args:
        in_file: reads to align (passed to bowtie with ``-f``).
        exp: dict-like experiment description; ``exp["name"]`` is handed to
            ``sam_to_sort_bam``.
        config: configuration dict; reads ``programs`` -> ``bowtie`` and
            ``analysis`` -> ``align_errors``, ``bowtie_genome``,
            ``seq_genome``.
        align_dir: directory in which alignment files are written.

    Returns:
        Path of the sorted BAM file.

    Raises:
        subprocess.CalledProcessError: if bowtie exits with a non-zero status.
    """
    out_sam = os.path.join(align_dir, "%s.sam" % (os.path.splitext(
        os.path.basename(in_file))[0]))
    out_bam = "%s.bam" % os.path.splitext(out_sam)[0]
    sort_bam = "%s-sort.bam" % os.path.splitext(out_sam)[0]
    if not os.path.exists(sort_bam):
        if not os.path.exists(out_sam):
            cl = [config["programs"]["bowtie"], "-S", "-f", "--all", "--best",
                  "--strata", "-v", str(config["analysis"]["align_errors"]),
                  config["analysis"]["bowtie_genome"], in_file, out_sam]
            # Call form prints the same text on Python 2 and is valid on
            # Python 3, unlike the former print statement.
            print(cl)
            subprocess.check_call(cl)
        sort_bam = sam_to_sort_bam(out_sam, config["analysis"]["seq_genome"],
                                   in_file, None, exp["name"], "",
                                   exp["name"], config)
    # Drop the intermediate SAM and unsorted BAM once the sorted BAM exists.
    for to_remove in [out_sam, out_bam]:
        if os.path.exists(to_remove):
            os.remove(to_remove)
    return sort_bam
def process_fastq(bc, ref_index, cur_config, config, config_file):
    """Align one barcoded fastq file and hand the BAM to the analysis step.

    The input is ``bc["file"]``. It is trimmed and k-mer filtered only when
    ``trim_three`` / ``kmer_correct`` are set under ``config["algorithm"]``,
    then collapsed with ``uniquify_bioplayground`` and aligned with novoalign
    (quality format taken from ``cur_config["format"]`` when present). The
    result is converted to a sorted BAM, realigned with GATK when
    ``realignment`` equals ``"gatk"`` and indexed via Picard. Finally
    ``call_analyze_multiple`` is used if ``range_params`` is configured,
    otherwise ``call_bases_and_analyze``.

    ``config_file`` is accepted but not used by this function.
    Returns None.
    """
    algorithm = config["algorithm"]
    realign_mode = algorithm.get("realignment", "")
    kmer_setting = algorithm.get("kmer_correct", "")
    three_prime_trim = algorithm.get("trim_three", "")
    picard = broad.runner_from_config(config)

    in_file = bc["file"]
    if three_prime_trim:
        in_file = trim_fastq(in_file, three=int(three_prime_trim))
    if kmer_setting:
        in_file = kmer_filter(remove_ns(in_file), kmer_setting, config)

    unique_file = uniquify_bioplayground(in_file, config)
    align_sam = novoalign.align(config["dir"]["align"], ref_index, unique_file,
                                qual_format=cur_config.get("format", None))
    align_bam = sam_to_sort_bam(align_sam, config["ref"], unique_file, None,
                                "", bc["name"], config)
    if realign_mode == "gatk":
        align_bam = gatk_realigner(align_bam, config["ref"], config,
                                   deep_coverage=True)
    picard.run_fn("picard_index", align_bam)

    # Pick the analysis entry point based on whether range_params is set.
    if config["algorithm"].get("range_params", None):
        analyze = call_analyze_multiple
    else:
        analyze = call_bases_and_analyze
    analyze(align_bam, bc, in_file, config)