def convert_fasta(ref_fasta, just_name=False):
    """Write the in-silico converted copy of a reference FASTA.

    For every ``(header, seq)`` pair yielded by ``fasta_iter(ref_fasta)`` two
    records are written to ``ref_fasta + ".bwameth.c2t"``:

    * ``>r<header>`` -- ``seq`` with every "G" replaced by "A" (reverse), then
    * ``>f<header>`` -- ``seq`` with every "C" replaced by "T" (forward).

    Sequence lines are emitted through ``wrap`` (presumably fixed-width line
    wrapping -- defined elsewhere in this file).

    Args:
        ref_fasta: path of the reference FASTA.
        just_name: when true, only compute and return the output path; no
            file is read, checked or written.

    Returns:
        The path of the converted FASTA.

    Raises:
        Whatever opening, reading or writing raises. Any partially written
        output is removed (best effort) before the exception propagates.
    """
    out_fa = ref_fasta + ".bwameth.c2t"
    if just_name:
        return out_fa

    msg = "c2t in %s to %s" % (ref_fasta, out_fa)
    # is_newer_b is defined elsewhere; a true result is treated here as
    # "the converted file is already up to date".
    if is_newer_b(ref_fasta, out_fa):
        sys.stderr.write("already converted: %s\n" % msg)
        return out_fa
    sys.stderr.write("converting %s\n" % msg)

    try:
        # The context manager closes the handle on every path, so the cleanup
        # below never touches a handle that was not successfully opened.
        with open(out_fa, "w") as fh:
            for header, seq in fasta_iter(ref_fasta):
                # Reverse: G -> A
                fh.write(">r%s\n" % header)
                for line in wrap(seq.replace("G", "A")):
                    fh.write(line + '\n')
                # Forward: C -> T
                fh.write(">f%s\n" % header)
                for line in wrap(seq.replace("C", "T")):
                    fh.write(line + '\n')
    except BaseException:
        # Also covers KeyboardInterrupt: a half-written conversion must not
        # be left behind. The unlink is best effort so that a missing file
        # (e.g. open() itself failed) cannot mask the original error.
        try:
            os.unlink(out_fa)
        except OSError:
            pass
        raise
    return out_fa
def bwa_mem(fa, fq_convert_cmd, extra_args, threads=1, rg=None,
            paired=True, set_as_failed=None, do_not_penalize_chimeras=False):
    """Build the ``<fastq converter> | bwa mem`` command and pass it to as_bam.

    Args:
        fa: path of the original (unconverted) reference FASTA.
        fq_convert_cmd: shell command that starts the pipeline; ``bwa mem``
            reads its output from stdin (``-``).
        extra_args: extra command-line text inserted before the reference.
        threads: value for ``bwa mem -t``.
        rg: read group. A value not starting with ``@RG`` is used as both ID
            and SM of a generated ``@RG`` line. ``None`` omits ``-R``.
        paired: when true, add ``-U 100 -p``.
        set_as_failed: forwarded unchanged to ``as_bam``.
        do_not_penalize_chimeras: forwarded unchanged to ``as_bam``.

    Raises:
        BWAMethException: when ``is_newer_b`` reports that the index files of
            the converted reference (``.amb``/``.sa``) are not up to date.
    """
    conv_fa = convert_fasta(fa, just_name=True)
    if not is_newer_b(conv_fa, (conv_fa + '.amb', conv_fa + '.sa')):
        raise BWAMethException("first run bwameth.py index %s" % fa)

    if rg is not None and not rg.startswith('@RG'):
        # Escaped "\t" on purpose: bwa converts it to a TAB itself.
        rg = '@RG\\tID:{rg}\\tSM:{rg}'.format(rg=rg)

    # starts the pipeline with the program to convert fastqs.
    # Each piece is formatted on its own: running str.format over the whole
    # command would choke on any "{" / "}" inside fq_convert_cmd.
    parts = ["|%s " % fq_convert_cmd]
    # penalize clipping and unpaired. lower penalty on mismatches (-B)
    parts.append("|bwa mem -T 40 -B 2 -L 10 -CM ")
    if paired:
        parts.append("-U 100 -p ")
    if rg is not None:
        # Without this guard the command would contain -R 'None'.
        parts.append("-R '{rg}' ".format(rg=rg))
    parts.append("-t {threads} {extra_args} {conv_fa} -".format(
        threads=threads, extra_args=extra_args, conv_fa=conv_fa))
    cmd = "".join(parts)

    sys.stderr.write("running: %s\n" % cmd.lstrip("|"))
    as_bam(cmd, fa, set_as_failed, do_not_penalize_chimeras)
def bwa_mem(fa, mfq, extra_args, prefix='bwa-meth', threads=1, rg=None,
            calmd=False, paired=True, set_as_failed=None):
    """Build the ``bwa mem`` command for *mfq* and pass it to as_bam.

    Args:
        fa: path of the original (unconverted) reference FASTA.
        mfq: text appended as the read input of ``bwa mem``.
        extra_args: extra command-line text inserted before the reference.
        prefix: forwarded unchanged to ``as_bam``.
        threads: value for ``bwa mem -t``.
        rg: read group. A value not starting with ``@RG`` is used as both ID
            and SM of a generated ``@RG`` line. ``None`` omits ``-R``.
        calmd: forwarded unchanged to ``as_bam``.
        paired: when true, add ``-U 100 -p``.
        set_as_failed: forwarded unchanged to ``as_bam``.

    Raises:
        BWAMethException: when ``is_newer_b`` reports that the index files of
            the converted reference (``.amb``/``.sa``) are not up to date.
    """
    conv_fa = convert_fasta(fa, just_name=True)
    if not is_newer_b(conv_fa, (conv_fa + '.amb', conv_fa + '.sa')):
        raise BWAMethException("first run bwameth.py index %s" % fa)

    if rg is not None and not rg.startswith('@RG'):
        # The separator must be the two characters "\" + "t", not a real TAB:
        # bwa expands the escape itself and refuses literal tabs in -R.
        rg = '@RG\\tID:{rg}\\tSM:{rg}'.format(rg=rg)

    # penalize clipping and unpaired. lower penalty on mismatches (-B)
    cmd = "|bwa mem -T 40 -B 2 -L 10 -CM "
    if paired:
        cmd += "-U 100 -p "
    if rg is not None:
        # Without this guard the command would contain -R 'None'.
        cmd += "-R '{rg}' ".format(rg=rg)
    cmd += "-t {threads} {extra_args} {conv_fa} {mfq}".format(
        threads=threads, extra_args=extra_args, conv_fa=conv_fa, mfq=mfq)

    sys.stderr.write("running: %s\n" % cmd.lstrip("|"))
    as_bam(cmd, fa, prefix, calmd, set_as_failed)
def bwa_index(fa):
    """Run ``bwa index -a bwtsw`` on *fa* unless the index looks current.

    Nothing is done when ``is_newer_b(fa, (fa + '.amb', fa + '.sa'))`` is
    true. If the indexing command raises, ``fa + ".amb"`` is deleted when it
    exists and the exception is re-raised -- presumably so that the check
    above does not accept a partial index on the next run.

    Args:
        fa: path of the FASTA file to index.
    """
    index_files = (fa + '.amb', fa + '.sa')
    if is_newer_b(fa, index_files):
        return

    sys.stderr.write("indexing: %s\n" % fa)
    index_cmd = "bwa index -a bwtsw %s" % fa
    try:
        run(index_cmd)
    except BaseException:
        # Deliberately catches everything, interrupts included.
        amb_path = fa + ".amb"
        if op.exists(amb_path):
            os.unlink(amb_path)
        raise
def bwa_mem(fa, mfq, extra_args, prefix='bwa-meth', threads=1, rg=None,
            calmd=False, paired=True, set_as_failed=None):
    """Assemble the ``bwa mem`` command for *mfq* and give it to as_bam.

    Args:
        fa: path of the original (unconverted) reference FASTA.
        mfq: text appended as the read input of ``bwa mem``.
        extra_args: extra command-line text inserted before the reference.
        prefix: forwarded unchanged to ``as_bam``.
        threads: value for ``bwa mem -t``.
        rg: accepted but not used by this function.
        calmd: forwarded unchanged to ``as_bam``.
        paired: when true, add ``-U 100 -p``.
        set_as_failed: forwarded unchanged to ``as_bam``.

    Raises:
        BWAMethException: when ``is_newer_b`` reports that the index files of
            the converted reference (``.amb``/``.sa``) are not up to date.
    """
    conv_fa = convert_fasta(fa, just_name=True)
    index_files = (conv_fa + '.amb', conv_fa + '.sa')
    if not is_newer_b(conv_fa, index_files):
        raise BWAMethException("first run bwameth.py index %s" % fa)

    # penalize clipping and unpaired. lower penalty on mismatches (-B)
    pairing_opts = "-U 100 -p " if paired else ""
    tail = "-t {threads} {extra_args} {conv_fa} {mfq}".format(
        threads=threads, extra_args=extra_args, conv_fa=conv_fa, mfq=mfq)
    cmd = "|bwa mem -T 40 -B 2 -L 10 -CM " + pairing_opts + tail

    shown = cmd.lstrip("|")
    sys.stderr.write("running: %s\n" % shown)
    as_bam(cmd, fa, prefix, calmd, set_as_failed)
def bwa_mem(fa, mfq, extra_args, threads=1, rg=None, paired=True,
            set_as_failed=None):
    """Build the ``bwa mem`` command for *mfq* and pass it to as_bam.

    Args:
        fa: path of the original (unconverted) reference FASTA.
        mfq: text appended as the read input of ``bwa mem``.
        extra_args: extra command-line text inserted before the reference.
        threads: value for ``bwa mem -t``.
        rg: read group. A value not starting with ``@RG`` is used as both ID
            and SM of a generated ``@RG`` line. ``None`` omits ``-R``.
        paired: when true, add ``-U 100 -p``.
        set_as_failed: forwarded unchanged to ``as_bam``.

    Raises:
        BWAMethException: when ``is_newer_b`` reports that the index files of
            the converted reference (``.amb``/``.sa``) are not up to date.
    """
    conv_fa = convert_fasta(fa, just_name=True)
    if not is_newer_b(conv_fa, (conv_fa + '.amb', conv_fa + '.sa')):
        raise BWAMethException("first run bwameth.py index %s" % fa)

    if rg is not None and not rg.startswith('@RG'):
        # The separator must be the two characters "\" + "t", not a real TAB:
        # bwa expands the escape itself and refuses literal tabs in -R.
        rg = '@RG\\tID:{rg}\\tSM:{rg}'.format(rg=rg)

    # penalize clipping and unpaired. lower penalty on mismatches (-B)
    cmd = "|bwa mem -T 40 -B 2 -L 10 -CM "
    if paired:
        cmd += "-U 100 -p "
    if rg is not None:
        # Without this guard the command would contain -R 'None'.
        cmd += "-R '{rg}' ".format(rg=rg)
    cmd += "-t {threads} {extra_args} {conv_fa} {mfq}".format(
        threads=threads, extra_args=extra_args, conv_fa=conv_fa, mfq=mfq)

    sys.stderr.write("running: %s\n" % cmd.lstrip("|"))
    as_bam(cmd, fa, set_as_failed)
def convert_fasta(ref_fasta, just_name=False):
    """Create ``ref_fasta + ".bwameth.c2t"``, the converted reference.

    Each ``(header, seq)`` pair from ``fasta_iter(ref_fasta)`` produces two
    output records, in this order:

    * ``>r<header>`` with every "G" in ``seq`` replaced by "A" (reverse);
    * ``>f<header>`` with every "C" in ``seq`` replaced by "T" (forward).

    Sequence text goes through ``wrap`` before being written, one returned
    item per output line (``wrap`` is defined elsewhere in this file).

    Args:
        ref_fasta: path of the reference FASTA.
        just_name: when true, return the output path immediately without
            touching the file system.

    Returns:
        The path of the converted FASTA.

    Raises:
        Whatever opening, reading or writing raises. Partial output is
        removed (best effort) before the exception propagates.
    """
    out_fa = ref_fasta + ".bwameth.c2t"
    if just_name:
        return out_fa

    msg = "c2t in %s to %s" % (ref_fasta, out_fa)
    # A true result from is_newer_b (defined elsewhere) is taken to mean the
    # existing converted file can be reused.
    if is_newer_b(ref_fasta, out_fa):
        sys.stderr.write("already converted: %s\n" % msg)
        return out_fa
    sys.stderr.write("converting %s\n" % msg)

    try:
        # "with" guarantees the handle is closed whether or not open() or a
        # later write fails, so no manual close / unbound-name guard is needed.
        with open(out_fa, "w") as fh:
            for header, seq in fasta_iter(ref_fasta):
                # Reverse: G -> A
                fh.write(">r%s\n" % header)
                fh.writelines(
                    line + '\n' for line in wrap(seq.replace("G", "A")))
                # Forward: C -> T
                fh.write(">f%s\n" % header)
                fh.writelines(
                    line + '\n' for line in wrap(seq.replace("C", "T")))
    except BaseException:
        # Interrupts included: never leave a truncated conversion on disk.
        # A failing unlink (e.g. the file was never created) must not hide
        # the error that brought us here.
        try:
            os.unlink(out_fa)
        except OSError:
            pass
        raise
    return out_fa
def bwa_mem(fa, fq_convert_cmd, extra_args, threads=1, minSeedLength=19,
            bandWidth=100, dropoff=100, mismatchPenalty=2, gapOpenPenalty=6,
            gapExtensionPenalty=1, clippingPenalty=10, minAlignmentScore=40,
            rg=None, paired=True, set_as_failed=None):
    """Build the ``<fastq converter> | bwa mem`` command and pass it to as_bam.

    Args:
        fa: path of the original (unconverted) reference FASTA.
        fq_convert_cmd: shell command that starts the pipeline; ``bwa mem``
            reads its output from stdin (``-``).
        extra_args: extra command-line text inserted before the reference.
        threads: value for ``bwa mem -t``.
        minSeedLength: value for ``-k``.
        bandWidth: value for ``-w``.
        dropoff: value for ``-d``.
        mismatchPenalty: value for ``-B``.
        gapOpenPenalty: value for ``-O``.
        gapExtensionPenalty: value for ``-E``.
        clippingPenalty: value for ``-L``.
        minAlignmentScore: value for ``-T``.
        rg: read group. A value not starting with ``@RG`` is used as both ID
            and SM of a generated ``@RG`` line. ``None`` omits ``-R``.
        paired: when true, add ``-U 100 -p``.
        set_as_failed: forwarded unchanged to ``as_bam``.

    Raises:
        BWAMethException: when ``is_newer_b`` reports that the index files of
            the converted reference (``.amb``/``.sa``) are not up to date.
    """
    conv_fa = convert_fasta(fa, just_name=True)
    if not is_newer_b(conv_fa, (conv_fa + '.amb', conv_fa + '.sa')):
        raise BWAMethException("first run bwameth.py index %s" % fa)

    if rg is not None and not rg.startswith('@RG'):
        # Escaped "\t" on purpose: bwa converts it to a TAB itself.
        rg = '@RG\\tID:{rg}\\tSM:{rg}'.format(rg=rg)

    # starts the pipeline with the program to convert fastqs
    cmd = "|%s " % fq_convert_cmd
    cmd += "|bwa mem -CM "
    if paired:
        cmd += "-U 100 -p "
    if rg is not None:
        # Without this guard the command would contain -R 'None'.
        cmd += f"-R '{rg}' "
    # seeding / extension settings
    cmd += f"-t {threads} -k {minSeedLength} -w {bandWidth} -d {dropoff} "
    # scoring penalties
    cmd += f"-B {mismatchPenalty} -O {gapOpenPenalty} -E {gapExtensionPenalty} "
    cmd += f"-L {clippingPenalty} -T {minAlignmentScore} {extra_args} {conv_fa} -"

    sys.stderr.write("running: %s\n" % cmd.lstrip("|"))
    as_bam(cmd, fa, set_as_failed)