def mapReads(infiles, outfile):
    '''Map reads to the genome using BWA.

    infiles
        Input read file(s); wrapped in a one-element tuple and handed
        to the mapper's ``build()``.
    outfile
        Output file name, passed straight to the mapper's ``build()``.
    '''
    # NOTE(review): P.run() is called with no arguments, so it presumably
    # reads ``statement``, ``to_cluster`` and ``job_options`` from this
    # function's local scope -- do not rename them without checking.
    to_cluster = True
    # The slot count of the job request is taken from the configuration.
    job_options = "-pe dedicated %i -R y" % PARAMS["bwa_threads"]

    mapper = PipelineMapping.BWA()
    statement = mapper.build((infiles,), outfile)
    P.run()
def mapReads(infiles, outfile):
    '''Map reads to the genome using BWA (output=SAM), convert to BAM,
    sort and index BAM file.

    infiles
        Input read file(s); wrapped in a one-element tuple and handed
        to the mapper's ``build()``.
    outfile
        Output file name; its basename without the ``.bam`` suffix is
        stored in ``track``.
    '''
    # NOTE(review): P.run() is called with no arguments, so it presumably
    # reads ``statement``, ``to_cluster``, ``job_options`` and ``track``
    # from this function's local scope -- keep these names as they are.
    to_cluster = USECLUSTER
    job_options = "-pe dedicated 2 -R y -l mem_free=8G"

    # ``track`` is not referenced again in this function.
    track = P.snip(os.path.basename(outfile), ".bam")

    mapper = PipelineMapping.BWA(
        remove_unique=PARAMS["bwa_remove_non_unique"])
    statement = mapper.build((infiles, ), outfile)
    P.run()
def mapReads(infiles, outfile):
    '''Map reads to the genome using BWA (output=SAM), convert to BAM,
    sort and index BAM file, generate alignment statistics and
    deduplicate using Picard.

    infiles
        Input read file(s); wrapped in a one-element tuple and handed
        to the mapper's ``build()``.
    outfile
        Output file name; its basename without the ``.bam`` suffix is
        stored in ``track``.
    '''
    # NOTE(review): P.run() is called with no arguments, so it presumably
    # reads ``statement``, ``job_options`` and ``track`` from this
    # function's local scope -- keep these names as they are.
    job_options = "-pe dedicated 2 -l mem_free=8G"

    # ``track`` is not referenced again in this function.
    track = P.snip(os.path.basename(outfile), ".bam")

    # Alignment statistics and deduplication are both switched on.
    mapper = PipelineMapping.BWA(
        remove_unique=PARAMS["bwa_remove_non_unique"],
        align_stats=True,
        dedup=True)
    statement = mapper.build((infiles, ), outfile)
    P.run()
def mapReads(infile, outfile):
    '''Map reads to the genome using BWA, sort and index BAM file,
    generate alignment statistics and deduplicate using Picard.

    The mapper class is selected by ``PARAMS["bwa_algorithm"]``:
    ``"aln"`` uses ``PipelineMapping.BWA`` and ``"mem"`` uses
    ``PipelineMapping.BWAMEM``.

    infile
        Input read file; wrapped in a one-element tuple and handed to
        the mapper's ``build()``.
    outfile
        Output file name, passed straight to the mapper's ``build()``.

    Raises
    ------
    ValueError
        If ``PARAMS["bwa_algorithm"]`` is neither ``"aln"`` nor ``"mem"``.
    '''
    # NOTE(review): P.run() is called with no arguments, so it presumably
    # reads ``statement``, ``job_threads`` and ``job_memory`` from this
    # function's local scope -- keep these names as they are.
    job_threads = PARAMS["bwa_threads"]
    job_memory = PARAMS["bwa_memory"]

    if PARAMS["bwa_algorithm"] == "aln":
        m = PipelineMapping.BWA(
            remove_non_unique=PARAMS["bwa_remove_non_unique"],
            strip_sequence=False)
    elif PARAMS["bwa_algorithm"] == "mem":
        m = PipelineMapping.BWAMEM(
            remove_non_unique=PARAMS["bwa_remove_non_unique"],
            strip_sequence=False)
    else:
        # The original message referenced an undefined name ``algorithm``,
        # which raised NameError instead of the intended ValueError.
        raise ValueError("bwa algorithm '%s' not known" %
                         PARAMS["bwa_algorithm"])

    statement = m.build((infile, ), outfile)
    # Echo the generated command line before it is executed.
    print(statement)
    P.run()
def mapBWAAgainstGenesetGSE53638(infiles, outfile):
    '''Map reads using BWA against transcriptome data.

    BWA is parameterised according to Soumillon et al. (2014):

    -l 24
        seed length of 24 bp
    -k 2
        default number of mismatches allowed in the seed (2)
    -n 0.04
        default percentage of mismatches allowed across the read (4%)

    Non-unique alignments will NOT be removed from the final bam.

    infiles
        Pair of ``(infile, reference)``; ``reference`` is a path ending
        in ``.sa`` from which the index directory and genome name are
        derived.
    outfile
        Output file name, passed straight to the mapper's ``build()``.
    '''
    infile, reference = infiles

    # NOTE(review): P.run() is called with no arguments and none of the
    # job_*/bwa_*/genome locals below are referenced again here, so they
    # are presumably picked up from this function's local scope -- keep
    # every name as it is.
    job_threads = 2
    job_options = "-l mem_free=1.9G"

    bwa_aln_options = "-l 24 -k 2 -n 0.04"
    bwa_samse_options = ""
    bwa_threads = job_threads

    # Index location and name are both derived from the reference path.
    bwa_index_dir = os.path.abspath(os.path.dirname(reference))
    genome = P.snip(os.path.basename(reference), ".sa")

    mapper = PipelineMapping.BWA(
        remove_non_unique=0,
        strip_sequence=0,
        set_nh=1)
    statement = mapper.build((infile, ), outfile)
    P.run()
def mapReadsWithBwaAgainstExpectedContigs(infiles, outfile):
    '''Map reads against the expected contigs with BWA.

    infiles
        Two-part input: ``infiles[0]`` is a path from which the contig
        file name is derived (everything up to the last ``R<digits>``
        match plus ``.filtered.contigs.expected.fa``); ``infiles[1]`` is
        a collection of ``.fastq.1.gz`` files, from which the one whose
        stem equals the track is mapped.
    outfile
        Output file name; its directory is used as the BWA index
        directory.

    Raises
    ------
    IndexError
        If no file in ``infiles[1]`` matches the track.
    '''
    # NOTE(review): P.run() is called with no arguments and several
    # locals below are not referenced again here, so they are presumably
    # picked up from this function's local scope -- keep every name.
    to_cluster = True
    index_dir = os.path.dirname(outfile)

    genome = os.path.basename(
        re.search(".*R[0-9]*", infiles[0]).group(0) +
        ".filtered.contigs.expected.fa")
    track = P.snip(genome, ".filtered.contigs.expected.fa")

    # Pick the fastq file belonging to this track; fail with a clear
    # message (same exception type as the former bare ``[0]`` lookup).
    matches = [infile for infile in infiles[1]
               if P.snip(infile, ".fastq.1.gz") == track]
    if not matches:
        raise IndexError(
            "no '.fastq.1.gz' file matches track '%s'" % track)
    fastq = matches[0]

    job_options = " -l mem_free=%s" % (PARAMS["bwa_memory"])
    bwa_index_dir = index_dir
    bwa_aln_options = PARAMS["bwa_aln_options"]
    bwa_sampe_options = PARAMS["bwa_sampe_options"]
    bwa_threads = PARAMS["bwa_threads"]

    m = PipelineMapping.BWA(remove_non_unique=True)
    statement = m.build((fastq, ), outfile)
    P.run()
def mapReads(infiles, outfile):
    '''Map reads to the genome using BWA.

    infiles
        Input read file(s); wrapped in a one-element tuple and handed
        to the mapper's ``build()``.
    outfile
        Output file name, passed straight to the mapper's ``build()``.
    '''
    # NOTE(review): P.run() is called with no arguments, so it presumably
    # reads ``statement`` and ``job_threads`` from this function's local
    # scope -- keep these names as they are.
    job_threads = PARAMS["bwa_threads"]

    mapper = PipelineMapping.BWA()
    statement = mapper.build((infiles, ), outfile)
    P.run()