def exportMotifDiscoverySequences(infile, outfile):
    '''Export the sequences that are fed into motif discovery.

    The track name is *infile* without its ``_intervals.load`` suffix and
    the intervals are read through the handle returned by ``connect()``,
    so the ``_interval`` tables have to be loaded first.

    Only the sequences with the highest signal-to-noise ratio are meant to
    be supplied.  The selection itself is delegated to
    ``PipelineMotifs.writeSequencesForIntervals`` (called with
    ``full=False``) and is steered by these configuration values:

    motifs_proportion
        take the top proportion of the ranked intervals.
    motifs_score
        forwarded as ``order`` (presumably the ranking criterion).
    motifs_halfwidth
        export only a region +/- this width around the peak.
    motifs_min_sequences
        use at least this many sequences; if there are not enough to
        start with, all are used.
    motifs_max_size
        forwarded as ``maxsize``; upper bound on what is output.
    motifs_num_sequences, motifs_masker
        forwarded as ``num_sequences`` and, converted to a list,
        ``masker``.

    If the helper reports zero sequences, a warning is emitted and
    *outfile* is touched with ``P.touch``.
    '''
    track = P.snip(infile, "_intervals.load")
    dbhandle = connect()

    # locals() holds infile, outfile, track and dbhandle at this point and
    # is handed to P.substituteParameters as keyword arguments, so these
    # four names are deliberately left untouched.
    params = P.substituteParameters(**locals())

    write_sequences = PipelineMotifs.writeSequencesForIntervals
    selection = dict(
        full=False,
        masker=P.asList(params['motifs_masker']),
        halfwidth=int(params["motifs_halfwidth"]),
        maxsize=int(params["motifs_max_size"]),
        proportion=params["motifs_proportion"],
        min_sequences=params["motifs_min_sequences"],
        num_sequences=params["motifs_num_sequences"],
        order=params['motifs_score'],
    )
    n_written = write_sequences(track, outfile, dbhandle, **selection)

    if n_written != 0:
        return

    # Nothing was exported: say so and touch the output file.
    E.warn("%s: no sequences - meme skipped" % outfile)
    P.touch(outfile)
def exportMotifDiscoverySequences(infile, outfile):
    '''Export the sequences that are fed into motif discovery.

    The track name is *infile* without its ``_intervals.load`` suffix and
    the intervals are read through the handle returned by ``connect()``,
    so the ``_interval`` tables have to be loaded first.

    Only the sequences with the highest signal-to-noise ratio are meant to
    be supplied.  The selection itself is delegated to
    ``PipelineMotifs.writeSequencesForIntervals`` (called with
    ``full=False``) and is steered by these configuration values:

    motifs_proportion
        take the top proportion of the ranked intervals.
    motifs_score
        forwarded as ``order`` (presumably the ranking criterion).
    motifs_halfwidth
        export only a region +/- this width around the peak.
    motifs_min_sequences
        use at least this many sequences; if there are not enough to
        start with, all are used.
    motifs_max_size
        forwarded as ``maxsize``; upper bound on what is output.
    motifs_num_sequences, motifs_masker
        forwarded as ``num_sequences`` and, converted to a list,
        ``masker``.

    If the helper reports zero sequences, a warning is emitted and
    *outfile* is touched with ``P.touch``.
    '''
    track = P.snip(infile, "_intervals.load")
    dbhandle = connect()

    # locals() holds infile, outfile, track and dbhandle at this point and
    # is handed to P.substituteParameters as keyword arguments, so these
    # four names are deliberately left untouched.
    params = P.substituteParameters(**locals())

    write_sequences = PipelineMotifs.writeSequencesForIntervals
    selection = dict(
        full=False,
        masker=P.asList(params['motifs_masker']),
        halfwidth=int(params["motifs_halfwidth"]),
        maxsize=int(params["motifs_max_size"]),
        proportion=params["motifs_proportion"],
        min_sequences=params["motifs_min_sequences"],
        num_sequences=params["motifs_num_sequences"],
        order=params['motifs_score'],
    )
    n_written = write_sequences(track, outfile, dbhandle, **selection)

    if n_written != 0:
        return

    # Nothing was exported: say so and touch the output file.
    E.warn("%s: no sequences - meme skipped" % outfile)
    P.touch(outfile)
def mapReadsAgainstSpadesContigs(infiles, outfile):
    '''Map reads against SPAdes contigs with bowtie or BWA-MEM.

    Parameters
    ----------
    infiles : sequence
        ``infiles[0]`` is the read file that gets mapped.  ``infiles[1]``
        is the path of an index file; its suffix selects the mapper
        (``.bt2`` / ``.ebwt`` -> ``PipelineMapping.Bowtie``, anything else
        ending in ``bwt`` -> ``PipelineMapping.BWAMEM``).  If
        ``infiles[1]`` contains ``"agg"`` the aggregated contig set
        ``agg-agg-agg.filtered.contigs.fa`` is used as the genome,
        otherwise the genome name is derived from ``infiles[0]``.
    outfile : str
        Output path; its directory is used as the index directory.

    Raises
    ------
    ValueError
        If ``infiles[1]`` is neither a bowtie nor a BWA index file.
        (Previously this surfaced as an ``UnboundLocalError`` on ``m``.)
    '''
    # NOTE(review): several locals below are never read in this function.
    # They are passed on by name through P.substituteParameters(**locals())
    # and are presumably also picked up by the argument-less P.run(), so
    # they must not be renamed or removed without checking the framework.
    inf = infiles[0]
    to_cluster = True
    index_dir = os.path.dirname(outfile)

    if "agg" not in infiles[1]:
        # Leading part of the read file name up to the last "R<digits>".
        genome = re.search(
            r".*R[0-9]*", infiles[0]).group(0) + ".filtered.contigs.fa"
    else:
        genome = "agg-agg-agg.filtered.contigs.fa"

    if infiles[1].endswith((".bt2", ".ebwt")):
        infile = infiles[0]
        # NOTE(review): genome already ends in ".fa", so reffile ends in
        # ".fa.fa" - kept as in the original; confirm this is intended.
        reffile = os.path.join(index_dir, genome) + ".fa"
        m = PipelineMapping.Bowtie(
            executable=P.substituteParameters(**locals())["bowtie_executable"])
    elif infiles[1].endswith("bwt"):
        job_options = " -l mem_free=%s" % (PARAMS["bwa_memory"])
        bwa_index_dir = index_dir
        bwa_mem_options = PARAMS["bwa_mem_options"]
        bwa_threads = PARAMS["bwa_threads"]
        m = PipelineMapping.BWAMEM(remove_non_unique=True)
    else:
        # Fail with a clear message instead of an unbound ``m`` below.
        raise ValueError(
            "%s: cannot select a mapper - expected a bowtie (.bt2/.ebwt) "
            "or BWA (bwt) index file, got %r" % (outfile, infiles[1]))

    statement = m.build((inf,), outfile)
    P.run()
def mapReadsWithBowtieAgainstRayContigs(infile, outfile):
    '''Map the reads in *infile* against Ray contigs with bowtie.

    The first track reported by ``TRACKS.getTracks(infile)`` names the
    reference: its part before the first ``"."`` becomes the genome and
    the full name, inside ``ray.dir``, becomes the reference file.

    Side effects: the module-level ``PARAMS`` entries ``bowtie_index_dir``
    and ``genome`` are overwritten.
    '''
    # These overrides are written to the shared PARAMS mapping, not to locals.
    PARAMS["bowtie_index_dir"] = "ray.dir"
    PARAMS["genome"] = TRACKS.getTracks(infile)[0].split(".")[0]

    # reffile is not read in this function; it is passed on by name through
    # locals() below and is presumably also picked up by P.run().
    reffile = os.path.join("ray.dir", TRACKS.getTracks(infile)[0])

    m = PipelineMapping.Bowtie(
        executable=P.substituteParameters(**locals())["bowtie_executable"],
    )

    # P.run() takes no arguments; presumably it reads ``statement`` from
    # this frame - TODO confirm against the pipeline framework.
    statement = m.build((infile,), outfile)
    P.run()
def mapReadsWithBowtieAgainstRayContigs(infile, outfile):
    '''Map the reads in *infile* against Ray contigs with bowtie.

    The first track reported by ``TRACKS.getTracks(infile)`` names the
    reference: its part before the first ``"."`` becomes the genome and
    the full name, inside ``ray.dir``, becomes the reference file.

    Side effects: the module-level ``PARAMS`` entries ``bowtie_index_dir``
    and ``genome`` are overwritten.
    '''
    # These overrides are written to the shared PARAMS mapping, not to locals.
    PARAMS["bowtie_index_dir"] = "ray.dir"
    PARAMS["genome"] = TRACKS.getTracks(infile)[0].split(".")[0]

    # reffile is not read in this function; it is passed on by name through
    # locals() below and is presumably also picked up by P.run().
    reffile = os.path.join("ray.dir", TRACKS.getTracks(infile)[0])

    m = PipelineMapping.Bowtie(
        executable=P.substituteParameters(**locals())["bowtie_executable"],
    )

    # P.run() takes no arguments; presumably it reads ``statement`` from
    # this frame - TODO confirm against the pipeline framework.
    statement = m.build((infile,), outfile)
    P.run()
def mapReadsWithBowtieAgainstExpectedContigs(infiles, outfile):
    '''
    map reads against the expected contigs with bowtie

    ``infiles[0]`` supplies the genome name: the basename of its leading
    ``.*R[0-9]*`` match plus ``.contigs.expected``.  ``infiles[1]`` is
    iterated and each element is used as the read file to map; the
    reference file is ``<genome>.fa``.
    '''
    # The locals below are not read in this function; they are passed on by
    # name through P.substituteParameters(**locals()) and are presumably
    # also picked up by the argument-less P.run() - do not rename them.
    to_cluster = True
    bowtie_index_dir = "expected_contigs.dir"
    genome = os.path.basename(re.search(".*R[0-9]*", infiles[0]).group(0) + ".contigs.expected")
    # NOTE(review): the original indentation of this function was lost, so
    # the extent of the loop body is an assumption (everything up to and
    # including P.run()) - confirm against version control.
    # NOTE(review): every iteration builds its statement for the same
    # outfile; and if infiles[1] is a plain string this iterates over its
    # characters - verify what the caller passes.
    for seq in infiles[1]:
        to_cluster = True
        infile, reffile = seq, genome + ".fa"
        m = PipelineMapping.Bowtie( executable = P.substituteParameters( **locals() )["bowtie_executable"] )
        statement = m.build( (infile,), outfile )
        P.run()