def exportMotifDiscoverySequences(infile, outfile):
    '''Write the interval sequences used as input for motif discovery.

    The track name is derived from *infile* by removing the
    ``_intervals.load`` suffix, and the selection itself is delegated to
    :func:`PipelineMotifs.writeSequencesForIntervals`.  The selection is
    driven by these configuration values:

    * ``motifs_masker`` - masker(s) to apply, passed as a list.
    * ``motifs_halfwidth`` - half-width of the window, cast to int.
    * ``motifs_max_size`` - upper size limit, cast to int.
    * ``motifs_proportion`` - proportion of intervals to use.
    * ``motifs_min_sequences`` - minimum number of sequences.
    * ``motifs_num_sequences`` - number of sequences.
    * ``motifs_score`` - column by which intervals are ordered.

    If no sequence is written, a warning is emitted and *outfile* is
    touched.

    NOTE(review): a second definition with the same name follows this
    one in the file and will shadow it at import time - confirm which
    one is meant to be kept.
    '''
    # ``track`` and ``dbhandle`` must be the only additional locals at
    # the point where locals() is captured: they are forwarded to
    # P.substituteParameters together with infile and outfile.
    track = P.snip(infile, "_intervals.load")
    dbhandle = connect()

    params = P.substituteParameters(**locals())

    # Values are looked up in the same order as they are passed on.
    options = {
        "full": False,
        "masker": P.asList(params['motifs_masker']),
        "halfwidth": int(params["motifs_halfwidth"]),
        "maxsize": int(params["motifs_max_size"]),
        "proportion": params["motifs_proportion"],
        "min_sequences": params["motifs_min_sequences"],
        "num_sequences": params["motifs_num_sequences"],
        "order": params['motifs_score'],
    }

    written = PipelineMotifs.writeSequencesForIntervals(
        track, outfile, dbhandle, **options)

    if written != 0:
        return

    # Nothing to run motif discovery on: leave a placeholder output.
    E.warn("%s: no sequences - meme skipped" % outfile)
    P.touch(outfile)
def exportMotifDiscoverySequences(infile, outfile):
    '''Export the sequences that are fed into motif discovery.

    Requires the ``_intervals`` tables: the track is obtained by
    stripping ``_intervals.load`` from *infile* and the sequences are
    written to *outfile* by
    :func:`PipelineMotifs.writeSequencesForIntervals`.

    All selection settings are read from the substituted parameter
    dictionary (``motifs_masker``, ``motifs_halfwidth``,
    ``motifs_max_size``, ``motifs_proportion``,
    ``motifs_min_sequences``, ``motifs_num_sequences`` and
    ``motifs_score``, the latter giving the sort order).

    When the writer reports zero sequences, a warning is logged and
    *outfile* is touched.

    NOTE(review): an earlier definition with the same name precedes
    this one in the file; being the later one, this definition is the
    one that ends up bound to the name.
    '''
    # Keep exactly these two locals ahead of the locals() call below:
    # the whole local namespace is handed to P.substituteParameters.
    track = P.snip(infile, "_intervals.load")
    dbhandle = connect()

    settings = P.substituteParameters(**locals())

    # Resolve each setting up front, in the order in which it is passed.
    maskers = P.asList(settings['motifs_masker'])
    halfwidth = int(settings["motifs_halfwidth"])
    max_size = int(settings["motifs_max_size"])
    proportion = settings["motifs_proportion"]
    min_sequences = settings["motifs_min_sequences"]
    num_sequences = settings["motifs_num_sequences"]
    sort_column = settings['motifs_score']

    count = PipelineMotifs.writeSequencesForIntervals(
        track,
        outfile,
        dbhandle,
        full=False,
        masker=maskers,
        halfwidth=halfwidth,
        maxsize=max_size,
        proportion=proportion,
        min_sequences=min_sequences,
        num_sequences=num_sequences,
        order=sort_column,
    )

    if count == 0:
        # No input for motif discovery: warn and create a placeholder.
        E.warn("%s: no sequences - meme skipped" % outfile)
        P.touch(outfile)
def buildBackgroundSequences(infile, outfile, npeaks, width, masker):
    '''Write background sequences for the peaks of a track.

    The track name is derived from *infile* by removing the
    ``_intervals.load`` suffix.  Sequences are written to *outfile* via
    :func:`PipelineMotifs.writeSequencesForIntervals`, ordered by
    ``peakval`` and with ``shift="leftright"``.

    Arguments
    ---------
    infile : string
        Name of the ``*_intervals.load`` file identifying the track.
    outfile : string
        Name of the file the sequences are written to.
    npeaks : int
        Passed on as ``num_sequences``.
    width : int
        Passed on as ``halfwidth``.
    masker : string
        Single masker; wrapped into a one-element list before being
        passed on.  Whether masking is applied is specified in the
        ini file.

    The maximum size is taken from ``PARAMS["motifs_max_size"]``.
    A warning is emitted if no sequences were written.
    '''
    track = P.snip(infile, "_intervals.load")
    dbhandle = connect()

    nseq = PipelineMotifs.writeSequencesForIntervals(
        track,
        outfile,
        dbhandle,
        full=False,
        masker=[masker],
        halfwidth=width,
        maxsize=int(PARAMS["motifs_max_size"]),
        proportion=None,
        num_sequences=npeaks,
        order='peakval',
        shift="leftright")

    if nseq == 0:
        # Report the actual output file.  The previous code referenced
        # the undefined name ``outfile_background`` here, which raised
        # a NameError instead of emitting the warning.
        E.warn("%s: no sequences in background" % outfile)