예제 #1
0
def exportMotifDiscoverySequences(infile, outfile):
    '''Write the interval sequences that feed motif discovery.

    Needs the _interval tables to be available.

    Motif discovery should only see the sequences with the highest
    signal-to-noise ratio. The selection is controlled by the
    following configuration values:

    * *motifs_proportion*: take the top fraction of the intervals
      (the ordering column is given by *motifs_score*).
    * *motifs_halfwidth*: export only a window of +/- this many bases
      around the peak.
    * *motifs_min_sequences*: lower bound on the number of sequences;
      if fewer are available to start with, all are used.
    * *motifs_max_size*: upper bound on what is written.

    When nothing was written (``nseq == 0``) a warning is emitted and
    *outfile* is touched so that it exists.
    '''
    # NOTE: everything bound up to the locals() call below is forwarded
    # by name to P.substituteParameters -- keep these names and do not
    # introduce further locals before that call.
    track = P.snip(infile, "_intervals.load")
    dbhandle = connect()
    p = P.substituteParameters(**locals())

    # Collect the selection options first; the lookups happen in the
    # same order in which they are handed to the writer.
    selection = {
        "full": False,
        "masker": P.asList(p['motifs_masker']),
        "halfwidth": int(p["motifs_halfwidth"]),
        "maxsize": int(p["motifs_max_size"]),
        "proportion": p["motifs_proportion"],
        "min_sequences": p["motifs_min_sequences"],
        "num_sequences": p["motifs_num_sequences"],
        "order": p['motifs_score'],
    }
    nseq = PipelineMotifs.writeSequencesForIntervals(
        track, outfile, dbhandle, **selection)

    if nseq != 0:
        return

    # Nothing exported: warn and leave a touched output file behind.
    E.warn("%s: no sequences - meme skipped" % outfile)
    P.touch(outfile)
예제 #2
0
def exportMotifDiscoverySequences(infile, outfile):
    '''Export the sequences used as input for motif discovery.

    The _interval tables are a prerequisite for this step.

    Only the sequences with the highest S/N ratio are handed on:

    1. the top *motifs_proportion* of the intervals,
    2. restricted to +/- *motifs_halfwidth* around the peak,
    3. at least *motifs_min_sequences* of them (all of them if there
       are not enough to start with),
    4. and no more than *motifs_max_size*.

    If the export yields no sequences, a warning is issued and
    *outfile* is touched.
    '''
    # The locals bound before the locals() call are passed on by name
    # to P.substituteParameters, so their names are kept unchanged and
    # no other local is defined ahead of that call.
    track = P.snip(infile, "_intervals.load")
    dbhandle = connect()
    p = P.substituteParameters(**locals())

    # Resolve each option up front, in the order they are passed below.
    masker = P.asList(p['motifs_masker'])
    halfwidth = int(p["motifs_halfwidth"])
    maxsize = int(p["motifs_max_size"])
    proportion = p["motifs_proportion"]
    min_sequences = p["motifs_min_sequences"]
    num_sequences = p["motifs_num_sequences"]
    order = p['motifs_score']

    nseq = PipelineMotifs.writeSequencesForIntervals(
        track,
        outfile,
        dbhandle,
        full=False,
        masker=masker,
        halfwidth=halfwidth,
        maxsize=maxsize,
        proportion=proportion,
        min_sequences=min_sequences,
        num_sequences=num_sequences,
        order=order)

    if nseq == 0:
        # Empty result: warn, then touch the output file.
        E.warn("%s: no sequences - meme skipped" % outfile)
        P.touch(outfile)
예제 #3
0
def buildBackgroundSequences(infile, outfile, npeaks, width, masker):
    '''Write background sequences for the peaks of a track.

    The sequences are produced by
    ``PipelineMotifs.writeSequencesForIntervals`` with
    ``shift="leftright"`` and ordered by ``peakval``; whether they are
    masked or not is determined by *masker* (specified in the ini file).

    Arguments
    ---------
    infile
        Name of the ``*_intervals.load`` file; the track name is
        derived from it.
    outfile
        File the sequences are written to.
    npeaks
        Passed on as ``num_sequences``.
    width
        Passed on as ``halfwidth``.
    masker
        A single masker; it is wrapped into a one-element list before
        being passed on.

    A warning is emitted if no sequences were written.
    '''
    track = P.snip(infile, "_intervals.load")
    dbhandle = connect()

    nseq = PipelineMotifs.writeSequencesForIntervals(
        track,
        outfile,
        dbhandle,
        full=False,
        masker=[masker],
        halfwidth=width,
        maxsize=int(PARAMS["motifs_max_size"]),
        proportion=None,
        num_sequences=npeaks,
        order='peakval',
        shift="leftright")

    if nseq == 0:
        # The original referenced the undefined name ``outfile_background``
        # here, which raised NameError instead of emitting the warning.
        E.warn("%s: no sequences in background" % outfile)