Ejemplo n.º 1
0
def exportMotifControlSequences(infile, outfile):
    '''Export control sequences flanking each interval.

    For every interval, the left and right sequence segments of the
    same size are exported; this is requested through the
    ``"leftright"`` mode below.

    *infile* and *outfile* are handed on unchanged to
    ``PipelineMotifs.exportSequencesFromBedFile``. The masker is read
    from the ``motifs_masker`` entry of ``PARAMS``.
    '''
    export_options = {
        "masker": PARAMS['motifs_masker'],
        "mode": "leftright",
    }
    PipelineMotifs.exportSequencesFromBedFile(
        infile, outfile, **export_options)
Ejemplo n.º 2
0
def exportMotifDetectionSequences(infile, outfile):
    '''Export sequences for motif discovery.

    This method requires the _interval tables.

    *infile* and *outfile* are handed on unchanged to
    ``PipelineMotifs.exportSequencesFromBedFile``. The masker is read
    from the ``motifs_masker`` entry of ``PARAMS``; no mode is passed,
    so the callee's default applies.
    '''
    export_options = {"masker": PARAMS['motifs_masker']}
    PipelineMotifs.exportSequencesFromBedFile(
        infile, outfile, **export_options)
Ejemplo n.º 3
0
def loadMast(infile, outfile):
    '''Parse a MAST file and load it into the database.

    Several motif runs are parsed and added to the same table;
    columns for the control data are added as well.

    This function only delegates: *infile* and *outfile* are passed
    straight through to ``PipelineMotifs.loadMAST``.
    '''
    load_mast = PipelineMotifs.loadMAST
    load_mast(infile, outfile)
Ejemplo n.º 4
0
def runMast(infiles, outfile):
    '''Run MAST on all intervals and motifs.

    All results for an E-value up to 10000 are collected so that
    all sequences are output and MAST curves can be computed.
    The value 10000 is a heuristic.

    The threshold is not set here; this function only forwards
    *infiles* and *outfile* to ``PipelineMotifs.runMAST``.
    '''
    run_mast = PipelineMotifs.runMAST
    run_mast(infiles, outfile)
Ejemplo n.º 5
0
def loadMast(infile, outfile):
    '''Load parsed MAST output into the database.

    Multiple motif runs are parsed and stored in one shared table,
    together with additional columns for the control data.

    The work is done by ``PipelineMotifs.loadMAST``, which receives
    *infile* and *outfile* unchanged.
    '''
    mast_loader = PipelineMotifs.loadMAST
    mast_loader(infile, outfile)
Ejemplo n.º 6
0
def runMast(infiles, outfile):
    '''Run MAST across all intervals and motifs.

    Results are collected for an E-value up to 10000, so that every
    sequence is output and MAST curves can be computed. 10000 is a
    heuristic.

    The work is done by ``PipelineMotifs.runMAST``, which receives
    *infiles* and *outfile* unchanged.
    '''
    mast_runner = PipelineMotifs.runMAST
    mast_runner(infiles, outfile)
Ejemplo n.º 7
0
def exportMotifDiscoverySequences(infile, outfile):
    '''Write the sequences that motif discovery is run on.

    This method requires the _interval tables.

    Only the sequences with the highest S/N ratio are supplied for
    motif discovery:

    1. The top *motifs_proportion* intervals sorted by peakval
       (the ordering passed on is the ``motifs_score`` parameter).
    2. Only a region +/- *motifs_halfwidth* around the peak.
    3. At least *motifs_min_sequences*. If there are not enough
       sequences to start with, all will be used.
    4. At most *motifs_max_size* sequences will be output.

    If no sequence is written, a warning is issued and *outfile* is
    touched.
    '''
    track = P.snip(infile, "_intervals.load")
    dbhandle = connect()

    # locals() is forwarded to the parameter substitution, so the local
    # names bound above are kept as-is and nothing else is bound before
    # this call.
    p = P.substituteParameters(**locals())

    selection = dict(
        full=False,
        masker=P.asList(p['motifs_masker']),
        halfwidth=int(p["motifs_halfwidth"]),
        maxsize=int(p["motifs_max_size"]),
        proportion=p["motifs_proportion"],
        min_sequences=p["motifs_min_sequences"],
        num_sequences=p["motifs_num_sequences"],
        order=p['motifs_score'],
    )
    nseq = PipelineMotifs.writeSequencesForIntervals(
        track, outfile, dbhandle, **selection)

    if nseq != 0:
        return

    # Nothing was exported: warn and touch the output file.
    E.warn("%s: no sequences - meme skipped" % outfile)
    P.touch(outfile)
Ejemplo n.º 8
0
def exportMotifDiscoverySequences(infile, outfile):
    '''Export the sequences used for motif discovery.

    This method requires the _interval tables.

    For motif discovery, only the sequences with the highest S/N
    ratio are supplied:

    1. The top *motifs_proportion* intervals sorted by peakval
       (the ordering passed on is the ``motifs_score`` parameter).
    2. Only a region +/- *motifs_halfwidth* around the peak.
    3. At least *motifs_min_sequences*. If there are not enough
       sequences to start with, all will be used.
    4. At most *motifs_max_size* sequences will be output.

    When the export yields zero sequences, a warning is logged and
    *outfile* is touched.
    '''
    track = P.snip(infile, "_intervals.load")
    dbhandle = connect()

    # The local namespace is passed to substituteParameters; keep the
    # names above unchanged and bind nothing else before this line.
    p = P.substituteParameters(**locals())

    export_options = {
        "full": False,
        "masker": P.asList(p['motifs_masker']),
        "halfwidth": int(p["motifs_halfwidth"]),
        "maxsize": int(p["motifs_max_size"]),
        "proportion": p["motifs_proportion"],
        "min_sequences": p["motifs_min_sequences"],
        "num_sequences": p["motifs_num_sequences"],
        "order": p['motifs_score'],
    }
    nseq = PipelineMotifs.writeSequencesForIntervals(
        track,
        outfile,
        dbhandle,
        **export_options)

    no_sequences = nseq == 0
    if no_sequences:
        E.warn("%s: no sequences - meme skipped" % outfile)
        P.touch(outfile)
Ejemplo n.º 9
0
def runMeme(infile, outfile):
    '''Run MEME to find motifs.

    To increase the signal/noise ratio, MEME is not run on all
    intervals: only the top 10% of intervals (by peakval) are used,
    and only the 200 bp segment around the peak rather than the
    complete interval.

    * Softmasked sequence is converted to hardmasked sequence to
      avoid the detection of spurious motifs.

    * Sequence is run through dustmasker.

    None of that selection or masking happens in this function; it
    strips the ``.discovery.fasta`` suffix from *infile* and hands
    *infile* and *outfile* to ``PipelineMotifs.runMEMEOnSequences``.
    '''
    # NOTE(review): the snipped name is not used below. The call is kept
    # in case P.snip validates the suffix - confirm before removing.
    track_name = P.snip(infile, ".discovery.fasta")

    PipelineMotifs.runMEMEOnSequences(infile, outfile)
Ejemplo n.º 10
0
def runMeme(infile, outfile):
    '''Find motifs by running MEME.

    MEME is deliberately not run on every interval. To improve the
    signal/noise ratio, only the top 10% of intervals (peakval) are
    used, restricted to the 200 bp segment around the peak instead of
    the complete interval.

    * Softmasked sequence is converted to hardmasked sequence to
      avoid the detection of spurious motifs.

    * Sequence is run through dustmasker.

    This function itself only strips the ``.discovery.fasta`` suffix
    from *infile* and delegates to
    ``PipelineMotifs.runMEMEOnSequences``.
    '''
    # NOTE(review): this result is unused here. The call is retained in
    # case P.snip checks that the suffix is present - confirm.
    snipped = P.snip(infile, ".discovery.fasta")

    meme_runner = PipelineMotifs.runMEMEOnSequences
    meme_runner(infile, outfile)
Ejemplo n.º 11
0
def buildBackgroundSequences(infile, outfile, npeaks, width, masker):
    '''Get the peak sequences, masked or not as specified in the ini file.

    The sequences are written with ``shift="leftright"`` and ordered
    by ``peakval``; the maximum size is taken from the
    ``motifs_max_size`` entry of ``PARAMS``.

    Arguments
    ---------
    infile
        Name from which the ``_intervals.load`` suffix is stripped
        to obtain the track.
    outfile
        Output passed to ``PipelineMotifs.writeSequencesForIntervals``.
    npeaks
        Passed on as ``num_sequences``.
    width
        Passed on as ``halfwidth``.
    masker
        A single masker; it is wrapped in a one-element list.

    A warning is logged if no sequences were written.
    '''
    track = P.snip(infile, "_intervals.load")
    dbhandle = connect()

    nseq = PipelineMotifs.writeSequencesForIntervals(
        track,
        outfile,
        dbhandle,
        full=False,
        masker=[masker],
        halfwidth=width,
        maxsize=int(PARAMS["motifs_max_size"]),
        proportion=None,
        num_sequences=npeaks,
        order='peakval',
        shift="leftright")

    if nseq == 0:
        # The warning used to reference the undefined name
        # ``outfile_background``, which raised NameError exactly in the
        # empty-output case it was meant to report.
        E.warn("%s: no sequences in background" % outfile)
Ejemplo n.º 12
0
def runTomTom(infile, outfile):
    '''Compare ab-initio motifs against tomtom.

    Pure delegation: *infile* and *outfile* are forwarded unchanged
    to ``PipelineMotifs.runTomTom``.
    '''
    run_tomtom = PipelineMotifs.runTomTom
    run_tomtom(infile, outfile)
Ejemplo n.º 13
0
def runTomTom(infile, outfile):
    '''Run the tomtom comparison for ab-initio motifs.

    The work is done by ``PipelineMotifs.runTomTom``, which receives
    *infile* and *outfile* unchanged.
    '''
    tomtom_runner = PipelineMotifs.runTomTom
    tomtom_runner(infile, outfile)