Beispiel #1
0
def runMemeChIP(infiles, outfile):
    '''Run the MEME-ChIP pipeline, optionally with reference motifs.

    The first entry of *infiles* is handed over as the input file;
    every remaining entry (possibly none) is handed over as the
    collection of reference motif files.
    '''
    infile = infiles[0]
    motifs = infiles[1:]
    PipelineMotifs.runMemeCHIP(infile, outfile, motifs)
Beispiel #2
0
def compareMemeChipTracks(infiles, outfile):
    '''Use tomtom to check whether similar motifs have been found in
    more than one track.

    *infiles* must contain exactly two entries, one per track; both
    are forwarded to ``PipelineMotifs.tomtom_comparison``.
    '''
    first_track, second_track = infiles
    PipelineMotifs.tomtom_comparison(first_track, second_track, outfile)
Beispiel #3
0
def exportMotifControlSequences(infile, outfile):
    '''Export control sequences for each interval.

    For every interval the left and right sequence segment of the
    same size is exported (``mode="leftright"``). The masker is taken
    from the ``motifs_masker`` configuration value.
    '''
    export = PipelineMotifs.exportSequencesFromBedFile
    masker = PARAMS['motifs_masker']
    export(infile, outfile, masker=masker, mode="leftright")
Beispiel #4
0
def exportMotifIntervalSequences(infile, outfile):
    '''Export interval sequences for motif detection.

    This method requires the _interval tables. The masker is taken
    from the ``motifs_masker`` configuration value.
    '''
    export = PipelineMotifs.exportSequencesFromBedFile
    masker = PARAMS['motifs_masker']
    export(infile, outfile, masker=masker)
Beispiel #5
0
def loadMast(infile, outfile):
    '''Parse a MAST result file and load it into the database.

    Several motif runs are parsed and added to the same table;
    columns for the control data are added as well. All of the work
    is delegated to ``PipelineMotifs.loadMAST``.
    '''
    PipelineMotifs.loadMAST(infile, outfile)
Beispiel #6
0
def getMemeChipSeedMotifs(infile, outfile):
    '''Extract the seed motifs from the MEME-ChIP output.

    The alignment file is looked up at
    ``<exportdir>/memechip.dir/<basename of infile>/motif_alignment.txt``,
    where ``exportdir`` comes from the configuration.
    '''
    track_name = os.path.basename(infile)
    alignment_path = os.path.join(
        PARAMS["exportdir"],
        "memechip.dir",
        track_name,
        "motif_alignment.txt")

    PipelineMotifs.getSeedMotifs(infile, alignment_path, outfile)
Beispiel #7
0
def runMast(infiles, outfile):
    '''Run MAST on all intervals and motifs.

    Results are collected for an E-value up to 10000 so that all
    sequences are output and MAST curves can be computed. The value
    10000 is a heuristic.

    The call is delegated unchanged to ``PipelineMotifs.runMAST``.
    '''
    PipelineMotifs.runMAST(infiles, outfile)
Beispiel #8
0
def outputTomTomWithMotifEnrichment(infiles, outfile):
    '''Add the original motif enrichment to the TomTom results.

    The runTomTom output does not contain the original motif
    enrichment; this task adds it. The first entry of *infiles* is
    the seed motif result, the second the TomTom result.
    '''
    seed_result, tomtom_result = infiles[0], infiles[1]

    PipelineMotifs.add_motif_enrichment_to_tomtom(
        seed_result, tomtom_result, outfile)
Beispiel #9
0
def runMeme(infile, outfile):
    '''Run MEME to find motifs.

    To increase the signal/noise ratio, MEME is not run on all
    intervals: only the top 10% of intervals (by peakval) are used,
    and of those only the 200 bp segment around the peak rather than
    the complete interval.

    * Softmasked sequence is converted to hardmasked sequence to
      avoid the detection of spurious motifs.

    * Sequence is run through dustmasker.

    This function itself only delegates to
    ``PipelineMotifs.runMEMEOnSequences``.
    '''
    PipelineMotifs.runMEMEOnSequences(infile, outfile)
Beispiel #10
0
def exportIntervalSequences(infile, outfile, track, method):
    '''Export sequences for motif discovery.

    This method requires the _interval tables.

    For motif discovery, only the sequences with the highest S/N ratio
    are supplied. The selection is controlled by configuration values
    whose names are prefixed with *method* (``<method>_...``); they are
    read here and forwarded to
    ``PipelineMotifs.writeSequencesForIntervals``:

    1. ``<method>_proportion``: the top proportion of intervals,
       ordered by ``<method>_score``.
    2. ``<method>_halfwidth``: only a region +/- halfwidth around the
       peak. If the value cannot be converted to an integer, complete
       intervals are requested instead (``full=True``).
    3. ``<method>_min_sequences``: at least this many sequences. If
       there are not enough sequences to start with, all will be used.
    4. ``<method>_max_size``: at most this many sequences will be
       output. No limit is passed if the value cannot be converted to
       an integer.
    5. ``<method>_num_sequences`` and ``<method>_masker`` are passed
       through as well.

    Arguments
    ---------
    infile :
        Not used by this function; the sequences are obtained through
        *track* and the database handle.
    outfile :
        File the sequences are written to. It is touched (created
        empty) when no sequences were written.
    track :
        Track whose intervals are exported.
    method :
        Prefix of the configuration values to use.
    '''
    dbhandle = connect()

    # A value that is not a number (e.g. an empty string) or that is
    # missing altogether (None) means "do not restrict to a window".
    # int(None) raises TypeError rather than ValueError, so both need
    # to be caught.
    try:
        halfwidth = int(PARAMS[method + "_halfwidth"])
        full = False
    except (ValueError, TypeError):
        full = True
        halfwidth = None

    # Same convention for the optional upper bound on the output size.
    try:
        maxsize = int(PARAMS[method + "_max_size"])
    except (ValueError, TypeError):
        maxsize = None

    nseq = PipelineMotifs.writeSequencesForIntervals(
        track,
        outfile,
        dbhandle,
        full=full,
        masker=P.asList(PARAMS[method + '_masker']),
        halfwidth=halfwidth,
        maxsize=maxsize,
        num_sequences=PARAMS[method + "_num_sequences"],
        proportion=PARAMS[method + "_proportion"],
        min_sequences=PARAMS[method + "_min_sequences"],
        order=PARAMS[method + '_score'])

    if nseq == 0:
        # Make sure the output exists so that downstream tasks see an
        # (empty) result instead of a missing file.
        E.warn("%s: no sequences - %s skipped" % (outfile, method))
        P.touch(outfile)
Beispiel #11
0
def getDiscDremeSeeds(infiles, outfile):
    '''Extract seed motifs for MEME and DREME output.

    *infiles* must contain exactly two entries: the alignments
    followed by the motifs.
    '''
    alignment_file, motif_file = infiles
    PipelineMotifs.getSeedMotifs(motif_file, alignment_file, outfile)
Beispiel #12
0
def discDremeSelfMatches(infile, outfile):
    '''Compare the output of MEME and DREME to itself so that similar
    motifs within runs can be clustered.

    The same file is passed as both sides of the tomtom comparison.
    '''
    PipelineMotifs.tomtom_comparison(infile, infile, outfile)
Beispiel #13
0
def runDiscDREME(infiles, outfile):
    '''Run discriminative DREME using the control file specified
    in design.tsv.

    *infiles* must contain exactly two entries: the input file
    followed by the negative (control) file, which is passed on as
    ``neg_file``.
    '''
    positives, negatives = infiles
    PipelineMotifs.runDREME(positives, outfile, neg_file=negatives)
Beispiel #14
0
def runRandDreme(infile, outfile):
    '''Run DREME with a randomised negative set.

    No negative file is passed to ``PipelineMotifs.runDREME``.
    '''
    PipelineMotifs.runDREME(infile, outfile)
Beispiel #15
0
def runDiscMEME(infiles, outfile):
    '''Run MEME with a PSP file, thereby making it discriminative.

    *infiles* must contain exactly two entries: the PSP file followed
    by the FASTA file with the sequences.
    '''
    psp_file, sequences = infiles
    PipelineMotifs.runMEMEOnSequences(sequences, outfile, psp=psp_file)
Beispiel #16
0
def loadTomTom(infile, outfile):
    '''Load tomtom results by delegating to
    ``PipelineMotifs.loadTomTom``.'''
    PipelineMotifs.loadTomTom(infile, outfile)
Beispiel #17
0
def getDiscMEMEPSPFile(infiles, outfile):
    '''Generate the position specific prior (PSP) file that allows
    discriminative motif finding with MEME.

    *infiles* must contain exactly two entries: the positive set
    followed by the negative set.
    '''
    positives, negatives = infiles
    PipelineMotifs.generatePSP(positives, negatives, outfile)
Beispiel #18
0
def runTomTom(infile, outfile):
    '''Compare ab-initio motifs against a database of known motifs
    by delegating to ``PipelineMotifs.runTomTom``.'''
    PipelineMotifs.runTomTom(infile, outfile)