Esempio n. 1
0
def runMemeChIP(infiles, outfile):
    '''Run the MEME-ChIP pipeline, optionally with reference motifs.

    The first entry of *infiles* is the primary input; every remaining
    entry (possibly none) is forwarded as the reference motif
    collection. The actual work is delegated to
    ``PipelineMotifs.runMemeCHIP``.
    '''
    infile = infiles[0]
    motifs = infiles[1:]
    PipelineMotifs.runMemeCHIP(infile, outfile, motifs)
Esempio n. 2
0
def compareMemeChipTracks(infiles, outfile):
    '''Use tomtom to check whether similar motifs have been found in
    more than one track.

    *infiles* must unpack into exactly two tracks; both are handed to
    ``PipelineMotifs.tomtom_comparison`` together with *outfile*.
    '''
    first_track, second_track = infiles
    PipelineMotifs.tomtom_comparison(first_track, second_track, outfile)
Esempio n. 3
0
def exportMotifControlSequences(infile, outfile):
    '''Export control sequences: for each interval, the left and right
    sequence segment of the same size.

    The masker is read from ``PARAMS['motifs_masker']`` and the export
    is requested in ``"leftright"`` mode.
    '''
    masker = PARAMS['motifs_masker']
    PipelineMotifs.exportSequencesFromBedFile(
        infile, outfile, masker=masker, mode="leftright")
Esempio n. 4
0
def exportMotifIntervalSequences(infile, outfile):
    '''Export sequences for motif detection.

    This method requires the _interval tables. The masker is read from
    ``PARAMS['motifs_masker']``; no explicit mode is passed to the
    exporter.
    '''
    masker = PARAMS['motifs_masker']
    PipelineMotifs.exportSequencesFromBedFile(infile, outfile, masker=masker)
Esempio n. 5
0
def loadMast(infile, outfile):
    '''Parse a mast file and load it into the database.

    Several motif runs are parsed and added to the same table, and
    columns for the control data are added as well. All of this is
    delegated to ``PipelineMotifs.loadMAST``.
    '''
    PipelineMotifs.loadMAST(infile, outfile)
Esempio n. 6
0
def runMast(infiles, outfile):
    '''Run mast on all intervals and motifs.

    All results up to an E-value of 10000 are collected so that every
    sequence is output and MAST curves can be computed. The value
    10000 is a heuristic.

    The run itself is delegated to ``PipelineMotifs.runMAST``.
    '''
    PipelineMotifs.runMAST(infiles, outfile)
Esempio n. 7
0
def outputTomTomWithMotifEnrichment(infiles, outfile):
    '''Add the original motif enrichment to the tomtom results.

    The runTomTom output does not contain the original motif
    enrichment. The first entry of *infiles* is the seed motif result
    and the second is the tomtom result; both are passed on to
    ``PipelineMotifs.add_motif_enrichment_to_tomtom``.
    '''
    # Indexing (rather than unpacking) tolerates extra trailing entries.
    seed_result, tomtom_result = infiles[0], infiles[1]
    PipelineMotifs.add_motif_enrichment_to_tomtom(
        seed_result, tomtom_result, outfile)
Esempio n. 8
0
def getMemeChipSeedMotifs(infile, outfile):
    '''Extract the seed motifs from the MEME-ChIP output.

    The alignment file is expected at
    ``<exportdir>/memechip.dir/<basename of infile>/motif_alignment.txt``
    where ``exportdir`` comes from ``PARAMS``.
    '''
    track_name = os.path.basename(infile)
    path_parts = (
        PARAMS["exportdir"],
        "memechip.dir",
        track_name,
        "motif_alignment.txt",
    )
    alignment_path = os.path.join(*path_parts)

    PipelineMotifs.getSeedMotifs(infile, alignment_path, outfile)
Esempio n. 9
0
def runMeme(infile, outfile):
    '''Run MEME to find motifs.

    To increase the signal/noise ratio, MEME is not run on all
    intervals: only the top 10% of intervals (by peakval) are used,
    and only the 200 bp segment around the peak rather than the
    complete interval.

    * Softmasked sequence is converted to hardmasked sequence to
      avoid the detection of spurious motifs.

    * Sequence is run through dustmasker.

    The run itself is delegated to
    ``PipelineMotifs.runMEMEOnSequences``.
    '''
    PipelineMotifs.runMEMEOnSequences(infile, outfile)
Esempio n. 10
0
def exportIntervalSequences(infile, outfile, track, method):
    '''Export sequences for motif discovery.

    This method requires the _interval tables.

    For motif discovery, only the sequences with the highest S/N ratio
    are supplied. The selection is configured through ``PARAMS``
    entries prefixed with *method*:

    1. The top *<method>_proportion* intervals, ordered by
       *<method>_score*.
    2. Only a region +/- *<method>_halfwidth* around the peak. If the
       value cannot be converted to an integer, full intervals are
       requested instead.
    3. At least *<method>_min_sequences*. If there are not enough
       sequences to start with, all will be used.
    4. At most *<method>_max_size* sequences will be output. If the
       value cannot be converted to an integer, no limit is passed.

    *infile* is not used inside this function. When no sequences are
    written, a warning is emitted and *outfile* is touched.
    '''
    dbhandle = connect()

    # A non-integer halfwidth switches to exporting full intervals.
    try:
        halfwidth = int(PARAMS[method + "_halfwidth"])
    except ValueError:
        halfwidth = None
    full = halfwidth is None

    # A non-integer max_size means no size limit is passed on.
    try:
        maxsize = int(PARAMS[method + "_max_size"])
    except ValueError:
        maxsize = None

    options = {
        "full": full,
        "masker": P.as_list(PARAMS[method + '_masker']),
        "halfwidth": halfwidth,
        "maxsize": maxsize,
        "num_sequences": PARAMS[method + "_num_sequences"],
        "proportion": PARAMS[method + "_proportion"],
        "min_sequences": PARAMS[method + "_min_sequences"],
        "order": PARAMS[method + '_score'],
    }

    nseq = PipelineMotifs.writeSequencesForIntervals(
        track, outfile, dbhandle, **options)

    if nseq != 0:
        return

    E.warn("%s: no sequences - %s skipped" % (outfile, method))
    P.touch(outfile)
Esempio n. 11
0
def getDiscDremeSeeds(infiles, outfile):
    '''Extract seed motifs from MEME and DREME output.

    *infiles* must unpack into exactly two entries: the alignments
    first, then the motifs. Note that ``PipelineMotifs.getSeedMotifs``
    receives them in the opposite order (motifs, alignments).
    '''
    alignment_file, motif_file = infiles
    PipelineMotifs.getSeedMotifs(motif_file, alignment_file, outfile)
Esempio n. 12
0
def discDremeSelfMatches(infile, outfile):
    '''Compare the output of MEME and DREME to itself so that similar
    motifs within runs can be clustered.

    The same file is passed as both sides of the tomtom comparison.
    '''
    PipelineMotifs.tomtom_comparison(infile, infile, outfile)
Esempio n. 13
0
def runDiscDREME(infiles, outfile):
    '''Run discriminative DREME using the control file specified
    in design.tsv.

    *infiles* must unpack into exactly two entries: the input file and
    the negative (control) file, which is passed on as ``neg_file``.
    '''
    positives, negatives = infiles
    PipelineMotifs.runDREME(positives, outfile, neg_file=negatives)
Esempio n. 14
0
def runRandDreme(infile, outfile):
    '''Run DREME with a randomised negative set.

    No negative file is supplied to ``PipelineMotifs.runDREME``.
    '''
    PipelineMotifs.runDREME(infile, outfile)
Esempio n. 15
0
def runDiscMEME(infiles, outfile):
    '''Run MEME with a PSP file, therefore making it discriminative.

    *infiles* must unpack into exactly two entries: the PSP file first,
    then the fasta file with the sequences.
    '''
    psp_file, sequences = infiles
    PipelineMotifs.runMEMEOnSequences(sequences, outfile, psp=psp_file)
Esempio n. 16
0
def loadTomTom(infile, outfile):
    '''Load tomtom results.

    Delegates to ``PipelineMotifs.loadTomTom``.
    '''
    PipelineMotifs.loadTomTom(infile, outfile)
Esempio n. 17
0
def getDiscMEMEPSPFile(infiles, outfile):
    '''Get the position specific prior (PSP) file to allow
    discriminative motif finding with MEME.

    *infiles* must unpack into exactly two entries: the positive set
    first, then the negative set.
    '''
    positives, negatives = infiles
    PipelineMotifs.generatePSP(positives, negatives, outfile)
Esempio n. 18
0
def runTomTom(infile, outfile):
    '''Compare ab-initio motifs against a database of known motifs.

    Delegates to ``PipelineMotifs.runTomTom``.
    '''
    PipelineMotifs.runTomTom(infile, outfile)