def runMemeChIP(infiles, outfile):
    '''Run the MEME-ChIP pipeline on the first input file.

    The first entry of *infiles* is the input handed to MEME-ChIP; all
    remaining entries (possibly none) are passed on as reference motif
    files.  The work itself is done by ``PipelineMotifs.runMemeCHIP``.
    '''
    sequences = infiles[0]
    reference_motifs = infiles[1:]

    PipelineMotifs.runMemeCHIP(sequences, outfile, reference_motifs)
def compareMemeChipTracks(infiles, outfile):
    '''Use tomtom to check whether similar motifs have been found in
    more than one track.

    *infiles* must hold exactly two entries, one per track; they are
    compared with ``PipelineMotifs.tomtom_comparison``.
    '''
    first_track, second_track = infiles

    PipelineMotifs.tomtom_comparison(first_track, second_track, outfile)
def exportMotifControlSequences(infile, outfile):
    '''Export control sequences for motif analysis.

    For each interval, the left and right sequence segment of the same
    size is exported (``mode="leftright"``).  The masker is taken from
    the ``motifs_masker`` configuration value.
    '''
    masker = PARAMS['motifs_masker']

    PipelineMotifs.exportSequencesFromBedFile(
        infile,
        outfile,
        masker=masker,
        mode="leftright",
    )
def exportMotifIntervalSequences(infile, outfile):
    '''Export the interval sequences used for motif detection.

    This method requires the _interval tables.  The masker is taken
    from the ``motifs_masker`` configuration value; the export mode is
    left at the default of the underlying helper.
    '''
    masker = PARAMS['motifs_masker']

    PipelineMotifs.exportSequencesFromBedFile(
        infile,
        outfile,
        masker=masker,
    )
def loadMast(infile, outfile):
    '''Parse a MAST result file and load it into the database.

    Several motif runs are parsed and added to the same table; columns
    for the control data are added as well.  All of this is delegated
    to ``PipelineMotifs.loadMAST``.
    '''
    PipelineMotifs.loadMAST(infile, outfile)
def runMast(infiles, outfile):
    '''Run MAST on all intervals and motifs.

    Per the pipeline notes, all results up to an E-value of 10000 are
    collected so that every sequence is output and MAST curves can be
    computed; 10000 is a heuristic.  The run itself is delegated to
    ``PipelineMotifs.runMAST``.
    '''
    PipelineMotifs.runMAST(infiles, outfile)
def outputTomTomWithMotifEnrichment(infiles, outfile):
    '''Add the original motif enrichment to the tomtom results.

    The runTomTom output does not include the enrichment of the
    original motifs; this task adds that data.  The first entry of
    *infiles* is the seed-motif result, the second the tomtom result.
    '''
    seed_result, tomtom_result = infiles[0], infiles[1]

    PipelineMotifs.add_motif_enrichment_to_tomtom(
        seed_result, tomtom_result, outfile)
def getMemeChipSeedMotifs(infile, outfile):
    '''Extract the seed motifs from the MEME-ChIP output.

    The alignment file is looked up below the export directory at
    ``<exportdir>/memechip.dir/<basename of infile>/motif_alignment.txt``.
    '''
    track_name = os.path.basename(infile)
    alignment_file = os.path.join(
        PARAMS["exportdir"],
        "memechip.dir",
        track_name,
        "motif_alignment.txt",
    )

    PipelineMotifs.getSeedMotifs(infile, alignment_file, outfile)
def runMeme(infile, outfile):
    '''Run MEME to find motifs.

    Pipeline notes on the input sequences:

    * To increase the signal/noise ratio, MEME is not run on all
      intervals; only the top 10% of intervals (by peakval) are used.
    * Only the segment of 200 bp around the peak is used, not the
      complete interval.
    * Softmasked sequence is converted to hardmasked sequence to avoid
      the detection of spurious motifs.
    * Sequence is run through dustmasker.

    The run itself is delegated to
    ``PipelineMotifs.runMEMEOnSequences``.
    '''
    PipelineMotifs.runMEMEOnSequences(infile, outfile)
def exportIntervalSequences(infile, outfile, track, method):
    '''Export sequences for motif discovery.

    This method requires the _interval tables.  For motif discovery,
    only the sequences with the highest S/N ratio are supplied:

    1. The top *motifs_proportion* intervals sorted by peakval.
    2. Only a region +/- *motifs_halfwidth* around the peak.
    3. At least *motifs_min_sequences*.  If there are not enough
       sequences to start with, all will be used.
    4. At most *motifs_max_size* sequences will be output.

    All settings are read from ``PARAMS`` using *method* as the key
    prefix (``<method>_halfwidth``, ``<method>_max_size``, ...).
    *infile* is not read by this function.

    If no sequences are written, a warning is issued and *outfile* is
    touched.
    '''
    dbhandle = connect()

    # A half-width that cannot be converted to int switches to
    # exporting full intervals.
    try:
        halfwidth = int(PARAMS[method + "_halfwidth"])
    except ValueError:
        halfwidth = None
        full = True
    else:
        full = False

    # A maximum size that cannot be converted to int means "no limit".
    try:
        maxsize = int(PARAMS[method + "_max_size"])
    except ValueError:
        maxsize = None

    masker = P.as_list(PARAMS[method + '_masker'])
    num_sequences = PARAMS[method + "_num_sequences"]
    proportion = PARAMS[method + "_proportion"]
    min_sequences = PARAMS[method + "_min_sequences"]
    order = PARAMS[method + '_score']

    nseq = PipelineMotifs.writeSequencesForIntervals(
        track,
        outfile,
        dbhandle,
        full=full,
        masker=masker,
        halfwidth=halfwidth,
        maxsize=maxsize,
        num_sequences=num_sequences,
        proportion=proportion,
        min_sequences=min_sequences,
        order=order)

    if nseq != 0:
        return

    E.warn("%s: no sequences - %s skipped" % (outfile, method))
    P.touch(outfile)
def getDiscDremeSeeds(infiles, outfile):
    '''Extract seed motifs from MEME and DREME output.

    *infiles* must hold exactly two entries: the alignments file first,
    then the motifs file.
    '''
    alignment_file, motif_file = infiles

    PipelineMotifs.getSeedMotifs(motif_file, alignment_file, outfile)
def discDremeSelfMatches(infile, outfile):
    '''Compare the output of MEME and DREME to itself.

    The same file is passed as both sides of the tomtom comparison so
    that similar motifs within a run can be clustered.
    '''
    query = target = infile

    PipelineMotifs.tomtom_comparison(query, target, outfile)
def runDiscDREME(infiles, outfile):
    '''Run discriminative DREME using the control file specified in
    design.tsv.

    *infiles* must hold exactly two entries: the input file first, then
    the file of negative (control) sequences passed as ``neg_file``.
    '''
    positives, negatives = infiles

    PipelineMotifs.runDREME(positives, outfile, neg_file=negatives)
def runRandDreme(infile, outfile):
    '''Run DREME with a randomised negative set.

    No negative file is supplied to ``PipelineMotifs.runDREME``.
    '''
    PipelineMotifs.runDREME(infile, outfile)
def runDiscMEME(infiles, outfile):
    '''Run MEME with a PSP file, therefore making it discriminative.

    *infiles* must hold exactly two entries: the PSP file first, then
    the FASTA file of sequences.
    '''
    psp_file, fasta_file = infiles

    PipelineMotifs.runMEMEOnSequences(fasta_file, outfile, psp=psp_file)
def loadTomTom(infile, outfile):
    '''Load tomtom results.

    Delegates to ``PipelineMotifs.loadTomTom``.
    '''
    PipelineMotifs.loadTomTom(infile, outfile)
def getDiscMEMEPSPFile(infiles, outfile):
    '''Get the position specific prior (PSP) file that allows
    discriminative motif finding with MEME.

    *infiles* must hold exactly two entries: the positive file first,
    then the negative file.
    '''
    positives, negatives = infiles

    PipelineMotifs.generatePSP(positives, negatives, outfile)
def runTomTom(infile, outfile):
    '''Compare ab-initio motifs against a database of known motifs.

    Delegates to ``PipelineMotifs.runTomTom``.
    '''
    PipelineMotifs.runTomTom(infile, outfile)