예제 #1
0
from TAMO import MotifTools 
from TAMO.seq import Fasta 
from TAMO.MotifMetrics import ProbeSet 
from TAMO.MD.AlignAce import AlignAce 
from TAMO.MD.MDscan import MDscan 
from TAMO.MD.Meme import Meme 
#from TAMO.DataSources import GO
from time import time

# Times two TAMO motif-discovery wrappers (MDscan, then MEME) on the same
# cluster FASTA file.

# --- File locations ---------------------------------------------------------
fastaPath = '/Users/biggus/Documents/James/Data/ClusterDefs/TC-Fastas/TC-96.oneLine.fas'
# Background file intended for MDscan; not passed to it in this span (see below).
MDbg = '/Users/biggus/Documents/James/Data/2KB/2kb_Sequence/2kb_Anopheles/2KBupTSS_goodAffyAGAPsFastasOUT.masked.nr.MD.bg'
# Destination for metrics output; nothing is written to it in this span.
outFile = '/Users/biggus/Documents/James/Data/ClusterDefs/testTAMOmetrics.txt'

# --- Cluster data -----------------------------------------------------------
clusterIDS = Fasta.ids(fastaPath)
# TODO (original author's remark): building the ProbeSet from the cluster
# FASTA is wrong; it should probably be built from the "goodAffys" set.
totalSeqs = ProbeSet(fastaPath)

# AlignAce run is currently disabled:
#theAce = AlignAce(fastaPath,width=10)

# --- MDscan -----------------------------------------------------------------
print('running MDscan...')
tMD_1 = time()
# The background-file argument is deliberately left commented out.
MDmotifs = MDscan(fastaPath)  # ,bgfile=MDbg)
tMD_2 = time()
MD_time = tMD_2 - tMD_1  # wall-clock seconds spent in MDscan
MD_report = 'MDscan took %.5f sec == %.3f min.\nMDscan found %s motifs.'
print(MD_report % (MD_time, MD_time / 60.0, len(MDmotifs.motifs)))

# --- MEME -------------------------------------------------------------------
print('running MEME...')
tMeme_1 = time()
memeMotifs = Meme(fastaPath)
tMeme_2 = time()
# Elapsed wall-clock seconds for MEME; not reported within this span.
Meme_time = tMeme_2 - tMeme_1
예제 #2
0
from TAMO import MotifTools 
from TAMO.seq import Fasta 
from TAMO import MotifMetrics
from TAMO.MD.AlignAce import AlignAce 
from TAMO.MD.MDscan import MDscan 
from TAMO.MD.Meme import Meme 
from TAMO import Clustering
#from TAMO.DataSources import GO
from time import time

# Collects the top k-mers (k = 6..9) from the TC-8 cluster sequences and scores
# each one against the full ProbeSet with two TAMO metrics, building
# tab-separated report rows in `kmerMetrics`.

# --- File locations ---------------------------------------------------------
TC8_path = '/Users/biggus/Documents/James/Data/ClusterDefs/TC-Fastas/TC-8.fas'
# Destination for the metrics table; nothing is written to it in this span.
outFile = '/Users/biggus/Documents/James/Data/ClusterDefs/TC-8_MotifMetrics.5-12.txt'

# --- Cluster data and reference sequence set --------------------------------
TC8_ids = Fasta.ids(TC8_path)
TC8_seqs = Fasta.seqs(TC8_path)
allSeqs = MotifMetrics.ProbeSet(
    '/Users/biggus/Documents/James/Data/2KB/2kb_Sequence/2kb_Anopheles/2KBupTSS_goodAffyAGAPsFastasOUT.masked.nr.fas'
)

# --- Gather candidate k-mers for each width ---------------------------------
roughBestKmers = []
# First element is the header row; one data row per k-mer is appended below.
kmerMetrics = ['Kmer\thGeoPval\tBinomOverRep\n']

for i in range(6, 10):  # widths 6, 7, 8 and 9
    imers = MotifMetrics.top_nmers_seqs(i, TC8_seqs)
    roughBestKmers += imers
    print('%s %smers found.' % (len(imers), i))

# --- Score every candidate against the full sequence set --------------------
# NOTE(review): judging by the column names, Enrichment presumably yields a
# hypergeometric p-value and overrep a binomial over-representation score;
# confirm against the TAMO MotifMetrics documentation.
for kmer in roughBestKmers:
    hGeoPval = allSeqs.Enrichment(kmer, TC8_ids)
    binom = allSeqs.overrep(kmer, TC8_ids)
    kmerMetrics.append('%s\t%s\t%s\n' % (kmer, hGeoPval, binom))