コード例 #1
0
ファイル: AlignAce.py プロジェクト: adamlabadorf/TAMO
 def _parse(self):
     """Extract motifs (and the input FASTA name) from AlignACE output.

     Walks ``self.lines`` once:

     * The first ``-i <file>`` pair seen on any line is stored in
       ``self.fastafile`` (only while ``self.fastafile`` is still falsy).
       A dangling ``-i`` with nothing after it is ignored.
     * A line whose first token is ``Motif`` opens a block.  The first
       token of every following non-blank line is collected as a site
       sequence until a line starting with ``*`` is reached.  The line
       right after that terminator must carry the MAP score as its third
       whitespace-separated token.
     * One ``Motif`` is built per block from the collected sequences, its
       ``MAP`` attribute is set, and it is appended to ``self.motifs``.

     Finally ``self.nmotifs`` is set to ``len(self.motifs)``.

     Raises:
         IndexError: if the input ends inside a motif block (indexing
             ``self.lines`` past its end), or the score line has fewer
             than three tokens.
         ValueError: if the third token of the score line is not numeric.
     """
     lines = self.lines
     i = 0
     while i < len(lines):
         toks = lines[i].split()
         if '-i' in toks and not self.fastafile:
             idx = toks.index('-i')
             # Guard against '-i' being the last token on the line.
             if idx + 1 < len(toks):
                 self.fastafile = toks[idx + 1]
         if toks and toks[0] == 'Motif':
             seqs = []
             while True:
                 i += 1
                 toks = lines[i].split()
                 if not toks:
                     # Blank line inside the block: there is no first token
                     # to inspect, so skip it instead of crashing.
                     continue
                 if toks[0].startswith('*'):
                     break
                 seqs.append(toks[0])
             M = Motif(seqs)
             # The line following the '*' terminator holds the score.
             i += 1
             toks = lines[i].split()
             MAP = float(toks[2])
             if MAP > 1000:
                 MAP = 0  # likely to be an AlignACE error
             M.MAP = MAP
             self.motifs.append(M)
         i += 1
     self.nmotifs = len(self.motifs)
コード例 #2
0
def generateMotifs(seqGroups, align, outputPrefix, transSeq=False,
                        extendAlphabet=False, clusterMotifs=False, protein=False, threads=2, stream=None):
    """Write PWMs, consensus sequences and logos for every sequence group.

    For each key of ``seqGroups`` (visited in sorted order) a motif is
    obtained from ``generateMotif``; its normalized counts are appended to
    ``<outputPrefix>_pwm.txt`` and its consensus to
    ``<outputPrefix>_consensus.txt``.  Logo images are targeted at
    ``<outputPrefix>_logos/<group>.png`` (``/`` removed from the group name).

    Args:
        seqGroups: mapping of group name -> sequences for that group.
        align, transSeq, extendAlphabet, protein: forwarded to
            ``createAlphabet`` / ``generateMotif``; a degenerate consensus
            is written only when ``transSeq``, ``align`` and ``protein``
            are all falsy.
        outputPrefix: path prefix for all output files and the logo folder.
        clusterMotifs: when true, groups with more than 10 motif instances
            are converted to TAMO ``Motif`` objects and passed to
            ``findMotifClusters`` at the end.
        threads: forwarded to ``generateMotif``.
        stream: forwarded to ``printto`` and the helper functions.

    Returns:
        None.  Returns early, without the closing log messages or the
        final clustering step, as soon as ``generateMotif`` yields ``None``.
    """
    from TAMO.MotifTools import Motif
    ighvMotifs = []
    if clusterMotifs and 'gene' in outputPrefix:
        # NOTE(review): called with an empty motif list before any work is
        # done; presumably lets findMotifClusters reuse earlier results.
        # Confirm against findMotifClusters.
        findMotifClusters(ighvMotifs, outputPrefix, stream=stream)
    printto(stream, '\t\tPWMs, consensus and logos are being generated for {} motifs ... '.format(len(seqGroups)))
    pwmPath = outputPrefix + '_pwm.txt'
    consensusPath = outputPrefix + '_consensus.txt'
    logosFolder = outputPrefix + '_logos'

    # Context managers guarantee both files are closed on the early return
    # below and on any exception raised by the helpers.
    with open(pwmPath, 'w') as pwmFile, \
            open(consensusPath, 'w') as consensusFile:
        if not os.path.exists(logosFolder):
            os.makedirs(logosFolder)

        # create the sequence alphabet: DNA or Protein
        alphabet = createAlphabet(align, transSeq, extendAlphabet, protein)

        # sorted() works on both Python 2 and 3, unlike keys() + list.sort().
        for group in sorted(seqGroups):
            filename = os.path.join(logosFolder, group.replace('/', '') + '.png')
            seqs = seqGroups[group]
            m = generateMotif(seqs, group, alphabet, filename, align, transSeq, protein, outDir=logosFolder,
                              threads=threads, stream=stream)
            if m is None:
                # motif file found, no further work required
                return
            motifSeqs = m.instances
            pwm = m.counts.normalize(pseudocounts=None)
            consensusMax = str(m.consensus)

            pwmFile.write('#{} {} sequences\n'.format(group, len(motifSeqs)))
            pwmFile.write(str(pwm))
            consensusFile.write('>{} max_count\n'.format(group))
            consensusFile.write(consensusMax + '\n')
            if not transSeq and not align and not protein:
                # IUPAC ambiguous nucleotides
                consensusIupac = str(m.degenerate_consensus)
                consensusFile.write('>{} degenerate\n'.format(group))
                consensusFile.write(consensusIupac + '\n')

            # Flush per group so partial results are on disk while the
            # remaining groups are processed.
            pwmFile.flush()
            consensusFile.flush()
            gc.collect()
            if clusterMotifs and len(motifSeqs) > 10:
                # A real list (not a lazy map object) so Motif receives the
                # same kind of argument on Python 2 and 3.
                motif = Motif([str(x) for x in motifSeqs],
                              backgroundD={'A': 0.6, 'C': 0.4, 'G': 0.4, 'T': 0.6}, id=group)
                motif.addpseudocounts(0.1)
                ighvMotifs.append(motif)

    gc.collect()
    printto(stream, "\tPosition weight matrices are written to " + os.path.basename(pwmPath))
    printto(stream, "\tConsensus sequences are written to " + os.path.basename(consensusPath))
    if clusterMotifs:
        findMotifClusters(ighvMotifs, outputPrefix, stream=stream)
コード例 #3
0
ファイル: bioDefs.py プロジェクト: xguse/gusPyProj
def ifKmerInAll(kmer, dictOfSeqs, factor=1):
    """Return True when ``kmer`` scans positively in every sequence.

    ``kmer`` is wrapped in a ``Motif`` and ``scan`` is called on each value
    of ``dictOfSeqs`` with the given ``factor``.  A sequence counts as a
    match when the first element of the scan result is truthy.

    Args:
        kmer: value accepted by the ``Motif`` constructor.
        dictOfSeqs: mapping of name -> sequence to scan.
        factor: forwarded to ``Motif.scan`` as ``factor``.

    Returns:
        bool: True if every sequence matches (also for an empty mapping),
        False as soon as one sequence does not match.
    """
    motif = Motif(kmer)
    # all() stops at the first miss, so remaining sequences are not scanned.
    return all(motif.scan(dictOfSeqs[name], factor=factor)[0]
               for name in dictOfSeqs)
コード例 #4
0
ファイル: bioDefs.py プロジェクト: xguse/gusPyProj
def taMotif2MopatMatrix(taMotif):
    """
    Convert ONE TAMO motif into a [name, matrix] pair for MOPAT.

    RETURNS: [oneLetterName, [list of lists]] -- the name is the motif's
    ``oneletter`` string with every '.' replaced by 'n'; the matrix holds
    one inner list per entry of the rebuilt motif's ``counts``, with the
    values ordered by sorted key.
    """
    # The name comes from the motif exactly as passed in.
    name = taMotif.oneletter.replace('.','n')

    # Rebuild a motif from the original's bogus_kmers() and read its
    # counts (presumably so that counts is populated -- verify in TAMO).
    rebuilt = Motif(taMotif.bogus_kmers())

    matrix = [[column[nuc] for nuc in sorted(column.keys())]
              for column in rebuilt.counts]

    return [name, matrix]
コード例 #5
0
ファイル: MDscan.py プロジェクト: malhamdoosh/abseqPy
 def _parse(self):
     """Parse MDscan output held in ``self.lines`` into ``self.motifs``.

     The lines are joined and split on ``'\\nMtf '``; each resulting chunk
     is scanned line by line:

     * a line starting with ``Final Motif`` supplies the score (token 6,
       as float) and the seed number (token 8, as int);
     * a line starting with ``>`` contributes its last whitespace-separated
       token as a site sequence.

     Chunks that yield at least one sequence become a ``Motif`` with
     ``MAP`` and ``seednum`` set, appended to ``self.motifs``.  Chunks
     without sequences are skipped.

     Raises:
         IndexError / ValueError: if a ``Final Motif`` line has fewer than
             nine tokens or non-numeric score / seed fields.
     """
     alloutput = '\n'.join(self.lines)
     premotifs = alloutput.split('\nMtf ')
     # Diagnostic chunk count kept from the original; the call form prints
     # the same text on Python 2 and is valid syntax on Python 3.
     print(len(premotifs))
     for pm in premotifs:
         score, seednum = 0, 0
         seqs = []
         for line in pm.split('\n'):
             if line.startswith('Final Motif'):
                 toks = line.split()
                 score = float(toks[6])
                 seednum = int(toks[8])
             if line.startswith('>'):
                 seqs.append(line.split()[-1])
         if seqs:
             m = Motif(seqs)
             m.MAP = score
             m.seednum = seednum
             self.motifs.append(m)
コード例 #6
0
ファイル: MDscan.py プロジェクト: adamlabadorf/TAMO
 def _parse(self):
     """Parse an MDscan result (``self.lines``) and fill ``self.motifs``.

     The joined text is cut at every ``'\\nMtf '`` marker.  Within each
     piece:

     * a line beginning with ``Final Motif`` provides the score (7th
       token, converted to float) and the seed number (9th token,
       converted to int);
     * every line beginning with ``>`` adds its last token to the list of
       site sequences.

     A ``Motif`` is created only for pieces that produced sequences; its
     ``MAP`` is set to the score and ``seednum`` to the seed number
     (both default to 0 when no ``Final Motif`` line was seen).

     Raises:
         IndexError / ValueError: when a ``Final Motif`` line is too short
             or its score / seed tokens are not numeric.
     """
     alloutput = '\n'.join(self.lines)
     premotifs = alloutput.split('\nMtf ')
     # Same diagnostic output as before, written as a call so the module
     # also compiles on Python 3.
     print(len(premotifs))
     for pm in premotifs:
         score, seednum = 0, 0
         seqs = []
         for line in pm.split('\n'):
             if line.startswith('Final Motif'):
                 toks = line.split()
                 score = float(toks[6])
                 seednum = int(toks[8])
             if line.startswith('>'):
                 seqs.append(line.split()[-1])
         if seqs:
             m = Motif(seqs)
             m.MAP = score
             m.seednum = seednum
             self.motifs.append(m)
コード例 #7
0
from TAMO.MotifTools import Motif
import motility

# Demo: build the same motif in TAMO and in motility, then search a sequence.

# Start from a motif given as the string 'WGATAR' and ask TAMO for a list
# of k-mers via bogus_kmers() (presumably sites consistent with the motif
# -- confirm against TAMO.MotifTools).
tM = Motif('WGATAR')
sites = tM.bogus_kmers()

# Rebuild the TAMO motif from those sites and feed the same sites to
# motility.make_pwm.
tM = Motif(sites)
mM = motility.make_pwm(sites)


# Test sequence to search.
s = 'ATGCATGCTAGCGGCTGATAACGCTTATCATATGC'

# Search s with the motility object; the second argument is 75% of its
# max_score() (presumably a minimum score threshold -- confirm in motility).
mReults = mM.find(s,mM.max_score()*0.75,)