Ejemplo n.º 1
0
def getKmersWithOneMisMtch(motif1, motifListWithMetrics):
    """
    Collect the motifs that lie within one mismatch of motif1.

    Args:
        motif1: TAMO motif used as the query.
        motifListWithMetrics: list of lists in the form [TAMOmotif, weightMetric].

    Returns:
        A list of [TAMOmotif, weightMetric] lists, in input order, holding only
        the accepted candidates.  The motif stored in each entry is the
        orientation handed back by getMinDiffOri (the candidate itself or its
        reverse complement), and weightMetric is passed through unchanged.

    Only candidates whose length is within +/- 1 of len(motif1) are considered.
    A candidate is accepted when its distance to motif1 (as reported by
    getMinDiffOri) is no greater than the distance between an all-'A' motif and
    the same motif with its last base replaced by 'T', both of the shorter of
    the two lengths being compared.
    """
    queryLen = len(motif1)
    # The one-mismatch threshold depends only on the alignment length, and the
    # length filter below allows at most two such lengths, so compute each once
    # instead of rebuilding two motifs and re-aligning them for every candidate.
    # NOTE(review): assumes MotifCompare.minshortestoverhangdiff returns the
    # same value for equal inputs -- confirm against the TAMO build in use.
    oneMisMatchByLen = {}
    resultList = []

    for mWithMetric in motifListWithMetrics:
        candidate = mWithMetric[0]
        candidateLen = len(candidate)
        if not (queryLen - 1 <= candidateLen <= queryLen + 1):
            continue

        # Use the length of the shorter motif for the mismatch calculation.
        maxAlnLen = min(queryLen, candidateLen)
        if maxAlnLen not in oneMisMatchByLen:
            kMer = Motif('A' * maxAlnLen)
            # Original author's note: whether at the end or in the middle,
            # the 'T' gives the same alignment score.
            kMer1mis = Motif('A' * (maxAlnLen - 1) + 'T')
            oneMisMatchByLen[maxAlnLen] = MotifCompare.minshortestoverhangdiff(kMer, kMer1mis)

        bestOri = getMinDiffOri(motif1, candidate)
        if bestOri[1] <= oneMisMatchByLen[maxAlnLen]:
            # keep [bestOriTAMOmotif, weightMetric]
            resultList.append([bestOri[0], mWithMetric[1]])

    return resultList
Ejemplo n.º 2
0
def getMinDiffOri(motif1,motif2,minoverlap=6,getOffset=False):
    """
    Pick the orientation of motif2 to report alongside its distance to motif1.

    Calls MotifCompare.minshortestoverhangdiff(motif1, motif2) with the given
    minoverlap and want_DistAndOff=1, then indexes the result as
    [0] distance, [1] offset, [2] orientation flag.
    NOTE(review): presumably element [2] is true when the reverse complement of
    motif2 gave the smaller distance -- confirm against MotifCompare.

    Returns:
        (motif2, distResult)          -OR- (motif2_rc, distResult)
        if getOffset:
        (motif2, distResult, offset)  -OR- (motif2_rc, distResult, offset)
    """
    # Historical note from the original signature: it once also carried
    # the parameters N=1 and keepLen=0.
    comparison = MotifCompare.minshortestoverhangdiff(
        motif1, motif2, minoverlap=minoverlap, want_DistAndOff=1)

    # Choose the orientation first; the return arity is decided separately.
    if comparison[2]:
        chosenMotif = motif2.revcomp()
    else:
        chosenMotif = motif2

    if getOffset:
        return (chosenMotif, comparison[0], comparison[1])
    return (chosenMotif, comparison[0])
Ejemplo n.º 3
0
# Script: load motifs from a text file, build an all-vs-all distance matrix
# (best of forward / reverse-complement alignment) and cluster it with Kmedoids.
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print("loading vars...")
# Close the file explicitly once parsed instead of leaking the handle;
# try/finally is used so this also runs on interpreters without `with`.
motifFile = open(\
    '/Users/biggus/Documents/James/Writings_Talks/Grants/09_Feb/PrelimData_Grant_Feb09/Clus2_kmerSearch-0.01.8mers.motifs.txt','r')
try:
    # One motif per line; a real list (not a lazy map) because it is indexed
    # and measured with len() below.
    motifList = [MotifTools.Motif_from_text(line.strip()) for line in motifFile.readlines()]
finally:
    motifFile.close()

dMat      = ''

print("constructing distanceMatrix...")
dM_t1 = time()
distanceMatrix={}
for i in range(len(motifList)):
    print('motif %s of %s' % (i+1,len(motifList)))
    distanceMatrix[i]={}
    # The reverse complement of motif i does not depend on j: build it once
    # per row rather than once per cell.
    revCmp_i = motifList[i].revcomp()
    for j in range(len(motifList)):
        # check fwd and revCmp alignments and take the lowest
        fwd_diff    = MotifCompare.minshortestoverhangdiff(motifList[i],motifList[j])
        revCmp_diff = MotifCompare.minshortestoverhangdiff(revCmp_i,motifList[j])
        print('Fwd: %s\nRev: %s\n' % (fwd_diff,revCmp_diff))
        distanceMatrix[i][j] = min([fwd_diff,revCmp_diff])
dM_t2 = time()
pprint(distanceMatrix)
print('distanceMatrix took %.4f sec.' % (dM_t2-dM_t1))

print("discovering clusters...")
# --Using Kmedoids --
clusterOut = Kmedoids.bestaveKMedoids_cluster(distanceMatrix,kmax=30)

# clusterOut[1] is keyed by cluster id; each value holds indices into motifList.
for c in clusterOut[1]:
    print('cluster_%s:' % (c))
    for m in clusterOut[1][c]:
        print(motifList[m].oneletter)
# Script: compare every unknown motif against every known TFBS motif and
# record, per unknown motif, the TFBS motifs that cluster together with it.
motifs_tfbs = MotifTools.load(file_tfbs)

match_dict = {}
for unknown in motifs_unknown:
    tf_list = []
    for tfbs in motifs_tfbs:
        # Cluster the pair on its own: index 0 is the unknown motif,
        # index 1 is the TFBS motif.
        joined_motifs = [unknown, tfbs]
        print(joined_motifs)
        Dmat = MotifCompare.computeDmat(joined_motifs)
        cluster = Kmedoids.bestaveKMedoids_cluster(Dmat,kmax=2,min_dist=0.2)
        for cluster_list in cluster[1].values():
            if len(cluster_list) <= 1:
                continue
            # A cluster with more than one member is recorded as a match,
            # scored by the pairwise distance between the two motifs.
            dist = Dmat[0][1]
            tf_list.append( (dist, tfbs.source, tfbs.oneletter) )
    # Sort the (dist, source, oneletter) tuples so the smallest distance comes first.
    tf_list.sort()
    novel_id = unknown.source
    match_dict[novel_id] = {'query_motif': unknown.oneletter,
                            'subject': tf_list }