def combineMotifs():
    """Rank motifs by cross-tool agreement and search for the best ones.

    Each motif in the module-level ``foundMotifs`` mapping (tool -> motifs)
    is scored by summing ``compare(motif, other)`` over every motif reported
    by the *other* tools.  The ranking is printed in ascending score order.
    The top-ranked motif is appended to the module-level ``mlist``; further
    motifs are then tried from best to worst and appended when their largest
    ``compare`` value against the motifs already in ``mlist`` is below 4.
    Selection stops as soon as ``mlist`` holds more than two entries.
    Finally ``searchFasta`` is run on ``POS_SEQ`` for every entry of
    ``mlist``, numbering the motifs from 1.

    Returns None.  When no motif was scored at all the function returns
    right after the (empty) ranking instead of raising ``IndexError``.
    """
    print("Running")
    results = []
    for tool in foundMotifs:
        # A motif is only compared against motifs found by other tools.
        others = list(set(foundMotifs) - set([tool]))
        for motif in foundMotifs[tool]:
            mscore = 0
            for oTool in others:
                for oMotif in foundMotifs[oTool]:
                    mscore += compare(motif, oMotif)
            results.append([mscore, motif])

    # Ascending sort: the best-scoring motif ends up last.
    results.sort()
    for score, motif in results:
        print("%s %s" % (motif, score))

    if not results:
        # Nothing was scored, so there is no top motif to select.
        return

    # search for top scoring dissimilar motifs
    ranked = results[::-1]
    mlist.append(ranked[0][1])
    for score, newMotif in ranked[1:]:
        maxSimilarity = 0
        for oldMotif in mlist:
            maxSimilarity = max(maxSimilarity, compare(newMotif, oldMotif))
        if maxSimilarity < 4:
            mlist.append(newMotif)
            print("choosing motif: %s %s" % (newMotif, maxSimilarity))
        else:
            print("not choosing %s %s" % (newMotif, maxSimilarity))
        if len(mlist) > 2:
            break

    print(mlist)
    for mnum, m in enumerate(mlist):
        searchFasta(POS_SEQ, RES_DIR, m, mnum + 1)
def combineMotifs():
    """Rank motifs by their mean cross-tool comparison score.

    Entries of the module-level ``foundMotifs`` mapping (tool -> motifs) are
    indexed as ``motif[0]`` (the motif itself) and ``motif[1]`` (passed to
    ``compare`` as ``pos1``/``pos2``).  Motifs containing ``'TATATA'`` are
    skipped.  Every remaining motif is scored by the mean of ``compare``
    over all motifs reported by the *other* tools.

    The sorted ``[score, motif]`` list is written to
    ``results/compareScores.out`` (one ``motif<TAB>score`` line each).  The
    top entry is appended to the module-level ``mlist``; further entries are
    tried from best to worst and appended when their largest ``compare``
    value against the motifs already in ``mlist`` is below 4.  Selection
    stops once ``mlist`` holds ``NUM_VIS`` entries.  ``searchFasta`` is then
    run on ``FILTERED_SEQ`` for every entry of ``mlist``.

    Returns:
        The ascending-sorted list of ``[score, motif]`` pairs (possibly
        empty, in which case nothing is selected or searched).
    """
    print("Combining motifs")
    results = []
    for tool in foundMotifs:
        # A motif is only compared against motifs found by other tools.
        others = list(set(foundMotifs) - set([tool]))
        for motif in foundMotifs[tool]:
            if 'TATATA' in motif[0]:
                continue
            mscore = 0
            compares = 0
            for oTool in others:
                for oMotif in foundMotifs[oTool]:
                    compares += 1
                    mscore += compare(motif[0], oMotif[0],
                                      pos1=motif[1], pos2=oMotif[1])
            if compares:
                # float() keeps the mean from being truncated by integer
                # division when compare() yields ints under Python 2.
                average = float(mscore) / compares
            else:
                # No other tool reported anything to compare against;
                # previously this raised ZeroDivisionError.
                average = 0.0
            results.append([average, motif[0]])

    # Ascending sort: the best-scoring motif ends up last.
    results.sort()
    with open("results/compareScores.out", 'w') as cout:
        for score, motif in results:
            cout.write(motif + '\t%.3f\n' % score)

    if not results:
        # Nothing was scored, so there is no top motif to select.
        return results

    mlist.append(results[-1])
    for score, newMotif in results[::-1][1:]:
        maxSimilarity = 0
        for oldMotif in mlist:
            # mlist entries are [score, motif] pairs.
            maxSimilarity = max(maxSimilarity, compare(newMotif, oldMotif[1]))
        if maxSimilarity < 4:
            mlist.append([score, newMotif])
        # stop if requested number of results have been collected
        if len(mlist) > NUM_VIS - 1:
            break

    print(mlist)
    for mnum, m in enumerate(mlist):
        searchFasta(FILTERED_SEQ, RES_DIR, m[-1], mnum + 1)
    return results
def voteRankMotifs():
    """Rank motifs with ``voteRank`` and search for the best dissimilar ones.

    The sequences in ``POS_SEQ`` are read into a ``name -> sequence`` dict:
    a line starting with ``'>'`` opens a new record named by the rest of the
    line, and every following line is stripped and appended to that record.
    The dict and the module-level ``foundMotifsSeqs`` are handed to
    ``voteRank``, whose result is iterated as ``(score, motif)`` pairs with
    the best entry last.

    The top entry is appended to the module-level ``mlist``; further entries
    are tried from best to worst and appended when their largest ``compare``
    value against the motifs already in ``mlist`` is below 4.  Selection
    stops once ``mlist`` holds ``NUM_VIS`` entries.  ``searchFasta`` is then
    run on ``FILTERED_SEQ`` for every entry of ``mlist``.

    Returns None.  An empty ranking ends the function early instead of
    raising ``IndexError``.

    Raises:
        KeyError: if sequence text appears before the first ``'>'`` header.
    """
    seqs = {}
    with open(POS_SEQ, 'r') as fin:
        seqName = ""
        for line in fin:
            if line[0] == '>':
                seqName = line.strip()[1:]
                # Collect chunks in a list; they are joined once below
                # rather than re-copying the growing string on every line.
                seqs[seqName] = []
                continue
            chunk = line.strip()
            if not chunk:
                # Blank lines carry no sequence data; skipping them also
                # avoids a KeyError for a blank line ahead of any header.
                continue
            seqs[seqName].append(chunk)
    for name in seqs:
        seqs[name] = "".join(seqs[name])

    # actually do the ranking
    results = voteRank(seqs, foundMotifsSeqs)
    if len(results) == 0:
        # Nothing was ranked, so there is no top motif to select.
        return

    # take the top result
    mlist.append(results[-1])
    for score, newMotif in results[::-1][1:]:
        # try the next highest ranked motif
        maxSimilarity = 0
        for oldMotif in mlist:
            # mlist entries are (score, motif) pairs.
            maxSimilarity = max(maxSimilarity, compare(newMotif, oldMotif[1]))
        if maxSimilarity < 4:
            # include it if it isn't too similar to previously found motifs
            mlist.append([score, newMotif])
        # stop if requested number of results have been collected
        if len(mlist) > NUM_VIS - 1:
            break

    print(mlist)
    for mnum, m in enumerate(mlist):
        searchFasta(FILTERED_SEQ, RES_DIR, m[-1], mnum + 1)
def combineMotifs():
    """Pick the motif that agrees best with the other tools and search for it.

    Each motif in the module-level ``foundMotifs`` mapping (tool -> motifs)
    is scored by summing ``compare(motif, other)`` over every motif reported
    by the *other* tools.  The ranking is printed in ascending score order,
    the top-ranked motif is appended to the module-level ``mlist`` and
    ``searchFasta`` is run for it on ``POS_SEQ`` as motif number 1.

    Returns None.  When no motif was scored at all the function returns
    right after the (empty) ranking instead of raising ``IndexError``.
    """
    print("Running")
    results = []
    for tool in foundMotifs:
        # A motif is only compared against motifs found by other tools.
        others = list(set(foundMotifs) - set([tool]))
        for motif in foundMotifs[tool]:
            mscore = 0
            for oTool in others:
                for oMotif in foundMotifs[oTool]:
                    mscore += compare(motif, oMotif)
            results.append([mscore, motif])

    # Ascending sort: the best-scoring motif ends up last.
    results.sort()
    for score, motif in results:
        print("%s %s" % (motif, score))

    if not results:
        # Nothing was scored, so there is no top motif to search for.
        return

    # search for top scoring motif
    topMotif = results[-1][1]
    mlist.append(topMotif)
    searchFasta(POS_SEQ, RES_DIR, topMotif, 1)
def searchFasta(fName, resultsDir, motif, motifnum):
    """Collect the best-scoring occurrences of ``motif`` in a FASTA-style file.

    The file ``fName`` is read line by line.  A line starting with ``'>'``
    sets the current sequence name; each run of consecutive non-empty,
    non-header lines is stripped, joined and scanned as one sequence.

    Windows of ``len(motif)`` characters are scored with ``compare``.  Hits
    are grouped by score and only three distinct scores are kept at any
    time (the table starts with the placeholder scores 0, -1 and -2).  A
    window is skipped when ``diff(motif, window)`` is smaller than ``diff``
    for the window one position further on.  After a recorded hit the scan
    jumps past the matched window.

    The kept hits are written to ``<resultsDir>motif<motifnum>.instances``
    and the first element of ``makePWM`` for the matched windows is printed.

    Args:
        fName: path of the sequence file to scan.
        resultsDir: prefix (joined by plain concatenation) of the output
            file name.
        motif: the motif to look for; its length sets the window size.
        motifnum: number used in the progress line and output file name.
    """
    print("~ %s #" % (motifnum,))
    motifLen = len(motif)
    # score -> list of hits; always holds exactly three score buckets.
    best = {0: [], -1: [], -2: []}

    def scan(sequence, name):
        """Slide over one sequence and record qualifying windows in ``best``."""
        seqLen = len(sequence)
        # The loop bound leaves room for the full-length look-ahead window.
        limit = seqLen - motifLen
        pos = 0
        while pos < limit:
            window = sequence[pos:pos + motifLen]
            score = compare(motif, window)
            if not score > min(best.keys()):
                pos += 1
                continue
            # Move on by one when diff() favours shifting the window.
            if diff(motif, window) < diff(motif, sequence[pos + 1:pos + motifLen + 1]):
                pos += 1
                continue
            if score not in best:
                # Open a bucket for the new score and drop the lowest one.
                best[score] = []
                del best[min(best.keys())]
            # Hit record: window, score, sequence name, distance of the
            # window start from the sequence end, sequence length.
            best[score].append([window, score, name, str(seqLen - pos), seqLen])
            pos += motifLen + 1

    with open(fName, "r") as handle:
        lines = handle.read().split("\n")

    currentName = ""
    pending = []  # stripped lines of the sequence run being collected
    for raw in lines:
        if len(raw) != 0 and raw[0] != '>':
            pending.append(raw.strip())
            continue
        # An empty line or a header terminates the current run.
        if pending:
            scan("".join(pending), currentName)
            pending = []
        if len(raw) != 0:
            # Header line: remember the name for the sequences that follow.
            currentName = raw[1:].strip()
    if pending:
        scan("".join(pending), currentName)

    print(motif + "\n Instances:")
    instances = [hit for key in best for hit in best[key]]

    # write to file for visualisation
    with open(resultsDir + "motif" + str(motifnum) + ".instances", "w") as fout:
        for hit in instances:
            fout.write(">%s\t%s|%s\n%s\n\n" % (hit[2], hit[3], hit[4], hit[0]))
    print(makePWM(motifLen, [hit[0] for hit in instances])[0])