def combineMotifs():
    """Rank all found motifs by cross-tool agreement and search for the best.

    Every motif in the module-level ``foundMotifs`` mapping (tool -> motifs)
    is scored by summing ``compare(motif, other)`` over all motifs reported
    by the *other* tools.  The highest scoring motif is always selected;
    the remaining motifs are then visited in descending score order and a
    motif is selected when its similarity (``compare``) to every motif
    already in ``mlist`` is below 4.  Selection stops once ``mlist`` holds
    more than two entries.

    Side effects: prints progress to stdout, appends the chosen motifs to
    the module-level ``mlist`` and calls ``searchFasta`` once per entry of
    ``mlist``.  Returns ``None``.
    """
    print("Running")
    results = []
    for tool in foundMotifs:
        # every tool except the one that produced the motif being scored
        others = [name for name in foundMotifs if name != tool]
        for motif in foundMotifs[tool]:
            mscore = 0
            for oTool in others:
                for oMotif in foundMotifs[oTool]:
                    mscore += compare(motif, oMotif)
            results.append([mscore, motif])
    # ascending sort: the best scoring motif ends up last
    results.sort()
    for rankedScore, rankedMotif in results:
        print("%s %s" % (rankedMotif, rankedScore))
    if not results:
        # no tool reported a motif, so there is no top result to pick
        print("No motifs found; nothing to combine")
        return
    # search for top scoring dissimilar motifs
    mlist.append(results[-1][1])
    for score, newMotif in reversed(results[:-1]):
        maxSimilarity = 0
        for oldMotif in mlist:
            maxSimilarity = max(maxSimilarity, compare(newMotif, oldMotif))
        if maxSimilarity < 4:
            mlist.append(newMotif)
            print("choosing motif: %s %s" % (newMotif, maxSimilarity))
        else:
            print("not choosing %s %s" % (newMotif, maxSimilarity))
        if len(mlist) > 2:
            break
    print(mlist)
    for mnum, m in enumerate(mlist):
        searchFasta(POS_SEQ, RES_DIR, m, mnum + 1)
# Example #2
# 0
def combineMotifs():
    """Score motifs by their average similarity to other tools' motifs.

    Each entry of the module-level ``foundMotifs`` mapping (tool -> motifs)
    is indexed as ``motif[0]`` (the motif passed to ``compare`` and written
    out) and ``motif[1]`` (passed to ``compare`` as ``pos1``/``pos2``).
    Motifs whose ``motif[0]`` contains ``'TATATA'`` are skipped.  A motif's
    score is the mean of ``compare(...)`` over all motifs reported by the
    other tools; a motif with nothing to compare against scores ``0.0``.

    The sorted scores are written to ``results/compareScores.out``.  The
    best motif is then appended to the module-level ``mlist``, followed by
    further motifs in descending score order whose similarity to every
    already selected motif is below 4, until ``mlist`` holds ``NUM_VIS``
    entries.  ``searchFasta`` is called for every entry of ``mlist``.

    Returns:
        The ascending list of ``[score, motif]`` pairs (empty when no motif
        was scored).
    """
    print("Combining motifs")
    results = []
    for tool in foundMotifs:
        # every tool except the one that produced the motif being scored
        others = [name for name in foundMotifs if name != tool]
        for motif in foundMotifs[tool]:
            if 'TATATA' in motif[0]:
                continue
            mscore = 0
            compares = 0
            for oTool in others:
                for oMotif in foundMotifs[oTool]:
                    compares += 1
                    mscore += compare(motif[0], oMotif[0],
                                      pos1=motif[1], pos2=oMotif[1])
            # float() keeps the mean fractional under Python 2 integer
            # division; the guard avoids dividing by zero when no other
            # tool contributed a motif.
            if compares:
                average = float(mscore) / compares
            else:
                average = 0.0
            results.append([average, motif[0]])
    # ascending sort: the best scoring motif ends up last
    results.sort()
    with open("results/compareScores.out", 'w') as cout:
        for score, motif in results:
            cout.write(motif + '\t%.3f\n' % score)
    if not results:
        # nothing was scored, so there is no top result to select
        return results
    mlist.append(results[-1])
    for score, newMotif in reversed(results[:-1]):
        maxSimilarity = 0
        for oldMotif in mlist:
            # mlist entries are [score, motif] pairs
            maxSimilarity = max(maxSimilarity, compare(newMotif, oldMotif[1]))
        if maxSimilarity < 4:
            mlist.append([score, newMotif])
        # stop if requested number of results have been collected
        if len(mlist) > NUM_VIS - 1:
            break
    print(mlist)
    for mnum, m in enumerate(mlist):
        searchFasta(FILTERED_SEQ, RES_DIR, m[-1], mnum + 1)
    return results
# Example #3
# 0
def voteRankMotifs():
    """Rank the found motifs with ``voteRank`` and search for the best ones.

    Reads the FASTA file named by the module-level ``POS_SEQ`` into a
    ``name -> sequence`` dict and passes it, together with
    ``foundMotifsSeqs``, to ``voteRank``.  The last entry of the returned
    list is treated as the top result; the remaining ``(score, motif)``
    entries are visited from the end towards the start and a motif is kept
    when its similarity (``compare``) to every motif already in ``mlist``
    is below 4.  Selection stops once ``mlist`` holds ``NUM_VIS`` entries.

    Side effects: appends to the module-level ``mlist``, prints it and calls
    ``searchFasta`` once per entry.  Returns ``None``.
    """
    # Collect each sequence as a list of chunks and join once at the end;
    # repeated string concatenation is quadratic for long sequences.
    chunks = {}
    with open(POS_SEQ, 'r') as fin:
        seqName = ""
        for line in fin:
            if line.startswith('>'):
                seqName = line.strip()[1:]
                # a repeated header restarts that sequence
                chunks[seqName] = []
                continue
            residues = line.strip()
            if not residues:
                # blank lines add nothing; skipping them also avoids a
                # KeyError when they precede the first header
                continue
            # Data appearing before any header is collected under the empty
            # name instead of raising KeyError.
            chunks.setdefault(seqName, []).append(residues)
    seqs = dict((name, "".join(parts)) for name, parts in chunks.items())

    # actually do the ranking
    results = voteRank(seqs, foundMotifsSeqs)
    if len(results) == 0:
        # nothing was ranked, so there is no top result to select
        return

    # take the top result
    mlist.append(results[-1])
    for score, newMotif in results[::-1][1:]:
        # try the next highest ranked motif
        maxSimilarity = 0
        for oldMotif in mlist:
            # mlist entries are indexed as (score, motif)
            maxSimilarity = max(maxSimilarity, compare(newMotif, oldMotif[1]))
        if maxSimilarity < 4:
            # include it if it isn't too similar to previously found motifs
            mlist.append([score, newMotif])
        # stop if requested number of results have been collected
        if len(mlist) > NUM_VIS - 1:
            break
    print(mlist)
    for mnum, m in enumerate(mlist):
        searchFasta(FILTERED_SEQ, RES_DIR, m[-1], mnum + 1)
def combineMotifs():
    """Find the motif with the most cross-tool support and search for it.

    Every motif in the module-level ``foundMotifs`` mapping (tool -> motifs)
    is scored by summing ``compare(motif, other)`` over all motifs reported
    by the *other* tools.  All motifs are printed in ascending score order,
    then the highest scoring one is appended to the module-level ``mlist``
    and passed to ``searchFasta`` as motif number 1.

    Returns ``None``.  When no motif was found nothing is selected.
    """
    print("Running")
    results = []
    for tool in foundMotifs:
        # every tool except the one that produced the motif being scored
        others = [name for name in foundMotifs if name != tool]
        for motif in foundMotifs[tool]:
            mscore = 0
            for oTool in others:
                for oMotif in foundMotifs[oTool]:
                    mscore += compare(motif, oMotif)
            results.append([mscore, motif])
    # ascending sort: the best scoring motif ends up last
    results.sort()
    for rankedScore, rankedMotif in results:
        print("%s %s" % (rankedMotif, rankedScore))
    if not results:
        # no tool reported a motif, so there is no top result to pick
        print("No motifs found; nothing to combine")
        return
    # search for top scoring motif
    bestMotif = results[-1][1]
    mlist.append(bestMotif)
    searchFasta(POS_SEQ, RES_DIR, bestMotif, 1)
# Example #5
# 0
def searchFasta(fName, resultsDir, motif, motifnum):
    """Scan a FASTA file for the best scoring occurrences of ``motif``.

    Every sequence in ``fName`` is scanned with a window of ``len(motif)``
    characters; each window is scored with ``compare(motif, window)``.
    Instances are kept in score buckets, and only three buckets exist at any
    time (see ``sInstances`` below).  The kept instances are written to
    ``resultsDir + "motif" + str(motifnum) + ".instances"`` and the first
    element of ``makePWM(...)`` built from the matched strings is printed.

    Parameters:
        fName      -- path of the FASTA file to read.
        resultsDir -- prefix of the output path; it is concatenated as-is, so
                      it needs to end with a path separator to name a
                      directory.
        motif      -- motif to search for; its length sets the window size.
        motifnum   -- number used in the output file name and progress line.

    Returns nothing; results are reported via stdout and the output file.
    """
    print "~",motifnum,"#"
    # score -> list of instances.  Starts with three placeholder buckets so
    # that a new score always evicts the current lowest one, keeping the
    # number of buckets constant at three.
    sInstances = {0:[], -1:[], -2:[]}
    # NOTE(review): `threshold` is never read in this function.
    threshold = 0
    sequenceName = ""
    mLen = len(motif)

    with open(fName, "r") as f:
        fileText = f.read().split("\n")
    fLen = len(fileText)
    # line cursor; incremented at the top of every outer iteration
    lineNum = -1

    while lineNum < fLen - 1:
        # retrieve line
        lineNum += 1
        line = fileText[lineNum]

        if len(line) == 0:
            # ignore blank lines
            continue

        if line[0] == '>':
            # beginning of new sequence, save sequence name
            sequenceName = line[1:].strip()
            continue
        
        # retrieve sequence: join consecutive lines up to the next blank
        # line, header line or end of file
        sequence = ""
        while lineNum < fLen and len(fileText[lineNum]) != 0 and fileText[lineNum][0] != '>':
            sequence += fileText[lineNum].strip()
            lineNum += 1
#        print "|" + fileText[lineNum] + "|" + str(lineNum)
        # step back when stopped on a header so the outer loop's increment
        # lands on that header again and records the new sequence name
        if lineNum < fLen and len(fileText[lineNum]) > 0 and fileText[lineNum][0] == '>':
            lineNum -= 1
        sLen = len(sequence)
        
        # search sequence
        # NOTE(review): with `sPos < sLen - mLen` the window starting at
        # sLen - mLen is never examined -- confirm whether this is intended
        # (it does keep the look-ahead window below at full length).
        sPos = 0
        while sPos < sLen - mLen:
            match = sequence[sPos:sPos+mLen]
            score = compare(motif, match)
            # if match is better than the lowest score bucket currently kept
            if score > min(sInstances.keys()):
                # look ahead one position: when diff() of this window is
                # smaller than diff() of the next window, move on without
                # recording this one.  (diff is defined elsewhere; its
                # semantics are not visible here -- TODO confirm intent.)
                if diff(motif, match) < diff(motif, sequence[sPos+1:sPos+mLen+1]):
                    sPos += 1
                    continue
                if not score in sInstances:
                    # add score to found instances and remove previous min
                    sInstances[score] = []
                    del sInstances[min(sInstances.keys())]
                # instance record: matched text, score, sequence name,
                # distance from window start to sequence end (as a string),
                # sequence length
                sInstances[score].append([match, score, sequenceName, str(sLen-sPos), sLen])
                # jump past the recorded match; together with the increment
                # below the cursor advances by mLen + 1
                sPos += mLen
            #end if
            sPos += 1
        #end while
    #end while
    print motif + "\n Instances:"
    # flatten all score buckets into a single list of instances
    instances = []
    for key in sInstances:
        for instance in sInstances[key]:
            instances.append(instance)
    
    # write to file for visualisation
    # each record is ">name<TAB>position|length", the matched text, and a
    # blank line
    with open(resultsDir + "motif" + str(motifnum) + ".instances", "w") as fout:
        #pdb.set_trace()
        for instance in instances:
            fout.write(">"+instance[2]+"\t"+str(instance[3])+"|"+str(instance[4])\
                    +"\n"+instance[0]+"\n\n")
    # makePWM is defined elsewhere; only the first element of its result is
    # printed
    print makePWM(mLen, [x[0] for x in instances])[0]