Beispiel #1
0
def toTAMOmotifs(motifList,seqData=None):
    """Return a list of TAMO motif objects built from native motif objects.

    motifList -- iterable of native motif objects.  Each must support
                 len(), expose a ``pwm`` mapping with 'A', 'C', 'G' and
                 'T' sequences indexable by position, and carry an
                 ``id`` attribute.  A ``sigvalue`` attribute is copied
                 to the TAMO motif when present.
    seqData   -- selects the background nucleotide frequencies:
                 * None (default): uniform 0.25 for every nucleotide.
                 * dict: taken as the frequencies themselves, keyed by
                   'A', 'C', 'G' and 'T' (e.g. seqData['A'] == .26).
                 * str: taken as the path to a FastA file; frequencies
                   are derived from the 'aCnt', 'cCnt', 'gCnt', 'tCnt'
                   and 'nonNs' entries returned by seqStats().

    Raises InvalidOptionError if a dict seqData lacks one of the four
    nucleotide keys, or if seqData is neither None, a dict nor a str.
    """

    from TAMO.MotifTools import Motif_from_counts

    # NOTE: the previous guard was inverted ("if not seqData"), so a
    # supplied seqData was silently replaced by uniform frequencies and
    # an omitted one left the frequencies undefined (NameError below).
    if seqData is None:
        bg = {'A':0.25,'C':0.25,'G':0.25,'T':0.25}
    elif isinstance(seqData, dict):
        try:
            bg = {'A':seqData['A'],
                  'C':seqData['C'],
                  'G':seqData['G'],
                  'T':seqData['T']}
        except KeyError:
            raise InvalidOptionError("toTAMOmotifs: Unrecognized key in seqData as dict.")
    elif isinstance(seqData, str):
        # The parsed sequences are only needed long enough to be counted,
        # so they are never bound to a name that would keep them alive.
        stats = seqStats(ParseFastA(seqData).toDict())
        tot = float(stats['nonNs'])
        bg = {'A':stats['aCnt']/tot,
              'C':stats['cCnt']/tot,
              'G':stats['gCnt']/tot,
              'T':stats['tCnt']/tot}
    else:
        raise InvalidOptionError("toTAMOmotifs: seqData must be None, a dict of nucleotide frequencies, or a path to a FastA file.")

    tList = []
    for m in motifList:
        # One {'A':..,'C':..,'G':..,'T':..} dict per motif position.
        counts = [{'A':m.pwm['A'][i],
                   'C':m.pwm['C'][i],
                   'G':m.pwm['G'][i],
                   'T':m.pwm['T'][i]} for i in range(len(m))]
        # Each motif gets its own copy of the background dict, as before,
        # so motifs never share one mutable mapping.
        t = Motif_from_counts(counts,beta=0.01,bg=dict(bg))
        t.id = m.id
        try:
            t.sigvalue = m.sigvalue
        except AttributeError:
            # sigvalue is optional on the native motif; skip it if absent.
            pass
        tList.append(t)
    return tList
def _parseCount(token):
    """Convert one whitespace-delimited matrix token to an int or a float."""
    # Replaces eval(): the tokens come straight from a file on disk, and
    # eval() would execute any expression found there.
    try:
        return int(token)
    except ValueError:
        return float(token)


tamoMotifs = []

# Build one TAMO motif per matrix file listed in jasparFiles.
for jMat in jasparFiles:
    # Split every line into whitespace-delimited tokens; the with-block
    # closes the file handle as soon as it has been read.
    with open(jMat, "rU") as jFile:
        tempMat = [line.strip().split() for line in jFile]

    # transpose matrix
    tempMat = transpose(mat(tempMat)).tolist()

    # After transposing, entries 0..3 of each row are used as the
    # A, C, G and T counts for that row.
    tempMat = [
        {
            "A": _parseCount(row[0]),
            "C": _parseCount(row[1]),
            "G": _parseCount(row[2]),
            "T": _parseCount(row[3]),
        }
        for row in tempMat
    ]
    jasparTAMO_Motif = Motif_from_counts(tempMat, bg=nucBack)
    # Remember which file the motif came from (name only, no directories).
    jasparTAMO_Motif.sourceFile = jMat.split("/")[-1]
    tamoMotifs.append(jasparTAMO_Motif)

# print to file
# The with-block guarantees the pickle is flushed and closed before
# the script continues.
with open(pklFile, "w") as pklOut:
    cPickle.dump(tamoMotifs, pklOut)
save_motifs(tamoMotifs, tmoFile, kmer_count=60)


print("Done.")