예제 #1
0
def info2seeds(N,infofile,probefile,species='YEAST'):
    """Build seed motifs from the best-scoring candidate sequences.

    Sequences are read from ``infofile`` with ``Fasta.seqs``.  When ``N`` is
    truthy the candidates are the result of ``MotifTools.top_nmers(N, seqs)``
    (only the first 1000 are kept); otherwise the sequences themselves are
    the candidates.  Each purely alphabetic candidate is scored with
    ``ProbeSet.p_value`` against the ids loaded from ``probefile``, and the
    candidates are ranked by ascending p-value.

    Side effects: prints a progress line and the 40 best ``(nmer, p)``
    tuples to stdout.

    Args:
        N: Width passed to ``MotifTools.top_nmers``; a falsy value skips the
            n-mer extraction step.
        infofile: Path handed to ``Fasta.seqs``.
        probefile: Path handed to ``ProbeSet.ids_from_file``.
        species: Name used to construct the ``ProbeSet``.

    Returns:
        A list of at most 20 ``MotifTools.Motif`` objects, one per
        top-ranked candidate, in ranking order.
    """
    G = ProbeSet(species)
    IDs = G.ids_from_file(probefile)
    Q = EM.theMarkovBackground.zeroth()

    seqs = Fasta.seqs(infofile)

    if not N:
        nmers = seqs
    else:
        nmers = MotifTools.top_nmers(N, seqs)
        if len(nmers) > 1000:
            nmers = nmers[:1000]

    # The format operands must be a tuple; the original passed both values
    # to len(), which raises TypeError before anything is scored.
    print("Scoring enrichment of %d nmers from %s" % (len(nmers), infofile))
    sys.stdout.flush()

    # Non-alphabetic candidates are skipped rather than scored.
    nmers_scoresT = [(nmer, G.p_value(nmer, IDs, ''))  # '' -> non-verbose
                     for nmer in nmers if nmer.isalpha()]
    # Stable ascending sort on the p-value (second tuple element).
    nmers_scoresT.sort(key=lambda pair: pair[1])

    models = []
    for seq, _pvalue in nmers_scoresT[:20]:
        m = MotifTools.Motif('', Q)
        m.compute_from_text(seq, 0.1)
        models.append(m)

    for tup in nmers_scoresT[:40]:
        print(tup)
    return models
예제 #2
0
def loadmotif(infile, trimstart=0, trimend=0):
    """Load a motif from ``infile``, optionally trimming positions.

    The layout is chosen from the first line returned by ``loadlist``:

    * exactly the tab-separated header ``A C G T``: every following line is
      one position with four tab-separated values (parsed as floats, in
      A, C, G, T order) and the result is built with
      ``MotifTools.Motif_from_counts``;
    * first character is one of ``ACGT``: the lines are passed to
      ``MotifTools.Motif`` as they are;
    * anything else: the lines are whitespace-separated integer rows, the
      first four being A, C, G, T, with one column per position; the result
      is built with ``MotifTools.Motif_from_counts``.

    Args:
        infile: Path handed to ``loadlist``.
        trimstart: Number of leading entries to drop.
        trimend: Number of trailing entries to drop.

    Returns:
        The motif object produced by ``MotifTools``.
    """
    from TAMO import MotifTools

    def trim(items):
        # items[trimstart:-0] would be empty, so a zero trimend needs an
        # open-ended slice.
        if trimend == 0:
            return items[trimstart:]
        return items[trimstart:-trimend]

    lines = loadlist(infile)
    if lines[0] == "A\tC\tG\tT":
        ma = []
        for l in lines[1:]:
            p = l.split("\t")
            ma.append({
                'A': float(p[0]),
                'C': float(p[1]),
                'G': float(p[2]),
                'T': float(p[3])
            })
        return MotifTools.Motif_from_counts(trim(ma))
    elif lines[0][0] in 'ACGT':
        return MotifTools.Motif(trim(lines))
    else:
        na = [list(map(int, line.split())) for line in lines]
        ma = []
        for i in range(len(na[0])):
            ma.append({
                'A': na[0][i],
                'C': na[1][i],
                'G': na[2][i],
                'T': na[3][i]
            })
        # Trim here as well: this layout previously ignored trimstart and
        # trimend, unlike the other two.
        return MotifTools.Motif_from_counts(trim(ma))
예제 #3
0
def test():
    """Build a motif from the text 'GGTTTCAT' and validate it twice.

    For each beta in the loop (currently only 1.0) the motif is printed,
    its ``_print_ll`` output is shown, and ``validate`` is called against
    "STE12" and "FKH2"; the beta and both results are printed on one line.
    """
    # Neither list is read below; both are kept from the original.
    motifs = []
    betalist = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 4.0]
    for beta in (1.0,):
        motif = MotifTools.Motif()
        motif.compute_from_text('GGTTTCAT', beta)  # STE12 binding site
        print(motif)
        motif._print_ll()

        print("Against Ste12:")
        ste12_result = validate(motif, "STE12", 'V', 'T')

        print("Against Fkh2:")
        fkh2_result = validate(motif, "FKH2", 'V', 'T')

        # Same space-separated output as a multi-item print statement.
        print("%s %s %s" % (beta, ste12_result, fkh2_result))