def info2seeds(N, infofile, probefile, species='YEAST'):
    """Build seed motifs from the n-mers that score best against a probe set.

    Sequences are read from ``infofile`` with ``Fasta.seqs``.  When ``N`` is
    falsy those sequences are scored directly; otherwise candidates come from
    ``MotifTools.top_nmers(N, seqs)``.  Each purely alphabetic candidate is
    scored with ``ProbeSet.p_value`` against the IDs read from ``probefile``,
    the candidates are ordered by ascending score, and the first 20 (or
    fewer) are converted into motifs.

    Args:
        N: passed to ``MotifTools.top_nmers``; a falsy value means "use the
            sequences in ``infofile`` as the candidates".
        infofile: file handed to ``Fasta.seqs``.
        probefile: file handed to ``ProbeSet.ids_from_file``.
        species: argument for the ``ProbeSet`` constructor.

    Returns:
        List of ``MotifTools.Motif`` objects, at most 20 long.

    Side effects:
        Prints a progress line and the 40 best ``(nmer, p)`` tuples to stdout.
    """
    G = ProbeSet(species)
    IDs = G.ids_from_file(probefile)
    Q = EM.theMarkovBackground.zeroth()

    seqs = Fasta.seqs(infofile)
    if not N:
        nmers = seqs
    else:
        nmers = MotifTools.top_nmers(N, seqs)
        # NOTE(review): the 1000-candidate cap is assumed to apply only to
        # the top_nmers() output -- confirm against the intended nesting.
        if len(nmers) > 1000:
            nmers = nmers[0:1000]

    # The arguments must be a tuple; the previous len(nmers, infofile) call
    # raised TypeError before any scoring happened.
    print("Scoring enrichment of %d nmers from %s" % (len(nmers), infofile))
    sys.stdout.flush()

    # Third p_value argument is '' here; presumably 'verbose' turns on
    # diagnostic output -- TODO confirm.
    nmers_scoresT = [
        (nmer, G.p_value(nmer, IDs, ''))
        for nmer in nmers
        if nmer.isalpha()
    ]
    # key= gives the same (stable) ordering as the old cmp-based sort.
    nmers_scoresT.sort(key=lambda pair: pair[1])

    models = []
    for seq, _score in nmers_scoresT[:20]:
        m = MotifTools.Motif('', Q)
        m.compute_from_text(seq, 0.1)
        models.append(m)

    for tup in nmers_scoresT[0:40]:
        print(tup)
    return models
def loadmotif(infile, trimstart=0, trimend=0):
    """Load a motif from a text file in one of three layouts.

    The layout is chosen from the first line returned by ``loadlist``:

    * the header ``A``, ``C``, ``G``, ``T`` separated by tabs -- every
      following line is one tab-separated row of four floats in that
      column order;
    * a line whose first character is one of ``A``, ``C``, ``G``, ``T`` --
      the lines are handed to ``MotifTools.Motif`` unchanged;
    * anything else -- whitespace-separated integer rows, where rows 0-3
      hold the A, C, G and T counts and each column is one matrix entry.

    Args:
        infile: passed unchanged to ``loadlist``.
        trimstart: number of leading entries to drop (matrix entries in the
            two count layouts, lines in the sequence layout).
        trimend: number of trailing entries to drop; 0 keeps the tail.

    Returns:
        The object produced by ``MotifTools.Motif_from_counts`` (count
        layouts) or ``MotifTools.Motif`` (sequence layout).

    Raises:
        IndexError: if ``loadlist`` yields no lines or an empty first line.
    """
    from TAMO import MotifTools

    def _trim(items):
        # items[trimstart:-0] would be empty, so trimend == 0 needs its own
        # branch.
        if trimend == 0:
            return items[trimstart:]
        return items[trimstart:-trimend]

    lines = loadlist(infile)
    first = lines[0]

    if first == "A\tC\tG\tT":
        counts = []
        for row in lines[1:]:
            cells = row.split("\t")
            counts.append({
                'A': float(cells[0]),
                'C': float(cells[1]),
                'G': float(cells[2]),
                'T': float(cells[3]),
            })
        return MotifTools.Motif_from_counts(_trim(counts))

    if first[0] in 'ACGT':
        return MotifTools.Motif(_trim(lines))

    rows = [list(map(int, line.split())) for line in lines]
    counts = [
        {'A': rows[0][i], 'C': rows[1][i], 'G': rows[2][i], 'T': rows[3][i]}
        for i in range(len(rows[0]))
    ]
    # Trimming used to be skipped for this layout, so trimstart/trimend were
    # silently ignored; apply it here like the other two layouts.
    return MotifTools.Motif_from_counts(_trim(counts))
def test():
    """Smoke-test motif construction and validation on the STE12 site.

    Builds a motif from the text 'GGTTTCAT' for each beta in the loop,
    prints the motif and its log-likelihood table, then prints the results
    of ``validate`` against "STE12" and "FKH2".
    """
    # Only beta = 1.0 is exercised.  Values listed previously for
    # exploration: 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 4.0.
    for beta in [1.0]:
        m = MotifTools.Motif()
        m.compute_from_text('GGTTTCAT', beta)  # STE12 binding site
        print(m)
        m._print_ll()
        print("Against Ste12:")
        match = validate(m, "STE12", 'V', 'T')
        print("Against Fkh2:")
        fmatch = validate(m, "FKH2", 'V', 'T')
        # Space-separated str() of each value, as a comma-separated print
        # statement would emit.
        print("%s %s %s" % (beta, match, fmatch))