Exemple #1
0
from FASTA import *
import numpy
import pylab as P
P.ion()

# DNA alphabet used by this script. The position of a base in this list is
# the integer index it is mapped to below (presumably the column of that base
# in the count matrix -- confirm against the scan loop).
nucleotides = ['G', 'A', 'T', 'C']

# Reverse lookup table: base letter -> its position in `nucleotides`.
nucleotide_to_index = dict(
    (base, position) for position, base in enumerate(nucleotides)
)

# Load the yeast genome that the PSSM is built from.
# `FASTA` is not defined in this file; it is presumably provided by the
# star-import from the FASTA module at the top -- confirm.
# The scan loop below reads `yeast.accession_to_sequence` and iterates its
# .items() as (chromosome_name, chromosome_sequence) pairs.
# NOTE(review): the path is relative, so this presumably has to be run from
# the directory that contains s_cerevisiae.fasta -- confirm.
yeast = FASTA('s_cerevisiae.fasta')

# The motif is the fixed prefix 'TATA' followed by six unconstrained
# positions (TATAwxyzuv), so every scanned window is 10 bases long.
motif_start = 'TATA'
motif_length = 10

# Initial value of every cell of the count matrix, so that a base never seen
# at a position still has a non-zero count (presumably to keep later
# frequency / log computations well defined -- confirm).
pseudo_count = 1

# One row per motif position and 4 columns, one per nucleotide; the column
# count must stay equal to the number of entries in the nucleotide alphabet.
# The matrix is seeded from `pseudo_count` (previously a hard-coded `+ 1`
# that ignored the variable), so changing pseudo_count now takes effect.
count_pssm = numpy.zeros((motif_length, 4)) + pseudo_count

# Running total of windows that start with `motif_start`.
num_matches = 0
for chromosome_name, chromosome_sequence in yeast.accession_to_sequence.items(
):
    print 'processing', chromosome_name
    for i in xrange(len(chromosome_sequence) - motif_length):
        sl = chromosome_sequence[i:i + motif_length]
        if sl.startswith(motif_start):
            num_matches += 1
            for i, nuc in enumerate(sl):
                nuc_index = nucleotide_to_index[nuc]