"""Scan the yeast genome for TATA-prefixed 10-mers to build a PSSM.

Loads every sequence from ``s_cerevisiae.fasta``, initialises a
``motif_length`` x 4 count matrix with a pseudo-count, and walks each
window that starts with ``motif_start`` nucleotide by nucleotide.
"""
from FASTA import *
import numpy
import pylab as P

# Switch pylab to interactive mode.
P.ion()

nucleotides = ['G', 'A', 'T', 'C']
# Maps each nucleotide letter to its position in `nucleotides`.
nucleotide_to_index = {nuc: i for i, nuc in enumerate(nucleotides)}


def iter_motif_windows(sequence, prefix, length):
    """Yield every `length`-character window of `sequence` starting with `prefix`.

    Windows are produced left to right and may overlap.  Only full-length
    windows are yielded, including the one that ends exactly at the end of
    `sequence` (the earlier index loop stopped one position short of it).

    Assumes `sequence` is a plain string (it must support ``find``, ``len``
    and slicing).
    """
    start = sequence.find(prefix)
    # Matches are found in increasing order, so the first one that no longer
    # fits a full window means no later one fits either.
    while start != -1 and start + length <= len(sequence):
        yield sequence[start:start + length]
        # Advance by one so overlapping occurrences are still reported.
        start = sequence.find(prefix, start + 1)


# build PSSM on yeast genome:
yeast = FASTA('s_cerevisiae.fasta')

# motif is TATAwxyzuv
motif_start = 'TATA'
motif_length = 10
pseudo_count = 1

# One row per motif position, one column per nucleotide; every cell starts
# at the pseudo-count.
count_pssm = numpy.zeros((motif_length, len(nucleotides))) + pseudo_count
num_matches = 0

for chromosome_name, chromosome_sequence in yeast.accession_to_sequence.items():
    print('processing %s' % (chromosome_name,))
    for sl in iter_motif_windows(chromosome_sequence, motif_start,
                                 motif_length):
        num_matches += 1
        for i, nuc in enumerate(sl):
            # NOTE(review): raises KeyError for any character other than
            # G/A/T/C (e.g. 'N' or lower-case bases) - confirm the input
            # never contains them.
            nuc_index = nucleotide_to_index[nuc]