def motif_count(sequences, start_at=4, stop_at=8):
    """Return nested motif counts for every motif length in a range.

    For each length in ``range(start_at, stop_at)`` (``stop_at`` is
    exclusive, so the defaults cover lengths 4 through 7), every motif
    yielded by ``possible_motifs_by_length`` gets an entry. Its value is the
    sum of ``overlap_count(sequence, motif)`` over all sequences in which
    ``sequence.find(motif)`` locates the motif.

    The output dict is nested like so:

        motifs = {
            4: {
                'GGAG': 5,
            }
        }

    Every candidate motif is present in the result with a count of 0 when
    no sequence contains it, including when ``sequences`` is empty.

    sequences -- A list of sequences (each must provide ``find``)
    start_at -- Shortest motif length to count (inclusive)
    stop_at -- Motif length to stop before (exclusive)
    """
    motifs = {}
    for motif_length in range(start_at, stop_at):
        # The candidate motifs depend only on the length, so build them once
        # per length rather than once per sequence. Materialise as a list in
        # case the helper returns a one-shot iterator.
        candidates = list(possible_motifs_by_length(motif_length))
        counts = dict.fromkeys(candidates, 0)
        for sequence in sequences:
            for motif in candidates:
                # Only call the counting helper when the motif is present.
                if sequence.find(motif) != -1:
                    counts[motif] += overlap_count(sequence, motif)
        motifs[motif_length] = counts
    return motifs
def test_count_with_overlap(self):
    """Two overlapping occurrences of the motif must both be counted."""
    sequence, motif = "GGAGGAGG", "GGAGG"
    # "GGAGG" matches at offsets 0 and 3; the two matches share "GG".
    self.assertEqual(cStrings.overlap_count(sequence, motif), 2)