def frequent_words_with_mismatches_v2(text, k, hamming_dist):
    """(Failed) attempt to improve the efficiency of v1"""
    #initialize frequency array
    frequency_array = [0] * (4 ** k)
    #list all patterns in text
    patterns_in_text = [text[index:index+k] for index in range(len(text)-k+1)]
    #create list of all possible kmers
    possible_kmers = all_possible_kmers(k)
    #go through patterns and compare to kmers
    for pattern in patterns_in_text:
        for kmer in possible_kmers:
            if patterncount.hamming_distance(pattern, kmer) <= hamming_dist:
                frequency_array[pattern_to_number(kmer)] += 1
    max_count = max(frequency_array)
    frequent_patterns = set()
    for index in range(4 ** k):
        if frequency_array[index] == max_count:
            frequent_patterns.add(number_to_pattern(index, k))
    return (frequent_patterns, max_count)
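
# Hedged, self-contained sketch -- not part of the original code path, and not
# necessarily how the v3 mentioned in v1's docstring works: one standard way to
# avoid scanning all 4**k candidates is to generate the Hamming-distance
# neighborhood of each k-mer that actually occurs in text and count only those.
# The names _neighbors and frequent_words_with_mismatches_sketch are introduced
# here purely for illustration.
def _neighbors(pattern, d):
    """Return the set of all strings within Hamming distance d of pattern."""
    if d == 0:
        return set([pattern])
    if len(pattern) == 1:
        return set(["A", "C", "G", "T"])
    neighborhood = set()
    for suffix in _neighbors(pattern[1:], d):
        #mismatches already spent on the suffix
        mismatches = sum(1 for a, b in zip(pattern[1:], suffix) if a != b)
        if mismatches < d:
            #budget left over: the first position may be any base
            for base in "ACGT":
                neighborhood.add(base + suffix)
        else:
            #budget exhausted: the first position must match pattern
            neighborhood.add(pattern[0] + suffix)
    return neighborhood

def frequent_words_with_mismatches_sketch(text, k, hamming_dist):
    """Illustrative neighborhood-based variant; returns (frequent_patterns, max_count)."""
    counts = {}
    for index in range(len(text)-k+1):
        for neighbor in _neighbors(text[index:index+k], hamming_dist):
            counts[neighbor] = counts.get(neighbor, 0) + 1
    max_count = max(counts.values())
    frequent_patterns = set(pat for pat, count in counts.items() if count == max_count)
    return (frequent_patterns, max_count)
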
def frequent_words_with_mismatches_v1(text, k, hamming_dist):
    """Find the most frequent word(s) of length within a text, allowing up to
    hamming_dist mismatches. N.B. that the most frequent word need not actually
    occur in the text -- for example, input ("ATATA", 3, 2) returns 10 distinct
    patterns that occur twice, despite the fact that there are only two distinct
    3-mers that actually occur in the text, and one of those patterns that
    actually does occur in the text ('TAT') is not one of the most frequent
    3-mers. Runs very inefficiently; use v3 instead!"""
    #enumerate all possible kmers
    possible_kmers = set(all_possible_kmers(k))
    #enumerate kmers that actually occur in text
    patterns_in_text = [text[index:index+k] for index in range(len(text)-k+1)]
    #pare down to those that are within hamming_dist of at least one pattern in text
    relevant_kmers = set()
    for kmer in possible_kmers:
        for pattern in patterns_in_text:
            if patterncount.hamming_distance(pattern, kmer) <= hamming_dist:
                relevant_kmers.add(kmer)
                break
    relevant_kmers = relevant_kmers.union(patterns_in_text) #add all patterns that actually occur in text!
    #initialize frequency array
    frequency_array = [0] * (4 ** k)
    #loop through text and count frequencies
    for pattern in patterns_in_text:
        pattern_list = set(kmer for kmer in relevant_kmers
                           if patterncount.hamming_distance(kmer, pattern) <= hamming_dist)
        pattern_list.add(pattern)
        for pat in pattern_list:
            frequency_array[pattern_to_number(pat)] += 1
    #count most frequent patterns
    max_count = max(frequency_array)
    frequent_patterns = set()
    for index in range(4 ** k):
        if frequency_array[index] == max_count:
            frequent_patterns.add(number_to_pattern(index, k))
    return (frequent_patterns, max_count)
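
# Hedged usage sketch: a tiny demo exercising the example from v1's docstring.
# It assumes that patterncount, all_possible_kmers, pattern_to_number and
# number_to_pattern (referenced above) are defined or imported elsewhere in this
# module; the _demo name itself is introduced here only for illustration.
def _demo():
    """Run v1 and v2 on the docstring example ("ATATA", 3, 1).

    Per the v1 docstring, both versions should report 10 distinct 3-mers with a
    maximum count of 2, and 'TAT' should be absent from the result even though
    it occurs in the text.
    """
    result_v1 = frequent_words_with_mismatches_v1("ATATA", 3, 1)
    result_v2 = frequent_words_with_mismatches_v2("ATATA", 3, 1)
    return (result_v1, result_v2)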