def pattern_matching_mismatches_copy_pseudocode(text, k, d):
    """Return the k-length patterns with the highest approximate count in ``text``.

    Every k-length window of ``text`` is expanded with ``neighbours(window, d)``
    and each resulting pattern is flagged as a candidate.  Each candidate is
    then scored with ``approx_pattern_count(text, pattern, d)``, and all
    patterns sharing the maximum score are returned.

    Args:
        text: Sequence to scan; sliced as ``text[i:i + k]``.
        k: Window / pattern length.  The lookup tables hold ``4 ** k`` slots,
            indexed through ``pattern_to_number`` / ``number_to_pattern``.
        d: Passed through unchanged to ``neighbours`` and
            ``approx_pattern_count``.
            # NOTE(review): presumably the maximum number of mismatches
            # (Hamming distance) -- confirm against those helpers.

    Returns:
        A set of patterns (as produced by ``number_to_pattern``) whose score
        equals the maximum over the whole table.  If ``text`` has no
        k-length window, every score stays 0 and all ``4 ** k`` patterns
        are returned.
    """
    table_size = 4 ** k
    close = [False] * table_size
    frequency_array = [0] * table_size

    # There are len(text) - k + 1 windows of length k; the "+ 1" ensures the
    # final window (ending at the last character) is scanned as well.
    for start in range(len(text) - k + 1):
        for pattern in neighbours(text[start:start + k], d):
            close[pattern_to_number(pattern)] = True

    # Only score the patterns flagged above; the rest keep a count of 0.
    for index, is_close in enumerate(close):
        if is_close:
            pattern = number_to_pattern(index, k)
            frequency_array[index] = approx_pattern_count(text, pattern, d)

    max_count = max(frequency_array)
    return {
        number_to_pattern(index, k)
        for index, count in enumerate(frequency_array)
        if count == max_count
    }
# Example #2
# 0
def median_string(dna_list, k):
    """Return the k-length pattern with the smallest total distance to ``dna_list``.

    Every index in ``0 .. 4**k - 1`` is converted to a pattern with
    ``number_to_pattern(index, k)`` and scored with
    ``distance_between_pattern_and_strings(pattern, dna_list)``.  The pattern
    with the lowest score is returned; on ties the pattern with the lowest
    index wins because only a strictly smaller score replaces the current
    best.

    Args:
        dna_list: Collection of strings, passed unchanged to
            ``distance_between_pattern_and_strings``.
        k: Length of the candidate patterns.

    Returns:
        The best-scoring pattern as produced by ``number_to_pattern``.
    """
    median = None
    distance = None
    # range(4 ** k) already stops at 4**k - 1, so no "- 1" is needed;
    # subtracting one would drop the last pattern from the search.
    for index in range(4 ** k):
        pattern = number_to_pattern(index, k)
        dist = distance_between_pattern_and_strings(pattern, dna_list)
        if distance is None or dist < distance:
            distance = dist
            median = pattern
    return median