def frequent_words_with_mismatches_by_sorting(text, k, d):
    """Return the most frequent k-mers of ``text``, allowing mismatches.

    Every k-length window of ``text`` is expanded with ``neighbors(window, d)``
    (defined elsewhere; presumably the k-mers within ``d`` mismatches of the
    window -- confirm against that helper). Each neighbor is encoded with
    ``p_to_n_modify``, the encoded values are sorted, and the values forming
    the longest runs of equal entries are decoded with ``n_to_p_modify``.

    Args:
        text: Sequence to scan.
        k: Window (pattern) length.
        d: Mismatch budget forwarded to ``neighbors``.

    Returns:
        A list of decoded patterns whose encoded value occurs most often,
        ordered by ascending encoded value. The list is empty when ``text``
        contains no k-length window.
    """
    from itertools import groupby

    sorted_index = sorted(
        p_to_n_modify(pattern)
        for start in range(len(text) - k + 1)
        for pattern in neighbors(text[start:start + k], d)
    )
    # No window means nothing to rank; bail out before max() sees an
    # empty sequence and raises ValueError.
    if not sorted_index:
        return []

    # After sorting, equal encoded values are adjacent, so the length of each
    # run is the number of times that value was generated.
    run_lengths = [
        (index, sum(1 for _ in run)) for index, run in groupby(sorted_index)
    ]
    max_count = max(length for _, length in run_lengths)
    return [
        n_to_p_modify(index, k)
        for index, length in run_lengths
        if length == max_count
    ]
# Example #2
# 0
def motif_enumeration(dna, k, d):
    """Collect the patterns that occur, with mismatches, in every string.

    For each k-length window of each string in ``dna`` the candidates produced
    by ``neighbors(window, d)`` are examined. A candidate is kept when
    ``patterncount_with_mismatchs(string, candidate, d)`` is positive for
    every string in ``dna``. Both helpers are defined elsewhere; presumably
    they implement the d-neighborhood and an approximate occurrence count --
    confirm against their definitions.

    Args:
        dna: Iterable of strings; it is traversed repeatedly, so it must be
            re-iterable (e.g. a list).
        k: Candidate pattern length.
        d: Mismatch budget forwarded to both helpers.

    Returns:
        A set of the candidates found in every string. Empty when ``dna`` is
        empty or no string has a k-length window.
    """
    motifs = set()
    # The same candidate is typically produced by many windows; remembering
    # which ones were already scored avoids re-scanning every string for it.
    examined = set()
    for strand in dna:
        for start in range(len(strand) - k + 1):
            for pattern in set(neighbors(strand[start:start + k], d)):
                if pattern in examined:
                    continue
                examined.add(pattern)
                # all() stops at the first string that lacks the candidate.
                if all(
                    patterncount_with_mismatchs(other, pattern, d) > 0
                    for other in dna
                ):
                    motifs.add(pattern)
    return motifs
def frequent_words_with_mismatches(text, k, d):
    """Return the most frequent k-mers of ``text``, allowing mismatches.

    Candidates are the patterns produced by ``neighbors(window, d)`` for every
    k-length window of ``text``, encoded with ``p_to_n_modify``. Each distinct
    candidate is decoded with ``n_to_p_modify`` and scored as
    ``len(approximate_pattern_matching(pattern, text, d))``. The helpers are
    defined elsewhere; presumably the score is the number of approximate
    occurrences of the pattern in ``text`` -- confirm against the helper.

    Only candidates that were actually generated are tracked, so memory and
    time scale with the number of candidates rather than with ``4 ** k``.

    Args:
        text: Sequence to scan.
        k: Window (pattern) length.
        d: Mismatch budget forwarded to the helpers.

    Returns:
        A list of the top-scoring decoded candidates, ordered by ascending
        encoded value. The list is empty when ``text`` contains no k-length
        window.
    """
    candidates = {
        p_to_n_modify(pattern)
        for start in range(len(text) - k + 1)
        for pattern in neighbors(text[start:start + k], d)
    }
    # With no window there is no candidate to score (and nothing for max()).
    if not candidates:
        return []

    # Score in ascending encoded order so the result order is deterministic.
    scored = []
    for index in sorted(candidates):
        pattern = n_to_p_modify(index, k)
        occurrences = len(approximate_pattern_matching(pattern, text, d))
        scored.append((pattern, occurrences))

    max_count = max(occurrences for _, occurrences in scored)
    return [
        pattern for pattern, occurrences in scored if occurrences == max_count
    ]