Example #1
0
def DistanceBetweenPatternAndStrings(pattern, text):
    """Sum, over every string in ``text``, the smallest distance to ``pattern``.

    For each string, every window of ``len(pattern)`` characters is compared
    with ``pattern`` via ``hamming_distance`` (a helper defined elsewhere in
    this file) and the smallest result is added to the total.

    Args:
        pattern: The k-mer to compare against.
        text: An iterable of strings (despite the singular name).

    Returns:
        The accumulated total. A string shorter than ``pattern`` has no
        windows and contributes ``float("inf")``.
    """
    width = len(pattern)
    total = 0
    for sequence in text:
        window_distances = [
            hamming_distance(pattern, sequence[start:start + width])
            for start in range(len(sequence) - width + 1)
        ]
        # No windows at all means the running minimum never left infinity.
        total += min(window_distances) if window_distances else float("inf")
    return total
def MotifScore(Motifs):
    """Return the number of letters that disagree with the column consensus.

    For each column of the motif matrix, the most frequent letter is taken as
    the consensus and every other letter in that column counts one point.
    This equals the sum of each motif's distance to the consensus string,
    assuming the file's ``hamming_distance`` helper counts mismatched
    positions; ties for the most frequent letter do not affect the total.

    Args:
        Motifs: A sequence of strings. Columns are taken over the length of
            the first string.

    Returns:
        The integer score; ``0`` when ``Motifs`` is empty.

    Raises:
        IndexError: If a motif is shorter than the first one.
    """
    from collections import Counter

    if not Motifs:
        return 0

    rows = len(Motifs)
    score = 0
    for i in range(len(Motifs[0])):
        # One counting pass per column instead of one col.count() per letter.
        column = Counter(motif[i] for motif in Motifs)
        score += rows - max(column.values())
    return score
def approx_pattern_matching(pattern, text, d):
    """Return the start indices where ``pattern`` approximately occurs in ``text``.

    Each window of ``len(pattern)`` characters in ``text`` is compared with
    ``pattern`` using ``hamming_distance`` (a helper defined elsewhere in this
    file); a window matches when the result is at most ``d``.

    Args:
        pattern: The string to look for.
        text: The string to search in.
        d: Inclusive upper bound on the distance for a match.

    Returns:
        A list of 0-based start indices in ascending order. The list is
        empty when ``text`` is shorter than ``pattern``.
    """
    k = len(pattern)
    # Every index produced by this range leaves room for a full k-character
    # window, so no truncated-window handling is needed.
    return [
        i for i in range(len(text) - k + 1)
        if hamming_distance(pattern, text[i:i + k]) <= d
    ]
Example #4
0
def string_scores(pattern, strings):
    """Total, over all ``strings``, of the closest k-mer distance to ``pattern``.

    Each string is scanned for windows as long as ``pattern``; the smallest
    ``hamming_distance`` (helper defined elsewhere in this file) found in a
    string is added to the score.

    Args:
        pattern: The k-mer to compare against.
        strings: An iterable of sequences to scan.

    Returns:
        The summed score. A string with no full-length window contributes
        ``float("inf")``.
    """
    width = len(pattern)
    total = 0

    for sequence in strings:
        # Last start index that still leaves room for a full window.
        last_start = len(sequence) - width
        best = float("inf")

        for start in range(len(sequence)):
            if start > last_start:
                break  # every later window would run past the end
            candidate = hamming_distance(sequence[start:start + width], pattern)
            if candidate < best:
                best = candidate

        total += best

    return total
def Neighbors(Pattern, d, nucleotides={'A', 'C', 'G', 'T'}):
    """Recursively build the list of strings within distance ``d`` of ``Pattern``.

    The neighborhood of the suffix ``Pattern[1:]`` is computed first. A suffix
    neighbor that is strictly closer than ``d`` to the suffix may take any
    first letter; one that is not must keep ``Pattern[0]``. Distances come
    from ``hamming_distance``, a helper defined elsewhere in this file.

    Args:
        Pattern: The string whose neighborhood is generated.
        d: The distance budget.
        nucleotides: The alphabet of allowed letters. The default set is
            never mutated or returned, so sharing it across calls is safe.

    Returns:
        A list of strings in every case: ``[Pattern]`` when ``d == 0`` or
        ``Pattern`` is empty, otherwise the generated neighbors. Letters are
        emitted in sorted alphabet order so the result is deterministic.
    """
    if d == 0 or not Pattern:
        # Return a one-element list, not the bare string: callers iterating
        # the result must see the pattern, not its individual characters.
        # The empty-pattern guard also stops the recursion from never ending.
        return [Pattern]

    alphabet = sorted(nucleotides)
    if len(Pattern) == 1:
        return alphabet

    suffix = Pattern[1:]
    neighborhood = []
    # Pass the alphabet down so a custom one applies at every position.
    for text in Neighbors(suffix, d, nucleotides):
        if hamming_distance(suffix, text) < d:
            neighborhood.extend(x + text for x in alphabet)
        else:
            neighborhood.append(Pattern[0] + text)
    return neighborhood
def neighbors(pattern, d):
    """Return the set of ACGT strings within distance ``d`` of ``pattern``.

    Every string over ``A``/``C``/``G``/``T`` of the same length as
    ``pattern`` is enumerated with ``product`` and kept when
    ``hamming_distance`` (a helper defined elsewhere in this file) reports a
    value of at most ``d``. ``pattern`` itself is always included.

    The enumeration visits ``4 ** len(pattern)`` candidates, so this is only
    practical for short patterns.

    Args:
        pattern: The string whose neighborhood is generated.
        d: Inclusive upper bound on the distance.

    Returns:
        A set of strings in every case (``{pattern}`` when ``d == 0``).
    """
    neighborhood = {pattern}
    for letters in product('ACGT', repeat=len(pattern)):
        candidate = ''.join(letters)
        # "<=" rather than "==": neighbors closer than d belong to the
        # neighborhood as well, not only those at exactly distance d.
        if hamming_distance(pattern, candidate) <= d:
            neighborhood.add(candidate)
    return neighborhood
Example #7
0
def approx_pattern_count(pattern, text, d):
    """Count, and print, the positions where ``pattern`` approximately occurs.

    A start index counts when the window of ``len(pattern)`` characters at
    that index equals ``pattern`` or when ``hamming_distance`` (a helper
    defined elsewhere in this file) between the two is at most ``d``.

    Side effects:
        Prints the list of matching positions, then their count.

    Args:
        pattern: The string to look for.
        text: The string to search in.
        d: Inclusive upper bound on the distance for a match.

    Returns:
        The number of matching start positions.
    """
    k = len(pattern)
    positions = []
    # min() keeps start indices inside text even for an empty pattern; the
    # range is empty when pattern is longer than text.
    for i in range(min(len(text), len(text) - k + 1)):
        window = text[i:i + k]
        # The equality test skips the distance computation for exact matches
        # and still counts them if d is negative.
        if window == pattern or hamming_distance(pattern, window) <= d:
            positions.append(i)

    # Single-argument print(...) produces the same output on Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print(positions)
    print(len(positions))

    return len(positions)
def Neighbors(pattern, d):
    """Recursively collect the set of strings within distance ``d`` of ``pattern``.

    The neighborhood of ``pattern[1:]`` is built first. Each suffix neighbor
    strictly closer than ``d`` to the suffix is prefixed with every entry of
    the module-level ``Nucleotides``; any other suffix neighbor keeps
    ``pattern[0]`` as its first letter. Distances come from
    ``hamming_distance``, a helper defined elsewhere in this file.

    Args:
        pattern: The string whose neighborhood is generated.
        d: The distance budget.

    Returns:
        A set of strings: ``{pattern}`` when ``d == 0``, the four letters
        ``A``/``C``/``G``/``T`` when ``pattern`` has length 1, otherwise the
        generated neighbors.
    """
    if d == 0:
        return {pattern}
    if len(pattern) == 1:
        return {'A', 'C', 'G', 'T'}

    tail = pattern[1:]
    result = set()
    for candidate in Neighbors(tail, d):
        if hamming_distance(candidate, tail) < d:
            # Budget left over: the first letter is free to vary.
            result.update(base + candidate for base in Nucleotides)
        else:
            # Budget used up by the suffix: the first letter must match.
            result.add(pattern[0] + candidate)
    return result
Example #9
0
def distance_between_pattern_and_strings(pattern, dna):
    """Sum, over every string in ``dna``, the smallest distance to ``pattern``.

    Each string is stripped of surrounding whitespace, then every window of
    ``len(pattern)`` characters is compared with ``pattern`` via
    ``hamming_distance`` (a helper defined elsewhere in this file). The
    smallest result per string is added to the total.

    Args:
        pattern: The k-mer to compare against.
        dna: An iterable of strings.

    Returns:
        The accumulated distance. Strings that are shorter than ``pattern``
        after stripping have no windows and are skipped (they add nothing).
    """
    k = len(pattern)
    distance = 0
    for text in dna:
        text = text.strip()
        window_distances = [
            hamming_distance(text[i:i + k], pattern)
            for i in range(len(text) - k + 1)
        ]
        # Explicit emptiness check instead of catching min()'s ValueError, so
        # an unrelated ValueError is never silently swallowed.
        if window_distances:
            distance += min(window_distances)
    return distance
def approximate_pattern(pattern, text, n):
    """List the start indices in ``text`` whose window is within ``n`` of ``pattern``.

    A window is the ``len(pattern)`` characters starting at an index; it
    qualifies when ``hamming_distance`` (a helper defined elsewhere in this
    file) between ``pattern`` and the window is at most ``n``.

    Args:
        pattern: The string to look for.
        text: The string to search in.
        n: Inclusive upper bound on the distance for a match.

    Returns:
        A list of 0-based start indices in ascending order.
    """
    width = len(pattern)
    return [
        start
        for start in range(len(text) - width + 1)
        if hamming_distance(pattern, text[start:start + width]) <= n
    ]