def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """Tally local-alignment scores of seq_x against random shuffles of seq_y.

    For each trial the characters of seq_y are shuffled, the local
    alignment matrix and local alignment against seq_x are computed via
    the PR module, and the first element of the alignment result is
    counted.

    Args:
        seq_x: sequence kept fixed across all trials.
        seq_y: sequence whose characters are shuffled before each trial.
        scoring_matrix: scoring matrix passed through to the PR helpers.
        num_trials: number of trials; values <= 0 give an empty result.

    Returns:
        dict mapping each observed value of
        ``PR.compute_local_alignment(...)[0]`` (presumably the alignment
        score -- confirm against PR) to the number of trials that
        produced it. The counts are not normalized.
    """
    score_counts = {}
    shuffled_y = seq_y
    remaining = num_trials
    while remaining > 0:
        # Each trial shuffles the permutation left by the previous trial,
        # not the pristine input.
        letters = list(shuffled_y)
        random.shuffle(letters)
        shuffled_y = ''.join(letters)

        alignment_matrix = PR.compute_alignment_matrix(
            seq_x, shuffled_y, scoring_matrix, False)
        result = PR.compute_local_alignment(
            seq_x, shuffled_y, scoring_matrix, alignment_matrix)

        # dict.get replaces the has_key()/else pair; has_key() does not
        # exist on Python 3 dictionaries.
        score = result[0]
        score_counts[score] = score_counts.get(score, 0) + 1
        remaining -= 1
    return score_counts
def check_spelling(checked_word, dist, word_list):
    """Return the words of word_list that lie within dist of checked_word.

    Every candidate word is globally aligned against checked_word using a
    scoring matrix built by ``PR.build_scoring_matrix(alphabet, 2, 1, 0)``.
    The distance compared against ``dist`` is
    ``len(checked_word) + len(candidate) - alignment[0]``, which the code
    treats as the edit distance between the two words.

    Args:
        checked_word: the word being checked.
        dist: largest distance (inclusive) for a candidate to be kept.
        word_list: iterable of candidate words.

    Returns:
        list of the matching candidates, in word_list order.
    """
    scoring = PR.build_scoring_matrix(alphabet, 2, 1, 0)
    matches = []
    for candidate in word_list:
        grid = PR.compute_alignment_matrix(checked_word, candidate, scoring, True)
        alignment = PR.compute_global_alignment(checked_word, candidate, scoring, grid)
        if len(checked_word) + len(candidate) - alignment[0] <= dist:
            matches.append(candidate)
    return matches
newalign_y = "" trail = True for ith, jth in zip(align_x, align_y): if (ith or jth) == "-" and trail == True: trail = True else: newalign_x += ith newalign_y += jth trail = False ''' oldscore = score score = 0 for ith, jth in zip(align_x, align_y): score += scoring_matrix[ith][jth] #score = alignment_matrix[len(seq_x)][len(seq_y)] print "Oldscore = ", oldscore print "VALIDANCE = ", oldscore == score return (score, align_x, align_y) pro1 = LoadHelper.read_protein(LoadHelper.HUMAN_EYELESS_URL) pro2 = LoadHelper.read_protein(LoadHelper.FRUITFLY_EYELESS_URL) matr = LoadHelper.read_scoring_matrix(LoadHelper.PAM50_URL) alimat = PR.compute_alignment_matrix(pro1, pro2, matr, False) print compute_local_alignment(pro1, pro2, matr, alimat)
# Data locations previously hard-coded here (LoadHelper constants are used
# for the actual loads below):
# humanEyelessProtein = "http://storage.googleapis.com/codeskulptor-alg/alg_HumanEyelessProtein.txt"
# fruitflyEyelessProtein = "http://storage.googleapis.com/codeskulptor-alg/alg_FruitflyEyelessProtein.txt"
# scoringMatrixPAM50 = "http://storage.googleapis.com/codeskulptor-alg/alg_PAM50.txt"

pro1 = LoadHelper.read_protein(LoadHelper.HUMAN_EYELESS_URL)
pro2 = LoadHelper.read_protein(LoadHelper.FRUITFLY_EYELESS_URL)
matr = LoadHelper.read_scoring_matrix(LoadHelper.PAM50_URL)
cpax = LoadHelper.read_protein(LoadHelper.CONSENSUS_PAX_URL)

# Hard-coded sequences; only localFly is used in the alignment below.
localHum = "HSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATPEVVSKIAQYKRECPSIFAWEIRDRLLSEGVCTNDNIPSVSSINRVLRNLASEK-QQ"
localHum_dashless = "HSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATPEVVSKIAQYKRECPSIFAWEIRDRLLSEGVCTNDNIPSVSSINRVLRNLASEKQQ"
localFly = "HSGVNQLGGVFVGGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATAEVVSKISQYKRECPSIFAWEIRDRLLQENVCTNDNIPSVSSINRVLRNLAAQKEQQ"

# Globally align localFly against the sequence loaded from
# CONSENSUS_PAX_URL.
alimat = PR.compute_alignment_matrix(localFly, cpax, matr, True)
a1 = PR.compute_global_alignment(localFly, cpax, matr, alimat)
# Single-argument print(...) produces the same output under Python 2 and
# is valid syntax under Python 3.
print(a1)

# To compare against random sequences, substitute a1 with:
# s1 = ''.join(random.choice("ACBEDGFIHKMLNQPSRTWVYXZ") for x in range(130))
# s2 = ''.join(random.choice("ACBEDGFIHKMLNQPSRTWVYXZ") for x in range(130))
# a1 = (12, s1, s2)

# Fraction of positions at which a1[1] and a1[2] hold the same character.
cnt = sum(1 for ith, jth in zip(a1[1], a1[2]) if ith == jth)
print(cnt / (len(a1[1]) * 1.0))
import Project as PR import string import LoadHelper str1 = "sadness" str2 = "straw" #str1 = "kitten" #str2 = "sitting" alphabet = list(string.ascii_lowercase) matrix = PR.build_scoring_matrix(alphabet, 2 ,1, 0) alig = PR.compute_alignment_matrix(str1, str2, matrix, True) res = PR.compute_global_alignment(str1, str2, matrix, alig) #print res #print "EDIT dist = ", len(str1)+len(str2)-res[0] word_list = LoadHelper.read_words(LoadHelper.WORD_LIST_URL) #print "WL = ", word_list def check_spelling(checked_word, dist, word_list): matrix = PR.build_scoring_matrix(alphabet, 2 ,1, 0) retlist = [] str1 = checked_word for ith in word_list: str2 = ith alig = PR.compute_alignment_matrix(str1, str2, matrix, True) res = PR.compute_global_alignment(str1, str2, matrix, alig) edit_dist = len(str1)+len(str2)-res[0] if edit_dist <= dist: retlist.append(str2)