def _align_to_consensus(sequence):
    """Globally align ``sequence`` against the module-level ``consensus``.

    Builds the alignment matrix with the final flag set to ``True`` (the
    value this file uses for every global alignment) and hands it to
    ``student.compute_global_alignment``.

    Returns the result of ``student.compute_global_alignment`` unchanged;
    callers unpack it as ``(score, aligned_sequence, aligned_consensus)``.
    """
    alignment_matrix = student.compute_alignment_matrix(
        sequence, consensus, scoring_matrix, True)
    return student.compute_global_alignment(
        sequence, consensus, scoring_matrix, alignment_matrix)


def question_2():
    """Compare the locally aligned human/fly regions with the consensus.

    Steps:
      1. Locally align the module-level ``human`` and ``fly`` sequences.
      2. Strip the dashes from both local alignments.
      3. Globally align each stripped sequence against ``consensus``.
      4. Print the two global scores, the four aligned sequences and the
         value returned by ``check_sequence`` for each pair, formatted as
         a percentage with one decimal.

    Returns nothing; all results are printed.
    """
    local_matrix = student.compute_alignment_matrix(
        human, fly, scoring_matrix, False)
    # The local score itself is not needed here, only the aligned strings.
    dummy_score, local_human, local_fly = student.compute_local_alignment(
        human, fly, scoring_matrix, local_matrix)

    # Remove the dashes inserted by the local alignment before re-aligning.
    human_region = local_human.replace('-', '')
    fly_region = local_fly.replace('-', '')

    global_score_h, global_human, global_cons_h = _align_to_consensus(
        human_region)
    global_score_f, global_fly, global_cons_f = _align_to_consensus(
        fly_region)

    # Every print below takes exactly one pre-formatted string so the output
    # is the same whether ``print`` is a statement (Python 2) or a function.
    scores_line = (
        "\nHuman and fly against consensus scores after GLOBAL alignment: "
        "{0} {1}")
    print(scores_line.format(global_score_h, global_score_f))
    print("Human and fly sequences after GLOBAL alignment:")
    print(global_human)
    print(global_cons_h)
    print(global_fly)
    print(global_cons_f)
    # The double space after the colon reproduces the original output: the
    # label ended with a space and the print statement added a separator.
    print("Human/consensus alignment match:  {0:.1%}".format(
        check_sequence(global_human, global_cons_h)))
    print("Fly/consensus alignment match:  {0:.1%}".format(
        check_sequence(global_fly, global_cons_f)))
def run_suite():
    """Run a few informal checks of the ``pro`` alignment functions.

    Each check is registered with a ``poc_simpletest.TestSuite`` as
    ``run_test(computed, expected, message)``; a summary is reported at
    the end.
    """

    def print_matrix(matrix):
        # Debugging aid: dump a matrix one row per line.
        # It is not called by any of the checks below.
        for row in matrix:
            print(row)

    tests = poc_simpletest.TestSuite()
    print("\nSTARTING TESTS:")

    # ---- 1. MATRIX builders -------------------------------------------
    # Only the row for key 'A' is compared in 1a.
    row_a = pro.build_scoring_matrix({"A", "B", "C"}, 10, 4, -4)['A']
    tests.run_test(
        row_a,
        {'A': 10, 'C': 4, 'B': 4, '-': -4},
        "Test #1a: 'build_scoring_matrix' method")

    # Only the row for the dash key is compared in 1b.
    row_dash = pro.build_scoring_matrix({'A', 'C', 'T', 'G'}, 6, 2, -4)['-']
    tests.run_test(
        row_dash,
        {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4},
        "Test #1b: 'build_scoring_matrix' method")

    scoring = pro.build_scoring_matrix({'A', 'C', 'T', 'G'}, 10, 2, -4)
    alignment = pro.compute_alignment_matrix(
        "ACC", "TTTACACGG", scoring, True)
    # Checking the last row of the alignment matrix is sufficient here.
    tests.run_test(
        alignment[-1],
        [-12, -6, 0, 6, 2, 6, 10, 14, 10, 6],
        "Test #1c: 'compute_alignment_matrix' method")

    # ---- 2. ALIGNMENT functions ---------------------------------------
    # 2a reuses the scoring and alignment matrices built for 1c.
    tests.run_test(
        pro.compute_global_alignment(
            "ACC", "TTTACACGG", scoring, alignment),
        (6, '---AC-C--', 'TTTACACGG'),
        "Test #2a: 'compute_global_alignment' method")

    # 2b uses hand-written scoring and alignment matrices.
    scoring = {
        'A': {'A': 2, 'C': 1, '-': 0, 'T': 1, 'G': 1},
        'C': {'A': 1, 'C': 2, '-': 0, 'T': 1, 'G': 1},
        '-': {'A': 0, 'C': 0, '-': 0, 'T': 0, 'G': 0},
        'T': {'A': 1, 'C': 1, '-': 0, 'T': 2, 'G': 1},
        'G': {'A': 1, 'C': 1, '-': 0, 'T': 1, 'G': 2},
    }
    alignment = [
        [0, 0, 0, 0, 0, 0],
        [0, 2, 2, 2, 2, 2],
        [0, 2, 3, 4, 4, 4],
        [0, 2, 3, 4, 6, 6],
        [0, 2, 3, 4, 6, 8],
        [0, 2, 3, 5, 6, 8],
        [0, 2, 3, 5, 7, 8],
    ]
    tests.run_test(
        pro.compute_global_alignment("ACTACT", "AGCTA", scoring, alignment),
        (8, 'A-CTACT', 'AGCTA--'),
        "Test #2b: 'compute_global_alignment' method")

    # 2c builds its matrices with the final flag set to False, as the other
    # local-alignment calls in this file do.
    scoring = pro.build_scoring_matrix({"A", "C", "G", "T"}, 10, 2, -4)
    alignment = pro.compute_alignment_matrix(
        "ACC", "TTTACACGG", scoring, False)
    tests.run_test(
        pro.compute_local_alignment("ACC", "TTTACACGG", scoring, alignment),
        (26, 'AC-C', 'ACAC'),
        "Test #2c: 'compute_local_alignment' method")

    # ---- 3. Summary ---------------------------------------------------
    tests.report_results()
def question_1():
    """Locally align the module-level ``human`` and ``fly`` sequences.

    Builds the alignment matrix with the final flag set to ``False``,
    runs ``student.compute_local_alignment`` on it, and prints the score
    followed by the two aligned sequences.

    Returns nothing; all results are printed.
    """
    alignment_matrix = student.compute_alignment_matrix(
        human, fly, scoring_matrix, False)
    local_score, local_human, local_fly = student.compute_local_alignment(
        human, fly, scoring_matrix, alignment_matrix)

    # Every print below takes exactly one pre-formatted string so the output
    # is the same whether ``print`` is a statement (Python 2) or a function.
    print("Human and fly score after LOCAL alignment: {0}".format(
        local_score))
    print("Human and fly sequences after LOCAL alignment:")
    print(local_human)
    print(local_fly)
def calculate_distance(w1, w2):
    """Return ``len(w1) + len(w2) - score`` for the global alignment of the words.

    ``score`` is the global alignment score of ``w1`` and ``w2`` under a
    scoring matrix built over the characters of both words with the
    values ``2, 1, 0``. The last value (the dash score) has to be zero, so
    that only the first two values shape the result.
    """
    alphabet = set(w1) | set(w2)
    word_scores = student.build_scoring_matrix(alphabet, 2, 1, 0)
    matrix = student.compute_alignment_matrix(w1, w2, word_scores, True)

    # Only the score is used; the aligned strings are discarded.
    score, dummy_x, dummy_y = student.compute_global_alignment(
        w1, w2, word_scores, matrix)

    combined_length = len(w1) + len(w2)
    return combined_length - score
def check_spelling(checked_word, dist, word_list):
    """Return the words in ``word_list`` whose distance to ``checked_word`` is at most ``dist``.

    The distance to a word is ``len(checked_word) + len(word) - score``,
    where ``score`` is the global alignment score of the two words under a
    scoring matrix built with the values ``2, 1, 0``. That matrix is built
    once, over every character of ``checked_word`` and of all the words in
    ``word_list``.

    Matching words are returned in the order they appear in ``word_list``.
    """
    letters = set(checked_word)
    letters |= set(chain.from_iterable(word_list))
    word_scores = student.build_scoring_matrix(letters, 2, 1, 0)

    def distance_to(candidate):
        # Distance between ``checked_word`` and one candidate word.
        matrix = student.compute_alignment_matrix(
            checked_word, candidate, word_scores, True)
        score, dummy_x, dummy_y = student.compute_global_alignment(
            checked_word, candidate, word_scores, matrix)
        return len(checked_word) + len(candidate) - score

    return [word for word in word_list if distance_to(word) <= dist]
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """Count local alignment scores of ``seq_x`` against shuffled copies of ``seq_y``.

    For each of ``num_trials`` trials, a fresh copy of ``seq_y`` is turned
    into a list and shuffled with ``random.shuffle``. The local alignment
    score of ``seq_x`` against that shuffled list is then computed.

    Returns a dict mapping each observed score to the number of trials
    that produced it.
    """
    counts = {}
    for _ in range(num_trials):
        # The real ``seq_x`` is paired with a random permutation of ``seq_y``.
        shuffled_y = list(seq_y)
        random.shuffle(shuffled_y)

        matrix = student.compute_alignment_matrix(
            seq_x, shuffled_y, scoring_matrix, False)
        score, dummy_x, dummy_y = student.compute_local_alignment(
            seq_x, shuffled_y, scoring_matrix, matrix)

        counts[score] = counts.get(score, 0) + 1
    return counts
def question_5():
    """Print mean, standard deviation and z-value for the human/fly local score.

    The statistics are computed from a stored score distribution (a copy of
    the output of ``generate_null_distribution`` for 1000 trials). The
    z-value is the actual local alignment score of the module-level
    ``human`` and ``fly`` sequences, expressed in standard deviations away
    from the mean of that distribution.

    Returns nothing; all results are printed.
    """
    # distribution = generate_null_distribution(human, fly, scoring_matrix, 1000)
    # The call above takes too long to compute, so a stored copy of its
    # result (score -> number of trials) is used instead.
    distribution = {
        39: 4, 40: 5, 41: 15, 42: 24, 43: 28, 44: 39, 45: 52, 46: 70,
        47: 71, 48: 65, 49: 74, 50: 77, 51: 67, 52: 63, 53: 59, 54: 32,
        55: 41, 56: 35, 57: 29, 58: 20, 59: 21, 60: 22, 61: 12, 62: 12,
        63: 11, 64: 5, 65: 7, 66: 6, 67: 4, 68: 11, 69: 5, 70: 3,
        71: 3, 72: 1, 74: 1, 75: 2, 77: 1, 78: 1, 80: 2
    }

    # Expand the histogram into one list entry per trial.
    scores = list(
        chain.from_iterable([x] * distribution[x] for x in distribution))

    # The actual local score between human and fly is needed for the z-value.
    alignment_matrix = student.compute_alignment_matrix(
        human, fly, scoring_matrix, False)
    local_score, dummy_human, dummy_fly = student.compute_local_alignment(
        human, fly, scoring_matrix, alignment_matrix)

    mean = sum(scores) / float(len(scores))
    stdev = math.sqrt(
        sum((score - mean) ** 2 for score in scores) / float(len(scores)))

    # Small z-scores indicate a greater likelihood that the local alignment
    # score was due to chance, while larger scores indicate a lower
    # likelihood that it was due to chance.
    # The numerator on its own is already very large, which argues against
    # randomness.
    z = local_score - mean
    # Dividing by the standard deviation shows how many standard deviations
    # the actual result is away from the mean.
    z_value = (local_score - mean) / stdev

    # Every print below takes exactly one pre-formatted string so the output
    # is the same whether ``print`` is a statement (Python 2) or a function.
    print("the mean value is : {0}".format(mean))
    print("the standard deviation is : {0}".format(stdev))
    print("the z-value is : {0}".format(z_value))
    # The z-score helps quantify the likelihood of the score being a
    # product of chance.
    print(
        "the match of the random data is off (3 x stdv) by approx. : "
        "{0} times".format(int(z / (3 * stdev))))