예제 #1
0
def question_2():
    """Print how well the local human/fly alignments match the consensus.

    Steps, all performed through the external ``student`` module:

    1. Locally align the module-level ``human`` and ``fly`` sequences.
    2. Remove the ``'-'`` gap characters from both local alignments.
    3. Globally align each stripped sequence against the module-level
       ``consensus`` sequence.
    4. Print both global scores, the four aligned strings and, for each
       pair, the value returned by ``check_sequence`` formatted as a
       percentage with one decimal.

    Every ``print`` call takes one pre-formatted string, so the output is
    identical on Python 2 and Python 3 (the former ``print`` statements
    were a syntax error on Python 3).

    Returns:
        None. The function only prints.
    """
    # NOTE(review): the trailing False/True flag of compute_alignment_matrix
    # presumably selects a local (False) vs. global (True) matrix - confirm
    # against the ``student`` module.
    local_matrix = student.compute_alignment_matrix(
        human, fly, scoring_matrix, False)
    dummy_score, local_human, local_fly = student.compute_local_alignment(
        human, fly, scoring_matrix, local_matrix)

    # The local alignments contain '-' gap markers; drop them before
    # re-aligning against the consensus.
    human_fragment = local_human.replace('-', '')
    fly_fragment = local_fly.replace('-', '')

    human_matrix = student.compute_alignment_matrix(
        human_fragment, consensus, scoring_matrix, True)
    fly_matrix = student.compute_alignment_matrix(
        fly_fragment, consensus, scoring_matrix, True)

    global_score_h, global_human, global_cons_h = (
        student.compute_global_alignment(
            human_fragment, consensus, scoring_matrix, human_matrix))
    global_score_f, global_fly, global_cons_f = (
        student.compute_global_alignment(
            fly_fragment, consensus, scoring_matrix, fly_matrix))

    score_line = (
        "\nHuman and fly against consensus scores after GLOBAL alignment:"
        " {0} {1}"
    ).format(global_score_h, global_score_f)
    print(score_line)
    print("Human and fly sequences after GLOBAL alignment:")
    print(global_human)
    print(global_cons_h)
    print(global_fly)
    print(global_cons_f)
    # Two spaces after the colon are intentional: the label ended in a space
    # and the old print statement added its own separator.
    print("Human/consensus alignment match:  {0:.1%}".format(
        check_sequence(global_human, global_cons_h)))
    print("Fly/consensus alignment match:  {0:.1%}".format(
        check_sequence(global_fly, global_cons_f)))
예제 #2
0
def run_suite():
    """Run a handful of informal checks against the ``pro`` module.

    The checks cover ``build_scoring_matrix``, ``compute_alignment_matrix``,
    ``compute_global_alignment`` and ``compute_local_alignment``. Each
    result is handed to a ``poc_simpletest.TestSuite`` together with its
    expected value and a label; the suite's report is produced at the end.
    """

    def print_matrix(matrix):
        """Debugging aid: print one matrix row per line (unused by the suite)."""
        for row in matrix:
            print(row)

    suite = poc_simpletest.TestSuite()
    print("\nSTARTING TESTS:")

    # --- 1. matrix construction -------------------------------------------
    # Only the row for key 'A' is compared in test 1a.
    row_a = pro.build_scoring_matrix({"A", "B", "C"}, 10, 4, -4)['A']
    suite.run_test(
        row_a,
        {'A': 10, 'C': 4, 'B': 4, '-': -4},
        "Test #1a: 'build_scoring_matrix' method")

    dash_row = pro.build_scoring_matrix({'A', 'C', 'T', 'G'}, 6, 2, -4)['-']
    suite.run_test(
        dash_row,
        {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4},
        "Test #1b: 'build_scoring_matrix' method")

    scoring = pro.build_scoring_matrix({'A', 'C', 'T', 'G'}, 10, 2, -4)
    alignment = pro.compute_alignment_matrix(
        "ACC", "TTTACACGG", scoring, True)
    # Checking the last row of the alignment matrix is sufficient here.
    suite.run_test(
        alignment[-1],
        [-12, -6, 0, 6, 2, 6, 10, 14, 10, 6],
        "Test #1c: 'compute_alignment_matrix' method")

    # --- 2. alignment functions -------------------------------------------
    # 2a reuses the scoring and alignment matrices built for test 1c.
    global_result = pro.compute_global_alignment(
        "ACC", "TTTACACGG", scoring, alignment)
    suite.run_test(
        global_result,
        (6, '---AC-C--', 'TTTACACGG'),
        "Test #2a: 'compute_global_alignment' method")

    # 2b uses hand-written scoring and alignment matrices.
    scoring = {
        'A': {'A': 2, 'C': 1, '-': 0, 'T': 1, 'G': 1},
        'C': {'A': 1, 'C': 2, '-': 0, 'T': 1, 'G': 1},
        '-': {'A': 0, 'C': 0, '-': 0, 'T': 0, 'G': 0},
        'T': {'A': 1, 'C': 1, '-': 0, 'T': 2, 'G': 1},
        'G': {'A': 1, 'C': 1, '-': 0, 'T': 1, 'G': 2},
    }
    alignment = [
        [0, 0, 0, 0, 0, 0],
        [0, 2, 2, 2, 2, 2],
        [0, 2, 3, 4, 4, 4],
        [0, 2, 3, 4, 6, 6],
        [0, 2, 3, 4, 6, 8],
        [0, 2, 3, 5, 6, 8],
        [0, 2, 3, 5, 7, 8],
    ]
    global_result = pro.compute_global_alignment(
        "ACTACT", "AGCTA", scoring, alignment)
    suite.run_test(
        global_result,
        (8, 'A-CTACT', 'AGCTA--'),
        "Test #2b: 'compute_global_alignment' method")

    scoring = pro.build_scoring_matrix({"A", "C", "G", "T"}, 10, 2, -4)
    alignment = pro.compute_alignment_matrix(
        "ACC", "TTTACACGG", scoring, False)
    local_result = pro.compute_local_alignment(
        "ACC", "TTTACACGG", scoring, alignment)
    suite.run_test(
        local_result,
        (26, 'AC-C', 'ACAC'),
        "Test #2c: 'compute_local_alignment' method")

    # --- 3. summary ---------------------------------------------------------
    suite.report_results()
예제 #3
0
def question_1():
    """Print the local alignment of the human and fly sequences.

    Builds an alignment matrix for the module-level ``human`` and ``fly``
    sequences with ``student.compute_alignment_matrix`` (flag ``False``),
    runs ``student.compute_local_alignment`` on it, and prints the score
    followed by the two aligned strings.

    Every ``print`` call takes one pre-formatted string, so the output is
    identical on Python 2 and Python 3 (the former ``print`` statements
    were a syntax error on Python 3).

    Returns:
        None. The function only prints.
    """
    alignment_matrix = student.compute_alignment_matrix(
        human, fly, scoring_matrix, False)
    local_score, local_human, local_fly = student.compute_local_alignment(
        human, fly, scoring_matrix, alignment_matrix)

    print("Human and fly score after LOCAL alignment: {0}".format(
        local_score))
    print("Human and fly sequences after LOCAL alignment:")
    print(local_human)
    print(local_fly)
예제 #4
0
 def calculate_distance(w1, w2):
     """Return ``len(w1) + len(w2)`` minus the global alignment score.

     The scoring matrix is built over the characters occurring in either
     word with the numeric arguments ``2, 1, 0``.

     Args:
         w1: first word.
         w2: second word.

     Returns:
         ``len(w1) + len(w2) - score``, where ``score`` is the first item
         returned by ``student.compute_global_alignment``.
     """
     alphabet = set(w1) | set(w2)
     # The dash score must be zero so that only the diagonal and
     # off-diagonal scores influence the result.
     # NOTE(review): argument order (diag, off-diag, dash) is assumed from
     # how build_scoring_matrix is used elsewhere - confirm in ``student``.
     scores = student.build_scoring_matrix(alphabet, 2, 1, 0)
     matrix = student.compute_alignment_matrix(w1, w2, scores, True)
     best_score, dummy_w1, dummy_w2 = student.compute_global_alignment(
         w1, w2, scores, matrix)
     combined_length = len(w1) + len(w2)
     return combined_length - best_score
예제 #5
0
    def check_spelling(checked_word, dist, word_list):
        """Return the words of ``word_list`` within ``dist`` of ``checked_word``.

        One scoring matrix is built (numeric arguments ``2, 1, 0``) over the
        characters of ``checked_word`` and of every word in ``word_list``.
        Each candidate is then globally aligned against ``checked_word`` and
        its distance is ``len(checked_word) + len(candidate) - score``.

        Args:
            checked_word: the word being checked.
            dist: largest distance (inclusive) for a candidate to be kept.
            word_list: iterable of candidate words.

        Returns:
            A list of the qualifying candidates, in ``word_list`` order.
        """
        alphabet = set(checked_word).union(chain.from_iterable(word_list))
        scores = student.build_scoring_matrix(alphabet, 2, 1, 0)
        checked_length = len(checked_word)

        def is_close(candidate):
            """Tell whether ``candidate`` lies within ``dist`` of the checked word."""
            matrix = student.compute_alignment_matrix(
                checked_word, candidate, scores, True)
            score, dummy_x, dummy_y = student.compute_global_alignment(
                checked_word, candidate, scores, matrix)
            return checked_length + len(candidate) - score <= dist

        return [candidate for candidate in word_list if is_close(candidate)]
예제 #6
0
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """Tally local alignment scores of ``seq_x`` against shuffles of ``seq_y``.

    For each trial a fresh copy of ``seq_y`` is turned into a list and
    shuffled in place with ``random.shuffle``. ``seq_x`` is left untouched,
    so the real sequence is compared with randomised data. The shuffled
    list is passed as-is to the ``student`` alignment functions.

    Args:
        seq_x: sequence kept in its original order.
        seq_y: sequence whose elements are shuffled on every trial.
        scoring_matrix: scoring matrix forwarded to the ``student`` functions.
        num_trials: number of shuffles to perform.

    Returns:
        A dict mapping each observed local alignment score to the number of
        trials that produced it (empty when no trial is run).
    """
    histogram = {}
    for _ in range(num_trials):
        shuffled_y = list(seq_y)
        random.shuffle(shuffled_y)
        matrix = student.compute_alignment_matrix(
            seq_x, shuffled_y, scoring_matrix, False)
        score, dummy_x, dummy_y = student.compute_local_alignment(
            seq_x, shuffled_y, scoring_matrix, matrix)
        histogram[score] = histogram.get(score, 0) + 1
    return histogram
예제 #7
0
def question_5():
    """Print mean, standard deviation and z-value for the human/fly score.

    A hard-coded null distribution (score -> number of occurrences) is
    expanded into a flat list of scores, from which the mean and the
    population standard deviation are computed. The actual local alignment
    score of the module-level ``human`` and ``fly`` sequences is obtained
    through the ``student`` module and compared with that distribution.

    Every ``print`` call takes one pre-formatted string, so the output is
    identical on Python 2 and Python 3 (the former ``print`` statements
    were a syntax error on Python 3).

    Returns:
        None. The function only prints.
    """
    # Cached copy of a run of
    #   generate_null_distribution(human, fly, scoring_matrix, 1000)
    # which takes too long to recompute on every call.
    distribution = {
        39: 4, 40: 5, 41: 15, 42: 24, 43: 28, 44: 39, 45: 52, 46: 70,
        47: 71, 48: 65, 49: 74, 50: 77, 51: 67, 52: 63, 53: 59, 54: 32,
        55: 41, 56: 35, 57: 29, 58: 20, 59: 21, 60: 22, 61: 12, 62: 12,
        63: 11, 64: 5, 65: 7, 66: 6, 67: 4, 68: 11, 69: 5, 70: 3,
        71: 3, 72: 1, 74: 1, 75: 2, 77: 1, 78: 1, 80: 2,
    }
    # Repeat every score as many times as it occurred.
    scores = list(chain.from_iterable(
        [score] * count for score, count in distribution.items()))

    # The actual human/fly local score is needed for the z-value; the
    # aligned strings themselves are not used here.
    alignment_matrix = student.compute_alignment_matrix(
        human, fly, scoring_matrix, False)
    local_score, dummy_human, dummy_fly = student.compute_local_alignment(
        human, fly, scoring_matrix, alignment_matrix)

    sample_size = float(len(scores))
    mean = sum(scores) / sample_size
    stdev = math.sqrt(
        sum((score - mean) ** 2 for score in scores) / sample_size)

    # Small z-scores indicate a greater likelihood that the local alignment
    # score was due to chance; larger ones indicate a lower likelihood.
    deviation = local_score - mean
    # Number of standard deviations the real score lies from the mean.
    z_value = deviation / stdev

    print("the mean value is : {0}".format(mean))
    print("the standard deviation is : {0}".format(stdev))
    print("the z-value is : {0}".format(z_value))
    print(
        "the match of the random data is off (3 x stdv) by approx. : "
        "{0} times".format(int(deviation / (3 * stdev))))