Example #1
0
def test_find_pattern_clumps_with_genome():
    """Check the clump count for the genome stored in ``e_coli.txt``.

    Calls ``find_pattern_clumps`` with ``substring_length=9``,
    ``window_length=500`` and ``minimum_frequency=3`` and expects exactly
    1904 patterns to be returned.
    """
    genome = read_genome("genomics_algo/tests/test_data/e_coli.txt")
    clumps = find_pattern_clumps(
        text=genome,
        substring_length=9,
        window_length=500,
        minimum_frequency=3,
    )
    assert len(clumps) == 1904
def test_get_occurences_with_exact_match_with_reverse_complement(
        exact_matching_algo):
    """Check reverse-complement-aware matching on small texts and phiX.

    The matcher under test is whatever callable arrives as
    ``exact_matching_algo``; it is forwarded unchanged to
    ``get_occurences_with_exact_match_with_reverse_complement``.
    """
    # (pattern, text, expected offsets) for the hand-written scenarios.
    small_cases = (
        # Only the forward pattern is present in the text.
        ("ACT", "GACTACGGAGACT", [1, 10]),
        # Forward pattern at 10, its reverse complement (GGG) at 23.
        ("CCC", "AAAAAAAAAACCCAAAAAAAAAAGGGAAAAAAAAAA", [10, 23]),
        # CGCG is its own reverse complement; each offset is expected once.
        ("CGCG", "AAAAAAAAAACGCGAAAAAAAAAACGCGAAAAAAAAAA", [10, 24]),
    )
    for motif, sequence, expected in small_cases:
        hits = get_occurences_with_exact_match_with_reverse_complement(
            motif, sequence, exact_matching_algo)
        assert hits == expected

    # Real-genome scenario: only the first offset and the hit count are pinned.
    phix = read_genome("genomics_algo/tests/test_data/genomes/phix.fa")
    phix_hits = get_occurences_with_exact_match_with_reverse_complement(
        "ATTA", phix, exact_matching_algo)
    assert min(phix_hits) == 62
    assert len(phix_hits) == 60
def test_get_occurences_in_entire_genome_with_boyer_moores_exact_match(
        exact_matching_algo):
    """Check the offsets of ``ATGATCAAG`` in ``vibrio_cholerae.txt``.

    The callable passed in as ``exact_matching_algo`` is invoked as
    ``exact_matching_algo(pattern, text)`` and must return exactly the
    seventeen offsets listed below, in this order.
    """
    expected_offsets = [
        116556, 149355, 151913, 152013, 152394, 186189,
        194276, 200076, 224527, 307692, 479770, 610980,
        653338, 679985, 768828, 878903, 985368,
    ]
    genome = read_genome(
        "genomics_algo/tests/test_data/genomes/vibrio_cholerae.txt")
    assert exact_matching_algo("ATGATCAAG", genome) == expected_offsets
Example #4
0
def test_find_minimum_gc_skew_location_in_genome():
    """Check the minimum-GC-skew positions reported for ``e_coli.txt``.

    ``find_minimum_gc_skew_location`` is expected to return the four
    consecutive positions 3923619 through 3923622.
    """
    expected_positions = [3923619, 3923620, 3923621, 3923622]
    ecoli = read_genome("genomics_algo/tests/test_data/e_coli.txt")
    np.testing.assert_array_equal(
        expected_positions, find_minimum_gc_skew_location(ecoli))
def test_read_genome():
    """Check that ``read_genome`` loads ``lambda_virus.fa`` as expected.

    Pins both the total length of the returned sequence and its first
    fifty characters.
    """
    expected_length = 48502
    expected_prefix = "GGGCGGCGACCTCGCGGGTTTTCGCTATTTATGAAAATTTTCCGGTTTAA"
    sequence = read_genome(
        "genomics_algo/tests/test_data/genomes/lambda_virus.fa")
    assert len(sequence) == expected_length
    assert sequence[:50] == expected_prefix