def test_find_pattern_clumps_with_genome():
    """Check the number of clump-forming patterns found in the e_coli.txt fixture.

    Searches for 9-character substrings occurring at least 3 times inside a
    500-character window and compares the number of reported patterns with
    the known reference value.
    """
    genome = read_genome("genomics_algo/tests/test_data/e_coli.txt")
    clumps = find_pattern_clumps(
        text=genome,
        substring_length=9,
        window_length=500,
        minimum_frequency=3,
    )
    expected_clump_count = 1904
    assert len(clumps) == expected_clump_count
def test_get_occurences_with_exact_match_with_reverse_complement(
    exact_matching_algo,
):
    """Exercise reverse-complement-aware matching on toy inputs and on phiX.

    ``exact_matching_algo`` is supplied by the test harness (presumably a
    pytest fixture providing the exact-matching implementation under test).
    """
    # (text, pattern, expected offsets) triples for the hand-checkable cases.
    small_cases = (
        # Pattern occurs twice on the given strand.
        ("GACTACGGAGACT", "ACT", [1, 10]),
        # "CCC" at 10 and its reverse complement "GGG" at 23.
        ("AAAAAAAAAACCCAAAAAAAAAAGGGAAAAAAAAAA", "CCC", [10, 23]),
        # "CGCG" equals its own reverse complement; each site is listed once.
        ("AAAAAAAAAACGCGAAAAAAAAAACGCGAAAAAAAAAA", "CGCG", [10, 24]),
    )
    for sequence, motif, expected_offsets in small_cases:
        found = get_occurences_with_exact_match_with_reverse_complement(
            motif, sequence, exact_matching_algo
        )
        assert found == expected_offsets

    # Full-genome check: only the first offset and the total count are pinned.
    phix_genome = read_genome("genomics_algo/tests/test_data/genomes/phix.fa")
    phix_hits = get_occurences_with_exact_match_with_reverse_complement(
        "ATTA", phix_genome, exact_matching_algo
    )
    assert min(phix_hits) == 62
    assert len(phix_hits) == 60
def test_get_occurences_in_entire_genome_with_boyer_moores_exact_match(
    exact_matching_algo,
):
    """Match a 9-mer against the vibrio_cholerae.txt fixture and compare offsets.

    ``exact_matching_algo`` is called as ``algo(pattern, text)`` and its
    result must equal the reference list of offsets exactly (same order).
    """
    expected_offsets = [
        116556,
        149355,
        151913,
        152013,
        152394,
        186189,
        194276,
        200076,
        224527,
        307692,
        479770,
        610980,
        653338,
        679985,
        768828,
        878903,
        985368,
    ]
    genome = read_genome(
        "genomics_algo/tests/test_data/genomes/vibrio_cholerae.txt"
    )
    hits = exact_matching_algo("ATGATCAAG", genome)
    assert hits == expected_offsets
def test_find_minimum_gc_skew_location_in_genome():
    """Check the minimum-GC-skew positions reported for the e_coli.txt fixture.

    The result of ``find_minimum_gc_skew_location`` is compared element-wise
    against four consecutive reference positions.
    """
    genome = read_genome("genomics_algo/tests/test_data/e_coli.txt")
    result = find_minimum_gc_skew_location(genome)
    expected_positions = [3923619, 3923620, 3923621, 3923622]
    # assert_array_equal's signature is (actual, desired); passing the computed
    # value first keeps the ACTUAL/DESIRED labels in a failure report correct.
    np.testing.assert_array_equal(result, expected_positions)
def test_read_genome():
    """Verify the length and first 50 characters read from lambda_virus.fa."""
    expected_length = 48502
    expected_prefix = "GGGCGGCGACCTCGCGGGTTTTCGCTATTTATGAAAATTTTCCGGTTTAA"

    sequence = read_genome(
        "genomics_algo/tests/test_data/genomes/lambda_virus.fa"
    )

    assert len(sequence) == expected_length
    assert sequence[:50] == expected_prefix