Beispiel #1
0
def test_longest_overlap():
    """Two reads sharing a long overlap are merged on that overlap.

    Merging on the longest overlap gives the shortest combined sequence,
    which is what each expected value below spells out.
    """
    cases = (
        ({"one": "ACCCC", "two": "CCCCG"}, "ACCCCG"),
        # Here the read that ends the result is listed first.
        ({"three": "GGGGT", "four": "AGGGG"}, "AGGGGT"),
    )
    for reads, expected in cases:
        assert find_overlaps(reads) == expected
def test_rosalind_exercise():
    """Compare the result for the Rosalind dataset with the stored answer."""
    dataset_path = "data/rosalind_long5.txt"
    expected_path = "results/long5.txt"

    # Only the first line of the answer file is used as the expected value.
    with open(expected_path, "r") as handle:
        expected = handle.readline().strip()

    reads = read_sequences(dataset_path)
    assembled = find_overlaps(reads)
    assert assembled == expected
Beispiel #3
0
def test_simple_sequences_with_duplicates():
    """Duplicated reads must not be repeated in the assembled result."""
    labels = ("one", "two", "three", "one-dup", "two-dup", "three-dup")
    # Every read appears twice: once under its name, once under "<name>-dup".
    reads = ("AC", "CG", "GT") * 2
    duplicated_reads = dict(zip(labels, reads))

    expected = "ACGT"
    assert find_overlaps(duplicated_reads) == expected
Beispiel #4
0
def test_two_simple_sequences():
    """Two reads overlapping by one character are joined into one sequence."""
    reads = {"one": "AC", "two": "CG"}
    expected = "ACG"
    assert find_overlaps(reads) == expected
Beispiel #5
0
def test_simple_deduplication():
    """Two identical reads collapse into a single copy of the sequence."""
    identical_reads = {"one": "A", "two": "A"}
    result = find_overlaps(identical_reads)
    assert result == "A"
Beispiel #6
0
def test_single_sequence():
    """A lone read is returned unchanged."""
    only_read = "ACGT"
    reads = {"single_sequence": only_read}
    assert find_overlaps(reads) == only_read
Beispiel #7
0
def test_find_overlaps():
    """The sample dataset assembles to the known expected sequence."""
    expected = "ATTAGACCTGCCGGAATAC"
    assembled = find_overlaps(sample_dictionary)
    assert assembled == expected