Ejemplo n.º 1
0
def test_get_global_aln_results_1():
    """
    Align a sequence against an identical copy of itself.

    A random 45000-base sequence serves as both reference and query, so the
    rounded result is expected to be (delta_len=0, idt=1.00, cov=1.00).
    """

    reference = ''.join(random.choice('ACTG') for _ in range(45000))
    query = reference
    min_len = 2000

    raw = mod.get_global_aln_results(reference, query, min_len)

    # Round the fractional fields to two decimals so the comparison is exact.
    delta_len = raw[0]
    idt = round(raw[1], 2)
    cov = round(raw[2], 2)

    assert (delta_len, idt, cov) == (0, 1.00, 1.00)
Ejemplo n.º 2
0
def test_get_global_aln_results_6():
    """
    Align short (90 bp) identical sequences with a 50 bp minimum length.

    The DW alignment enforces a 100bp minimum distance threshold on
    (e1 - s1) and (e2 - s2); Edlib has no such constraint, so a perfect
    match (delta_len=0, idt=1.00, cov=1.00) is still expected here.
    """

    reference = ''.join(random.choice('ACTG') for _ in range(90))
    query = reference
    min_len = 50

    raw = mod.get_global_aln_results(reference, query, min_len)

    # Round the fractional fields to two decimals so the comparison is exact.
    delta_len = raw[0]
    idt = round(raw[1], 2)
    cov = round(raw[2], 2)

    assert (delta_len, idt, cov) == (0, 1.00, 1.00)
Ejemplo n.º 3
0
def test_get_global_aln_results_4():
    """
    Align very long (300000-base) identical sequences.

    Edlib is memory-efficient and should handle this input easily; the
    rounded result is expected to be (delta_len=0, idt=1.00, cov=1.00).
    """

    reference = ''.join(random.choice('ACTG') for _ in range(300000))
    query = reference
    min_len = 2000

    raw = mod.get_global_aln_results(reference, query, min_len)

    # Round the fractional fields to two decimals so the comparison is exact.
    delta_len = raw[0]
    idt = round(raw[1], 2)
    cov = round(raw[2], 2)

    assert (delta_len, idt, cov) == (0, 1.00, 1.00)
Ejemplo n.º 4
0
def test_get_global_aln_results_3():
    """
    Align non-identical sequences where the query carries an insertion.

    The reference is the 45000-base query with bases 20000..24999 removed,
    i.e. the query has a 5000-base insertion relative to the reference.
    Expected rounded result: (delta_len=5000, idt=0.89, cov=1.0).
    """

    query = ''.join(random.choice('ACTG') for _ in range(45000))
    # Drop a 5000-base window from the middle of the query to build the ref.
    reference = query[:20000] + query[25000:]
    min_len = 2000

    raw = mod.get_global_aln_results(reference, query, min_len)

    # Round the fractional fields to two decimals so the comparison is exact.
    delta_len = raw[0]
    idt = round(raw[1], 2)
    cov = round(raw[2], 2)

    assert (delta_len, idt, cov) == (5000, 0.89, 1.0)
Ejemplo n.º 5
0
def test_get_global_aln_results_7():
    """
    Align two sequences that share no bases at all.

    The reference is drawn only from 'AC' and the query only from 'GT'.
    Because the align_edlib.py module applies a global alignment, coverage
    is always reported as 1.00, while the expected idt here is 0.00.
    """

    reference = ''.join(random.choice('AC') for _ in range(3000))
    query = ''.join(random.choice('GT') for _ in range(3000))
    min_len = 2000

    raw = mod.get_global_aln_results(reference, query, min_len)

    # Round the fractional fields to two decimals so the comparison is exact.
    delta_len = raw[0]
    idt = round(raw[1], 2)
    cov = round(raw[2], 2)

    assert (delta_len, idt, cov) == (0, 0.00, 1.00)
Ejemplo n.º 6
0
def test_get_global_aln_results_5():
    """
    Align identical sequences that are shorter than the minimum length.

    The legacy deduplication code applied a minimum sequence length threshold
    for alignment: if either sequence was shorter than it, no alignment was
    performed and the sequence was not deduplicated even when it was a true
    duplicate. With 300-base inputs and a 2000 minimum, the expected rounded
    result is therefore (delta_len=0, idt=0.00, cov=0.00).
    """

    reference = ''.join(random.choice('ACTG') for _ in range(300))
    query = reference
    min_len = 2000

    raw = mod.get_global_aln_results(reference, query, min_len)

    # Round the fractional fields to two decimals so the comparison is exact.
    delta_len = raw[0]
    idt = round(raw[1], 2)
    cov = round(raw[2], 2)

    assert (delta_len, idt, cov) == (0, 0.00, 0.00)