Beispiel #1
0
def test_merge_pair(record1, record2, record4):
    """
    Assemble a compatible overlapping read pair.

    GCTGCACCGATGTACGCAAA
                  |||||                 -->   GCTGCACCGATGTACGCAAAGCTATTTAAAACC
                 ACGCAAAGCTATTTAAAACC

    A second pair, built from record1 and record4 with the same offset and
    overlap, is expected to make merge_pair raise an AssertionError.
    """
    pair = OverlappingReadPair(tail=record1,
                               head=record2,
                               offset=13,
                               overlap=7,
                               sameorient=True,
                               swapped=False)
    assert merge_pair(pair) == 'GCTGCACCGATGTACGCAAAGCTATTTAAAACC'

    pair = OverlappingReadPair(tail=record1,
                               head=record4,
                               offset=13,
                               overlap=7,
                               sameorient=True,
                               swapped=False)
    with pytest.raises(AssertionError) as ae:
        # The return value is irrelevant here; only the raised error matters.
        merge_pair(pair)
    # Check the message on the exception itself (ae.value) rather than on the
    # ExceptionInfo wrapper, whose string form is a pytest implementation
    # detail.
    assert 'attempted to assemble incompatible reads' in str(ae.value)
Beispiel #2
0
def test_merge_contained_with_offset_and_error(record7, record12):
    """
    Test merge with containment, offset, and a sequencing error.

    CAGGTCCCCACCCGGATACTTGAAGCAGGCAGCCTCAAGGTATGTGAGGCGATAACTCAA
        |||||||||||||||||
         |||||||||||||||||
              |||||||||||||||||
                                          |||||||||||||||||
                                           |||||||||||||||||
                                            |||||||||||||||||
              CCCGGATACTTGAAGCAGGCAcC
              |||||||||||||||||    *

    calc_offset is expected to report the pair as incompatible, and forcing
    the merge with a hand-built pair is expected to raise an AssertionError.
    """
    pair = calc_offset(record7, record12, 'CCCGGATACTTGAAGCA')
    assert pair == INCOMPATIBLE_PAIR

    pair = OverlappingReadPair(tail=record7,
                               head=record12,
                               offset=10,
                               overlap=23,
                               sameorient=True,
                               swapped=False)
    with pytest.raises(AssertionError) as ae:
        # The return value is irrelevant here; only the raised error matters.
        merge_and_reannotate(pair, 'WontWork')
    # Check the message on the exception itself (ae.value) rather than on the
    # ExceptionInfo wrapper, whose string form is a pytest implementation
    # detail.
    assert 'attempted to assemble incompatible reads' in str(ae.value)
Beispiel #3
0
def test_merge_and_reannotate_contained(record7, record10):
    """
    Merge and re-annotate a pair in which the head lies within the tail.

    CAGGTCCCCACCCGGATACTTGAAGCAGGCAGCCTCAAGGTATGTGAGGCGATAACTCAA
        |||||||||||||||||
         |||||||||||||||||
              |||||||||||||||||
                                          |||||||||||||||||
                                           |||||||||||||||||
                                            |||||||||||||||||
    CAGGTCCCCACCCGGATACTTGAAGCAGGCAGCCT
        |||||||||||||||||
         |||||||||||||||||
              |||||||||||||||||

    The merged record is expected to carry the new name while keeping the
    sequence and interesting k-mers of record7.
    """
    contained = OverlappingReadPair(
        tail=record7, head=record10,
        offset=0, overlap=35,
        sameorient=True, swapped=False,
    )
    merged = merge_and_reannotate(contained, 'ContainedAtOne')

    assert merged.name == 'ContainedAtOne'
    assert merged.sequence == record7.sequence
    assert merged.ikmers == record7.ikmers
Beispiel #4
0
def test_merge_and_reannotate_edge_case_opposite_orientation(record7, record9):
    """
    Test merge/reannotation with edge cases to check for fencepost errors.

    CAGGTCCCCACCCGGATACTTGAAGCAGGCAGCCTCAAGGTATGTGAGGCGATAACTCAA
        |||||||||||||||||
         |||||||||||||||||
              |||||||||||||||||
                                          |||||||||||||||||
                                           |||||||||||||||||
                                            |||||||||||||||||
               reverse complement  ----->  GTATGTGAGGCGATAACTCAAGACCACGGGAGCTCACTTCGTTGACGCGAGCGCCTTGCT
                                           |||||||||||||||||
                                            |||||||||||||||||
                                                                                     |||||||||||||||||
                                                                                      |||||||||||||||||
    """  # noqa
    pair = OverlappingReadPair(tail=record7,
                               head=record9,
                               offset=39,
                               overlap=21,
                               sameorient=False,
                               swapped=False)
    newrecord = merge_and_reannotate(pair, 'SoMeCoNtIg')
    assert newrecord.name == 'SoMeCoNtIg'
    assert newrecord.sequence == ('CAGGTCCCCACCCGGATACTTGAAGCAGGCAGCCTCAAGGTAT'
                                  'GTGAGGCGATAACTCAAGACCACGGGAGCTCACTTCGTTGACG'
                                  'CGAGCGCCTTGCT')
    assert len(newrecord.ikmers) == 8

    testseqs = [
        'TCCCCACCCGGATACTT',
        'CCCCACCCGGATACTTG',
        'CCCGGATACTTGAAGCA',
        'GGTATGTGAGGCGATAA',
        'GTATGTGAGGCGATAAC',
        'TATGTGAGGCGATAACT',
    ]
    testoffsets = [4, 5, 10, 38, 39, 40, 81, 82]

    # Compare whole lists rather than zip()-ing inputs of different lengths:
    # zip() stops at the shortest one, which would leave the final two
    # offsets (81 and 82) unchecked.
    assert [kmer.offset for kmer in newrecord.ikmers] == testoffsets

    # Expected sequences are only listed for the first six k-mers, so only
    # that prefix is compared.
    observed_seqs = [kmer.sequence for kmer in newrecord.ikmers]
    assert observed_seqs[:len(testseqs)] == testseqs
Beispiel #5
0
def test_merge_and_reannotate_opposite_orientation(record1, record3):
    """
    Merge an opposite-orientation pair and re-annotate its interesting k-mers.

    GCTGCACCGATGTACGCAAA
                  |||||                 -->   GCTGCACCGATGTACGCAAAGCTATTTAAAACC
                 ACGCAAAGCTATTTAAAACC                       *****        *****
    """
    opposite = OverlappingReadPair(
        tail=record1, head=record3,
        offset=13, overlap=7,
        sameorient=False, swapped=False,
    )
    contig = merge_and_reannotate(opposite, 'contig1')

    assert contig.name == 'contig1'
    assert contig.sequence == 'GCTGCACCGATGTACGCAAAGCTATTTAAAACC'

    # Two interesting k-mers are expected on the merged contig.
    assert len(contig.ikmers) == 2
    first_kmer = contig.ikmers[0]
    second_kmer = contig.ikmers[1]
    assert first_kmer.offset == 14
    assert second_kmer.offset == 27
Beispiel #6
0
def test_graph_init():
    """Load a read graph from var1.reads.augfastq and check its structure."""
    reads = kevlar.open(data_file('var1.reads.augfastq'), 'r')
    graph = kevlar.ReadGraph()
    graph.load(kevlar.parse_augmented_fastx(reads))
    graph.populate_edges(strict=True)

    # The file holds 10 reads; per the original author's note, read16f has no
    # valid connections because of a sequencing error, yet it still counts as
    # a node.
    assert len(graph.nodes()) == 10

    # read23f shares its interesting k-mer and has compatible overlaps with 6
    # other reads (read13f and read15f are noted as containing errors).
    r23name = 'read23f start=67,mutations=0'
    assert len(graph[r23name]) == 6

    # Spot-check the attributes stored on one edge.
    r35name = 'read35f start=25,mutations=0'
    edge_23_35 = graph[r23name][r35name]
    assert edge_23_35['offset'] == 42
    assert edge_23_35['overlap'] == 58

    # Two connected components in total, but only one partition is reported.
    # NOTE(review): presumably the unconnected read forms its own component
    # and partitions() skips it -- confirm against ReadGraph.partitions().
    assert sum(1 for _ in connected_components(graph)) == 2
    assert len(list(graph.partitions())) == 1

    r8name = 'read8f start=8,mutations=0'
    r37name = 'read37f start=9,mutations=0'
    edge_37_8 = graph[r37name][r8name]
    assert edge_37_8['offset'] == 1
    assert edge_37_8['overlap'] == 99

    # Merge the two reads directly using the offset and overlap checked above.
    pair = OverlappingReadPair(
        tail=graph.get_record(r8name),
        head=graph.get_record(r37name),
        offset=1,
        overlap=99,
        sameorient=True,
        swapped=False,
    )
    expected_contig = ('CACTGTCCTTACAGGTGGATAGTCGCTTTGTAATAAAAGAGTTAC'
                       'ACCCCGGTTTTTAGAAGTCTCGACTTTAAGGAAGTGGGCCTACGG'
                       'CGGAAGCCGTC')
    assert merge_pair(pair) == expected_contig