Beispiel #1
0
def test_merge_pair(record1, record2, record4):
    """
    Assemble a compatible overlapping read pair; reject an incompatible one.

    The first pair overlaps by 7 bases at offset 13 and merges cleanly:

    GCTGCACCGATGTACGCAAA
                 |||||||                -->   GCTGCACCGATGTACGCAAAGCTATTTAAAACC
                 ACGCAAAGCTATTTAAAACC

    The second pair reuses the same offset and overlap with `record4` as the
    head, and `merge_pair` is expected to raise an AssertionError for it.
    """
    pair = OverlappingReadPair(tail=record1,
                               head=record2,
                               offset=13,
                               overlap=7,
                               sameorient=True,
                               swapped=False)
    assert merge_pair(pair) == 'GCTGCACCGATGTACGCAAAGCTATTTAAAACC'

    pair = OverlappingReadPair(tail=record1,
                               head=record4,
                               offset=13,
                               overlap=7,
                               sameorient=True,
                               swapped=False)
    with pytest.raises(AssertionError) as excinfo:
        merge_pair(pair)
    # Inspect the raised exception itself: the string form of the
    # ExceptionInfo wrapper is not stable across pytest versions.
    assert 'attempted to assemble incompatible reads' in str(excinfo.value)
Beispiel #2
0
def test_assembly_round2():
    """Merge an existing contig with one additional read.

    Loads `var1.round2.augfastq` into a ReadGraph, computes the overlap
    between the `contig1` record and the `read22f` record from the given
    seed sequence, and checks that the read ends up as the tail, the contig
    as the head, and that merging the pair yields the expected sequence.
    """
    # Close the input stream as soon as the graph has consumed it instead of
    # leaking the open file handle for the rest of the test session.
    with kevlar.open(data_file('var1.round2.augfastq'), 'r') as instream:
        graph = kevlar.ReadGraph()
        graph.load(kevlar.parse_augmented_fastx(instream))
    contig = graph.get_record('contig1')
    read = graph.get_record('read22f start=5,mutations=0')
    pair = calc_offset(contig, read, 'AAGTCTCGACTTTAAGGAAGTGGGCCTAC')
    assert pair.tail == read
    assert pair.head == contig
    assert merge_pair(pair) == ('TATCACTGTCCTTACAGGTGGATAGTCGCTTTGTAATAAAAGAGT'
                                'TACACCCCGGTTTTTAGAAGTCTCGACTTTAAGGAAGTGGGCCTA'
                                'CGGCGGAAGCCGTC')
Beispiel #3
0
def test_graph_init():
    """Test graph initialization.

    Loads `var1.reads.augfastq` into a ReadGraph, populates edges in strict
    mode, and checks node count, the neighbors of one read, edge attributes,
    component/partition counts, and the merge of one overlapping pair.
    """
    # Close the input stream as soon as the graph has consumed it instead of
    # leaking the open file handle for the rest of the test session.
    with kevlar.open(data_file('var1.reads.augfastq'), 'r') as instream:
        graph = kevlar.ReadGraph()
        graph.load(kevlar.parse_augmented_fastx(instream))
    graph.populate_edges(strict=True)

    # 10 reads in the file, but read16f has no valid connections due to error
    assert len(graph.nodes()) == 10

    # The given read shares its interesting k-mer and has compatible overlaps
    # with 6 other reads (read13f and read15f have errors).
    r23name = 'read23f start=67,mutations=0'
    assert len(graph[r23name]) == 6

    # Test the values of one of the edges.
    r35name = 'read35f start=25,mutations=0'
    assert graph[r23name][r35name]['offset'] == 42
    assert graph[r23name][r35name]['overlap'] == 58

    # The graph has 2 connected components (presumably the main cluster plus
    # the unconnected read16f noted above), but only 1 partition is reported.
    assert len(list(connected_components(graph))) == 2
    assert len(list(graph.partitions())) == 1

    # Check one edge's attributes, then merge the same two reads directly.
    r8name = 'read8f start=8,mutations=0'
    r37name = 'read37f start=9,mutations=0'
    assert graph[r37name][r8name]['offset'] == 1
    assert graph[r37name][r8name]['overlap'] == 99
    pair = OverlappingReadPair(tail=graph.get_record(r8name),
                               head=graph.get_record(r37name),
                               offset=1,
                               overlap=99,
                               sameorient=True,
                               swapped=False)
    assert merge_pair(pair) == ('CACTGTCCTTACAGGTGGATAGTCGCTTTGTAATAAAAGAGTTAC'
                                'ACCCCGGTTTTTAGAAGTCTCGACTTTAAGGAAGTGGGCCTACGG'
                                'CGGAAGCCGTC')