def test_merge_pair(record1, record2, record4):
    """
    Assemble a compatible overlapping read pair.

    The head read is placed at offset 13 of the tail read, giving a 7 bp
    overlap (marked with bars):

    GCTGCACCGATGTACGCAAA
                 |||||||      -->  GCTGCACCGATGTACGCAAAGCTATTTAAAACC
                 ACGCAAAGCTATTTAAAACC

    The same tail paired with `record4` at the same offset must be rejected:
    the `merge_pair` call is expected to raise an AssertionError whose
    message mentions incompatible reads.
    """
    compatible = OverlappingReadPair(tail=record1, head=record2, offset=13,
                                     overlap=7, sameorient=True,
                                     swapped=False)
    assert merge_pair(compatible) == 'GCTGCACCGATGTACGCAAAGCTATTTAAAACC'

    incompatible = OverlappingReadPair(tail=record1, head=record4, offset=13,
                                       overlap=7, sameorient=True,
                                       swapped=False)
    with pytest.raises(AssertionError) as ae:
        # Return value is irrelevant here; only the raised error matters.
        merge_pair(incompatible)
    # `ae` is pytest's ExceptionInfo wrapper; the message lives on the
    # wrapped exception (`ae.value`), not on the wrapper's own str().
    assert 'attempted to assemble incompatible reads' in str(ae.value)
def test_merge_contained_with_offset_and_error(record7, record12):
    """
    Test merge with containment, offset, and a sequencing error.

    The shorter read sits at offset 10 of the longer one. Bars mark the
    shared k-mer passed to `calc_offset`; the asterisk marks the mismatching
    base (lowercase in the shorter read):

    CAGGTCCCCACCCGGATACTTGAAGCAGGCAGCCTCAAGGTATGTGAGGCGATAACTCAA
              CCCGGATACTTGAAGCAGGCAcC
              |||||||||||||||||    *

    Two things are checked: `calc_offset` reports the pair as incompatible,
    and forcing the merge through a hand-built pair raises an AssertionError.
    """
    pair = calc_offset(record7, record12, 'CCCGGATACTTGAAGCA')
    assert pair == INCOMPATIBLE_PAIR

    forced = OverlappingReadPair(tail=record7, head=record12, offset=10,
                                 overlap=23, sameorient=True, swapped=False)
    with pytest.raises(AssertionError) as ae:
        # Return value is irrelevant here; only the raised error matters.
        merge_and_reannotate(forced, 'WontWork')
    # `ae` is pytest's ExceptionInfo wrapper; the message lives on the
    # wrapped exception (`ae.value`), not on the wrapper's own str().
    assert 'attempted to assemble incompatible reads' in str(ae.value)
def test_merge_and_reannotate_contained(record7, record10):
    """
    Test merge/reannotation with containment.

    The head read lies entirely within the tail read, starting at offset 0:

    CAGGTCCCCACCCGGATACTTGAAGCAGGCAGCCTCAAGGTATGTGAGGCGATAACTCAA
    CAGGTCCCCACCCGGATACTTGAAGCAGGCAGCCT

    The merged record is expected to carry the new name while keeping the
    tail read's sequence and interesting k-mers.
    """
    contained = OverlappingReadPair(
        tail=record7,
        head=record10,
        offset=0,
        overlap=35,
        sameorient=True,
        swapped=False,
    )
    merged = merge_and_reannotate(contained, 'ContainedAtOne')

    assert merged.name == 'ContainedAtOne'
    assert merged.sequence == record7.sequence
    assert merged.ikmers == record7.ikmers
def test_merge_and_reannotate_edge_case_opposite_orientation(record7, record9):
    """
    Test merge/reannotation with edge cases to check for fencepost errors.

    The pair is built with sameorient=False. The head read is drawn below as
    it aligns to the tail (reverse complemented), at offset 39 with a 21 bp
    overlap:

    CAGGTCCCCACCCGGATACTTGAAGCAGGCAGCCTCAAGGTATGTGAGGCGATAACTCAA
                                           GTATGTGAGGCGATAACTCAAGACCACGGGAGCTCACTTCGTTGACGCGAGCGCCTTGCT

    The merged record is expected to hold 8 interesting k-mers at offsets
    4, 5, 10, 38, 39, 40, 81 and 82.
    """  # noqa
    pair = OverlappingReadPair(tail=record7, head=record9, offset=39,
                               overlap=21, sameorient=False, swapped=False)
    newrecord = merge_and_reannotate(pair, 'SoMeCoNtIg')
    assert newrecord.name == 'SoMeCoNtIg'
    assert newrecord.sequence == ('CAGGTCCCCACCCGGATACTTGAAGCAGGCAGCCTCAAGGTAT'
                                  'GTGAGGCGATAACTCAAGACCACGGGAGCTCACTTCGTTGACG'
                                  'CGAGCGCCTTGCT')
    assert len(newrecord.ikmers) == 8

    testseqs = [
        'TCCCCACCCGGATACTT', 'CCCCACCCGGATACTTG', 'CCCGGATACTTGAAGCA',
        'GGTATGTGAGGCGATAA', 'GTATGTGAGGCGATAAC', 'TATGTGAGGCGATAACT',
    ]
    testoffsets = [4, 5, 10, 38, 39, 40, 81, 82]

    # zip() stops at its shortest argument, so zipping the k-mers with both
    # lists would silently skip the last two offsets. Compare every offset
    # explicitly instead.
    assert [kmer.offset for kmer in newrecord.ikmers] == testoffsets

    # Only six expected sequences are listed, so the sequences of the last
    # two k-mers remain unchecked.
    # NOTE(review): consider adding the two missing expected sequences.
    for kmer, seq in zip(newrecord.ikmers, testseqs):
        assert kmer.sequence == seq
def test_merge_and_reannotate_opposite_orientation(record1, record3):
    """
    Assemble a read pair and re-annotate the associated interesting k-mers.

    The pair is built with sameorient=False, offset 13 and a 7 bp overlap
    (bars). Asterisks mark the k-mer positions expected in the merged
    contig, at offsets 14 and 27:

    GCTGCACCGATGTACGCAAA
                 |||||||      -->  GCTGCACCGATGTACGCAAAGCTATTTAAAACC
                 ACGCAAAGCTATTTAAAACC
                  *****        *****
    """
    opposite = OverlappingReadPair(
        tail=record1,
        head=record3,
        offset=13,
        overlap=7,
        sameorient=False,
        swapped=False,
    )
    contig = merge_and_reannotate(opposite, 'contig1')

    assert contig.name == 'contig1'
    assert contig.sequence == 'GCTGCACCGATGTACGCAAAGCTATTTAAAACC'

    ikmers = contig.ikmers
    assert len(ikmers) == 2
    first, second = ikmers[0], ikmers[1]
    assert first.offset == 14
    assert second.offset == 27
def test_graph_init():
    """
    Test graph initialization.

    Loads `var1.reads.augfastq` into a `kevlar.ReadGraph`, populates edges
    in strict mode, then checks node, neighbor, component and partition
    counts, the attributes of two edges, and the merge of one read pair.
    """
    reads_fh = kevlar.open(data_file('var1.reads.augfastq'), 'r')
    graph = kevlar.ReadGraph()
    graph.load(kevlar.parse_augmented_fastx(reads_fh))
    graph.populate_edges(strict=True)

    # 10 reads in the file, but read16f has no valid connections due to error
    assert len(graph.nodes()) == 10

    # The given read shares its interesting k-mer and has compatible overlaps
    # with 6 other reads (read13f and read15f have errors).
    read23 = 'read23f start=67,mutations=0'
    assert len(graph[read23]) == 6

    # Test the values of one of the edges.
    read35 = 'read35f start=25,mutations=0'
    edge_23_35 = graph[read23][read35]
    assert edge_23_35['offset'] == 42
    assert edge_23_35['overlap'] == 58

    # Two connected components but only a single partition.
    # NOTE(review): presumably the unconnected read16f forms the second
    # component and is not reported as a partition -- confirm.
    assert len(list(connected_components(graph))) == 2
    assert len(list(graph.partitions())) == 1

    read8 = 'read8f start=8,mutations=0'
    read37 = 'read37f start=9,mutations=0'
    edge_37_8 = graph[read37][read8]
    assert edge_37_8['offset'] == 1
    assert edge_37_8['overlap'] == 99

    # Rebuild the same overlap by hand and check the merged sequence.
    pair = OverlappingReadPair(
        tail=graph.get_record(read8),
        head=graph.get_record(read37),
        offset=1,
        overlap=99,
        sameorient=True,
        swapped=False,
    )
    expected = ('CACTGTCCTTACAGGTGGATAGTCGCTTTGTAATAAAAGAGTTAC'
                'ACCCCGGTTTTTAGAAGTCTCGACTTTAAGGAAGTGGGCCTACGG'
                'CGGAAGCCGTC')
    assert merge_pair(pair) == expected