def test_merge_contained_with_offset_and_error(record7, record12):
    """
    Test merge with containment, offset, and a sequencing error.

    CAGGTCCCCACCCGGATACTTGAAGCAGGCAGCCTCAAGGTATGTGAGGCGATAACTCAA
        |||||||||||||||||                 |||||||||||||||||
         |||||||||||||||||                 |||||||||||||||||
              |||||||||||||||||             |||||||||||||||||
              CCCGGATACTTGAAGCAGGCAcC
              |||||||||||||||||    *

    The second read lines up at offset 10 of the first but differs from it at
    the position marked ``*`` (lowercase ``c``). The test checks two things:
    ``calc_offset`` must report the pair as ``INCOMPATIBLE_PAIR``, and a
    hand-built pair with the same geometry must make ``merge_and_reannotate``
    raise an ``AssertionError`` carrying the expected message.
    """
    pair = calc_offset(record7, record12, 'CCCGGATACTTGAAGCA')
    assert pair == INCOMPATIBLE_PAIR

    # Bypass calc_offset and force the pairing, so the merge step's own
    # compatibility check is exercised.
    pair = OverlappingReadPair(tail=record7, head=record12, offset=10,
                               overlap=23, sameorient=True, swapped=False)
    with pytest.raises(AssertionError) as ae:
        merge_and_reannotate(pair, 'WontWork')

    # Check the message on the exception itself: the string form of the
    # ExceptionInfo wrapper varies between pytest versions and is not
    # guaranteed to contain the message.
    assert 'attempted to assemble incompatible reads' in str(ae.value)
def test_merge_and_reannotate_contained(record7, record10):
    """
    Test merge/reannotation with containment.

    CAGGTCCCCACCCGGATACTTGAAGCAGGCAGCCTCAAGGTATGTGAGGCGATAACTCAA
        |||||||||||||||||                 |||||||||||||||||
         |||||||||||||||||                 |||||||||||||||||
              |||||||||||||||||             |||||||||||||||||
    CAGGTCCCCACCCGGATACTTGAAGCAGGCAGCCT
        |||||||||||||||||
         |||||||||||||||||
              |||||||||||||||||

    The head read overlaps the tail read from offset 0 for all 35 of its
    bases, so the merged record is expected to carry the tail read's sequence
    and interesting k-mers unchanged, under the new name.
    """
    contained_pair = OverlappingReadPair(
        tail=record7,
        head=record10,
        offset=0,
        overlap=35,
        sameorient=True,
        swapped=False,
    )
    merged = merge_and_reannotate(contained_pair, 'ContainedAtOne')

    assert merged.name == 'ContainedAtOne'
    # Everything except the name should be inherited from the containing read.
    for attribute in ('sequence', 'ikmers'):
        assert getattr(merged, attribute) == getattr(record7, attribute)
def test_assembly_contigs():
    """
    Merge two contigs loaded from the ``AluContigs.augfastq`` data file.

    ``contig6`` and ``contig7`` are paired through a shared 31-base seed
    k-mer. The test pins the computed offset (50), overlap (85), and tail
    record, then checks the sequence of the merged record.
    """
    expected_sequence = (
        'TTGCCCAGGCTGGTCTCAAACTCCTGAGCTCAAAGCGATCTGT'
        'CGGCCTGGGCATCCAAAAAAAGTTTTCTTAAAAACATATATGG'
        'CCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAG'
        'GCCGAGGCGGGCGGATCACGAGGTCAGGAGATCGAGACCATCC'
        'TGGCTAACACG'
    )

    # NOTE(review): the stream is never explicitly closed here -- confirm
    # whether kevlar.open returns an object that should be used with `with`.
    stream = kevlar.open(data_file('AluContigs.augfastq'), 'r')
    read_graph = kevlar.ReadGraph()
    read_graph.load(kevlar.parse_augmented_fastx(stream))

    tail_contig = read_graph.get_record('contig6')
    head_contig = read_graph.get_record('contig7')
    contig_pair = calc_offset(tail_contig, head_contig,
                              'AAAGTTTTCTTAAAAACATATATGGCCGGGC')
    assert contig_pair.offset == 50
    assert contig_pair.overlap == 85
    assert contig_pair.tail == tail_contig

    merged = merge_and_reannotate(contig_pair, 'newcontig')
    assert merged.sequence == expected_sequence
def test_merge_and_reannotate_edge_case_opposite_orientation(record7, record9):
    """
    Test merge/reannotation with edge cases to check for fencepost errors.

    CAGGTCCCCACCCGGATACTTGAAGCAGGCAGCCTCAAGGTATGTGAGGCGATAACTCAA
        |||||||||||||||||                 |||||||||||||||||
         |||||||||||||||||                 |||||||||||||||||
              |||||||||||||||||             |||||||||||||||||
                reverse complement ----->  GTATGTGAGGCGATAACTCAAGACCACGGGAGCTCACTTCGTTGACGCGAGCGCCTTGCT
                                           |||||||||||||||||                         |||||||||||||||||
                                            |||||||||||||||||                         |||||||||||||||||

    The head read is paired in the opposite orientation at offset 39 with a
    21-base overlap. The merged record must have the expected name and
    sequence and carry exactly eight interesting k-mers at the listed offsets.
    """  # noqa
    pair = OverlappingReadPair(tail=record7, head=record9, offset=39,
                               overlap=21, sameorient=False, swapped=False)
    newrecord = merge_and_reannotate(pair, 'SoMeCoNtIg')
    assert newrecord.name == 'SoMeCoNtIg'
    assert newrecord.sequence == ('CAGGTCCCCACCCGGATACTTGAAGCAGGCAGCCTCAAGGTAT'
                                  'GTGAGGCGATAACTCAAGACCACGGGAGCTCACTTCGTTGACG'
                                  'CGAGCGCCTTGCT')
    assert len(newrecord.ikmers) == 8

    testseqs = [
        'TCCCCACCCGGATACTT', 'CCCCACCCGGATACTTG', 'CCCGGATACTTGAAGCA',
        'GGTATGTGAGGCGATAA', 'GTATGTGAGGCGATAAC', 'TATGTGAGGCGATAACT',
    ]
    testoffsets = [4, 5, 10, 38, 39, 40, 81, 82]

    # Compare all eight offsets directly. Zipping the k-mers with the shorter
    # `testseqs` list stops after six items, which would leave the last two
    # offsets (81 and 82) unverified.
    assert [kmer.offset for kmer in newrecord.ikmers] == testoffsets

    # Expected sequences are only listed for the first six k-mers, so the
    # sequence check intentionally covers just those.
    for kmer, seq in zip(newrecord.ikmers, testseqs):
        assert kmer.sequence == seq
def test_merge_and_reannotate_opposite_orientation(record1, record3):
    """
    Assemble a read pair and re-annotate the associated interesting k-mers.

    GCTGCACCGATGTACGCAAA
                  |||||         -->    GCTGCACCGATGTACGCAAAGCTATTTAAAACC
                 ACGCAAAGCTATTTAAAACC
                  *****        *****

    The pair is declared with opposite orientations, offset 13, and a 7-base
    overlap. The merged contig should carry two interesting k-mers, at
    offsets 14 and 27.
    """
    opposed_pair = OverlappingReadPair(
        tail=record1,
        head=record3,
        offset=13,
        overlap=7,
        sameorient=False,
        swapped=False,
    )
    contig = merge_and_reannotate(opposed_pair, 'contig1')

    assert contig.name == 'contig1'
    assert contig.sequence == 'GCTGCACCGATGTACGCAAAGCTATTTAAAACC'
    # Exactly two k-mers, in this order, at these offsets.
    assert [kmer.offset for kmer in contig.ikmers] == [14, 27]