def setUp(self):
    """Create a contig over 60 blank characters and attach three mock mapped sequences."""
    self.contig = Contig(' ' * 60, None)
    # (reference_start, reference_end) for each mock mapped sequence, added in this order
    mapped_ranges = ((0, 10), (0, 20), (50, 60))
    for ref_start, ref_end in mapped_ranges:
        self.contig.add_mapped_sequence(
            MockObject(reference_start=ref_start, reference_end=ref_end)
        )
def test_drop_similar_different_lengths(self):
    """filter_contigs keeps one of two contigs whose sequences differ in length.

    The survivor is expected to carry the sequence of the contig constructed
    with 2 as its second argument (presumably the score -- confirm against
    Contig).
    """
    kept = Contig(
        'atcgatcgatcgatcgatatcgatcgatcgatcgatatcgatcgatcgatcgatatcgatcgatcgatcgatcgatcgatatgggcatcagc',
        2,
    )
    dropped = Contig(
        'atcgatcgatcgatcgatatcgatcgatcgatcgatatcgatcgatcgatcgatatcgatcgatcgatcgatcgatcgatagggcatcagc',
        1,
    )
    # the contig expected to be dropped is deliberately passed first
    candidates = [dropped, kept]
    remaining = filter_contigs(candidates, 0.10)
    self.assertEqual(1, len(remaining))
    self.assertEqual(kept.seq, remaining[0].seq)
def test_drop_similar_different_lengths(self):
    """filter_contigs keeps one of two contigs whose sequences differ in length.

    The survivor is expected to carry the sequence of the contig constructed
    with 2 as its second argument (presumably the score -- confirm against
    Contig).
    """
    kept = Contig(
        'atcgatcgatcgatcgatatcgatcgatcgatcgatatcgatcgatcgatcgatatcgatcgatcgatcgatcgatcgatatgggcatcagc',
        2,
    )
    dropped = Contig(
        'atcgatcgatcgatcgatatcgatcgatcgatcgatatcgatcgatcgatcgatatcgatcgatcgatcgatcgatcgatagggcatcagc',
        1,
    )
    # the contig expected to be dropped is deliberately passed first
    candidates = [dropped, kept]
    remaining = filter_contigs(candidates, 0.10)
    assert len(remaining) == 1
    assert remaining[0].seq == kept.seq
class TestBreakpointContigRemappedDepth(unittest.TestCase):
    """Exercise align.SplitAlignment.breakpoint_contig_remapped_depth on a mock contig."""

    def setUp(self):
        """Create a contig over 60 blank characters and attach three mock mapped sequences."""
        self.contig = Contig(' ' * 60, None)
        # (reference_start, reference_end) for each mock mapped sequence, added in this order
        for ref_start, ref_end in ((0, 10), (0, 20), (50, 60)):
            self.contig.add_mapped_sequence(
                MockObject(reference_start=ref_start, reference_end=ref_end)
            )

    def test_break_left_deletion(self):
        """Call the depth computation for a left-oriented breakpoint and a read with D/I ops.

        NOTE(review): the return value is not checked, so this only verifies
        that the call completes without raising.
        """
        breakpoint = Breakpoint('10', 1030, 1030, orient=ORIENT.LEFT)
        cigar = _cigar.convert_string_to_cigar('35M10D5I20M')
        mock_read = MockRead(cigar=cigar, reference_start=999, reference_name='10')
        align.SplitAlignment.breakpoint_contig_remapped_depth(breakpoint, self.contig, mock_read)
def test_blat_contigs_deletion_revcomp(self):
    """Align the reverse complement of a contig with blat and check the selected alignment.

    The contig is built from ``reverse_complement(seq)``. The selected
    alignment is expected to have no second read, to be flagged as reverse,
    to report ``seq`` as its query sequence, and to carry the cigar
    ``[(EQ, 102), (D, 1253), (EQ, 74)]`` starting at reference position 1612.
    """
    ev = GenomeEvidence(
        Breakpoint('fake', 1714, orient=ORIENT.LEFT),
        Breakpoint('fake', 2968, orient=ORIENT.RIGHT),
        opposing_strands=False,
        bam_cache=BAM_CACHE,
        reference_genome=REFERENCE_GENOME,
        read_length=40,
        stdev_fragment_size=25,
        median_fragment_size=100,
    )
    # implicit concatenation inside parentheses instead of a backslash continuation
    seq = (
        'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
        'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT'
    )
    ev.contigs = [Contig(reverse_complement(seq), 0)]
    align.select_contig_alignments(
        ev,
        align.align_sequences(
            {'seq': ev.contigs[0].seq},
            BAM_CACHE,
            REFERENCE_GENOME,
            aligner_reference=REFERENCE_GENOME_FILE_2BIT,
            aligner='blat',
        ),
    )
    print('alignments:', ev.contigs[0].alignments)
    alignment = list(ev.contigs[0].alignments)[0]
    print(alignment)
    # assertIsNone reports the offending value on failure, unlike assertTrue(x is None)
    self.assertIsNone(alignment.read2)
    self.assertEqual(0, alignment.read1.reference_id)
    self.assertTrue(alignment.read1.is_reverse)
    self.assertEqual(seq, alignment.read1.query_sequence)
    self.assertEqual(Interval(0, 175), align.query_coverage_interval(alignment.read1))
    self.assertEqual(1612, alignment.read1.reference_start)
    self.assertEqual([(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)], alignment.read1.cigar)
def test_blat_contigs(self):
    """Align one contig with blat and check both reads of the selected alignment.

    Both reads are expected on reference id 1, starting at 1114 and 2187,
    each with a leading soft clip followed by an exact-match segment.
    """
    evidence = GenomeEvidence(
        Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
        Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
        opposing_strands=True,
        bam_cache=BAM_CACHE,
        reference_genome=REFERENCE_GENOME,
        read_length=40,
        stdev_fragment_size=25,
        median_fragment_size=100,
        stdev_count_abnormal=2,
        min_splits_reads_resolution=1,
        min_flanking_pairs_resolution=1,
    )
    contig_sequence = (
        'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAG'
        'TCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTG'
        'TTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'
    )
    evidence.contigs = [Contig(contig_sequence, 0)]
    print(evidence.contigs[0].seq)
    raw_alignments = align.align_sequences(
        {'seq': evidence.contigs[0].seq},
        BAM_CACHE,
        REFERENCE_GENOME,
        aligner_reference=get_data('mock_reference_genome.2bit'),
        aligner='blat',
    )
    print(raw_alignments)
    align.select_contig_alignments(evidence, raw_alignments)
    print(evidence.contigs[0].alignments)
    selected = list(evidence.contigs[0].alignments)[0]
    first, second = selected.read1, selected.read2
    self.assertEqual(1, first.reference_id)
    self.assertEqual(1, second.reference_id)
    self.assertEqual(Interval(125, 244), align.query_coverage_interval(first))
    self.assertEqual(Interval(117, 244), align.query_coverage_interval(second))
    self.assertEqual(1114, first.reference_start)
    self.assertEqual(2187, second.reference_start)
    self.assertEqual([(CIGAR.S, 125), (CIGAR.EQ, 120)], first.cigar)
    self.assertEqual([(CIGAR.S, 117), (CIGAR.EQ, 128)], second.cigar)
def test_blat_contigs_deletion(self):
    """Align a contig with blat and check that exactly one alignment is selected.

    The alignment is expected to have no second read, not to be flagged as
    reverse, and to carry the cigar ``[(EQ, 102), (D, 1253), (EQ, 74)]``
    starting at reference position 1612.
    """
    ev = GenomeEvidence(
        Breakpoint('fake', 1714, orient=ORIENT.LEFT),
        Breakpoint('fake', 2968, orient=ORIENT.RIGHT),
        opposing_strands=False,
        bam_cache=BAM_CACHE,
        reference_genome=REFERENCE_GENOME,
        read_length=40,
        stdev_fragment_size=25,
        median_fragment_size=100,
    )
    ev.contigs = [
        Contig(
            'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
            'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT',
            0,
        )
    ]
    seq = align.align_sequences(
        {'seq': ev.contigs[0].seq},
        BAM_CACHE,
        REFERENCE_GENOME,
        aligner_reference=get_data('mock_reference_genome.2bit'),
        aligner='blat',
    )
    # dump the raw aligner output per query to help debug a failing run
    for query, reads in seq.items():
        print('>>>', query)
        for read in reads:
            print(repr(read))
    align.select_contig_alignments(ev, seq)
    alignments = list(ev.contigs[0].alignments)
    print('alignments:')
    for aln in alignments:
        print(aln, repr(aln.read1), repr(aln.read2))
    self.assertEqual(1, len(alignments))
    alignment = alignments[0]
    # dedicated assertions report the offending value on failure,
    # unlike assertTrue(x is None) / assertTrue(not x)
    self.assertIsNone(alignment.read2)
    self.assertEqual(0, alignment.read1.reference_id)
    self.assertFalse(alignment.read1.is_reverse)
    self.assertEqual(Interval(0, 175), align.query_coverage_interval(alignment.read1))
    self.assertEqual(1612, alignment.read1.reference_start)
    self.assertEqual([(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)], alignment.read1.cigar)
def test_bwa_contigs(self):
    """Align one contig with 'bwa mem' and check both reads of the selected alignment.

    Both reads are expected on 'reference3' (reference id 1), starting at
    1114 and 2187, with read2's query sequence equal to the reverse
    complement of read1's.
    """
    validate_settings = {
        'validate.stdev_count_abnormal': 2,
        'validate.min_splits_reads_resolution': 1,
        'validate.min_flanking_pairs_resolution': 1,
    }
    evidence = GenomeEvidence(
        Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
        Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
        opposing_strands=True,
        bam_cache=BAM_CACHE,
        reference_genome=REFERENCE_GENOME,
        read_length=40,
        stdev_fragment_size=25,
        median_fragment_size=100,
        config=validate_settings,
    )
    contig_sequence = (
        'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAG'
        'TCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTG'
        'TTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'
    )
    evidence.contigs = [Contig(contig_sequence, 0)]
    print(evidence.contigs[0].seq)
    # NOTE(review): the aligner input/output names are relative paths; presumably
    # they are created in the current working directory -- confirm
    raw_alignments = align.align_sequences(
        {'seq': evidence.contigs[0].seq},
        BAM_CACHE,
        REFERENCE_GENOME,
        aligner_reference=get_data('mock_reference_genome.fa'),
        aligner='bwa mem',
        aligner_output_file='mem.out',
        aligner_fa_input_file='mem.in.fa',
    )
    align.select_contig_alignments(evidence, raw_alignments)
    print(evidence.contigs[0].alignments)
    selected = list(evidence.contigs[0].alignments)[0]
    assert selected.read2.query_sequence == reverse_complement(selected.read1.query_sequence)
    assert selected.read1.reference_name == 'reference3'
    assert selected.read2.reference_name == 'reference3'
    assert selected.read1.reference_id == 1
    assert selected.read2.reference_id == 1
    assert align.query_coverage_interval(selected.read1) == Interval(125, 244)
    assert align.query_coverage_interval(selected.read2) == Interval(117, 244)
    assert selected.read1.reference_start == 1114
    assert selected.read2.reference_start == 2187
    assert selected.read1.cigar == [(CIGAR.S, 125), (CIGAR.EQ, 120)]
    assert selected.read2.cigar == [(CIGAR.S, 117), (CIGAR.EQ, 128)]
def test_break_left_deletion(self):
    """Call breakpoint_contig_remapped_depth for a left-oriented breakpoint.

    NOTE(review): the return value is not checked, so this only verifies
    that the call completes without raising.
    """
    mock_contig = Contig(' ' * 60, None)
    # (reference_start, reference_end) for each mock mapped sequence, added in this order
    for ref_start, ref_end in ((0, 10), (0, 20), (50, 60)):
        mock_contig.add_mapped_sequence(
            MockObject(reference_start=ref_start, reference_end=ref_end)
        )
    breakpoint = Breakpoint('10', 1030, 1030, orient=ORIENT.LEFT)
    cigar = _cigar.convert_string_to_cigar('35M10D5I20M')
    mock_read = MockRead(cigar=cigar, reference_start=999, reference_name='10')
    align.SplitAlignment.breakpoint_contig_remapped_depth(breakpoint, mock_contig, mock_read)
def test_blat_contigs_deletion_revcomp(self):
    """Align the reverse complement of a contig with blat and check the selected alignment.

    The contig is built from ``reverse_complement(forward_seq)``. The
    selected alignment is expected to have no second read, to be flagged as
    reverse, and to report ``forward_seq`` as its query sequence.
    """
    evidence = GenomeEvidence(
        Breakpoint('fake', 1714, orient=ORIENT.LEFT),
        Breakpoint('fake', 2968, orient=ORIENT.RIGHT),
        opposing_strands=False,
        bam_cache=BAM_CACHE,
        reference_genome=REFERENCE_GENOME,
        read_length=40,
        stdev_fragment_size=25,
        median_fragment_size=100,
    )
    forward_seq = (
        'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
        'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT'
    )
    evidence.contigs = [Contig(reverse_complement(forward_seq), 0)]
    raw_alignments = align.align_sequences(
        {'seq': evidence.contigs[0].seq},
        BAM_CACHE,
        REFERENCE_GENOME,
        aligner_reference=get_data('mock_reference_genome.2bit'),
        aligner='blat',
    )
    align.select_contig_alignments(evidence, raw_alignments)
    print('alignments:', evidence.contigs[0].alignments)
    selected = list(evidence.contigs[0].alignments)[0]
    print(selected)
    assert selected.read2 is None
    assert selected.read1.reference_id == 0
    assert selected.read1.is_reverse
    assert selected.read1.query_sequence == forward_seq
    assert align.query_coverage_interval(selected.read1) == Interval(0, 175)
    assert selected.read1.reference_start == 1612
    assert selected.read1.cigar == [(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)]
def test_retain_disimilar_different_lengths(self):
    """filter_contigs returns both contigs when their sequences differ in content and length."""
    first = Contig('atcgatcgatcgatcgatcgatcgatatagggcatcagc', 2)
    second = Contig('atcgatcgatcgatcgatcgatcccgtgatatagggcatcagc', 1)
    candidates = [second, first]
    remaining = filter_contigs(candidates, 0.10)
    self.assertEqual(2, len(remaining))
def test_retain_disimilar(self):
    """filter_contigs returns both contigs when their equal-length sequences differ."""
    first = Contig('atcgatcgatcgatcgatcgatcgatatagggcatcagc', 2)
    second = Contig('atcgadatcgatcgatcgatctgtdstcgatatagggca', 1)
    candidates = [second, first]
    remaining = filter_contigs(candidates, 0.10)
    self.assertEqual(2, len(remaining))
def test_drop_alt_allele_by_score(self):
    """filter_contigs keeps only the contig constructed with 2 (presumably the higher score).

    The two sequences have the same length and differ at a single position.
    """
    kept = Contig('atcgatcgatcgatcgatcgatcgatatagggcatcagc', 2)
    dropped = Contig('atcgatcgatcgatcgatctatcgatatagggcatcagc', 1)
    # the contig expected to be dropped is deliberately passed first
    candidates = [dropped, kept]
    remaining = filter_contigs(candidates, 0.10)
    self.assertEqual(1, len(remaining))
    self.assertEqual(kept.seq, remaining[0].seq)
def test_drop_reverse_complement(self):
    """filter_contigs keeps one contig when the other is its reverse complement.

    Both contigs are constructed with the same second argument (1); the
    survivor is expected to carry the forward contig's sequence.
    """
    forward = Contig('atcgatcgatcgatcgatcgatcgatatagggcatcagc', 1)
    revcomp = Contig('gctgatgccctatatcgatcgatcgatcgatcgatcgat', 1)
    # the reverse-complement contig is deliberately passed first
    candidates = [revcomp, forward]
    remaining = filter_contigs(candidates, 0.10)
    self.assertEqual(1, len(remaining))
    self.assertEqual(forward.seq, remaining[0].seq)
def test_drop_alt_allele_by_score(self):
    """filter_contigs keeps only the contig constructed with 2 (presumably the higher score).

    The two sequences have the same length and differ at a single position.
    """
    kept = Contig('atcgatcgatcgatcgatcgatcgatatagggcatcagc', 2)
    dropped = Contig('atcgatcgatcgatcgatctatcgatatagggcatcagc', 1)
    # the contig expected to be dropped is deliberately passed first
    candidates = [dropped, kept]
    remaining = filter_contigs(candidates, 0.10)
    assert len(remaining) == 1
    assert remaining[0].seq == kept.seq
def test_drop_reverse_complement(self):
    """filter_contigs keeps one contig when the other is its reverse complement.

    Both contigs are constructed with the same second argument (1); the
    survivor is expected to carry the forward contig's sequence.
    """
    forward = Contig('atcgatcgatcgatcgatcgatcgatatagggcatcagc', 1)
    revcomp = Contig('gctgatgccctatatcgatcgatcgatcgatcgatcgat', 1)
    # the reverse-complement contig is deliberately passed first
    candidates = [revcomp, forward]
    remaining = filter_contigs(candidates, 0.10)
    assert len(remaining) == 1
    assert remaining[0].seq == forward.seq