def test_blat_contigs_deletion_revcomp(self):
    """Align a reverse-complemented deletion contig with blat.

    The contig handed to the aligner is the reverse complement of ``seq``.
    The first selected alignment is expected to be a single read on the
    reverse strand whose query sequence equals ``seq`` and whose CIGAR
    carries a 1253-base deletion between two exact-match segments.
    """
    ev = GenomeEvidence(
        Breakpoint('fake', 1714, orient=ORIENT.LEFT),
        Breakpoint('fake', 2968, orient=ORIENT.RIGHT),
        opposing_strands=False,
        bam_cache=BAM_CACHE,
        reference_genome=REFERENCE_GENOME,
        read_length=40,
        stdev_fragment_size=25,
        median_fragment_size=100,
    )
    # Parenthesised implicit concatenation instead of a backslash continuation.
    seq = (
        'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
        'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT'
    )
    ev.contigs = [Contig(reverse_complement(seq), 0)]

    raw_alignments = align.align_sequences(
        {'seq': ev.contigs[0].seq},
        BAM_CACHE,
        REFERENCE_GENOME,
        aligner_reference=REFERENCE_GENOME_FILE_2BIT,
        aligner='blat',
    )
    align.select_contig_alignments(ev, raw_alignments)

    print('alignments:', ev.contigs[0].alignments)
    alignment = list(ev.contigs[0].alignments)[0]
    print(alignment)

    # assertIsNone shows the unexpected object on failure, unlike
    # assertTrue(... is None) which only reports "False is not true".
    self.assertIsNone(alignment.read2)
    self.assertEqual(0, alignment.read1.reference_id)
    self.assertTrue(alignment.read1.is_reverse)
    self.assertEqual(seq, alignment.read1.query_sequence)
    self.assertEqual(Interval(0, 175), align.query_coverage_interval(alignment.read1))
    self.assertEqual(1612, alignment.read1.reference_start)
    self.assertEqual(
        [(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)],
        alignment.read1.cigar,
    )
def test_inversion_and_deletion(self):
    """Select alignments from two hand-built reads of the same contig.

    One forward read and one reverse read (the latter containing a one-base
    deletion) are supplied as the raw alignments for a mocked evidence
    object; two alignments are expected on the contig afterwards.
    """
    contig_seq = 'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'

    # Mock fixtures: both breakpoints are right-oriented on chromosome '3'.
    first_break = MockObject(orient=ORIENT.RIGHT, chr='3')
    second_break = MockObject(orient=ORIENT.RIGHT, chr='3')
    mock_contig = MockObject(seq=contig_seq, alignments=set())

    evidence = MockObject(
        interchromosomal=False,
        opposing_strands=True,
        break1=first_break,
        break2=second_break,
        contigs=[mock_contig],
        standardize_read=lambda x: x,
        contig_aln_max_event_size=DEFAULTS.contig_aln_max_event_size,
        contig_aln_merge_inner_anchor=5,
        contig_aln_merge_outer_anchor=DEFAULTS.contig_aln_merge_outer_anchor,
        contig_aln_min_query_consumption=0.9,
        contig_aln_min_extend_overlap=DEFAULTS.contig_aln_min_extend_overlap,
        contig_aln_min_anchor_size=DEFAULTS.contig_aln_min_anchor_size,
        contig_aln_min_score=DEFAULTS.contig_aln_min_score,
        outer_window1=Interval(1000, 1200),
        outer_window2=Interval(2000, 2200),
        reference_genome=None,
        bam_cache=mock.Mock(stranded=False),
    )

    forward_read = SamRead(
        reference_id=3,
        reference_start=1114,
        cigar=[(CIGAR.S, 125), (CIGAR.EQ, 120)],
        query_sequence=contig_seq,
        is_reverse=False,
        reference_name='3',
        alignment_rank=0,
    )
    reverse_read = SamRead(
        reference_id=3,
        reference_start=2187,
        cigar=[(CIGAR.S, 117), (CIGAR.EQ, 8), (CIGAR.D, 1), (CIGAR.EQ, 120)],
        query_sequence=reverse_complement(contig_seq),
        is_reverse=True,
        reference_name='3',
        alignment_rank=1,
    )

    # Raw alignments are keyed by the contig sequence itself.
    align.select_contig_alignments(evidence, {contig_seq: [forward_read, reverse_read]})

    selected = list(evidence.contigs[0].alignments)
    self.assertEqual(2, len(selected))
def test_blat_contigs(self):
    """Align a contig with blat and check the selected pair of reads.

    The first selected alignment is expected to have both reads on reference
    id 1, starting at 1114 and 2187, each with a leading soft clip followed
    by an exact-match segment.
    """
    evidence = GenomeEvidence(
        Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
        Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
        opposing_strands=True,
        bam_cache=BAM_CACHE,
        reference_genome=REFERENCE_GENOME,
        read_length=40,
        stdev_fragment_size=25,
        median_fragment_size=100,
        stdev_count_abnormal=2,
        min_splits_reads_resolution=1,
        min_flanking_pairs_resolution=1,
    )
    contig_seq = (
        'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAG'
        'TCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTG'
        'TTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'
    )
    evidence.contigs = [Contig(contig_seq, 0)]
    print(evidence.contigs[0].seq)

    raw_alignments = align.align_sequences(
        {'seq': evidence.contigs[0].seq},
        BAM_CACHE,
        REFERENCE_GENOME,
        aligner_reference=get_data('mock_reference_genome.2bit'),
        aligner='blat',
    )
    print(raw_alignments)

    align.select_contig_alignments(evidence, raw_alignments)
    print(evidence.contigs[0].alignments)

    selected = list(evidence.contigs[0].alignments)[0]
    first_read = selected.read1
    second_read = selected.read2

    self.assertEqual(1, first_read.reference_id)
    self.assertEqual(1, second_read.reference_id)
    self.assertEqual(Interval(125, 244), align.query_coverage_interval(first_read))
    self.assertEqual(Interval(117, 244), align.query_coverage_interval(second_read))
    self.assertEqual(1114, first_read.reference_start)
    self.assertEqual(2187, second_read.reference_start)
    self.assertEqual([(CIGAR.S, 125), (CIGAR.EQ, 120)], first_read.cigar)
    self.assertEqual([(CIGAR.S, 117), (CIGAR.EQ, 128)], second_read.cigar)
def test_blat_contigs_deletion(self):
    """Align a deletion contig with blat and check the single selected read.

    Exactly one alignment is expected. It should consist of one forward-strand
    read (no second read) whose CIGAR carries a 1253-base deletion between
    two exact-match segments.
    """
    ev = GenomeEvidence(
        Breakpoint('fake', 1714, orient=ORIENT.LEFT),
        Breakpoint('fake', 2968, orient=ORIENT.RIGHT),
        opposing_strands=False,
        bam_cache=BAM_CACHE,
        reference_genome=REFERENCE_GENOME,
        read_length=40,
        stdev_fragment_size=25,
        median_fragment_size=100,
    )
    ev.contigs = [
        Contig(
            'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
            'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT',
            0,
        )
    ]
    seq = align.align_sequences(
        {'seq': ev.contigs[0].seq},
        BAM_CACHE,
        REFERENCE_GENOME,
        aligner_reference=get_data('mock_reference_genome.2bit'),
        aligner='blat',
    )
    # Dump the raw aligner output to aid debugging when the test fails.
    for query, reads in seq.items():
        print('>>>', query)
        for read in reads:
            print(repr(read))

    align.select_contig_alignments(ev, seq)
    alignments = list(ev.contigs[0].alignments)
    print('alignments:')
    for aln in alignments:
        print(aln, repr(aln.read1), repr(aln.read2))

    self.assertEqual(1, len(alignments))
    alignment = alignments[0]
    # Dedicated assertions give a more useful failure message than
    # assertTrue(<boolean expression>), which only reports "False is not true".
    self.assertIsNone(alignment.read2)
    self.assertEqual(0, alignment.read1.reference_id)
    self.assertFalse(alignment.read1.is_reverse)
    self.assertEqual(Interval(0, 175), align.query_coverage_interval(alignment.read1))
    self.assertEqual(1612, alignment.read1.reference_start)
    self.assertEqual(
        [(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)],
        alignment.read1.cigar,
    )
def test_bwa_contigs(self):
    """Align a contig with ``bwa mem`` and check the selected pair of reads.

    The first selected alignment is expected to have two reads on
    'reference3' (reference id 1) whose query sequences are reverse
    complements of each other, starting at 1114 and 2187.
    """
    config_overrides = {
        'validate.stdev_count_abnormal': 2,
        'validate.min_splits_reads_resolution': 1,
        'validate.min_flanking_pairs_resolution': 1,
    }
    evidence = GenomeEvidence(
        Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
        Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
        opposing_strands=True,
        bam_cache=BAM_CACHE,
        reference_genome=REFERENCE_GENOME,
        read_length=40,
        stdev_fragment_size=25,
        median_fragment_size=100,
        config=config_overrides,
    )
    contig_seq = (
        'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAG'
        'TCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTG'
        'TTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'
    )
    evidence.contigs = [Contig(contig_seq, 0)]
    print(evidence.contigs[0].seq)

    # NOTE(review): the aligner file names are bare relative names, so they
    # presumably land in the current working directory - confirm.
    raw_alignments = align.align_sequences(
        {'seq': evidence.contigs[0].seq},
        BAM_CACHE,
        REFERENCE_GENOME,
        aligner_reference=get_data('mock_reference_genome.fa'),
        aligner='bwa mem',
        aligner_output_file='mem.out',
        aligner_fa_input_file='mem.in.fa',
    )
    align.select_contig_alignments(evidence, raw_alignments)
    print(evidence.contigs[0].alignments)

    selected = list(evidence.contigs[0].alignments)[0]
    first_read = selected.read1
    second_read = selected.read2

    assert second_read.query_sequence == reverse_complement(first_read.query_sequence)
    assert first_read.reference_name == 'reference3'
    assert second_read.reference_name == 'reference3'
    assert first_read.reference_id == 1
    assert second_read.reference_id == 1
    assert align.query_coverage_interval(first_read) == Interval(125, 244)
    assert align.query_coverage_interval(second_read) == Interval(117, 244)
    assert first_read.reference_start == 1114
    assert second_read.reference_start == 2187
    assert first_read.cigar == [(CIGAR.S, 125), (CIGAR.EQ, 120)]
    assert second_read.cigar == [(CIGAR.S, 117), (CIGAR.EQ, 128)]
def test_blat_contigs_deletion_revcomp(self):
    """Align a reverse-complemented deletion contig with blat.

    The contig given to the aligner is the reverse complement of the
    expected sequence. The first selected alignment should be a lone
    reverse-strand read whose query sequence is the expected sequence and
    whose CIGAR carries a 1253-base deletion.
    """
    evidence = GenomeEvidence(
        Breakpoint('fake', 1714, orient=ORIENT.LEFT),
        Breakpoint('fake', 2968, orient=ORIENT.RIGHT),
        opposing_strands=False,
        bam_cache=BAM_CACHE,
        reference_genome=REFERENCE_GENOME,
        read_length=40,
        stdev_fragment_size=25,
        median_fragment_size=100,
    )
    expected_seq = (
        'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
        'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT'
    )
    evidence.contigs = [Contig(reverse_complement(expected_seq), 0)]

    # Run the aligner first, then hand its output to the selection step.
    raw_alignments = align.align_sequences(
        {'seq': evidence.contigs[0].seq},
        BAM_CACHE,
        REFERENCE_GENOME,
        aligner_reference=get_data('mock_reference_genome.2bit'),
        aligner='blat',
    )
    align.select_contig_alignments(evidence, raw_alignments)

    print('alignments:', evidence.contigs[0].alignments)
    selected = list(evidence.contigs[0].alignments)[0]
    print(selected)

    assert selected.read2 is None
    only_read = selected.read1
    assert only_read.reference_id == 0
    assert only_read.is_reverse
    assert only_read.query_sequence == expected_seq
    assert align.query_coverage_interval(only_read) == Interval(0, 175)
    assert only_read.reference_start == 1612
    assert only_read.cigar == [(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)]