예제 #1
0
 def test_drop_similar_different_lengths(self):
     """Check that only one of two near-identical contigs survives filtering.

     The two sequences have different lengths; the contig constructed with
     ``2`` as its second argument is the one expected to remain.
     """
     retained_seq = 'atcgatcgatcgatcgatatcgatcgatcgatcgatatcgatcgatcgatcgatatcgatcgatcgatcgatcgatcgatatgggcatcagc'
     discarded_seq = 'atcgatcgatcgatcgatatcgatcgatcgatcgatatcgatcgatcgatcgatatcgatcgatcgatcgatcgatcgatagggcatcagc'
     retained = Contig(retained_seq, 2)
     discarded = Contig(discarded_seq, 1)

     # The contig expected to be dropped is deliberately listed first.
     survivors = filter_contigs([discarded, retained], 0.10)

     self.assertEqual(1, len(survivors))
     self.assertEqual(retained.seq, survivors[0].seq)
예제 #2
0
 def test_drop_similar_different_lengths(self):
     """Check that only one of two near-identical contigs survives filtering.

     The two sequences have different lengths; the contig constructed with
     ``2`` as its second argument is the one expected to remain.
     """
     retained_seq = 'atcgatcgatcgatcgatatcgatcgatcgatcgatatcgatcgatcgatcgatatcgatcgatcgatcgatcgatcgatatgggcatcagc'
     discarded_seq = 'atcgatcgatcgatcgatatcgatcgatcgatcgatatcgatcgatcgatcgatatcgatcgatcgatcgatcgatcgatagggcatcagc'
     retained = Contig(retained_seq, 2)
     discarded = Contig(discarded_seq, 1)

     # The contig expected to be dropped is deliberately listed first.
     survivors = filter_contigs([discarded, retained], 0.10)

     assert len(survivors) == 1
     assert survivors[0].seq == retained.seq
예제 #3
0
 def test_blat_contigs_deletion_revcomp(self):
     """Align the reverse complement of a contig with blat and check the result.

     The contig is built from ``reverse_complement(seq)``. The first selected
     alignment is expected to have no second read, to be flagged as reverse,
     to report ``seq`` as its query sequence, and to carry a
     ``(CIGAR.D, 1253)`` run between two ``CIGAR.EQ`` runs.
     """
     ev = GenomeEvidence(
         Breakpoint('fake', 1714, orient=ORIENT.LEFT),
         Breakpoint('fake', 2968, orient=ORIENT.RIGHT),
         opposing_strands=False,
         bam_cache=BAM_CACHE,
         reference_genome=REFERENCE_GENOME,
         read_length=40,
         stdev_fragment_size=25,
         median_fragment_size=100,
     )
     seq = (
         'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
         'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT'
     )
     ev.contigs = [Contig(reverse_complement(seq), 0)]
     aligned = align.align_sequences(
         {'seq': ev.contigs[0].seq},
         BAM_CACHE,
         REFERENCE_GENOME,
         aligner_reference=REFERENCE_GENOME_FILE_2BIT,
         aligner='blat',
     )
     align.select_contig_alignments(ev, aligned)
     print('alignments:', ev.contigs[0].alignments)
     alignment = list(ev.contigs[0].alignments)[0]
     print(alignment)
     # assertIsNone reports the unexpected value on failure, unlike
     # assertTrue(x is None), which only says "False is not true".
     self.assertIsNone(alignment.read2)
     self.assertEqual(0, alignment.read1.reference_id)
     self.assertTrue(alignment.read1.is_reverse)
     self.assertEqual(seq, alignment.read1.query_sequence)
     self.assertEqual(Interval(0, 175), align.query_coverage_interval(alignment.read1))
     self.assertEqual(1612, alignment.read1.reference_start)
     self.assertEqual(
         [(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)],
         alignment.read1.cigar,
     )
예제 #4
0
 def test_blat_contigs(self):
     """Align one contig with blat and check both reads of the first alignment.

     Both reads are expected on reference id 1, starting at 1114 and 2187
     respectively, each with a leading ``CIGAR.S`` run followed by a
     ``CIGAR.EQ`` run.
     """
     first_break = Breakpoint('reference3', 1114, orient=ORIENT.RIGHT)
     second_break = Breakpoint('reference3', 2187, orient=ORIENT.RIGHT)
     ev = GenomeEvidence(
         first_break,
         second_break,
         opposing_strands=True,
         bam_cache=BAM_CACHE,
         reference_genome=REFERENCE_GENOME,
         read_length=40,
         stdev_fragment_size=25,
         median_fragment_size=100,
         stdev_count_abnormal=2,
         min_splits_reads_resolution=1,
         min_flanking_pairs_resolution=1,
     )
     contig_seq = (
         'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAG'
         'TCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTG'
         'TTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'
     )
     ev.contigs = [Contig(contig_seq, 0)]
     print(ev.contigs[0].seq)

     aligned = align.align_sequences(
         {'seq': ev.contigs[0].seq},
         BAM_CACHE,
         REFERENCE_GENOME,
         aligner_reference=get_data('mock_reference_genome.2bit'),
         aligner='blat',
     )
     print(aligned)
     align.select_contig_alignments(ev, aligned)
     print(ev.contigs[0].alignments)

     chosen = list(ev.contigs[0].alignments)[0]
     read1 = chosen.read1
     read2 = chosen.read2
     self.assertEqual(1, read1.reference_id)
     self.assertEqual(1, read2.reference_id)
     self.assertEqual(Interval(125, 244), align.query_coverage_interval(read1))
     self.assertEqual(Interval(117, 244), align.query_coverage_interval(read2))
     self.assertEqual(1114, read1.reference_start)
     self.assertEqual(2187, read2.reference_start)
     self.assertEqual([(CIGAR.S, 125), (CIGAR.EQ, 120)], read1.cigar)
     self.assertEqual([(CIGAR.S, 117), (CIGAR.EQ, 128)], read2.cigar)
예제 #5
0
 def test_blat_contigs_deletion(self):
     """Align a contig with blat and expect a single alignment with a deletion.

     Exactly one alignment should be selected. It is expected to have no
     second read, not to be flagged as reverse, and to carry a
     ``(CIGAR.D, 1253)`` run between two ``CIGAR.EQ`` runs.
     """
     ev = GenomeEvidence(
         Breakpoint('fake', 1714, orient=ORIENT.LEFT),
         Breakpoint('fake', 2968, orient=ORIENT.RIGHT),
         opposing_strands=False,
         bam_cache=BAM_CACHE,
         reference_genome=REFERENCE_GENOME,
         read_length=40,
         stdev_fragment_size=25,
         median_fragment_size=100,
     )
     ev.contigs = [
         Contig(
             'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
             'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT',
             0,
         )
     ]
     seq = align.align_sequences(
         {'seq': ev.contigs[0].seq},
         BAM_CACHE,
         REFERENCE_GENOME,
         aligner_reference=get_data('mock_reference_genome.2bit'),
         aligner='blat',
     )
     # Dump the raw aligner output to help diagnose a failing run.
     for query, reads in seq.items():
         print('>>>', query)
         for read in reads:
             print(repr(read))
     align.select_contig_alignments(ev, seq)
     alignments = list(ev.contigs[0].alignments)
     print('alignments:')
     for aln in alignments:
         print(aln, repr(aln.read1), repr(aln.read2))
     self.assertEqual(1, len(alignments))
     alignment = alignments[0]
     # The dedicated assertions give informative failure messages, unlike
     # assertTrue(x is None) / assertTrue(not x).
     self.assertIsNone(alignment.read2)
     self.assertEqual(0, alignment.read1.reference_id)
     self.assertFalse(alignment.read1.is_reverse)
     self.assertEqual(Interval(0, 175), align.query_coverage_interval(alignment.read1))
     self.assertEqual(1612, alignment.read1.reference_start)
     self.assertEqual([(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)], alignment.read1.cigar)
예제 #6
0
 def setUp(self):
     """Build ``self.contig`` and attach three mock mapped sequences to it.

     The contig sequence is 60 spaces with ``None`` as the second argument.
     """
     self.contig = Contig(' ' * 60, None)
     # (reference_start, reference_end) of each mock mapped sequence, in
     # the order they are added.
     mapped_spans = ((0, 10), (0, 20), (50, 60))
     for span_start, span_end in mapped_spans:
         self.contig.add_mapped_sequence(
             MockObject(reference_start=span_start, reference_end=span_end)
         )
예제 #7
0
파일: test_align.py 프로젝트: bcgsc/mavis
    def test_break_left_deletion(self):
        """Call ``breakpoint_contig_remapped_depth`` on a read with a deletion.

        The test makes no assertions; it passes as long as the call
        completes without raising.
        """
        contig = Contig(' ' * 60, None)
        # (reference_start, reference_end) of each mock mapped sequence.
        for span_start, span_end in ((0, 10), (0, 20), (50, 60)):
            contig.add_mapped_sequence(
                MockObject(reference_start=span_start, reference_end=span_end)
            )

        breakpoint_left = Breakpoint('10', 1030, 1030, orient=ORIENT.LEFT)
        cigar_tuples = _cigar.convert_string_to_cigar('35M10D5I20M')
        mock_read = MockRead(cigar=cigar_tuples, reference_start=999, reference_name='10')
        align.SplitAlignment.breakpoint_contig_remapped_depth(breakpoint_left, contig, mock_read)
예제 #8
0
파일: test_align.py 프로젝트: bcgsc/mavis
 def test_bwa_contigs(self):
     """Align one contig with ``bwa mem`` and check both reads of the first alignment.

     The two reads are expected to be reverse complements of each other,
     both on ``reference3`` (reference id 1), starting at 1114 and 2187,
     each with a leading ``CIGAR.S`` run followed by a ``CIGAR.EQ`` run.
     """
     first_break = Breakpoint('reference3', 1114, orient=ORIENT.RIGHT)
     second_break = Breakpoint('reference3', 2187, orient=ORIENT.RIGHT)
     validate_config = {
         'validate.stdev_count_abnormal': 2,
         'validate.min_splits_reads_resolution': 1,
         'validate.min_flanking_pairs_resolution': 1,
     }
     ev = GenomeEvidence(
         first_break,
         second_break,
         opposing_strands=True,
         bam_cache=BAM_CACHE,
         reference_genome=REFERENCE_GENOME,
         read_length=40,
         stdev_fragment_size=25,
         median_fragment_size=100,
         config=validate_config,
     )
     contig_seq = (
         'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAG'
         'TCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTG'
         'TTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'
     )
     ev.contigs = [Contig(contig_seq, 0)]
     print(ev.contigs[0].seq)

     aligned = align.align_sequences(
         {'seq': ev.contigs[0].seq},
         BAM_CACHE,
         REFERENCE_GENOME,
         aligner_reference=get_data('mock_reference_genome.fa'),
         aligner='bwa mem',
         aligner_output_file='mem.out',
         aligner_fa_input_file='mem.in.fa',
     )
     align.select_contig_alignments(ev, aligned)
     print(ev.contigs[0].alignments)

     chosen = list(ev.contigs[0].alignments)[0]
     read1 = chosen.read1
     read2 = chosen.read2
     assert read2.query_sequence == reverse_complement(read1.query_sequence)
     assert read1.reference_name == 'reference3'
     assert read2.reference_name == 'reference3'
     assert read1.reference_id == 1
     assert read2.reference_id == 1
     assert align.query_coverage_interval(read1) == Interval(125, 244)
     assert align.query_coverage_interval(read2) == Interval(117, 244)
     assert read1.reference_start == 1114
     assert read2.reference_start == 2187
     assert read1.cigar == [(CIGAR.S, 125), (CIGAR.EQ, 120)]
     assert read2.cigar == [(CIGAR.S, 117), (CIGAR.EQ, 128)]
예제 #9
0
파일: test_align.py 프로젝트: bcgsc/mavis
 def test_blat_contigs_deletion_revcomp(self):
     """Align the reverse complement of a contig with blat and check the result.

     The contig is built from ``reverse_complement(seq)``. The first selected
     alignment is expected to have no second read, to be flagged as reverse,
     to report ``seq`` as its query sequence, and to carry a
     ``(CIGAR.D, 1253)`` run between two ``CIGAR.EQ`` runs.
     """
     left_break = Breakpoint('fake', 1714, orient=ORIENT.LEFT)
     right_break = Breakpoint('fake', 2968, orient=ORIENT.RIGHT)
     ev = GenomeEvidence(
         left_break,
         right_break,
         opposing_strands=False,
         bam_cache=BAM_CACHE,
         reference_genome=REFERENCE_GENOME,
         read_length=40,
         stdev_fragment_size=25,
         median_fragment_size=100,
     )
     seq = (
         'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
         'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT'
     )
     ev.contigs = [Contig(reverse_complement(seq), 0)]

     aligned = align.align_sequences(
         {'seq': ev.contigs[0].seq},
         BAM_CACHE,
         REFERENCE_GENOME,
         aligner_reference=get_data('mock_reference_genome.2bit'),
         aligner='blat',
     )
     align.select_contig_alignments(ev, aligned)
     print('alignments:', ev.contigs[0].alignments)

     alignment = list(ev.contigs[0].alignments)[0]
     print(alignment)
     assert alignment.read2 is None
     primary = alignment.read1
     assert primary.reference_id == 0
     assert primary.is_reverse
     assert primary.query_sequence == seq
     assert align.query_coverage_interval(primary) == Interval(0, 175)
     assert primary.reference_start == 1612
     assert primary.cigar == [(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)]
예제 #10
0
 def test_retain_disimilar_different_lengths(self):
     """Check that two contigs of different lengths both survive filtering."""
     first = Contig('atcgatcgatcgatcgatcgatcgatatagggcatcagc', 2)
     second = Contig('atcgatcgatcgatcgatcgatcccgtgatatagggcatcagc', 1)
     survivors = filter_contigs([second, first], 0.10)
     self.assertEqual(2, len(survivors))
예제 #11
0
 def test_retain_disimilar(self):
     """Check that two differing contigs both survive filtering."""
     first = Contig('atcgatcgatcgatcgatcgatcgatatagggcatcagc', 2)
     second = Contig('atcgadatcgatcgatcgatctgtdstcgatatagggca', 1)
     survivors = filter_contigs([second, first], 0.10)
     self.assertEqual(2, len(survivors))
예제 #12
0
 def test_drop_alt_allele_by_score(self):
     """Check that only one of two near-identical contigs survives filtering.

     The contig constructed with ``2`` as its second argument is the one
     expected to remain.
     """
     retained = Contig('atcgatcgatcgatcgatcgatcgatatagggcatcagc', 2)
     discarded = Contig('atcgatcgatcgatcgatctatcgatatagggcatcagc', 1)
     survivors = filter_contigs([discarded, retained], 0.10)
     self.assertEqual(1, len(survivors))
     self.assertEqual(retained.seq, survivors[0].seq)
예제 #13
0
 def test_drop_reverse_complement(self):
     """Check that a contig and its reverse complement collapse to one.

     Both contigs receive ``1`` as their second argument; the forward
     sequence is the one expected to remain.
     """
     forward = Contig('atcgatcgatcgatcgatcgatcgatatagggcatcagc', 1)
     revcomp = Contig('gctgatgccctatatcgatcgatcgatcgatcgatcgat', 1)
     survivors = filter_contigs([revcomp, forward], 0.10)
     self.assertEqual(1, len(survivors))
     self.assertEqual(forward.seq, survivors[0].seq)
예제 #14
0
 def test_drop_alt_allele_by_score(self):
     """Check that only one of two near-identical contigs survives filtering.

     The contig constructed with ``2`` as its second argument is the one
     expected to remain.
     """
     retained = Contig('atcgatcgatcgatcgatcgatcgatatagggcatcagc', 2)
     discarded = Contig('atcgatcgatcgatcgatctatcgatatagggcatcagc', 1)
     survivors = filter_contigs([discarded, retained], 0.10)
     assert len(survivors) == 1
     assert survivors[0].seq == retained.seq
예제 #15
0
 def test_drop_reverse_complement(self):
     """Check that a contig and its reverse complement collapse to one.

     Both contigs receive ``1`` as their second argument; the forward
     sequence is the one expected to remain.
     """
     forward = Contig('atcgatcgatcgatcgatcgatcgatatagggcatcagc', 1)
     revcomp = Contig('gctgatgccctatatcgatcgatcgatcgatcgatcgat', 1)
     survivors = filter_contigs([revcomp, forward], 0.10)
     assert len(survivors) == 1
     assert survivors[0].seq == forward.seq