Ejemplo n.º 1
0
 def test_inverted_translocation(self):
     """Smoke-check classification of an opposing-strand pair between chromosomes 1 and 2.

     Both breakpoints are left-oriented. The classification result itself is
     not inspected; the test only requires that the call completes.
     """
     first = Breakpoint(1, 1, 2, ORIENT.LEFT)
     second = Breakpoint(2, 1, 2, ORIENT.LEFT)
     pair = BreakpointPair(first, second, opposing_strands=True)
     BreakpointPair.classify(pair)
Ejemplo n.º 2
0
 def test_blat_contigs_deletion(self):
     """Align one contig with blat and expect a single alignment carrying a 1253-base deletion."""
     evidence = GenomeEvidence(
         Breakpoint('fake', 1714, orient=ORIENT.LEFT),
         Breakpoint('fake', 2968, orient=ORIENT.RIGHT),
         opposing_strands=False,
         bam_cache=BAM_CACHE,
         reference_genome=REFERENCE_GENOME,
         read_length=40,
         stdev_fragment_size=25,
         median_fragment_size=100,
     )
     contig_seq = (
         'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
         'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT'
     )
     evidence.contigs = [Contig(contig_seq, 0)]
     aligned = align.align_sequences(
         {'seq': evidence.contigs[0].seq},
         BAM_CACHE,
         REFERENCE_GENOME,
         aligner_reference=get_data('mock_reference_genome.2bit'),
         aligner='blat',
     )
     # dump the raw aligner output so a failure can be diagnosed from the log
     for query_name, query_reads in aligned.items():
         print('>>>', query_name)
         for aligned_read in query_reads:
             print(repr(aligned_read))
     align.select_contig_alignments(evidence, aligned)
     selected = list(evidence.contigs[0].alignments)
     print('alignments:')
     for candidate in selected:
         print(candidate, repr(candidate.read1), repr(candidate.read2))
     self.assertEqual(1, len(selected))
     chosen = selected[0]
     # the whole event is expected to be described by a single read
     self.assertIsNone(chosen.read2)
     primary = chosen.read1
     self.assertEqual(0, primary.reference_id)
     self.assertFalse(primary.is_reverse)
     self.assertEqual(Interval(0, 175), align.query_coverage_interval(primary))
     self.assertEqual(1612, primary.reference_start)
     expected_cigar = [(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)]
     self.assertEqual(expected_cigar, primary.cigar)
Ejemplo n.º 3
0
 def test_blat_contigs_deletion_revcomp(self):
     """Align the reverse complement of a contig with blat and check the resulting deletion call.

     The selected alignment is expected to be flagged as reverse and to carry
     the original (not reverse-complemented) sequence as its query sequence.
     """
     evidence = GenomeEvidence(
         Breakpoint('fake', 1714, orient=ORIENT.LEFT),
         Breakpoint('fake', 2968, orient=ORIENT.RIGHT),
         opposing_strands=False,
         bam_cache=BAM_CACHE,
         reference_genome=REFERENCE_GENOME,
         read_length=40,
         stdev_fragment_size=25,
         median_fragment_size=100,
     )
     forward_seq = (
         'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
         'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT'
     )
     evidence.contigs = [Contig(reverse_complement(forward_seq), 0)]
     blat_output = align.align_sequences(
         {'seq': evidence.contigs[0].seq},
         BAM_CACHE,
         REFERENCE_GENOME,
         aligner_reference=REFERENCE_GENOME_FILE_2BIT,
         aligner='blat',
     )
     align.select_contig_alignments(evidence, blat_output)
     print('alignments:', evidence.contigs[0].alignments)
     chosen = list(evidence.contigs[0].alignments)[0]
     print(chosen)
     self.assertIsNone(chosen.read2)
     primary = chosen.read1
     self.assertEqual(0, primary.reference_id)
     self.assertTrue(primary.is_reverse)
     self.assertEqual(forward_seq, primary.query_sequence)
     self.assertEqual(Interval(0, 175), align.query_coverage_interval(primary))
     self.assertEqual(1612, primary.reference_start)
     expected_cigar = [(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)]
     self.assertEqual(expected_cigar, primary.cigar)
Ejemplo n.º 4
0
    def test_load_evidence_inversion(self):
        """Load evidence for two inversion examples and check split-read and flanking-pair counts."""
        # first example
        evidence = self.genome_evidence(
            Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
            Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
            opposing_strands=True,
        )
        evidence.load_evidence()
        print(len(evidence.split_reads[0]), len(evidence.flanking_pairs))
        for side, expected in ((0, 54), (1, 20)):
            self.assertEqual(expected, self.count_original_reads(evidence.split_reads[side]))
        self.assertEqual(104, len(evidence.flanking_pairs))

        # second example
        evidence = self.genome_evidence(
            Breakpoint('reference7', 15000, orient=ORIENT.RIGHT),
            Breakpoint('reference7', 19000, orient=ORIENT.RIGHT),
            opposing_strands=True,
        )
        evidence.load_evidence()
        print(len(evidence.split_reads[0]), len(evidence.flanking_pairs))
        # the second breakpoint is checked before the first, as in the original ordering
        for side, expected in ((1, 15), (0, 27)):
            self.assertEqual(expected, self.count_original_reads(evidence.split_reads[side]))
        self.assertEqual(52, len(evidence.flanking_pairs))
Ejemplo n.º 5
0
    def test_net_zero(self):
        """Check net size both genomically (-200) and via the transcriptome distance function (0).

        A mock stands in for the evidence object; it is given just the
        attributes and bound callables set up below.
        """
        pre_transcript = PreTranscript([(1001, 1100), (1301, 1400), (1701, 1800)], strand=STRAND.POS)
        for pattern in pre_transcript.generate_splicing_patterns():
            spliced = Transcript(pre_transcript, pattern)
            pre_transcript.transcripts.append(spliced)

        mock_evidence = MockObject(
            annotations={},
            read_length=100,
            max_expected_fragment_size=550,
            call_error=11,
            overlapping_transcripts={pre_transcript},
        )
        # always hand back the single overlapping transcript, whatever positions are asked for
        mock_evidence._select_transcripts = lambda *pos: mock_evidence.overlapping_transcripts
        mock_evidence.distance = partial(TranscriptomeEvidence.distance, mock_evidence)

        pair = BreakpointPair(
            Breakpoint('1', 1099, orient=ORIENT.LEFT),
            Breakpoint('1', 1302, orient=ORIENT.RIGHT),
            untemplated_seq='TT',
        )
        transcriptome_distance = partial(TranscriptomeEvidence.distance, mock_evidence)
        assert pair.net_size() == Interval(-200)
        assert pair.net_size(transcriptome_distance) == Interval(0)
Ejemplo n.º 6
0
    def test_annotate_small_intronic_inversion(self):
        """Annotate a small SVEP1 inversion and expect the fusion sequence to equal the reference transcript."""
        svep1 = get_example_genes()['SVEP1']
        annotations_by_chr = {svep1.chr: [svep1]}
        mock_chromosome = MockObject(seq=MockLongString(svep1.seq, offset=svep1.start - 1))
        genome = {svep1.chr: mock_chromosome}
        best_transcript = get_best(svep1)

        inversion = BreakpointPair(
            Breakpoint('9', 113152627, 113152627, orient='L'),
            Breakpoint('9', 113152635, 113152635, orient='L'),
            opposing_strands=True,
            stranded=False,
            event_type=SVTYPE.INV,
            protocol=PROTOCOL.GENOME,
            untemplated_seq='',
        )
        results = annotate_events(
            [inversion],
            reference_genome=genome,
            annotations=annotations_by_chr,
        )
        for result in results:
            print(result, result.transcript1, result.transcript2)
        assert len(results) == 1
        only = results[0]
        # both ends of the event should land on the same best transcript
        assert only.transcript1 == best_transcript
        assert only.transcript2 == best_transcript
        expected_seq = best_transcript.transcripts[0].get_seq(genome)
        assert len(only.fusion.transcripts) == 1
        assert only.fusion.transcripts[0].get_seq() == expected_seq
Ejemplo n.º 7
0
    def test_retained_intron(self):
        """Build a fusion for a one-base PRKCB insertion and expect a retained-intron splice type."""
        prkcb = get_example_genes()['PRKCB']
        mock_chromosome = MockObject(seq=MockLongString(prkcb.seq, offset=prkcb.start - 1))
        genome = {prkcb.chr: mock_chromosome}
        best_transcript = get_best(prkcb)

        insertion = BreakpointPair(
            Breakpoint('16', 23957049, orient='L'),
            Breakpoint('16', 23957050, orient='R'),
            opposing_strands=False,
            stranded=False,
            event_type=SVTYPE.INS,
            protocol=PROTOCOL.TRANS,
            untemplated_seq='A',
        )
        annotation = Annotation(insertion, transcript1=best_transcript, transcript2=best_transcript)
        fusion = FusionTranscript.build(
            annotation,
            genome,
            min_orf_size=300,
            max_orf_cap=10,
            min_domain_mapping_match=0.9,
        )
        assert len(fusion.transcripts) == 1
        # print both splicing patterns for comparison when the test fails
        print(fusion.transcripts[0].splicing_pattern)
        print(best_transcript.transcripts[0].splicing_pattern)
        observed_type = fusion.transcripts[0].splicing_pattern.splice_type
        assert observed_type == SPLICE_TYPE.RETAIN
Ejemplo n.º 8
0
 def test_inversion_insertion(self):
     """Two left-oriented breakpoints with 'TT' of untemplated sequence should give a net size of 2."""
     first = Breakpoint('1', 10, orient=ORIENT.LEFT)
     second = Breakpoint('1', 15, orient=ORIENT.LEFT)
     pair = BreakpointPair(first, second, untemplated_seq='TT')
     expected = Interval(2)
     self.assertEqual(expected, pair.net_size())
Ejemplo n.º 9
0
 def test___init__opstrand_conflict(self):
     """Two positive-strand breakpoints combined with opposing_strands=True must raise AssertionError."""
     with self.assertRaises(AssertionError):
         # construction stays inside the context so any AssertionError raised here counts
         first = Breakpoint('1', 1, strand=STRAND.POS)
         second = Breakpoint('1', 2, strand=STRAND.POS)
         BreakpointPair(first, second, opposing_strands=True)
Ejemplo n.º 10
0
 def test_duplication_with_insertion(self):
     """A right/left pair at 10 and 15 with 'TTT' of untemplated sequence should have a net size of 9."""
     first = Breakpoint('1', 10, orient=ORIENT.RIGHT)
     second = Breakpoint('1', 15, orient=ORIENT.LEFT)
     pair = BreakpointPair(first, second, untemplated_seq='TTT')
     expected = Interval(9)
     self.assertEqual(expected, pair.net_size())
Ejemplo n.º 11
0
 def test_deletion(self):
     """A left/right pair at 10 and 15 with no untemplated sequence should have a net size of -4."""
     first = Breakpoint('1', 10, orient=ORIENT.LEFT)
     second = Breakpoint('1', 15, orient=ORIENT.RIGHT)
     pair = BreakpointPair(first, second, untemplated_seq='')
     expected = Interval(-4)
     self.assertEqual(expected, pair.net_size())
Ejemplo n.º 12
0
 def test_large_indel(self):
     """A left/right pair at 10 and 101 with 'TTT' of untemplated sequence should have a net size of -87."""
     first = Breakpoint('1', 10, orient=ORIENT.LEFT)
     second = Breakpoint('1', 101, orient=ORIENT.RIGHT)
     pair = BreakpointPair(first, second, untemplated_seq='TTT')
     expected = Interval(-87)
     self.assertEqual(expected, pair.net_size())
Ejemplo n.º 13
0
 def test_insertion(self):
     """Adjacent left/right breakpoints on the same strand should classify as an insertion only."""
     first = Breakpoint(1, 1, 1, strand=STRAND.NS, orient=ORIENT.LEFT)
     second = Breakpoint(1, 2, 2, strand=STRAND.NS, orient=ORIENT.RIGHT)
     pair = BreakpointPair(first, second, opposing_strands=False)
     expected = sorted([SVTYPE.INS])
     observed = sorted(BreakpointPair.classify(pair))
     self.assertEqual(expected, observed)
Ejemplo n.º 14
0
 def test_translocation(self):
     """Smoke-check classification of a same-strand right/left pair between chromosomes 1 and 2.

     The classification result is not inspected; the call only has to complete.
     """
     first = Breakpoint(1, 1, 2, ORIENT.RIGHT)
     second = Breakpoint(2, 1, 2, ORIENT.LEFT)
     pair = BreakpointPair(first, second, opposing_strands=False)
     BreakpointPair.classify(pair)
Ejemplo n.º 15
0
 def test_annotate_events_synonymous(self):
     """Annotate a deletion and expect the fusion transcript sequence to equal the reference sequence."""
     # list every known transcript up front to help with debugging
     for genes in self.reference_annotations.values():
         for gene in genes:
             for pre_transcript in gene.transcripts:
                 print(pre_transcript)
     first = Breakpoint(self.gene.chr, 95344068, orient=ORIENT.LEFT, strand=STRAND.NS)
     second = Breakpoint(self.gene.chr, 95344379, orient=ORIENT.RIGHT, strand=STRAND.NS)
     deletion = BreakpointPair(
         first,
         second,
         stranded=False,
         opposing_strands=False,
         event_type=SVTYPE.DEL,
         protocol=PROTOCOL.GENOME,
         untemplated_seq='',
     )
     results = annotate_events(
         [deletion],
         reference_genome=self.reference_genome,
         annotations=self.reference_annotations,
     )
     head = results[0]
     for result in results:
         print(result, result.fusion, result.fusion.transcripts)
         print(result.transcript1, result.transcript1.transcripts)
     fusion_seq = head.fusion.transcripts[0].get_seq()
     reference_seq = head.transcript1.transcripts[0].get_seq(self.reference_genome)
     self.assertEqual(reference_seq, fusion_seq)
     self.assertEqual(1, len(results))
Ejemplo n.º 16
0
    def test_load_evidence_inversion(self):
        """Load evidence for two inversion examples and check split-read and flanking-pair counts."""
        # first example
        inversion = self.genome_evidence(
            Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
            Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
            opposing_strands=True,
        )
        inversion.load_evidence()
        print(len(inversion.split_reads[0]), len(inversion.flanking_pairs))
        first_side = self.count_original_reads(inversion.split_reads[0])
        assert first_side == 54
        second_side = self.count_original_reads(inversion.split_reads[1])
        assert second_side == 20
        flanking_total = len(inversion.flanking_pairs)
        assert flanking_total == 104

        # second example
        inversion = self.genome_evidence(
            Breakpoint('reference7', 15000, orient=ORIENT.RIGHT),
            Breakpoint('reference7', 19000, orient=ORIENT.RIGHT),
            opposing_strands=True,
        )
        inversion.load_evidence()
        print(len(inversion.split_reads[0]), len(inversion.flanking_pairs))
        second_side = self.count_original_reads(inversion.split_reads[1])
        assert second_side == 15
        first_side = self.count_original_reads(inversion.split_reads[0])
        assert first_side == 27
        flanking_total = len(inversion.flanking_pairs)
        assert flanking_total == 52
Ejemplo n.º 17
0
    def test_small_duplication(self):
        """A small genomic duplication should yield the protein notation 'ENST00000346085:p.G319dupG'."""
        duplication = BreakpointPair(
            Breakpoint('6', 157100005, strand='+', orient='R'),
            Breakpoint('6', 157100007, strand='+', orient='L'),
            event_type=SVTYPE.DUP,
            untemplated_seq='',
            protocol=PROTOCOL.GENOME,
        )
        # annotate the breakpoint with the gene
        results = annotate_events(
            [duplication],
            reference_genome=self.reference_genome,
            annotations=self.reference_annotations,
        )
        self.assertEqual(1, len(results))

        annotation = Annotation(duplication, transcript1=self.best, transcript2=self.best)
        fusion = FusionTranscript.build(
            annotation,
            self.reference_genome,
            min_orf_size=300,
            max_orf_cap=10,
            min_domain_mapping_match=0.9,
        )
        reference_translation = self.best.translations[0]
        fusion_translation = fusion.translations[0]

        # compare the fusion translation to the reference translation to create the protein notation
        reference_aa = reference_translation.get_aa_seq(self.reference_genome)
        fusion_aa = fusion_translation.get_aa_seq()
        indel = IndelCall(reference_aa, fusion_aa)
        self.assertTrue(indel.is_dup)

        protein_notation = call_protein_indel(reference_translation, fusion_translation, self.reference_genome)
        print(protein_notation)
        self.assertEqual('ENST00000346085:p.G319dupG', protein_notation)
Ejemplo n.º 18
0
 def test_fusion_with_novel_splice_site(self):
     """Annotate a GIMAP4/IL7 inverted translocation and print the resulting fusion details.

     NOTE(review): the test ends in an unconditional failure, which looks like
     a work-in-progress placeholder -- confirm the intended final assertions.
     """
     translocation = BreakpointPair(
         Breakpoint('7', 150268089, 150268089, 'L', '+'),
         Breakpoint('8', 79715940, 79715940, 'L', '-'),
         event_type=SVTYPE.ITRANS,
         protocol=PROTOCOL.GENOME,
         untemplated_seq='',
     )
     gimap4 = EXAMPLE_GENES['GIMAP4']
     il7 = EXAMPLE_GENES['IL7']
     genome = {
         gimap4.chr: MockObject(seq=MockLongString(gimap4.seq, offset=gimap4.start - 1)),
         il7.chr: MockObject(seq=MockLongString(il7.seq, offset=il7.start - 1)),
     }
     genes_by_chr = {gimap4.chr: [gimap4], il7.chr: [il7]}
     results = annotate_events([translocation], genes_by_chr, genome)
     assert len(results) == 1
     only = results[0]
     print(only, only.transcript1, only.transcript2)
     print(only.fusion)
     first_fusion_tx = only.fusion.transcripts[0]
     print(
         first_fusion_tx.splicing_pattern,
         first_fusion_tx.splicing_pattern.splice_type,
     )
     for exon in only.fusion.transcripts[0].exons:
         print(exon, len(exon))
     # deliberately kept: the original always fails at this point
     assert False
Ejemplo n.º 19
0
    def test_build_single_transcript_inversion(self):
        """Build a fusion for a small SVEP1 inversion and expect its sequence to equal the reference transcript."""
        svep1 = get_example_genes()['SVEP1']
        mock_chromosome = MockObject(seq=MockLongString(svep1.seq, offset=svep1.start - 1))
        genome = {svep1.chr: mock_chromosome}
        best_transcript = get_best(svep1)

        inversion = BreakpointPair(
            Breakpoint('9', 113152627, 113152627, orient='L'),
            Breakpoint('9', 113152635, 113152635, orient='L'),
            opposing_strands=True,
            stranded=False,
            event_type=SVTYPE.INV,
            protocol=PROTOCOL.GENOME,
            untemplated_seq='',
        )
        annotation = Annotation(inversion, transcript1=best_transcript, transcript2=best_transcript)
        fusion = FusionTranscript.build(
            annotation,
            genome,
            min_orf_size=300,
            max_orf_cap=10,
            min_domain_mapping_match=0.9,
        )
        expected_seq = best_transcript.transcripts[0].get_seq(genome)
        assert len(fusion.transcripts) == 1
        assert fusion.transcripts[0].get_seq() == expected_seq
Ejemplo n.º 20
0
 def test_close_del(self):
     """Breakpoints at 1001 (left) and 1002 (right) should report empty homology on both sides."""
     # ....TT|TT....
     first = Breakpoint(REF_CHR, 1001, strand=STRAND.POS, orient=ORIENT.LEFT)
     second = Breakpoint(REF_CHR, 1002, strand=STRAND.POS, orient=ORIENT.RIGHT)
     pair = BreakpointPair(first, second)
     expected = ('', '')
     self.assertEqual(expected, pair.breakpoint_sequence_homology(REFERENCE_GENOME))
Ejemplo n.º 21
0
    def test_load_evidence_translocation(self):
        """Load evidence for two translocation examples and check split-read and flanking-pair counts."""
        # first example
        translocation = self.genome_evidence(
            Breakpoint('reference10', 520, orient=ORIENT.RIGHT),
            Breakpoint('reference19', 964, orient=ORIENT.LEFT),
            opposing_strands=False,
        )
        translocation.load_evidence()
        print(len(translocation.split_reads[0]), len(translocation.flanking_pairs))
        first_side = self.count_original_reads(translocation.split_reads[0])
        self.assertEqual(14, first_side)
        second_side = self.count_original_reads(translocation.split_reads[1])
        self.assertEqual(20, second_side)
        self.assertEqual(21, len(translocation.flanking_pairs))

        # second example
        translocation = self.genome_evidence(
            Breakpoint('reference2', 2000, orient=ORIENT.LEFT),
            Breakpoint('reference4', 2000, orient=ORIENT.RIGHT),
            opposing_strands=False,
        )
        translocation.load_evidence()
        print(len(translocation.split_reads[0]), len(translocation.flanking_pairs))
        first_side = self.count_original_reads(translocation.split_reads[0])
        self.assertEqual(21, first_side)
        # one of the reads that appears to look good in the bam is too low quality % match
        second_side = self.count_original_reads(translocation.split_reads[1])
        self.assertEqual(40, second_side)
        self.assertEqual(57, len(translocation.flanking_pairs))
Ejemplo n.º 22
0
 def setUp(self):
     """Create two genome breakpoint-pair fixtures and the best-transcripts lookup used by the tests.

     The fixtures share their first breakpoint and differ in the position of
     the second one (10 versus 100).
     """
     short_data = {
         COLUMNS.event_type: SVTYPE.DEL,
         COLUMNS.call_method: CALL_METHOD.CONTIG,
         COLUMNS.fusion_sequence_fasta_id: None,
         COLUMNS.protocol: PROTOCOL.GENOME,
         COLUMNS.fusion_cdna_coding_end: None,
         COLUMNS.fusion_cdna_coding_start: None,
     }
     self.gev1 = BreakpointPair(
         Breakpoint('1', 1),
         Breakpoint('1', 10),
         opposing_strands=True,
         data=short_data,
     )
     long_data = {
         COLUMNS.event_type: SVTYPE.DEL,
         COLUMNS.call_method: CALL_METHOD.CONTIG,
         COLUMNS.fusion_sequence_fasta_id: None,
         COLUMNS.protocol: PROTOCOL.GENOME,
         COLUMNS.fusion_cdna_coding_start: None,
         COLUMNS.fusion_cdna_coding_end: None,
     }
     self.gev2 = BreakpointPair(
         Breakpoint('1', 1),
         Breakpoint('1', 100),
         opposing_strands=True,
         data=long_data,
     )
     self.best_transcripts = {'ABCA': True, 'ABCD': True}
Ejemplo n.º 23
0
 def test_order_is_retained(self):
     """Merging two near-identical deletion calls should give one pair oriented L then R."""
     # BPP(Breakpoint(1:1925143-1925155R), Breakpoint(1:1925144L), opposing=False)
     # >>  BPP(Breakpoint(1:1925144L), Breakpoint(1:1925144-1925158R), opposing=False)
     # >>  BPP(Breakpoint(1:1925143L), Breakpoint(1:1925143-1925158R), opposing=False)
     first_call = BreakpointPair(
         Breakpoint('2', 1925144, 1925144, 'L'),
         Breakpoint('2', 1925144, 1925158, 'R'),
         event_type='deletion',
         opposing_strands=False,
     )
     second_call = BreakpointPair(
         Breakpoint('2', 1925143, 1925143, 'L'),
         Breakpoint('2', 1925143, 1925158, 'R'),
         event_type='deletion',
         opposing_strands=False,
     )
     merged_to_inputs = merge_breakpoint_pairs([first_call, second_call], 100, 25)
     for merged_pair, source_pairs in merged_to_inputs.items():
         print(merged_pair)
         print(source_pairs)
     self.assertEqual(1, len(merged_to_inputs))
     result = list(merged_to_inputs)[0]
     self.assertEqual('L', result.break1.orient)
     self.assertEqual('R', result.break2.orient)
Ejemplo n.º 24
0
 def test_mixed_protocol_fusions_different_sequence(self):
     """Genome and transcriptome events with different fusion sequence ids must not be inferred equivalent."""
     genome_data = {
         COLUMNS.event_type: SVTYPE.DEL,
         COLUMNS.call_method: CALL_METHOD.CONTIG,
         COLUMNS.fusion_sequence_fasta_id: 'a',
         COLUMNS.protocol: PROTOCOL.GENOME,
         COLUMNS.transcript1: None,
         COLUMNS.transcript2: None,
         COLUMNS.fusion_cdna_coding_start: 1,
         COLUMNS.fusion_cdna_coding_end: 10,
     }
     genome_event = BreakpointPair(
         Breakpoint('1', 1),
         Breakpoint('1', 10),
         opposing_strands=True,
         data=genome_data,
     )
     trans_data = {
         COLUMNS.event_type: SVTYPE.DEL,
         COLUMNS.call_method: CALL_METHOD.CONTIG,
         COLUMNS.fusion_sequence_fasta_id: 'b',
         COLUMNS.protocol: PROTOCOL.TRANS,
         COLUMNS.transcript1: None,
         COLUMNS.transcript2: None,
         COLUMNS.fusion_cdna_coding_start: 1,
         COLUMNS.fusion_cdna_coding_end: 10,
     }
     trans_event = BreakpointPair(
         Breakpoint('1', 50),
         Breakpoint('1', 60),
         opposing_strands=True,
         data=trans_data,
     )
     equivalent = pairing.inferred_equivalent(genome_event, trans_event, self.TRANSCRIPTS)
     self.assertFalse(equivalent)
Ejemplo n.º 25
0
 def test_shift_overaligned(self):
     """Standardizing a read with CIGAR '14=7D12=' should produce '12=7N14='."""
     # qwertyuiopas---kkkkk------dfghjklzxcvbnm
     # ..........      ................
     parent_gene = Gene('1', 1, 1000, strand='+')
     pre_transcript = PreTranscript(exons=[(1, 12), (20, 28)], gene=parent_gene, strand='+')
     for pattern in pre_transcript.generate_splicing_patterns():
         spliced = Transcript(pre_transcript, pattern)
         pre_transcript.transcripts.append(spliced)
     parent_gene.transcripts.append(pre_transcript)

     original_read = SamRead(
         reference_name='1',
         reference_start=0,
         cigar=_cigar.convert_string_to_cigar('14=7D12='),
         query_sequence='qwertyuiopasdfghjklzxcvbnm',
     )
     mock_genome = {'1': MockObject(seq='qwertyuiopasdfkkkkkdfghjklzxcvbnm')}
     mock_cache = MockObject(get_read_reference_name=lambda r: r.reference_name)
     trans_evidence = TranscriptomeEvidence(
         annotations={},
         reference_genome=mock_genome,
         bam_cache=mock_cache,
         break1=Breakpoint('1', 1, orient='L', strand='+'),
         break2=Breakpoint('1', 10, orient='R', strand='+'),
         read_length=75,
         stdev_fragment_size=75,
         median_fragment_size=220,
     )
     trans_evidence.overlapping_transcripts.add(pre_transcript)
     standardized = trans_evidence.standardize_read(original_read)
     assert standardized.cigar == _cigar.convert_string_to_cigar('12=7N14=')
Ejemplo n.º 26
0
    def test_duplication(self):
        """Each strand/orientation combination below should classify as a duplication and nothing else.

        The first breakpoint is always right-oriented; the second is either
        left-oriented or has no specified orientation.
        """
        # (shared strand, orientation of the second breakpoint); the first
        # combination is listed twice, matching the original sequence of checks
        combinations = (
            (STRAND.POS, ORIENT.LEFT),
            (STRAND.POS, ORIENT.LEFT),
            (STRAND.NEG, ORIENT.LEFT),
            (STRAND.POS, ORIENT.NS),
            (STRAND.NEG, ORIENT.NS),
        )
        for strand, second_orient in combinations:
            pair = BreakpointPair(
                Breakpoint(1, 1, 2, strand=strand, orient=ORIENT.RIGHT),
                Breakpoint(1, 10, 11, strand=strand, orient=second_orient),
            )
            self.assertEqual({SVTYPE.DUP}, BreakpointPair.classify(pair))
Ejemplo n.º 27
0
 def test_blat_contigs(self):
     """Align an inversion contig with blat and check both reads' positions, coverage and CIGARs."""
     evidence = GenomeEvidence(
         Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
         Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
         opposing_strands=True,
         bam_cache=BAM_CACHE,
         reference_genome=REFERENCE_GENOME,
         read_length=40,
         stdev_fragment_size=25,
         median_fragment_size=100,
         stdev_count_abnormal=2,
         min_splits_reads_resolution=1,
         min_flanking_pairs_resolution=1,
     )
     contig_seq = (
         'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAG'
         'TCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTG'
         'TTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'
     )
     evidence.contigs = [Contig(contig_seq, 0)]
     print(evidence.contigs[0].seq)
     aligned = align.align_sequences(
         {'seq': evidence.contigs[0].seq},
         BAM_CACHE,
         REFERENCE_GENOME,
         aligner_reference=get_data('mock_reference_genome.2bit'),
         aligner='blat',
     )
     print(aligned)
     align.select_contig_alignments(evidence, aligned)
     print(evidence.contigs[0].alignments)
     chosen = list(evidence.contigs[0].alignments)[0]
     # both reads should map to the same reference
     self.assertEqual(1, chosen.read1.reference_id)
     self.assertEqual(1, chosen.read2.reference_id)
     self.assertEqual(Interval(125, 244), align.query_coverage_interval(chosen.read1))
     self.assertEqual(Interval(117, 244), align.query_coverage_interval(chosen.read2))
     self.assertEqual(1114, chosen.read1.reference_start)
     self.assertEqual(2187, chosen.read2.reference_start)
     first_cigar = [(CIGAR.S, 125), (CIGAR.EQ, 120)]
     self.assertEqual(first_cigar, chosen.read1.cigar)
     second_cigar = [(CIGAR.S, 117), (CIGAR.EQ, 128)]
     self.assertEqual(second_cigar, chosen.read2.cigar)
Ejemplo n.º 28
0
 def test_build_single_transcript_inversion_reverse_strand(self):
     """Build a fusion for an inversion with two right-oriented breakpoints and check its break positions.

     Also confirms the single resulting transcript can be flattened without
     raising.
     """
     # 1:205178631R 1:205178835R inversion
     inversion = BreakpointPair(
         Breakpoint('1', 205178631, orient='R'),
         Breakpoint('1', 205178835, orient='R'),
         opposing_strands=True,
         stranded=False,
         event_type=SVTYPE.INV,
         protocol=PROTOCOL.GENOME,
         untemplated_seq='',
     )
     annotation = Annotation(inversion, transcript1=self.best, transcript2=self.best)
     fusion = FusionTranscript.build(
         annotation,
         self.reference_genome,
         min_orf_size=300,
         max_orf_cap=10,
         min_domain_mapping_match=0.9,
     )
     print(fusion.exons)
     print(fusion.break1, fusion.break2)
     # show how each fusion exon maps back to its source, for debugging
     for exon in fusion.exons:
         source = fusion.exon_mapping.get(exon.position, None)
         print(exon, len(exon), '==>', source, len(source), fusion.exon_number(exon))
     # refseq = self.best.transcripts[0].get_seq(self.reference_genome)
     self.assertEqual(1, len(fusion.transcripts))
     self.assertEqual(1860, fusion.break1)
     self.assertEqual(2065, fusion.break2)
     flatten_fusion_transcript(fusion.transcripts[0])  # test no error
Ejemplo n.º 29
0
 def test_insertion_to_duplication(self):
     """Converting a repetitive insertion should give a right/left pair with 'CATACATA' left untemplated."""
     # BPP(Breakpoint(3:60204611L), Breakpoint(3:60204612R), opposing=False, seq='CATACATACATACATACATACATACATACATA')
     # insertion contig [seq2] contig_alignment_score: 0.99, contig_alignment_mq: Interval(255, 255)
     # (3:60132614[seq2]140=71788D69=32I86=, None))
     insertion = BreakpointPair(
         Breakpoint('3', 60204611, orient='L'),
         Breakpoint('3', 60204612, orient='R'),
         untemplated_seq='CATACATACATACATACATACATACATACATA',
         opposing_strands=False,
     )
     local_reference = MockLongString(
         'CAGGGTCTGAGCTCTTAACTCTATACTGCCTACATACATACATACATACATACATATATACATACATATATAAATT',
         offset=60204555,
     )
     genome = {'3': MockObject(seq=local_reference)}
     window_start = 60204588
     print(genome['3'].seq[window_start:window_start + 8], 'CATACATA')
     # attach stand-in reads to the pair before converting it
     insertion.read1 = MockObject(query_sequence='', query_name=None)
     insertion.read2 = None
     converted = align.convert_to_duplication(insertion, genome)
     print(converted)
     self.assertEqual(ORIENT.RIGHT, converted.break1.orient)
     self.assertEqual(60204588, converted.break1.start)
     self.assertEqual(ORIENT.LEFT, converted.break2.orient)
     self.assertEqual(60204611, converted.break2.start)
     # CATACATACATACATACATACATACATACATA
     # ........................********
     self.assertEqual('CATACATA', converted.untemplated_seq)
Ejemplo n.º 30
0
 def test___init__invalid_inter_lr_opp(self):
     """A left/right pair between chromosomes 1 and 2 with opposing strands must raise InvalidRearrangement."""
     with self.assertRaises(InvalidRearrangement):
         # construction stays inside the context so any matching error raised here counts
         first = Breakpoint(1, 1, 2, ORIENT.LEFT)
         second = Breakpoint(2, 1, 2, ORIENT.RIGHT)
         BreakpointPair(first, second, opposing_strands=True)