def test_retained_intron(self):
    """Check that a one-base insertion in PRKCB gives a retained-intron splice type.

    The event is an INS between 16:23957049 (L) and 16:23957050 (R) with an
    untemplated 'A', built under the transcriptome protocol. Exactly one
    fusion transcript is expected and its splicing pattern must be
    SPLICE_TYPE.RETAIN.
    """
    prkcb = get_example_genes()['PRKCB']
    # mock genome holding only the gene sequence, offset to genomic coordinates
    genome = {prkcb.chr: MockObject(seq=MockLongString(prkcb.seq, offset=prkcb.start - 1))}
    best_tx = get_best(prkcb)

    left_break = Breakpoint('16', 23957049, orient='L')
    right_break = Breakpoint('16', 23957050, orient='R')
    insertion = BreakpointPair(
        left_break,
        right_break,
        opposing_strands=False,
        stranded=False,
        event_type=SVTYPE.INS,
        protocol=PROTOCOL.TRANS,
        untemplated_seq='A',
    )
    annotation = Annotation(insertion, transcript1=best_tx, transcript2=best_tx)
    fusion = FusionTranscript.build(
        annotation,
        genome,
        min_orf_size=300,
        max_orf_cap=10,
        min_domain_mapping_match=0.9,
    )

    assert len(fusion.transcripts) == 1
    spliced = fusion.transcripts[0]
    # print both patterns so a failure shows fusion vs. reference splicing
    print(spliced.splicing_pattern)
    print(best_tx.transcripts[0].splicing_pattern)
    assert spliced.splicing_pattern.splice_type == SPLICE_TYPE.RETAIN
def test_build_single_transcript_inversion(self):
    """Check that a small SVEP1 inversion leaves the spliced sequence unchanged.

    Both breakpoints (9:113152627 and 9:113152635) are oriented 'L' on
    opposing strands. The single fusion transcript produced must have the
    same sequence as the first spliced transcript of the reference.
    """
    svep1 = get_example_genes()['SVEP1']
    # mock genome holding only the gene sequence, offset to genomic coordinates
    genome = {svep1.chr: MockObject(seq=MockLongString(svep1.seq, offset=svep1.start - 1))}
    best_tx = get_best(svep1)

    first_break = Breakpoint('9', 113152627, 113152627, orient='L')
    second_break = Breakpoint('9', 113152635, 113152635, orient='L')
    inversion = BreakpointPair(
        first_break,
        second_break,
        opposing_strands=True,
        stranded=False,
        event_type=SVTYPE.INV,
        protocol=PROTOCOL.GENOME,
        untemplated_seq='',
    )
    annotation = Annotation(inversion, transcript1=best_tx, transcript2=best_tx)
    fusion = FusionTranscript.build(
        annotation,
        genome,
        min_orf_size=300,
        max_orf_cap=10,
        min_domain_mapping_match=0.9,
    )

    expected_seq = best_tx.transcripts[0].get_seq(genome)
    assert len(fusion.transcripts) == 1
    assert fusion.transcripts[0].get_seq() == expected_seq
def test_small_duplication(self):
    """Check the protein notation produced for a small duplication on chromosome 6.

    The event is a DUP between 6:157100005 (R) and 6:157100007 (L) on the
    '+' strand. It must annotate to exactly one annotation, be detected as
    a duplication at the amino-acid level, and be reported as
    'ENST00000346085:p.G319dupG'.
    """
    first_break = Breakpoint('6', 157100005, strand='+', orient='R')
    second_break = Breakpoint('6', 157100007, strand='+', orient='L')
    duplication = BreakpointPair(
        first_break,
        second_break,
        event_type=SVTYPE.DUP,
        untemplated_seq='',
        protocol=PROTOCOL.GENOME,
    )

    # annotating the event against the reference should give a single annotation
    annotated = annotate_events(
        [duplication],
        reference_genome=self.reference_genome,
        annotations=self.reference_annotations,
    )
    self.assertEqual(1, len(annotated))

    annotation = Annotation(duplication, transcript1=self.best, transcript2=self.best)
    fusion = FusionTranscript.build(
        annotation,
        self.reference_genome,
        min_orf_size=300,
        max_orf_cap=10,
        min_domain_mapping_match=0.9,
    )
    reference_translation = self.best.translations[0]
    fusion_translation = fusion.translations[0]

    # compare the fusion translation to the reference translation to derive the protein notation
    reference_aa = reference_translation.get_aa_seq(self.reference_genome)
    indel = IndelCall(reference_aa, fusion_translation.get_aa_seq())
    self.assertTrue(indel.is_dup)

    protein_notation = call_protein_indel(reference_translation, fusion_translation, self.reference_genome)
    print(protein_notation)
    self.assertEqual('ENST00000346085:p.G319dupG', protein_notation)
def test_build_single_transcript_inversion_reverse_strand(self):
    """Check break positions of a single-transcript inversion with 'R' breakpoints.

    The event is an INV between 1:205178631 and 1:205178835, both oriented
    'R' on opposing strands. Exactly one fusion transcript is expected,
    with ``break1 == 1860`` and ``break2 == 2065``. Flattening the
    resulting transcript must not raise.
    """
    # 1:205178631R 1:205178835R inversion
    bpp = BreakpointPair(
        Breakpoint('1', 205178631, orient='R'),
        Breakpoint('1', 205178835, orient='R'),
        opposing_strands=True,
        stranded=False,
        event_type=SVTYPE.INV,
        protocol=PROTOCOL.GENOME,
        untemplated_seq='',
    )
    ann = Annotation(bpp, transcript1=self.best, transcript2=self.best)
    ft = FusionTranscript.build(
        ann,
        self.reference_genome,
        min_orf_size=300,
        max_orf_cap=10,
        min_domain_mapping_match=0.9,
    )

    # diagnostic output only; shown by the test runner when the test fails
    print(ft.exons)
    print(ft.break1, ft.break2)
    for ex in ft.exons:
        # Look the mapping up once and guard the length: calling len() on a
        # missing (None) mapping would raise TypeError from this debug print
        # and hide the real assertions below.
        mapped = ft.exon_mapping.get(ex.position)
        mapped_len = None if mapped is None else len(mapped)
        print(ex, len(ex), '==>', mapped, mapped_len, ft.exon_number(ex))

    self.assertEqual(1, len(ft.transcripts))
    self.assertEqual(1860, ft.break1)
    self.assertEqual(2065, ft.break2)
    flatten_fusion_transcript(ft.transcripts[0])  # test no error
def test_build_single_transcript_inversion(self):
    """Check that a small inversion on chromosome 9 leaves the spliced sequence unchanged.

    Both breakpoints (9:113152627 and 9:113152635) are oriented 'L' on
    opposing strands. The single fusion transcript produced must have the
    same sequence as the first spliced transcript of ``self.best``.
    """
    first_break = Breakpoint('9', 113152627, 113152627, orient='L')
    second_break = Breakpoint('9', 113152635, 113152635, orient='L')
    inversion = BreakpointPair(
        first_break,
        second_break,
        opposing_strands=True,
        stranded=False,
        event_type=SVTYPE.INV,
        protocol=PROTOCOL.GENOME,
        untemplated_seq='',
    )
    annotation = Annotation(inversion, transcript1=self.best, transcript2=self.best)
    fusion = FusionTranscript.build(
        annotation,
        self.reference_genome,
        min_orf_size=300,
        max_orf_cap=10,
        min_domain_mapping_match=0.9,
    )

    expected_seq = self.best.transcripts[0].get_seq(self.reference_genome)
    self.assertEqual(1, len(fusion.transcripts))
    self.assertEqual(expected_seq, fusion.transcripts[0].get_seq())
def test_build_single_transcript_inversion_reverse_strand(self):
    """Check break positions of a single-transcript inversion in DSTYK.

    The event is an INV between 1:205178631 and 1:205178835, both oriented
    'R' on opposing strands. Exactly one fusion transcript is expected,
    with ``break1 == 1860`` and ``break2 == 2065``. Flattening the
    resulting transcript must not raise.
    """
    # load the example genes once and reuse them for both the debug print and the lookup
    example_genes = get_example_genes()
    print(example_genes.keys())
    gene = example_genes['DSTYK']
    # mock genome holding only the gene sequence, offset to genomic coordinates
    reference_genome = {
        gene.chr: MockObject(seq=MockLongString(gene.seq, offset=gene.start - 1))
    }
    best = get_best(gene)

    # 1:205178631R 1:205178835R inversion
    bpp = BreakpointPair(
        Breakpoint('1', 205178631, orient='R'),
        Breakpoint('1', 205178835, orient='R'),
        opposing_strands=True,
        stranded=False,
        event_type=SVTYPE.INV,
        protocol=PROTOCOL.GENOME,
        untemplated_seq='',
    )
    ann = Annotation(bpp, transcript1=best, transcript2=best)
    ft = FusionTranscript.build(
        ann,
        reference_genome,
        min_orf_size=300,
        max_orf_cap=10,
        min_domain_mapping_match=0.9,
    )

    # diagnostic output only; shown by the test runner when the test fails
    print(ft.exons)
    print(ft.break1, ft.break2)
    for ex in ft.exons:
        # Look the mapping up once and guard the length: calling len() on a
        # missing (None) mapping would raise TypeError from this debug print
        # and hide the real assertions below.
        mapped = ft.exon_mapping.get(ex.position)
        mapped_len = None if mapped is None else len(mapped)
        print(ex, len(ex), '==>', mapped, mapped_len, ft.exon_number(ex))

    assert len(ft.transcripts) == 1
    assert ft.break1 == 1860
    assert ft.break2 == 2065
    flatten_fusion_transcript(ft.transcripts[0])  # test no error
def test_retained_intron(self):
    """Check that a one-base insertion on chromosome 16 gives a retained-intron splice type.

    The event is an INS between 16:23957049 (L) and 16:23957050 (R) with an
    untemplated 'A', built under the transcriptome protocol. Exactly one
    fusion transcript is expected and its splicing pattern must be
    SPLICE_TYPE.RETAIN.
    """
    left_break = Breakpoint('16', 23957049, orient='L')
    right_break = Breakpoint('16', 23957050, orient='R')
    insertion = BreakpointPair(
        left_break,
        right_break,
        opposing_strands=False,
        stranded=False,
        event_type=SVTYPE.INS,
        protocol=PROTOCOL.TRANS,
        untemplated_seq='A',
    )
    annotation = Annotation(insertion, transcript1=self.best, transcript2=self.best)
    fusion = FusionTranscript.build(
        annotation,
        self.reference_genome,
        min_orf_size=300,
        max_orf_cap=10,
        min_domain_mapping_match=0.9,
    )

    self.assertEqual(1, len(fusion.transcripts))
    spliced = fusion.transcripts[0]
    # print both patterns so a failure shows fusion vs. reference splicing
    print(spliced.splicing_pattern)
    print(self.best.transcripts[0].splicing_pattern)
    self.assertEqual(SPLICE_TYPE.RETAIN, spliced.splicing_pattern.splice_type)
def test_small_duplication(self):
    """Check the protein notation produced for a small duplication in ARID1B.

    The event is a DUP between 6:157100005 (R) and 6:157100007 (L) on the
    '+' strand. It must annotate to exactly one annotation, be detected as
    a duplication at the amino-acid level, and be reported as
    'ENST00000346085:p.G319dupG'.
    """
    arid1b = get_example_genes()['ARID1B']
    gene_annotations = {arid1b.chr: [arid1b]}
    # mock genome holding only the gene sequence, offset to genomic coordinates
    genome = {arid1b.chr: MockObject(seq=MockLongString(arid1b.seq, offset=arid1b.start - 1))}
    best_tx = get_best(arid1b)

    first_break = Breakpoint('6', 157100005, strand='+', orient='R')
    second_break = Breakpoint('6', 157100007, strand='+', orient='L')
    duplication = BreakpointPair(
        first_break,
        second_break,
        event_type=SVTYPE.DUP,
        untemplated_seq='',
        protocol=PROTOCOL.GENOME,
    )

    # annotating the event against the gene should give a single annotation
    annotated = annotate_events([duplication], reference_genome=genome, annotations=gene_annotations)
    assert len(annotated) == 1

    annotation = Annotation(duplication, transcript1=best_tx, transcript2=best_tx)
    fusion = FusionTranscript.build(
        annotation,
        genome,
        min_orf_size=300,
        max_orf_cap=10,
        min_domain_mapping_match=0.9,
    )
    reference_translation = best_tx.translations[0]
    fusion_translation = fusion.translations[0]

    # compare the fusion translation to the reference translation to derive the protein notation
    reference_aa = reference_translation.get_aa_seq(genome)
    indel = IndelCall(reference_aa, fusion_translation.get_aa_seq())
    assert indel.is_dup

    protein_notation = call_protein_indel(reference_translation, fusion_translation, genome)
    print(protein_notation)
    assert protein_notation == 'ENST00000346085:p.G319dupG'