def test_annotate_small_intronic_inversion(self):
    """Annotate a small chr9 inversion against the SVEP1 example gene.

    Both breakpoints are left-oriented, on opposing strands, and eight
    positions apart. Exactly one annotation is expected; both of its
    transcripts must be the gene's best transcript, and its single fusion
    transcript must have the same sequence as the reference transcript.
    """
    svep1 = get_example_genes()['SVEP1']
    annotations_by_chrom = {svep1.chr: [svep1]}
    # mock genome holding only the gene's own sequence, offset by the gene start
    genome = {
        svep1.chr: MockObject(seq=MockLongString(svep1.seq, offset=svep1.start - 1)),
    }
    expected_transcript = get_best(svep1)

    first_break = Breakpoint('9', 113152627, 113152627, orient='L')
    second_break = Breakpoint('9', 113152635, 113152635, orient='L')
    inversion = BreakpointPair(
        first_break,
        second_break,
        opposing_strands=True,
        stranded=False,
        event_type=SVTYPE.INV,
        protocol=PROTOCOL.GENOME,
        untemplated_seq='',
    )

    results = annotate_events(
        [inversion],
        reference_genome=genome,
        annotations=annotations_by_chrom,
    )
    # diagnostic output, shown by the test runner when the test fails
    for result in results:
        print(result, result.transcript1, result.transcript2)

    assert len(results) == 1
    annotation = results[0]
    assert annotation.transcript1 == expected_transcript
    assert annotation.transcript2 == expected_transcript

    expected_seq = expected_transcript.transcripts[0].get_seq(genome)
    fusion_transcripts = annotation.fusion.transcripts
    assert len(fusion_transcripts) == 1
    assert fusion_transcripts[0].get_seq() == expected_seq
def test_small_duplication(self):
    """Check the protein notation produced for a small chr6 duplication.

    The event is first passed through ``annotate_events`` (one annotation
    expected). A fusion transcript is then built from a hand-made annotation
    on the fixture's best transcript, and its first translation is compared
    with the reference translation: the indel call must be a duplication
    and the notation must be ``ENST00000346085:p.G319dupG``.
    """
    start_break = Breakpoint('6', 157100005, strand='+', orient='R')
    end_break = Breakpoint('6', 157100007, strand='+', orient='L')
    duplication = BreakpointPair(
        start_break,
        end_break,
        event_type=SVTYPE.DUP,
        untemplated_seq='',
        protocol=PROTOCOL.GENOME,
    )

    # annotate the breakpoint pair with the gene
    events = annotate_events(
        [duplication],
        reference_genome=self.reference_genome,
        annotations=self.reference_annotations,
    )
    self.assertEqual(1, len(events))

    manual_annotation = Annotation(duplication, transcript1=self.best, transcript2=self.best)
    fusion = FusionTranscript.build(
        manual_annotation,
        self.reference_genome,
        min_orf_size=300,
        max_orf_cap=10,
        min_domain_mapping_match=0.9,
    )
    reference_translation = self.best.translations[0]
    fusion_translation = fusion.translations[0]

    # compare the fusion translation to the reference translation to create the protein notation
    reference_protein = reference_translation.get_aa_seq(self.reference_genome)
    indel = IndelCall(reference_protein, fusion_translation.get_aa_seq())
    self.assertTrue(indel.is_dup)

    protein_notation = call_protein_indel(
        reference_translation, fusion_translation, self.reference_genome
    )
    print(protein_notation)
    self.assertEqual('ENST00000346085:p.G319dupG', protein_notation)
def test_annotate_events_synonymous(self):
    """Annotate a deletion whose fusion transcript must equal the reference.

    A non-stranded deletion is built on the fixture gene's chromosome between
    positions 95344068 (left-oriented) and 95344379 (right-oriented). Exactly
    one annotation is expected, and the sequence of its first fusion
    transcript must equal the sequence of the first reference transcript of
    ``transcript1``.
    """
    # debugging aid: list every transcript available in the fixture annotations
    for gene_list in self.reference_annotations.values():
        for gene in gene_list:
            for t in gene.transcripts:
                print(t)

    b1 = Breakpoint(self.gene.chr, 95344068, orient=ORIENT.LEFT, strand=STRAND.NS)
    b2 = Breakpoint(self.gene.chr, 95344379, orient=ORIENT.RIGHT, strand=STRAND.NS)
    bpp = BreakpointPair(
        b1,
        b2,
        stranded=False,
        opposing_strands=False,
        event_type=SVTYPE.DEL,
        protocol=PROTOCOL.GENOME,
        untemplated_seq='',
    )
    annotations = annotate_events(
        [bpp],
        reference_genome=self.reference_genome,
        annotations=self.reference_annotations,
    )

    # diagnostic output, printed before any assertion so it is available on failure
    for a in annotations:
        print(a, a.fusion, a.fusion.transcripts)
        print(a.transcript1, a.transcript1.transcripts)

    # check the count BEFORE indexing: an empty result should be reported as
    # a failed assertion, not as an IndexError from annotations[0]
    self.assertEqual(1, len(annotations))
    ann = annotations[0]

    fseq = ann.fusion.transcripts[0].get_seq()
    refseq = ann.transcript1.transcripts[0].get_seq(self.reference_genome)
    self.assertEqual(refseq, fseq)
def test_fusion_with_novel_splice_site(self): bpp = BreakpointPair( Breakpoint('7', 150268089, 150268089, 'L', '+'), Breakpoint('8', 79715940, 79715940, 'L', '-'), event_type=SVTYPE.ITRANS, protocol=PROTOCOL.GENOME, untemplated_seq='', ) gimap4 = EXAMPLE_GENES['GIMAP4'] il7 = EXAMPLE_GENES['IL7'] ref_genome = { gimap4.chr: MockObject(seq=MockLongString(gimap4.seq, offset=gimap4.start - 1)), il7.chr: MockObject(seq=MockLongString(il7.seq, offset=il7.start - 1)), } annotations = annotate_events([bpp], { gimap4.chr: [gimap4], il7.chr: [il7] }, ref_genome) assert len(annotations) == 1 ann = annotations[0] print(ann, ann.transcript1, ann.transcript2) print(ann.fusion) print( ann.fusion.transcripts[0].splicing_pattern, ann.fusion.transcripts[0].splicing_pattern.splice_type, ) for ex in ann.fusion.transcripts[0].exons: print(ex, len(ex)) assert False
def test_annotate_small_intronic_inversion(self):
    """Annotate a small chr9 inversion using the fixture gene and genome.

    Both breakpoints are left-oriented, on opposing strands, and eight
    positions apart. Exactly one annotation is expected; both of its
    transcripts must be the fixture's best transcript, and its single fusion
    transcript must have the same sequence as the reference transcript.
    """
    first_break = Breakpoint('9', 113152627, 113152627, orient='L')
    second_break = Breakpoint('9', 113152635, 113152635, orient='L')
    inversion = BreakpointPair(
        first_break,
        second_break,
        opposing_strands=True,
        stranded=False,
        event_type=SVTYPE.INV,
        protocol=PROTOCOL.GENOME,
        untemplated_seq='',
    )

    results = annotate_events(
        [inversion],
        reference_genome=self.reference_genome,
        annotations=self.reference_annotations,
    )
    # diagnostic output, shown by the test runner when the test fails
    for result in results:
        print(result, result.transcript1, result.transcript2)

    self.assertEqual(1, len(results))
    annotation = results[0]
    self.assertEqual(self.best, annotation.transcript1)
    self.assertEqual(self.best, annotation.transcript2)

    expected_seq = self.best.transcripts[0].get_seq(self.reference_genome)
    fusion_transcripts = annotation.fusion.transcripts
    self.assertEqual(1, len(fusion_transcripts))
    self.assertEqual(expected_seq, fusion_transcripts[0].get_seq())
def test_small_duplication(self):
    """Check the protein notation produced for a small chr6 duplication in ARID1B.

    The event is first passed through ``annotate_events`` (one annotation
    expected). A fusion transcript is then built from a hand-made annotation
    on the gene's best transcript, and its first translation is compared with
    the reference translation: the indel call must be a duplication and the
    notation must be ``ENST00000346085:p.G319dupG``.
    """
    arid1b = get_example_genes()['ARID1B']
    annotations_by_chrom = {arid1b.chr: [arid1b]}
    # mock genome holding only the gene's own sequence, offset by the gene start
    genome = {
        arid1b.chr: MockObject(seq=MockLongString(arid1b.seq, offset=arid1b.start - 1)),
    }
    best_transcript = get_best(arid1b)

    start_break = Breakpoint('6', 157100005, strand='+', orient='R')
    end_break = Breakpoint('6', 157100007, strand='+', orient='L')
    duplication = BreakpointPair(
        start_break,
        end_break,
        event_type=SVTYPE.DUP,
        untemplated_seq='',
        protocol=PROTOCOL.GENOME,
    )

    # annotate the breakpoint pair with the gene
    events = annotate_events(
        [duplication],
        reference_genome=genome,
        annotations=annotations_by_chrom,
    )
    assert len(events) == 1

    manual_annotation = Annotation(
        duplication, transcript1=best_transcript, transcript2=best_transcript
    )
    fusion = FusionTranscript.build(
        manual_annotation,
        genome,
        min_orf_size=300,
        max_orf_cap=10,
        min_domain_mapping_match=0.9,
    )
    reference_translation = best_transcript.translations[0]
    fusion_translation = fusion.translations[0]

    # compare the fusion translation to the reference translation to create the protein notation
    reference_protein = reference_translation.get_aa_seq(genome)
    indel = IndelCall(reference_protein, fusion_translation.get_aa_seq())
    assert indel.is_dup

    protein_notation = call_protein_indel(reference_translation, fusion_translation, genome)
    print(protein_notation)
    assert protein_notation == 'ENST00000346085:p.G319dupG'
def test_annotate_events_synonymous(self):
    """Annotate an NDUFA12 deletion whose fusion transcript must equal the reference.

    A non-stranded deletion is built on the gene's chromosome between
    positions 95344068 (left-oriented) and 95344379 (right-oriented). Exactly
    one annotation is expected, and the sequence of its first fusion
    transcript must equal the sequence of the first reference transcript of
    ``transcript1``.
    """
    gene = get_example_genes()['NDUFA12']
    reference_annotations = {gene.chr: [gene]}
    # mock genome holding only the gene's own sequence, offset by the gene start
    reference_genome = {
        gene.chr: MockObject(seq=MockLongString(gene.seq, offset=gene.start - 1))
    }

    # debugging aid: list every transcript in the annotation set. The loop
    # variable is deliberately not named `gene`, so it cannot rebind the gene
    # under test that the breakpoints below are built from.
    for gene_list in reference_annotations.values():
        for annotated_gene in gene_list:
            for t in annotated_gene.transcripts:
                print(t)

    b1 = Breakpoint(gene.chr, 95344068, orient=ORIENT.LEFT, strand=STRAND.NS)
    b2 = Breakpoint(gene.chr, 95344379, orient=ORIENT.RIGHT, strand=STRAND.NS)
    bpp = BreakpointPair(
        b1,
        b2,
        stranded=False,
        opposing_strands=False,
        event_type=SVTYPE.DEL,
        protocol=PROTOCOL.GENOME,
        untemplated_seq='',
    )
    annotations = annotate_events(
        [bpp],
        reference_genome=reference_genome,
        annotations=reference_annotations,
    )

    # diagnostic output, printed before any assertion so it is available on failure
    for a in annotations:
        print(a, a.fusion, a.fusion.transcripts)
        print(a.transcript1, a.transcript1.transcripts)

    # check the count BEFORE indexing: an empty result should be reported as
    # a failed assertion, not as an IndexError from annotations[0]
    assert len(annotations) == 1
    ann = annotations[0]

    fseq = ann.fusion.transcripts[0].get_seq()
    refseq = ann.transcript1.transcripts[0].get_seq(reference_genome)
    assert fseq == refseq
def test_fusion_with_novel_splice_site(self): raise unittest.SkipTest('TODO: dependent functionality not yet implemented') bpp = BreakpointPair( Breakpoint('7', 150268089, 150268089, 'L', '+'), Breakpoint('8', 79715940, 79715940, 'L', '-'), event_type=SVTYPE.ITRANS, protocol=PROTOCOL.GENOME, untemplated_seq='' ) gimap4 = EXAMPLE_GENES['GIMAP4'] il7 = EXAMPLE_GENES['IL7'] ref_genome = { gimap4.chr: MockObject(seq=MockLongString(gimap4.seq, offset=gimap4.start - 1)), il7.chr: MockObject(seq=MockLongString(il7.seq, offset=il7.start - 1)) } annotations = annotate_events([bpp], {gimap4.chr: [gimap4], il7.chr: [il7]}, ref_genome) self.assertEqual(1, len(annotations)) ann = annotations[0] print(ann, ann.transcript1, ann.transcript2) print(ann.fusion) print(ann.fusion.transcripts[0].splicing_pattern, ann.fusion.transcripts[0].splicing_pattern.splice_type) for ex in ann.fusion.transcripts[0].exons: print(ex, len(ex)) self.assertTrue(False)