def test_inverted_translocation(self):
    # Smoke test: build a pair whose breakpoints sit on different references
    # (1 and 2), are both left-oriented and have opposing strands, then run the
    # classifier on it. The classification result itself is not inspected.
    first = Breakpoint(1, 1, 2, ORIENT.LEFT)
    second = Breakpoint(2, 1, 2, ORIENT.LEFT)
    pair = BreakpointPair(first, second, opposing_strands=True)
    BreakpointPair.classify(pair)
def test_blat_contigs_deletion(self):
    # Align one contig with blat against the mock reference and check that the
    # alignment selected for the evidence is a single forward-strand read whose
    # CIGAR contains a 1253-base deletion.
    ev = GenomeEvidence(
        Breakpoint('fake', 1714, orient=ORIENT.LEFT),
        Breakpoint('fake', 2968, orient=ORIENT.RIGHT),
        opposing_strands=False,
        bam_cache=BAM_CACHE,
        reference_genome=REFERENCE_GENOME,
        read_length=40,
        stdev_fragment_size=25,
        median_fragment_size=100,
    )
    ev.contigs = [
        Contig(
            'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
            'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT',
            0,
        )
    ]
    seq = align.align_sequences(
        {'seq': ev.contigs[0].seq},
        BAM_CACHE,
        REFERENCE_GENOME,
        aligner_reference=get_data('mock_reference_genome.2bit'),
        aligner='blat',
    )
    # debugging output: every raw read returned by the aligner, per query
    for query, reads in seq.items():
        print('>>>', query)
        for read in reads:
            print(repr(read))
    align.select_contig_alignments(ev, seq)
    alignments = list(ev.contigs[0].alignments)
    print('alignments:')
    for aln in alignments:
        print(aln, repr(aln.read1), repr(aln.read2))
    self.assertEqual(1, len(alignments))
    alignment = alignments[0]
    # a single-read alignment is expected, so there must be no second read
    self.assertIsNone(alignment.read2)
    self.assertEqual(0, alignment.read1.reference_id)
    self.assertFalse(alignment.read1.is_reverse)
    self.assertEqual(Interval(0, 175), align.query_coverage_interval(alignment.read1))
    self.assertEqual(1612, alignment.read1.reference_start)
    self.assertEqual(
        [(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)],
        alignment.read1.cigar,
    )
def test_blat_contigs_deletion_revcomp(self):
    # Same deletion contig as the forward-strand test, but the contig is given
    # as its reverse complement. The selected alignment must be flagged as
    # reverse and report the original (un-complemented) sequence as its query.
    ev = GenomeEvidence(
        Breakpoint('fake', 1714, orient=ORIENT.LEFT),
        Breakpoint('fake', 2968, orient=ORIENT.RIGHT),
        opposing_strands=False,
        bam_cache=BAM_CACHE,
        reference_genome=REFERENCE_GENOME,
        read_length=40,
        stdev_fragment_size=25,
        median_fragment_size=100,
    )
    seq = (
        'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
        'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT'
    )
    ev.contigs = [Contig(reverse_complement(seq), 0)]
    align.select_contig_alignments(
        ev,
        align.align_sequences(
            {'seq': ev.contigs[0].seq},
            BAM_CACHE,
            REFERENCE_GENOME,
            aligner_reference=REFERENCE_GENOME_FILE_2BIT,
            aligner='blat',
        ),
    )
    print('alignments:', ev.contigs[0].alignments)
    alignment = list(ev.contigs[0].alignments)[0]
    print(alignment)
    # a single-read alignment is expected, so there must be no second read
    self.assertIsNone(alignment.read2)
    self.assertEqual(0, alignment.read1.reference_id)
    self.assertTrue(alignment.read1.is_reverse)
    self.assertEqual(seq, alignment.read1.query_sequence)
    self.assertEqual(Interval(0, 175), align.query_coverage_interval(alignment.read1))
    self.assertEqual(1612, alignment.read1.reference_start)
    self.assertEqual(
        [(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)],
        alignment.read1.cigar,
    )
def test_load_evidence_inversion(self):
    # Load evidence for two inversion calls (both breakpoints right-oriented,
    # opposing strands) and check the number of original split reads at each
    # breakpoint plus the number of flanking pairs.

    # first example
    left = Breakpoint('reference3', 1114, orient=ORIENT.RIGHT)
    right = Breakpoint('reference3', 2187, orient=ORIENT.RIGHT)
    evidence = self.genome_evidence(left, right, opposing_strands=True)
    evidence.load_evidence()
    print(len(evidence.split_reads[0]), len(evidence.flanking_pairs))
    self.assertEqual(54, self.count_original_reads(evidence.split_reads[0]))
    self.assertEqual(20, self.count_original_reads(evidence.split_reads[1]))
    self.assertEqual(104, len(evidence.flanking_pairs))

    # second example
    left = Breakpoint('reference7', 15000, orient=ORIENT.RIGHT)
    right = Breakpoint('reference7', 19000, orient=ORIENT.RIGHT)
    evidence = self.genome_evidence(left, right, opposing_strands=True)
    evidence.load_evidence()
    print(len(evidence.split_reads[0]), len(evidence.flanking_pairs))
    self.assertEqual(15, self.count_original_reads(evidence.split_reads[1]))
    self.assertEqual(27, self.count_original_reads(evidence.split_reads[0]))
    self.assertEqual(52, len(evidence.flanking_pairs))
def test_net_zero(self):
    # A deletion spanning the gap between the first two exons with a 2-base
    # insertion: the default net size is expected to be -200, while the net
    # size computed with the transcriptome distance function is expected to
    # be 0.
    pre_transcript = PreTranscript(
        [(1001, 1100), (1301, 1400), (1701, 1800)], strand=STRAND.POS
    )
    for pattern in pre_transcript.generate_splicing_patterns():
        pre_transcript.transcripts.append(Transcript(pre_transcript, pattern))

    # minimal stand-in for a TranscriptomeEvidence instance
    trans_evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        call_error=11,
        overlapping_transcripts={pre_transcript},
    )
    trans_evidence._select_transcripts = (
        lambda *pos: trans_evidence.overlapping_transcripts
    )
    trans_evidence.distance = partial(TranscriptomeEvidence.distance, trans_evidence)

    pair = BreakpointPair(
        Breakpoint('1', 1099, orient=ORIENT.LEFT),
        Breakpoint('1', 1302, orient=ORIENT.RIGHT),
        untemplated_seq='TT',
    )
    transcript_distance = partial(TranscriptomeEvidence.distance, trans_evidence)
    assert pair.net_size() == Interval(-200)
    assert pair.net_size(transcript_distance) == Interval(0)
def test_annotate_small_intronic_inversion(self):
    # Annotate a small inversion inside SVEP1 and check that exactly one
    # annotation is produced, that both transcripts are the gene's best
    # transcript, and that the single fusion transcript sequence equals the
    # reference transcript sequence.
    svep1 = get_example_genes()['SVEP1']
    annotations_by_chr = {svep1.chr: [svep1]}
    genome = {
        svep1.chr: MockObject(seq=MockLongString(svep1.seq, offset=svep1.start - 1))
    }
    best = get_best(svep1)
    inversion = BreakpointPair(
        Breakpoint('9', 113152627, 113152627, orient='L'),
        Breakpoint('9', 113152635, 113152635, orient='L'),
        opposing_strands=True,
        stranded=False,
        event_type=SVTYPE.INV,
        protocol=PROTOCOL.GENOME,
        untemplated_seq='',
    )
    annotations = annotate_events(
        [inversion], reference_genome=genome, annotations=annotations_by_chr
    )
    for item in annotations:
        print(item, item.transcript1, item.transcript2)
    assert len(annotations) == 1
    ann = annotations[0]
    assert ann.transcript1 == best
    assert ann.transcript2 == best
    refseq = best.transcripts[0].get_seq(genome)
    assert len(ann.fusion.transcripts) == 1
    assert ann.fusion.transcripts[0].get_seq() == refseq
def test_retained_intron(self):
    # Build the fusion transcript for a single-base insertion in PRKCB and
    # check that its only transcript has a RETAIN splice type.
    prkcb = get_example_genes()['PRKCB']
    genome = {
        prkcb.chr: MockObject(seq=MockLongString(prkcb.seq, offset=prkcb.start - 1))
    }
    best = get_best(prkcb)
    insertion = BreakpointPair(
        Breakpoint('16', 23957049, orient='L'),
        Breakpoint('16', 23957050, orient='R'),
        opposing_strands=False,
        stranded=False,
        event_type=SVTYPE.INS,
        protocol=PROTOCOL.TRANS,
        untemplated_seq='A',
    )
    ann = Annotation(insertion, transcript1=best, transcript2=best)
    fusion = FusionTranscript.build(
        ann,
        genome,
        min_orf_size=300,
        max_orf_cap=10,
        min_domain_mapping_match=0.9,
    )
    assert len(fusion.transcripts) == 1
    # debugging output: fusion splicing pattern next to the reference one
    print(fusion.transcripts[0].splicing_pattern)
    print(best.transcripts[0].splicing_pattern)
    assert fusion.transcripts[0].splicing_pattern.splice_type == SPLICE_TYPE.RETAIN
def test_inversion_insertion(self):
    # Two left-oriented breakpoints with a 2-base untemplated sequence: the
    # net size is expected to be Interval(2).
    first = Breakpoint('1', 10, orient=ORIENT.LEFT)
    second = Breakpoint('1', 15, orient=ORIENT.LEFT)
    pair = BreakpointPair(first, second, untemplated_seq='TT')
    self.assertEqual(Interval(2), pair.net_size())
def test___init__opstrand_conflict(self):
    # Both breakpoints are on the positive strand, which conflicts with
    # opposing_strands=True; construction is expected to raise AssertionError.
    first = Breakpoint('1', 1, strand=STRAND.POS)
    second = Breakpoint('1', 2, strand=STRAND.POS)
    with self.assertRaises(AssertionError):
        BreakpointPair(first, second, opposing_strands=True)
def test_duplication_with_insertion(self):
    # Right-oriented breakpoint at 10 and left-oriented breakpoint at 15 with
    # a 3-base untemplated sequence: the net size is expected to be
    # Interval(9).
    first = Breakpoint('1', 10, orient=ORIENT.RIGHT)
    second = Breakpoint('1', 15, orient=ORIENT.LEFT)
    pair = BreakpointPair(first, second, untemplated_seq='TTT')
    self.assertEqual(Interval(9), pair.net_size())
def test_deletion(self):
    # Left-oriented breakpoint at 10 and right-oriented breakpoint at 15 with
    # no untemplated sequence: the net size is expected to be Interval(-4).
    first = Breakpoint('1', 10, orient=ORIENT.LEFT)
    second = Breakpoint('1', 15, orient=ORIENT.RIGHT)
    pair = BreakpointPair(first, second, untemplated_seq='')
    self.assertEqual(Interval(-4), pair.net_size())
def test_large_indel(self):
    # Left-oriented breakpoint at 10 and right-oriented breakpoint at 101 with
    # a 3-base untemplated sequence: the net size is expected to be
    # Interval(-87).
    first = Breakpoint('1', 10, orient=ORIENT.LEFT)
    second = Breakpoint('1', 101, orient=ORIENT.RIGHT)
    pair = BreakpointPair(first, second, untemplated_seq='TTT')
    self.assertEqual(Interval(-87), pair.net_size())
def test_insertion(self):
    # Adjacent positions (1 and 2) with left/right orientation on the same
    # strand are expected to classify as an insertion only.
    first = Breakpoint(1, 1, 1, strand=STRAND.NS, orient=ORIENT.LEFT)
    second = Breakpoint(1, 2, 2, strand=STRAND.NS, orient=ORIENT.RIGHT)
    pair = BreakpointPair(first, second, opposing_strands=False)
    expected = [SVTYPE.INS]
    observed = sorted(BreakpointPair.classify(pair))
    self.assertEqual(expected, observed)
def test_translocation(self):
    # Smoke test: build a right/left oriented pair across two different
    # references (1 and 2) on the same strand and run the classifier on it.
    # The classification result itself is not inspected.
    first = Breakpoint(1, 1, 2, ORIENT.RIGHT)
    second = Breakpoint(2, 1, 2, ORIENT.LEFT)
    pair = BreakpointPair(first, second, opposing_strands=False)
    BreakpointPair.classify(pair)
def test_annotate_events_synonymous(self):
    # Annotate a deletion on the fixture gene and check that the fusion
    # transcript sequence equals the reference transcript sequence, and that
    # exactly one annotation was produced.

    # debugging output: every transcript in the reference annotations
    for genes in self.reference_annotations.values():
        for gene in genes:
            for transcript in gene.transcripts:
                print(transcript)

    chrom = self.gene.chr
    first = Breakpoint(chrom, 95344068, orient=ORIENT.LEFT, strand=STRAND.NS)
    second = Breakpoint(chrom, 95344379, orient=ORIENT.RIGHT, strand=STRAND.NS)
    deletion = BreakpointPair(
        first,
        second,
        stranded=False,
        opposing_strands=False,
        event_type=SVTYPE.DEL,
        protocol=PROTOCOL.GENOME,
        untemplated_seq='',
    )
    annotations = annotate_events(
        [deletion],
        reference_genome=self.reference_genome,
        annotations=self.reference_annotations,
    )
    ann = annotations[0]
    for item in annotations:
        print(item, item.fusion, item.fusion.transcripts)
        print(item.transcript1, item.transcript1.transcripts)
    fusion_seq = ann.fusion.transcripts[0].get_seq()
    reference_seq = ann.transcript1.transcripts[0].get_seq(self.reference_genome)
    self.assertEqual(reference_seq, fusion_seq)
    self.assertEqual(1, len(annotations))
def test_load_evidence_inversion(self):
    # Load evidence for two inversion calls (both breakpoints right-oriented,
    # opposing strands) and check the number of original split reads at each
    # breakpoint plus the number of flanking pairs.

    # first example
    left = Breakpoint('reference3', 1114, orient=ORIENT.RIGHT)
    right = Breakpoint('reference3', 2187, orient=ORIENT.RIGHT)
    evidence = self.genome_evidence(left, right, opposing_strands=True)
    evidence.load_evidence()
    print(len(evidence.split_reads[0]), len(evidence.flanking_pairs))
    assert self.count_original_reads(evidence.split_reads[0]) == 54
    assert self.count_original_reads(evidence.split_reads[1]) == 20
    assert len(evidence.flanking_pairs) == 104

    # second example
    left = Breakpoint('reference7', 15000, orient=ORIENT.RIGHT)
    right = Breakpoint('reference7', 19000, orient=ORIENT.RIGHT)
    evidence = self.genome_evidence(left, right, opposing_strands=True)
    evidence.load_evidence()
    print(len(evidence.split_reads[0]), len(evidence.flanking_pairs))
    assert self.count_original_reads(evidence.split_reads[1]) == 15
    assert self.count_original_reads(evidence.split_reads[0]) == 27
    assert len(evidence.flanking_pairs) == 52
def test_small_duplication(self):
    # A small duplication on chromosome 6 should yield one annotation, be
    # recognised as a duplication at the protein level, and produce the
    # notation 'ENST00000346085:p.G319dupG'.
    duplication = BreakpointPair(
        Breakpoint('6', 157100005, strand='+', orient='R'),
        Breakpoint('6', 157100007, strand='+', orient='L'),
        event_type=SVTYPE.DUP,
        untemplated_seq='',
        protocol=PROTOCOL.GENOME,
    )
    # annotate the breakpoint with the gene
    annotations = annotate_events(
        [duplication],
        reference_genome=self.reference_genome,
        annotations=self.reference_annotations,
    )
    self.assertEqual(1, len(annotations))

    ann = Annotation(duplication, transcript1=self.best, transcript2=self.best)
    fusion = FusionTranscript.build(
        ann,
        self.reference_genome,
        min_orf_size=300,
        max_orf_cap=10,
        min_domain_mapping_match=0.9,
    )
    ref_tx = self.best.translations[0]
    fusion_tx = fusion.translations[0]

    # compare the fusion translation to the reference translation to create
    # the protein notation
    ref_aa_seq = ref_tx.get_aa_seq(self.reference_genome)
    call = IndelCall(ref_aa_seq, fusion_tx.get_aa_seq())
    self.assertTrue(call.is_dup)

    notation = call_protein_indel(ref_tx, fusion_tx, self.reference_genome)
    print(notation)
    self.assertEqual('ENST00000346085:p.G319dupG', notation)
def test_fusion_with_novel_splice_site(self):
    # Annotate an inverted translocation between GIMAP4 (chr 7) and IL7
    # (chr 8), print the resulting fusion for inspection, then fail
    # unconditionally.
    # NOTE(review): the unconditional failure looks like a placeholder for
    # expectations that were never written -- confirm before relying on it.
    bpp = BreakpointPair(
        Breakpoint('7', 150268089, 150268089, 'L', '+'),
        Breakpoint('8', 79715940, 79715940, 'L', '-'),
        event_type=SVTYPE.ITRANS,
        protocol=PROTOCOL.GENOME,
        untemplated_seq='',
    )
    gimap4 = EXAMPLE_GENES['GIMAP4']
    il7 = EXAMPLE_GENES['IL7']
    ref_genome = {
        gimap4.chr: MockObject(seq=MockLongString(gimap4.seq, offset=gimap4.start - 1)),
        il7.chr: MockObject(seq=MockLongString(il7.seq, offset=il7.start - 1)),
    }
    # keyword arguments, matching how the other tests call annotate_events
    annotations = annotate_events(
        [bpp],
        annotations={gimap4.chr: [gimap4], il7.chr: [il7]},
        reference_genome=ref_genome,
    )
    assert len(annotations) == 1
    ann = annotations[0]
    print(ann, ann.transcript1, ann.transcript2)
    print(ann.fusion)
    print(
        ann.fusion.transcripts[0].splicing_pattern,
        ann.fusion.transcripts[0].splicing_pattern.splice_type,
    )
    for ex in ann.fusion.transcripts[0].exons:
        print(ex, len(ex))
    # Raise explicitly instead of `assert False`: assert statements are removed
    # under `python -O`, which would let this test pass silently.
    raise AssertionError(
        'test_fusion_with_novel_splice_site has no expectations implemented'
    )
def test_build_single_transcript_inversion(self):
    # Build the fusion transcript for a small inversion inside SVEP1 and check
    # that the single resulting transcript has the same sequence as the
    # reference transcript.
    svep1 = get_example_genes()['SVEP1']
    genome = {
        svep1.chr: MockObject(seq=MockLongString(svep1.seq, offset=svep1.start - 1))
    }
    best = get_best(svep1)
    inversion = BreakpointPair(
        Breakpoint('9', 113152627, 113152627, orient='L'),
        Breakpoint('9', 113152635, 113152635, orient='L'),
        opposing_strands=True,
        stranded=False,
        event_type=SVTYPE.INV,
        protocol=PROTOCOL.GENOME,
        untemplated_seq='',
    )
    ann = Annotation(inversion, transcript1=best, transcript2=best)
    fusion = FusionTranscript.build(
        ann,
        genome,
        min_orf_size=300,
        max_orf_cap=10,
        min_domain_mapping_match=0.9,
    )
    refseq = best.transcripts[0].get_seq(genome)
    assert len(fusion.transcripts) == 1
    assert fusion.transcripts[0].get_seq() == refseq
def test_close_del(self):
    # Adjacent left/right breakpoints at 1001 and 1002 on the positive strand:
    # no homology is expected on either side.
    # ....TT|TT....
    left = Breakpoint(REF_CHR, 1001, strand=STRAND.POS, orient=ORIENT.LEFT)
    right = Breakpoint(REF_CHR, 1002, strand=STRAND.POS, orient=ORIENT.RIGHT)
    pair = BreakpointPair(left, right)
    homology = pair.breakpoint_sequence_homology(REFERENCE_GENOME)
    self.assertEqual(('', ''), homology)
def test_load_evidence_translocation(self):
    # Load evidence for two translocation calls (breakpoints on different
    # references, same strand) and check the number of original split reads
    # at each breakpoint plus the number of flanking pairs.

    # first example
    first = Breakpoint('reference10', 520, orient=ORIENT.RIGHT)
    second = Breakpoint('reference19', 964, orient=ORIENT.LEFT)
    evidence = self.genome_evidence(first, second, opposing_strands=False)
    evidence.load_evidence()
    print(len(evidence.split_reads[0]), len(evidence.flanking_pairs))
    self.assertEqual(14, self.count_original_reads(evidence.split_reads[0]))
    self.assertEqual(20, self.count_original_reads(evidence.split_reads[1]))
    self.assertEqual(21, len(evidence.flanking_pairs))

    # second example
    first = Breakpoint('reference2', 2000, orient=ORIENT.LEFT)
    second = Breakpoint('reference4', 2000, orient=ORIENT.RIGHT)
    evidence = self.genome_evidence(first, second, opposing_strands=False)
    evidence.load_evidence()
    print(len(evidence.split_reads[0]), len(evidence.flanking_pairs))
    self.assertEqual(21, self.count_original_reads(evidence.split_reads[0]))
    # one of the reads that appears to look good in the bam is too low quality % match
    self.assertEqual(40, self.count_original_reads(evidence.split_reads[1]))
    self.assertEqual(57, len(evidence.flanking_pairs))
def setUp(self):
    # Fixture: two contig-called genome deletion pairs that share their first
    # breakpoint (1:1) and differ in the second (1:10 vs 1:100), plus a
    # best-transcripts lookup.
    near_data = {
        COLUMNS.event_type: SVTYPE.DEL,
        COLUMNS.call_method: CALL_METHOD.CONTIG,
        COLUMNS.fusion_sequence_fasta_id: None,
        COLUMNS.protocol: PROTOCOL.GENOME,
        COLUMNS.fusion_cdna_coding_end: None,
        COLUMNS.fusion_cdna_coding_start: None,
    }
    self.gev1 = BreakpointPair(
        Breakpoint('1', 1),
        Breakpoint('1', 10),
        opposing_strands=True,
        data=near_data,
    )

    far_data = {
        COLUMNS.event_type: SVTYPE.DEL,
        COLUMNS.call_method: CALL_METHOD.CONTIG,
        COLUMNS.fusion_sequence_fasta_id: None,
        COLUMNS.protocol: PROTOCOL.GENOME,
        COLUMNS.fusion_cdna_coding_start: None,
        COLUMNS.fusion_cdna_coding_end: None,
    }
    self.gev2 = BreakpointPair(
        Breakpoint('1', 1),
        Breakpoint('1', 100),
        opposing_strands=True,
        data=far_data,
    )

    self.best_transcripts = {'ABCA': True, 'ABCD': True}
def test_order_is_retained(self):
    # Merging two nearly identical deletion pairs must give a single merged
    # pair whose first breakpoint is still left-oriented and whose second is
    # still right-oriented.
    #
    # BPP(Breakpoint(1:1925143-1925155R), Breakpoint(1:1925144L), opposing=False)
    # >> BPP(Breakpoint(1:1925144L), Breakpoint(1:1925144-1925158R), opposing=False)
    # >> BPP(Breakpoint(1:1925143L), Breakpoint(1:1925143-1925158R), opposing=False)
    first_pair = BreakpointPair(
        Breakpoint('2', 1925144, 1925144, 'L'),
        Breakpoint('2', 1925144, 1925158, 'R'),
        event_type='deletion',
        opposing_strands=False,
    )
    second_pair = BreakpointPair(
        Breakpoint('2', 1925143, 1925143, 'L'),
        Breakpoint('2', 1925143, 1925158, 'R'),
        event_type='deletion',
        opposing_strands=False,
    )
    mapping = merge_breakpoint_pairs([first_pair, second_pair], 100, 25)
    for merged, inputs in mapping.items():
        print(merged)
        print(inputs)
    self.assertEqual(1, len(mapping))
    merged = next(iter(mapping))
    self.assertEqual('L', merged.break1.orient)
    self.assertEqual('R', merged.break2.orient)
def test_mixed_protocol_fusions_different_sequence(self):
    # A genome call and a transcriptome call whose fusion sequence ids differ
    # ('a' vs 'b') must not be inferred as equivalent.
    genome_data = {
        COLUMNS.event_type: SVTYPE.DEL,
        COLUMNS.call_method: CALL_METHOD.CONTIG,
        COLUMNS.fusion_sequence_fasta_id: 'a',
        COLUMNS.protocol: PROTOCOL.GENOME,
        COLUMNS.transcript1: None,
        COLUMNS.transcript2: None,
        COLUMNS.fusion_cdna_coding_start: 1,
        COLUMNS.fusion_cdna_coding_end: 10,
    }
    genome_ev = BreakpointPair(
        Breakpoint('1', 1),
        Breakpoint('1', 10),
        opposing_strands=True,
        data=genome_data,
    )

    trans_data = {
        COLUMNS.event_type: SVTYPE.DEL,
        COLUMNS.call_method: CALL_METHOD.CONTIG,
        COLUMNS.fusion_sequence_fasta_id: 'b',
        COLUMNS.protocol: PROTOCOL.TRANS,
        COLUMNS.transcript1: None,
        COLUMNS.transcript2: None,
        COLUMNS.fusion_cdna_coding_start: 1,
        COLUMNS.fusion_cdna_coding_end: 10,
    }
    trans_ev = BreakpointPair(
        Breakpoint('1', 50),
        Breakpoint('1', 60),
        opposing_strands=True,
        data=trans_data,
    )

    equivalent = pairing.inferred_equivalent(genome_ev, trans_ev, self.TRANSCRIPTS)
    self.assertFalse(equivalent)
def test_shift_overaligned(self):
    # A read aligned as 14=7D12= against a transcript with exons (1, 12) and
    # (20, 28) is expected to be standardized to 12=7N14=.
    #
    # qwertyuiopas---kkkkk------dfghjklzxcvbnm
    # .......... ................
    gene = Gene('1', 1, 1000, strand='+')
    pre_transcript = PreTranscript(exons=[(1, 12), (20, 28)], gene=gene, strand='+')
    for pattern in pre_transcript.generate_splicing_patterns():
        pre_transcript.transcripts.append(Transcript(pre_transcript, pattern))
    gene.transcripts.append(pre_transcript)

    original_cigar = _cigar.convert_string_to_cigar('14=7D12=')
    read = SamRead(
        reference_name='1',
        reference_start=0,
        cigar=original_cigar,
        query_sequence='qwertyuiopasdfghjklzxcvbnm',
    )
    evidence = TranscriptomeEvidence(
        annotations={},
        reference_genome={'1': MockObject(seq='qwertyuiopasdfkkkkkdfghjklzxcvbnm')},
        bam_cache=MockObject(get_read_reference_name=lambda r: r.reference_name),
        break1=Breakpoint('1', 1, orient='L', strand='+'),
        break2=Breakpoint('1', 10, orient='R', strand='+'),
        read_length=75,
        stdev_fragment_size=75,
        median_fragment_size=220,
    )
    evidence.overlapping_transcripts.add(pre_transcript)

    standardized = evidence.standardize_read(read)
    assert standardized.cigar == _cigar.convert_string_to_cigar('12=7N14=')
def test_duplication(self):
    # A right-oriented breakpoint followed by a left-oriented (or unspecified
    # orientation) breakpoint on the same strand must classify as a
    # duplication only.
    #
    # Each case is (strand shared by both breakpoints, orientation of the
    # second breakpoint). The first breakpoint is always right-oriented.
    cases = [
        (STRAND.POS, ORIENT.LEFT),
        (STRAND.NEG, ORIENT.LEFT),
        (STRAND.POS, ORIENT.NS),
        (STRAND.NEG, ORIENT.NS),
    ]
    for strand, second_orient in cases:
        # subTest labels a failure with the combination that caused it
        with self.subTest(strand=strand, second_orient=second_orient):
            b = BreakpointPair(
                Breakpoint(1, 1, 2, strand=strand, orient=ORIENT.RIGHT),
                Breakpoint(1, 10, 11, strand=strand, orient=second_orient),
            )
            self.assertEqual({SVTYPE.DUP}, BreakpointPair.classify(b))
def test_blat_contigs(self):
    # Align an inversion-spanning contig with blat and check the two reads of
    # the first selected alignment: reference ids, query coverage, reference
    # start positions and CIGARs.
    evidence = GenomeEvidence(
        Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
        Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
        opposing_strands=True,
        bam_cache=BAM_CACHE,
        reference_genome=REFERENCE_GENOME,
        read_length=40,
        stdev_fragment_size=25,
        median_fragment_size=100,
        stdev_count_abnormal=2,
        min_splits_reads_resolution=1,
        min_flanking_pairs_resolution=1,
    )
    contig = Contig(
        'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAG'
        'TCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTG'
        'TTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT',
        0,
    )
    evidence.contigs = [contig]
    print(evidence.contigs[0].seq)
    aligned = align.align_sequences(
        {'seq': evidence.contigs[0].seq},
        BAM_CACHE,
        REFERENCE_GENOME,
        aligner_reference=get_data('mock_reference_genome.2bit'),
        aligner='blat',
    )
    print(aligned)
    align.select_contig_alignments(evidence, aligned)
    print(evidence.contigs[0].alignments)
    alignment = list(evidence.contigs[0].alignments)[0]
    read1 = alignment.read1
    read2 = alignment.read2
    self.assertEqual(1, read1.reference_id)
    self.assertEqual(1, read2.reference_id)
    self.assertEqual(Interval(125, 244), align.query_coverage_interval(alignment.read1))
    self.assertEqual(Interval(117, 244), align.query_coverage_interval(alignment.read2))
    self.assertEqual(1114, alignment.read1.reference_start)
    self.assertEqual(2187, alignment.read2.reference_start)
    self.assertEqual([(CIGAR.S, 125), (CIGAR.EQ, 120)], alignment.read1.cigar)
    self.assertEqual([(CIGAR.S, 117), (CIGAR.EQ, 128)], alignment.read2.cigar)
def test_build_single_transcript_inversion_reverse_strand(self):
    # Build the fusion transcript for an inversion with two right-oriented
    # breakpoints and check the number of transcripts, the breakpoint
    # positions on the fusion, and that flattening it does not raise.
    #
    # 1:205178631R 1:205178835R inversion
    inversion = BreakpointPair(
        Breakpoint('1', 205178631, orient='R'),
        Breakpoint('1', 205178835, orient='R'),
        opposing_strands=True,
        stranded=False,
        event_type=SVTYPE.INV,
        protocol=PROTOCOL.GENOME,
        untemplated_seq='',
    )
    ann = Annotation(inversion, transcript1=self.best, transcript2=self.best)
    ft = FusionTranscript.build(
        ann,
        self.reference_genome,
        min_orf_size=300,
        max_orf_cap=10,
        min_domain_mapping_match=0.9,
    )
    print(ft.exons)
    print(ft.break1, ft.break2)
    # debugging output: each fusion exon next to the exon it maps back to
    for exon in ft.exons:
        mapped = ft.exon_mapping.get(exon.position, None)
        print(exon, len(exon), '==>', mapped, len(mapped), ft.exon_number(exon))
    self.assertEqual(1, len(ft.transcripts))
    self.assertEqual(1860, ft.break1)
    self.assertEqual(2065, ft.break2)
    flatten_fusion_transcript(ft.transcripts[0])  # test no error
def test_insertion_to_duplication(self):
    # Convert an insertion call to a duplication and check the resulting
    # breakpoint orientations, positions and remaining untemplated sequence.
    #
    # BPP(Breakpoint(3:60204611L), Breakpoint(3:60204612R), opposing=False, seq='CATACATACATACATACATACATACATACATA')
    # insertion contig [seq2] contig_alignment_score: 0.99, contig_alignment_mq: Interval(255, 255)
    # (3:60132614[seq2]140=71788D69=32I86=, None))
    insertion = BreakpointPair(
        Breakpoint('3', 60204611, orient='L'),
        Breakpoint('3', 60204612, orient='R'),
        untemplated_seq='CATACATACATACATACATACATACATACATA',
        opposing_strands=False,
    )
    reference_seq = MockLongString(
        'CAGGGTCTGAGCTCTTAACTCTATACTGCCTACATACATACATACATACATACATATATACATACATATATAAATT',
        offset=60204555,
    )
    reference_genome = {'3': MockObject(seq=reference_seq)}
    print(reference_genome['3'].seq[60204588:60204588 + 8], 'CATACATA')

    # read attributes are attached to the pair before the conversion call
    insertion.read1 = MockObject(query_sequence='', query_name=None)
    insertion.read2 = None

    event = align.convert_to_duplication(insertion, reference_genome)
    print(event)
    self.assertEqual(ORIENT.RIGHT, event.break1.orient)
    self.assertEqual(60204588, event.break1.start)
    self.assertEqual(ORIENT.LEFT, event.break2.orient)
    self.assertEqual(60204611, event.break2.start)
    # CATACATACATACATACATACATACATACATA
    # ........................********
    self.assertEqual('CATACATA', event.untemplated_seq)
def test___init__invalid_inter_lr_opp(self):
    # A left/right oriented pair across two different references (1 and 2)
    # with opposing strands is expected to be rejected with
    # InvalidRearrangement.
    first = Breakpoint(1, 1, 2, ORIENT.LEFT)
    second = Breakpoint(2, 1, 2, ORIENT.RIGHT)
    with self.assertRaises(InvalidRearrangement):
        BreakpointPair(first, second, opposing_strands=True)