Exemple #1
0
 def exons_protein_coding_merged(self):
     """Return the merged protein-coding exon regions of this gene.

     An IntervalSet is built from ``self._exons_protein_coding``, merged via
     ``merge_hull()`` and converted with ``to_numpy()``.

     As documented originally, the result is a tuple of numpy arrays
     ``(starts, stops)`` and is empty for genes that are not protein coding.
     """
     coding_exons = IntervalSet.from_tuples(self._exons_protein_coding)
     merged = coding_exons.merge_hull()
     return merged.to_numpy()
Exemple #2
0
 def test_invert(self):
     """Inverting the single interval (5, 10) within 0..15 yields both gaps."""
     interval_set = IntervalSet.from_tuples([(5, 10)])
     inverted = interval_set.invert(0, 15)
     expected = [(0, 5), (10, 15)]
     assert inverted.to_tuples() == expected
Exemple #3
0
 def introns(self):
     """Return ``[(start, stop), ...]`` for all introns of this transcript.

     The result is in genomic order. An intron here is everything between
     the start and stop of the owning gene (``self.gene``) that is not
     covered by one of this transcript's exons - so a region where the gene
     extends beyond the exons is reported as well.
     """
     lower, upper = self.gene.start, self.gene.stop
     ordered_exons = list(self.exons_tuples)
     ordered_exons.sort()
     exon_set = IntervalSet.from_tuples(ordered_exons)
     gaps = exon_set.invert(lower, upper)
     return gaps.to_tuples()
Exemple #4
0
    def test_from_tuples(self):
        """Pin the order in which to_tuples() returns unsorted, overlapping input.

        For this input, (0, 5) comes first, and of the two intervals starting
        at 1 the longer one, (1, 15), precedes (1, 10).
        """
        unsorted_input = [(1, 10), (1, 15), (0, 5)]
        expected = [(0, 5), (1, 15), (1, 10)]
        interval_set = IntervalSet.from_tuples(unsorted_input)
        assert interval_set.to_tuples() == expected
Exemple #5
0
 def introns_strict(self):
     """Get truly intronic regions, i.e. not covered by any exon of this gene.

     The exons of all transcripts in ``self.transcripts`` are pooled and the
     pooled set is inverted within ``self.start`` .. ``self.stop``.

     The result is whatever ``IntervalSet.to_numpy()`` returns - per the
     original documentation, a tuple of numpy arrays ``(starts, stops)``.
     By definition, the introns are disjoint.

     Raises:
         ValueError: if a transcript's ``exons`` cannot be iterated
             (a TypeError from ``list.extend``); the original TypeError is
             attached as the cause.
     """
     gene_start = self.start
     gene_stop = self.stop
     exons = []
     for tr in self.transcripts:
         try:
             exons.extend(tr.exons)
         except TypeError as err:  # pragma: no cover
             # Chain explicitly so the traceback shows the TypeError as the
             # direct cause rather than as an error raised during handling.
             raise ValueError(
                 f"No exons defined for {tr.transcript_stable_id}"
             ) from err
     return IntervalSet.from_tuples(exons).invert(gene_start, gene_stop).to_numpy()
Exemple #6
0
 def introns_all(self):
     """Get intronic regions, i.e. an intron in any of the transcripts.

     For every transcript in ``self.transcripts`` its exons are inverted
     within ``self.start`` .. ``self.stop`` and the resulting starts and
     stops are appended to two running lists.

     Returns:
         A tuple of two lists, ``(starts, stops)``. May contain repetitions
         and overlaps and is not sorted!

     Raises:
         ValueError: if building or inverting the interval set for a
             transcript raises TypeError; the original TypeError is
             attached as the cause.
     """
     gene_start = self.start
     gene_stop = self.stop
     introns = [], []
     for tr in self.transcripts:
         try:
             starts, stops = (
                 IntervalSet.from_tuples(tr.exons)
                 .invert(gene_start, gene_stop)
                 .to_numpy()
             )
         except TypeError as err:  # pragma: no cover
             # Chain explicitly so the traceback shows the TypeError as the
             # direct cause rather than as an error raised during handling.
             raise ValueError(
                 f"No exons defined for {tr.transcript_stable_id}"
             ) from err
         introns[0].extend(starts)
         introns[1].extend(stops)
     return introns
Exemple #7
0
 def exons_merged(self):
     """Return the merged exon regions of this gene.

     An IntervalSet is built from ``self._exons``, merged via
     ``merge_hull()`` and converted with ``to_numpy()``; per the original
     documentation the result is a tuple of numpy arrays ``(starts, stops)``.
     """
     all_exons = IntervalSet.from_tuples(self._exons)
     merged = all_exons.merge_hull()
     return merged.to_numpy()