Beispiel #1
0
    def tes(self, slack=0):
        """Return the transcription end sites of the wrapped ranges.

        Only rows whose ``Feature`` equals ``"transcript"`` are kept; the
        ``_tes`` helper is applied to them and the ``Feature`` column of the
        result is overwritten with ``"tes"``.

        Parameters
        ----------
        slack : int, default 0
            Passed unchanged to ``_tes``. Presumably the number of bases by
            which each end site is widened -- confirm against ``_tes``.

        Raises
        ------
        Exception
            If the wrapped ranges carry no strand information.
        """

        ranges = self.pr

        # End sites are strand dependent, so unstranded input is rejected.
        if not ranges.stranded:
            raise Exception(
                "Cannot compute TSSes or TESes without strand info. Perhaps use slack() instead?"
            )

        def _end_sites(df):
            # Single-argument callable, same shape as a one-parameter lambda.
            return _tes(df, slack)

        transcripts = ranges[ranges.Feature == "transcript"]
        end_sites = transcripts.apply(_end_sites)

        end_sites.Feature = "tes"

        return end_sites
Beispiel #2
0
    def tes(self, slack=0):
        """Return the transcription end sites.

        Returns the 3' for every interval with feature "transcript".

        Parameters
        ----------
        slack : int, default 0
            Forwarded to the ``_tes`` helper. Presumably the number of bases
            by which each end site is extended -- confirm against ``_tes``.

        Returns
        -------
        PyRanges
            The transcript rows after ``_tes`` has been applied, with the
            ``Feature`` column set to ``"tes"``.

        Raises
        ------
        Exception
            If the underlying ranges have no strand information.

        See Also
        --------
        pyranges.genomicfeatures.GenomicFeaturesMethods.tss : return the transcription start sites

        Examples
        --------

        >>> gr = pr.data.ensembl_gtf()
        >>> gr
        +--------------+------------+--------------+-----------+-----------+------------+--------------+------------+------------------------------------+-------+
        | Chromosome   | Source     | Feature      | Start     | End       | Score      | Strand       | Frame      | gene_biotype                       | +19   |
        | (category)   | (object)   | (category)   | (int32)   | (int32)   | (object)   | (category)   | (object)   | (object)                           | ...   |
        |--------------+------------+--------------+-----------+-----------+------------+--------------+------------+------------------------------------+-------|
        | 1            | havana     | gene         | 11868     | 14409     | .          | +            | .          | transcribed_unprocessed_pseudogene | ...   |
        | 1            | havana     | transcript   | 11868     | 14409     | .          | +            | .          | transcribed_unprocessed_pseudogene | ...   |
        | 1            | havana     | exon         | 11868     | 12227     | .          | +            | .          | transcribed_unprocessed_pseudogene | ...   |
        | 1            | havana     | exon         | 12612     | 12721     | .          | +            | .          | transcribed_unprocessed_pseudogene | ...   |
        | ...          | ...        | ...          | ...       | ...       | ...        | ...          | ...        | ...                                | ...   |
        | 1            | havana     | gene         | 1173055   | 1179555   | .          | -            | .          | lncRNA                             | ...   |
        | 1            | havana     | transcript   | 1173055   | 1179555   | .          | -            | .          | lncRNA                             | ...   |
        | 1            | havana     | exon         | 1179364   | 1179555   | .          | -            | .          | lncRNA                             | ...   |
        | 1            | havana     | exon         | 1173055   | 1176396   | .          | -            | .          | lncRNA                             | ...   |
        +--------------+------------+--------------+-----------+-----------+------------+--------------+------------+------------------------------------+-------+
        Stranded PyRanges object has 2,446 rows and 28 columns from 1 chromosomes.
        For printing, the PyRanges was sorted on Chromosome and Strand.
        19 hidden columns: gene_id, gene_name, gene_source, gene_version, tag, transcript_biotype, transcript_id, transcript_name, transcript_source, transcript_support_level, ... (+ 9 more.)

        >>> gr.features.tes()
        +--------------+------------+------------+-----------+-----------+------------+--------------+------------+------------------------------------+-------+
        | Chromosome   | Source     | Feature    | Start     | End       | Score      | Strand       | Frame      | gene_biotype                       | +19   |
        | (category)   | (object)   | (object)   | (int32)   | (int32)   | (object)   | (category)   | (object)   | (object)                           | ...   |
        |--------------+------------+------------+-----------+-----------+------------+--------------+------------+------------------------------------+-------|
        | 1            | havana     | tes        | 14409     | 14410     | .          | +            | .          | transcribed_unprocessed_pseudogene | ...   |
        | 1            | havana     | tes        | 13670     | 13671     | .          | +            | .          | transcribed_unprocessed_pseudogene | ...   |
        | 1            | havana     | tes        | 31097     | 31098     | .          | +            | .          | lncRNA                             | ...   |
        | 1            | havana     | tes        | 31109     | 31110     | .          | +            | .          | lncRNA                             | ...   |
        | ...          | ...        | ...        | ...       | ...       | ...        | ...          | ...        | ...                                | ...   |
        | 1            | havana     | tes        | 1092813   | 1092814   | .          | -            | .          | protein_coding                     | ...   |
        | 1            | havana     | tes        | 1116087   | 1116088   | .          | -            | .          | protein_coding                     | ...   |
        | 1            | havana     | tes        | 1116089   | 1116090   | .          | -            | .          | protein_coding                     | ...   |
        | 1            | havana     | tes        | 1179555   | 1179556   | .          | -            | .          | lncRNA                             | ...   |
        +--------------+------------+------------+-----------+-----------+------------+--------------+------------+------------------------------------+-------+
        Stranded PyRanges object has 280 rows and 28 columns from 1 chromosomes.
        For printing, the PyRanges was sorted on Chromosome and Strand.
        19 hidden columns: gene_id, gene_name, gene_source, gene_version, tag, transcript_biotype, transcript_id, transcript_name, transcript_source, transcript_support_level, ... (+ 9 more.)
        """

        pr = self.pr

        # End sites depend on strand orientation, so unstranded data is rejected.
        if not pr.stranded:
            raise Exception(
                "Cannot compute TSSes or TESes without strand info. Perhaps use slack() instead?"
            )

        pr = pr[pr.Feature == "transcript"]
        # Forward the caller's slack; previously it was accepted but never
        # passed on, so a non-default value had no effect.
        pr = pr.apply(lambda df: _tes(df, slack))

        pr.Feature = "tes"

        return pr