Example #1
0
    def as_gff(
        self,
        software_version: Optional[str] = None,
        database_version: Optional[str] = None,
        keep_all: bool = False,
        id_index: int = 1,
    ) -> Iterator[GFFRecord]:
        """Yield this HMM match as a ``protein_hmm_match`` GFF record.

        Args:
            software_version: Passed through to ``self.gen_source``.
            database_version: Passed through to ``self.gen_source``.
            keep_all: When true, emit the record even if
                ``self.is_significant`` is false.
            id_index: Not used by this method.

        Yields:
            At most one record on ``self.name`` spanning ``ali_start`` to
            ``ali_end``, with the e-value as its score.
        """
        if not (keep_all or self.is_significant):
            return

        attr = GFFAttributes(
            target=Target(self.hmm, self.hmm_start, self.hmm_end),
            custom={
                "env_start": str(self.env_start),
                "env_end": str(self.env_end),
                "hmm_name": str(self.hmm_name),
                "hmm_type": str(self.hmm_type),
                "hmm_len": str(self.hmm_len),
                "bitscore": str(self.bitscore),
                "evalue": str(self.evalue),
                "is_significant": "true" if self.is_significant else "false",
            },
        )

        if self.clan is not None:
            attr.custom["clan"] = str(self.clan)

        if self.active_sites is not None:
            # "[" and "]" are treated as extra separators alongside ",".
            pieces = (
                self.active_sites
                .replace("]", ",")
                .replace("[", ",")
                .split(",")
            )
            # Strip *before* filtering: testing the raw piece against ""
            # let whitespace-only pieces through as empty list entries.
            stripped = (piece.strip() for piece in pieces)
            attr.custom["active_sites"] = [site for site in stripped if site]

        yield GFFRecord(
            seqid=self.name,
            source=self.gen_source(software_version, database_version),
            type="protein_hmm_match",
            start=self.ali_start,
            end=self.ali_end,
            score=self.evalue,
            strand=Strand.UNSTRANDED,
            attributes=attr,
        )
Example #2
0
    def as_gff(
        self,
        software_version: Optional[str] = None,
        database_version: Optional[str] = None,
        keep_all: bool = True,
        id_index: int = 1,
    ) -> Iterator[GFFRecord]:
        """Yield one GFF record describing this pattern match.

        The feature type is ``propeptide_cleavage_site`` when ``self.kind``
        is ``"kex2_cutsite"`` and ``polypeptide_motif`` for every other kind.

        Args:
            software_version: Passed through to ``self.gen_source``.
            database_version: Passed through to ``self.gen_source``.
            keep_all: Not used by this method.
            id_index: Not used by this method.

        Yields:
            Exactly one unscored record on ``self.name`` spanning
            ``self.start`` to ``self.end``.
        """
        custom = {
            "kind": self.kind,
            "pattern": self.pattern,
            "match": self.match,
        }
        attributes = GFFAttributes(custom=custom)

        is_kex2 = self.kind == "kex2_cutsite"
        feature_type = (
            "propeptide_cleavage_site" if is_kex2 else "polypeptide_motif"
        )

        source = self.gen_source(software_version, database_version)
        record = GFFRecord(
            seqid=self.name,
            source=source,
            type=feature_type,
            start=self.start,
            end=self.end,
            score=None,
            strand=Strand.PLUS,
            attributes=attributes,
        )
        yield record
Example #3
0
    def as_gff(
        self,
        software_version: Optional[str] = None,
        database_version: Optional[str] = None,
        keep_all: bool = False,
        id_index: int = 1,
    ) -> Iterator[GFFRecord]:
        """Yield a ``signal_peptide`` GFF record for a signal peptide call.

        Nothing is yielded unless ``self.prediction`` equals
        ``"SignalPeptide"``.

        Args:
            software_version: Passed through to ``self.gen_source``.
            database_version: Passed through to ``self.gen_source``.
            keep_all: Not used by this method.
            id_index: Not used by this method.

        Yields:
            At most one record on ``self.name`` running from position 0 to
            ``self.cs_pos``, scored with ``self.prob``.
        """
        if self.prediction != "SignalPeptide":
            return

        # A signal peptide call is expected to always carry a cleavage site.
        assert self.cs_pos is not None

        custom = {
            "prediction": str(self.prediction),
            "prob": str(self.prob),
            "cs_pos": str(self.cs_pos),
        }
        attributes = GFFAttributes(custom=custom)

        source = self.gen_source(software_version, database_version)
        record = GFFRecord(
            seqid=self.name,
            source=source,
            type="signal_peptide",
            start=0,
            end=self.cs_pos,
            score=self.prob,
            strand=Strand.UNSTRANDED,
            attributes=attributes,
        )
        yield record
Example #4
0
    def as_gff(
        self,
        software_version: Optional[str] = None,
        database_version: Optional[str] = None,
        keep_all: bool = False,
        id_index: int = 1,
    ) -> Iterator[GFFRecord]:
        """Yield a ``signal_peptide`` GFF record when the protein is secreted.

        Nothing is yielded when ``self.is_secreted`` is false.

        Args:
            software_version: Passed through to ``self.gen_source``.
            database_version: Passed through to ``self.gen_source``.
            keep_all: Not used by this method.
            id_index: Not used by this method.

        Yields:
            At most one record on ``self.name`` running from position 0 to
            ``self.cmax_pos - 1``, scored with ``self.sprob``.
        """

        def flag(value: object) -> str:
            # Booleans are written out as lowercase "true"/"false".
            return "true" if value else "false"

        if not self.is_secreted:
            return

        custom = {
            "is_secreted": flag(self.is_secreted),
            "cmax": str(self.cmax),
            "cmax_pos": str(self.cmax_pos),
            "cmax_decision": flag(self.cmax_decision),
            "sprob": str(self.sprob),
            "sprob_decision": flag(self.sprob_decision),
        }
        attributes = GFFAttributes(custom=custom)

        source = self.gen_source(software_version, database_version)
        record = GFFRecord(
            seqid=self.name,
            source=source,
            type="signal_peptide",
            start=0,
            end=self.cmax_pos - 1,
            score=self.sprob,
            strand=Strand.PLUS,
            attributes=attributes,
        )
        yield record
Example #5
0
    def as_gff(
        self,
        software_version: Optional[str] = None,
        database_version: Optional[str] = None,
        keep_all: bool = True,
        id_index: int = 1,
    ) -> Iterator[GFFRecord]:
        """Yield GFF records for predicted organelle localization peptides.

        Up to two records are produced: one if ``self.chloroplast_decision``
        is true, then one if ``self.mitochondria_decision`` is true.

        Args:
            software_version: Passed through to ``self.gen_source``.
            database_version: Passed through to ``self.gen_source``.
            keep_all: Not used by this method.
            id_index: Not used by this method.

        Yields:
            Zero, one or two records on ``self.name``, each scored with the
            matching probability.
        """
        # NOTE(review): the chloroplast record is emitted with Strand.PLUS
        # but the mitochondrial one with Strand.UNSTRANDED. Confirm whether
        # that difference is intended.
        record_source = self.gen_source(software_version, database_version)

        if self.chloroplast_decision:
            # A positive decision is expected to come with coordinates.
            assert self.chloroplast_start is not None
            assert self.chloroplast_end is not None

            chloroplast_attr = GFFAttributes(
                note=["Putative internal chloroplast localization peptide"],
                custom={"prob": str(self.chloroplast_prob)},
            )
            chloroplast_record = GFFRecord(
                seqid=self.name,
                source=record_source,
                type="peptide_localization_signal",
                start=self.chloroplast_start,
                end=self.chloroplast_end,
                score=self.chloroplast_prob,
                strand=Strand.PLUS,
                attributes=chloroplast_attr,
            )
            yield chloroplast_record

        if self.mitochondria_decision:
            # A positive decision is expected to come with coordinates.
            assert self.mitochondria_start is not None
            assert self.mitochondria_end is not None

            mitochondria_attr = GFFAttributes(
                note=["Putative internal mitochondrial localization peptide"],
                custom={"prob": str(self.mitochondria_prob)},
            )
            mitochondria_record = GFFRecord(
                seqid=self.name,
                source=record_source,
                type="mitochondrial_targeting_signal",
                start=self.mitochondria_start,
                end=self.mitochondria_end,
                score=self.mitochondria_prob,
                strand=Strand.UNSTRANDED,
                attributes=mitochondria_attr,
            )
            yield mitochondria_record
Example #6
0
    def as_gff(
        self,
        software_version: Optional[str] = None,
        database_version: Optional[str] = None,
        keep_all: bool = False,
        id_index: int = 1,
    ) -> Iterator[GFFRecord]:
        """Yield a GFF record for the predicted targeting peptide.

        Nothing is yielded when there is no cleavage site string, when that
        string contains ``"Probable protein fragment"``, or when
        ``self.prediction`` is not one of ``SP``, ``mTP``, ``cTP``, ``luTP``.

        Args:
            software_version: Passed through to ``self.gen_source``.
            database_version: Passed through to ``self.gen_source``.
            keep_all: Not used by this method.
            id_index: Not used by this method.

        Yields:
            At most one record on ``self.name`` running from position 0 to
            the parsed cleavage site minus one, scored with the probability
            of the predicted class.
        """
        if self.cs_pos is None or "Probable protein fragment" in self.cs_pos:
            return

        # The parsed result is indexed by "cs" and "cs_prob" below.
        cut_site = cs_actual_pos(self.cs_pos)

        custom = {
            "prediction": str(self.prediction),
            "prob_signal": str(self.sp),
            "prob_mitochondrial": str(self.mtp),
            "prob_chloroplast": str(self.ctp),
            "prob_lumen": str(self.lutp),
            "prob_other": str(self.other),
            "prob_cut_site": str(cut_site["cs_prob"]),
        }
        attributes = GFFAttributes(custom=custom)

        # Prediction label -> (GFF feature type, probability used as score).
        by_prediction = {
            "SP": ("signal_peptide", self.sp),
            "mTP": ("mitochondrial_targeting_signal", self.mtp),
            "cTP": ("transit_peptide", self.ctp),
            "luTP": ("transit_peptide", self.lutp),
        }
        selected = by_prediction.get(self.prediction)
        if selected is None:
            # Any other prediction label produces no record.
            return
        feature_type, prob = selected

        source = self.gen_source(software_version, database_version)
        record = GFFRecord(
            seqid=self.name,
            source=source,
            type=feature_type,
            start=0,
            end=int(cut_site["cs"]) - 1,
            score=prob,
            strand=Strand.UNSTRANDED,
            attributes=attributes,
        )
        yield record
Example #7
0
    def as_gff(
        self,
        software_version: Optional[str] = None,
        database_version: Optional[str] = None,
        keep_all: bool = False,
        id_index: int = 1,
    ) -> Iterator[GFFRecord]:
        """Yield this alignment as a ``protein_match`` GFF record.

        Nothing is yielded when ``self.decide_significant()`` is false,
        unless ``keep_all`` is set.

        Args:
            software_version: Passed through to ``self.gen_source``.
            database_version: Passed through to ``self.gen_source``.
            keep_all: When true, skip the significance check.
            id_index: Not used by this method.

        Yields:
            At most one record on ``self.query`` spanning ``qstart`` to
            ``qend``, with the e-value as its score.
        """
        if not keep_all and not self.decide_significant():
            return

        target = Target(self.target, self.tstart, self.tend)
        gap = parse_cigar(self.cigar)

        # Each custom attribute is the stringified field of the same name.
        custom_fields = (
            "tlen",
            "evalue",
            "gapopen",
            "pident",
            "alnlen",
            "raw",
            "bits",
            "mismatch",
            "qcov",
            "tcov",
        )
        custom = {name: str(getattr(self, name)) for name in custom_fields}
        attributes = GFFAttributes(target=target, gap=gap, custom=custom)

        source = self.gen_source(software_version, database_version)
        record = GFFRecord(
            seqid=self.query,
            source=source,
            type="protein_match",
            start=self.qstart,
            end=self.qend,
            score=self.evalue,
            strand=Strand.UNSTRANDED,
            attributes=attributes,
        )
        yield record
Example #8
0
    def as_gff(
        self,
        software_version: Optional[str] = None,
        database_version: Optional[str] = None,
        keep_all: bool = False,
        id_index: int = 1,
    ) -> Iterator[GFFRecord]:
        """Yield a ``signal_peptide`` GFF record when a cut site is known.

        Nothing is yielded when ``self.cs_pos`` is ``None`` or is exactly
        the "probable protein fragment" sentinel string.

        Args:
            software_version: Passed through to ``self.gen_source``.
            database_version: Passed through to ``self.gen_source``.
            keep_all: Not used by this method.
            id_index: Not used by this method.

        Yields:
            At most one record on ``self.name`` running from position 0 to
            the parsed cleavage site minus one, scored with
            ``self.prob_signal``.
        """
        no_cut_site = (
            self.cs_pos is None
            or self.cs_pos == "CS pos: ?. Probable protein fragment"
        )
        if no_cut_site:
            return

        # The parsed result is indexed by "cs" and "cs_prob" below.
        cut_site = s6_cs_actual_pos(self.cs_pos)

        custom = {
            "prediction": str(self.prediction),
            "prob_signal": str(self.prob_signal),
            "prob_other": str(self.prob_other),
            "prob_cut_site": str(cut_site["cs_prob"]),
        }
        attributes = GFFAttributes(custom=custom)

        source = self.gen_source(software_version, database_version)
        record = GFFRecord(
            seqid=self.name,
            source=source,
            type="signal_peptide",
            start=0,
            end=int(cut_site["cs"]) - 1,
            score=self.prob_signal,
            strand=Strand.PLUS,
            attributes=attributes,
        )
        yield record
Example #9
0
    def as_gff(
        self,
        software_version: Optional[str] = None,
        database_version: Optional[str] = None,
        keep_all: bool = False,
        id_index: int = 1,
    ) -> Iterator[GFFRecord]:
        """Yield this HMM domain hit as a ``protein_hmm_match`` GFF record.

        Nothing is yielded when ``self.decide_significant()`` is false,
        unless ``keep_all`` is set.

        Args:
            software_version: Passed through to ``self.gen_source``.
            database_version: Passed through to ``self.gen_source``.
            keep_all: When true, skip the significance check.
            id_index: Not used by this method.

        Yields:
            At most one record on ``self.query`` spanning ``query_from`` to
            ``query_to``, scored with ``self.domain_i_evalue``.
        """
        if not keep_all and not self.decide_significant():
            return

        target = Target(self.hmm, self.hmm_from, self.hmm_to)

        # Each custom attribute is the stringified field of the same name.
        custom_fields = (
            "hmm_len",
            "query_len",
            "full_evalue",
            "full_score",
            "full_bias",
            "nmatches",
            "domain_c_evalue",
            "domain_i_evalue",
            "domain_score",
            "domain_bias",
            "acc",
            "description",
        )
        custom = {name: str(getattr(self, name)) for name in custom_fields}
        attributes = GFFAttributes(target=target, custom=custom)

        source = self.gen_source(software_version, database_version)
        record = GFFRecord(
            seqid=self.query,
            source=source,
            type="protein_hmm_match",
            start=self.query_from,
            end=self.query_to,
            score=self.domain_i_evalue,
            strand=Strand.UNSTRANDED,
            attributes=attributes,
        )
        yield record