def as_gff(
    self,
    software_version: Optional[str] = None,
    database_version: Optional[str] = None,
    keep_all: bool = False,
    id_index: int = 1,
) -> Iterator[GFFRecord]:
    """Yield this HMM match as a ``protein_hmm_match`` GFF record.

    Nothing is yielded when the match is not significant, unless
    ``keep_all`` is true.

    Args:
        software_version: Forwarded to ``self.gen_source``.
        database_version: Forwarded to ``self.gen_source``.
        keep_all: Emit the record even if ``self.is_significant`` is false.
        id_index: Not used by this implementation.

    Yields:
        At most one record on ``self.name`` spanning ``ali_start`` to
        ``ali_end``, with the e-value as its score.
    """
    if not (keep_all or self.is_significant):
        return

    attr = GFFAttributes(
        target=Target(self.hmm, self.hmm_start, self.hmm_end),
        custom={
            "env_start": str(self.env_start),
            "env_end": str(self.env_end),
            "hmm_name": str(self.hmm_name),
            "hmm_type": str(self.hmm_type),
            "hmm_len": str(self.hmm_len),
            "bitscore": str(self.bitscore),
            "evalue": str(self.evalue),
            "is_significant": "true" if self.is_significant else "false",
        },
    )

    if self.clan is not None:
        attr.custom["clan"] = str(self.clan)

    if self.active_sites is not None:
        # Square brackets are treated as extra separators alongside commas.
        tokens = (
            self.active_sites
            .replace("]", ",")
            .replace("[", ",")
            .split(",")
        )
        # Strip first, then filter: a whitespace-only token (e.g. from
        # "], [") must not survive as an empty string in the output list.
        stripped = (token.strip() for token in tokens)
        attr.custom["active_sites"] = [token for token in stripped if token]

    yield GFFRecord(
        seqid=self.name,
        source=self.gen_source(software_version, database_version),
        type="protein_hmm_match",
        start=self.ali_start,
        end=self.ali_end,
        score=self.evalue,
        strand=Strand.UNSTRANDED,
        attributes=attr,
    )
def as_gff(
    self,
    software_version: Optional[str] = None,
    database_version: Optional[str] = None,
    keep_all: bool = True,
    id_index: int = 1,
) -> Iterator[GFFRecord]:
    """Yield this pattern match as a single GFF record.

    The feature type is ``propeptide_cleavage_site`` when ``self.kind``
    is ``"kex2_cutsite"`` and ``polypeptide_motif`` for every other kind.

    Args:
        software_version: Forwarded to ``self.gen_source``.
        database_version: Forwarded to ``self.gen_source``.
        keep_all: Not used by this implementation; a record is always
            produced.
        id_index: Not used by this implementation.

    Yields:
        Exactly one unscored, plus-strand record on ``self.name``
        spanning ``start`` to ``end``.
    """
    details = GFFAttributes(
        custom={
            "kind": self.kind,
            "pattern": self.pattern,
            "match": self.match,
        },
    )

    feature_type = (
        "propeptide_cleavage_site"
        if self.kind == "kex2_cutsite"
        else "polypeptide_motif"
    )

    yield GFFRecord(
        seqid=self.name,
        source=self.gen_source(software_version, database_version),
        type=feature_type,
        start=self.start,
        end=self.end,
        score=None,
        strand=Strand.PLUS,
        attributes=details,
    )
def as_gff(
    self,
    software_version: Optional[str] = None,
    database_version: Optional[str] = None,
    keep_all: bool = False,
    id_index: int = 1,
) -> Iterator[GFFRecord]:
    """Yield a ``signal_peptide`` GFF record for a positive prediction.

    Nothing is yielded unless ``self.prediction`` equals
    ``"SignalPeptide"``.

    Args:
        software_version: Forwarded to ``self.gen_source``.
        database_version: Forwarded to ``self.gen_source``.
        keep_all: Not used by this implementation.
        id_index: Not used by this implementation.

    Yields:
        At most one record on ``self.name`` running from 0 to
        ``self.cs_pos``, with ``self.prob`` as its score.
    """
    if self.prediction != "SignalPeptide":
        return

    # A signal peptide call is expected to always come with a cleavage
    # site position; the assert also narrows the Optional for type checkers.
    assert self.cs_pos is not None

    reported_fields = ("prediction", "prob", "cs_pos")
    details = GFFAttributes(
        custom={name: str(getattr(self, name)) for name in reported_fields},
    )

    yield GFFRecord(
        seqid=self.name,
        source=self.gen_source(software_version, database_version),
        type="signal_peptide",
        start=0,
        end=self.cs_pos,
        score=self.prob,
        strand=Strand.UNSTRANDED,
        attributes=details,
    )
def as_gff(
    self,
    software_version: Optional[str] = None,
    database_version: Optional[str] = None,
    keep_all: bool = False,
    id_index: int = 1,
) -> Iterator[GFFRecord]:
    """Yield a ``signal_peptide`` GFF record when the protein is secreted.

    Nothing is yielded when ``self.is_secreted`` is falsy.

    Args:
        software_version: Forwarded to ``self.gen_source``.
        database_version: Forwarded to ``self.gen_source``.
        keep_all: Not used by this implementation.
        id_index: Not used by this implementation.

    Yields:
        At most one plus-strand record on ``self.name`` running from 0 to
        ``self.cmax_pos - 1``, with ``self.sprob`` as its score.
    """

    def _flag(value: object) -> str:
        # Render a truthy/falsy value as the lowercase GFF attribute text.
        return "true" if value else "false"

    if not self.is_secreted:
        return

    details = GFFAttributes(
        custom={
            "is_secreted": _flag(self.is_secreted),
            "cmax": str(self.cmax),
            "cmax_pos": str(self.cmax_pos),
            "cmax_decision": _flag(self.cmax_decision),
            "sprob": str(self.sprob),
            "sprob_decision": _flag(self.sprob_decision),
        },
    )

    yield GFFRecord(
        seqid=self.name,
        source=self.gen_source(software_version, database_version),
        type="signal_peptide",
        start=0,
        end=self.cmax_pos - 1,
        score=self.sprob,
        strand=Strand.PLUS,
        attributes=details,
    )
def as_gff(
    self,
    software_version: Optional[str] = None,
    database_version: Optional[str] = None,
    keep_all: bool = True,
    id_index: int = 1,
) -> Iterator[GFFRecord]:
    """Yield GFF records for predicted localization peptides.

    A record is produced for the chloroplast peptide when
    ``self.chloroplast_decision`` is truthy, and another for the
    mitochondrial peptide when ``self.mitochondria_decision`` is truthy.
    The chloroplast record, if any, comes first.

    Args:
        software_version: Forwarded to ``self.gen_source``.
        database_version: Forwarded to ``self.gen_source``.
        keep_all: Not used by this implementation.
        id_index: Not used by this implementation.

    Yields:
        Zero, one or two records on ``self.name``, each scored with the
        corresponding probability.
    """
    source = self.gen_source(software_version, database_version)

    if self.chloroplast_decision:
        ctp_start = self.chloroplast_start
        ctp_end = self.chloroplast_end
        # A positive decision is expected to come with coordinates.
        assert ctp_start is not None
        assert ctp_end is not None

        yield GFFRecord(
            seqid=self.name,
            source=source,
            type="peptide_localization_signal",
            start=ctp_start,
            end=ctp_end,
            score=self.chloroplast_prob,
            strand=Strand.PLUS,
            attributes=GFFAttributes(
                note=["Putative internal chloroplast localization peptide"],
                custom={"prob": str(self.chloroplast_prob)},
            ),
        )

    if self.mitochondria_decision:
        mtp_start = self.mitochondria_start
        mtp_end = self.mitochondria_end
        assert mtp_start is not None
        assert mtp_end is not None

        # NOTE(review): this record is UNSTRANDED while the chloroplast
        # record above is PLUS -- confirm the difference is intended.
        yield GFFRecord(
            seqid=self.name,
            source=source,
            type="mitochondrial_targeting_signal",
            start=mtp_start,
            end=mtp_end,
            score=self.mitochondria_prob,
            strand=Strand.UNSTRANDED,
            attributes=GFFAttributes(
                note=["Putative internal mitochondrial localization peptide"],
                custom={"prob": str(self.mitochondria_prob)},
            ),
        )
def as_gff(
    self,
    software_version: Optional[str] = None,
    database_version: Optional[str] = None,
    keep_all: bool = False,
    id_index: int = 1,
) -> Iterator[GFFRecord]:
    """Yield a GFF record for the predicted targeting peptide.

    Nothing is yielded when there is no cleavage-site string, when that
    string contains ``"Probable protein fragment"``, or when
    ``self.prediction`` is not one of ``SP``, ``mTP``, ``cTP`` or
    ``luTP``.

    Args:
        software_version: Forwarded to ``self.gen_source``.
        database_version: Forwarded to ``self.gen_source``.
        keep_all: Not used by this implementation.
        id_index: Not used by this implementation.

    Yields:
        At most one record on ``self.name`` running from 0 to the parsed
        cleavage site minus one, scored with the probability of the
        predicted class.
    """
    if self.cs_pos is None or "Probable protein fragment" in self.cs_pos:
        return

    # The parsed result is indexed by "cs" and "cs_prob" below.
    cleavage = cs_actual_pos(self.cs_pos)

    details = GFFAttributes(
        custom={
            "prediction": str(self.prediction),
            "prob_signal": str(self.sp),
            "prob_mitochondrial": str(self.mtp),
            "prob_chloroplast": str(self.ctp),
            "prob_lumen": str(self.lutp),
            "prob_other": str(self.other),
            "prob_cut_site": str(cleavage["cs_prob"]),
        },
    )

    # Predicted class -> (GFF feature type, probability used as the score).
    by_prediction = {
        "SP": ("signal_peptide", self.sp),
        "mTP": ("mitochondrial_targeting_signal", self.mtp),
        "cTP": ("transit_peptide", self.ctp),
        "luTP": ("transit_peptide", self.lutp),
    }
    selected = by_prediction.get(self.prediction)
    if selected is None:
        # Unrecognised prediction label: there is no feature to report.
        return
    feature_type, prob = selected

    yield GFFRecord(
        seqid=self.name,
        source=self.gen_source(software_version, database_version),
        type=feature_type,
        start=0,
        end=int(cleavage["cs"]) - 1,
        score=prob,
        strand=Strand.UNSTRANDED,
        attributes=details,
    )
def as_gff(
    self,
    software_version: Optional[str] = None,
    database_version: Optional[str] = None,
    keep_all: bool = False,
    id_index: int = 1,
) -> Iterator[GFFRecord]:
    """Yield this alignment as a ``protein_match`` GFF record.

    Nothing is yielded when ``self.decide_significant()`` is false,
    unless ``keep_all`` is true.

    Args:
        software_version: Forwarded to ``self.gen_source``.
        database_version: Forwarded to ``self.gen_source``.
        keep_all: Emit the record regardless of significance.
        id_index: Not used by this implementation.

    Yields:
        At most one record on ``self.query`` spanning ``qstart`` to
        ``qend``, with the e-value as its score.
    """
    if not keep_all and not self.decide_significant():
        return

    target = Target(self.target, self.tstart, self.tend)
    gap = parse_cigar(self.cigar)

    # Each of these attributes is reported under its own name, stringified.
    reported_fields = (
        "tlen",
        "evalue",
        "gapopen",
        "pident",
        "alnlen",
        "raw",
        "bits",
        "mismatch",
        "qcov",
        "tcov",
    )
    custom = {name: str(getattr(self, name)) for name in reported_fields}

    yield GFFRecord(
        seqid=self.query,
        source=self.gen_source(software_version, database_version),
        type="protein_match",
        start=self.qstart,
        end=self.qend,
        score=self.evalue,
        strand=Strand.UNSTRANDED,
        attributes=GFFAttributes(target=target, gap=gap, custom=custom),
    )
def as_gff(
    self,
    software_version: Optional[str] = None,
    database_version: Optional[str] = None,
    keep_all: bool = False,
    id_index: int = 1,
) -> Iterator[GFFRecord]:
    """Yield a ``signal_peptide`` GFF record for a parsed cleavage site.

    Nothing is yielded when ``self.cs_pos`` is ``None`` or is exactly
    ``"CS pos: ?. Probable protein fragment"``.

    Args:
        software_version: Forwarded to ``self.gen_source``.
        database_version: Forwarded to ``self.gen_source``.
        keep_all: Not used by this implementation.
        id_index: Not used by this implementation.

    Yields:
        At most one plus-strand record on ``self.name`` running from 0 to
        the parsed cleavage site minus one, scored with
        ``self.prob_signal``.
    """
    fragment_marker = "CS pos: ?. Probable protein fragment"
    if self.cs_pos is None or self.cs_pos == fragment_marker:
        return

    # The parsed result is indexed by "cs" and "cs_prob" below.
    cleavage = s6_cs_actual_pos(self.cs_pos)

    details = GFFAttributes(
        custom={
            "prediction": str(self.prediction),
            "prob_signal": str(self.prob_signal),
            "prob_other": str(self.prob_other),
            "prob_cut_site": str(cleavage["cs_prob"]),
        },
    )

    yield GFFRecord(
        seqid=self.name,
        source=self.gen_source(software_version, database_version),
        type="signal_peptide",
        start=0,
        end=int(cleavage["cs"]) - 1,
        score=self.prob_signal,
        strand=Strand.PLUS,
        attributes=details,
    )
def as_gff(
    self,
    software_version: Optional[str] = None,
    database_version: Optional[str] = None,
    keep_all: bool = False,
    id_index: int = 1,
) -> Iterator[GFFRecord]:
    """Yield this domain hit as a ``protein_hmm_match`` GFF record.

    Nothing is yielded when ``self.decide_significant()`` is false,
    unless ``keep_all`` is true.

    Args:
        software_version: Forwarded to ``self.gen_source``.
        database_version: Forwarded to ``self.gen_source``.
        keep_all: Emit the record regardless of significance.
        id_index: Not used by this implementation.

    Yields:
        At most one record on ``self.query`` spanning ``query_from`` to
        ``query_to``, with ``self.domain_i_evalue`` as its score.
    """
    if not keep_all and not self.decide_significant():
        return

    target = Target(self.hmm, self.hmm_from, self.hmm_to)

    # Each of these attributes is reported under its own name, stringified.
    reported_fields = (
        "hmm_len",
        "query_len",
        "full_evalue",
        "full_score",
        "full_bias",
        "nmatches",
        "domain_c_evalue",
        "domain_i_evalue",
        "domain_score",
        "domain_bias",
        "acc",
        "description",
    )
    custom = {name: str(getattr(self, name)) for name in reported_fields}

    yield GFFRecord(
        seqid=self.query,
        source=self.gen_source(software_version, database_version),
        type="protein_hmm_match",
        start=self.query_from,
        end=self.query_to,
        score=self.domain_i_evalue,
        strand=Strand.UNSTRANDED,
        attributes=GFFAttributes(target=target, custom=custom),
    )