예제 #1
0
    def __init__(self, location: Location, translation: str, locus_tag: str = None,
                 protein_id: str = None, product: str = "", gene: str = None,
                 translation_table: int = 1) -> None:
        super().__init__(location, feature_type="CDS")
        _verify_location(location)
        # mandatory
        self._gene_functions = GeneFunctionAnnotations()

        if not (protein_id or locus_tag or gene):
            raise ValueError("CDSFeature requires at least one of: gene, protein_id, locus_tag")
        # semi-optional
        self.protein_id = _sanitise_id_value(protein_id)
        self.locus_tag = _sanitise_id_value(locus_tag)
        self.gene = _sanitise_id_value(gene)
        self.translation = str(translation)

        # optional
        if not isinstance(product, str):
            raise TypeError("product must be a string, not %s" % type(product))
        self.product = product
        self.transl_table = int(translation_table)
        self._sec_met = SecMetQualifier()
        self._nrps_pks = NRPSPKSQualifier(self.location.strand)

        self.motifs = []  # type: List[features.CDSMotif]

        # runtime-only data
        self.region = None  # type: Optional[features.Region]
        self.unique_id = None  # type: Optional[str] # set only when added to a record
예제 #2
0
class CDSFeature(Feature):
    """ A feature representing a single CDS/gene. """
    __slots__ = [
        "_translation", "protein_id", "locus_tag", "gene", "product",
        "transl_table", "_sec_met", "_gene_functions", "unique_id",
        "_nrps_pks", "motifs", "region"
    ]

    def __init__(self,
                 location: Location,
                 translation: str,
                 locus_tag: str = None,
                 protein_id: str = None,
                 product: str = "",
                 gene: str = None,
                 translation_table: int = 1) -> None:
        super().__init__(location, feature_type="CDS")
        _verify_location(location)
        # mandatory
        self._gene_functions = GeneFunctionAnnotations()

        if not (protein_id or locus_tag or gene):
            raise ValueError(
                "CDSFeature requires at least one of: gene, protein_id, locus_tag"
            )
        # semi-optional
        self.protein_id = _sanitise_id_value(protein_id)
        self.locus_tag = _sanitise_id_value(locus_tag)
        self.gene = _sanitise_id_value(gene)
        self.translation = str(translation)

        # optional
        if not isinstance(product, str):
            raise TypeError("product must be a string, not %s" % type(product))
        self.product = product
        self.transl_table = int(translation_table)
        self._sec_met = SecMetQualifier()
        self._nrps_pks = NRPSPKSQualifier(self.location.strand)

        self.motifs = []  # type: List[features.CDSMotif]

        # runtime-only data
        self.region = None  # type: Optional[features.Region]
        self.unique_id = None  # type: Optional[str] # set only when added to a record

    @property
    def gene_functions(self) -> GeneFunctionAnnotations:
        """ All gene function annotations for the CDS """
        return self._gene_functions

    @property
    def gene_function(self) -> GeneFunction:
        """ The likely gene function of the CDS, as determined by all annotated
            gene functions.
        """
        return self._gene_functions.get_classification()

    @property
    def sec_met(self) -> Optional[SecMetQualifier]:
        """ The qualifier containing secondary metabolite information for the
            CDSFeature.
        """
        return self._sec_met

    @sec_met.setter
    def sec_met(self, sec_met: SecMetQualifier) -> None:
        if sec_met is not None and not isinstance(sec_met, SecMetQualifier):
            raise TypeError(
                "CDSFeature.sec_met can only be set to an instance of SecMetQualifier"
            )
        self._sec_met = sec_met

    @property
    def nrps_pks(self) -> NRPSPKSQualifier:
        """ The NRPSPKSQualifier of the feature """
        return self._nrps_pks

    @nrps_pks.setter
    def nrps_pks(self, qualifier: NRPSPKSQualifier) -> None:
        if qualifier is not None and not isinstance(qualifier,
                                                    NRPSPKSQualifier):
            raise TypeError(
                "CDSFeature.nrps_pks can only be set to an instance of NRPSPKSQualifier"
            )
        self._nrps_pks = qualifier

    @property
    def translation(self) -> str:
        """ The translation of the CDS, as a string of amino acids """
        return self._translation

    @translation.setter
    def translation(self, translation: str) -> None:
        if not translation:
            raise ValueError("valid translation required")
        invalid = set(translation) - _VALID_TRANSLATION_CHARS
        if invalid:
            raise ValueError("invalid translation characters: %s" % invalid)
        if not _is_valid_translation_length(translation, self.location):
            raise ValueError(
                "translation longer than location allows: %s > %s" %
                (len(translation) * 3, len(self.location)))
        self._translation = translation  # pylint: disable=attribute-defined-outside-init

    def get_accession(self) -> str:
        "Get the gene ID from protein id, gene name or locus_tag, in that order"
        for val in [self.protein_id, self.gene, self.locus_tag]:
            if val:
                return val
        raise ValueError("%s altered to contain no identifiers" % self)

    def get_name(self) -> str:
        "Get the gene ID from locus_tag, gene name or protein id, in that order"
        for val in [self.locus_tag, self.gene, self.protein_id]:
            if val:
                return val
        raise ValueError("%s altered to contain no identifiers" % self)

    @staticmethod
    def from_biopython(
            bio_feature: SeqFeature,
            feature: "CDSFeature" = None,  # type: ignore
            leftovers: Optional[Dict] = None,
            record: Any = None) -> "CDSFeature":
        if leftovers is None:
            leftovers = Feature.make_qualifiers_copy(bio_feature)
        # grab mandatory qualifiers
        transl_table = 1
        if record:
            transl_table = record.transl_table
        if "transl_table" in leftovers:
            transl_table = int(leftovers.pop("transl_table")[0])

        # semi-optional qualifiers
        protein_id = leftovers.pop("protein_id", [None])[0]
        locus_tag = leftovers.pop("locus_tag", [None])[0]
        gene = leftovers.pop("gene", [None])[0]
        if not (gene or protein_id or locus_tag):
            if "pseudo" in leftovers or "pseudogene" in leftovers:
                gene = "pseudo%s_%s"
            else:
                gene = "cds%s_%s"
            gene = gene % (bio_feature.location.start,
                           bio_feature.location.end)
        name = locus_tag or protein_id or gene

        try:
            _verify_location(bio_feature.location)
        except Exception as err:
            message = "invalid location for %s: %s" % (name, str(err))
            raise SecmetInvalidInputError(message) from err

        try:
            translation = _ensure_valid_translation(
                leftovers.pop("translation", [""])[0], bio_feature.location,
                transl_table, record)
        except ValueError as err:
            raise SecmetInvalidInputError(str(err) + ": %s" % name) from err

        feature = CDSFeature(bio_feature.location,
                             translation,
                             gene=gene,
                             locus_tag=locus_tag,
                             protein_id=protein_id,
                             translation_table=transl_table)

        # grab optional qualifiers
        feature.product = leftovers.pop("product", [""])[0]
        sec_met = leftovers.pop("sec_met_domain", None)
        if sec_met:
            feature.sec_met = SecMetQualifier.from_biopython(sec_met)
        gene_functions = leftovers.pop("gene_functions", [])
        if gene_functions:
            feature.gene_functions.add_from_qualifier(gene_functions)

        # grab parent optional qualifiers
        super(CDSFeature, feature).from_biopython(bio_feature,
                                                  feature=feature,
                                                  leftovers=leftovers)

        return feature

    def to_biopython(self,
                     qualifiers: Dict[str, List[str]] = None) -> SeqFeature:
        mine = OrderedDict()  # type: Dict[str, List[str]]
        # mandatory
        mine["translation"] = [self.translation]
        # optional
        for attr in [
                "gene", "transl_table", "locus_tag", "protein_id", "product"
        ]:
            val = getattr(self, attr)
            if val:
                mine[attr] = [str(val)]
        if self._gene_functions:
            mine["gene_functions"] = list(map(str, self._gene_functions))
            mine["gene_kind"] = [str(self.gene_function)]
        if self.sec_met:
            mine["sec_met_domain"] = list(map(str, self.sec_met))
        # respect qualifiers given to us
        if qualifiers:
            mine.update(qualifiers)
        return super().to_biopython(mine)

    def __repr__(self) -> str:
        return str(self)

    def __str__(self) -> str:
        return "CDS(%s, %s)" % (self.get_name(), self.location)

    def strip_antismash_annotations(self) -> None:
        """ Remove all antiSMASH-specific annotations from the feature """
        self.sec_met = SecMetQualifier()
        self.gene_functions.clear()
        self.nrps_pks = NRPSPKSQualifier(self.location.strand)