Exemple #1
0
    def from_biopython(bio_feature: SeqFeature, feature: "CDSFeature" = None,  # type: ignore
                       leftovers: Optional[Dict] = None, record: Any = None) -> "CDSFeature":
        """ Builds a CDSFeature from a biopython SeqFeature.

            Arguments:
                bio_feature: the SeqFeature to convert
                feature: never read; a new CDSFeature is always constructed
                leftovers: the qualifiers still to be consumed, entries are
                           popped as they are used; when None, a copy is made
                           with Feature.make_qualifiers_copy()
                record: optional, supplies the default translation table and
                        is handed on to _ensure_valid_translation()

            Returns:
                the newly constructed CDSFeature

            Raises:
                SecmetInvalidInputError: if _verify_location() raises, or if
                    _ensure_valid_translation() raises a ValueError
        """
        qualifiers = leftovers if leftovers is not None else Feature.make_qualifiers_copy(bio_feature)

        # mandatory: the translation table, a qualifier on the feature itself
        # takes priority over the record's table, with 1 as the last resort
        table = record.transl_table if record else 1
        if "transl_table" in qualifiers:
            table = int(qualifiers.pop("transl_table")[0])

        # semi-optional: at least one identifier must exist, so when none was
        # supplied a gene name is generated from the location's coordinates
        protein_id = qualifiers.pop("protein_id", [None])[0]
        locus_tag = qualifiers.pop("locus_tag", [None])[0]
        gene = qualifiers.pop("gene", [None])[0]
        if not gene and not protein_id and not locus_tag:
            is_pseudo = "pseudo" in qualifiers or "pseudogene" in qualifiers
            template = "pseudo%s_%s" if is_pseudo else "cds%s_%s"
            gene = template % (bio_feature.location.start, bio_feature.location.end)
        # the identifier used in any error message below
        name = locus_tag or protein_id or gene

        # any failure in location verification is reported as invalid input
        try:
            _verify_location(bio_feature.location)
        except Exception as err:
            raise SecmetInvalidInputError("invalid location for %s: %s" % (name, str(err))) from err

        raw_translation = qualifiers.pop("translation", [""])[0]
        try:
            translation = _ensure_valid_translation(raw_translation, bio_feature.location,
                                                    table, record)
        except ValueError as err:
            raise SecmetInvalidInputError(str(err) + ": %s" % name) from err

        cds = CDSFeature(bio_feature.location, translation,
                         gene=gene, locus_tag=locus_tag, protein_id=protein_id,
                         translation_table=table)

        # optional qualifiers specific to a CDS
        cds.product = qualifiers.pop("product", [""])[0]
        sec_met = qualifiers.pop("sec_met_domain", None)
        if sec_met:
            cds.sec_met = SecMetQualifier.from_biopython(sec_met)
        functions = qualifiers.pop("gene_functions", [])
        if functions:
            cds.gene_functions.add_from_qualifier(functions)
        cds.nrps_pks.add_from_qualifier(qualifiers.pop("NRPS_PKS", []))

        # whatever qualifiers remain are left to the parent class
        super(CDSFeature, cds).from_biopython(bio_feature, feature=cds, leftovers=qualifiers)

        return cds
Exemple #2
0
    def from_biopython(bio_feature: SeqFeature, feature: "CDSFeature" = None,  # type: ignore
                       leftovers: Optional[Dict] = None, record: Any = None) -> "CDSFeature":
        """ Builds a CDSFeature from a biopython SeqFeature.

            Arguments:
                bio_feature: the SeqFeature to convert
                feature: never read; a new CDSFeature is always constructed
                leftovers: the qualifiers still to be consumed, entries are
                           popped as they are used; when None, a copy is made
                           with Feature.make_qualifiers_copy()
                record: optional, supplies the default translation table and
                        is used to generate a translation when the feature
                        has none (or has one with invalid characters)

            Returns:
                the newly constructed CDSFeature

            Raises:
                SecmetInvalidInputError: if _verify_location() raises, if the
                    translation is too long for the location, or if a
                    translation is needed but cannot be generated from the
                    record
        """
        if leftovers is None:
            leftovers = Feature.make_qualifiers_copy(bio_feature)
        # grab mandatory qualifiers
        transl_table = 1
        if record:
            transl_table = record.transl_table
        # a table given on the feature itself overrides the record's table
        if "transl_table" in leftovers:
            transl_table = int(leftovers.pop("transl_table")[0])
        translation = leftovers.pop("translation", [""])[0]

        # semi-optional qualifiers
        protein_id = leftovers.pop("protein_id", [None])[0]
        locus_tag = leftovers.pop("locus_tag", [None])[0]
        gene = leftovers.pop("gene", [None])[0]
        if not (gene or protein_id or locus_tag):
            # no identifier at all, so build a gene name from the coordinates
            if "pseudo" in leftovers or "pseudogene" in leftovers:
                gene = "pseudo%s_%s"
            else:
                gene = "cds%s_%s"
            gene = gene % (bio_feature.location.start, bio_feature.location.end)
        # a single identifier, chosen once, so that every warning and error
        # about this CDS refers to it by the same name
        name = locus_tag or protein_id or gene

        try:
            _verify_location(bio_feature.location)
        except Exception as err:
            message = "invalid location for %s: %s" % (name, str(err))
            raise SecmetInvalidInputError(message) from err

        # ensure translation is valid if it exists
        if translation:
            invalid = set(translation) - _VALID_TRANSLATION_CHARS
            if invalid:
                logging.warning("Regenerating translation for CDS %s (at %s) containing invalid characters: %s",
                                name, bio_feature.location, invalid)
                # discard it, it will be regenerated from the record below
                translation = ""
        # ensure that the translation fits
        if not _is_valid_translation_length(translation, bio_feature.location):
            raise SecmetInvalidInputError("translation longer than location allows: %s > %s" % (
                                len(translation) * 3, len(bio_feature.location)))
        # finally, generate the translation if it doesn't exist
        if not translation:
            if not record:
                raise SecmetInvalidInputError("no translation in CDS and no record to generate it with")
            if bio_feature.location.end > len(record.seq):
                raise SecmetInvalidInputError("feature missing translation and sequence too short: %s" % name)
            translation = record.get_aa_translation_from_location(bio_feature.location, transl_table)

        # sanity check on the final translation, including a generated one
        assert _is_valid_translation_length(translation, bio_feature.location)

        feature = CDSFeature(bio_feature.location, translation, gene=gene,
                             locus_tag=locus_tag, protein_id=protein_id,
                             translation_table=transl_table)

        # grab optional qualifiers
        feature.product = leftovers.pop("product", [""])[0]
        sec_met = leftovers.pop("sec_met_domain", None)
        if sec_met:
            feature.sec_met = SecMetQualifier.from_biopython(sec_met)
        gene_functions = leftovers.pop("gene_functions", [])
        if gene_functions:
            feature.gene_functions.add_from_qualifier(gene_functions)

        # grab parent optional qualifiers
        super(CDSFeature, feature).from_biopython(bio_feature, feature=feature, leftovers=leftovers)

        return feature