Python Protein.alias Examples

Programming Language: Python

Namespace/Package Name: SNDG.BioMongo.Model.Protein

Class/Type: Protein

Method/Function: alias

Examples at hotexamples.com: 2

Python Protein.alias: 2 examples found. These are the top-rated real-world Python examples of SNDG.BioMongo.Model.Protein.Protein.alias, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

objects(30)

Protein(2)

alias(2)

description(2)

gene(2)

ontologies(2)

auth(1)

gene_id(1)

name(1)

organism(1)

properties(1)

seq_collection_id(1)

Example #1

Show file

File: Importer.py Project: ezequieljsosa/sndg-bio

def from_TriTrypDB(name, gff, fasta, tax, tmp_dir=None):
    """Load a genome from a GFF annotation plus FASTA file and store it.

    The function creates and saves a sequence-collection document, one
    contig document per annotated record, and one ``Protein`` document per
    ``(protein, CDS feature)`` pair yielded by ``tritryp_protein_iter``.
    Proteins are inserted in batches. ``_common_annotations`` is run at the
    end.

    Args:
        name: Name given to the genome; also stored as each protein's
            ``organism`` and used to build the default ``tmp_dir``.
        gff: GFF input handed to ``BCBio.GFF.parse``.
        fasta: FASTA input handed to ``sp`` (presumably a sequence parser
            yielding records with an ``id`` -- confirm against the import).
        tax: Taxonomy argument forwarded to ``BioDocFactory.create_genome``.
        tmp_dir: Working directory; defaults to ``"/tmp/<name>/"``.

    Raises:
        IndexError: If the GFF yields no records (``annotation[0]``).
        KeyError: If a protein id has no entry in the gene ids returned by
            ``BioDocFactory.create_contig``.
        Exception: If a protein sequence is longer than 30000 residues.
    """
    from BCBio import GFF
    import re

    genome = {x.id: x for x in sp(fasta)}
    annotation = list(GFF.parse(gff, base_dict=genome))
    contig = annotation[0]

    seqCol = BioDocFactory.create_genome(name, contig, tax, Tax)
    seqCol.save()

    if not tmp_dir:
        tmp_dir = "/tmp/" + name + "/"
    mkdir(tmp_dir)

    # Feature-type mapping handed to BioDocFactory.create_contig.
    type_map = {
        "rRNA": "rRNA",
        "ncRNA": "ncRNA",
        NCBI.f_mRNA: "gene",
        "exon": "exon",
        "gene": "gene",
        NCBI.f_CDS: NCBI.f_CDS,
        "tRNA": "tRNA",
        "tmRNA": "tmRNA",
        "snoRNA": "snoRNA",
        "three_prime_UTR": "three_prime_UTR",
        "five_prime_UTR": "five_prime_UTR",
    }

    gene_ids = {}
    with tqdm(annotation) as pbar:
        for contig in pbar:
            pbar.set_description(contig.id)
            # Sequences above 15,000,000 positions are blanked before the
            # contig document is built.
            if len(contig.seq) > 15000000:
                contig.seq = ""
            contigDoc, gene_ids2 = BioDocFactory.create_contig(
                contig, seqCol, type_map=type_map)
            gene_ids.update(gene_ids2)
            contigDoc.save()

    # GO ids that are never stored (these look like the three GO root
    # terms -- confirm).
    excluded_go = ["GO:0008150", "GO:0003674", "GO:0005575"]
    # EC-number shape: four dot-separated components, where the last three
    # may be "-" placeholders. Every component may span several characters
    # (e.g. 1.1.1.27), hence the "+" on each one, including the last.
    ec_pattern = re.compile(r'^[0-9]+\.[0-9\-]+\.[0-9\-]+\.[0-9\-]+$')

    prots = []
    with tqdm(tritryp_protein_iter(annotation)) as pbar:
        for (protein, cds_f) in pbar:

            protDoc = Protein(seq=str(protein.seq), name=protein.id)

            # Description: first available of description / Note / product.
            if "description" in cds_f.qualifiers:
                protein_description = cds_f.qualifiers['description'][0]
            elif "Note" in cds_f.qualifiers:
                protein_description = cds_f.qualifiers['Note'][0]
            elif "product" in cds_f.qualifiers:
                protein_description = cds_f.qualifiers['product'][0]
            else:
                protein_description = ""

            protDoc.description = protein_description

            gos = []
            if "Ontology_term" in cds_f.qualifiers:
                gos = [
                    x.lower() for x in cds_f.qualifiers["Ontology_term"]
                    if "GO:" in x and x not in excluded_go
                ]

            # The first space-separated token of the Note qualifier is
            # treated as an EC number when it has the EC shape.
            if "Note" in cds_f.qualifiers:
                note = cds_f.qualifiers["Note"][0].split(" ")[0]
            else:
                note = ""
            ecs = ["ec:" + note] if ec_pattern.match(note) else []
            ontologies = list(set(ecs + gos))

            protDoc.gene = [protein.id]
            protDoc.ontologies = ontologies
            protDoc.alias = [protein.id]

            if len(protDoc.seq) > 30000:
                raise Exception("No existen proteinas tan largas...")
            protDoc.gene_id = gene_ids[protein.id]
            protDoc.organism = name
            protDoc.auth = str(BioMongoDB.demo_id)
            protDoc.seq_collection_id = seqCol
            prots.append(protDoc)
            # Flush the pending batch whenever the progress counter reaches
            # a non-zero multiple of 1000.
            if pbar.n and ((pbar.n % 1000) == 0):
                Protein.objects.insert(prots)
                prots = []
    # Insert whatever is left over from the last partial batch.
    if prots:
        Protein.objects.insert(prots)

    _common_annotations(name, tmp_dir)

Example #2

Show file

    def create_protein(cls, seqrecord, feature, exons=[]):
        """Build a ``Protein`` document from a sequence record and a feature.

        Args:
            seqrecord: Record whose ``seq`` is stored (as ``str``) as the
                protein sequence.
            feature: Feature whose ``qualifiers`` mapping supplies the locus
                tag, name, description, cross references and ontology terms.
                Must contain ``"locus_tag"``.
            exons: Currently unused by this method.

        Returns:
            The populated (unsaved) ``Protein`` instance.

        Raises:
            KeyError: If ``"locus_tag"`` is missing, or if
                ``"gene_symbol_source"`` is present and non-numeric while
                ``"gene_symbol"`` is missing.
        """
        qualifiers = feature.qualifiers
        # GO ids that are never stored (these look like the three GO root
        # terms -- confirm).
        excluded_go = ("GO:0008150", "GO:0003674", "GO:0005575")

        def ec_terms(key):
            # Lower-cased values of qualifiers[key] that contain a dot,
            # prefixed with "ec:" unless they already start with "ec".
            if key not in qualifiers:
                return []
            terms = []
            for raw in qualifiers[key]:
                if "." not in raw:
                    continue
                lowered = raw.lower()
                terms.append(
                    lowered if lowered.startswith("ec") else "ec:" + lowered)
            return terms

        def go_terms(key):
            # Lower-cased GO terms of qualifiers[key], minus excluded ids.
            if key not in qualifiers:
                return []
            return [
                raw.lower() for raw in qualifiers[key]
                if "GO:" in raw and raw not in excluded_go
            ]

        alias = cls.alias(feature)

        locus_tag = qualifiers["locus_tag"][0]
        protein_name = locus_tag
        # NOTE: the record sequence is stored as-is; it is not taken from a
        # "translation" qualifier nor translated from the feature/exons.
        seq = str(seqrecord.seq)

        if "gene_symbol_source" in qualifiers:
            try:
                int(qualifiers["gene_symbol_source"][0])
            except (ValueError, TypeError, IndexError):
                # Non-numeric source: use the gene symbol as the name.
                protein_name = qualifiers['gene_symbol'][0]
            else:
                # Numeric source: the protein is left without a name.
                protein_name = ""

        p = Protein(seq=seq, name=protein_name)

        # Description: first available of description / Note / product.
        protein_description = ""
        for key in ("description", "Note", "product"):
            if key in qualifiers:
                protein_description = qualifiers[key][0]
                break

        bp = BioProperty(
            _type="annotation",
            description="homolog proteins and sources used for the annotation")

        if "protein_id" in qualifiers:
            bp.ncbi_protein_id = qualifiers["protein_id"][0]

        # "Dbxref" takes precedence over "db_xref" when both are present.
        if "db_xref" in qualifiers:
            bp.ncbi_db_xref = qualifiers["db_xref"][0]
        if "Dbxref" in qualifiers:
            bp.ncbi_db_xref = qualifiers["Dbxref"][0]

        if "top_cog_hit" in qualifiers:
            bp.cog = qualifiers["top_cog_hit"][0]

        # "gene_product_name_source" takes precedence over
        # "gene_symbol_source" when both are present.
        if "gene_symbol_source" in qualifiers:
            bp.source = qualifiers["gene_symbol_source"][0]
        if "gene_product_name_source" in qualifiers:
            bp.source = qualifiers["gene_product_name_source"][0]

        # NOTE(review): every dotted "Dbxref" value is treated as an EC
        # number, whatever database it refers to -- confirm this is intended.
        ecs = ec_terms("EC") + ec_terms("EC_number") + ec_terms("Dbxref")

        # GO terms are accumulated from all three qualifiers so that no
        # source overrides another; duplicates are removed by the set below.
        gos = go_terms("db_xref") + go_terms("GO") + go_terms("Ontology_term")

        ontologies = list(set(ecs + gos))

        if locus_tag != protein_name:
            p.gene = [locus_tag, protein_name]
        else:
            p.gene = [locus_tag]

        p.name = protein_name
        p.description = protein_description
        p.ontologies = ontologies
        p.properties = [bp]

        p.alias = alias

        return p