Ejemplo n.º 1
0
    def store_quantification(self,
                             analysis: Analysis,
                             assayacc: str,
                             assaydb: str = "SRA") -> None:
        """Store a quantification linking an assay accession to an analysis.

        The assay is looked up through its Dbxref first (accession
        ``assayacc`` in the Db named ``assaydb``) and by name second. An
        Acquisition named ``assayacc`` is created for that assay, then a
        Quantification joins the acquisition and ``analysis``.

        Raises:
            ImportingError: if no assay matches by dbxref or by name, or if
                a database integrity error occurs.
        """
        # first try to get from Assay dbxref (e.g.: "SRR12345" - from SRA/NCBI)
        try:
            db_assay = Db.objects.get(name=assaydb)
            dbxref_assay = Dbxref.objects.get(accession=assayacc, db=db_assay)
            assay = Assay.objects.get(dbxref=dbxref_assay)
        except ObjectDoesNotExist:
            # then searches name
            try:
                assay = Assay.objects.get(name=assayacc)
            except ObjectDoesNotExist as e:
                # then gives up: report the miss as an import failure
                # instead of leaking the raw lookup exception
                raise ImportingError(e)
        except IntegrityError as e:
            raise ImportingError(e)

        # Both inserts share the same failure handling.
        try:
            acquisition = Acquisition.objects.create(assay=assay,
                                                     name=assayacc)
            Quantification.objects.create(acquisition=acquisition,
                                          analysis=analysis)
        except IntegrityError as e:
            raise ImportingError(e)
Ejemplo n.º 2
0
    def __init__(self,
                 filename: str,
                 doi: str = None,
                 description: str = None,
                 url: str = None) -> None:
        """Prepare the loader state for one input file.

        Fetches or creates the Db record named "FASTA_SOURCE", remembers the
        file name, looks up the "contained in" relationship term and, when a
        DOI is given, resolves the PubDbxref attached to it.

        Raises:
            ImportingError: if the DOI's Dbxref or its PubDbxref is missing.
        """
        # NOTE(review): description and url are part of the lookup here, not
        # defaults -- confirm this is intended if Db.name is unique.
        source_db, _ = Db.objects.get_or_create(name="FASTA_SOURCE",
                                                description=description,
                                                url=url)
        self.db = source_db
        self.filename = filename

        # Relationship term used to tag records with their source file
        self.cvterm_contained_in = Cvterm.objects.get(
            name="contained in", cv__name="relationship")

        # Publication link stays None unless a DOI was supplied
        self.pub_dbxref_doi = None
        if not doi:
            return

        try:
            doi_dbxref = Dbxref.objects.get(accession=doi)
            self.pub_dbxref_doi = PubDbxref.objects.get(dbxref=doi_dbxref)
        except ObjectDoesNotExist as err:
            raise ImportingError(err)
Ejemplo n.º 3
0
    def store_biopython_seq_record(
        self,
        seq_obj: SeqRecord,
        soterm: str,
        organism: str,
        ignore_residues: bool = False,
    ) -> None:
        """Store a Biopython SeqRecord as a new Feature.

        A Dbxref for ``seq_obj.id`` (plus a "contained in" Dbxrefprop holding
        the source file name) is fetched or created first. If a feature with
        that accession and ``soterm`` already exists the import is rejected;
        otherwise a Feature is created and, when the loader was given a DOI,
        linked to that publication.

        Args:
            seq_obj: the sequence record to store.
            soterm: name of the term in the "sequence" cv used as type.
            organism: organism identifier passed to ``retrieve_organism``.
            ignore_residues: when True the residues are stored as an empty
                string; seqlen and the md5 checksum are still computed from
                the real sequence.

        Raises:
            ImportingError: if the sequence is already registered or the
                publication link violates a database constraint.
        """
        soterm_obj = Cvterm.objects.get(name=soterm, cv__name="sequence")
        organism_obj = retrieve_organism(organism)

        dbxref, created = Dbxref.objects.get_or_create(
            db=self.db, accession=seq_obj.id)
        Dbxrefprop.objects.get_or_create(
            dbxref=dbxref,
            type_id=self.cvterm_contained_in.cvterm_id,
            value=self.filename,
            rank=0,
        )

        # Only the lookup is guarded: "not found" is the expected outcome
        # and means the sequence may be stored.
        try:
            retrieve_feature_id(accession=seq_obj.id, soterm=soterm)
        except ObjectDoesNotExist:
            pass
        else:
            raise ImportingError("The sequence {} is already "
                                 "registered.".format(seq_obj.id))

        residues = "" if ignore_residues is True else seq_obj.seq
        m = md5(str(seq_obj.seq).encode()).hexdigest()

        name = None
        if seq_obj.description != "<unknown description>":
            name = seq_obj.description

        # One timestamp so both time columns agree on a new record
        now = datetime.now(timezone.utc)

        # storing feature
        feature = Feature(
            dbxref=dbxref,
            organism=organism_obj,
            uniquename=seq_obj.id,
            name=name,
            residues=residues,
            seqlen=len(seq_obj.seq),
            md5checksum=m,
            type=soterm_obj,
            is_analysis=False,
            is_obsolete=False,
            timeaccessioned=now,
            timelastmodified=now,
        )
        feature.save()

        # DOI: try to link sequence to publication's DOI
        if self.pub_dbxref_doi:
            try:
                FeaturePub.objects.create(
                    feature=feature, pub_id=self.pub_dbxref_doi.pub_id)
            except IntegrityError as e:
                raise ImportingError(e)
Ejemplo n.º 4
0
    def __init__(self, filecontent: str, doi: str = None) -> None:
        """Prepare the attribute filter and the publication to attach.

        Fetches or creates the "null" Db, Dbxref, Cv and Cvterm records,
        selects the attribute filter matching ``filecontent`` and resolves
        the publication: the one registered under ``doi`` when given,
        otherwise the "null" Pub.

        Raises:
            ImportingError: if ``filecontent`` is not one of "genome",
                "polymorphism" or "qtl", or if the DOI is not registered.
        """
        # Placeholder records used when no real reference is available
        self.db_null, _ = Db.objects.get_or_create(name="null")
        placeholder_dbxref, _ = Dbxref.objects.get_or_create(
            db=self.db_null, accession="null")
        placeholder_cv, _ = Cv.objects.get_or_create(name="null")
        placeholder_cvterm, _ = Cvterm.objects.get_or_create(
            cv=placeholder_cv,
            name="null",
            definition="",
            dbxref=placeholder_dbxref,
            is_obsolete=0,
            is_relationshiptype=0,
        )

        # Pick the attribute whitelist for this kind of file
        known_filters = (
            ("genome", VALID_GENOME_ATTRS),
            ("polymorphism", VALID_POLYMORPHISM_ATTRS),
            ("qtl", VALID_QTL_ATTRS),
        )
        for label, attrs in known_filters:
            if filecontent == label:
                self.filter = attrs
                break
        else:
            raise ImportingError(
                "Attributes type required: (eg. genome, polymorphism, qtl)"
            )

        if doi:
            # Dbxref -> PubDbxref -> Pub; any missing link is reported alike
            try:
                doi_dbxref = Dbxref.objects.get(accession=doi)
                doi_pub_link = PubDbxref.objects.get(dbxref=doi_dbxref)
                self.pub = Pub.objects.get(pub_id=doi_pub_link.pub_id)
            except ObjectDoesNotExist:
                raise ImportingError("{} not registered.".format(doi))
        else:
            self.pub, _ = Pub.objects.get_or_create(
                miniref="null",
                uniquename="null",
                type_id=placeholder_cvterm.cvterm_id,
                is_obsolete=False,
            )

        # Collected while loading so they can be reported afterwards
        self.ignored_attrs: Set[str] = set()
        self.ignored_goterms: Set[str] = set()
Ejemplo n.º 5
0
    def __init__(self, source: str, filename: str, doi: str = None) -> None:
        """Prepare caches, reference records and terms for loading a file.

        Fetches or creates the Db named after ``source`` (upper-cased), the
        "null" placeholder records and Pub, looks up the cvterms used while
        loading and, when a DOI is given, resolves its PubDbxref.

        Raises:
            ImportingError: on an integrity error while creating the source
                Db, or if the DOI is not registered.
        """
        # Bookkeeping for ignored attributes/goterms and relationships
        self.cache: Dict[str, str] = {}
        self.usedcache = 0
        self.relationships: List[Dict[str, str]] = []
        self.ignored_attrs: Set[str] = set()
        self.ignored_goterms: Set[str] = set()

        try:
            self.db, _ = Db.objects.get_or_create(name=source.upper())
        except IntegrityError as err:
            raise ImportingError(err)
        self.filename = filename

        # Placeholder records and the "null" publication
        self.db_null, _ = Db.objects.get_or_create(name="null")
        placeholder_dbxref, _ = Dbxref.objects.get_or_create(
            db=self.db_null, accession="null")
        placeholder_cv, _ = Cv.objects.get_or_create(name="null")
        placeholder_cvterm, _ = Cvterm.objects.get_or_create(
            cv=placeholder_cv,
            name="null",
            definition="",
            dbxref=placeholder_dbxref,
            is_obsolete=0,
            is_relationshiptype=0,
        )
        self.pub, _ = Pub.objects.get_or_create(
            miniref="null",
            uniquename="null",
            type_id=placeholder_cvterm.cvterm_id,
            is_obsolete=False,
        )

        # Terms looked up once and reused for every record
        self.cvterm_contained_in = Cvterm.objects.get(
            name="contained in", cv__name="relationship")
        self.aa_cvterm = Cvterm.objects.get(
            name="polypeptide", cv__name="sequence")
        self.so_term_protein_match = Cvterm.objects.get(
            name="protein_match", cv__name="sequence")

        # Publication link stays None unless a DOI was supplied
        self.pub_dbxref_doi = None
        if doi:
            try:
                doi_dbxref = Dbxref.objects.get(accession=doi)
                self.pub_dbxref_doi = PubDbxref.objects.get(
                    dbxref=doi_dbxref)
            except ObjectDoesNotExist:
                raise ImportingError("{} not registered.".format(doi))
Ejemplo n.º 6
0
def insert_organism(
    genus: str,
    species: str = "spp.",
    infraspecific_name: str = None,
    abbreviation: str = None,
    common_name: str = None,
    comment: str = None,
) -> None:
    """Insert a new organism.

    An organism is identified by genus, species and infraspecific name; the
    remaining arguments are stored as given.

    Raises:
        ImportingError: if an organism with the same genus, species and
            infraspecific name is already registered.
    """
    try:
        Organism.objects.get(genus=genus,
                             species=species,
                             infraspecific_name=infraspecific_name)
    except ObjectDoesNotExist:
        # objects.create() already persists the row, so no extra save()
        Organism.objects.create(
            abbreviation=abbreviation,
            genus=genus,
            species=species,
            common_name=common_name,
            infraspecific_name=infraspecific_name,
            comment=comment,
        )
    else:
        raise ImportingError("Organism already registered ({} {})!".format(
            genus, species))
Ejemplo n.º 7
0
 def _nullfields(self, fields: list) -> None:
     """Raise ImportingError at the first field that is None or ""."""
     for position, value in enumerate(fields):
         if value is None or value == "":
             raise ImportingError(
                 "Found null or empty field in position {}".format(position))
Ejemplo n.º 8
0
 def store_assay_project(self, assay: Assay, project: Project) -> None:
     """Fetch or create the AssayProject linking an assay to a project.

     Raises:
         ImportingError: on a database integrity error.
     """
     link = dict(assay=assay, project=project)
     try:
         AssayProject.objects.get_or_create(**link)
     except IntegrityError as err:
         raise ImportingError(err)
Ejemplo n.º 9
0
 def handle(self, file: str, verbosity: int = 0, **options):
     """Remove the feature relationships that were loaded from a file.

     Deletes every FeatureRelationship having a property whose value is the
     base name of ``file`` and whose type is the "contained in"
     relationship term.

     Raises:
         ImportingError: if the "contained in" term cannot be retrieved.
         CommandError: if the records cannot be deleted.
     """
     # get cvterm for contained in; a missing term raises
     # ObjectDoesNotExist, so it is handled alongside IntegrityError
     try:
         cvterm = Cvterm.objects.get(name="contained in",
                                     cv__name="relationship")
     except (IntegrityError, ObjectDoesNotExist) as e:
         raise ImportingError(e)
     filename = os.path.basename(file)
     if verbosity > 0:
         self.stdout.write("Removing ...")
     try:
         FeatureRelationship.objects.filter(
             FeatureRelationshipprop_feature_relationship_FeatureRelationship__value=filename,
             FeatureRelationshipprop_feature_relationship_FeatureRelationship__type=cvterm,
         ).delete()
         if verbosity > 0:
             self.stdout.write(self.style.SUCCESS("Done"))
     except IntegrityError as e:
         raise CommandError(
             "It's not possible to delete every record. You must "
             "delete relationships loaded after '{}' that might "
             "depend on it. {}".format(filename, e))
     except ObjectDoesNotExist:
         raise CommandError(
             "Cannot remove '{}' (not registered)".format(filename))
Ejemplo n.º 10
0
    def __init__(self, filecontent: str) -> None:
        """Prepare the "null" publication and the attribute filter.

        Fetches or creates the "null" Db, Dbxref, Cv, Cvterm and Pub records
        and selects the attribute filter matching ``filecontent``.

        Raises:
            ImportingError: if ``filecontent`` is not one of "genome",
                "polymorphism" or "qtl".
        """
        # Placeholder records and the "null" publication
        self.db_null, _ = Db.objects.get_or_create(name="null")
        placeholder_dbxref, _ = Dbxref.objects.get_or_create(
            db=self.db_null, accession="null")
        placeholder_cv, _ = Cv.objects.get_or_create(name="null")
        placeholder_cvterm, _ = Cvterm.objects.get_or_create(
            cv=placeholder_cv,
            name="null",
            definition="",
            dbxref=placeholder_dbxref,
            is_obsolete=0,
            is_relationshiptype=0,
        )
        self.pub, _ = Pub.objects.get_or_create(
            miniref="null",
            uniquename="null",
            type_id=placeholder_cvterm.cvterm_id,
            is_obsolete=False,
        )

        # Pick the attribute whitelist for this kind of file
        known_filters = (
            ("genome", VALID_GENOME_ATTRS),
            ("polymorphism", VALID_POLYMORPHISM_ATTRS),
            ("qtl", VALID_QTL_ATTRS),
        )
        for label, attrs in known_filters:
            if filecontent == label:
                self.filter = attrs
                break
        else:
            raise ImportingError(
                "Attributes type required: (eg. genome, polymorphism, qtl)")

        # Collected while loading so they can be reported afterwards
        self.ignored_attrs: Set[str] = set()
        self.ignored_goterms: Set[str] = set()
Ejemplo n.º 11
0
 def _is_readable(self, file_path: str) -> None:
     """Raise ImportingError unless the file can be opened for reading."""
     try:
         # Opening is the whole check; the handle is closed right away.
         with open(file_path, "r"):
             pass
     except IOError:
         raise ImportingError("{} is not readable".format(file_path))
Ejemplo n.º 12
0
 def get_phylonode_by_accession(self, accession: int) -> Phylonode:
     """Return the Phylonode of the organism found by dbxref accession.

     Raises:
         ImportingError: if the organism or its phylonode is not found.
     """
     try:
         owner = self.get_organism_by_accession(accession)
         return Phylonode.objects.get(
             PhylonodeOrganism_phylonode_Phylonode__organism=owner)
     except ObjectDoesNotExist as err:
         raise ImportingError(err)
Ejemplo n.º 13
0
 def __init__(
     self,
     filename: str,
     program: str,
     programversion: str,
     so_query: str,
     so_subject: str,
     org_query: str,
     org_subject: str,
     input_format: str,
     algorithm: str = None,
     name: str = None,
     description: str = None,
 ) -> None:
     """Resolve organisms and terms, then register the analysis.

     Keeps the query/subject sequence-ontology names and the input format,
     retrieves both organisms and the cvterms used while loading, and
     stores an analysis for ``filename`` through AnalysisLoader.

     Raises:
         ImportingError: if a lookup finds nothing or a database integrity
             error occurs.
     """
     # Plain settings, kept as given
     self.input_format = input_format
     self.so_query = so_query
     self.so_subject = so_subject

     try:
         self.org_query = retrieve_organism(org_query)
         self.org_subject = retrieve_organism(org_subject)

         # Terms looked up once and reused for every record
         self.so_term_match_part = Cvterm.objects.get(
             name="match_part", cv__name="sequence")
         self.ro_term_similarity = Cvterm.objects.get(
             name="in similarity relationship with",
             cv__name="relationship")
         self.cvterm_contained_in = Cvterm.objects.get(
             name="contained in", cv__name="relationship")

         self.analysis_loader = AnalysisLoader()
         self.analysis = self.analysis_loader.store_analysis(
             algorithm=algorithm,
             name=name,
             description=description,
             sourcename=filename,
             filename=filename,
             program=program,
             programversion=programversion,
             timeexecuted=datetime.now(),
         )
     except (IntegrityError, ObjectDoesNotExist) as err:
         raise ImportingError(err)
Ejemplo n.º 14
0
 def store_analysis(
     self,
     program: str,
     sourcename: str,
     programversion: str,
     filename: str = None,
     timeexecuted: str = None,
     algorithm: str = None,
     name: str = None,
     description: str = None,
 ) -> Analysis:
     """Store an analysis and tag it with its source file name.

     Args:
         timeexecuted: execution time. A string must follow the
             "%b-%d-%Y" format (e.g.: Oct-16-2016); a datetime object is
             used as given; anything else falls back to the current time.

     Returns:
         The created Analysis.

     Raises:
         ImportingError: if the date string cannot be parsed, a lookup
             finds nothing, or a database integrity error occurs.
     """
     if isinstance(timeexecuted, str):
         # format is mandatory, e.g.: Oct-16-2016)
         # in settings.py set USE_TZ = False
         try:
             date_format = "%b-%d-%Y"
             timeexecuted = datetime.strptime(timeexecuted, date_format)
         except ValueError as e:
             # strptime signals a malformed date with ValueError
             raise ImportingError(e)
     elif not isinstance(timeexecuted, datetime):
         # keep a datetime supplied by the caller; default otherwise
         timeexecuted = datetime.now()
     # create analysis object
     try:
         analysis = Analysis.objects.create(
             algorithm=algorithm,
             name=name,
             description=description,
             sourcename=sourcename,
             program=program,
             programversion=programversion,
             timeexecuted=timeexecuted,
         )
         self.store_analysisprop(
             analysis=analysis,
             type_id=self.cvterm_contained_in.cvterm_id,
             value=filename,
         )
     except IntegrityError as e:
         raise ImportingError(e)
     except ObjectDoesNotExist as e:
         raise ImportingError(e)
     return analysis
Ejemplo n.º 15
0
 def store_projectprop(self,
                       project: Project,
                       type_id: int,
                       value: str,
                       rank: int = 0) -> None:
     """Fetch or create a Projectprop for the given project.

     Raises:
         ImportingError: on a database integrity error.
     """
     prop_fields = dict(project=project, type_id=type_id,
                        value=value, rank=rank)
     try:
         Projectprop.objects.get_or_create(**prop_fields)
     except IntegrityError as err:
         raise ImportingError(err)
Ejemplo n.º 16
0
    def store_organism_publication(self, organism: str, doi: str) -> None:
        """Link an organism to the publication registered under a DOI.

        Raises:
            ImportingError: if the DOI's Dbxref or its Pub is not found.
        """
        organism_obj = retrieve_organism(organism)

        try:
            doi_obj = Dbxref.objects.get(accession=doi, db__name="DOI")
            pub_obj = Pub.objects.get(PubDbxref_pub_Pub__dbxref=doi_obj)
        except ObjectDoesNotExist:
            # format the DOI into the message instead of passing it as a
            # second exception argument
            raise ImportingError("{} not registered.".format(doi))

        OrganismPub.objects.get_or_create(organism=organism_obj, pub=pub_obj)
Ejemplo n.º 17
0
    def add_sequence_to_feature(self, seq_obj: SeqRecord, soterm: str) -> None:
        """Attach the residues of a SeqRecord to an existing feature.

        The feature is located by ``seq_obj.id`` and ``soterm``; its residues
        and md5 checksum are replaced with those of ``seq_obj``.

        Raises:
            ImportingError: if no such feature exists.
        """
        try:
            feature_id = retrieve_feature_id(accession=seq_obj.id, soterm=soterm)
        except ObjectDoesNotExist:
            raise ImportingError("The feature {} does NOT exist.".format(seq_obj.id))

        feature_obj = Feature.objects.get(feature_id=feature_id)
        # The Feature checksum field is md5checksum; assigning to "md5"
        # would only set an unsaved instance attribute.
        feature_obj.md5checksum = md5(str(seq_obj.seq).encode()).hexdigest()
        feature_obj.residues = seq_obj.seq
        # NOTE(review): seqlen is not refreshed here -- confirm whether it
        # should follow the new residues.
        feature_obj.save()
Ejemplo n.º 18
0
    def store_feature_publication(self, feature: str, soterm: str,
                                  doi: str) -> None:
        """Link a feature to the publication registered under a DOI.

        The DOI is lower-cased before the lookup.

        Raises:
            ImportingError: if the DOI's Dbxref or its Pub is not found.
        """
        target_id = retrieve_feature_id(accession=feature, soterm=soterm)
        try:
            doi_dbxref = Dbxref.objects.get(
                accession=doi.lower(), db__name="DOI")
            publication = Pub.objects.get(
                PubDbxref_pub_Pub__dbxref=doi_dbxref)
        except ObjectDoesNotExist:
            raise ImportingError("{} not registered.".format(doi))

        FeaturePub.objects.get_or_create(
            feature_id=target_id, pub=publication)
Ejemplo n.º 19
0
    def store_biomaterial(
        self,
        name: str,
        filename: str,
        db: str = None,
        acc: str = None,
        organism: Union[str, Organism] = None,
        description: str = None,
    ) -> Biomaterial:
        """Fetch or create a biomaterial and tag it with its source file.

        Args:
            name: biomaterial name.
            filename: stored as the value of the "contained in" property.
            db: name of the Db holding the accession.
            acc: accession, e.g. the "GSMxxxx" sample accession from GEO.
            organism: an Organism, or an identifier passed to
                ``retrieve_organism``.
            description: free-text description.

        Returns:
            The Biomaterial that was fetched or created.

        Raises:
            ImportingError: on an integrity error while storing the
                biomaterial or its property.
        """
        # db and accession: an integrity error leaves the reference unset
        try:
            source_db, _ = Db.objects.get_or_create(name=db)
        except IntegrityError:
            source_db = None
        try:
            source_dbxref, _ = Dbxref.objects.get_or_create(db=source_db,
                                                            accession=acc)
        except IntegrityError:
            source_dbxref = None

        # organism: accept a model instance or resolve an identifier
        if isinstance(organism, Organism):
            taxon = organism.organism_id
        else:
            try:
                self.organism = retrieve_organism(organism)
                taxon = self.organism.organism_id
            except IntegrityError:
                taxon = None

        # get cvterm for condition - TODO
        # import required ontology,
        # check http://obi-ontology.org/
        # or: https://www.bioontology.org/
        try:
            # name is treated as mandatory (the schema does not require it)
            biomaterial, _ = Biomaterial.objects.get_or_create(
                name=name,
                taxon_id=taxon,
                dbxref=source_dbxref,
                description=description,
                defaults={"biosourceprovider_id": None},
            )
            self.store_biomaterialprop(
                biomaterial=biomaterial,
                type_id=self.cvterm_contained_in.cvterm_id,
                value=filename,
            )
        except IntegrityError as err:
            raise ImportingError(err)
        return biomaterial
Ejemplo n.º 20
0
 def store_project(self, name: str, filename: str) -> Project:
     """Fetch or create a project and tag it with its source file name.

     Returns:
         The Project that was fetched or created.

     Raises:
         ImportingError: on a database integrity error.
     """
     try:
         project, _ = Project.objects.get_or_create(name=name)
         contained_in_id = self.cvterm_contained_in.cvterm_id
         self.store_projectprop(project=project,
                                type_id=contained_in_id,
                                value=filename)
     except IntegrityError as err:
         raise ImportingError(err)
     return project
Ejemplo n.º 21
0
 def store_analysisprop(self,
                        analysis: Analysis,
                        type_id: int,
                        value: str = None,
                        rank: int = 0) -> None:
     """Create an Analysisprop for the given analysis.

     Raises:
         ImportingError: on a database integrity error.
     """
     prop_fields = dict(analysis=analysis, type_id=type_id,
                        value=value, rank=rank)
     try:
         Analysisprop.objects.create(**prop_fields)
     except IntegrityError as err:
         raise ImportingError(err)
Ejemplo n.º 22
0
 def store_assayprop(self,
                     assay: Assay,
                     type_id: int,
                     value: str,
                     rank: int = 0) -> None:
     """Fetch or create an Assayprop for the given assay.

     Raises:
         ImportingError: on a database integrity error.
     """
     prop_fields = dict(assay=assay, type_id=type_id,
                        value=value, rank=rank)
     try:
         Assayprop.objects.get_or_create(**prop_fields)
     except IntegrityError as err:
         raise ImportingError(err)
Ejemplo n.º 23
0
    def __init__(self, phylotree_name: str, organism_db: str) -> None:
        """Create a new phylotree and the records used to load its levels.

        Raises:
            ImportingError: if a phylotree with this name already exists or
                a database integrity error occurs.
        """
        # Refuse to reuse an existing tree name
        try:
            Phylotree.objects.get(name=phylotree_name)
        except ObjectDoesNotExist:
            pass
        else:
            raise ImportingError(
                "Phylotree {} already exists".format(phylotree_name))

        try:
            self.db, _ = Db.objects.get_or_create(name=organism_db)
            taxonomy_dbxref, _ = Dbxref.objects.get_or_create(
                db=self.db, accession="taxonomy")
            self.phylotree = Phylotree.objects.create(
                dbxref=taxonomy_dbxref, name=phylotree_name)

            self.level_db, _ = Db.objects.get_or_create(
                name="species_taxonomy")
            self.level_cv, _ = Cv.objects.get_or_create(name="taxonomy")
            # Starts empty; maps a string key to a Cvterm
            self.level_cvterms: Dict[str, Cvterm] = {}
        except IntegrityError as err:
            raise ImportingError(err)
Ejemplo n.º 24
0
 def store_biomaterialprop(self,
                           biomaterial: Biomaterial,
                           type_id: int,
                           value: str,
                           rank: int = 0) -> None:
     """Fetch or create a Biomaterialprop for the given biomaterial.

     Raises:
         ImportingError: on a database integrity error.
     """
     prop_fields = dict(biomaterial=biomaterial, type_id=type_id,
                        value=value, rank=rank)
     try:
         Biomaterialprop.objects.get_or_create(**prop_fields)
     except IntegrityError as err:
         raise ImportingError(err)
Ejemplo n.º 25
0
 def store_biomaterial_treatment(self,
                                 biomaterial: Biomaterial,
                                 treatment: Treatment,
                                 rank: int = 0) -> None:
     """Fetch or create the link between a biomaterial and a treatment.

     Raises:
         ImportingError: on a database integrity error.
     """
     # treatment and biomaterial are mandatory
     link = dict(biomaterial=biomaterial, treatment=treatment, rank=rank)
     try:
         BiomaterialTreatment.objects.get_or_create(**link)
     except IntegrityError as err:
         raise ImportingError(err)
Ejemplo n.º 26
0
 def store_treatment(
     self, name: str, biomaterial: Biomaterial, rank: int = 0
 ) -> Treatment:
     """Fetch or create a treatment typed with the loader's null cvterm.

     Returns:
         The Treatment that was fetched or created.

     Raises:
         ImportingError: on a database integrity error.
     """
     try:
         found_or_new = Treatment.objects.get_or_create(
             biomaterial=biomaterial,
             type_id=self.cvterm_null.cvterm_id,
             name=name,
             rank=rank,
         )
     except IntegrityError as err:
         raise ImportingError(err)
     treatment, _ = found_or_new
     return treatment
Ejemplo n.º 27
0
 def store_assay_biomaterial(self,
                             assay: Assay,
                             biomaterial: Biomaterial,
                             rank: int = 0) -> None:
     """Fetch or create the link between an assay and a biomaterial.

     A newly created link gets no channel.

     Raises:
         ImportingError: on a database integrity error.
     """
     try:
         AssayBiomaterial.objects.get_or_create(
             assay=assay,
             biomaterial=biomaterial,
             rank=rank,
             defaults={"channel_id": None},
         )
     except IntegrityError as err:
         raise ImportingError(err)
Ejemplo n.º 28
0
 def store_analysisfeature(
     self,
     analysis: Analysis,
     feature: Union[str, Feature],
     organism: Union[str, Organism],
     rawscore: float = None,
     normscore: float = None,
     feature_db: str = "GFF_SOURCE",
     significance: float = None,
     identity: float = None,
 ) -> None:
     """Store analysisfeature (expression counts for a given feature).

     ``organism`` is resolved with ``retrieve_organism`` unless it is
     already an Organism. ``feature`` may be a Feature or an accession,
     which is looked up as an "mRNA" feature.

     Raises:
         ImportingError: on a database integrity error.
     """
     # resolve the organism when only an identifier was given
     if not isinstance(organism, Organism):
         try:
             organism = retrieve_organism(organism)
         except IntegrityError as err:
             raise ImportingError(err)

     # retrieve feature
     if isinstance(feature, Feature):
         target_id = feature.feature_id
     else:
         target_id = retrieve_feature_id(accession=feature, soterm="mRNA")

     # finally create analysisfeature
     scores = dict(rawscore=rawscore, normscore=normscore,
                   significance=significance, identity=identity)
     try:
         Analysisfeature.objects.create(
             feature_id=target_id, analysis=analysis, **scores)
     except IntegrityError as err:
         raise ImportingError(err)
Ejemplo n.º 29
0
 def retrieve_subject_from_hsp(self, hsp: hsp.HSP) -> int:
     """Return the feature id of the subject (hit) of a searchio hsp.

     The hit id is tried first; if no feature matches, an id extracted
     from the hit description is tried.

     Raises:
         ImportingError: if neither lookup finds a feature.
     """
     try:
         return retrieve_feature_id(accession=hsp.hit_id,
                                    soterm=self.so_subject)
     except ObjectDoesNotExist as first_miss:
         try:
             fallback_acc = self.retrieve_id_from_description(
                 hsp.hit_description)
             return retrieve_feature_id(accession=fallback_acc,
                                        soterm=self.so_subject)
         except ObjectDoesNotExist as second_miss:
             raise ImportingError(
                 first_miss, second_miss,
                 "Subject {} {}".format(hsp.hit_id, hsp.hit_description))
Ejemplo n.º 30
0
 def retrieve_query_from_hsp(self, hsp: hsp.HSP) -> int:
     """Return the feature id of the query of a searchio hsp.

     The query id is tried first; if no feature matches, an id extracted
     from the query description is tried.

     Raises:
         ImportingError: if neither lookup finds a feature.
     """
     try:
         return retrieve_feature_id(accession=hsp.query_id,
                                    soterm=self.so_query)
     except ObjectDoesNotExist as first_miss:
         try:
             fallback_acc = self.retrieve_id_from_description(
                 hsp.query_description)
             return retrieve_feature_id(accession=fallback_acc,
                                        soterm=self.so_query)
         except ObjectDoesNotExist as second_miss:
             raise ImportingError(
                 first_miss, second_miss,
                 "Query {} {}".format(hsp.query_id, hsp.query_description))