def store_quantification(self, analysis: Analysis, assayacc: str, assaydb: str = "SRA") -> None:
    """Store a quantification linking an assay accession to an analysis.

    The assay is looked up first through its Dbxref (``assayacc`` within
    the ``assaydb`` Db, e.g. "SRR12345" from SRA/NCBI) and, when that
    lookup finds nothing, by ``Assay.name``. An Acquisition named
    ``assayacc`` is then created for the assay, and a Quantification ties
    that acquisition to ``analysis``.

    Raises:
        ImportingError: if the assay is found by neither lookup, or if
            creating the acquisition or the quantification raises an
            IntegrityError.
    """
    try:
        # first try to get from Assay dbxref (e.g.: "SRR12345" - from SRA/NCBI)
        db_assay = Db.objects.get(name=assaydb)
        dbxref_assay = Dbxref.objects.get(accession=assayacc, db=db_assay)
        assay = Assay.objects.get(dbxref=dbxref_assay)
    except ObjectDoesNotExist:
        # then searches name
        try:
            assay = Assay.objects.get(name=assayacc)
        except ObjectDoesNotExist as e:
            # then gives up: report the miss the same way as every other
            # failure in this method instead of leaking a raw DoesNotExist
            raise ImportingError(e)
    except IntegrityError as e:
        raise ImportingError(e)

    try:
        acquisition = Acquisition.objects.create(assay=assay, name=assayacc)
    except IntegrityError as e:
        raise ImportingError(e)

    try:
        Quantification.objects.create(acquisition=acquisition, analysis=analysis)
    except IntegrityError as e:
        raise ImportingError(e)
def __init__(self, filename: str, doi: str = None, description: str = None, url: str = None) -> None:
    """Prepare the loader state for one sequence file.

    Gets or creates the "FASTA_SOURCE" Db (matching on ``description`` and
    ``url`` as well), remembers ``filename``, fetches the "contained in"
    relationship cvterm and, when ``doi`` is given, resolves it to its
    PubDbxref.

    Raises:
        ImportingError: if ``doi`` is given but its Dbxref or PubDbxref
            does not exist.
    """
    # DB that owns every accession loaded from this file
    self.db, _ = Db.objects.get_or_create(name="FASTA_SOURCE", description=description, url=url)
    self.filename = filename

    # relationship-ontology term used to tag records with their source file
    self.cvterm_contained_in = Cvterm.objects.get(name="contained in", cv__name="relationship")

    self.pub_dbxref_doi = None
    if not doi:
        return

    # both lookups report a miss the same way, so they share one handler
    try:
        doi_dbxref = Dbxref.objects.get(accession=doi)
        self.pub_dbxref_doi = PubDbxref.objects.get(dbxref=doi_dbxref)
    except ObjectDoesNotExist as err:
        raise ImportingError(err)
def store_biopython_seq_record(
    self,
    seq_obj: SeqRecord,
    soterm: str,
    organism: str,
    ignore_residues: bool = False,
) -> None:
    """Store a Biopython SeqRecord as a new Feature.

    A Dbxref for ``seq_obj.id`` (in ``self.db``) and a "contained in"
    Dbxrefprop holding ``self.filename`` are obtained or created first.
    If ``retrieve_feature_id`` then finds a feature for the accession and
    ``soterm``, the record is rejected; otherwise a Feature is saved and,
    when a DOI was resolved at construction time, linked to its
    publication.

    Args:
        seq_obj: record whose ``id``, ``seq`` and ``description`` are stored.
        soterm: name of the cvterm (cv "sequence") used as feature type.
        organism: passed to ``retrieve_organism``.
        ignore_residues: when exactly ``True``, residues are stored as an
            empty string (length and checksum still come from the sequence).

    Raises:
        ImportingError: if the sequence is already registered, or if the
            FeaturePub creation raises an IntegrityError.
    """
    type_term = Cvterm.objects.get(name=soterm, cv__name="sequence")
    organism_obj = retrieve_organism(organism)

    try:
        dbxref, _ = Dbxref.objects.get_or_create(db=self.db, accession=seq_obj.id)
        Dbxrefprop.objects.get_or_create(
            dbxref=dbxref,
            type_id=self.cvterm_contained_in.cvterm_id,
            value=self.filename,
            rank=0,
        )
        # reaching the raise below means the lookup succeeded, i.e. duplicate
        retrieve_feature_id(accession=seq_obj.id, soterm=soterm)
        raise ImportingError("The sequence {} is already registered.".format(seq_obj.id))
    except ObjectDoesNotExist:
        checksum = md5(str(seq_obj.seq).encode()).hexdigest()
        residues = "" if ignore_residues is True else seq_obj.seq
        # the "<unknown description>" placeholder is not kept as a name
        if seq_obj.description != "<unknown description>":
            name = seq_obj.description
        else:
            name = None

        # storing feature
        feature = Feature(
            dbxref=dbxref,
            organism=organism_obj,
            uniquename=seq_obj.id,
            name=name,
            residues=residues,
            seqlen=len(seq_obj.seq),
            md5checksum=checksum,
            type=type_term,
            is_analysis=False,
            is_obsolete=False,
            timeaccessioned=datetime.now(timezone.utc),
            timelastmodified=datetime.now(timezone.utc),
        )
        feature.save()

        # DOI: try to link sequence to publication's DOI
        if feature and self.pub_dbxref_doi:
            try:
                FeaturePub.objects.create(feature=feature, pub_id=self.pub_dbxref_doi.pub_id)
            except IntegrityError as err:
                raise ImportingError(err)
def __init__(self, filecontent: str, doi: str = None) -> None:
    """Prepare the loader state for one kind of file content.

    Ensures the "null" Db, Dbxref, Cv and Cvterm exist, selects the
    attribute filter matching ``filecontent`` and sets ``self.pub`` either
    to the publication registered for ``doi`` or, without a DOI, to the
    "null" publication.

    Args:
        filecontent: one of "genome", "polymorphism" or "qtl".
        doi: optional accession of a registered publication Dbxref.

    Raises:
        ImportingError: if ``filecontent`` is not one of the accepted
            values, or if ``doi`` cannot be resolved to a Pub.
    """
    # placeholder ("null") records
    self.db_null, _ = Db.objects.get_or_create(name="null")
    null_dbxref, _ = Dbxref.objects.get_or_create(db=self.db_null, accession="null")
    null_cv, _ = Cv.objects.get_or_create(name="null")
    null_cvterm, _ = Cvterm.objects.get_or_create(
        cv=null_cv,
        name="null",
        definition="",
        dbxref=null_dbxref,
        is_obsolete=0,
        is_relationshiptype=0,
    )

    # attribute whitelist for the declared content type
    if filecontent == "genome":
        self.filter = VALID_GENOME_ATTRS
    elif filecontent == "polymorphism":
        self.filter = VALID_POLYMORPHISM_ATTRS
    elif filecontent == "qtl":
        self.filter = VALID_QTL_ATTRS
    else:
        raise ImportingError("Attributes type required: (eg. genome, polymorphism, qtl)")

    if doi:
        # Dbxref -> PubDbxref -> Pub; a miss at any step is the same error
        try:
            doi_dbxref = Dbxref.objects.get(accession=doi)
            doi_pub_dbxref = PubDbxref.objects.get(dbxref=doi_dbxref)
            self.pub = Pub.objects.get(pub_id=doi_pub_dbxref.pub_id)
        except ObjectDoesNotExist:
            raise ImportingError("{} not registered.".format(doi))
    else:
        self.pub, _ = Pub.objects.get_or_create(
            miniref="null",
            uniquename="null",
            type_id=null_cvterm.cvterm_id,
            is_obsolete=False,
        )

    # sets that collect ignored attributes and ignored goterms
    self.ignored_attrs: Set[str] = set()
    self.ignored_goterms: Set[str] = set()
def __init__(self, source: str, filename: str, doi: str = None) -> None:
    """Prepare the loader state for one annotation source file.

    Initialises the caches and bookkeeping containers, gets or creates the
    Db named ``source.upper()``, ensures the "null" Db/Dbxref/Cv/Cvterm/Pub
    exist, fetches the cvterms used later and, when ``doi`` is given,
    resolves it to its PubDbxref.

    Raises:
        ImportingError: if creating the source Db raises an
            IntegrityError, or if ``doi`` cannot be resolved.
    """
    # containers for the cache, relationships, ignored attributes and goterms
    self.cache: Dict[str, str] = {}
    self.usedcache = 0
    self.relationships: List[Dict[str, str]] = []
    self.ignored_attrs: Set[str] = set()
    self.ignored_goterms: Set[str] = set()

    try:
        self.db, _ = Db.objects.get_or_create(name=source.upper())
    except IntegrityError as err:
        raise ImportingError(err)
    self.filename = filename

    # placeholder ("null") records
    self.db_null, _ = Db.objects.get_or_create(name="null")
    null_dbxref, _ = Dbxref.objects.get_or_create(db=self.db_null, accession="null")
    null_cv, _ = Cv.objects.get_or_create(name="null")
    null_cvterm, _ = Cvterm.objects.get_or_create(
        cv=null_cv,
        name="null",
        definition="",
        dbxref=null_dbxref,
        is_obsolete=0,
        is_relationshiptype=0,
    )
    self.pub, _ = Pub.objects.get_or_create(
        miniref="null",
        uniquename="null",
        type_id=null_cvterm.cvterm_id,
        is_obsolete=False,
    )

    # cvterms looked up once and reused
    self.cvterm_contained_in = Cvterm.objects.get(name="contained in", cv__name="relationship")
    self.aa_cvterm = Cvterm.objects.get(name="polypeptide", cv__name="sequence")
    self.so_term_protein_match = Cvterm.objects.get(name="protein_match", cv__name="sequence")

    self.pub_dbxref_doi = None
    if doi:
        # Dbxref -> PubDbxref; a miss at either step is the same error
        try:
            doi_dbxref = Dbxref.objects.get(accession=doi)
            self.pub_dbxref_doi = PubDbxref.objects.get(dbxref=doi_dbxref)
        except ObjectDoesNotExist:
            raise ImportingError("{} not registered.".format(doi))
def insert_organism(
    genus: str,
    species: str = "spp.",
    infraspecific_name: str = None,
    abbreviation: str = None,
    common_name: str = None,
    comment: str = None,
) -> None:
    """Insert an organism unless it is already registered.

    An organism counts as registered when one exists with the same
    ``genus``, ``species`` and ``infraspecific_name``.

    Raises:
        ImportingError: if a matching organism already exists.
    """
    try:
        Organism.objects.get(genus=genus, species=species, infraspecific_name=infraspecific_name)
    except ObjectDoesNotExist:
        # create() already persists the row, so no extra save() is needed
        Organism.objects.create(
            abbreviation=abbreviation,
            genus=genus,
            species=species,
            common_name=common_name,
            infraspecific_name=infraspecific_name,
            comment=comment,
        )
    else:
        # get() returned, so the organism exists (it never returns None)
        raise ImportingError("Organism already registered ({} {})!".format(genus, species))
def _nullfields(self, fields: list) -> None: counter = 0 for field in fields: if field is None or field == "": raise ImportingError( "Found null or empty field in position {}".format(counter)) counter += 1
def store_assay_project(self, assay: Assay, project: Project) -> None:
    """Link an assay to a project, reusing the link if it already exists.

    Raises:
        ImportingError: wrapping an IntegrityError raised by the ORM call.
    """
    try:
        AssayProject.objects.get_or_create(assay=assay, project=project)
    except IntegrityError as err:
        raise ImportingError(err)
def handle(self, file: str, verbosity: int = 0, **options):
    """Delete the feature relationships that were loaded from a file.

    Relationships are selected through their "contained in" property whose
    value equals the base name of ``file``.

    Args:
        file: path of the file originally loaded; only its base name is
            used for matching.
        verbosity: progress messages are written when greater than 0.

    Raises:
        ImportingError: if the "contained in" relationship cvterm cannot
            be retrieved.
        CommandError: if the deletion fails.
    """
    # get cvterm for contained in
    try:
        cvterm = Cvterm.objects.get(name="contained in", cv__name="relationship")
    except (IntegrityError, ObjectDoesNotExist) as e:
        # get() reports a missing term with ObjectDoesNotExist; wrap it too
        raise ImportingError(e)

    filename = os.path.basename(file)
    if verbosity > 0:
        self.stdout.write("Removing ...")

    try:
        FeatureRelationship.objects.filter(
            FeatureRelationshipprop_feature_relationship_FeatureRelationship__value=filename,
            FeatureRelationshipprop_feature_relationship_FeatureRelationship__type=cvterm,
        ).delete()
        if verbosity > 0:
            self.stdout.write(self.style.SUCCESS("Done"))
    except IntegrityError as e:
        raise CommandError(
            "It's not possible to delete every record. You must "
            "delete relationships loaded after '{}' that might "
            "depend on it. {}".format(filename, e)
        )
    except ObjectDoesNotExist:
        raise CommandError("Cannot remove '{}' (not registered)".format(filename))
def __init__(self, filecontent: str) -> None:
    """Prepare the loader state for one kind of file content.

    Ensures the "null" Db, Dbxref, Cv, Cvterm and Pub exist and selects
    the attribute filter matching ``filecontent``.

    Args:
        filecontent: one of "genome", "polymorphism" or "qtl".

    Raises:
        ImportingError: if ``filecontent`` is not one of the accepted values.
    """
    # placeholder ("null") records
    self.db_null, _ = Db.objects.get_or_create(name="null")
    null_dbxref, _ = Dbxref.objects.get_or_create(db=self.db_null, accession="null")
    null_cv, _ = Cv.objects.get_or_create(name="null")
    null_cvterm, _ = Cvterm.objects.get_or_create(
        cv=null_cv,
        name="null",
        definition="",
        dbxref=null_dbxref,
        is_obsolete=0,
        is_relationshiptype=0,
    )
    self.pub, _ = Pub.objects.get_or_create(
        miniref="null",
        uniquename="null",
        type_id=null_cvterm.cvterm_id,
        is_obsolete=False,
    )

    # attribute whitelist for the declared content type
    if filecontent == "genome":
        self.filter = VALID_GENOME_ATTRS
    elif filecontent == "polymorphism":
        self.filter = VALID_POLYMORPHISM_ATTRS
    elif filecontent == "qtl":
        self.filter = VALID_QTL_ATTRS
    else:
        raise ImportingError("Attributes type required: (eg. genome, polymorphism, qtl)")

    # sets that collect ignored attributes and ignored goterms
    self.ignored_attrs: Set[str] = set()
    self.ignored_goterms: Set[str] = set()
def _is_readable(self, file_path: str) -> None: """Check file is readable.""" try: f = open(file_path, "r") f.close() except IOError: raise ImportingError("{} is not readable".format(file_path))
def get_phylonode_by_accession(self, accession: int) -> Phylonode:
    """Return the phylonode of the organism found for ``accession``.

    The organism comes from ``self.get_organism_by_accession``.

    Raises:
        ImportingError: wrapping an ObjectDoesNotExist raised by either
            lookup.
    """
    try:
        organism = self.get_organism_by_accession(accession)
        return Phylonode.objects.get(PhylonodeOrganism_phylonode_Phylonode__organism=organism)
    except ObjectDoesNotExist as err:
        raise ImportingError(err)
def __init__(
    self,
    filename: str,
    program: str,
    programversion: str,
    so_query: str,
    so_subject: str,
    org_query: str,
    org_subject: str,
    input_format: str,
    algorithm: str = None,
    name: str = None,
    description: str = None,
) -> None:
    """Prepare the loader state and register the analysis being loaded.

    Resolves the query and subject organisms, stores the input format and
    the sequence-ontology term names, fetches the cvterms used later and
    stores an Analysis through ``AnalysisLoader.store_analysis`` with
    ``filename`` as both source name and file name.

    Raises:
        ImportingError: wrapping any IntegrityError or ObjectDoesNotExist
            raised during the setup.
    """
    try:
        self.org_query = retrieve_organism(org_query)
        self.org_subject = retrieve_organism(org_subject)
        self.input_format = input_format
        self.so_query = so_query
        self.so_subject = so_subject

        # cvterms looked up once and reused
        self.so_term_match_part = Cvterm.objects.get(name="match_part", cv__name="sequence")
        self.ro_term_similarity = Cvterm.objects.get(
            name="in similarity relationship with", cv__name="relationship"
        )
        self.cvterm_contained_in = Cvterm.objects.get(name="contained in", cv__name="relationship")

        self.analysis_loader = AnalysisLoader()
        self.analysis = self.analysis_loader.store_analysis(
            algorithm=algorithm,
            name=name,
            description=description,
            sourcename=filename,
            filename=filename,
            program=program,
            programversion=programversion,
            timeexecuted=datetime.now(),
        )
    except (IntegrityError, ObjectDoesNotExist) as err:
        raise ImportingError(err)
def store_analysis(
    self,
    program: str,
    sourcename: str,
    programversion: str,
    filename: str = None,
    timeexecuted: str = None,
    algorithm: str = None,
    name: str = None,
    description: str = None,
) -> Analysis:
    """Create an Analysis and record the file it came from.

    Args:
        program, sourcename, programversion, algorithm, name, description:
            stored on the Analysis as given.
        filename: stored as the value of a "contained in" analysisprop.
        timeexecuted: date string in the "%b-%d-%Y" format (e.g.
            "Oct-16-2016"). Any non-string value, including ``None``, is
            replaced by the current time.

    Returns:
        The created Analysis.

    Raises:
        ImportingError: if ``timeexecuted`` is a string that does not match
            the expected format, or if storing the analysis or its property
            raises IntegrityError or ObjectDoesNotExist.
    """
    if isinstance(timeexecuted, str):
        # format is mandatory, e.g.: Oct-16-2016
        # in settings.py set USE_TZ = False
        try:
            timeexecuted = datetime.strptime(timeexecuted, "%b-%d-%Y")
        except ValueError as e:
            # strptime signals a malformed date with ValueError
            raise ImportingError(e)
    else:
        timeexecuted = datetime.now()

    # create analysis object
    try:
        analysis = Analysis.objects.create(
            algorithm=algorithm,
            name=name,
            description=description,
            sourcename=sourcename,
            program=program,
            programversion=programversion,
            timeexecuted=timeexecuted,
        )
        self.store_analysisprop(
            analysis=analysis,
            type_id=self.cvterm_contained_in.cvterm_id,
            value=filename,
        )
    except IntegrityError as e:
        raise ImportingError(e)
    except ObjectDoesNotExist as e:
        raise ImportingError(e)
    return analysis
def store_projectprop(self, project: Project, type_id: int, value: str, rank: int = 0) -> None:
    """Store a project property, reusing an identical one if it exists.

    Raises:
        ImportingError: wrapping an IntegrityError raised by the ORM call.
    """
    try:
        Projectprop.objects.get_or_create(project=project, type_id=type_id, value=value, rank=rank)
    except IntegrityError as err:
        raise ImportingError(err)
def store_organism_publication(self, organism: str, doi: str) -> None:
    """Link an organism to the publication registered under a DOI.

    Args:
        organism: passed to ``retrieve_organism``.
        doi: accession of a Dbxref in the "DOI" Db.

    Raises:
        ImportingError: if the DOI Dbxref or its publication does not exist.
    """
    organism_obj = retrieve_organism(organism)
    try:
        doi_obj = Dbxref.objects.get(accession=doi, db__name="DOI")
        pub_obj = Pub.objects.get(PubDbxref_pub_Pub__dbxref=doi_obj)
    except ObjectDoesNotExist:
        # format the DOI into the message instead of passing it as a
        # second exception argument
        raise ImportingError("{} not registered.".format(doi))
    OrganismPub.objects.get_or_create(organism=organism_obj, pub=pub_obj)
def add_sequence_to_feature(self, seq_obj: SeqRecord, soterm: str) -> None:
    """Attach the sequence of a Biopython SeqRecord to an existing feature.

    The feature is located with ``retrieve_feature_id`` using
    ``seq_obj.id`` and ``soterm``; its residues and MD5 checksum are then
    replaced and the feature is saved.

    Raises:
        ImportingError: if no feature exists for the accession.
    """
    try:
        feature_id = retrieve_feature_id(accession=seq_obj.id, soterm=soterm)
    except ObjectDoesNotExist:
        raise ImportingError("The feature {} does NOT exist.".format(seq_obj.id))

    feature_obj = Feature.objects.get(feature_id=feature_id)
    # the model field is md5checksum; assigning to .md5 only set a plain
    # attribute that save() never persisted
    feature_obj.md5checksum = md5(str(seq_obj.seq).encode()).hexdigest()
    feature_obj.residues = seq_obj.seq
    feature_obj.save()
def store_feature_publication(self, feature: str, soterm: str, doi: str) -> None:
    """Link a feature to the publication registered under a DOI.

    Args:
        feature: accession passed to ``retrieve_feature_id``.
        soterm: sequence-ontology term passed to ``retrieve_feature_id``.
        doi: looked up lower-cased among the Dbxrefs of the "DOI" Db.

    Raises:
        ImportingError: if the DOI Dbxref or its publication does not exist.
    """
    feature_id = retrieve_feature_id(accession=feature, soterm=soterm)
    try:
        doi_dbxref = Dbxref.objects.get(accession=doi.lower(), db__name="DOI")
        publication = Pub.objects.get(PubDbxref_pub_Pub__dbxref=doi_dbxref)
    except ObjectDoesNotExist:
        raise ImportingError("{} not registered.".format(doi))
    FeaturePub.objects.get_or_create(feature_id=feature_id, pub=publication)
def store_biomaterial(
    self,
    name: str,
    filename: str,
    db: str = None,
    acc: str = None,
    organism: Union[str, Organism] = None,
    description: str = None,
) -> Biomaterial:
    """Store a biomaterial and record the file it came from.

    Args:
        name: biomaterial name.
        filename: stored as the value of a "contained in" biomaterialprop.
        db: name of the Db owning ``acc``.
        acc: accession of the biomaterial Dbxref (e.g. a "GSMxxxx" sample
            accession from GEO).
        organism: an Organism instance, or a value passed to
            ``retrieve_organism``.
        description: free-text description.

    Returns:
        The Biomaterial that was found or created.

    Raises:
        ImportingError: wrapping an IntegrityError raised while storing the
            biomaterial or its property.
    """
    # db is not mandatory: an IntegrityError leaves it unset
    try:
        source_db, _ = Db.objects.get_or_create(name=db)
    except IntegrityError:
        source_db = None

    # same tolerance for the accession Dbxref
    try:
        sample_dbxref, _ = Dbxref.objects.get_or_create(db=source_db, accession=acc)
    except IntegrityError:
        sample_dbxref = None

    # organism is mandatory
    if not isinstance(organism, Organism):
        try:
            self.organism = retrieve_organism(organism)
            organism_id = self.organism.organism_id
        except IntegrityError:
            organism_id = None
    else:
        organism_id = organism.organism_id

    # TODO: get cvterm for condition - import required ontology,
    # check http://obi-ontology.org/ or https://www.bioontology.org/
    try:
        # made name mandatory (it is not regarding the schema definition)
        biomaterial, _ = Biomaterial.objects.get_or_create(
            name=name,
            taxon_id=organism_id,
            dbxref=sample_dbxref,
            description=description,
            defaults={"biosourceprovider_id": None},
        )
        self.store_biomaterialprop(
            biomaterial=biomaterial,
            type_id=self.cvterm_contained_in.cvterm_id,
            value=filename,
        )
    except IntegrityError as err:
        raise ImportingError(err)
    return biomaterial
def store_project(self, name: str, filename: str) -> Project:
    """Store a project and record the file it came from.

    The project is found or created by ``name``; ``filename`` is stored as
    the value of a "contained in" projectprop.

    Raises:
        ImportingError: wrapping an IntegrityError raised while storing.
    """
    try:
        project = Project.objects.get_or_create(name=name)[0]
        self.store_projectprop(
            project=project,
            type_id=self.cvterm_contained_in.cvterm_id,
            value=filename,
        )
    except IntegrityError as err:
        raise ImportingError(err)
    return project
def store_analysisprop(self, analysis: Analysis, type_id: int, value: str = None, rank: int = 0) -> None:
    """Create a property for an analysis.

    Raises:
        ImportingError: wrapping an IntegrityError raised by the creation.
    """
    prop_fields = {"analysis": analysis, "type_id": type_id, "value": value, "rank": rank}
    try:
        Analysisprop.objects.create(**prop_fields)
    except IntegrityError as err:
        raise ImportingError(err)
def store_assayprop(self, assay: Assay, type_id: int, value: str, rank: int = 0) -> None:
    """Store an assay property, reusing an identical one if it exists.

    Raises:
        ImportingError: wrapping an IntegrityError raised by the ORM call.
    """
    prop_fields = {"assay": assay, "type_id": type_id, "value": value, "rank": rank}
    try:
        Assayprop.objects.get_or_create(**prop_fields)
    except IntegrityError as err:
        raise ImportingError(err)
def __init__(self, phylotree_name: str, organism_db: str) -> None:
    """Create a new phylotree and the records used while loading it.

    Gets or creates the Db named ``organism_db`` and its "taxonomy" Dbxref,
    creates the Phylotree, gets or creates the "species_taxonomy" Db and
    the "taxonomy" Cv, and starts an empty cvterm cache.

    Raises:
        ImportingError: if a phylotree named ``phylotree_name`` already
            exists, or if any of the creations raises an IntegrityError.
    """
    try:
        Phylotree.objects.get(name=phylotree_name)
    except ObjectDoesNotExist:
        # expected path: the name is still free
        pass
    else:
        raise ImportingError("Phylotree {} already exists".format(phylotree_name))

    try:
        self.db, _ = Db.objects.get_or_create(name=organism_db)
        taxonomy_dbxref, _ = Dbxref.objects.get_or_create(db=self.db, accession="taxonomy")
        self.phylotree = Phylotree.objects.create(dbxref=taxonomy_dbxref, name=phylotree_name)
        self.level_db, _ = Db.objects.get_or_create(name="species_taxonomy")
        self.level_cv, _ = Cv.objects.get_or_create(name="taxonomy")
        self.level_cvterms: Dict[str, Cvterm] = {}
    except IntegrityError as err:
        raise ImportingError(err)
def store_biomaterialprop(self, biomaterial: Biomaterial, type_id: int, value: str, rank: int = 0) -> None:
    """Store a biomaterial property, reusing an identical one if it exists.

    Raises:
        ImportingError: wrapping an IntegrityError raised by the ORM call.
    """
    try:
        Biomaterialprop.objects.get_or_create(
            biomaterial=biomaterial,
            type_id=type_id,
            value=value,
            rank=rank,
        )
    except IntegrityError as err:
        raise ImportingError(err)
def store_biomaterial_treatment(self, biomaterial: Biomaterial, treatment: Treatment, rank: int = 0) -> None:
    """Link a biomaterial to a treatment, reusing the link if it exists.

    Both ``biomaterial`` and ``treatment`` are mandatory.

    Raises:
        ImportingError: wrapping an IntegrityError raised by the ORM call.
    """
    try:
        BiomaterialTreatment.objects.get_or_create(
            biomaterial=biomaterial,
            treatment=treatment,
            rank=rank,
        )
    except IntegrityError as err:
        raise ImportingError(err)
def store_treatment(self, name: str, biomaterial: Biomaterial, rank: int = 0) -> Treatment:
    """Store a treatment of a biomaterial, typed with ``self.cvterm_null``.

    Returns:
        The Treatment that was found or created.

    Raises:
        ImportingError: wrapping an IntegrityError raised by the ORM call.
    """
    try:
        return Treatment.objects.get_or_create(
            biomaterial=biomaterial,
            type_id=self.cvterm_null.cvterm_id,
            name=name,
            rank=rank,
        )[0]
    except IntegrityError as err:
        raise ImportingError(err)
def store_assay_biomaterial(self, assay: Assay, biomaterial: Biomaterial, rank: int = 0) -> None:
    """Link an assay to a biomaterial, reusing the link if it exists.

    A newly created link gets ``channel_id`` set to ``None``.

    Raises:
        ImportingError: wrapping an IntegrityError raised by the ORM call.
    """
    try:
        AssayBiomaterial.objects.get_or_create(
            assay=assay,
            biomaterial=biomaterial,
            rank=rank,
            defaults={"channel_id": None},
        )
    except IntegrityError as err:
        raise ImportingError(err)
def store_analysisfeature(
    self,
    analysis: Analysis,
    feature: Union[str, Feature],
    organism: Union[str, Organism],
    rawscore: float = None,
    normscore: float = None,
    feature_db: str = "GFF_SOURCE",
    significance: float = None,
    identity: float = None,
) -> None:
    """Store an analysisfeature (expression counts for a given feature).

    Args:
        analysis: analysis the scores belong to.
        feature: a Feature instance, or an accession resolved with
            ``retrieve_feature_id`` using the "mRNA" term.
        organism: an Organism instance, or a value resolved with
            ``retrieve_organism``; the result is not used further here.
        rawscore, normscore, significance, identity: stored as given.
        feature_db: accepted but not used by this method.

    Raises:
        ImportingError: wrapping an IntegrityError raised while resolving
            the organism or creating the analysisfeature.
    """
    if not isinstance(organism, Organism):
        try:
            organism = retrieve_organism(organism)
        except IntegrityError as err:
            raise ImportingError(err)

    # retrieve feature
    if isinstance(feature, Feature):
        feature_id = feature.feature_id
    else:
        feature_id = retrieve_feature_id(accession=feature, soterm="mRNA")

    # finally create analysisfeature
    try:
        Analysisfeature.objects.create(
            feature_id=feature_id,
            analysis=analysis,
            rawscore=rawscore,
            normscore=normscore,
            significance=significance,
            identity=identity,
        )
    except IntegrityError as err:
        raise ImportingError(err)
def retrieve_subject_from_hsp(self, hsp: hsp.HSP) -> int:
    """Return the feature id of the subject (hit) of a SearchIO HSP.

    ``hsp.hit_id`` is tried first; when no feature matches, an id
    extracted from ``hsp.hit_description`` by
    ``self.retrieve_id_from_description`` is tried instead.

    Raises:
        ImportingError: carrying both lookup errors when neither finds a
            feature.
    """
    try:
        return retrieve_feature_id(accession=hsp.hit_id, soterm=self.so_subject)
    except ObjectDoesNotExist as e1:
        # second chance: the accession may be embedded in the description
        try:
            described_id = self.retrieve_id_from_description(hsp.hit_description)
            return retrieve_feature_id(accession=described_id, soterm=self.so_subject)
        except ObjectDoesNotExist as e2:
            raise ImportingError(e1, e2, "Subject {} {}".format(hsp.hit_id, hsp.hit_description))
def retrieve_query_from_hsp(self, hsp: hsp.HSP) -> int:
    """Return the feature id of the query of a SearchIO HSP.

    ``hsp.query_id`` is tried first; when no feature matches, an id
    extracted from ``hsp.query_description`` by
    ``self.retrieve_id_from_description`` is tried instead.

    Raises:
        ImportingError: carrying both lookup errors when neither finds a
            feature.
    """
    try:
        return retrieve_feature_id(accession=hsp.query_id, soterm=self.so_query)
    except ObjectDoesNotExist as e1:
        # second chance: the accession may be embedded in the description
        try:
            described_id = self.retrieve_id_from_description(hsp.query_description)
            return retrieve_feature_id(accession=described_id, soterm=self.so_query)
        except ObjectDoesNotExist as e2:
            raise ImportingError(e1, e2, "Query {} {}".format(hsp.query_id, hsp.query_description))