def create_reference(self):
    """
    Build the Wikidata reference used for this entry's statements.

    Items:
        Q3047275: InterPro

    Properties:
        stated in (P248)
        imported from (P143)
        software version (P348)
        publication date (P577)

    A final part carries ``self.id`` under P2926. The finished list is stored
    on ``self.reference``; nothing is returned.
    """
    # The same reference is reused for every statement; only the last part
    # (the entry's own ID) is specific to this entry.
    self.reference = [
        PBB_Core.WDItemID("Q3047275", 'P248', is_reference=True),
        PBB_Core.WDItemID("Q3047275", 'P143', is_reference=True),
        PBB_Core.WDString(self.version, 'P348', is_reference=True),
        PBB_Core.WDTime(self.date.strftime("+%Y-%m-%dT00:00:00Z"), 'P577', is_reference=True),
        PBB_Core.WDString(self.id, "P2926", is_reference=True),
    ]
    for part in self.reference:
        part.overwrite_references = True
def create_reference(self):
    """
    Build the reference attached to statements and keep it on ``self.reference``.

    Properties:
        stated in (P248): the item named by ``self.MONDO_WDID``
        retrieved (P813): ``self.retrieved``, truncated to the day
    """
    # NOTE: an archive-URL part (P1065, from self.ref_url) was sketched here
    # at some point but is not included in the reference.
    self.reference = [
        PBB_Core.WDItemID(self.MONDO_WDID, 'P248', is_reference=True),
        PBB_Core.WDTime(self.retrieved.strftime('+%Y-%m-%dT00:00:00Z'), 'P813', is_reference=True),
    ]
def make_ref_source(source_doc, id_prop, identifier, login=None):
    """
    Build the reference for a statement taken from an external source.

    Reference is made up of:
        stated_in: if the source has a release #: the release edition item,
            else the source item itself
        link to id: link to identifier in source
        retrieved: only if source has no release #

    :param source_doc: dict describing the source, for example
        ``{'_id': 'uniprot', 'timestamp': '20161006'}`` or
        ``{'_id': 'ensembl', 'release': 86, 'timestamp': '20161005'}``.
        The dict is only read, never modified.
    :param id_prop: key into ``prop_ids`` selecting the property that carries ``identifier``
    :param identifier: identifier of the record in the source; converted with ``str()``
    :param login: must be passed if you want to be able to create new release items
    :return: list of reference parts: ``[stated_in, link_to_id]`` when a release is given,
        otherwise ``[stated_in, retrieved, link_to_id]``
    :raises ValueError: if the source or ``id_prop`` is unknown
    """
    source = source_doc['_id']
    if source not in source_items:
        raise ValueError(
            "Unknown source for reference creation: {}".format(source))
    if id_prop not in prop_ids:
        raise ValueError(
            "Unknown id_prop for reference creation: {}".format(id_prop))

    link_to_id = PBB_Core.WDString(value=str(identifier),
                                   prop_nr=prop_ids[id_prop],
                                   is_reference=True)

    if "release" in source_doc:
        # Keep the stringified release local so the caller's dict is left untouched.
        release_number = str(source_doc['release'])
        title = "{} Release {}".format(source, release_number)
        description = "Release {} of {}".format(release_number, source)
        release_item = PBB_Helpers.Release(
            title, description, release_number,
            edition_of_wdid=source_items[source]).get_or_create(login)
        stated_in = PBB_Core.WDItemID(value=release_item, prop_nr='P248', is_reference=True)
        return [stated_in, link_to_id]

    # No release number: cite the source itself plus the day it was retrieved.
    retrieved_on = datetime.strptime(source_doc['timestamp'], "%Y%m%d")
    stated_in = PBB_Core.WDItemID(value=source_items[source], prop_nr='P248', is_reference=True)
    retrieved = PBB_Core.WDTime(retrieved_on.strftime('+%Y-%m-%dT00:00:00Z'),
                                prop_nr='P813', is_reference=True)
    return [stated_in, retrieved, link_to_id]
def make_ref(retrieved, genome_id):
    """
    Build a three-part reference: stated in, genome ID link and retrieval date.

    :param retrieved: object with ``strftime`` giving the retrieval date
    :param genome_id: value stored under P2249
    :return: list of the three reference parts, in that order
    """
    # stated in ncbi gene
    stated_in = PBB_Core.WDItemID(value='Q20641742', prop_nr='P248', is_reference=True)
    # Link to Refseq Genome ID
    genome_link = PBB_Core.WDString(value=genome_id, prop_nr='P2249', is_reference=True)
    retrieved_on = PBB_Core.WDTime(retrieved.strftime('+%Y-%m-%dT00:00:00Z'),
                                   prop_nr='P813', is_reference=True)
    return [stated_in, genome_link, retrieved_on]
def make_reference(source, id_prop, identifier, retrieved):
    """
    Build a three-part reference: stated in, link to the source ID and retrieval date.

    :param source: key into ``source_items`` giving the item the statement is stated in
    :param id_prop: key into ``prop_ids`` selecting the property that carries ``identifier``
    :param identifier: identifier in the source; converted with ``str()``
    :param retrieved: object with ``strftime`` giving the retrieval date
    :return: list of the three reference parts, in that order
    """
    # stated in
    stated_in = PBB_Core.WDItemID(value=source_items[source], prop_nr='P248', is_reference=True)
    # Link to ID
    link_to_id = PBB_Core.WDString(value=str(identifier), prop_nr=prop_ids[id_prop],
                                   is_reference=True)
    retrieved_on = PBB_Core.WDTime(retrieved.strftime('+%Y-%m-%dT00:00:00Z'),
                                   prop_nr='P813', is_reference=True)
    return [stated_in, link_to_id, retrieved_on]
def create_protein_ipr(uniprot_id, uniprot_wdid, families, has_part, release_info, login):
    """
    Create interpro relationships to one protein

    :param uniprot_id: uniprot ID of the protein to modify
    :type uniprot_id: str
    :param uniprot_wdid: wikidata ID of the protein
    :param families: list of ipr wd ids the protein is a (P279) subclass of
    :param has_part: list of ipr wd ids the protein has (P527) has part
    :param release_info: mapping with a 'date' (object with ``strftime``) and a 'version' entry,
        both used in the reference
    :param login: passed to ``item.write``
    :return: None; the outcome is reported through ``PBB_Core.WDItemEngine.log``
    """
    log_template = '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'
    date = release_info['date']
    version = release_info['version']

    # One reference list, shared by every statement created below
    reference = [
        PBB_Core.WDItemID("Q3047275", 'P248', is_reference=True),
        PBB_Core.WDItemID("Q3047275", 'P143', is_reference=True),
        PBB_Core.WDString(version, 'P348', is_reference=True),
        PBB_Core.WDTime(date.strftime("+%Y-%m-%dT00:00:00Z"), 'P577', is_reference=True),
        PBB_Core.WDString("http://www.ebi.ac.uk/interpro/protein/{}".format(uniprot_id),
                          "P854", is_reference=True),
    ]
    for part in reference:
        part.overwrite_references = True

    # Falsy inputs (None, empty list) simply contribute no statements
    statements = [
        PBB_Core.WDItemID(value=family_wdid, prop_nr='P279', references=[reference])
        for family_wdid in (families or ())
    ]
    statements += [
        PBB_Core.WDItemID(value=part_wdid, prop_nr='P527', references=[reference])
        for part_wdid in (has_part or ())
    ]

    item = PBB_Core.WDItemEngine(wd_item_id=uniprot_wdid, data=statements, server=SERVER,
                                 append_value=["P279", "P527", "P361"])

    try:
        item.write(login)
    except WDApiError as e:
        # A failed write is printed and logged, then the function gives up quietly
        print(e)
        PBB_Core.WDItemEngine.log(
            'ERROR',
            log_template.format(main_data_id=uniprot_id,
                                exception_type=type(e),
                                message=str(e),
                                wd_id=uniprot_wdid,
                                duration=datetime.now()))
        return

    written = [(x.prop_nr, x.value) for x in statements]
    PBB_Core.WDItemEngine.log(
        'INFO',
        log_template.format(main_data_id=uniprot_id,
                            exception_type='',
                            message='created protein interpro relationships: {}'.format(written),
                            wd_id=uniprot_wdid,
                            duration=datetime.now()))
def __init__(self, object):
    """
    Collect the annotations of one gene, turn them into Wikidata statements and write the item.

    :param object: dict providing "start", "entrezgene", "uniprotwikidataids", "speciesInfo",
        "logincreds" and "wdid"; it is also kept as ``self.content``. When "wdid" is None the
        item engine is created without an ID and the value returned by ``write`` becomes
        ``self.wdid``. (The parameter name shadows the builtin; it is kept so existing callers
        keep working.)
    """

    def as_list(value):
        # Annotation fields arrive either as a single value or as a list of values
        return value if isinstance(value, list) else [value]

    self.start = object["start"]
    self.entrezgene = object["entrezgene"]
    self.uniprotwikidataids = object["uniprotwikidataids"]
    gene_annotations = self.annotate_gene()
    self.genomeInfo = object["speciesInfo"][str(gene_annotations['taxid'])]
    self.content = object
    self.name = gene_annotations["name"]
    self.logincreds = object["logincreds"]
    if "_timestamp" in gene_annotations:
        self.annotationstimestamp = gene_annotations["_timestamp"]
    self.wdid = object["wdid"]

    # symbol:
    self.symbol = gene_annotations["symbol"]
    print(self.symbol)

    # HGNC
    self.hgnc = as_list(gene_annotations["HGNC"]) if "HGNC" in gene_annotations else None

    # Ensembl Gene & transcript (always defined, None when the annotation is missing)
    self.ensembl_gene = None
    self.ensembl_transcript = None
    if "ensembl" in gene_annotations:
        ensembl = gene_annotations["ensembl"]
        if "gene" in ensembl:
            self.ensembl_gene = as_list(ensembl["gene"])
        if "transcript" in ensembl:
            self.ensembl_transcript = as_list(ensembl["transcript"])

    # Homologene (ids are stored as strings)
    if "homologene" in gene_annotations:
        self.homologene = [str(i) for i in as_list(gene_annotations["homologene"]["id"])]
    else:
        self.homologene = None

    # Refseq
    if "refseq" in gene_annotations and "rna" in gene_annotations["refseq"]:
        self.refseq_rna = as_list(gene_annotations["refseq"]["rna"])
    else:
        self.refseq_rna = None

    # MGI
    self.MGI = as_list(gene_annotations["MGI"]) if "MGI" in gene_annotations else None

    def mapped_positions(key):
        # Returns (chromosomes, starts, ends) for the positions stored under `key`,
        # or three Nones when the key is absent.
        # NOTE(review): a position is skipped entirely when its chromosome is not in the
        # ProteinBoxBotKnowledge table -- confirm this matches the intended nesting.
        if key not in gene_annotations:
            return None, None, None
        known_chromosomes = ProteinBoxBotKnowledge.chromosomes[self.genomeInfo["name"]]
        chromosomes, starts, ends = [], [], []
        for position in as_list(gene_annotations[key]):
            if position["chr"] in known_chromosomes:
                chromosomes.append(known_chromosomes[position["chr"]])
                starts.append(position["start"])
                ends.append(position["end"])
        return chromosomes, starts, ends

    # Current genome build (previously `startpos` was left undefined here because of a typo)
    self.chromosome, self.startpos, self.endpos = mapped_positions("genomic_pos")

    self.encodes = None
    if "uniprot" in gene_annotations and "Swiss-Prot" in gene_annotations["uniprot"]:
        self.encodes = list(as_list(gene_annotations["uniprot"]["Swiss-Prot"]))

    # Previous genome build
    self.chromosomeHg19, self.startposHg19, self.endposHg19 = mapped_positions("genomic_pos_hg19")

    # type of Gene: an unrecognised type yields an empty list, a missing one yields None
    gene_type_items = {
        "ncRNA": "Q427087",
        "snRNA": "Q284578",
        "snoRNA": "Q284416",
        "rRNA": "Q215980",
        "tRNA": "Q201448",
        "pseudo": "Q277338",
        "protein-coding": "Q20747295",
    }
    if "type_of_gene" in gene_annotations:
        gene_type = gene_annotations["type_of_gene"]
        type_item = gene_type_items.get(gene_type) if isinstance(gene_type, str) else None
        self.type_of_gene = [type_item] if type_item is not None else []
    else:
        self.type_of_gene = None

    # Reference section
    # Prepare references
    ref_retrieved = PBB_Core.WDTime(strftime("+%Y-%m-%dT00:00:00Z", gmtime()), prop_nr='P813',
                                    is_reference=True)
    gene_reference = [
        PBB_Core.WDItemID(value=self.genomeInfo["release"], prop_nr='P248', is_reference=True),
        PBB_Core.WDItemID(value='Q20641742', prop_nr='P143', is_reference=True),
        ref_retrieved,
    ]
    ensembl_reference = [
        PBB_Core.WDItemID(value='Q21996330', prop_nr='P248', is_reference=True),
        PBB_Core.WDItemID(value='Q1344256', prop_nr='P143', is_reference=True),
        ref_retrieved,
    ]
    for ref_part in gene_reference + ensembl_reference:
        ref_part.overwrite_references = True

    genome_build_qualifier = PBB_Core.WDItemID(value=self.genomeInfo["genome_assembly"],
                                               prop_nr='P659', is_qualifier=True)
    genome_build_previous_qualifier = PBB_Core.WDItemID(
        value=self.genomeInfo["genome_assembly_previous"], prop_nr='P659', is_qualifier=True)

    # Every statement gets its own deep copy of the reference/qualifier objects
    def item_statements(prop_nr, values):
        return [PBB_Core.WDItemID(value=v, prop_nr=prop_nr,
                                  references=[copy.deepcopy(gene_reference)]) for v in values]

    def string_statements(prop_nr, values):
        return [PBB_Core.WDString(value=v, prop_nr=prop_nr,
                                  references=[copy.deepcopy(gene_reference)]) for v in values]

    def position_statements(prop_nr, positions, qualifier):
        return [PBB_Core.WDString(value=str(pos), prop_nr=prop_nr,
                                  references=[copy.deepcopy(ensembl_reference)],
                                  qualifiers=[copy.deepcopy(qualifier)])
                for pos in (positions or [])]

    prep = dict()
    prep['P703'] = item_statements('P703', [self.genomeInfo['wdid']])
    if self.genomeInfo["name"] == "human":
        prep['P353'] = string_statements('P353', [self.symbol])
    prep['P351'] = string_statements('P351', [str(self.entrezgene)])
    prep['P279'] = item_statements('P279', ['Q7187'] + (self.type_of_gene or []))
    if self.ensembl_gene is not None:
        prep['P594'] = string_statements('P594', self.ensembl_gene)
    if self.ensembl_transcript is not None:
        prep['P704'] = string_statements('P704', self.ensembl_transcript)
    if self.encodes is not None:
        # Only proteins with a known Wikidata item can be linked
        prep['P688'] = item_statements(
            'P688',
            [self.uniprotwikidataids[uniprot] for uniprot in self.encodes
             if uniprot in self.uniprotwikidataids])
    if self.hgnc is not None:
        prep['P354'] = string_statements('P354', self.hgnc)
    if self.homologene is not None:
        prep['P593'] = string_statements('P593', self.homologene)
    if self.refseq_rna is not None:
        prep['P639'] = string_statements('P639', self.refseq_rna)
    prep['P1057'] = item_statements('P1057', set(self.chromosome or []))
    # Start/end positions: current build first, then the previous build, each with its own
    # genome-build qualifier. (The previous-build end positions used to be guarded by a check
    # on the wrong key, 'P644'.)
    prep['P644'] = (position_statements('P644', self.startpos, genome_build_qualifier)
                    + position_statements('P644', self.startposHg19,
                                          genome_build_previous_qualifier))
    prep['P645'] = (position_statements('P645', self.endpos, genome_build_qualifier)
                    + position_statements('P645', self.endposHg19,
                                          genome_build_previous_qualifier))
    prep['P671'] = string_statements('P671', self.MGI or [])

    if "alias" in gene_annotations:
        # Copy so that appending the symbol does not alter the annotation data
        self.synonyms = list(as_list(gene_annotations["alias"]))
        self.synonyms.append(self.symbol)
        print(self.synonyms)
    else:
        self.synonyms = None

    data2add = []
    for statements in prep.values():
        for statement in statements:
            data2add.append(statement)
            print(statement.prop_nr, statement.value)

    item_exists = self.wdid is not None
    if item_exists:
        wdPage = PBB_Core.WDItemEngine(self.wdid, item_name=self.name, data=data2add,
                                       server="www.wikidata.org", domain="genes")
    else:
        wdPage = PBB_Core.WDItemEngine(item_name=self.name, data=data2add,
                                       server="www.wikidata.org", domain="genes")

    # Only fill in descriptions that are empty or still a bare placeholder. The new-item path
    # used to invert the English check and call a misspelled `setdescription`.
    if wdPage.get_description() == "":
        wdPage.set_description(description=self.genomeInfo['name'] + ' gene', lang='en')
    if wdPage.get_description(lang='fr') in ("", "gène"):
        wdPage.set_description(description="Un gène " + self.genomeInfo['fr-name'], lang='fr')
    if wdPage.get_description(lang='nl') in ("", "gen"):
        wdPage.set_description(description="Een " + self.genomeInfo['nl-name'] + " gen", lang='nl')
    if self.synonyms is not None:
        wdPage.set_aliases(aliases=self.synonyms, lang='en', append=True)
    if item_exists:
        print(self.wdid)

    self.wd_json_representation = wdPage.get_wd_json_representation()
    PBB_Debug.prettyPrint(self.wd_json_representation)
    PBB_Debug.prettyPrint(data2add)

    if item_exists:
        wdPage.write(self.logincreds)
    else:
        # A freshly created item reports its new ID through write()
        self.wdid = wdPage.write(self.logincreds)

    # NOTE(review): `f` is not defined in this method; presumably a module-level object with a
    # `.name` attribute -- confirm, otherwise this line raises NameError.
    # NOTE(review): logged for both existing and newly created items -- confirm intended scope.
    PBB_Core.WDItemEngine.log(
        'INFO',
        '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
            main_data_id=str(self.entrezgene),
            exception_type='',
            message=f.name,
            wd_id=self.wdid,
            duration=time.time() - self.start
        ))