def reference_store(source='', identifier=''):
    """
    :param source: database source to be referenced (key name from source_items)
    :param identifier: external database identifier to cite in the reference (value for the property in prop_ids)
    :return: PBB_Core reference object for database source
    """
    source_items = {'uniprot': 'Q905695',
                    'ncbi_gene': 'Q20641742',
                    'ncbi_taxonomy': 'Q13711410',
                    'swiss_prot': 'Q2629752',
                    'trembl': 'Q22935315'}

    prop_ids = {'uniprot': 'P352',
                'ncbi_gene': 'P351',
                'ncbi_taxonomy': 'P685',
                'ncbi_locus_tag': 'P2393'
                }

    refs = [PBB_Core.WDItemID(value=source_items[source], prop_nr='P248', is_reference=True),  # stated in
            PBB_Core.WDItemID(value='Q1860', prop_nr='P407', is_reference=True),  # language of work
            PBB_Core.WDString(value=identifier, prop_nr=prop_ids[source], is_reference=True),  # source identifier
            PBB_Core.WDTime(str(strftime("+%Y-%m-%dT00:00:00Z", gmtime())), prop_nr='P813', is_reference=True)  # retrieved
            ]

    for ref in refs:
        ref.overwrite_references = True

    return refs
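# Usage sketch for reference_store (assumptions: PBB_Core is imported and the UniProt
# accession 'P12345' is purely illustrative). It mirrors how the statement builders
# below attach the returned reference block to a claim.
uniprot_ref = reference_store(source='uniprot', identifier='P12345')
claim = PBB_Core.WDString(value='P12345', prop_nr='P352', references=[uniprot_ref])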
def make_reference(self, stated_in, source_element, source_element_name, source_element_prop,
                   date=time.strftime('+%Y-%m-%dT00:00:00Z'), date_property='P813'):
    ref = [[
        PBB_Core.WDItemID(value=stated_in, prop_nr='P248', is_reference=True),  # stated in
        PBB_Core.WDString(value=source_element, prop_nr=source_element_prop, is_reference=True),  # source element
        PBB_Core.WDItemID(value='Q1860', prop_nr='P407', is_reference=True),  # language of work
        PBB_Core.WDMonolingualText(value=source_element_name, language='en', prop_nr='P1476', is_reference=True),
        PBB_Core.WDTime(time=date, prop_nr=date_property, is_reference=True)  # publication date
    ]]

    # this will overwrite all existing references of a WD claim value.
    for x in ref[0]:
        x.overwrite_references = True

    return ref
def protein_item_statements():
    """
    construct list of referenced statements to pass to PBB_Core Item engine
    :return:
    """
    uniprot_ref = wdo.reference_store(source='uniprot', identifier=uniprot)

    WD_String_CLAIMS = {
        'P637': str(gene_record['refseq']['protein']),  # set refseq protein id
        'P352': uniprot  # set uniprot ID
    }

    WD_Item_CLAIMS = {
        'P703': [spec_strain.iloc[0]['wd_qid']],  # get strain taxid qid from strain record
        'P279': ['Q8054'],  # subclass of protein
    }

    statements = []

    # generate go term claims
    for gt in gene_record['GOTERMS']:
        goprop = go_props[gt[1]]
        govalue = wdo.WDSparqlQueries(prop='P686', string=gt[0]).wd_prop2qid()  # Get GeneOntology Item by GO ID
        evprop = 'P459'
        try:
            evvalue = go_evidence_codes[gt[2]]
            evstat = PBB_Core.WDItemID(value=evvalue, prop_nr=evprop, is_qualifier=True)
            statements.append(PBB_Core.WDItemID(value=govalue, prop_nr=goprop, references=[uniprot_ref],
                                                qualifiers=[evstat]))
        except Exception as e:
            statements.append(PBB_Core.WDItemID(value=govalue, prop_nr=goprop, references=[uniprot_ref]))

    # generate list of pbb core value objects for all valid claims
    for k, v in WD_Item_CLAIMS.items():
        if v:
            for i in v:
                statements.append(PBB_Core.WDItemID(value=i, prop_nr=k, references=[uniprot_ref]))

    for k, v in WD_String_CLAIMS.items():
        if v:
            statements.append(PBB_Core.WDString(value=v, prop_nr=k, references=[uniprot_ref]))

    return statements
def gene_item_statements():
    """
    construct list of referenced statements to pass to PBB_Core Item engine
    :return:
    """
    # creates reference object for WD gene item claim
    ncbi_gene_reference = wdo.reference_store(source='ncbi_gene', identifier=gene_record['_id'])

    # claims for datatype string
    WD_String_CLAIMS = {
        'P351': str(gene_record['_id']),
        'P2393': gene_record['locus_tag'],
    }

    WD_Genome_Annotation_Claims = {
        'P644': str(int(gene_record['genomic_pos']['start'])),
        'P645': str(int(gene_record['genomic_pos']['end'])),
    }

    # claims for datatype item
    WD_Item_CLAIMS = {
        'P703': spec_strain.iloc[0]['wd_qid'],
        'P279': 'Q7187',
    }

    # convert integer representation of strand to corresponding WD item (Forward Strand/Reverse Strand)
    if gene_record['genomic_pos']['strand'] == 1:
        WD_Item_CLAIMS['P2548'] = 'Q22809680'
    elif gene_record['genomic_pos']['strand'] == -1:
        WD_Item_CLAIMS['P2548'] = 'Q22809711'

    chromosome = gene_record['genomic_pos']['chr']
    rs_chrom = PBB_Core.WDString(value=chromosome, prop_nr='P2249', is_qualifier=True)

    statements = []

    # convert each valid entry in each datatype dict to a PBB_Core data value object and append to statements
    # WDItemID datatype
    for k, v in WD_Item_CLAIMS.items():
        statements.append(PBB_Core.WDItemID(value=v, prop_nr=k, references=[ncbi_gene_reference]))

    # WDString datatype
    for k, v in WD_String_CLAIMS.items():
        statements.append(PBB_Core.WDString(value=v, prop_nr=k, references=[ncbi_gene_reference]))

    for k, v in WD_Genome_Annotation_Claims.items():
        statements.append(PBB_Core.WDString(value=v, prop_nr=k, references=[ncbi_gene_reference],
                                            qualifiers=[rs_chrom]))

    return statements
def create_reference(self):
    first_ref = PBB_Core.WDItemID(value='Q905695', prop_nr='P248', is_reference=True)
    first_ref.overwrite_references = True

    return [
        first_ref,
        PBB_Core.WDString(value=self.uniprot, prop_nr='P352', is_reference=True),
        PBB_Core.WDTime(time=time.strftime('+%Y-%m-%dT00:00:00Z', time.gmtime()), prop_nr='P813',
                        is_reference=True),
        PBB_Core.WDItemID(value='Q1860', prop_nr='P407', is_reference=True),  # language of work
    ]
def create_xref_statement(self, value, xref_dict):
    for prop_nr, v in xref_dict.items():
        qualifiers = []
        if v:
            for p, vv in v.items():
                qualifiers.append(PBB_Core.WDItemID(value=vv, prop_nr=p, is_qualifier=True))

        return PBB_Core.WDItemID(value=value, prop_nr=prop_nr, qualifiers=qualifiers,
                                 references=[self.create_reference()])
def create_reference(self):
    return [
        PBB_Core.WDItemID(value=self.ontology_ref_item, prop_nr='P248', is_reference=True),
        PBB_Core.WDItemID(value='Q22230760', prop_nr='P143', is_reference=True),
        PBB_Core.WDTime(time=time.strftime('+%Y-%m-%dT00:00:00Z', time.gmtime()), prop_nr='P813',
                        is_reference=True),
        PBB_Core.WDItemID(value='Q1860', prop_nr='P407', is_reference=True),  # language of work
    ]
def __init__(self, object): self.logincreds = object["logincreds"] self.name = object["uberonLabel"] self.uberon = object["uberon"] self.uberon_id = self.uberon.replace("http://purl.obolibrary.org/obo/UBERON_", "") self.wikidata_id = object["wikidata_id"] self.start = object["start"] self.graph = object["graph"] subcls = URIRef("http://www.w3.org/2000/01/rdf-schema#subClassOf") id = URIRef("http://www.geneontology.org/formats/oboInOwl#id") hasExactSyn = URIRef("http://www.geneontology.org/formats/oboInOwl#hasExactSynonym") print(self.uberon_id) print(self.name) refStatedIn = PBB_Core.WDItemID(21552738, prop_nr='P248', is_reference=True) refStatedIn.overwrite_references = True refImported = PBB_Core.WDItemID(value=7876491, prop_nr='P143', is_reference=True) refImported.overwrite_references = True timeStringNow = strftime("+%Y-%m-%dT00:00:00Z", gmtime()) refRetrieved = PBB_Core.WDTime(timeStringNow, prop_nr='P813', is_reference=True) refRetrieved.overwrite_references = True ub_reference = [refStatedIn, refImported, refRetrieved] if self.uberon_id in self.wikidata_id.keys(): self.wdid = self.wikidata_id[self.uberon_id.replace("UBERON:", "")] else: self.wdid = None self.synonyms = [] for synonym in self.graph.objects(URIRef(self.uberon), hasExactSyn): self.synonyms.append(str(synonym)) prep = dict() prep["P279"] = [PBB_Core.WDItemID(value='Q4936952', prop_nr='P279', references=[copy.deepcopy(ub_reference)])] prep["P1554"] = [PBB_Core.WDString(value=self.uberon_id, prop_nr='P1554', references=[copy.deepcopy(ub_reference)])] print(self.uberon) prep["P1709"] = [PBB_Core.WDUrl(value=self.uberon, prop_nr='P1709', references=[copy.deepcopy(ub_reference)])] data2add = [] for key in prep.keys(): for statement in prep[key]: data2add.append(statement) print(statement.prop_nr, statement.value) if self.wdid is not None: wdPage = PBB_Core.WDItemEngine(self.wdid, item_name=self.name, data=data2add, server="www.wikidata.org", domain="anatomical_structure",append_value=['P279']) else: wdPage = PBB_Core.WDItemEngine(item_name=self.name, data=data2add, server="www.wikidata.org", domain="anatomical_structure", append_value=['P279']) if len(self.synonyms) >0: wdPage.set_aliases(aliases=self.synonyms, lang='en', append=True) print(self.synonyms) for syn in self.synonyms: print(syn) wdPage.write(self.logincreds) print("======") sys.exit()
def interwiki_link(entrez, name):
    # Query wikidata for Q-item id (cid)
    cid_query = """
        SELECT ?cid WHERE {
            ?cid wdt:P351 ?entrez_id .
            FILTER(?entrez_id ='""" + str(entrez) + """') .
        }
    """
    wikidata_results = PBB_Core.WDItemEngine.execute_sparql_query(prefix=settings.PREFIX,
                                                                  query=cid_query)['results']['bindings']
    cid = ''
    for x in wikidata_results:
        cid = x['cid']['value'].split('/')[-1]

    # create interwiki link
    username = models.CharField(max_length=200, blank=False)
    password = models.CharField(max_length=200, blank=False)

    # create your login object with your user and password (or the ProteinBoxBot account?)
    login_obj = PBB_login.WDLogin(user=username, pwd=password)

    # load the gene Wikidata object
    wd_gene_item = PBB_Core.WDItemEngine(wd_item_id=cid)

    # set the interwiki link to the correct Wikipedia page
    wd_gene_item.set_sitelink(site='enwiki', title=name)

    # write the changes to the item
    wd_gene_item.write(login_obj)
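# Usage sketch for interwiki_link (assumptions: settings.PREFIX, PBB_Core and
# PBB_login are available in the calling scope; the Entrez ID and article title
# below are placeholders). Note that the models.CharField(...) lines above are
# Django field declarations, not credential strings, and would need to be
# replaced with real user/password values for the login to succeed.
interwiki_link(entrez='12345', name='Example gene article')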
def main(): """ This function undo gene to protein merges. For that, a query searches for WD items which have the Entrez gene ID (P351) and Uniprot ID (P352) on one item. Bases on that, it generates instances of MergeDefender and undoes the merges. :return: None """ print(sys.argv[1]) # pwd = input('Password:'******'ProteinBoxBot', pwd=sys.argv[1]) conflict_set_1 = {'P351'} conflict_set_2 = {'P352'} likely_merged_ids = PBB_Core.WDItemList(wdquery='CLAIM[351] AND CLAIM[352]') print(likely_merged_ids.wditems['items']) for count, x in enumerate(likely_merged_ids.wditems['items']): print('\n', count) print('Q{}'.format(x)) try: MergeDefender(login, merge_target='Q{}'.format(x), conflict_set_1=conflict_set_1, conflict_set_2=conflict_set_2) except Exception as e: traceback.print_exc() PBB_Core.WDItemEngine.log('ERROR', '{main_data_id}, "{exception_type}", "{message}"'.format( main_data_id=x, exception_type=type(e), message=e.__str__(), ))
def __init__(self):
    self.start = time.time()
    self.content = ET.fromstring(self.download_disease_ontology())
    self.logincreds = PBB_login.WDLogin(PBB_settings.getWikiDataUser(), PBB_settings.getWikiDataPassword())
    # self.updateDiseaseOntologyVersion()

    # Get all WikiData entries that contain a Disease Ontology ID
    print("Getting all terms with a Disease Ontology ID in WikiData")
    doWikiData_id = dict()
    DoInWikiData = PBB_Core.WDItemList("CLAIM[699]", "699")

    print("Getting latest version of Disease Ontology from Github")
    r = requests.get("https://api.github.com/repos/DiseaseOntology/HumanDiseaseOntology/git/refs")
    test = r.json()
    sha = test[0]["object"]["sha"]
    githubReferenceUrl = "https://raw.githubusercontent.com/DiseaseOntology/HumanDiseaseOntology/" + sha + "/src/ontology/doid.owl"

    for diseaseItem in DoInWikiData.wditems["props"]["699"]:
        doWikiData_id[str(diseaseItem[2])] = diseaseItem[0]  # diseaseItem[2] = DO identifier, diseaseItem[0] = WD identifier

    for doClass in self.content.findall('.//owl:Class', DiseaseOntology_settings.getDoNameSpaces()):
        try:
            disVars = []
            disVars.append(doClass)
            disVars.append(githubReferenceUrl)
            disVars.append(doWikiData_id)
            disVars.append(self.logincreds)
            disVars.append(self.start)

            diseaseClass = disease(disVars)

            print("do_id: " + diseaseClass.do_id)
            print(diseaseClass.wdid)
            print(diseaseClass.name)
            print(diseaseClass.synonyms)
            print(diseaseClass.xrefs)
        except Exception as e:
            PBB_Core.WDItemEngine.log(
                'ERROR', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                    main_data_id=diseaseClass.do_id,
                    exception_type=type(e),
                    message=e.__str__(),
                    wd_id='-',
                    duration=time.time() - self.start))
            f = open('/tmp/Diseaseexceptions.txt', 'a')
            # f.write("Unexpected error:", sys.exc_info()[0]+'\n')
            f.write(diseaseClass.do_id + "\n")
            # f.write(diseaseClass.wd_json_representation)
            traceback.print_exc(file=f)
            f.close()
def print_item(qid):
    wd_item = PBB_Core.WDItemEngine(wd_item_id=qid, use_sparql=True)
    label = wd_item.get_label()
    description = wd_item.get_description()
    aliases = wd_item.get_aliases()
    sitelinks_string = extract_sitelinks(wd_item.get_wd_json_representation()['sitelinks'])

    statement_print = ''
    for stmt in wd_item.statements:
        # retrieve English prop label and store in prop_label dict to minimize traffic
        prop_nr = stmt.get_prop_nr()
        prop_label = ''
        if prop_nr not in prop_store:
            prop_item = PBB_Core.WDItemEngine(wd_item_id=prop_nr)
            prop_label = prop_item.get_label()
            prop_store[prop_nr] = prop_label
        else:
            prop_label = prop_store[prop_nr]

        item_label = stmt.get_value()
        item_id = ''
        if isinstance(stmt, PBB_Core.WDItemID):
            item_id = item_label
            # print(item_id)
            item = PBB_Core.WDItemEngine(wd_item_id='Q{}'.format(item_label))
            item_label = '{} (QID: Q{})'.format(item.get_label(), item_id)

        statement_print += 'Prop: {0:.<40} value: {1} \n '.format('{} ({})'.format(prop_label, prop_nr), item_label)

    output = '''
        Item QID: {4}
        Item: {0} / {1} / {2}
        {3}
        {5}
    '''.format(label, description, aliases, statement_print, qid, sitelinks_string)

    print(output)
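# Usage sketch for print_item (assumptions: PBB_Core is imported and the module-level
# prop_store dict and extract_sitelinks() helper that print_item relies on are defined
# alongside it; the QID below is only a placeholder).
prop_store = {}

print_item('Q12345')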
def main():
    pwd = input('Password:')
    login = PBB_login.WDLogin(user='ProteinBoxBot', pwd=pwd)

    # for mouse genes
    # LabelReplacement(PBB_Core.WDItemList('CLAIM[351] and CLAIM[703:83310]').wditems['items'],
    #                  {'gène': 'gène de souris'}, 'fr', login)

    # for human genes
    LabelReplacement(PBB_Core.WDItemList('CLAIM[351] and CLAIM[703:5]').wditems['items'],
                     {'gène': 'gène humain'}, 'fr', login)
def protein_item_statements():
    """
    construct list of referenced statements to pass to PBB_Core Item engine
    :return:
    """
    uniprot_ref = wdo.reference_store(source='uniprot', identifier=uniprot)

    WD_String_CLAIMS = {
        'P637': str(gene_record['refseq']['protein']),
        # 'P2393': gene_record['locus_tag'],
        'P352': uniprot
        # 'P591': str(gene_record['EC number'])
    }

    WD_Item_CLAIMS = {
        'P703': [spec_strain.iloc[0]['wd_qid']],
        'P279': ['Q8054'],
        'P680': [],  # molecular function
        'P681': [],  # cellular component
        'P682': []   # biological process
    }

    for gt in gene_record['GOTERMS']:
        gtids = parse_go_terms(gt)
        WD_Item_CLAIMS[gtids[1]].append(gtids[0])

    statements = []

    # generate list of pbb core value objects for all valid claims
    for k, v in WD_Item_CLAIMS.items():
        if v:
            for i in v:
                statements.append(PBB_Core.WDItemID(value=i, prop_nr=k, references=[uniprot_ref]))

    for k, v in WD_String_CLAIMS.items():
        if v:
            statements.append(PBB_Core.WDString(value=v, prop_nr=k, references=[uniprot_ref]))

    return statements
def __init__(self, object): self.logincreds = object["logincreds"] self.source = object["source"] self.ortholog = object["ortholog"] self.species = object["speciesWdID"] # Prepare references refStatedInHomologeneBuild = PBB_Core.WDItemID(value='Q20976936', prop_nr='P248', is_reference=True) refImportedFromHomologen = PBB_Core.WDItemID(value='Q468215', prop_nr='P143', is_reference=True) timeStringNow = strftime("+%Y-%m-%dT00:00:00Z", gmtime()) refRetrieved = PBB_Core.WDTime(timeStringNow, prop_nr='P813', is_reference=True) homologene_reference = [[ refStatedInHomologeneBuild, refImportedFromHomologen, refRetrieved ]] # Prepare qualifiers humanQualifier = PBB_Core.WDItemID(value='Q5', prop_nr='P703', is_qualifier=True) mouseQualifier = PBB_Core.WDItemID(value='Q83310', prop_nr='P703', is_qualifier=True) # Prepare the items to add if self.species == "Q5": orthologValue = PBB_Core.WDItemID(value=self.ortholog, prop_nr='P684', references=homologene_reference, qualifiers=[humanQualifier]) elif self.species == "Q83310": orthologValue = PBB_Core.WDItemID(value=self.ortholog, prop_nr='P684', references=homologene_reference, qualifiers=[mouseQualifier]) wdPage = PBB_Core.WDItemEngine(wd_item_id=self.source, data=[orthologValue], server="www.wikidata.org", domain="genes") print(wdPage.wd_json_representation) wdPage.write(self.logincreds)
def generate_refs(iuphar_ligand):
    ref_list = [[]]

    ref_list[0].extend([
        PBB_Core.WDItemID(value='Q2793172', prop_nr='P248', is_reference=True),  # stated in
        PBB_Core.WDString(value=iuphar_ligand, prop_nr='P595', is_reference=True),  # source element
    ])

    ref_list[0].extend([
        PBB_Core.WDItemID(value='Q1860', prop_nr='P407', is_reference=True),  # language of work
        # PBB_Core.WDMonolingualText(value=source_element_name, language='en',
        #                            prop_nr='P1476', is_reference=True),
        PBB_Core.WDTime(time=time.strftime('+%Y-%m-%dT00:00:00Z'), prop_nr='P813', is_reference=True)  # publication date
    ])

    return ref_list
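# Usage sketch for generate_refs (assumptions: PBB_Core and time are imported; the
# ligand ID, item QID and property below are placeholders chosen only for
# illustration). The nested list returned by generate_refs is passed directly as
# the references argument of a statement.
refs = generate_refs('1234')
claim = PBB_Core.WDItemID(value='Q12345', prop_nr='P31', references=refs)  # placeholder item and property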
def __init__(self): self.start = time.time() self.logincreds = PBB_login.WDLogin(PBB_settings.getWikiDataUser(), PBB_settings.getWikiDataPassword()) # Get all WikiData entries that contain a WikiData ID print("Getting all terms with a Gene Ontology ID in WikiData") goWikiData_id = dict() goInWikiData = PBB_Core.WDItemList("CLAIM[686]", "686") for goItem in goInWikiData.wditems["props"]["686"]: goWikiData_id[str(goItem[2])] = goItem[ 0] # diseaseItem[2] = go identifier, diseaseItem[0] = go identifier print(len(goWikiData_id.keys())) sys.exit() graph = rdflib.Graph() goUrl = requests.get("http://purl.obolibrary.org/obo/go.owl") print("ja") graph.parse(data=goUrl.text, format="application/rdf+xml") cls = URIRef("http://www.w3.org/2002/07/owl#Class") subcls = URIRef("http://www.w3.org/2000/01/rdf-schema#subClassOf") counter = 0 for gouri in graph.subjects(RDF.type, cls): try: counter = counter + 1 print(counter) goVars = dict() goVars["uri"] = gouri goVars["label"] = graph.label(URIRef(gouri)) goVars["wikidata_id"] = goWikiData_id goVars["logincreds"] = self.logincreds goVars["start"] = self.start goVars["graph"] = graph if "GO" in gouri: goClass = goTerm(goVars) except Exception as e: print(traceback.format_exc()) PBB_Core.WDItemEngine.log( 'ERROR', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}' .format(main_data_id=gouri, exception_type=type(e), message=e.__str__(), wd_id='-', duration=time.time() - self.start))
def __init__(self):
    self.content = json.loads(self.download_mouse_proteins())
    # print self.content["results"]["bindings"]
    self.protein_count = len(self.content["results"]["bindings"])
    self.proteins = self.content["results"]["bindings"]
    self.logincreds = PBB_login.WDLogin(PBB_settings.getWikiDataUser(), PBB_settings.getWikiDataPassword())

    uniprotWikidataIds = dict()
    print("Getting all proteins with a uniprot ID in Wikidata")
    InWikiData = PBB_Core.WDItemList("CLAIM[703:83310] AND CLAIM[352]", "352")
    r0 = requests.get(
        "http://sparql.uniprot.org/sparql?query=PREFIX+up%3a%3chttp%3a%2f%2fpurl.uniprot.org%2fcore%2f%3e+%0d%0aPREFIX+taxonomy%3a+%3chttp%3a%2f%2fpurl.uniprot.org%2ftaxonomy%2f%3e%0d%0aSELECT+DISTINCT+*%0d%0aWHERE%0d%0a%7b%0d%0a%09%09%3fprotein+a+up%3aProtein+.%0d%0a++%09%09%3fprotein+rdfs%3alabel+%3fprotein_label+.%0d%0a++++++++%3fprotein+up%3aorganism+taxonomy%3a10090+.%0d%0a%7d&format=srj"
    )

    for proteinItem in InWikiData.wditems["props"]["352"]:
        try:
            uniprotWikidataIds[str(proteinItem[2])] = proteinItem[0]
            r = requests.get(
                "http://sparql.uniprot.org/sparql?query=PREFIX+up%3a%3chttp%3a%2f%2fpurl.uniprot.org%2fcore%2f%3e%0d%0aPREFIX+taxonomy%3a%3chttp%3a%2f%2fpurl.uniprot.org%2ftaxonomy%2f%3e%0d%0aPREFIX+database%3a%3chttp%3a%2f%2fpurl.uniprot.org%2fdatabase%2f%3e%0d%0aSELECT+%3funiprot+%3fplabel+%3fecName+%3fupversion%0d%0a+++++++(group_concat(distinct+%3falias%3b+separator%3d%22%3b+%22)+as+%3fupalias)%0d%0a+++++++(group_concat(distinct+%3fpdb%3b+separator%3d%22%3b+%22)+as+%3fpdbid)%0d%0a+++++++(group_concat(distinct+%3frefseq%3b+separator%3d%22%3b+%22)+as+%3frefseqid)%0d%0a+++++++(group_concat(distinct+%3fensP%3b+separator%3d%22%3b+%22)+as+%3fensemblp)%0d%0aWHERE%0d%0a%7b%0d%0a%09%09VALUES+%3funiprot+%7b%3chttp%3a%2f%2fpurl.uniprot.org%2funiprot%2f"
                + str(proteinItem[2]) +
                "%3e%7d%0d%0a++++++++%3funiprot+rdfs%3alabel+%3fplabel+.%0d%0a++++++++%3funiprot+up%3aversion+%3fupversion+.+%0d%0a++++++++optional%7b%3funiprot+up%3aalternativeName+%3fupAlias+.%0d%0a++++++++%3fupAlias+up%3aecName+%3fecName+.%7d%0d%0a++++++++%0d%0a++++++++OPTIONAL%7b+%3funiprot+up%3aalternativeName+%3fupAlias+.%0d%0a++++++++++%7b%3fupAlias+up%3afullName+%3falias+.%7d+UNION%0d%0a++++++++%7b%3fupAlias+up%3ashortName+%3falias+.%7d%7d%0d%0a++++++++%3funiprot+up%3aversion+%3fupversion+.%0d%0a++++++++OPTIONAL%7b%3funiprot+rdfs%3aseeAlso+%3fpdb+.%0d%0a++++++++%3fpdb+up%3adatabase+database%3aPDB+.%7d%0d%0a++++++++OPTIONAL%7b%3funiprot+rdfs%3aseeAlso+%3frefseq+.%0d%0a++++++++%3frefseq+up%3adatabase+database%3aRefSeq+.%7d++%0d%0a++++++++OPTIONAL%7b%3funiprot+rdfs%3aseeAlso+%3fensT+.%0d%0a++++++++%3fensT+up%3adatabase+database%3aEnsembl+.%0d%0a++++++++%3fensT+up%3atranslatedTo+%3fensP+.%7d%0d%0a%7d%0d%0agroup+by+%3fupAlias+%3funiprot+%3fplabel+%3fecName+%3fupversion&format=srj"
            )
            # r = requests.get("http://sparql.uniprot.org/sparql?query=PREFIX+up%3a%3chttp%3a%2f%2fpurl.uniprot.org%2fcore%2f%3e%0d%0aPREFIX+taxonomy%3a%3chttp%3a%2f%2fpurl.uniprot.org%2ftaxonomy%2f%3e%0d%0aPREFIX+database%3a%3chttp%3a%2f%2fpurl.uniprot.org%2fdatabase%2f%3e%0d%0aSELECT+%3funiprot+%3fplabel+%3fecName+%0d%0a+++++++(group_concat(distinct+%3falias%3b+separator%3d%22%3b+%22)+as+%3fupalias)%0d%0a+++++++(group_concat(distinct+%3fpdb%3b+separator%3d%22%3b+%22)+as+%3fpdbid)%0d%0a+++++++(group_concat(distinct+%3frefseq%3b+separator%3d%22%3b+%22)+as+%3frefseqid)%0d%0a+++++++(group_concat(distinct+%3fensP%3b+separator%3d%22%3b+%22)+as+%3fensemblp)%0d%0aWHERE%0d%0a%7b%0d%0a%09%09VALUES+%3funiprot+%7b%3chttp%3a%2f%2fpurl.uniprot.org%2funiprot%2f"+str(proteinItem[2])+"%3e%7d%0d%0a++++++++%3funiprot+rdfs%3alabel+%3fplabel+.%0d%0a++++++++optional%7b%3funiprot+up%3aalternativeName+%3fupAlias+.%0d%0a++++++++%3fupAlias+up%3aecName+%3fecName+.%7d%0d%0a++++++++%0d%0a++++++++OPTIONAL%7b+%3funiprot+up%3aalternativeName+%3fupAlias+.%0d%0a++++++++++%7b%3fupAlias+up%3afullName+%3falias+.%7d+UNION%0d%0a++++++++%7b%3fupAlias+up%3ashortName+%3falias+.%7d%7d%0d%0a++++++++%3funiprot+up%3aversion+%3fupversion+.%0d%0a++++++++OPTIONAL%7b%3funiprot+rdfs%3aseeAlso+%3fpdb+.%0d%0a++++++++%3fpdb+up%3adatabase+database%3aPDB+.%7d%0d%0a++++++++OPTIONAL%7b%3funiprot+rdfs%3aseeAlso+%3frefseq+.%0d%0a++++++++%3frefseq+up%3adatabase+database%3aRefSeq+.%7d++%0d%0a++++++++OPTIONAL%7b%3funiprot+rdfs%3aseeAlso+%3fensT+.%0d%0a++++++++%3fensT+up%3adatabase+database%3aEnsembl+.%0d%0a++++++++%3fensT+up%3atranslatedTo+%3fensP+.%7d%0d%0a%7d%0d%0agroup+by+%3fupAlias+%3funiprot+%3fplabel+%3fecName&format=srj")

            print(r.text)
            protein = json.loads(r.text)
            protein["logincreds"] = self.logincreds
            protein["wdid"] = 'Q' + str(proteinItem[0])
            print(protein)
            proteinClass = mouse_protein(protein)
        except:
            # client = Client('http://*****:*****@sentry.sulab.org/9')
            # client.captureException()
            print("There has been an except")
            print("Unexpected error:", sys.exc_info()[0])
            f = open('/tmp/exceptions.txt', 'a')
            # f.write("Unexpected error:", sys.exc_info()[0]+'\n')
            f.write(str(protein["results"]["bindings"][0]["uniprot"]["value"]) + "\n")
            traceback.print_exc(file=f)
            f.close()
def get_wd_search_results(search_string=''):
    """
    Performs a search in WD for a certain WD search string
    :param search_string: a string which should be searched for in WD
    :return: returns a list of QIDs found in the search and a list of labels complementary to the QIDs
    """
    try:
        url = 'https://www.wikidata.org/w/api.php'
        params = {
            'action': 'wbsearchentities',
            'language': 'en',
            'search': search_string,
            'format': 'json',
            'limit': '15'
        }

        reply = requests.get(url, params=params)
        search_results = reply.json()

        if search_results['success'] != 1:
            raise PBB_Core.WDSearchError('WD search failed')
        elif len(search_results['search']) == 0:
            return []
        else:
            id_list = []
            id_labels = []
            id_descr = []
            id_aliases = []
            for i in search_results['search']:
                id_list.append(i['id'])
                id_labels.append(i['label'])
                if 'description' in i:
                    id_descr.append(i['description'])
                else:
                    id_descr.append('')
                if 'aliases' in i:
                    id_aliases.append(i['aliases'])
                else:
                    id_aliases.append('')

            return id_list, id_labels, id_descr, id_aliases

    except requests.HTTPError as e:
        print(e)
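# Usage sketch for get_wd_search_results (assumes requests and PBB_Core are imported;
# the search term is illustrative). An empty search returns a bare list while hits
# return a 4-tuple, so the caller should check for the empty case before unpacking.
results = get_wd_search_results('insulin')
if results:
    qids, labels, descriptions, aliases = results
    for qid, label in zip(qids, labels):
        print(qid, label)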
def merge(merge_to, merge_from, login_obj):
    data = [PBB_Core.WDBaseDataType.delete_statement(prop_nr='P279')]

    try:
        wd_item = PBB_Core.WDItemEngine(wd_item_id=merge_from, data=data)
        wd_item.set_description(description='', lang='en')
        wd_item.set_description(description='', lang='de')
        wd_item.set_description(description='', lang='fr')
        wd_item.set_description(description='', lang='nl')
        wd_item.write(login=login_obj)
        print('merge accepted')

        merge_reply = PBB_Core.WDItemEngine.merge_items(from_id=merge_from, to_id=merge_to, login_obj=login_obj)
        pprint.pprint(merge_reply)
        print('merge completed')
    except PBB_Core.MergeError as e:
        pprint.pprint(e)
    except Exception as e:
        pprint.pprint(e)
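# Usage sketch for merge (assumptions: PBB_login, PBB_Core and pprint are imported;
# the credentials and QIDs below are placeholders, not real merge targets).
login = PBB_login.WDLogin(user='example_user', pwd='example_password')
merge(merge_to='Q1000001', merge_from='Q1000002', login_obj=login)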
def __init__(self, wd_item_list, replacement_map, lang, login):
    for count, i in enumerate(wd_item_list):
        qid = 'Q{}'.format(i)
        wd_item = PBB_Core.WDItemEngine(wd_item_id=qid)
        description = wd_item.get_description(lang)

        if description in replacement_map:
            print('entered')
            en_label = ''
            if 'en' in wd_item.get_wd_json_representation()['labels']:
                en_label = wd_item.get_wd_json_representation()['labels']['en']['value']

            print('\n')
            print('Label: {}'.format(en_label), 'QID: ', wd_item.wd_item_id)
            print(count)

            try:
                edit_token = login.get_edit_token()
                cookies = login.get_edit_cookie()

                params = {
                    'action': 'wbsetdescription',
                    'id': qid,
                    'language': lang,
                    'value': replacement_map[description],
                    'token': edit_token,
                    'bot': '',
                    'format': 'json',
                }

                reply = requests.post('https://www.wikidata.org/w/api.php', data=params, cookies=cookies)
                # print(reply.text)
            except requests.HTTPError as e:
                print(e)
            except Exception as e:
                print(e)
        else:
            print('No action required for QID: ', wd_item.wd_item_id, ' |count: ', count)
def __init__(self): self.start = time.time() self.logincreds = PBB_login.WDLogin(PBB_settings.getWikiDataUser(), PBB_settings.getWikiDataPassword()) # Get all WikiData entries that contain a WikiData ID print("Getting all terms with a Uberon ID in WikiData") ubWikiData_id = dict() ubInWikiData = PBB_Core.WDItemList("CLAIM[1554]", "1554") for uberonItem in ubInWikiData.wditems["props"]["1554"]: ubWikiData_id[str(uberonItem[2])]=uberonItem[0] # diseaseItem[2] = Uberon identifier, diseaseItem[0] = Uberon identifier graph = rdflib.Graph() ubUrl = requests.get("http://purl.obolibrary.org/obo/uberon.owl") print("ja") graph.parse(data=ubUrl.text, format="application/rdf+xml") cls = URIRef("http://www.w3.org/2002/07/owl#Class") subcls = URIRef("http://www.w3.org/2000/01/rdf-schema#subClassOf") for uberonuri in graph.subjects(RDF.type, cls): try: uberonVars = dict() uberonVars["uberon"] = uberonuri uberonVars["uberonLabel"] = graph.label(URIRef(uberonuri)) uberonVars["wikidata_id"] = ubWikiData_id uberonVars["logincreds"] = self.logincreds uberonVars["start"] = self.start uberonVars["graph"] = graph if "UBERON" in uberonuri: uberonClass = uberonTerm(uberonVars) except Exception as e: print(traceback.format_exc()) PBB_Core.WDItemEngine.log('ERROR', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format( main_data_id=uberonuri, exception_type=type(e), message=e.__str__(), wd_id='-', duration=time.time() - self.start ))
def generate_refs(ref_source_id):
    ref_list = [[]]

    if ref_source_id.startswith('C'):
        ref_list[0].extend([
            PBB_Core.WDItemID(value='Q6120337', prop_nr='P248', is_reference=True),  # stated in
            PBB_Core.WDString(value=ref_source_id, prop_nr='P592', is_reference=True),  # source element
        ])
    elif ref_source_id.startswith('N'):
        ref_list[0].extend([
            PBB_Core.WDItemID(value='Q21008030', prop_nr='P248', is_reference=True),  # stated in
            PBB_Core.WDString(value=ref_source_id, prop_nr='P2115', is_reference=True),  # source element
        ])

    ref_list[0].extend([
        PBB_Core.WDItemID(value='Q1860', prop_nr='P407', is_reference=True),  # language of work
        # PBB_Core.WDMonolingualText(value=source_element_name, language='en',
        #                            prop_nr='P1476', is_reference=True),
        PBB_Core.WDTime(time=time.strftime('+%Y-%m-%dT00:00:00Z'), prop_nr='P813', is_reference=True)  # publication date
    ])

    return ref_list
def __init__(self, login):
    self.login_obj = login

    image_data = pd.read_csv('./image_data/gene_wiki_images_with_preferred.txt', encoding='utf-8',
                             sep='\t', dtype={'entrez': np.str})

    wdq_results = PBB_Core.WDItemList('CLAIM[351] and CLAIM[703:5]', '351').wditems
    wd_entrez_ids = list(map(lambda z: z[2], wdq_results['props']['351']))
    entrez_qid_list = list(map(lambda z: 'Q{}'.format(z[0]), wdq_results['props']['351']))

    print(len(wd_entrez_ids))

    for index in image_data.index:
        start = time.time()

        # print(image_data.loc[index, 'other_images'])
        image_names = image_data.loc[index, 'other_images']
        preferred_image = image_data.loc[index, 'primary_image']
        image_file_extension = ['.png', '.jpg', '.jpeg', '.pdf']

        if pd.notnull(preferred_image) and '|' in preferred_image:
            for splt in preferred_image.split('|'):
                for ending in image_file_extension:
                    if ending in splt:
                        preferred_image = splt
                        break

        entrez = image_data.loc[index, 'entrez']
        # print(entrez)

        protein_images = []
        protein_image_value_store = []
        genex_images = []
        genex_value_store = []

        if entrez not in wd_entrez_ids:
            PBB_Core.WDItemEngine.log(
                'ERROR', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                    main_data_id=entrez,
                    exception_type='',
                    message='Entrez ID not yet in Wikidata!!',
                    wd_id='',
                    duration=time.time() - start))
            continue
        else:
            curr_qid = entrez_qid_list[wd_entrez_ids.index(entrez)]

        if pd.isnull(image_names):
            PBB_Core.WDItemEngine.log(
                'WARNING', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                    main_data_id=entrez,
                    exception_type='',
                    message='No images available for this Entrez ID',
                    wd_id=curr_qid,
                    duration=time.time() - start))
            continue

        for sub_string in image_names.split('|'):
            if 'PBB GE ' in sub_string:
                value = sub_string[5:]
                # if value[-6:-4] == 'tn':
                #     value = value[:-6] + 'fs' + value[-4:]

                # Gene Expression reference: https://www.wikidata.org/wiki/Q21074956
                genex_images.append(value)
                genex_value_store.append(PBB_Core.WDCommonsMedia(value=value, prop_nr='P692'))
            elif 'PDB ' in sub_string:
                value = sub_string[5:]
                protein_images.append(value)
                protein_image_value_store.append(PBB_Core.WDCommonsMedia(value, prop_nr=''))

        entrez_id_value = PBB_Core.WDString(value=entrez, prop_nr='P351')
        data = [entrez_id_value]
        data.extend(genex_value_store)

        if pd.notnull(preferred_image):
            data.append(PBB_Core.WDCommonsMedia(value=preferred_image, prop_nr='P18'))

        try:
            gene_item = PBB_Core.WDItemEngine(wd_item_id=curr_qid, domain='genes', data=data)
            # pprint.pprint(gene_item.get_wd_json_representation())
            gene_item.write(self.login_obj)

            PBB_Core.WDItemEngine.log(
                'INFO', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                    main_data_id=entrez,
                    exception_type='',
                    message='success',
                    wd_id=curr_qid,
                    duration=time.time() - start))
            print(index, 'success', curr_qid, entrez, gene_item.get_label(lang='en'))
        except Exception as e:
            print(index, 'error', curr_qid, entrez)

            PBB_Core.WDItemEngine.log(
                'ERROR', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                    main_data_id=entrez,
                    exception_type=type(e),
                    message=e.__str__(),
                    wd_id=curr_qid,
                    duration=time.time() - start))
def cleanup_obsolete_edges(ontology_id, core_property_nr, login, current_node_qids=(), obsolete_term=False):
    filter_props_string = ''
    if not obsolete_term:
        for x in OBOImporter.obo_wd_map.values():
            prop_nr = list(x.keys())[0]
            filter_props_string += 'Filter (?p = wdt:{})\n'.format(prop_nr)

    query = '''
    SELECT DISTINCT ?qid ?p ?onto_qid WHERE {{
        {{
            SELECT DISTINCT ?onto_qid WHERE {{
                ?onto_qid wdt:{2} '{0}' .
            }}
        }}
        ?qid ?p [wdt:{2} '{0}'].
        {1}
    }}
    ORDER BY ?qid
    '''.format(ontology_id, filter_props_string, core_property_nr)
    print(query)

    sr = PBB_Core.WDItemEngine.execute_sparql_query(query=query)

    for occurrence in sr['results']['bindings']:
        if 'statement' in occurrence['qid']['value']:
            continue

        start = time.time()

        qid = occurrence['qid']['value'].split('/')[-1]
        if qid in current_node_qids:
            continue

        prop_nr = occurrence['p']['value'].split('/')[-1]
        wd_onto_qid = occurrence['onto_qid']['value'].split('/')[-1]
        wd_item_id = PBB_Core.WDItemID(value=wd_onto_qid, prop_nr=prop_nr)
        setattr(wd_item_id, 'remove', '')

        try:
            wd_item = PBB_Core.WDItemEngine(wd_item_id=qid, data=[wd_item_id])
            wd_item.write(login=login)

            PBB_Core.WDItemEngine.log(
                'INFO', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                    main_data_id='{}'.format(ontology_id),
                    exception_type='',
                    message='successfully removed obsolete edges',
                    wd_id=qid,
                    duration=time.time() - start))
        except Exception as e:
            print(e)

            PBB_Core.WDItemEngine.log(
                'ERROR', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                    main_data_id='{}'.format(ontology_id),
                    exception_type=type(e),
                    message=e.__str__(),
                    wd_id=qid,
                    duration=time.time() - start))

    if obsolete_term:
        data = [
            PBB_Core.WDString(value=ontology_id, prop_nr=core_property_nr, rank='deprecated'),
        ]

        start = time.time()

        try:
            wd_item = PBB_Core.WDItemEngine(item_name='obo', domain='obo', data=data, use_sparql=True)
            if wd_item.create_new_item:
                return
            qid = wd_item.write(login=login)

            PBB_Core.WDItemEngine.log(
                'INFO', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                    main_data_id='{}'.format(ontology_id),
                    exception_type='',
                    message='successfully obsoleted the ',
                    wd_id=qid,
                    duration=time.time() - start))
        except Exception as e:
            print(e)

            PBB_Core.WDItemEngine.log(
                'ERROR', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                    main_data_id='{}'.format(ontology_id),
                    exception_type=type(e),
                    message=e.__str__(),
                    wd_id='',
                    duration=time.time() - start))
def get_item_qid(go_id, data=()):
    start = time.time()

    if self.use_prefix:
        id_string = '{}:{}'.format(self.ontology, go_id)
    else:
        id_string = go_id

    # for efficiency reasons, skip if item already had a root write performed
    if go_id in self.local_qid_onto_map and self.local_qid_onto_map[go_id]['had_root_write'] \
            and 'qid' in self.local_qid_onto_map[go_id]:
        return self.local_qid_onto_map[go_id]['qid'], False, False

    try:
        data = list(data)

        r = OBOImporter.ols_session.get(url=self.base_url + '{}_{}'.format(self.ontology, go_id),
                                        headers=self.headers)
        go_term_data = r.json()

        label = go_term_data['label'].replace('_', ' ')
        description = go_term_data['description'][0]

        if go_term_data['is_obsolete']:
            OBOImporter.cleanup_obsolete_edges(ontology_id=id_string, login=self.login_obj,
                                               core_property_nr=self.core_property_nr, obsolete_term=True)
            return None, None, None

        # get parent ontology term info so item can be populated with description, etc.
        data.append(PBB_Core.WDString(value=id_string, prop_nr=self.core_property_nr,
                                      references=[self.create_reference()]))

        exact_match_string = 'http://purl.obolibrary.org/obo/{}_{}'.format(self.ontology, go_id)
        data.append(PBB_Core.WDUrl(value=exact_match_string, prop_nr='P2888'))

        # add xrefs
        if go_term_data['obo_xref']:
            for xref in go_term_data['obo_xref']:
                if xref['database'] in OBOImporter.xref_props:
                    wd_prop = OBOImporter.xref_props[xref['database']]
                else:
                    continue
                xref_value = xref['id']
                data.append(PBB_Core.WDExternalID(value=xref_value, prop_nr=wd_prop,
                                                  references=[self.create_reference()]))

        if go_term_data['obo_synonym']:
            for syn in go_term_data['obo_synonym']:
                if syn['type'] in OBOImporter.obo_synonyms:
                    wd_prop = OBOImporter.obo_synonyms[syn['type']]
                else:
                    continue
                syn_value = syn['name']
                data.append(PBB_Core.WDExternalID(value=syn_value, prop_nr=wd_prop,
                                                  references=[self.create_reference()]))

        if go_id in self.local_qid_onto_map:
            wd_item = PBB_Core.WDItemEngine(wd_item_id=self.local_qid_onto_map[go_id]['qid'], domain='obo',
                                            data=data, fast_run=self.fast_run,
                                            fast_run_base_filter=self.fast_run_base_filter)
        else:
            wd_item = PBB_Core.WDItemEngine(item_name='test', domain='obo', data=data, fast_run=self.fast_run,
                                            fast_run_base_filter=self.fast_run_base_filter)

        wd_item.set_label(label=label)
        wd_item.set_description(description=description[0:250])
        # if len(description) <= 250:
        #     wd_item.set_description(description=description)
        # else:
        #     wd_item.set_description(description='Gene Ontology term')

        if go_term_data['synonyms'] is not None and len(go_term_data['synonyms']) > 0:
            aliases = []
            for alias in go_term_data['synonyms']:
                if len(alias) <= 250:
                    aliases.append(alias)

            wd_item.set_aliases(aliases=aliases)

        new_msg = ''
        if wd_item.create_new_item:
            new_msg = ': created new {} term'.format(self.ontology)

        qid = wd_item.write(login=self.login_obj)

        if go_id not in self.local_qid_onto_map:
            self.local_qid_onto_map[go_id] = {
                'qid': qid,
                'had_root_write': False,
            }

        if go_id == current_root_id:
            self.local_qid_onto_map[go_id]['had_root_write'] = True
            self.local_qid_onto_map[go_id]['parents'] = list(parents)
            self.local_qid_onto_map[go_id]['children'] = list(children)

        current_node_qids.append(qid)
        print('QID created or retrieved', qid)

        PBB_Core.WDItemEngine.log(
            'INFO', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                main_data_id='{}:{}'.format(self.ontology, go_id),
                exception_type='',
                message='success{}'.format(new_msg),
                wd_id=qid,
                duration=time.time() - start))

        return qid, go_term_data['obo_xref'], wd_item.require_write

    except Exception as e:
        print(e)
        # traceback.print_exc(e)
        PBB_Core.WDItemEngine.log(
            'ERROR', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                main_data_id='{}:{}'.format(self.ontology, go_id),
                exception_type=type(e),
                message=e.__str__(),
                wd_id='',
                duration=time.time() - start))

        return None, None, None
def write_term(self, current_root_id, parents, children):
    print('current_root', current_root_id, parents, children)
    current_node_qids = []

    def get_item_qid(go_id, data=()):
        start = time.time()

        if self.use_prefix:
            id_string = '{}:{}'.format(self.ontology, go_id)
        else:
            id_string = go_id

        # for efficiency reasons, skip if item already had a root write performed
        if go_id in self.local_qid_onto_map and self.local_qid_onto_map[go_id]['had_root_write'] \
                and 'qid' in self.local_qid_onto_map[go_id]:
            return self.local_qid_onto_map[go_id]['qid'], False, False

        try:
            data = list(data)

            r = OBOImporter.ols_session.get(url=self.base_url + '{}_{}'.format(self.ontology, go_id),
                                            headers=self.headers)
            go_term_data = r.json()

            label = go_term_data['label'].replace('_', ' ')
            description = go_term_data['description'][0]

            if go_term_data['is_obsolete']:
                OBOImporter.cleanup_obsolete_edges(ontology_id=id_string, login=self.login_obj,
                                                   core_property_nr=self.core_property_nr, obsolete_term=True)
                return None, None, None

            # get parent ontology term info so item can be populated with description, etc.
            data.append(PBB_Core.WDString(value=id_string, prop_nr=self.core_property_nr,
                                          references=[self.create_reference()]))

            exact_match_string = 'http://purl.obolibrary.org/obo/{}_{}'.format(self.ontology, go_id)
            data.append(PBB_Core.WDUrl(value=exact_match_string, prop_nr='P2888'))

            # add xrefs
            if go_term_data['obo_xref']:
                for xref in go_term_data['obo_xref']:
                    if xref['database'] in OBOImporter.xref_props:
                        wd_prop = OBOImporter.xref_props[xref['database']]
                    else:
                        continue
                    xref_value = xref['id']
                    data.append(PBB_Core.WDExternalID(value=xref_value, prop_nr=wd_prop,
                                                      references=[self.create_reference()]))

            if go_term_data['obo_synonym']:
                for syn in go_term_data['obo_synonym']:
                    if syn['type'] in OBOImporter.obo_synonyms:
                        wd_prop = OBOImporter.obo_synonyms[syn['type']]
                    else:
                        continue
                    syn_value = syn['name']
                    data.append(PBB_Core.WDExternalID(value=syn_value, prop_nr=wd_prop,
                                                      references=[self.create_reference()]))

            if go_id in self.local_qid_onto_map:
                wd_item = PBB_Core.WDItemEngine(wd_item_id=self.local_qid_onto_map[go_id]['qid'], domain='obo',
                                                data=data, fast_run=self.fast_run,
                                                fast_run_base_filter=self.fast_run_base_filter)
            else:
                wd_item = PBB_Core.WDItemEngine(item_name='test', domain='obo', data=data,
                                                fast_run=self.fast_run,
                                                fast_run_base_filter=self.fast_run_base_filter)

            wd_item.set_label(label=label)
            wd_item.set_description(description=description[0:250])
            # if len(description) <= 250:
            #     wd_item.set_description(description=description)
            # else:
            #     wd_item.set_description(description='Gene Ontology term')

            if go_term_data['synonyms'] is not None and len(go_term_data['synonyms']) > 0:
                aliases = []
                for alias in go_term_data['synonyms']:
                    if len(alias) <= 250:
                        aliases.append(alias)

                wd_item.set_aliases(aliases=aliases)

            new_msg = ''
            if wd_item.create_new_item:
                new_msg = ': created new {} term'.format(self.ontology)

            qid = wd_item.write(login=self.login_obj)

            if go_id not in self.local_qid_onto_map:
                self.local_qid_onto_map[go_id] = {
                    'qid': qid,
                    'had_root_write': False,
                }

            if go_id == current_root_id:
                self.local_qid_onto_map[go_id]['had_root_write'] = True
                self.local_qid_onto_map[go_id]['parents'] = list(parents)
                self.local_qid_onto_map[go_id]['children'] = list(children)

            current_node_qids.append(qid)
            print('QID created or retrieved', qid)

            PBB_Core.WDItemEngine.log(
                'INFO', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                    main_data_id='{}:{}'.format(self.ontology, go_id),
                    exception_type='',
                    message='success{}'.format(new_msg),
                    wd_id=qid,
                    duration=time.time() - start))

            return qid, go_term_data['obo_xref'], wd_item.require_write

        except Exception as e:
            print(e)
            # traceback.print_exc(e)
            PBB_Core.WDItemEngine.log(
                'ERROR', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                    main_data_id='{}:{}'.format(self.ontology, go_id),
                    exception_type=type(e),
                    message=e.__str__(),
                    wd_id='',
                    duration=time.time() - start))

            return None, None, None

    dt = []
    parent_qids = []
    write_required = []
    for parent_id in parents:
        pi, o, w = get_item_qid(parent_id)
        write_required.append(w)
        if pi:
            parent_qids.append(pi)
            dt.append(PBB_Core.WDItemID(value=pi, prop_nr='P279', references=[self.create_reference()]))

    for edge in self.term_graph['edges']:
        if edge['uri'] in self.obo_wd_map and edge['uri'] != 'http://www.w3.org/2000/01/rdf-schema#subClassOf':
            go = edge['target'].split('_')[-1]
            if go != current_root_id:
                xref_dict = self.obo_wd_map[edge['uri']]
        elif edge['uri'] in self.rev_prop_map and edge['source'].split('_')[-1] != current_root_id:
            xref_dict = self.obo_wd_map[self.rev_prop_map[edge['uri']]]
            go = edge['source'].split('_')[-1]
        else:
            continue

        pi, o, w = get_item_qid(go_id=go)
        write_required.append(w)
        dt.append(self.create_xref_statement(value=pi, xref_dict=xref_dict))

    root_qid, obsolete, w = get_item_qid(go_id=current_root_id, data=dt)
    if obsolete and not any(write_required):
        if self.use_prefix:
            id_string = '{}:{}'.format(self.ontology, current_root_id)
        else:
            id_string = current_root_id

        OBOImporter.cleanup_obsolete_edges(ontology_id=id_string, login=self.login_obj,
                                           core_property_nr=self.core_property_nr,
                                           current_node_qids=current_node_qids)

    print('----COUNT----:', len(self.local_qid_onto_map))

    f = open('temp_{}_onto_map.json'.format(self.ontology), 'w')
    f.write(json.dumps(self.local_qid_onto_map))
    f.close()
def __init__(self, object):
    # Uniprot
    self.logincreds = object["logincreds"]
    self.version = object["results"]["bindings"][0]["upversion"]["value"]
    self.wdid = object["wdid"]
    self.uniprot = object["results"]["bindings"][0]["uniprot"]["value"]
    print(self.uniprot)
    self.uniprotId = object["results"]["bindings"][0]["uniprot"]["value"].replace(
        "http://purl.uniprot.org/uniprot/", "").replace(" ", "")
    self.name = object["results"]["bindings"][0]["plabel"]["value"]

    if "ecName" in object["results"]["bindings"][0].keys():
        print(object["results"]["bindings"][0]["ecName"]["value"])
        self.ecname = object["results"]["bindings"][0]["ecName"]["value"]

    self.alias = []
    for syn in object["results"]["bindings"][0]["upalias"]["value"].split(";"):
        self.alias.append(syn)

    if "pdbid" in object["results"]["bindings"][0].keys():
        if object["results"]["bindings"][0]["pdbid"]["value"] != "":
            self.pdb = []
            for pdbId in object["results"]["bindings"][0]["pdbid"]["value"].split(";"):
                self.pdb.append(pdbId.replace("http://rdf.wwpdb.org/pdb/", "").replace(" ", ""))

    if "refseq" in object["results"]["bindings"][0].keys():
        self.refseq = []
        for refseqId in object["results"]["bindings"][0]["refseqid"]["value"].split(";"):
            self.refseq.append(refseqId.replace("http://purl.uniprot.org/refseq/", "").replace(" ", ""))

    self.ensemblp = []
    for ensP in object["results"]["bindings"][0]["ensemblp"]["value"].split(";"):
        self.ensemblp.append(ensP.replace("http://purl.uniprot.org/ensembl/", "").replace(" ", ""))

    protein_reference = {
        'ref_properties': [u'P143', 'TIMESTAMP'],
        'ref_values': [u'Q905695', 'TIMESTAMP']
    }

    print(vars(self))

    references = dict()
    data2add = dict()

    # P279 = subclass of
    data2add["P279"] = ["8054"]
    references['P279'] = [copy.deepcopy(protein_reference)]

    # P703 = found in taxon
    data2add["P703"] = ["83310"]
    references['P703'] = [copy.deepcopy(protein_reference)]

    # P352 = UniprotID
    data2add["P352"] = [self.uniprotId]
    references['P352'] = [copy.deepcopy(protein_reference)]

    # P591 = EC number
    if "ecname" in vars(self):
        data2add["P591"] = [self.ecname]
        references['P591'] = [copy.deepcopy(protein_reference)]

    # P638 = PDBID
    if "pdb" in vars(self):
        print("len pdb = " + str(len(self.pdb)))
        print(self.pdb)
        if len(self.pdb) > 0:
            data2add['P638'] = self.pdb
            references['P638'] = []
            for i in range(len(self.pdb)):
                references['P638'].append(copy.deepcopy(protein_reference))

    # P637 = Refseq Protein ID
    if "refseq" in vars(self):
        if len(self.refseq) > 0:
            data2add['P637'] = self.refseq
            references['P637'] = []
            for i in range(len(self.refseq)):
                references['P637'].append(copy.deepcopy(protein_reference))

    # P705 = Ensembl Protein ID
    if "ensemblp" in vars(self):
        if len(self.ensemblp) > 0:
            data2add['P705'] = self.ensemblp
            references['P705'] = []
            for i in range(len(self.ensemblp)):
                references['P705'].append(copy.deepcopy(protein_reference))

    wdPage = PBB_Core.WDItemEngine(wd_item_id=self.wdid, item_name=self.name, data=data2add,
                                   server="www.wikidata.org", references=references, domain="proteins")
    self.wd_json_representation = wdPage.get_wd_json_representation()
    PBB_Debug.prettyPrint(self.wd_json_representation)
    wdPage.write(self.logincreds)
def encodes(gene_record, login):
    """
    identifies microbial gene and protein items and links them via encodes (P688) and encoded by (P702) claims
    :param gene_record: gene record from MGI_UNIP_MERGER()
    :return: links gene and protein wikidata items
    """
    uniprot = str(list(gene_record['uniprot'].values())[0])
    start = time.time()

    # find gene and protein qids
    gene_qid = wdo.WDSparqlQueries(prop='P351', string=gene_record['_id']).wd_prop2qid()
    protein_qid = wdo.WDSparqlQueries(prop='P352', string=uniprot).wd_prop2qid()
    print(gene_qid, protein_qid)

    # if a gene or protein item is not found, skip this one
    if gene_qid is not None and protein_qid is not None:
        print('gene {} and protein {} found'.format(gene_qid, protein_qid))

        # generate reference and claim values for each item
        ncbi_gene_reference = wdo.reference_store(source='ncbi_gene', identifier=gene_record['_id'])
        gene_encodes = [PBB_Core.WDItemID(value=protein_qid, prop_nr='P688', references=[ncbi_gene_reference])]
        protein_encoded_by = [PBB_Core.WDItemID(value=gene_qid, prop_nr='P702', references=[ncbi_gene_reference])]

        # find and write items
        success_count = 0

        wd_encodes_item = PBB_Core.WDItemEngine(wd_item_id=gene_qid, data=gene_encodes)
        # pprint.pprint(wd_encodes_item.get_wd_json_representation())

        try:
            wd_encodes_item = PBB_Core.WDItemEngine(wd_item_id=gene_qid, data=gene_encodes)
            wd_encodes_item.write(login)
            PBB_Core.WDItemEngine.log(
                'INFO', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                    main_data_id=gene_record['_id'],
                    exception_type='',
                    message='encodes claim written successfully',
                    wd_id=wd_encodes_item.wd_item_id,
                    duration=time.time() - start))
            print('gene success')
            success_count += 1
        except Exception as e:
            print(e)
            PBB_Core.WDItemEngine.log(
                'ERROR', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                    main_data_id=gene_record['_id'],
                    exception_type=type(e),
                    message=e.__str__(),
                    wd_id='',
                    duration=time.time() - start))

        try:
            wd_encoded_by_item = PBB_Core.WDItemEngine(wd_item_id=protein_qid, data=protein_encoded_by)
            wd_encoded_by_item.write(login)
            PBB_Core.WDItemEngine.log(
                'INFO', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                    main_data_id=uniprot,
                    exception_type='',
                    message='encoded by claim written successfully',
                    wd_id=wd_encoded_by_item.wd_item_id,
                    duration=time.time() - start))
            print('protein success')
            success_count += 1
        except Exception as e:
            print(e)
            PBB_Core.WDItemEngine.log(
                'ERROR', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                    main_data_id=gene_record['_id'],
                    exception_type=type(e),
                    message=e.__str__(),
                    wd_id='',
                    duration=time.time() - start))

        if success_count == 2:
            return 'success'

    end = time.time()
    print('Time elapsed:', end - start)
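# Usage sketch for encodes (assumptions: the wdo helper module, PBB_Core and
# PBB_login used inside encodes() are importable; the record layout mirrors what
# the function reads -- an '_id' Entrez gene ID and a one-entry 'uniprot' mapping.
# All identifiers and credentials below are placeholders, not real data).
login = PBB_login.WDLogin(user='example_user', pwd='example_password')
gene_record = {'_id': '0000000', 'uniprot': {'Swiss-Prot': 'P00000'}}
result = encodes(gene_record, login)
print(result)  # 'success' only if both the encodes and encoded-by claims were written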
def get_item_qid(go_id, data=()):
    start = time.time()

    # for efficiency reasons, skip if item already had a root write performed
    if go_id in self.local_qid_onto_map and self.local_qid_onto_map[go_id]['had_root_write'] \
            and 'qid' in self.local_qid_onto_map[go_id]:
        return self.local_qid_onto_map[go_id]['qid']

    try:
        data = list(data)

        r = requests.get(url=self.base_url + '{}_{}'.format(self.ontology, go_id), headers=self.headers)
        go_term_data = r.json()

        label = go_term_data['label']
        description = go_term_data['description'][0]

        if go_term_data['is_obsolete']:
            OBOImporter.cleanup_obsolete_edges(ontology_id='{}:{}'.format(self.ontology, go_id),
                                               login=self.login_obj,
                                               core_property_nr=self.core_property_nr,
                                               obsolete_term=True)
            return None

        # get parent ontology term info so item can be populated with description, etc.
        data.append(PBB_Core.WDString(value='GO:{}'.format(go_id), prop_nr=self.core_property_nr,
                                      references=[self.create_reference()]))
        print(data)

        if go_id in self.local_qid_onto_map:
            wd_item = PBB_Core.WDItemEngine(wd_item_id=self.local_qid_onto_map[go_id]['qid'], domain='obo',
                                            data=data, use_sparql=True)
        else:
            wd_item = PBB_Core.WDItemEngine(item_name='test', domain='obo', data=data, use_sparql=True)

        wd_item.set_label(label=label)
        if len(description) <= 250:
            wd_item.set_description(description=description)
        else:
            wd_item.set_description(description='Gene Ontology term')

        if go_term_data['synonyms'] is not None and len(go_term_data['synonyms']) > 0:
            aliases = []
            for alias in go_term_data['synonyms']:
                if len(alias) <= 250:
                    aliases.append(alias)

            wd_item.set_aliases(aliases=aliases)

        new_msg = ''
        if wd_item.create_new_item:
            new_msg = ': created new GO term'

        qid = wd_item.write(login=self.login_obj)

        if go_id not in self.local_qid_onto_map:
            self.local_qid_onto_map[go_id] = {
                'qid': qid,
                'had_root_write': False,
            }

        if go_id == current_root_id:
            self.local_qid_onto_map[go_id]['had_root_write'] = True
            self.local_qid_onto_map[go_id]['parents'] = list(parents)
            self.local_qid_onto_map[go_id]['children'] = list(children)

        current_node_qids.append(qid)
        print('QID created or retrieved', qid)

        PBB_Core.WDItemEngine.log(
            'INFO', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                main_data_id='{}:{}'.format(self.ontology, go_id),
                exception_type='',
                message='success{}'.format(new_msg),
                wd_id=qid,
                duration=time.time() - start))

        return qid

    except Exception as e:
        print(e)
        PBB_Core.WDItemEngine.log(
            'ERROR', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                main_data_id='{}:{}'.format(self.ontology, go_id),
                exception_type=type(e),
                message=e.__str__(),
                wd_id='',
                duration=time.time() - start))

        return None