def write_claims_to_item(qid, json_data, wdi_login):
    """
    Write new claims to an item.

    Args:
        qid (str): Wikidata Identifier
        json_data (List[Dict]): Data from request
        wdi_login (WDLogin): Authenticated login used to write the claims

    Returns:
        Response: JSON success message
    """
    # Build statements
    data = [
        build_statement(claim_data.get('pid'), claim_data.get('value'),
                        claim_data.get('type'), claim_data.get('qualifiers'),
                        claim_data.get('references'))
        for claim_data in json_data
    ]
    props = [statement.prop_nr for statement in data]
    # Get wikidata item
    item = WDItemEngine(wd_item_id=qid, mediawiki_api_url=MEDIAWIKI_API_URL,
                        data=data, append_value=props)
    qid = item.write(wdi_login)
    return jsonify({
        "message": f"Successfully Contributed {len(data)} Statement(s) "
                   f"to Wikidata Item '{item.get_label()}' ({qid}).",
        "status": "success",
    })

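# Usage sketch for write_claims_to_item (illustrative only): the payload keys mirror
# the .get() calls above, and "P31"/"Q5"/"Q4115189" are placeholder identifiers; it
# assumes an authenticated `wdi_login` is available elsewhere in the application.
def _example_write_claims(wdi_login):
    example_payload = [{
        "pid": "P31",             # property to write
        "value": "Q5",            # value for that property
        "type": "WikibaseItem",   # assumed datatype tag consumed by build_statement
        "qualifiers": [],
        "references": [],
    }]
    # Q4115189 is the Wikidata sandbox item.
    return write_claims_to_item("Q4115189", example_payload, wdi_login)
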
def _create_statement(self, entity: wdi_core.WDItemEngine, predicate: TripleElement,
                      objct: TripleElement) -> wdi_core.WDItemEngine:
    statement = objct.to_wdi_datatype(prop_nr=predicate.id)
    data = [statement]
    entity.update(data=data, append_value=[predicate.id])
    return entity

def _remove_statement(self, entity: wdi_core.WDItemEngine, predicate: TripleElement,
                      _) -> wdi_core.WDItemEngine:
    statement_to_remove = wdi_core.WDBaseDataType.delete_statement(predicate.id)
    data = [statement_to_remove]
    entity.update(data=data)
    return entity

def _add_related_link_to_entity(self, entity: wdi_core.WDItemEngine, uri: str):
    """
    Adds a related link (the item's original URI) to the entity.

    :param entity: wikibase item
    :param uri: item's URI
    :return: None; the item is updated in place with the related link property
    """
    rel_link = wdi_core.WDUrl(value=uri, prop_nr=self._related_link_prop)
    entity.update([rel_link], append_value=[self._related_link_prop])

def _remove_alias(self, entity: wdi_core.WDItemEngine,
                  objct: LiteralElement) -> wdi_core.WDItemEngine:
    lang = get_lang_from_literal(objct)
    logging.debug("Removing alias @%s of %s", lang, entity)
    curr_aliases = entity.get_aliases(lang)
    try:
        curr_aliases.remove(objct.content)
        entity.set_aliases(curr_aliases, lang, append=False)
    except ValueError:
        logging.warning("Alias %s@%s does not exist for object %s. Skipping removal...",
                        objct.content, lang, entity.wd_item_id)
    return entity

def parse_node(self, item: wdi_core.WDItemEngine):
    type_statements = [s for s in item.statements if s.get_prop_nr() == self.type_pid]
    if len(type_statements) != 1:
        return None
    node_template = dict()
    node_template[':LABEL'] = self.qid_dbxref["Q" + str(type_statements[0].get_value())]
    node_template['id:ID'] = self.qid_dbxref[item.wd_item_id]
    node_template['preflabel'] = self.undo_id_parenthesis(item.get_label())
    node_template['name'] = item.get_label()
    node_template['description'] = item.get_description()
    node_template['synonyms:IGNORE'] = "|".join(item.get_aliases())
    return node_template

def second_pass(self):
    for oparl_object, values in self.missing_links.items():
        print("ADDMISSING", oparl_object)
        wd_item_id = self.id_mapping.get(oparl_object)
        claims = []
        for (key, value) in values:
            claim = self.create_single_claim(value,
                                             self.type_mapping[key]["type"],
                                             self.type_mapping[key]["property"])
            claims.append(claim)
            print("CLAIM", claim)
        wd_item = WDItemEngine(wd_item_id=wd_item_id, item_name=None, domain="",
                               data=claims, server=self.server,
                               base_url_template=self.base_url_template)
        wd_item.write(self.login)
    self.save_any("id", self.id_mapping)

def execute_sparql_query(query_text, endpoint='http://avalanche.scripps.edu:9999/bigdata/sparql'):
    # Execute the query
    result = WDItemEngine.execute_sparql_query(query_text, endpoint=endpoint,
                                               as_dataframe=True)
    # Don't do any processing if the result is empty
    if len(result) == 0:
        return result
    # Enforce the proper column order and return
    col_order = get_col_order(query_text)
    return parse_result_uris(result)[col_order]

def get_property_types():
    # {'CommonsMedia', 'Time', 'Quantity', 'WikibaseProperty', 'WikibaseItem',
    #  'GlobeCoordinate', 'String', 'ExternalId', 'Math', 'Monolingualtext',
    #  'TabularData', 'Url', 'GeoShape'}
    query = "SELECT ?p ?pt WHERE {?p wikibase:propertyType ?pt}"
    results = WDItemEngine.execute_sparql_query(query)['results']['bindings']
    results = [{k: v['value'] for k, v in item.items()} for item in results]
    prop_wdtype = {x['p'].split("/")[-1]: x['pt'].split("#")[-1] for x in results}
    return prop_wdtype

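# Usage sketch for get_property_types (assumes network access to the SPARQL endpoint
# that WDItemEngine queries by default). The returned dict maps property IDs to
# Wikibase datatypes, so the lookup below should yield "WikibaseItem" for P31
# (instance of).
def _example_property_type(pid="P31"):
    prop_wdtype = get_property_types()
    return prop_wdtype.get(pid)  # None if the property is unknown
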
def _try_write(self, entity: wdi_core.WDItemEngine, **kwargs) -> ModificationResult:
    try:
        eid = entity.write(self._local_login, **kwargs)
        return ModificationResult(successful=True, res=eid)
    except wdi_core.WDApiError as err:
        logger.warning(err.wd_error_msg['error'])
        err_code = err.wd_error_msg['error']['code']
        msg = err.wd_error_msg['error']['info']
        if err_code == ERR_CODE_LANGUAGE:
            logger.warning("Language was not recognized. Skipping it...")
        return ModificationResult(successful=False, message=msg)

def search_result_list(string):
    """
    Use wikidataintegrator to generate a list of similar items based on a text
    search and return a list of (qid, Label, description, aliases) dictionaries.
    """
    result_qid_list = WDItemEngine.get_wd_search_results(string, language=LANG)
    output = []
    for qid in result_qid_list[:10]:
        item = item_detail_parse(qid, with_claims=False)
        if item:
            output.append(item)
    return output

def check_existing_label(self, label_str):
    query_file = open('query_templates/CHECK_EXISTING_LABEL_QUERY.rq', 'r')
    query_template = query_file.read()
    query_file.close()
    QUERY = query_template
    QUERY = QUERY.replace("#QID#", self.Organism_QID).replace("#LABEL#", label_str)
    results = WDItemEngine.execute_sparql_query(QUERY)['results']['bindings']
    if len(results) == 0:
        return False
    else:
        return True

def get_item_json(qid):
    """
    Get item json dictionary from qid

    Args:
        qid (str): Wikidata Identifier, ex: "Q1234"

    Returns:
        Dict: Returned value of WDItemEngine().wd_json_representation
    """
    try:
        item = WDItemEngine(wd_item_id=qid)
        return item.wd_json_representation
    except (ValueError, ConnectionAbortedError, Exception):
        logging.exception("Exception reading QID: %s", qid)
        return None

def get_wikidata_complexes():
    """Gets all Wikidata items with a Complex Portal ID property"""
    get_macromolecular = """
    SELECT ?item ?ComplexPortalID
    WHERE {
        ?item wdt:P7718 ?ComplexPortalID .
    }"""
    wikidata_complexes = WDItemEngine.execute_sparql_query(
        get_macromolecular, as_dataframe=True
    ).replace({"http://www.wikidata.org/entity/": ""}, regex=True)
    return wikidata_complexes

def push_elem(self, oparl_object):
    print("PROCESSING", oparl_object["id"])
    oparl_id = oparl_object["id"]
    if oparl_id in self.id_mapping.keys():
        wd_item_id = self.id_mapping.get(oparl_id)
        item_name = None
        domain = ""
    else:
        wd_item_id = ""
        item_name = oparl_id
        domain = None
    claims = self.get_claims(oparl_object)
    wd_item = WDItemEngine(wd_item_id=wd_item_id, item_name=item_name, domain=domain,
                           data=claims, server=self.server,
                           base_url_template=self.base_url_template)
    wd_item.set_label(oparl_id)
    returned = wd_item.write(self.login)
    self.id_mapping[oparl_id] = returned
    if item_name:
        print("CREATED", "http://{}/index.php?title=Item:{}".format(self.server, returned))
    else:
        print("UPDATED", "http://{}/index.php?title=Item:{}".format(self.server, returned))

def index_single_object(self, oparl_object, full_pass):
    oparl_id = oparl_object.get_id()
    if full_pass:
        claims = self.get_claims(oparl_object, debug_url=oparl_id)
    else:
        id_claim = WDUrl(value=oparl_id, prop_nr=self.mapping["id"]["property"])
        type_claim = WDUrl(value=oparl_object.get_oparl_type(),
                           prop_nr=self.mapping["type"]["property"])
        claims = [id_claim, type_claim]
    if self.url_to_item_id.has(oparl_id):
        wd_item_id = self.url_to_item_id.get(oparl_id)
        item_name = None
        domain = ""
    else:
        print("Creating new item")
        wd_item_id = ""
        item_name = oparl_id
        domain = None
    wd_item = WDItemEngine(wd_item_id=wd_item_id, item_name=item_name, domain=domain,
                           data=claims, server=self.server,
                           base_url_template=self.base_url_template)
    wd_item.set_label(oparl_id)
    returned = wd_item.write(self.wikibase_login)
    self.url_to_item_id.set(oparl_id, returned)
    print(oparl_id)
    print("http://{}/index.php?title=Item:{}".format(self.server, returned))
    print()

def get_interacted_RNA_references(self):
    interacted_RNA_references = []
    row_nums = 0
    query_file = open('ALL_INTERACTED_SRNA_QUERY.rq', 'r')
    query_template = query_file.read()
    query_file.close()
    QUERY = query_template
    QUERY = QUERY.replace("#QID#", self.QID)
    results = WDItemEngine.execute_sparql_query(QUERY)['results']['bindings']
    if len(results) != 0:
        for result in results:
            row_nums += 1
            interacted_RNA_references.append([
                row_nums,
                result['rnaLabel']['value'],
                result['propLabel']['value'],
                result['targetLabel']['value'],
                f"{result['quote']['value']}"
                '</br><a target="_self" href="Article_Viewer.html?article_PMCID='
                f"{result['PMCID']['value']}"
                f"&quote={urllib.parse.quote_plus(result['quote']['value'])}"
                '">Read this in the article</a>',
                '<div class="form-control"><a target="_blank" href="'
                f"{result['rna']['value']}"
                '"><img src="static/images/Interact_logo_Wikidata.png" '
                'height="30px" class="rounded"></a></div>'
            ])
    else:
        return "Query returns nothing."
    data_tbl_cols = ['#', 'sRNA', 'Type of Regulation', 'Target Gene', 'Quote', 'Source']
    data_tbl_df = pd.DataFrame(interacted_RNA_references, columns=data_tbl_cols)
    pd.set_option('display.max_colwidth', -1)
    data_tbl = data_tbl_df.to_html(index=False, escape=False, bold_rows=False,
                                   max_rows=None, max_cols=None, table_id="data_tbl",
                                   justify="center")
    data_tbl = data_tbl.replace('border="1" ', "")
    data_tbl = data_tbl.replace(
        'class="dataframe" ',
        'class="display responsive no-wrap" '
        'style="font-family: Courier New; font-size: 13px;"')
    final_html = f"<div><h4>Referenced items: {self.get_wd_label()}</h4></div>{data_tbl}"
    return final_html

def search_result_list(search_string):
    """
    Use wikidataintegrator to generate a list of similar items.
    This is based on a text search and returns a list of
    (qid, Label, description, aliases) dictionaries.
    """
    result_qid_list = WDItemEngine.get_wd_search_results(
        search_string=search_string, language=WIKIDATA_LANG,
        mediawiki_api_url=MEDIAWIKI_API_URL)
    output = []
    for qid in result_qid_list[:10]:
        item = item_detail_parse(qid, with_claims=False)
        if item:
            output.append(item)
    return output

def get_wd_label(self):
    query_file = open('Label_Fetch_Query.rq', 'r')
    query_template = query_file.read()
    query_file.close()
    QUERY = query_template
    QUERY = QUERY.replace("#QID#", self.QID)
    results = WDItemEngine.execute_sparql_query(QUERY)['results']['bindings']
    item = ""
    if len(results) == 0:
        print("Query returns no items for the specified Q-ID.")
    elif len(results) == 1:
        for result in results:
            item = result['label']['value']
    else:
        print("Query returns more than one item for the same Q-ID.")
    return item

def get_item_json(qid):
    """
    Get item json dictionary from qid.

    Args:
        qid (str): Wikidata Identifier, ex: "Q1234"

    Returns:
        Dict: Returned value of WDItemEngine().wd_json_representation
    """
    try:
        item = WDItemEngine(wd_item_id=qid, mediawiki_api_url=MEDIAWIKI_API_URL,
                            sparql_endpoint_url=SPARQL_ENDPOINT_URL)
        return item.wd_json_representation
    except (ValueError, ConnectionAbortedError, KeyError):
        logging.exception("Exception reading QID: %s", qid)
        return None

def search_for_drug(drug_name):
    query = """SELECT ?item ?label WHERE {
      SERVICE wikibase:mwapi {
        bd:serviceParam wikibase:api "EntitySearch" .
        bd:serviceParam wikibase:endpoint "www.wikidata.org" .
        bd:serviceParam mwapi:search "***s***" .
        bd:serviceParam mwapi:language "en" .
        ?item wikibase:apiOutputItem mwapi:item .
        ?label wikibase:apiOutputItem mwapi:label
      }
      ?item (wdt:P279|wdt:P31) wd:Q11173 .
      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }""".replace("***s***", drug_name)
    results = [{k: v['value'].replace("http://www.wikidata.org/entity/", "")
                for k, v in x.items()}
               for x in WDItemEngine.execute_sparql_query(query)['results']['bindings']]
    return results

def get_QID(self, query):
    item_QID = []
    results = WDItemEngine.execute_sparql_query(query)['results']['bindings']
    if len(results) == 0:
        item_QID.append("NOT_FOUND_IN_WD")
    elif len(results) == 1:
        for result in results:
            item_QID.append(result['item']['value'].replace(
                "http://www.wikidata.org/entity/", ""))
    else:
        for result in results:
            item_QID.append(result['item']['value'].replace(
                "http://www.wikidata.org/entity/", ""))
            print("Warning: Query returns more than one item for the same gene name! "
                  "Selected: " + result['item']['value'])
    return item_QID

def get_wikidata_item_by_propertyvalue(property, value):
    """Gets a Wikidata item for a determined property-value pair

    Args:
        property (str): The property to search
        value (str): The value of said property
    """
    query_result = WDItemEngine.execute_sparql_query(
        f'SELECT distinct ?item WHERE {{ ?item wdt:{property} "{value}" }}')
    try:
        match = query_result["results"]["bindings"][0]
    except IndexError:
        print(f"Couldn't find item for {value}")
        return pd.np.NaN
    qid = match["item"]["value"]
    qid = qid.split("/")[4]
    return qid

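# Usage sketch for get_wikidata_item_by_propertyvalue, mirroring the P7718
# (Complex Portal ID) lookup used by get_wikidata_complexes above; "CPX-1" is a
# placeholder accession. The function returns a bare QID string, or NaN when no
# item matches.
def _example_item_by_property_value():
    return get_wikidata_item_by_propertyvalue("P7718", "CPX-1")
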
def get_QID_for_duplicated(self, locus_tag):
    item_QID = ""
    query_file = open('query_templates/FIND_ITEM_BY_LOCUS_TAG.rq', 'r')
    query_template = query_file.read()
    query_file.close()
    QUERY = query_template
    QUERY = QUERY.replace("#QID#", self.Organism_QID).replace("#LOCUS_TAG#", locus_tag)
    results = WDItemEngine.execute_sparql_query(QUERY)['results']['bindings']
    if len(results) == 0:
        print("Query returns no items for the specified Q-ID.")
    elif len(results) == 1:
        for result in results:
            item_QID = result['item']['value']
    else:
        for result in results:
            item_QID = result['item']['value']
            break
        print("Query returns more than one item for the same Q-ID.")
    return item_QID

def get_existing(cls):
    # get existing combinations
    query_str = """SELECT ?item ?itemLabel (GROUP_CONCAT(?part; separator=";") as ?f) WHERE {
      ?item wdt:P527 ?part .
      ?item wdt:P31|wdt:P279 wd:Q1304270 .
      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    } GROUP BY ?item ?itemLabel"""
    results = WDItemEngine.execute_sparql_query(query_str)['results']['bindings']
    qid_combo = {
        x['item']['value'].replace("http://www.wikidata.org/entity/", ""):
            frozenset([y.replace("http://www.wikidata.org/entity/", "")
                       for y in x['f']['value'].split(";")])
        for x in results
    }
    combo_qid = {v: k for k, v in qid_combo.items()}
    assert len(combo_qid) == len(qid_combo)
    cls.combo_qid = combo_qid
    cls.qid_combo = qid_combo

def get_doid_qid_map():
    # get a good QID to DOID map, using exact match only
    query = """
    SELECT distinct ?disease ?doid ?mrt WHERE {
      ?disease p:P699 ?s_doid .
      ?s_doid ps:P699 ?doid .
      OPTIONAL {?s_doid pq:P4390 ?mrt} .
    }
    """
    df = WDItemEngine.execute_sparql_query(query, as_dataframe=True)
    df.disease = df.disease.str.replace("http://www.wikidata.org/entity/", "")
    df = df[df.mrt.isnull() | (df.mrt == "http://www.wikidata.org/entity/Q39893449")]
    df.drop_duplicates(subset=['disease', 'doid'], inplace=True)
    # make sure one doid goes with one qid
    bad1 = df[df.duplicated("disease", keep=False)]
    bad2 = df[df.duplicated("doid", keep=False)]
    # get rid of these baddies
    df = df[~df.index.isin(bad1.index)]
    df = df[~df.index.isin(bad2.index)]
    doid_qid = dict(zip(df.doid, df.disease))
    return doid_qid

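# Usage sketch for get_doid_qid_map (assumes the SPARQL endpoint is reachable).
# Keys are DOID CURIEs taken from P699 exact matches, so a missing or ambiguous
# DOID simply yields None here; "DOID:14330" is used as a sample identifier.
def _example_doid_to_qid(doid="DOID:14330"):
    doid_qid = get_doid_qid_map()
    return doid_qid.get(doid)
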
def get_wikidata_do_mesh():
    # get mesh xrefs, including the mapping relation type
    # e.g. {'DOID:0050856': {'skos:broadMatch_D019958'}}
    query = """
    select ?item ?doid ?mesh ?mesh_rt where {
      ?item wdt:P699 ?doid .
      ?item p:P486 ?mesh_s .
      ?mesh_s ps:P486 ?mesh .
      optional { ?mesh_s pq:P4390 ?mesh_rt }
    }"""
    results = WDItemEngine.execute_sparql_query(query)['results']['bindings']
    results = [{k: v['value'].replace("http://www.wikidata.org/entity/", "")
                for k, v in item.items()} for item in results]
    df = pd.DataFrame(results)
    df['mesh_rt'] = df.apply(
        lambda row: QID_MAP_REL_TYPE_CURIE[row.mesh_rt] + "_MESH:" + row.mesh, axis=1)
    df['_item'] = df['item']
    r = df.groupby("_item").aggregate(
        lambda x: set(y for y in x if not pd.isnull(y))).to_dict("records")
    wd = {list(x['doid'])[0]: x for x in r}
    wd = {k: v['mesh_rt'] for k, v in wd.items()}
    wd = {k: v for k, v in wd.items() if v}
    return wd

def _temp_route_oauth_write_test():
    # One-off test to ensure pipes are running; add an alias to the WikiDP item
    identity = identify_user()
    for key in identity.keys():
        logging.info('KEY: %s VALUE: %s', key, identity.get(key))
    item = WDItemEngine(wd_item_id="Q51139559")
    item.set_aliases(['WikiDP Application'], append=True)
    # verify the api is working by getting this item
    assert item.get_label() == "Wikidata for Digital Preservation"
    wdi_login = get_wdi_login()
    # verify edit token exists, this is what WDI calls
    assert wdi_login.get_edit_token()
    assert "user" in identity.get('groups')  # verify user is in the user group
    assert "autoconfirmed" in identity.get('groups')  # verify user is autoconfirmed
    assert "edit" in identity.get('rights')  # verify user has the edit right
    assert "editpage" in identity.get('grants')  # verify the OAuth grant allows page edits
    updated = item.write(wdi_login)  # fails due to no permissions
    return jsonify(updated)

def item():
    wd_item = WDItemEngine(wd_item_id="Q14911732",
                           mediawiki_api_url='https://www.wikidataaaaaa.org/w/api.php',
                           search_only=True)
    print(wd_item.get_label('en'))

def process_query_string(query):
    result = WDItemEngine.execute_sparql_query(query)
    bindings = result['results'].get('bindings')
    return format_wikidata_bindings(bindings)