def get_predicates():
    """
    Get a list of predicates used in statements issued by the knowledge source.

    Runs a single SPARQL query for every entity that has a
    ``wikibase:propertyType`` and returns one dict per result row.

    :return: list of dicts with the keys ``id`` (``wd:``-prefixed last path
        segment of the property URI), ``name`` (label), ``definition``
        (English description, ``""`` when unbound), ``aliases`` (list of
        English alt labels, possibly empty) and ``ptype`` (property type with
        the ``http://wikiba.se/ontology#`` prefix removed). An empty result
        set yields an empty list.
    :rtype: List[Predicate]
    """
    # Possible types: {'CommonsMedia', 'Time', 'Quantity', 'WikibaseProperty',
    # 'WikibaseItem', 'GlobeCoordinate', 'String', 'ExternalId', 'Math',
    # 'Monolingualtext', 'TabularData', 'Url', 'GeoShape'}
    query = (
        'SELECT ?p ?pt ?pLabel ?d ?aliases WHERE {'
        ' {'
        ' SELECT ?p ?pt ?d'
        ' (GROUP_CONCAT(DISTINCT ?alias; separator="|") as ?aliases)'
        ' WHERE {'
        ' ?p wikibase:propertyType ?pt .'
        ' OPTIONAL {?p skos:altLabel ?alias FILTER (LANG (?alias) = "en")}'
        ' OPTIONAL {?p schema:description ?d FILTER (LANG (?d) = "en") .}'
        ' } GROUP BY ?p ?pt ?d'
        ' }'
        ' SERVICE wikibase:label'
        ' { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }'
        ' }'
    )
    bindings = execute_sparql_query(query)['results']['bindings']
    # Flatten each binding to {variable: value}; unbound variables are absent.
    rows = [{k: v['value'] for k, v in item.items()} for item in bindings]

    # note: 'aliases' and 'ptype' are not in the official spec!
    return [{
        'id': "wd:" + row['p'].split("/")[-1],
        'name': row['pLabel'],
        'definition': row.get("d", ""),
        # The aliases arrive "|"-joined; an empty/unbound value means none.
        'aliases': row['aliases'].split("|") if row.get('aliases') else [],
        'ptype': row['pt'].replace("http://wikiba.se/ontology#", ""),
    } for row in rows]
def get_equiv_item(curie):
    """
    From a curie, get the equivalent wikidata item(s).

    Example: get_equiv_item("PMID:10028264")

    :param curie: identifier accepted by ``cu.parse_curie``, which is
        expected to return a ``(property, value)`` pair
    :return: list of ``wd:``-prefixed item ids whose direct property equals
        the curie's value, de-duplicated in the order the endpoint returned
        them; ``[]`` when the curie cannot be parsed (the ``ValueError`` is
        printed, not raised)
    """
    try:
        pid, value = cu.parse_curie(curie)
    except ValueError as e:
        print(e)
        return []

    prop_direct = "<http://www.wikidata.org/prop/direct/{}>".format(
        pid.split("/")[-1])

    # The value ends up inside a single-quoted SPARQL string literal, so
    # escape backslashes first, then the quote and line breaks; otherwise a
    # value containing "'" would terminate the literal early.
    literal = (str(value)
               .replace("\\", "\\\\")
               .replace("'", "\\'")
               .replace("\n", "\\n")
               .replace("\r", "\\r"))
    query_str = "SELECT ?item WHERE {{ ?item {} '{}' }}".format(
        prop_direct, literal)

    bindings = execute_sparql_query(query_str)['results']['bindings']

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # returned list is deterministic for a given endpoint response.
    uris = dict.fromkeys(
        cell['value'] for binding in bindings for cell in binding.values())
    return [
        "wd:" + uri.replace("http://www.wikidata.org/entity/", "")
        for uri in uris
    ]
def get_all_types():
    """
    Get all semantic group types, and their counts.

    For every entity id in ``qid_semgroup`` (except ``Q5``) one COUNT query
    is issued for the number of distinct subjects that have
    ``wdt:P31 wd:<entity id>``.

    :return: list of ``{"id": "wd:<entity id>", "frequency": <int>}``, one
        entry per counted entity id
    """
    count_query = ("SELECT (COUNT (DISTINCT ?type) AS ?count) "
                   "WHERE {{?type wdt:P31 wd:{0}}}")
    frequencies = {}
    for entity_id, group_name in qid_semgroup.items():
        if entity_id == 'Q5':  # Q5 = human, can't do a count
            continue
        # An entity mapped to an empty collection of groups is skipped. A
        # plain string (even an empty one) counts as one group.
        if not isinstance(group_name, str) and not group_name:
            continue
        # The count depends only on the entity id, not on the semantic
        # group, so query once per entity rather than once per group.
        bindings = execute_sparql_query(
            count_query.format(entity_id))['results']['bindings']
        frequencies[entity_id] = int(bindings[0]['count']['value'])

    return [{
        'id': 'wd:{}'.format(qid),
        'frequency': frequency,
    } for qid, frequency in frequencies.items()]
def get_concept_details(qid):
    """
    Build the "details" for the GET /translator/concepts/{conceptId} endpoint.

    :param qid: concept identifier; passed through ``always_curie`` before
        being placed in the query as the statement subject
    :return: list of dicts, one per result row, holding the bound query
        variables (``prop``, ``propLabel``, ``value``, ``valueLabel``) plus
        ``tag`` (a copy of ``prop``); entity URIs in ``prop`` and ``value``
        are shortened to the ``wd:`` prefix
    """
    entity_uri = "http://www.wikidata.org/entity/"
    query_template = (
        'SELECT distinct ?prop ?propLabel ?value ?valueLabel WHERE {{'
        ' {} ?p ?value .'
        ' ?prop wikibase:directClaim ?p .'
        ' SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en" . }}'
        ' }}'
    )
    response = execute_sparql_query(query_template.format(always_curie(qid)))

    details = []
    for binding in response['results']['bindings']:
        # Keep only the plain value of each bound variable.
        row = {name: cell['value'] for name, cell in binding.items()}
        row['prop'] = row['prop'].replace(entity_uri, "wd:")
        row['tag'] = row['prop']
        row['value'] = row['value'].replace(entity_uri, "wd:")
        details.append(row)
    return details
def _query_statements(s, t=None, relations=None, direction="f"):
    """
    Query item-to-item statements, optionally constrained by source, target
    and relation ids.

    if direction = f (forward), s is source, t is target
    if direction = r (reverse), s and t are reversed
    if t is not given, target is unconstrained
    if relations is not given, relations are unconstrained

    The returned records are not swapped back for ``direction="r"``: subject
    and object always follow the statement as matched by the query.

    :param s: iterable of ids, each normalised with ``always_curie``
    :param t: optional iterable of ids, normalised the same way
    :param relations: optional iterable of relation ids, normalised the same way
    :param direction: ``"f"`` or ``"r"``
    :return: list of dicts with ``id`` (``wds:`` statement id), ``subject``
        and ``object`` (``id``, ``name``, ``semanticGroup``) and ``predicate``
        (``id``, ``name``)
    """
    assert direction in {"f", "r"}, "direction must be 'f' or 'r'"

    entity_uri = "http://www.wikidata.org/entity/"

    def to_curie(uri):
        # Shorten an entity URI to its wd: form.
        return uri.replace(entity_uri, "wd:")

    def values_clause(variable, joined_ids):
        # SPARQL VALUES restriction, or nothing when unconstrained.
        if not joined_ids:
            return ""
        return "values " + variable + " {" + joined_ids + "}"

    def split_types(joined_uris):
        # GROUP_CONCAT output is space separated; empty means no types.
        if not joined_uris:
            return []
        return [to_curie(uri) for uri in joined_uris.split(" ")]

    def semantic_group(type_ids):
        if not type_ids:
            return ""
        return " ".join(get_semgroups_from_qids(type_ids))

    source_ids = " ".join(set(map(always_curie, s)))
    target_ids = " ".join(set(map(always_curie, t))) if t else ""
    relation_ids = (" ".join(set(map(always_curie, relations)))
                    if relations else "")
    if direction == "r":
        source_ids, target_ids = target_ids, source_ids

    query_template = (
        ' SELECT ?s ?sLabel ?r ?rLabel ?t ?tLabel ?id'
        ' (GROUP_CONCAT(?stype) as ?stypes)'
        ' (GROUP_CONCAT(?ttype) as ?ttypes)'
        ' WHERE {{'
        ' {source_filter}'
        ' {target_filter}'
        ' {relation_filter}'
        ' ?s ?propertyclaim ?id .'
        ' ?r wikibase:claim ?propertyclaim .'
        ' ?id ?b ?t .'
        ' OPTIONAL {{?s wdt:P31 ?stype}}'
        ' OPTIONAL {{?t wdt:P31 ?ttype}}'
        ' FILTER(regex(str(?b), "http://www.wikidata.org/prop/statement" ))'
        ' SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en" }}'
        ' }}'
        ' GROUP BY ?s ?sLabel ?r ?rLabel ?t ?tLabel ?id'
    )
    query_str = query_template.format(
        source_filter=values_clause("?s", source_ids),
        target_filter=values_clause("?t", target_ids),
        relation_filter=values_clause("?r", relation_ids))

    bindings = execute_sparql_query(query_str)['results']['bindings']
    flat_rows = [
        {name: cell['value'] for name, cell in binding.items()}
        for binding in bindings
    ]

    statements = []
    for row in flat_rows:
        # remove non item statements
        if entity_uri not in row['s'] or entity_uri not in row['t']:
            continue
        row['s'] = to_curie(row['s'])
        row['r'] = to_curie(row['r'])
        row['t'] = to_curie(row['t'])
        # Statement URIs use "-" after the item id where the wds: form
        # uses "$"; only the first "-" is replaced.
        row['id'] = row['id'].replace(
            entity_uri + "statement/", "wds:").replace("-", "$", 1)
        subject_types = split_types(row['stypes'])
        object_types = split_types(row['ttypes'])
        row['sSemanticGroup'] = semantic_group(subject_types)
        row['tSemanticGroup'] = semantic_group(object_types)
        statements.append(row)

    return [
        {
            'id': row['id'],
            'subject': {
                'id': row['s'],
                'name': row['sLabel'],
                'semanticGroup': row['sSemanticGroup'],
            },
            'predicate': {
                'id': row['r'],
                'name': row['rLabel'],
            },
            'object': {
                'id': row['t'],
                'name': row['tLabel'],
                'semanticGroup': row['tSemanticGroup'],
            },
        }
        for row in statements
        if row['id'].startswith("wds:Q")
    ]