def query(form_params):
    namespaces = get_namespaces()
    sparql_store = SPARQLStore("https://yago-knowledge.org/sparql/query")
    query_string = prepare_query(form_params)
    result = sparql_store.query(query_string, initNs=namespaces)
    # for row in list(result):
    #     print(row)
    return result

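# Usage sketch for the helper above (assumptions: `get_namespaces` and
# `prepare_query` are project helpers not shown here, so this calls
# SPARQLStore directly against the same public YAGO endpoint with a
# hand-written query and an illustrative prefix map):
from rdflib import Namespace
from rdflib.plugins.stores.sparqlstore import SPARQLStore

yago = SPARQLStore("https://yago-knowledge.org/sparql/query")
prefixes = {"schema": Namespace("http://schema.org/")}
for row in yago.query("SELECT ?s WHERE { ?s ?p ?o } LIMIT 3", initNs=prefixes):
    print(row)
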
class RemoteGraph(SparqlQueryable):
    """
    RemoteGraph is used for accessing remote SPARQL endpoints.
    """

    def __init__(self, *, endpoint: str):
        self.graph = SPARQLStore(endpoint)

    def query(self, *, sparql: str) -> Result:
        """
        Query the remote graph using the API endpoint.

        :param sparql: A string containing valid SPARQL to query the graph.
        :return: A Result containing the result from calling the SPARQL query.
        """
        try:
            result = self.graph.query(sparql)
        except ResultException:
            # SPARQLStore raises an exception when no result is found
            result = Graph()
        return result

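# Usage sketch for RemoteGraph (assumption: the DBpedia endpoint is only a
# stand-in; any public SPARQL endpoint works). Per the except-branch above,
# a query with no result comes back as an empty Graph rather than raising.
remote = RemoteGraph(endpoint="https://dbpedia.org/sparql")
for row in remote.query(sparql="SELECT ?s WHERE { ?s ?p ?o } LIMIT 1"):
    print(row)
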
def test_counting_graph_and_store_queries(self):
    q = """
        SELECT ?s
        WHERE { ?s ?p ?o . }
        LIMIT 5
        """
    g = Graph("SPARQLStore")
    g.open(self.path)
    c = 0
    for r in g.query(q):
        c += 1
    assert c == 5, "Graph(\"SPARQLStore\") didn't return 5 records"

    from rdflib.plugins.stores.sparqlstore import SPARQLStore

    st = SPARQLStore(query_endpoint=self.path)
    c = 0
    for r in st.query(q):
        c += 1
    assert c == 5, "SPARQLStore() didn't return 5 records"

class NanoPubTripleStore(object):

    RSA = Namespace(ns_dict['RSA'])
    HG19 = Namespace(ns_dict['HG19'])
    NP = Namespace(ns_dict['NP'])

    def __init__(self, endpoint):
        self.store = SPARQLStore(endpoint)
        self.dataset = Dataset()

    def _get_resources_by_context(self, context):
        g = self.dataset.graph(context)
        results = self.store.query(
            "select ?s ?p ?o where {GRAPH <%s> {?s ?p ?o}}" % context)
        for s, p, o in results:
            # add each triple directly into the named graph for this context
            # (rdflib's Dataset has no add_quad method)
            g.add((s, p, o))

    def get_nanopub(self, base):
        self._get_resources_by_context(base)
        self._get_resources_by_context(base + '#assertion')
        self._get_resources_by_context(base + '#publicationInfo')
        self._get_resources_by_context(base + '#provenance')
        bind_namespaces(self.dataset, base)
        return self.dataset.serialize(base=base, format='trig')

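# Usage sketch for NanoPubTripleStore (assumptions: the endpoint URL and the
# nanopublication base URI are placeholders; `ns_dict` and `bind_namespaces`
# come from the surrounding project). Pulls the nanopublication's base graph
# plus its #assertion, #publicationInfo, and #provenance graphs, then emits TriG.
nps = NanoPubTripleStore("http://localhost:8890/sparql")
print(nps.get_nanopub("http://example.org/nanopub/1"))
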
class TwksClient:
    """
    Client for the TWKS server.
    The client mirrors the primary TWKS API: CRUD operations on nanopublications,
    querying assertions and nanopublications via SPARQL.
    """

    def __init__(self, *, server_base_url=None):
        """
        Construct a TWKS client.
        :param server_base_url: base URL of the server, excluding path, e.g. http://localhost:8080
        """
        if not server_base_url:
            server_base_url = "http://localhost:8080"
        self.__server_base_url = server_base_url
        assertions_sparql_query_endpoint = server_base_url + "/sparql/assertions"
        self.assertions_sparql_store = SPARQLStore(endpoint=assertions_sparql_query_endpoint)
        # query_endpoint=assertions_sparql_query_endpoint)
        nanopublications_sparql_query_endpoint = server_base_url + "/sparql/nanopublications"
        self.nanopublications_sparql_store = SPARQLStore(endpoint=nanopublications_sparql_query_endpoint)
        # query_endpoint=nanopublications_sparql_query_endpoint)

    def delete_nanopublication(self, nanopublication_uri: str) -> bool:
        """
        Delete a nanopublication by its URI.
        :param nanopublication_uri: nanopublication URI
        :return: True if the nanopublication was deleted, else False
        """
        request = urllib.request.Request(url=self.__nanopublication_url(nanopublication_uri),
                                         method="DELETE")
        try:
            with urllib.request.urlopen(request) as _:
                return True
        except HTTPError as e:
            if e.code == 404:
                return False
            else:
                raise

    def dump(self) -> None:
        """
        Tell the server to dump the contents of the store to its (local) disk.
        """
        request = urllib.request.Request(url=self.__server_base_url + "/dump", method="POST")
        with urllib.request.urlopen(request) as _:
            return

    def get_assertions(self, store='default') -> rdflib.Graph:
        """
        Get the union of all assertions in the store, as a new Graph.
        :param store: store for the returned Graph
        """
        request = urllib.request.Request(url=self.__server_base_url + "/assertions",
                                         headers={"Accept": "text/trig"}, method="GET")
        with urllib.request.urlopen(request) as f:
            response_trig = f.read()
            result = rdflib.Graph(store=store)
            result.parse(format="trig", data=response_trig)
            return result

    def get_nanopublication(self, nanopublication_uri: str) -> Optional[Nanopublication]:
        """
        Get a nanopublication by its URI.
        :param nanopublication_uri: nanopublication URI
        :return: the nanopublication if present, else None
        """
        request = urllib.request.Request(url=self.__nanopublication_url(nanopublication_uri),
                                         headers={"Accept": "text/trig"})
        try:
            with urllib.request.urlopen(request) as f:
                response_trig = f.read()
                return Nanopublication.parse(format="trig", data=response_trig)
        except HTTPError as e:
            if e.code == 404:
                return None
            else:
                raise

    def get_ontology_assertions(self, ontology_uris: Set[URIRef], store='default') -> rdflib.Graph:
        """
        Get the union of the assertions of the given ontologies, as a new Graph.
        :param ontology_uris: URIs of the ontologies whose assertions to return
        :param store: store for the returned Graph
        """
        if not ontology_uris:
            return rdflib.Graph(store=store)
        url = self.__server_base_url + "/assertions/ontology?" + urlencode(
            tuple(("uri", str(ontology_uri)) for ontology_uri in ontology_uris))
        # print(url)
        request = urllib.request.Request(url=url, headers={"Accept": "text/trig"}, method="GET")
        with urllib.request.urlopen(request) as f:
            response_trig = f.read()
            result = rdflib.Graph(store=store)
            result.parse(format="trig", data=response_trig)
            return result

    def __nanopublication_url(self, nanopublication_uri: str) -> str:
        return self.__server_base_url + "/nanopublication/" + quote(str(nanopublication_uri), safe="")

    def put_nanopublication(self, nanopublication: Nanopublication) -> None:
        """
        Put a nanopublication.
        :param nanopublication: the nanopublication
        """
        request = urllib.request.Request(url=self.__server_base_url + "/nanopublication",
                                         data=nanopublication.serialize(format="trig").encode("utf-8"),
                                         headers={"Content-Type": "text/trig; charset=utf-8"},
                                         method="PUT")
        with urllib.request.urlopen(request) as _:
            pass

    def query_assertions(self, query: str, **kwds):
        """
        Query (only) the assertions in the store.
        :param query: SPARQL query string
        :param kwds: see rdflib.SPARQLStore.query
        :return: depends on query type
        """
        return self.assertions_sparql_store.query(query=query, **kwds)

    def query_nanopublications(self, query: str, **kwds):
        """
        Query all nanopublications in the store.
        :param query: SPARQL query string
        :param kwds: see rdflib.SPARQLStore.query
        :return: depends on query type
        """
        return self.nanopublications_sparql_store.query(query=query, **kwds)

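# Usage sketch for TwksClient (assumption: a TWKS server is listening at the
# default http://localhost:8080). query_assertions goes through the
# SPARQLStore bound to /sparql/assertions in __init__ above.
client = TwksClient(server_base_url="http://localhost:8080")
for row in client.query_assertions(query="SELECT ?s WHERE { ?s ?p ?o } LIMIT 10"):
    print(row)
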
class Client:
    def __init__(self, endpoint, apikey=None):
        self._endpoint = endpoint.strip("/")
        self._sparql_endpoint = SPARQLStore(f"{self._endpoint}/sparql")
        self._apikey = apikey
        # self._cache = sqlite3.connect(".mortar_cache.db")
        # cur = self._cache.cursor()
        # cur.execute('''CREATE TABLE IF NOT EXISTS downloaded(time TIMESTAMP, query STRING, data BLOB)''')

    def load_csv(self, filename):
        logging.info(
            f"Uploading {filename} to {self._endpoint}/insert_streaming")
        with open(filename, "r") as f:
            with io.StringIO() as buf:
                w = csv.writer(buf)
                r = csv.DictReader(f)
                registered = False
                for row in r:
                    if not registered:
                        source = quote(row["site"])
                        name = quote(row["label"])
                        uri = quote(row["id"])
                        btype = quote(row.get("type", BRICK.Point))
                        units = quote(row.get("units", "unknown"))
                        registered = True
                    w.writerow([row["time"], row["value"]])
                if self._apikey:
                    url = f"{self._endpoint}/insert/csv?source={source}&name={name}&brick_uri={uri}&units={units}&brick_class={btype}&apikey={self._apikey}"
                else:
                    url = f"{self._endpoint}/insert/csv?source={source}&name={name}&brick_uri={uri}&units={units}&brick_class={btype}"
                b = io.BytesIO(buf.getvalue().encode("utf8"))
                resp = requests.post(url, data=b,
                                     headers={"Content-Type": "text/csv"})
                if not resp.ok:
                    raise Exception(resp.content)

    def new_stream(self, sourcename, name, units, brick_uri=None, brick_class=None):
        """
        Idempotently registers a new stream and returns a reference to that stream
        """
        d = {
            "SourceName": sourcename,
            "Name": name,
            "Units": units,
        }
        if brick_uri is not None:
            d["BrickURI"] = brick_uri
        if brick_class is not None:
            d["BrickClass"] = brick_class
        logging.info(
            f"Registering new stream {d} to {self._endpoint}/register_stream")
        if self._apikey:
            r = requests.post(
                f"{self._endpoint}/register_stream?apikey={self._apikey}",
                json=d)
        else:
            r = requests.post(f"{self._endpoint}/register_stream", json=d)
        if not r.ok:
            raise Exception(r.content)
        return Stream(self, d)

    def add_data(self, sourcename, name, readings):
        """
        Adds data to the stream with the given name

        Args:
            sourcename (str): name of the "group" for this name
            name (str): name of the stream
            readings (list): each entry is a (RFC 3339 timestamp, float value) tuple
        """
        logging.info(
            f"Uploading {len(readings)} readings to {self._endpoint}/insert/data"
        )
        d = {
            "SourceName": sourcename,
            "Name": name,
            "Readings": readings,
        }
        if self._apikey:
            resp = requests.post(
                f"{self._endpoint}/insert/data?apikey={self._apikey}", json=d)
        else:
            resp = requests.post(f"{self._endpoint}/insert/data", json=d)
        if not resp.ok:
            raise Exception(resp.content)

    def load_triple_file(self, source, filename):
        logging.info(
            f"Uploading {filename} to {self._endpoint}/insert/metadata")
        basename = os.path.basename(filename)
        _, fformat = os.path.splitext(basename)
        with open(filename, "rb") as f:
            if self._apikey:
                resp = requests.post(
                    f"{self._endpoint}/insert/metadata?source={source}&origin={basename}&format={fformat}&apikey={self._apikey}",
                    data=f.read(),
                )
            else:
                resp = requests.post(
                    f"{self._endpoint}/insert/metadata?source={source}&origin={basename}&format={fformat}",
                    data=f.read(),
                )
            if not resp.ok:
                raise Exception(resp.content)

    # def load_graph(self, source, graph):
    #     """
    #     Args:
    #         graph (rdflib.Graph): graph of triples to insert
    #     """
    #     logging.info(f"Uploading {filename} to {self._endpoint}/insert/metadata")
    #     basename = os.path.basename(filename)
    #     _, fformat = os.path.splitext(basename)
    #     with open(filename, "rb") as f:
    #         resp = requests.post(
    #             f"{self._endpoint}/insert/metadata?source={source}&origin={basename}&format={format}",
    #             data=f.read(),
    #         )
    #         if not resp.ok:
    #             raise Exception(resp.content)

    def sparql(self, query, sites=None):
        if sites is None:
            res = self._sparql_endpoint.query(query)
            return pd.DataFrame.from_records(
                list(res), columns=[str(c) for c in res.vars])
        dfs = []
        for site in sites:
            ep = SPARQLStore(f"{self._endpoint}/sparql?site={site}")
            res = ep.query(query)
            df = pd.DataFrame.from_records(
                list(res), columns=[str(c) for c in res.vars])
            df["site"] = site
            dfs.append(df)
        if len(dfs) == 0:
            return pd.DataFrame()
        return functools.reduce(lambda x, y: pd.concat([x, y], axis=0), dfs)

    # def get_data_ids(self, ids, source=None, start=None, end=None):
    #     resp = requests.get(f'http://localhost:5001/query?sparql={sparql}&start={start}')
    #     r = pa.ipc.open_stream(resp.content)

    def data_uris(self, uris, start=None, end=None, agg=None, window=None):
        parts = []
        if start is not None:
            if isinstance(start, datetime):
                # naive datetimes are formatted as-is and assumed to be UTC
                # (datetime has no .localize(); that is a pytz timezone method)
                parts.append(
                    f"start={start.strftime('%Y-%m-%dT%H:%M:%SZ')}")
            else:
                parts.append(f"start={start}")
        else:
            parts.append("start=1970-01-01T00:00:00Z")
        for uri in uris:
            uri = urllib.parse.quote_plus(uri)
            parts.append(f"uri={uri}")
        query_string = "&".join(parts)
        if agg is not None and window is not None:
            resp = requests.get(
                f"{self._endpoint}/query?{query_string}&agg={agg}&window={window}"
            )
        else:
            resp = requests.get(f"{self._endpoint}/query?{query_string}")
        buf = io.BytesIO(resp.content)
        # read metadata first
        r = pa.ipc.open_stream(buf)
        md = r.read_pandas()
        # then read data
        r = pa.ipc.open_stream(buf)
        df = r.read_pandas()
        return Dataset(None, md, df)

    def data_sparql(self, sparql, source=None, start=None, end=None, agg=None, window=None):
        params = {"sparql": sparql}
        if agg is not None and window is not None:
            params["agg"] = agg
            params["window"] = window
        if start is not None:
            if isinstance(start, datetime):
                # as in data_uris, naive datetimes are assumed to be UTC
                params["start"] = start.strftime("%Y-%m-%dT%H:%M:%SZ")
            else:
                params["start"] = start
        else:
            params["start"] = "1970-01-01T00:00:00Z"
        if end is not None:
            if isinstance(end, datetime):
                params["end"] = end.strftime("%Y-%m-%dT%H:%M:%SZ")
            else:
                params["end"] = end
        else:
            params["end"] = "2100-01-01T00:00:00Z"
        if source is not None:
            params["source"] = source
        metadata = self.sparql(sparql, sites=[source] if source is not None else None)
        resp = requests.get(f"{self._endpoint}/query", params=params)
        # print(len(resp.content))
        buf = pa.decompress(resp.content,
                            decompressed_size=int(4e10),
                            codec='lz4',
                            asbytes=True)
        buf = io.BytesIO(buf)
        # read metadata first
        r = pa.ipc.open_stream(buf)
        md = r.read_pandas()
        # then read data
        r = pa.ipc.open_stream(buf)
        df = r.read_pandas()
        return Dataset(metadata, md, df)

    def qualify(self, required_queries):
        """
        Calls the Mortar API Qualify command

        Args:
            required_queries (list of str): list of queries we want to use to filter sites

        Returns:
            sites (list of str): List of site names to be used in a subsequent fetch command
        """
        if isinstance(required_queries, dict):
            names = list(required_queries.keys())
            required_queries = [required_queries[q] for q in names]
        elif isinstance(required_queries, list):
            names = None
        else:
            raise TypeError("Argument must be a list of queries")
        res = requests.post(f"{self._endpoint}/qualify", json=required_queries)
        return QualifyResult(res.json(), names=names)

    def fetch(self, query):
        """
        Calls the Mortar API Fetch command

        Args:
            query (pymortar.FetchRequest): Mortar API fetch struct

        Returns:
            views (dict of name to DataFrame): SPARQL query results from FetchRequest views
            metadata (dict of name to DataFrame): Metadata table describing all data streams
            dataframes (dict of name to DataFrame): Actual timeseries data
        """
        views = {}
        dfs = {}
        metadata = {}
        for view in query.views:
            # view.name
            # view.definition
            views[view.name] = {
                "results": self.sparql(view.definition, sites=query.sites),
                "definition": view.definition,
            }
        for df in query.dataFrames:
            newdfs = []
            for ts in df.timeseries:
                viewquery = views[ts.view]["definition"]
                datavars = [x.strip("?") for x in ts.dataVars]
                viewvars = views[ts.view]["results"].columns
                removevars = set(viewvars).difference(set(datavars))
                for var in removevars:
                    viewquery = viewquery.replace(f"?{var}", "", 1)
                res = self.data_sparql(viewquery,
                                       agg=parse_aggfunc(df.aggregation),
                                       window=df.window)
                newdfs.append(res.data)
            metadata[df.name] = res.streams
            dfs[df.name] = functools.reduce(
                lambda x, y: pd.concat([x, y], axis=0), newdfs)
        return views, metadata, dfs

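# Usage sketch for Client.sparql (assumptions: the endpoint URL is a
# placeholder borrowed from the commented-out code above, and the site names
# and Brick query are illustrative). With `sites` given, the same query runs
# once per site and each frame is tagged with a "site" column before the
# frames are concatenated.
client = Client("http://localhost:5001")
df = client.sparql(
    "SELECT ?sensor WHERE { ?sensor rdf:type brick:Temperature_Sensor }",
    sites=["site_a", "site_b"],
)
print(df.head())
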
def fetch_dcat_data(api_url, query):
    store = SPARQLStore(endpoint=api_url, returnFormat='application/rdf+xml')
    results = store.query(query, DEBUG=True)
    return results.graph if results else {}

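# Usage sketch for fetch_dcat_data (assumption: the endpoint URL is a
# placeholder). Because the helper returns `results.graph`, the query should
# be a CONSTRUCT or DESCRIBE, whose Result carries a Graph; a SELECT result
# leaves `.graph` unset.
g = fetch_dcat_data(
    "https://example.org/sparql",
    "CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o } LIMIT 10",
)
if g:
    print(g.serialize(format="xml"))
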
def test_counting_graph_and_store_queries(self):
    query = """
        SELECT ?s
        WHERE { ?s ?p ?o . }
        LIMIT 5
        """
    g = Graph("SPARQLStore")
    g.open(self.path)
    count = 0
    response = MockHTTPResponse(
        200,
        "OK",
        """\
<sparql xmlns="http://www.w3.org/2005/sparql-results#" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.w3.org/2001/sw/DataAccess/rf1/result2.xsd">
 <head>
  <variable name="s"/>
 </head>
 <results distinct="false" ordered="true">
  <result>
   <binding name="s"><uri>http://www.openlinksw.com/virtrdf-data-formats#default-iid</uri></binding>
  </result>
  <result>
   <binding name="s"><uri>http://www.openlinksw.com/virtrdf-data-formats#default-iid-nullable</uri></binding>
  </result>
  <result>
   <binding name="s"><uri>http://www.openlinksw.com/virtrdf-data-formats#default-iid-blank</uri></binding>
  </result>
  <result>
   <binding name="s"><uri>http://www.openlinksw.com/virtrdf-data-formats#default-iid-blank-nullable</uri></binding>
  </result>
  <result>
   <binding name="s"><uri>http://www.openlinksw.com/virtrdf-data-formats#default-iid-nonblank</uri></binding>
  </result>
 </results>
</sparql>""".encode("utf8"),
        {
            "Content-Type": ["application/sparql-results+xml; charset=UTF-8"]
        },
    )
    self.httpmock.do_get_responses.append(response)
    result = g.query(query)
    for _ in result:
        count += 1
    assert count == 5, 'Graph("SPARQLStore") didn\'t return 5 records'

    from rdflib.plugins.stores.sparqlstore import SPARQLStore

    st = SPARQLStore(query_endpoint=self.path)
    count = 0
    self.httpmock.do_get_responses.append(response)
    result = st.query(query)
    for _ in result:
        count += 1
    assert count == 5, "SPARQLStore() didn't return 5 records"

    self.assertEqual(self.httpmock.do_get_mock.call_count, 2)
    for _ in range(2):
        req = self.httpmock.do_get_requests.pop(0)
        self.assertRegex(req.path, r"^/sparql")
        self.assertIn(query, req.path_query["query"][0])