Example 1
def query(form_params):
    namespaces = get_namespaces()
    sparql_store = SPARQLStore("https://yago-knowledge.org/sparql/query")
    query_string = prepare_query(form_params)
    result = sparql_store.query(query_string, initNs=namespaces)
    return result
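
For reference, a self-contained sketch of the same pattern (get_namespaces() and prepare_query() belong to the surrounding project; the query string here is only illustrative):

from rdflib.plugins.stores.sparqlstore import SPARQLStore

store = SPARQLStore("https://yago-knowledge.org/sparql/query")
for row in store.query("SELECT ?s WHERE { ?s ?p ?o } LIMIT 3"):
    print(row.s)  # result rows expose SELECT variables as attributes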
Example 2
class RemoteGraph(SparqlQueryable):
    """
    RemoteGraph is used for accessing remote SPARQL endpoints.
    """
    def __init__(self, *, endpoint: str):
        self.graph = SPARQLStore(endpoint)

    def query(self, *, sparql: str) -> Result:
        """
        Query the remote graph using the API endpoint.

        :param sparql: A string containing valid SPARQL to query the graph.
        :return: A Result containing the result from calling the SPARQL query.
        """
        try:
            result = self.graph.query(sparql)
        except ResultException:
            # SPARQLStore raises ResultException when the endpoint returns no
            # result; fall back to an empty Graph (not strictly a Result)
            result = Graph()
        return result
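
A hypothetical call against the class above (the DBpedia endpoint is only an example):

remote = RemoteGraph(endpoint="https://dbpedia.org/sparql")
for row in remote.query(sparql="SELECT ?s WHERE { ?s ?p ?o } LIMIT 3"):
    print(row)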
Esempio n. 4
0
    def test_counting_graph_and_store_queries(self):
        q = """
            SELECT ?s
            WHERE {
                ?s ?p ?o .
            }
            LIMIT 5
            """
        g = Graph("SPARQLStore")
        g.open(self.path)
        c = 0
        for r in g.query(q):
            c += 1

        assert c == 5, "Graph(\"SPARQLStore\") didn't return 5 records"

        from rdflib.plugins.stores.sparqlstore import SPARQLStore
        st = SPARQLStore(query_endpoint=self.path)
        c = 0
        for r in st.query(q):
            c += 1

        assert c == 5, "SPARQLStore() didn't return 5 records"
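
The same two call styles, sketched against a public endpoint for illustration (the endpoint here is an assumption and stands in for the test's self.path):

from rdflib import Graph
from rdflib.plugins.stores.sparqlstore import SPARQLStore

q = "SELECT ?s WHERE { ?s ?p ?o } LIMIT 5"

g = Graph("SPARQLStore")
g.open("https://dbpedia.org/sparql")  # plays the role of self.path
assert sum(1 for _ in g.query(q)) == 5

st = SPARQLStore(query_endpoint="https://dbpedia.org/sparql")
assert sum(1 for _ in st.query(q)) == 5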
Esempio n. 5
0
class NanoPubTripleStore(object):

    RSA = Namespace(ns_dict['RSA'])
    HG19 = Namespace(ns_dict['HG19'])
    NP = Namespace(ns_dict['NP'])

    def __init__(self, endpoint):
        self.store = SPARQLStore(endpoint)
        self.dataset = Dataset()

    def _get_resources_by_context(self, context):
        g = self.dataset.graph(context)
        results = self.store.query("select ?s ?p ?o where {GRAPH <%s> {?s ?p ?o}}" % context)
        for s, p, o in results:
            # rdflib's Dataset has no add_quad(); add each triple directly
            # to the named graph for this context
            g.add((s, p, o))

    def get_nanopub(self, base):
        self._get_resources_by_context(base)
        self._get_resources_by_context(base + '#assertion')
        self._get_resources_by_context(base + '#publicationInfo')
        self._get_resources_by_context(base + '#provenance')
        bind_namespaces(self.dataset, base)
        return self.dataset.serialize(base=base, format='trig')
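
A hypothetical call (the endpoint and nanopublication base URI are placeholders, and ns_dict/bind_namespaces come from the surrounding project):

store = NanoPubTripleStore("http://example.org/sparql")
print(store.get_nanopub("http://example.org/nanopub/np1"))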
Esempio n. 6
0
class TwksClient:
    """
    Client for the TWKS server.

    The client mirrors the primary TWKS API: CRUD operations on nanopublications, plus SPARQL queries over assertions and nanopublications.
    """

    def __init__(self, *, server_base_url=None):
        """
        Construct a TWKS client.
        :param server_base_url: base URL of the server, excluding path, e.g. http://localhost:8080
        """
        if not server_base_url:
            server_base_url = "http://localhost:8080"
        self.__server_base_url = server_base_url
        assertions_sparql_query_endpoint = server_base_url + "/sparql/assertions"
        # older rdflib takes endpoint=; newer releases renamed it query_endpoint=
        self.assertions_sparql_store = SPARQLStore(endpoint=assertions_sparql_query_endpoint)
        nanopublications_sparql_query_endpoint = server_base_url + "/sparql/nanopublications"
        self.nanopublications_sparql_store = SPARQLStore(endpoint=nanopublications_sparql_query_endpoint)

    def delete_nanopublication(self, nanopublication_uri: str) -> bool:
        """
        Delete a nanopublication by its URI
        :param nanopublication_uri: nanopublication URI
        :return: True if the nanopublication was deleted, else False
        """

        request = urllib.request.Request(url=self.__nanopublication_url(nanopublication_uri), method="DELETE")

        try:
            with urllib.request.urlopen(request) as _:
                return True
        except HTTPError as e:
            if e.code == 404:
                return False
            else:
                raise

    def dump(self) -> None:
        """
        Tell the server to dump the contents of the store to its (local) disk.
        """

        request = urllib.request.Request(url=self.__server_base_url + "/dump", method="POST")

        with urllib.request.urlopen(request) as _:
            return

    def get_assertions(self, store='default') -> rdflib.Graph:
        """
        Get the union of all assertions in the store, as a new Graph.
        :param store: store for the returned Graph
        """

        request = urllib.request.Request(url=self.__server_base_url + "/assertions", headers={"Accept": "text/trig"},
                                         method="GET")

        with urllib.request.urlopen(request) as f:
            response_trig = f.read()
            result = rdflib.Graph(store=store)
            result.parse(format="trig",
                         data=response_trig)
            return result

    def get_nanopublication(self, nanopublication_uri: str) -> Optional[Nanopublication]:
        """
        Get a nanopublication by its URI.
        :param nanopublication_uri: nanopublication URI
        :return: the nanopublication if present, else None
        """

        request = urllib.request.Request(url=self.__nanopublication_url(nanopublication_uri),
                                         headers={"Accept": "text/trig"})

        try:
            with urllib.request.urlopen(request) as f:
                response_trig = f.read()
                return Nanopublication.parse(format="trig",
                                             data=response_trig)
        except HTTPError as e:
            if e.code == 404:
                return None
            else:
                raise

    def get_ontology_assertions(self, ontology_uris: Set[URIRef], store='default') -> rdflib.Graph:
        """
        Get the union of all assertions in the store, as a new Graph.
        :param store: store for the returned Graph
        """

        if not ontology_uris:
            return rdflib.Graph(store=store)

        url = self.__server_base_url + "/assertions/ontology?" + urlencode(
            tuple(("uri", str(ontology_uri)) for ontology_uri in ontology_uris))

        request = urllib.request.Request(url=url,
                                         headers={"Accept": "text/trig"},
                                         method="GET")

        with urllib.request.urlopen(request) as f:
            response_trig = f.read()
            result = rdflib.Graph(store=store)
            result.parse(format="trig",
                         data=response_trig)
            return result

    def __nanopublication_url(self, nanopublication_uri: str) -> str:
        return self.__server_base_url + "/nanopublication/" + quote(str(nanopublication_uri), safe="")

    def put_nanopublication(self, nanopublication: Nanopublication) -> None:
        """
        Put a nanopublication.

        :param nanopublication: the nanopublication
        """

        request = urllib.request.Request(url=self.__server_base_url + "/nanopublication",
                                         data=nanopublication.serialize(format="trig").encode("utf-8"),
                                         headers={"Content-Type": "text/trig; charset=utf-8"}, method="PUT")
        with urllib.request.urlopen(request) as _:
            pass

    def query_assertions(self, query: str, **kwds):
        """
        Query (only) the assertions in the store.
        :param query: SPARQL query string
        :param kwds: see rdflib.SPARQLStore.query
        :return: depends on query type
        """
        return self.assertions_sparql_store.query(query=query, **kwds)

    def query_nanopublications(self, query: str, **kwds):
        """
        Query all nanopublications in the store.
        :param query: SPARQL query string
        :param kwds: see rdflib.SPARQLStore.query
        :return: depends on query type
        """
        return self.nanopublications_sparql_store.query(query=query, **kwds)
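
A minimal usage sketch, assuming a TWKS server is listening on the default local URL:

client = TwksClient(server_base_url="http://localhost:8080")
for row in client.query_assertions("SELECT ?s WHERE { ?s ?p ?o } LIMIT 10"):
    print(row)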
Example 6
class Client:
    def __init__(self, endpoint, apikey=None):
        self._endpoint = endpoint.strip("/")
        self._sparql_endpoint = SPARQLStore(f"{self._endpoint}/sparql")
        self._apikey = apikey
        # self._cache = sqlite3.connect(".mortar_cache.db")
        # cur = self._cache.cursor()
        # cur.execute('''CREATE TABLE IF NOT EXISTS downloaded(time TIMESTAMP, query STRING, data BLOB)''')

    def load_csv(self, filename):
        logging.info(
            f"Uploading {filename} to {self._endpoint}/insert_streaming")
        with open(filename, "r") as f:
            with io.StringIO() as buf:
                w = csv.writer(buf)
                r = csv.DictReader(f)

                registered = False
                for row in r:
                    if not registered:
                        source = quote(row["site"])
                        name = quote(row["label"])
                        uri = quote(row["id"])
                        btype = quote(row.get("type", BRICK.Point))
                        units = quote(row.get("units", "unknown"))
                        registered = True
                    w.writerow([row["time"], row["value"]])

                if self._apikey:
                    url = f"{self._endpoint}/insert/csv?source={source}&name={name}&brick_uri={uri}&units={units}&brick_class={btype}&apikey={self._apikey}"
                else:
                    url = f"{self._endpoint}/insert/csv?source={source}&name={name}&brick_uri={uri}&units={units}&brick_class={btype}"

                b = io.BytesIO(buf.getvalue().encode("utf8"))
                resp = requests.post(url,
                                     data=b,
                                     headers={"Content-Type": "text/csv"})
                if not resp.ok:
                    raise Exception(resp.content)

    def new_stream(self,
                   sourcename,
                   name,
                   units,
                   brick_uri=None,
                   brick_class=None):
        """
        Idempotently registers a new stream and returns a reference to that stream
        """
        d = {
            "SourceName": sourcename,
            "Name": name,
            "Units": units,
        }
        if brick_uri is not None:
            d["BrickURI"] = brick_uri
        if brick_class is not None:
            d["BrickClass"] = brick_class
        logging.info(
            f"Registering new stream {d} to {self._endpoint}/register_stream")
        if self._apikey:
            r = requests.post(
                f"{self._endpoint}/register_stream?apikey={self._apikey}",
                json=d)
        else:
            r = requests.post(f"{self._endpoint}/register_stream", json=d)
        if not r.ok:
            raise Exception(r.content)
        return Stream(self, d)

    def add_data(self, sourcename, name, readings):
        """
        Adds data to the stream with the given name

        Args:
            sourcename (str): name of the "group" for this name
            name (str): name of the stream
            readings (list): each entry is a (RFC 3339 timestamp, float value) tuple
        """
        logging.info(
            f"Uploading {len(readings)} readings to {self._endpoint}/insert/data"
        )
        d = {
            "SourceName": sourcename,
            "Name": name,
            "Readings": readings,
        }
        if self._apikey:
            resp = requests.post(
                f"{self._endpoint}/insert/data?apikey={self._apikey}", json=d)
        else:
            resp = requests.post(f"{self._endpoint}/insert/data", json=d)
        if not resp.ok:
            raise Exception(resp.content)

    def load_triple_file(self, source, filename):
        logging.info(
            f"Uploading {filename} to {self._endpoint}/insert/metadata")
        basename = os.path.basename(filename)
        _, fformat = os.path.splitext(basename)
        with open(filename, "rb") as f:
            if self._apikey:
                resp = requests.post(
                    f"{self._endpoint}/insert/metadata?source={source}&origin={basename}&format={fformat}&apikey={self._apikey}",
                    data=f.read(),
                )
            else:
                resp = requests.post(
                    f"{self._endpoint}/insert/metadata?source={source}&origin={basename}&format={fformat}",
                    data=f.read(),
                )

            if not resp.ok:
                raise Exception(resp.content)

    # def load_graph(self, source, graph):
    #     """
    #     Args:
    #         graph (rdflib.Graph): graph of triples to insert
    #     """
    #     logging.info(f"Uploading {filename} to {self._endpoint}/insert/metadata")
    #     basename = os.path.basename(filename)
    #     _, fformat = os.path.splitext(basename)
    #     with open(filename, "rb") as f:
    #         resp = requests.post(
    #             f"{self._endpoint}/insert/metadata?source={source}&origin={basename}&format={format}",
    #             data=f.read(),
    #         )
    #         if not resp.ok:
    #             raise Exception(resp.content)

    def sparql(self, query, sites=None):
        if sites is None:
            res = self._sparql_endpoint.query(query)
            return pd.DataFrame.from_records(
                list(res), columns=[str(c) for c in res.vars])
        dfs = []
        for site in sites:
            ep = SPARQLStore(f"{self._endpoint}/sparql?site={site}")
            res = ep.query(query)
            df = pd.DataFrame.from_records(list(res),
                                           columns=[str(c) for c in res.vars])
            df["site"] = site
            dfs.append(df)
        if len(dfs) == 0:
            return pd.DataFrame()
        return functools.reduce(lambda x, y: pd.concat([x, y], axis=0), dfs)

    # def get_data_ids(self, ids, source=None, start=None, end=None):
    #     resp = requests.get(f'http://localhost:5001/query?sparql={sparql}&start={start}')
    #     r = pa.ipc.open_stream(resp.content)

    def data_uris(self, uris, start=None, end=None, agg=None, window=None):
        parts = []
        if start is not None:
            if isinstance(start, datetime):
                # datetime has no localize() (that is a pytz timezone method);
                # format directly, assuming the value is already UTC
                parts.append(
                    f"start={start.strftime('%Y-%m-%dT%H:%M:%SZ')}")
            else:
                parts.append(f"start={start}")
        else:
            parts.append("start=1970-01-01T00:00:00Z")

        for uri in uris:
            uri = urllib.parse.quote_plus(uri)
            parts.append(f"uri={uri}")

        query_string = "&".join(parts)
        if agg is not None and window is not None:
            resp = requests.get(
                f"{self._endpoint}/query?{query_string}&agg={agg}&window={window}"
            )
        else:
            resp = requests.get(f"{self._endpoint}/query?{query_string}")

        buf = io.BytesIO(resp.content)
        # read metadata first
        r = pa.ipc.open_stream(buf)
        md = r.read_pandas()
        # then read data
        r = pa.ipc.open_stream(buf)
        df = r.read_pandas()
        return Dataset(None, md, df)

    def data_sparql(self,
                    sparql,
                    source=None,
                    start=None,
                    end=None,
                    agg=None,
                    window=None):
        params = {"sparql": sparql}
        if agg is not None and window is not None:
            params["agg"] = agg
            params["window"] = window
        if start is not None:
            if isinstance(start, datetime):
                # datetime has no localize(); format directly, assuming UTC
                params["start"] = start.strftime("%Y-%m-%dT%H:%M:%SZ")
            else:
                params["start"] = start
        else:
            params["start"] = "1970-01-01T00:00:00Z"

        if end is not None:
            if isinstance(end, datetime):
                params["end"] = end.localize().strftime("%Y-%m-%dT%H:%M:%SZ")
            else:
                params["end"] = end
        else:
            params["end"] = "2100-01-01T00:00:00Z"

        if source is not None:
            params["source"] = source

        metadata = self.sparql(sparql,
                               sites=[source] if source is not None else None)

        resp = requests.get(f"{self._endpoint}/query", params=params)

        buf = pa.decompress(resp.content,
                            decompressed_size=int(4e10),  # pyarrow expects an int
                            codec='lz4',
                            asbytes=True)
        buf = io.BytesIO(buf)
        # read metadata first
        r = pa.ipc.open_stream(buf)
        md = r.read_pandas()
        # then read data
        r = pa.ipc.open_stream(buf)
        df = r.read_pandas()
        return Dataset(metadata, md, df)

    def qualify(self, required_queries):
        """
        Calls the Mortar API Qualify command

        Args:
            required_queries (list of str): list of queries we want to use to filter sites

        Returns:
            sites (list of str): List of site names to be used in a subsequent fetch command
        """
        if isinstance(required_queries, dict):
            names = list(required_queries.keys())
            required_queries = [required_queries[q] for q in names]
        elif isinstance(required_queries, list):
            names = None
        else:
            raise TypeError("Argument must be a list of queries")
        res = requests.post(f"{self._endpoint}/qualify", json=required_queries)
        return QualifyResult(res.json(), names=names)

    def fetch(self, query):
        """
        Calls the Mortar API Fetch command

        Args:
            query (pymortar.FetchRequest): Mortar API fetch struct

        Returns:
            views (dict of name to DataFrame): SPARQL query results from FetchRequest views
            metadata (dict of name to DataFrame): Metadata table describing all data streams
            dataframes (dict of name to DataFrame): Actual timeseries data
        """
        views = {}
        dfs = {}
        metadata = {}
        for view in query.views:
            # view.name
            # view.definition
            views[view.name] = {
                "results": self.sparql(view.definition, sites=query.sites),
                "definition": view.definition,
            }
        for df in query.dataFrames:
            newdfs = []
            for ts in df.timeseries:
                viewquery = views[ts.view]["definition"]
                datavars = [x.strip("?") for x in ts.dataVars]
                viewvars = views[ts.view]["results"].columns
                removevars = set(viewvars).difference(set(datavars))
                for var in removevars:
                    viewquery = viewquery.replace(f"?{var}", "", 1)
                res = self.data_sparql(viewquery,
                                       agg=parse_aggfunc(df.aggregation),
                                       window=df.window)
                newdfs.append(res.data)
            metadata[df.name] = res.streams
            dfs[df.name] = functools.reduce(
                lambda x, y: pd.concat([x, y], axis=0), newdfs)
        return views, metadata, dfs
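
A minimal usage sketch for the client above (the endpoint URL and query are placeholders):

client = Client("http://localhost:5001")
df = client.sparql("SELECT ?s WHERE { ?s ?p ?o } LIMIT 5")
print(df.head())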
Example 7
def fetch_dcat_data(api_url, query):
    # returnFormat is forwarded to the underlying SPARQLWrapper (older rdflib)
    store = SPARQLStore(endpoint=api_url, returnFormat='application/rdf+xml')
    results = store.query(query, DEBUG=True)
    # Result.graph is only populated for CONSTRUCT/DESCRIBE queries
    return results.graph if results else {}
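
A hypothetical call (URL and query are placeholders; a CONSTRUCT query is used so that results.graph is actually populated):

g = fetch_dcat_data(
    "https://example.org/sparql",
    "CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o } LIMIT 10")
print(len(g))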
Example 8
    def test_counting_graph_and_store_queries(self):
        query = """
            SELECT ?s
            WHERE {
                ?s ?p ?o .
            }
            LIMIT 5
            """
        g = Graph("SPARQLStore")
        g.open(self.path)
        count = 0
        response = MockHTTPResponse(
            200,
            "OK",
            """\
        <sparql xmlns="http://www.w3.org/2005/sparql-results#" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.w3.org/2001/sw/DataAccess/rf1/result2.xsd">
        <head>
        <variable name="s"/>
        </head>
        <results distinct="false" ordered="true">
        <result>
        <binding name="s"><uri>http://www.openlinksw.com/virtrdf-data-formats#default-iid</uri></binding>
        </result>
        <result>
        <binding name="s"><uri>http://www.openlinksw.com/virtrdf-data-formats#default-iid-nullable</uri></binding>
        </result>
        <result>
        <binding name="s"><uri>http://www.openlinksw.com/virtrdf-data-formats#default-iid-blank</uri></binding>
        </result>
        <result>
        <binding name="s"><uri>http://www.openlinksw.com/virtrdf-data-formats#default-iid-blank-nullable</uri></binding>
        </result>
        <result>
        <binding name="s"><uri>http://www.openlinksw.com/virtrdf-data-formats#default-iid-nonblank</uri></binding>
        </result>
        </results>
        </sparql>""".encode("utf8"),
            {
                "Content-Type":
                ["application/sparql-results+xml; charset=UTF-8"]
            },
        )

        self.httpmock.do_get_responses.append(response)

        result = g.query(query)
        for _ in result:
            count += 1

        assert count == 5, 'Graph("SPARQLStore") didn\'t return 5 records'

        from rdflib.plugins.stores.sparqlstore import SPARQLStore

        st = SPARQLStore(query_endpoint=self.path)
        count = 0
        self.httpmock.do_get_responses.append(response)
        result = st.query(query)
        for _ in result:
            count += 1

        assert count == 5, "SPARQLStore() didn't return 5 records"

        self.assertEqual(self.httpmock.do_get_mock.call_count, 2)
        for _ in range(2):
            req = self.httpmock.do_get_requests.pop(0)
            self.assertRegex(req.path, r"^/sparql")
            self.assertIn(query, req.path_query["query"][0])