def async_updateOtherCachedFormats(obj, scheduled_at, endpoint, query,
                                   _type, accept):
    """ Async job that refreshes one extra cached export format
    (json, xml, xmlschema) for a Sparql object.

    :param obj: Sparql content object whose cached field is refreshed
    :param scheduled_at: timestamp of the schedule that queued this job;
        the job is skipped when a newer schedule superseded it
    :param endpoint: sparql endpoint url
    :param query: cooked sparql query string
    :param _type: export format suffix appended to the cache field name
    :param accept: HTTP Accept header value passed to the raw query
    """
    # Only the job queued by the most recent schedule may run.
    if obj.last_scheduled_at != scheduled_at:
        return

    # Never run with a timeout below 10 seconds.
    timeout = max(getattr(obj, 'timeout', 10), 10)
    try:
        new_result = run_with_timeout(
            timeout, raw_query_and_get_result, endpoint, query,
            accept=accept)
    except QueryTimeout:
        # logger.warn is deprecated; use logger.warning
        logger.warning(
            "Query received timeout: %s with %s\n %s \n %s",
            "/".join(obj.getPhysicalPath()), _type, endpoint, query)
        return

    fieldName = "sparql_results_cached_" + _type
    mutator = obj.Schema().getField(fieldName).getMutator(obj)
    try:
        result = new_result['result'].read()
    except Exception:
        # Best effort: store an empty payload but record the failure.
        result = ""
        logger.warning(
            "Unable to read result from query: %s with %s\n %s \n %s",
            "/".join(obj.getPhysicalPath()), _type, endpoint, query)
    mutator(result)
def count_sparql_results(self, timeout, func, *query):
    """Return the number of rows the given sparql query would produce.

    :param timeout: Sparql query timeout
    :type timeout: int
    :param func: query_and_get_result function used to query Sparql
    :type func: function
    :param query: Endpoint url and sparql query
    :type query: tuple
    :return: number of result rows
    :rtype: int
    """
    endpoint = query[0]
    sparql_query = query[1]
    # Split only on the FIRST "SELECT": the prologue (PREFIXes etc.)
    # stays in front and the rest of the query -- including any nested
    # SELECT subqueries -- is wrapped unchanged.  An unbounded split
    # would drop every inner SELECT keyword on rejoin.
    split_query = sparql_query.split("SELECT", 1)
    # Wrap the original select within a COUNT in order to get the
    # row length without having to save the rows, since we are only
    # concerned with the results count and not the results themselves.
    count_query = split_query[0] + "SELECT (COUNT (*) as ?row_count) " \
        "WHERE{{ SELECT" + "".join(split_query[1:]) \
        + "}}"
    result_len = 0
    try:
        result = sparql.query(endpoint, count_query, timeout=timeout)
        fetched_results = result.fetchall()
        # Only the first entry is needed: it holds the single count value.
        for entry in fetched_results:
            result_len = int(entry[0].value)
            break
    except Exception:
        # The wrapped count query is best effort; if the endpoint rejects
        # it, fall back to running the original query and counting rows.
        result = run_with_timeout(timeout, func, *query)
        result_len = len(result.get('result', {}).get('rows', {}))
    return result_len
def async_updateOtherCachedFormats(obj, scheduled_at, endpoint, query,
                                   _type, accept):
    """ Async job that refreshes one extra cached export format
    (json, xml, xmlschema) for a Sparql object.

    :param obj: Sparql content object whose cached field is refreshed
    :param scheduled_at: timestamp of the schedule that queued this job;
        the job only runs when it still matches obj.last_scheduled_at
    :param endpoint: sparql endpoint url
    :param query: cooked sparql query string
    :param _type: export format suffix appended to the cache field name
    :param accept: HTTP Accept header value passed to the raw query
    """
    if obj.last_scheduled_at == scheduled_at:
        # Never run with a timeout below 10 seconds.
        timeout = max(getattr(obj, 'timeout', 10), 10)
        try:
            new_result = run_with_timeout(
                timeout, raw_query_and_get_result, endpoint, query,
                accept=accept
            )
        except QueryTimeout:
            # logger.warn is deprecated; use logger.warning
            logger.warning(
                "Query received timeout: %s with %s\n %s \n %s",
                "/".join(obj.getPhysicalPath()), _type, endpoint, query
            )
            return
        fieldName = "sparql_results_cached_" + _type
        mutator = obj.Schema().getField(fieldName).getMutator(obj)
        try:
            result = new_result['result'].read()
        except Exception:
            # Best effort: store an empty payload but record the failure.
            result = ""
            logger.warning(
                "Unable to read result from query: %s with %s\n %s \n %s",
                "/".join(obj.getPhysicalPath()), _type, endpoint, query
            )
        mutator(result)
def count_sparql_results(self, timeout, func, *query):
    """Return the number of rows the given sparql query would produce.

    :param timeout: Sparql query timeout
    :type timeout: int
    :param func: query_and_get_result function used to query Sparql
    :type func: function
    :param query: Endpoint url and sparql query
    :type query: tuple
    :return: number of result rows
    :rtype: int
    """
    endpoint, sparql_query = query[0], query[1]
    # Partition on the FIRST "SELECT" so that PREFIX declarations stay in
    # front while the query body -- including any nested SELECT
    # subqueries -- is wrapped unchanged.  (Splitting on every "SELECT"
    # and rejoining with "" would drop the inner SELECT keywords.)
    prologue, _keyword, body = sparql_query.partition("SELECT")
    # Wrap the original select within a COUNT in order to get the row
    # length without having to save the rows, since we only care about
    # the results count and not the results themselves.
    count_query = (prologue +
                   "SELECT (COUNT (*) as ?row_count) WHERE{{ SELECT" +
                   body + "}}")
    result_len = 0
    try:
        result = sparql.query(endpoint, count_query, timeout=timeout)
        # The count query yields a single row whose first cell is the count.
        for entry in result.fetchall():
            result_len = int(entry[0].value)
            break
    except Exception:
        # Fall back: run the original query and count the rows directly
        # when the endpoint rejects the wrapped count query.
        result = run_with_timeout(timeout, func, *query)
        result_len = len(result.get('result', {}).get('rows', {}))
    return result_len
def preview(self): """preview""" tmp_query = self.request.get("sparql_query", "") tmp_query = "\n".join(x for x in tmp_query.splitlines() if not x.strip().startswith("#")) tmp_arg_spec = self.request.get("arg_spec", "") tmp_endpoint = self.request.get("endpoint", "") tmp_timeout = int(self.request.get("timeout", "0")) arg_spec = parse_arg_spec(tmp_arg_spec) missing, arg_values = map_arg_values(arg_spec, self.request.form) error = None if missing: error = "" for missing_arg in missing: error = error + "<div>Argument '%s' missing</div>" % missing_arg else: result = {} data = [] error = None try: m = re.search(r"limit\s(\d+)", tmp_query, re.IGNORECASE) if m: tmp_query = tmp_query[:m.start(1)]+'10'+tmp_query[m.end(1):] else: tmp_query = tmp_query + " LIMIT 5" cooked_query = interpolate_query(tmp_query, arg_values) args = (tmp_endpoint, cooked_query) result, error = {}, None result = run_with_timeout(tmp_timeout, query_and_get_result, *args) data = result.get('result') error = error or result.get('exception') except Exception: import traceback error = traceback.format_exc() if error: return "<blockquote class='sparql-error'> %s </blockquote>" % error result = [] result.append(u"<table class='sparql-results'>") result.append(u"<thead>") result.append(u"<tr>") for var_name in data.get('var_names', []): result.append(u"<th> %s </th>" %var_name) result.append(u"</tr>") result.append(u"</thead>") result.append(u"<tbody>") for row in data.get('rows', []): result.append(u"<tr class='row_0'>") for value in row: try: result.append(u"<td> %s </td>" %cgi.escape(value.n3())) except Exception, err: logger.debug(err) result.append(u"<td> %s </td>" %value) result.append(u"</tr>")
def updateLastWorkingResults(self, **arg_values):
    """ update cached last working results of a query

    Runs the (interpolated) query, decides whether the fresh result
    should replace the cached one, stores a plain-text rendering of the
    rows, and creates a new version via portal_repository when the
    content type is versionable.

    :param arg_values: values interpolated into self.query
    """
    cached_result = self.getSparqlCacheResults()
    cooked_query = interpolate_query(self.query, arg_values)
    args = (self.endpoint_url, cooked_query)
    try:
        # Never run with a timeout below 10 seconds.
        new_result = run_with_timeout(
            max(getattr(self, 'timeout', 10), 10),
            query_and_get_result,
            *args)
    except QueryTimeout:
        new_result = {'exception': "query has ran - an timeout has"
                                   " been received"}
    # Save only when the fresh result is non-empty and differs from the
    # cache: either it has rows, or both it and the cache are row-less.
    force_save = False
    if new_result.get("result", {}) != {}:
        if new_result != cached_result:
            if len(new_result.get("result", {}).get("rows", {})) > 0:
                force_save = True
            else:
                if len(cached_result.get('result', {}).get('rows', {})) \
                        == 0:
                    force_save = True
    pr = getToolByName(self, 'portal_repository')
    comment = "query has run - no result changes"
    if force_save:
        self.setSparqlCacheResults(new_result)
        new_sparql_results = []
        rows = new_result.get('result', {}).get('rows', {})
        if rows:
            for row in rows:
                for val in row:
                    # Join cell values with " | "; the trailing
                    # separator of each row is stripped below.
                    new_sparql_results.append(unicode(val) + " | ")
                new_sparql_results[-1] = new_sparql_results[-1][0:-3]
            new_sparql_results_str = "".join(new_sparql_results) + "\n"
            self.setSparql_results(new_sparql_results_str)
        comment = "query has run - result changed"
    if self.portal_type in pr.getVersionableContentTypes():
        comment = comment.encode('utf')
        try:
            # Version the object as the system user, then restore the
            # original security context.
            oldSecurityManager = getSecurityManager()
            newSecurityManager(None, SpecialUsers.system)
            pr.save(obj=self, comment=comment)
            setSecurityManager(oldSecurityManager)
        except FileTooLargeToVersionError:
            # NOTE(review): 'view' is not defined in this scope --
            # confirm where getCommandSet is supposed to come from.
            commands = view.getCommandSet('plone')
            commands.issuePortalMessage(
                """Changes Saved. Versioning for this file has been disabled because it is too large.""",
                msgtype="warn")
    if new_result.get('exception', None):
        # Keep the old results but record the latest failure on them.
        cached_result['exception'] = new_result['exception']
        self.setSparqlCacheResults(cached_result)
def preview(self):
    """preview

    Render an HTML preview of the sparql query taken from the request:
    comment lines are stripped, the result set is kept small (existing
    LIMIT lowered to 10, otherwise LIMIT 5 appended), and either an
    error box or an HTML results table is produced.
    """
    tmp_query = self.request.get("sparql_query", "")
    # Drop '#' comment lines before interpolating arguments.
    tmp_query = "\n".join(x for x in tmp_query.splitlines()
                          if not x.strip().startswith("#"))
    tmp_arg_spec = self.request.get("arg_spec", "")
    tmp_endpoint = self.request.get("endpoint", "")
    tmp_timeout = int(self.request.get("timeout", "0"))
    arg_spec = parse_arg_spec(tmp_arg_spec)
    missing, arg_values = map_arg_values(arg_spec, self.request.form)
    error = None
    if missing:
        # Report every missing argument; the non-empty error string
        # triggers the early error return below.
        error = ""
        for missing_arg in missing:
            error = error + "<div>Argument '%s' missing</div>" % missing_arg
    else:
        result = []
        data = []
        error = None
        try:
            # Lower an existing LIMIT to 10, or append LIMIT 5.
            m = re.search(r"limit\s(\d+)", tmp_query, re.IGNORECASE)
            if m:
                tmp_query = tmp_query[:m.start(1)] + '10' + tmp_query[
                    m.end(1):]
            else:
                tmp_query = tmp_query + " LIMIT 5"
            cooked_query = interpolate_query(tmp_query, arg_values)
            args = (tmp_endpoint, cooked_query)
            result, error = {}, None
            result = run_with_timeout(tmp_timeout, query_and_get_result,
                                      *args)
            # Default to {} so a missing 'result' key cannot leave data
            # as None.
            data = result.get('result', {})
            error = error or result.get('exception')
        except Exception:
            import traceback
            error = traceback.format_exc()
    if error:
        return "<code class='sparql-error'><pre> %s </pre></code>" % error
    result = [u"<table class='sparql-results'>", u"<thead>", u"<tr>"]
    for var_name in data.get('var_names', []):
        result.append(u"<th> %s </th>" % var_name)
    result.append(u"</tr>")
    result.append(u"</thead>")
    result.append(u"<tbody>")
    for row in data.get('rows', []):
        result.append(u"<tr class='row_0'>")
        for value in row:
            try:
                result.append(u"<td> %s </td>" % cgi.escape(value.n3()))
            except Exception, err:
                # Plain values have no .n3() serialization; emit as-is.
                logger.debug(err)
                result.append(u"<td> %s </td>" % value)
        result.append(u"</tr>")
def getQueryResults(self, argument=None):
    """Return the result rows for the given argument's query.

    :param argument: key into the mapping returned by getQueryMap()
    :return: list of result rows, or an empty list when no argument is
        given or the query produced no usable result
    """
    # Guard clause; 'is None' is the idiomatic identity test.
    if argument is None:
        return []
    arg_query = self.getQueryMap()[argument]
    query_args = (self.context.endpoint_url, arg_query)
    data = run_with_timeout(10, query_and_get_result, *query_args)
    if 'result' in data:
        # Tolerate a result payload without a 'rows' key.
        return data['result'].get('rows', [])
    return []
def getQueryResults(self, argument=None):
    """Return the result rows for the given argument's query.

    :param argument: key into the mapping returned by getQueryMap()
    :return: list of result rows, or an empty list when no argument is
        given or the query produced no usable result
    """
    results = []
    # 'is not None' is the idiomatic identity test (was '!= None').
    if argument is not None:
        arg_query = self.getQueryMap()[argument]
        data = run_with_timeout(10, query_and_get_result,
                                self.context.endpoint_url, arg_query)
        if 'result' in data:
            # Tolerate a result payload without a 'rows' key.
            results = data['result'].get('rows', [])
    return results
def __call__(self):
    """Handle the column-relabel form.

    Three POST actions are supported:

    * ``get_data`` -- run the submitted query and expose its rows as
      JSON for the preview table;
    * ``relabel`` -- rewrite ``?old`` variable names in the query to the
      user's new labels and allow saving;
    * ``save`` -- persist the rewritten query on the context object.

    Always renders ``self.index()`` unless redirecting after save.
    """
    self.results = []
    self.can_save = False
    is_post = self.request.method == 'POST'
    if is_post and ('get_data' in self.request.form):
        endpoint_url = self.context.getEndpoint_url()
        query = self.request.form.get('query')
        if not query:
            return self.index()
        cooked_query = interpolate_query(query, {})
        args = (endpoint_url, cooked_query)
        results = run_with_timeout(20, query_and_get_result, *args)
        # (removed stray debug `print results` left in the request path)
        self.results = sparql2json(results)
    elif is_post and 'relabel' in self.request.form:
        # Every non-empty form field except these is an old->new mapping.
        blacklist = ['query', 'relabel', 'save']
        remap = [(k, v) for k, v in self.request.form.items()
                 if (v and (k not in blacklist))]
        query = self.request.form.get('query')
        sm = IStatusMessage(self.request)
        if not query:
            sm.add(u"Need a query.", type='warning')
            return self.index()
        if len(remap) != len(self.choices):
            sm.add(u"You don't have enough mapped columns", type='warning')
            return self.index()
        else:
            for rep, sub in remap:
                # Replace ?oldname only when not followed by a word
                # character, so ?name does not clobber ?name2.
                rx = r'(\?' + rep + ')(?!\w)'
                query = re.sub(rx, '?' + sub, query)
            self.query = query  # override the method
            self.can_save = True
    elif is_post and 'save' in self.request.form:
        self.context.setSparql_query(self.query())
        return self.request.response.redirect(self.context.absolute_url())
    return self.index()
def loadSparql():
    """ Load data from mock http """
    # Build a Sparql fixture pointing at the local mock server.
    fixture = Sparql(0)
    fixture.endpoint_url = "http://localhost:" + str(
        PORT) + "/sparql-results.xml"
    fixture.sparql_query = mock_sparql_query
    fixture.timeout = None
    fixture.arg_spec = ""
    # No arguments: parse the empty spec and take the values mapping.
    spec = parse_arg_spec(fixture.arg_spec)
    _missing, values = map_arg_values(spec, "")
    cooked = interpolate_query(fixture.query, values)
    return run_with_timeout(
        10, query_and_get_result, fixture.endpoint_url, cooked)
def loadSparql():
    """ Load data from mock http """
    # Sparql fixture wired to the local mock endpoint.
    obj = Sparql(0)
    obj.endpoint_url = "http://localhost:"+str(PORT)+"/sparql-results.xml"
    obj.sparql_query = mock_sparql_query
    obj.timeout = None
    obj.arg_spec = ""
    # Empty spec / empty args: only the values mapping is needed.
    arg_values = map_arg_values(parse_arg_spec(obj.arg_spec), "")[1]
    cooked_query = interpolate_query(obj.query, arg_values)
    query_args = (obj.endpoint_url, cooked_query)
    data = run_with_timeout(10, query_and_get_result, *query_args)
    return data
def updateLastWorkingResults(self, **arg_values):
    """ update cached last working results of a query (json exhibit)

    Runs the (interpolated) query, decides whether the fresh result
    should replace the cached one, refreshes the other cached export
    formats, stores a plain-text rendering of the rows, and creates a
    new version via portal_repository when the type is versionable.

    :param arg_values: values interpolated into self.query
    """
    cached_result = self.getSparqlCacheResults()
    cooked_query = interpolate_query(self.query, arg_values)
    args = (self.endpoint_url, cooked_query)
    try:
        # Never run with a timeout below 10 seconds.
        new_result = run_with_timeout(
            max(getattr(self, 'timeout', 10), 10),
            query_and_get_result,
            *args)
    except QueryTimeout:
        new_result = {
            'exception': "query has ran - an timeout has"
                         " been received"
        }
    # Save only when the fresh result is non-empty and differs from the
    # cache: either it has rows, or both it and the cache are row-less.
    force_save = False
    if new_result.get("result", {}) != {}:
        if new_result != cached_result:
            if new_result.get("result", {}).get("rows", {}):
                force_save = True
            else:
                if not cached_result.get('result', {}).get('rows', {}):
                    force_save = True
    pr = getToolByName(self, 'portal_repository')
    comment = "query has run - no result changes"
    if force_save:
        self.setSparqlCacheResults(new_result)
        # Refresh the other cached export formats for this schedule.
        self._updateOtherCachedFormats(self.last_scheduled_at,
                                       self.endpoint_url,
                                       cooked_query)
        new_sparql_results = []
        rows = new_result.get('result', {}).get('rows', {})
        if rows:
            for row in rows:
                for val in row:
                    # Join cell values with " | "; the trailing
                    # separator of each row is stripped below.
                    new_sparql_results.append(unicode(val) + " | ")
                new_sparql_results[-1] = new_sparql_results[-1][0:-3]
            new_sparql_results_str = "".join(new_sparql_results) + "\n"
            self.setSparql_results(new_sparql_results_str)
        comment = "query has run - result changed"
    if self.portal_type in pr.getVersionableContentTypes():
        comment = comment.encode('utf')
        try:
            # Version the object as the system user, then restore the
            # original security context.
            oldSecurityManager = getSecurityManager()
            newSecurityManager(None, SpecialUsers.system)
            pr.save(obj=self, comment=comment)
            setSecurityManager(oldSecurityManager)
        except FileTooLargeToVersionError:
            # NOTE(review): 'view' is not defined in this scope --
            # confirm where getCommandSet is supposed to come from.
            commands = view.getCommandSet('plone')
            commands.issuePortalMessage(
                """Changes Saved. Versioning for this file has been disabled because it is too large.""",
                msgtype="warn")
    if new_result.get('exception', None):
        # Keep the old results but record the latest failure on them.
        cached_result['exception'] = new_result['exception']
        self.setSparqlCacheResults(cached_result)