def process_sparql_query_text(resp, raw_query_uri, raw_repo_uri, call_name, extraMetadata):
    """Build the spec item (a plain dict) describing one SPARQL query.

    Args:
        resp: Query payload handed to ``gquery.get_metadata`` and
            ``gquery.get_parameters``.
        raw_query_uri: URI of the query; only used in error messages.
        raw_repo_uri: Passed to ``gquery.guess_endpoint_uri`` to resolve the
            endpoint.
        call_name: Stored under the item's ``'call_name'`` key.
        extraMetadata: Iterable of metadata keys that are copied verbatim
            into the item when the query metadata contains them.

    Returns:
        A dict with the keys ``call_name``, ``method``, ``tags``, ``summary``,
        ``description``, ``params``, ``item_properties`` and ``query`` (plus
        any requested extra fields), or ``None`` when the metadata or the
        parameters cannot be parsed.
    """
    try:
        query_metadata = gquery.get_metadata(resp)
    except Exception as e:
        glogger.error("Could not parse query at {}".format(raw_query_uri))
        glogger.error(e)
        return None

    tags = query_metadata.get('tags', [])
    glogger.debug("Read query tags: " + ', '.join(tags))

    summary = query_metadata.get('summary', "")
    glogger.debug("Read query summary: " + summary)

    description = query_metadata.get('description', "")
    glogger.debug("Read query description: " + description)

    # Unknown or missing HTTP verbs fall back to GET.
    method = query_metadata['method'].lower() if 'method' in query_metadata else "get"
    if method not in ('get', 'post', 'head', 'put', 'delete', 'options', 'connect'):
        method = "get"

    pagination = query_metadata.get('pagination', "")
    glogger.debug("Read query pagination: " + str(pagination))

    enums = query_metadata.get('enumerate', [])
    glogger.debug("Read query enumerates: " + ', '.join(enums))

    mime = query_metadata.get('mime', "")
    glogger.debug("Read endpoint dump MIME type: " + str(mime))

    endpoint = gquery.guess_endpoint_uri("", raw_repo_uri)
    glogger.debug("Read query endpoint: " + endpoint)

    try:
        parameters = gquery.get_parameters(resp, endpoint)
    except Exception:
        # format_exc() returns the traceback as text so it ends up in the
        # log; print_exc() returns None and only writes to stderr.
        glogger.error(traceback.format_exc())
        glogger.error("Could not parse parameters of query {}".format(raw_query_uri))
        return None

    glogger.debug("Read request parameters")

    # TODO: do something intelligent with the parameters!
    # As per #3, prefetching IRIs via SPARQL and filling enum
    params = []
    for p in parameters.values():
        param = {
            'name': p['name'],
            'type': p['type'],
            'required': p['required'],
            'in': "query",
            'description': "A value of type {} that will substitute {} in the original query".format(
                p['type'], p['original']),
        }
        if p['enum']:
            param['enum'] = p['enum']
        params.append(param)

    # If this query allows pagination, add page number as parameter
    if pagination:
        params.append({
            'name': "page",
            'type': "int",
            'in': "query",
            'description': "The page number for this paginated query ({} results per page)".format(
                pagination),
        })

    item_properties = {}
    if query_metadata['type'] != 'SelectQuery':
        # TODO: Turn this into a nicer thingamajim
        glogger.warning("This is not a SelectQuery, don't really know what to do!")
        summary += "WARNING: non-SELECT queries are not really treated properly yet"
        # just continue with empty item_properties
    else:
        # SELECT query: one result-binding schema per projection variable.
        for pv in query_metadata['variables']:
            item_properties[pv] = {
                "name": pv,
                "type": "object",
                "required": ["type", "value"],
                "properties": {
                    "type": {"type": "string"},
                    "value": {"type": "string"},
                    "xml:lang": {"type": "string"},
                    "datatype": {"type": "string"},
                },
            }

    item = {
        'call_name': call_name,
        'method': method,
        'tags': tags,
        'summary': summary,
        'description': description,
        'params': params,
        'item_properties': item_properties,
        'query': query_metadata['query'],
    }

    for extraField in extraMetadata:
        if extraField in query_metadata:
            item[extraField] = query_metadata[extraField]

    return item
def query(user, repo, query_name, content=None):
    """Execute the query called ``query_name`` from a GitHub repo and relay the result.

    Three candidate files are fetched from the repository's ``master``
    branch: ``<query_name>.rq``, ``<query_name>.sparql`` and
    ``<query_name>.tpf``. A SPARQL file wins over a TPF file.

    Args:
        user: GitHub user/organisation part of the raw repository URI.
        repo: Repository name part of the raw repository URI.
        query_name: File name (without extension) of the query.
        content: Optional key into ``static.mimetypes``; when given it
            overrides the ``Accept`` header of the incoming request.

    Returns:
        The response object built with ``make_response``, or a
        ``(message, status)`` tuple: 415 when a dump is queried locally and
        neither JSON nor CSV was requested, 404 when no query file exists.
    """
    glogger.debug("Got request at endpoint /" + user + "/" + repo + "/" + query_name)
    glogger.debug("Request accept header: " + request.headers["Accept"])

    raw_repo_uri = static.GITHUB_RAW_BASE_URL + user + "/" + repo + "/master/"

    # The URIs of all candidates
    raw_sparql_query_uri = raw_repo_uri + query_name + ".rq"
    raw_alt_sparql_query_uri = raw_repo_uri + query_name + ".sparql"
    raw_tpf_query_uri = raw_repo_uri + query_name + ".tpf"

    raw_sparql_query = requests.get(raw_sparql_query_uri)
    raw_alt_sparql_query = requests.get(raw_alt_sparql_query_uri)
    raw_tpf_query = requests.get(raw_tpf_query_uri)

    # Call name implemented with SPARQL query
    if raw_sparql_query.status_code == 200 or raw_alt_sparql_query.status_code == 200:
        if raw_sparql_query.status_code == 200:
            raw_sparql_query = raw_sparql_query.text
        else:
            raw_sparql_query = raw_alt_sparql_query.text

        endpoint = gquery.guess_endpoint_uri(raw_sparql_query, raw_repo_uri)
        glogger.debug("=====================================================")
        glogger.debug("Sending query to SPARQL endpoint: {}".format(endpoint))
        glogger.debug("=====================================================")

        query_metadata = gquery.get_metadata(raw_sparql_query)
        pagination = query_metadata["pagination"] if "pagination" in query_metadata else ""

        # Rewrite query using parameter values
        rewritten_query = gquery.rewrite_query(raw_sparql_query, request.args, endpoint)

        # Rewrite query using pagination
        paginated_query = gquery.paginate_query(rewritten_query, request.args)

        resp = None
        # If we have a mime field, we load the remote dump and query it locally
        if "mime" in query_metadata and query_metadata["mime"]:
            g = Graph()
            try:
                g.parse(endpoint, format=query_metadata["mime"])
            except Exception as e:
                # Best effort: a dump that fails to load is logged and the
                # query then runs against whatever the graph holds.
                glogger.error(e)
            results = g.query(paginated_query, result="sparql")

            # Prepare return format as requested
            accept = request.headers["Accept"]
            if "application/json" in accept or (
                content and "application/json" in static.mimetypes[content]
            ):
                resp_string = results.serialize(format="json")
            elif "text/csv" in accept or (content and "text/csv" in static.mimetypes[content]):
                resp_string = results.serialize(format="csv")
            else:
                return "Unacceptable requested format", 415
            del g

            resp = make_response(resp_string)
        # If there's no mime type, the endpoint is an actual SPARQL endpoint
        else:
            # Prepare HTTP request
            headers = {"Accept": request.headers["Accept"]}
            if content:
                headers = {"Accept": static.mimetypes[content]}
            data = {"query": paginated_query}

            response = requests.get(endpoint, params=data, headers=headers)
            glogger.debug("Response header from endpoint: " + response.headers["Content-Type"])

            # Response headers: relay the content type reported by the
            # endpoint (not the one of the incoming request), as the TPF
            # branch below does.
            resp = make_response(response.text)
            resp.headers["Server"] = "grlc/1.0.0"
            resp.headers["Content-Type"] = response.headers["Content-Type"]

        # If the query is paginated, set link HTTP headers
        if pagination:
            # Get number of total results
            count = gquery.count_query_results(rewritten_query, endpoint)
            # Floor division keeps the page number integral on Python 3 too.
            # NOTE(review): a partially filled final page is not counted;
            # confirm whether this should be a ceiling division.
            last_page = count // pagination
            page = 1
            if "page" in request.args:
                page = int(request.args["page"])
                next_url = re.sub("page=[0-9]+", "page={}".format(page + 1), request.url)
                prev_url = re.sub("page=[0-9]+", "page={}".format(page - 1), request.url)
                first_url = re.sub("page=[0-9]+", "page=1", request.url)
                last_url = re.sub("page=[0-9]+", "page={}".format(last_page), request.url)
            else:
                next_url = request.url + "?page={}".format(page + 1)
                prev_url = request.url + "?page={}".format(page - 1)
                first_url = request.url + "?page={}".format(page)
                last_url = request.url + "?page={}".format(last_page)

            if page == 1:
                resp.headers["Link"] = "<{}>; rel=next, <{}>; rel=last".format(next_url, last_url)
            elif page == last_page:
                resp.headers["Link"] = "<{}>; rel=prev, <{}>; rel=first".format(prev_url, first_url)
            else:
                resp.headers["Link"] = "<{}>; rel=next, <{}>; rel=prev, <{}>; rel=first, <{}>; rel=last".format(
                    next_url, prev_url, first_url, last_url
                )

        return resp

    # Call name implemented with TPF query
    elif raw_tpf_query.status_code == 200:
        raw_tpf_query = raw_tpf_query.text
        endpoint = gquery.guess_endpoint_uri(raw_tpf_query, raw_repo_uri)
        glogger.debug("=====================================================")
        glogger.debug("Sending query to TPF endpoint: {}".format(endpoint))
        glogger.debug("=====================================================")

        query_metadata = gquery.get_yaml_decorators(raw_tpf_query)

        # TODO: pagination for TPF

        # Prepare HTTP request
        headers = {"Accept": request.headers["Accept"]}
        if content:
            headers = {"Accept": static.mimetypes[content]}

        # The file is split on newlines and '='; the token following each
        # of 'subject', 'predicate' and 'object' is taken as its value.
        tpf_list = re.split("\n|=", raw_tpf_query)
        subject = tpf_list[tpf_list.index("subject") + 1]
        predicate = tpf_list[tpf_list.index("predicate") + 1]
        obj = tpf_list[tpf_list.index("object") + 1]
        data = {"subject": subject, "predicate": predicate, "object": obj}

        response = requests.get(endpoint, params=data, headers=headers)
        glogger.debug("Response header from endpoint: " + response.headers["Content-Type"])

        # Response headers
        resp = make_response(response.text)
        resp.headers["Server"] = "grlc/1.0.0"
        resp.headers["Content-Type"] = response.headers["Content-Type"]

        return resp

    else:
        return "Couldn't find a SPARQL, RDF dump, or TPF query with the requested name", 404
def process_sparql_query_text(query_text, raw_repo_uri, call_name, extraMetadata):
    """Build the spec item (a plain dict) describing one SPARQL query.

    Args:
        query_text: Text of the query, handed to ``gquery.guess_endpoint_uri``
            and ``gquery.get_metadata``.
        raw_repo_uri: Passed to ``gquery.guess_endpoint_uri``; also used to
            build the location shown in error messages.
        call_name: Stored under the item's ``'call_name'`` key.
        extraMetadata: Iterable of metadata keys that are copied verbatim
            into the item when the query metadata contains them.

    Returns:
        A dict with the keys ``call_name``, ``method``, ``tags``, ``summary``,
        ``description``, ``params``, ``item_properties`` and ``query`` (plus
        any requested extra fields), or ``None`` when the metadata cannot be
        parsed.
    """
    # We get the endpoint name first, since some query metadata fields (eg enums) require it
    endpoint, _ = gquery.guess_endpoint_uri(query_text, raw_repo_uri)
    glogger.debug("Read query endpoint: {}".format(endpoint))

    try:
        query_metadata = gquery.get_metadata(query_text, endpoint)
    except Exception:
        raw_query_uri = raw_repo_uri + ' / ' + call_name
        glogger.error("Could not parse query at {}".format(raw_query_uri))
        # format_exc() returns the traceback text; print_exc() returns None,
        # which would put the literal "None" in the log.
        glogger.error(traceback.format_exc())
        return None

    tags = query_metadata.get('tags', [])
    summary = query_metadata.get('summary', "")
    description = query_metadata.get('description', "")

    # An empty method means "not chosen yet"; it is defaulted per query type below.
    method = query_metadata['method'].lower() if 'method' in query_metadata else ""
    if method not in ('get', 'post', 'head', 'put', 'delete', 'options', 'connect'):
        method = ""

    pagination = query_metadata.get('pagination', "")
    endpoint_in_url = query_metadata.get('endpoint_in_url', True)
    query_type = query_metadata['type']

    # Processing of the parameters
    params = []

    # PV properties
    item_properties = {}

    # If this query allows pagination, add page number as parameter
    if pagination:
        params.append({
            'name': "page",
            'type': "int",
            'in': "query",
            'description': "The page number for this paginated query ({} results per page)".format(
                pagination),
        })

    if query_type in ('SelectQuery', 'ConstructQuery', 'InsertData'):
        # TODO: do something intelligent with the parameters!
        # As per #3, prefetching IRIs via SPARQL and filling enum
        for p in query_metadata['parameters'].values():
            param = {
                'name': p['name'],
                'type': p['type'],
                'required': p['required'],
                'in': "query",
                'description': "A value of type {} that will substitute {} in the original query".format(
                    p['type'], p['original']),
            }
            if p['enum']:
                param['enum'] = p['enum']
            params.append(param)

    if endpoint_in_url:
        params.append({
            'name': "endpoint",
            'type': "string",
            'in': "query",
            'description': "Alternative endpoint for SPARQL query",
            'default': endpoint,
        })

    if query_type == 'SelectQuery':
        # Fill in the spec for SELECT
        if not method:
            method = 'get'
        for pv in query_metadata['variables']:
            item_properties[pv] = {
                "name": pv,
                "type": "object",
                "required": ["type", "value"],
                "properties": {
                    "type": {"type": "string"},
                    "value": {"type": "string"},
                    "xml:lang": {"type": "string"},
                    "datatype": {"type": "string"},
                },
            }
    elif query_type == 'ConstructQuery':
        if not method:
            method = 'get'
    elif query_type == 'UNKNOWN':
        glogger.warning(
            "grlc could not parse this query; assuming a plain, non-parametric SELECT in the API spec"
        )
        if not method:
            method = 'get'
    else:
        glogger.warning(
            "Query of type {} is currently unsupported! Skipping".format(query_type))

    # Finally: main structure of the callname spec
    # NOTE(review): item_properties is filled in above for SelectQuery but the
    # item exports None; kept as-is because consumers may rely on it. Confirm
    # whether the computed dict should be exported instead.
    item = {
        'call_name': call_name,
        'method': method,
        'tags': tags,
        'summary': summary,
        'description': description,
        'params': params,
        'item_properties': None,  # From projection variables, only SelectQuery
        'query': query_metadata['query'],
    }

    for extraField in extraMetadata:
        if extraField in query_metadata:
            item[extraField] = query_metadata[extraField]

    return item
def query(user, repo, query_name, sha=None, content=None):
    """Execute the query called ``query_name`` and relay the result.

    The query text and its kind are obtained from a loader: a
    ``LocalLoader`` when both ``user`` and ``repo`` are ``None``, otherwise a
    ``GithubLoader`` built from ``user``, ``repo`` and ``sha``.

    Args:
        user: Repository owner, or ``None`` together with ``repo`` to use
            the local loader.
        repo: Repository name, or ``None`` together with ``user``.
        query_name: Name of the query to look up through the loader.
        sha: Commit identifier passed to ``GithubLoader``.
        content: Optional key into ``static.mimetypes``; when given it
            overrides the ``Accept`` header of the incoming request.

    Returns:
        The response object built with ``make_response``, or a
        ``(message, status)`` tuple: 415 when a dump is queried locally and
        neither JSON nor CSV was requested, 404 when the loader reports
        neither a SPARQL nor a TPF query.
    """
    glogger.debug(
        "-----> Executing call name at /{}/{}/{} on commit {}".format(
            user, repo, query_name, sha))
    glogger.debug("Request accept header: " + request.headers["Accept"])

    if user is None and repo is None:
        loader = LocalLoader()
    else:
        loader = GithubLoader(user, repo, sha, None)

    query_text, q_type = loader.getTextForName(query_name)

    # Call name implemented with SPARQL query
    if q_type == qType['SPARQL']:
        raw_sparql_query = query_text
        endpoint, auth = gquery.guess_endpoint_uri(raw_sparql_query, loader)
        glogger.debug("=====================================================")
        glogger.debug("Sending query to SPARQL endpoint: {}".format(endpoint))
        glogger.debug("=====================================================")

        query_metadata = gquery.get_metadata(raw_sparql_query)
        pagination = query_metadata['pagination'] if 'pagination' in query_metadata else ""

        # Rewrite query using parameter values
        rewritten_query = gquery.rewrite_query(raw_sparql_query, request.args, endpoint)

        # Rewrite query using pagination
        paginated_query = gquery.paginate_query(rewritten_query, request.args)

        resp = None
        # If we have a mime field, we load the remote dump and query it locally
        if 'mime' in query_metadata and query_metadata['mime']:
            g = Graph()
            try:
                g.parse(endpoint, format=query_metadata['mime'])
            except Exception as e:
                # Best effort: a dump that fails to load is logged and the
                # query then runs against whatever the graph holds.
                glogger.error(e)
            results = g.query(paginated_query, result='sparql')

            # Prepare return format as requested
            accept = request.headers['Accept']
            if 'application/json' in accept or (
                    content and 'application/json' in static.mimetypes[content]):
                resp_string = results.serialize(format='json')
            elif 'text/csv' in accept or (
                    content and 'text/csv' in static.mimetypes[content]):
                resp_string = results.serialize(format='csv')
            else:
                return 'Unacceptable requested format', 415
            del g

            resp = make_response(resp_string)
        # If there's no mime type, the endpoint is an actual SPARQL endpoint
        else:
            # Prepare HTTP request
            headers = {'Accept': request.headers['Accept']}
            if content:
                # NOTE(review): static.ACCESS_TOKEN is forwarded to the query
                # endpoint here (and only when `content` is set) - confirm
                # that the endpoint is meant to receive this credential.
                headers = {
                    'Accept': static.mimetypes[content],
                    'Authorization': 'token {}'.format(static.ACCESS_TOKEN)
                }
            data = {'query': paginated_query}

            response = requests.get(endpoint, params=data, headers=headers, auth=auth)
            glogger.debug('Response header from endpoint: ' + response.headers['Content-Type'])

            # Response headers
            resp = make_response(response.text)
            resp.headers['Server'] = 'grlc/1.0.0'
            resp.headers['Content-Type'] = response.headers['Content-Type']

        # If the query is paginated, set link HTTP headers
        if pagination:
            # Get number of total results
            count = gquery.count_query_results(rewritten_query, endpoint)
            # Floor division keeps the page number integral on Python 3.
            # NOTE(review): a partially filled final page is not counted;
            # confirm whether this should be a ceiling division.
            last_page = count // pagination
            page = 1
            if 'page' in request.args:
                page = int(request.args['page'])
                next_url = re.sub("page=[0-9]+", "page={}".format(page + 1), request.url)
                prev_url = re.sub("page=[0-9]+", "page={}".format(page - 1), request.url)
                first_url = re.sub("page=[0-9]+", "page=1", request.url)
                last_url = re.sub("page=[0-9]+", "page={}".format(last_page), request.url)
            else:
                next_url = request.url + "?page={}".format(page + 1)
                prev_url = request.url + "?page={}".format(page - 1)
                first_url = request.url + "?page={}".format(page)
                last_url = request.url + "?page={}".format(last_page)

            if page == 1:
                resp.headers['Link'] = "<{}>; rel=next, <{}>; rel=last".format(
                    next_url, last_url)
            elif page == last_page:
                resp.headers['Link'] = "<{}>; rel=prev, <{}>; rel=first".format(
                    prev_url, first_url)
            else:
                resp.headers['Link'] = "<{}>; rel=next, <{}>; rel=prev, <{}>; rel=first, <{}>; rel=last".format(
                    next_url, prev_url, first_url, last_url)

        return resp

    # Call name implemented with TPF query
    elif q_type == qType['TPF']:
        raw_tpf_query = query_text
        # Resolve the endpoint through the loader, exactly like the SPARQL
        # branch; no raw repository URI is bound in this branch.
        endpoint, auth = gquery.guess_endpoint_uri(raw_tpf_query, loader)
        glogger.debug("=====================================================")
        glogger.debug("Sending query to TPF endpoint: {}".format(endpoint))
        glogger.debug("=====================================================")

        query_metadata = gquery.get_yaml_decorators(raw_tpf_query)

        # TODO: pagination for TPF

        # Prepare HTTP request
        # NOTE(review): static.ACCESS_TOKEN is forwarded to the TPF endpoint -
        # confirm that the endpoint is meant to receive this credential.
        headers = {
            'Accept': request.headers['Accept'],
            'Authorization': 'token {}'.format(static.ACCESS_TOKEN)
        }
        if content:
            headers = {
                'Accept': static.mimetypes[content],
                'Authorization': 'token {}'.format(static.ACCESS_TOKEN)
            }

        # The text is split on newlines and '='; the token following each
        # of 'subject', 'predicate' and 'object' is taken as its value.
        tpf_list = re.split('\n|=', raw_tpf_query)
        subject = tpf_list[tpf_list.index('subject') + 1]
        predicate = tpf_list[tpf_list.index('predicate') + 1]
        obj = tpf_list[tpf_list.index('object') + 1]
        data = {'subject': subject, 'predicate': predicate, 'object': obj}

        response = requests.get(endpoint, params=data, headers=headers, auth=auth)
        glogger.debug('Response header from endpoint: ' + response.headers['Content-Type'])

        # Response headers
        resp = make_response(response.text)
        resp.headers['Server'] = 'grlc/1.0.0'
        resp.headers['Content-Type'] = response.headers['Content-Type']

        return resp

    else:
        return "Couldn't find a SPARQL, RDF dump, or TPF query with the requested name", 404
def process_sparql_query_text(query_text, raw_repo_uri, call_name, extraMetadata):
    """Build the spec item (a plain dict) describing one SPARQL query.

    Args:
        query_text: Text of the query, handed to ``gquery.get_metadata``,
            ``gquery.guess_endpoint_uri`` and ``gquery.get_parameters``.
        raw_repo_uri: Passed to ``gquery.guess_endpoint_uri``; also used to
            build the location shown in error messages.
        call_name: Stored under the item's ``'call_name'`` key.
        extraMetadata: Iterable of metadata keys that are copied verbatim
            into the item when the query metadata contains them.

    Returns:
        For a ``SelectQuery``: a dict with ``call_name``, ``method``,
        ``tags``, ``summary``, ``description``, ``params``,
        ``item_properties`` and ``query``. For any other type: the same dict
        without ``params`` and ``item_properties``. ``None`` when the
        metadata or the parameters cannot be parsed.
    """
    try:
        query_metadata = gquery.get_metadata(query_text)
    except Exception:
        raw_query_uri = raw_repo_uri + ' / ' + call_name
        glogger.error("Could not parse query at {}".format(raw_query_uri))
        # format_exc() returns the traceback text; print_exc() returns None,
        # which would put the literal "None" in the log.
        glogger.error(traceback.format_exc())
        return None

    tags = query_metadata.get('tags', [])
    glogger.debug("Read query tags: {}".format(', '.join(tags)))

    summary = query_metadata.get('summary', "")
    glogger.debug("Read query summary: {}".format(summary))

    description = query_metadata.get('description', "")
    glogger.debug("Read query description: {}".format(description))

    # An empty method means "not chosen yet"; it is defaulted per query type below.
    method = query_metadata['method'].lower() if 'method' in query_metadata else ""
    if method not in ('get', 'post', 'head', 'put', 'delete', 'options', 'connect'):
        method = ""

    pagination = query_metadata.get('pagination', "")
    glogger.debug("Read query pagination: {}".format(pagination))

    mime = query_metadata.get('mime', "")
    glogger.debug("Read endpoint dump MIME type: {}".format(mime))

    endpoint = gquery.guess_endpoint_uri(query_text, raw_repo_uri)
    glogger.debug("Read query endpoint: {}".format(endpoint))

    is_select = query_metadata['type'] == 'SelectQuery'

    # Initialised up front so the pagination step below always has a list to
    # append to, whatever the query type.
    params = []
    if is_select:
        try:
            parameters = gquery.get_parameters(query_text, endpoint)
        except Exception as e:
            glogger.error(e)
            glogger.error(
                "Could not parse parameters of query {}".format(call_name))
            return None

        glogger.debug("Read request parameters")

        # TODO: do something intelligent with the parameters!
        # As per #3, prefetching IRIs via SPARQL and filling enum
        for p in parameters.values():
            param = {
                'name': p['name'],
                'type': p['type'],
                'required': p['required'],
                'in': "query",
                'description': "A value of type {} that will substitute {} in the original query".format(
                    p['type'], p['original']),
            }
            if p['enum']:
                param['enum'] = p['enum']
            params.append(param)

    # If this query allows pagination, add page number as parameter
    if pagination:
        params.append({
            'name': "page",
            'type': "int",
            'in': "query",
            'description': "The page number for this paginated query ({} results per page)".format(
                pagination),
        })

    if is_select:
        # We now know it is a SELECT query
        if not method:
            method = 'get'
        item_properties = {}
        for pv in query_metadata['variables']:
            item_properties[pv] = {
                "name": pv,
                "type": "object",
                "required": ["type", "value"],
                "properties": {
                    "type": {"type": "string"},
                    "value": {"type": "string"},
                    "xml:lang": {"type": "string"},
                    "datatype": {"type": "string"},
                },
            }
        item = {
            'call_name': call_name,
            'method': method,
            'tags': tags,
            'summary': summary,
            'description': description,
            'params': params,
            'item_properties': item_properties,
            'query': query_metadata['query'],
        }
    else:
        # Treated as an UPDATE; params and props are left out of the item
        if not method:
            method = 'post'
        item = {
            'call_name': call_name,
            'method': method,
            'tags': tags,
            'summary': summary,
            'description': description,
            'query': query_metadata['query'],
        }

    for extraField in extraMetadata:
        if extraField in query_metadata:
            item[extraField] = query_metadata[extraField]

    return item