def on_get(self, req, resp):
    """Return the list of provenance named graphs as JSON."""
    store = GraphStore()
    listing = store._prov_list()
    resp.data = json.dumps(listing, indent=1, sort_keys=True)
    resp.content_type = 'application/json'
    resp.status = falcon.HTTP_200
    app_logger.info('Finished operations on /graph/list/prov GET Request.')
def init_api():
    """Create the API endpoint."""
    provservice = falcon.API()
    provservice.add_route('/health', HealthCheck())
    # All remaining routes share the api_version prefix; register them
    # from one table so the mapping is easy to scan and extend.
    versioned_routes = (
        ('/%s/prov', ConstructProvenance()),
        ('/%s/prov/show/{provID}', RetrieveProvenance()),
        ('/%s/index/prov', IndexProv()),
        ('/%s/status/task/{task_id}', RetrieveQueueTask()),
        ('/%s/graph/query', GraphSPARQL()),
        ('/%s/graph/update', GraphUpdate()),
        ('/%s/graph/list', GraphList()),
        ('/%s/graph/list/prov', ProvList()),
        ('/%s/graph/statistics', GraphStatistics()),
        ('/%s/graph', GraphResource()),
    )
    for template, resource in versioned_routes:
        provservice.add_route(template % (api_version), resource)
    app_logger.info('ProvService REST API is running.')
    return provservice
def _graph_statistics(self):
    """Aggregate Graph Store statistics for the configured dataset."""
    result = {}
    stats_url = "{0}stats/{1}".format(self.server_address, self.dataset)
    try:
        request = requests.get(stats_url, auth=('admin', self.key))
    except Exception as error:
        app_logger.error('Something is wrong: {0}'.format(error))
        raise
    else:
        stats = request.json()
        dataset_key = "/{0}".format(self.dataset)
        # Look the dataset entry up once instead of re-indexing per field.
        dataset_stats = stats['datasets'][dataset_key]
        result['dataset'] = dataset_key
        result['requests'] = {
            'totalRequests': dataset_stats['Requests'],
            'failedRequests': dataset_stats['RequestsBad'],
        }
        # Total triple count is the sum over every named graph.
        result['totalTriples'] = sum(
            int(entry['tripleCount'])
            for entry in self._graph_list()['graphs'])
        app_logger.info(
            'Constructed statistics list for dataset: "/{0}".'.format(
                self.dataset))
        return result
def _do_bulk_index(self, output_list):
    """Construct message for indexing service.

    :param output_list: mapping of doc type -> framed document payload.
    :return: raw RPC response text from the indexing service.
    """
    message = dict()
    message["provenance"] = dict()
    message["payload"] = dict()
    payload_message = message["payload"]
    payload_message["indexingServiceInput"] = dict()
    payload_message["indexingServiceInput"]["task"] = "replace"
    payload_message["indexingServiceInput"]["targetAlias"] = [prov_alias]
    payload_message["indexingServiceInput"]["sourceData"] = []
    # .items() works on both Python 2 and Python 3; the previous
    # .iteritems() call raises AttributeError under Python 3.
    for key, item in output_list.items():
        index_data = dict({
            "useBulk": True,
            "docType": key,
            "inputType": "URI",
            "input": str(item)
        })
        payload_message["indexingServiceInput"]["sourceData"].append(
            index_data)
    frame_rpc = RpcClient(broker['host'], broker['user'], broker['pass'],
                          broker['indexqueue'])
    app_logger.info('Index service message: {0}'.format(
        json.dumps(message)))
    response = frame_rpc.call(json.dumps(message))
    return response
def _prov_activity(self, base_uri, wf_base_uri):
    """Construct Activity provenance Graph.

    Adds a prov:Activity node for the current prov_object to self.graph,
    together with its optional type, title, description, status,
    configuration, timing, communication, usage and generation triples.

    :param base_uri: base identifier for this activity/step execution.
    :param wf_base_uri: base identifier of the enclosing workflow run.
    """
    activity = self.prov_object['activity']
    agent_id = str(self.prov_object['agent']['ID'])
    # The activity URI combines the base ID with the agent ID.
    act_uri = create_uri(ATTXBase, base_uri, agent_id)
    self.graph.add((act_uri, RDF.type, PROV.Activity))
    # Optional sub-typing with a class from the ATTX ontology.
    if activity.get('type'):
        self.graph.add(
            (act_uri, RDF.type, create_uri(ATTXOnto, activity['type'])))
    self._prov_association(act_uri, wf_base_uri)
    if activity.get('title'):
        # Title is recorded both as dcterms:title and rdfs:label.
        self.graph.add(
            (act_uri, DCTERMS.title, Literal(activity['title'])))
        self.graph.add((act_uri, RDFS.label, Literal(activity['title'])))
    if activity.get('description'):
        self.graph.add((act_uri, DCTERMS.description,
                        Literal(activity['description'])))
    if activity.get('status'):
        self.graph.add(
            (act_uri, ATTXOnto.hasStatus, Literal(activity['status'])))
    if activity.get('configuration'):
        self.graph.add((act_uri, ATTXOnto.hasConfig,
                        Literal(activity['configuration'])))
    # Start/end timestamps for the activity.
    self._prov_time(act_uri)
    if activity.get('communication'):
        self._prov_communication(act_uri, wf_base_uri, base_uri)
    # Usage/generation links for the activity's inputs and outputs.
    if self.prov_object.get('input'):
        self._prov_usage(base_uri, act_uri, self.prov_object['input'])
    if self.prov_object.get('output'):
        self._prov_generation(base_uri, act_uri,
                              self.prov_object['output'])
    app_logger.info(
        'Constructed provenance for Activity with URI: attx:{0}.'.format(
            base_uri))
def _construct_provenance(self):
    """Parse Provenance Object and construct Provenance Graph.

    Builds base identifiers from the message context, dispatches to
    dataset or activity provenance construction, stores the resulting
    graph, and returns it serialized as Turtle.

    :return: the provenance graph serialized in Turtle format.
    :raises Exception: re-raised when the prov_object cannot be parsed.
    """
    bind_prefix(self.graph)
    try:
        # IDs are prefixed with their kind, e.g. "activity42".
        activity_id = ''.join(
            filter(None,
                   ('activity',
                    str(self.prov_object['context']['activityID']))))
        workflow_id = ''.join(
            filter(None,
                   ('workflow',
                    str(self.prov_object['context']['workflowID']))))
        # if an activity does not include step ID it is an WorkflowExecution
        if self.prov_object['context'].get('stepID'):
            step_id = ''.join(
                filter(
                    None,
                    ('step', str(self.prov_object['context']['stepID']))))
        else:
            step_id = None
        # e.g. "workflow1_activity2_step3"; the step part is optional.
        base_uri = "_".join(
            filter(None, (workflow_id, activity_id, step_id)))
        wf_base_uri = "{0}_{1}".format(workflow_id, activity_id)
        app_logger.info('Constructed base ID: {0}'.format(base_uri))
        if self.prov_object['activity']['type'] == "DescribeStepExecution":
            self._prov_dataset(base_uri)
        else:
            self._prov_activity(base_uri, wf_base_uri)
    except Exception as error:
        app_logger.error(
            'Something is wrong with parsing the prov_object: {0}'.format(
                error))
        raise error
    else:
        # Persist the graph only when construction succeeded.
        self._store_provenance(wf_base_uri)
        self._store_provenance_graph()
        return self.graph.serialize(format='turtle')
def on_post(self, req, resp, parsed):
    """Run a SPARQL query from the parsed POST body and return XML."""
    store = GraphStore()
    query_result = store._graph_sparql(parsed['namedGraph'],
                                       parsed['query'])
    resp.data = str(query_result)
    # The query endpoint currently produces XML output only.
    resp.content_type = 'application/xml'
    resp.status = falcon.HTTP_200
    app_logger.info('Finished operations on /graph/query POST Request.')
def on_post(self, req, resp, parsed):
    """Add the posted triples to the named graph and report the result."""
    store = GraphStore()
    update_result = store._graph_add(parsed['namedGraph'],
                                     parsed['triples'])
    resp.data = json.dumps(update_result)
    resp.content_type = 'application/json'
    resp.status = falcon.HTTP_200
    app_logger.info('Finished operations on /graph/update POST Request.')
def on_delete(self, req, resp):
    """Execution of the DELETE named graph request.

    Drops the named graph identified by the 'uri' query parameter.
    """
    graph_uri = req.get_param('uri')
    fuseki = GraphStore()
    fuseki._drop_graph(graph_uri)
    # Fixed invalid MIME type: 'plain/text' -> 'text/plain'.
    resp.content_type = 'text/plain'
    app_logger.info(
        'Deleted/DELETE graph with URI: {0}.'.format(graph_uri))
    resp.status = falcon.HTTP_200
def on_get(self, req, resp):
    """Fetch a named graph (by 'uri' query param) as Turtle."""
    graph_uri = req.get_param('uri')
    store = GraphStore()
    graph_data = store._graph_retrieve(graph_uri)
    # Guard clause: a missing graph maps to HTTP 410 Gone.
    if graph_data is None:
        raise falcon.HTTPGone()
    resp.data = str(graph_data)
    resp.content_type = 'text/turtle'
    app_logger.info('Retrieved: {0}.'.format(graph_uri))
    resp.status = falcon.HTTP_200
def on_get(self, req, resp, task_id):
    """Report status and output of a queued Celery task."""
    task = AsyncResult(task_id)
    resp.status = falcon.HTTP_200
    resp.body = json.dumps({
        'status': task.status.lower(),
        'output': str(task.result)
    })
    app_logger.info(
        'Finished operations on /status/task/{0} GET Request.'.format(
            task_id))
def on_post(self, req, resp):
    """Kick off provenance indexing into Elasticsearch."""
    try:
        queued = execute_indexing()
        resp.body = json.dumps({'taskID': queued.id})
        resp.content_type = 'application/json'
        resp.status = falcon.HTTP_200
    except Exception:
        # Any failure here means a backing service is unreachable.
        raise falcon.HTTPBadGateway(
            'Services not found',
            'Could not find Services for either ldFrame, esIndexing or both.'
        )
    app_logger.info('Accepted POST Request for /index/prov.')
def _graph_health(self):
    """Do the Health check for Graph Store.

    :return: True when the store answers its ping endpoint.
    :raises ConnectionError: when the store cannot be reached.
    """
    try:
        request = requests.get("{0}ping".format(self.server_address))
    except Exception as error:
        app_logger.error('Something is wrong: {0}'.format(error))
        # The former local "status" flag was dead code: it was assigned
        # but never read because this branch always raises.
        raise ConnectionError(
            'Tried getting graph health, with error {}'.format(error))
    else:
        app_logger.info('Response from Graph Store is {0}'.format(request))
        return True
def _graph_add(self, named_graph, data):
    """Push Turtle data into a named graph on the Graph Store."""
    endpoint = "{0}/data?graph={1}".format(self.request_address,
                                           named_graph)
    headers = {'content-type': "text/turtle", 'cache-control': "no-cache"}
    try:
        request = requests.post(endpoint, data=data, headers=headers)
    except Exception as error:
        app_logger.error('Something is wrong: {0}'.format(error))
        raise
    else:
        app_logger.info('Updated named graph: {0}.'.format(named_graph))
        app_logger.info('Graph Store response: {0}.'.format(request.text))
        return request.text
def _graph_sparql(self, named_graph, query):
    """Execute SPARQL query on the Graph Store.

    :param named_graph: default graph URI for the query.
    :param query: SPARQL query string.
    :return: query result serialized as XML.
    """
    store_api = "{0}/query".format(self.request_address)
    try:
        sparql = SPARQLWrapper(store_api)
        # add a default graph, though that can also be in the query string
        sparql.addDefaultGraph(named_graph)
        sparql.setQuery(query)
        data = sparql.query().convert()
    except Exception as error:
        app_logger.error('Something is wrong: {0}'.format(error))
        raise
    else:
        # Fixed log-message typo: "Execture" -> "Executed".
        app_logger.info(
            'Executed SPARQL query on named graph: {0}.'.format(
                named_graph))
        return data.toxml()
def _drop_graph(self, named_graph):
    """Drop named graph from Graph Store.

    :param named_graph: URI of the graph to drop.
    :return: raw response text from the update endpoint.
    """
    drop_query = quote(" DROP GRAPH <{0}>".format(named_graph))
    payload = "update={0}".format(drop_query)
    headers = {
        'content-type': "application/x-www-form-urlencoded",
        'cache-control': "no-cache"
    }
    try:
        request = requests.post("{0}/update".format(self.request_address),
                                data=payload,
                                headers=headers)
    except Exception as error:
        app_logger.error('Something is wrong: {0}'.format(error))
        # Bare raise preserves the original traceback ("raise error"
        # resets it) and matches the other Graph Store methods.
        raise
    else:
        app_logger.info('Deleted named graph: {0}.'.format(named_graph))
        return request.text
def handle_message(self, message):
    """Handle provenance message.

    Accepts a single provenance object (dict) or a batch (list) and
    queues a Celery task per object. Errors are logged rather than
    re-raised so the consumer keeps running.
    """
    prov = json.loads(message.body)
    try:
        if isinstance(prov, dict):
            response = prov_task.delay(prov["provenance"], prov["payload"])
            result = {'task_id': response.id}
        elif isinstance(prov, list):
            tasks = []
            for obj in prov:
                response = prov_task.delay(obj["provenance"],
                                           obj["payload"])
                tasks.append(response.id)
            result = {'task_id': tasks}
        else:
            # Previously an unexpected payload type left "result" unbound,
            # so the info log below failed with a NameError.
            result = {'task_id': None}
            app_logger.warning(
                'Unexpected provenance payload type: {0}'.format(
                    type(prov)))
        app_logger.info(
            'Processed provenance message with result {0}.'.format(result))
    except Exception as error:
        app_logger.error('Something went wrong: {0}'.format(error))
def _graph_retrieve(self, named_graph):
    """Retrieve named graph from Graph Store.

    :param named_graph: URI of the graph to fetch.
    :return: graph payload text on HTTP 200, None when it does not exist.
    """
    try:
        request = requests.get("{0}/data?graph={1}".format(
            self.request_address, named_graph))
    except Exception as error:
        app_logger.error('Something is wrong: {0}'.format(error))
        raise
    else:
        if request.status_code == 200:
            # Fixed log-message typo: "Retrived" -> "Retrieved".
            app_logger.info(
                'Retrieved named graph: {0}.'.format(named_graph))
            return request.text
        elif request.status_code == 404:
            app_logger.info(
                'Retrieved named graph: {0} does not exist.'.format(
                    named_graph))
            return None
        # Other status codes previously fell through and returned None
        # implicitly; keep that contract but make it explicit.
        return None
def on_post(self, req, resp, parsed):
    """Queue provenance construction task(s) for the parsed POST body."""
    if isinstance(parsed, dict):
        # Single provenance object -> one queued task.
        queued = prov_task.delay(parsed["provenance"], parsed["payload"])
        resp.body = json.dumps({'taskID': queued.id})
        resp.content_type = 'application/json'
    elif isinstance(parsed, list):
        # Batch of provenance objects -> one queued task per object.
        queued_ids = [
            prov_task.delay(obj["provenance"], obj["payload"]).id
            for obj in parsed
        ]
        resp.body = json.dumps({'taskID': queued_ids})
        resp.content_type = 'application/json'
    resp.status = falcon.HTTP_200
    app_logger.info('Accepted POST Request for /prov.')
def _get_framed_provenance(self, graph, prov_doc_type):
    """Construct message for framing service."""
    # Build the framing request as literal structures; the wire format
    # is identical to assembling the dicts field by field.
    framing_input = {
        "docType": prov_doc_type,
        "ldFrame": prov_ld_frame,
        "sourceData": [{"inputType": "Graph", "input": str(graph)}]
    }
    message = {
        "provenance": {},
        "payload": {"framingServiceInput": framing_input}
    }
    frame_rpc = RpcClient(broker['host'], broker['user'], broker['pass'],
                          broker['framequeue'])
    app_logger.info('Frame service message: {0}'.format(
        json.dumps(message)))
    return frame_rpc.call(json.dumps(message))
def start(self):
    """Start the Consumers.

    Opens a channel on the AMQP connection and consumes the configured
    queue, reconnecting on broker errors and shutting down cleanly on
    Ctrl-C.

    :return:
    """
    if not self.connection:
        self.create_connection()
    while True:
        try:
            channel = self.connection.channel()
            channel.queue.declare(self.queue)
            # no_ack=False: messages must be explicitly acknowledged.
            channel.basic.consume(self, self.queue, no_ack=False)
            app_logger.info('Connected to queue {0}'.format(self.queue))
            # Blocks until consuming stops on this channel.
            channel.start_consuming(to_tuple=False)
            if not channel.consumer_tags:
                channel.close()
        except amqpstorm.AMQPError as error:
            # Broker-side failure: log, rebuild the connection, and loop
            # back into consuming.
            app_logger.error('Something went wrong: {0}'.format(error))
            self.create_connection()
        except KeyboardInterrupt:
            self.connection.close()
            break
def create_connection(self):
    """Create a connection.

    Retries on AMQP errors with a linear back-off (capped at 30s);
    gives up after max_retries attempts when that limit is set.

    :return:
    """
    attempt = 0
    while True:
        attempt += 1
        try:
            self.connection = Connection(self.hostname, self.username,
                                         self.password)
        except amqpstorm.AMQPError as error:
            app_logger.error('Something went wrong: {0}'.format(error))
            if self.max_retries and attempt > self.max_retries:
                break
            time.sleep(min(attempt * 2, 30))
        except KeyboardInterrupt:
            break
        else:
            app_logger.info(
                'Established connection with AMQP server {0}'.format(
                    self.connection))
            break
def _prov_list(self):
    """List Graph Store Provenance Named Graphs."""
    # Select only graphs whose URI matches the provenance URL prefix.
    list_query = quote(
        "select ?g {{graph ?g {{?s ?p ?o}} filter(regex(str(?g), '{0}'))}} group by ?g"
        .format(ATTXPROVURL))
    try:
        request = requests.get("{0}/sparql?query={1}".format(
            self.request_address, list_query))
    except Exception as error:
        app_logger.error('Something is wrong: {0}'.format(error))
        raise
    else:
        bindings = request.json()['results']['bindings']
        result = {
            'graphsCount': len(bindings),
            'graphs': [entry['g']['value'] for entry in bindings]
        }
        app_logger.info(
            'Constructed list of prov Named Graphs from "/{0}" dataset.'.
            format(self.dataset))
        return result
def _graph_list(self):
    """List Graph Store Named Graphs."""
    # One row per named graph with its triple count.
    list_query = quote(
        "select ?g (count(*) as ?count) {graph ?g {?s ?p ?o}} group by ?g")
    try:
        request = requests.get("{0}/sparql?query={1}".format(
            self.request_address, list_query))
    except Exception as error:
        app_logger.error('Something is wrong: {0}'.format(error))
        raise
    else:
        bindings = request.json()['results']['bindings']
        graph_entries = [
            dict([('graphURI', entry['g']['value']),
                  ('tripleCount', entry['count']['value'])])
            for entry in bindings
        ]
        result = {'graphsCount': len(bindings), 'graphs': graph_entries}
        app_logger.info(
            'Constructed list of Named Graphs from "/{0}" dataset.'.format(
                self.dataset))
        return result
def _index_prov(self):
    """Index provenance in Elasticsearch.

    Frames every provenance named graph via the framing service and
    submits the framed documents in one bulk indexing request.

    :raises AssertionError: when the framing service reports a
        non-success status for any graph.
    """
    fuseki = GraphStore()
    data = fuseki._prov_list()
    bulk_list = dict()
    if len(data['graphs']) > 0:
        for graph in data['graphs']:
            # Doc type is the suffix of the prov graph URI:
            # ".../prov_<docType>" -> "<docType>".
            prov_doc_type = str(graph).split(
                "http://data.hulib.helsinki.fi/prov_", 1)[1]
            frame_response = self._get_framed_provenance(
                graph, prov_doc_type)
            frame_data = json.loads(frame_response)
            if str(frame_data["payload"]["status"]).lower() == "success":
                bulk_list[prov_doc_type] = frame_data["payload"][
                    "framingServiceOutput"]["output"]
                # bulk_list.append()
            else:
                raise AssertionError("Frame operation did not succeed.")
        self._do_bulk_index(bulk_list)
        # NOTE(review): prov_doc_type holds the last value from the loop,
        # so only the final doc type is logged here.
        app_logger.info(
            'Indexed documents with doc type: {0}'.format(prov_doc_type))
    else:
        app_logger.warning('There are no provenance graphs.')
def on_get(self, req, resp, provID):
    """Acknowledge a GET for a single provenance document."""
    # No payload is produced yet; the endpoint only acknowledges.
    resp.status = falcon.HTTP_200
    app_logger.info(
        'Finished operations on /prov/{0} GET Request.'.format(provID))