def on_get(self, req, resp):
    """Handle GET on the provenance graph list endpoint.

    The result of ``GraphStore._prov_list()`` is serialised as indented,
    key-sorted JSON and returned with a 200 status.
    """
    prov_graphs = GraphStore()._prov_list()
    payload = json.dumps(prov_graphs, indent=1, sort_keys=True)
    resp.status = falcon.HTTP_200
    resp.content_type = 'application/json'
    resp.data = payload
    app_logger.info('Finished operations on /graph/list/prov GET Request.')
Exemple #2
0
def init_api():
    """Build the Falcon application and register every REST resource.

    The health check is mounted at ``/health``; every other resource is
    mounted below the ``/<api_version>`` prefix.

    Returns the configured ``falcon.API`` instance.
    """
    service = falcon.API()
    service.add_route('/health', HealthCheck())

    version_prefix = '/%s' % (api_version)
    # (path below the version prefix, resource class), in registration order.
    versioned_resources = (
        ('/prov', ConstructProvenance),
        ('/prov/show/{provID}', RetrieveProvenance),
        ('/index/prov', IndexProv),
        ('/status/task/{task_id}', RetrieveQueueTask),
        ('/graph/query', GraphSPARQL),
        ('/graph/update', GraphUpdate),
        ('/graph/list', GraphList),
        ('/graph/list/prov', ProvList),
        ('/graph/statistics', GraphStatistics),
        ('/graph', GraphResource),
    )
    for suffix, resource_class in versioned_resources:
        service.add_route(version_prefix + suffix, resource_class())

    app_logger.info('ProvService REST API is running.')
    return service
Exemple #3
0
 def _graph_statistics(self):
     """Aggregate request and triple statistics for the configured dataset.

     Requests ``<server_address>stats/<dataset>`` using the ``admin`` user
     and ``self.key``, then combines the request counters found in the
     response with the triple counts reported by ``_graph_list()``.

     Returns a dict with the keys ``dataset``, ``requests`` (holding
     ``totalRequests`` and ``failedRequests``) and ``totalTriples``.
     An exception raised by the HTTP call is logged and re-raised.
     """
     try:
         response = requests.get(
             "{0}stats/{1}".format(self.server_address, self.dataset),
             auth=('admin', self.key))
     except Exception as error:
         app_logger.error('Something is wrong: {0}'.format(error))
         raise

     stats = response.json()
     dataset_path = "/{0}".format(self.dataset)
     summary = {
         'dataset': dataset_path,
         'requests': {
             'totalRequests': stats['datasets'][dataset_path]['Requests'],
             'failedRequests': stats['datasets'][dataset_path]['RequestsBad'],
         },
     }
     # Triple counts arrive as strings, hence the int() conversion.
     named_graphs = self._graph_list()['graphs']
     summary['totalTriples'] = sum(
         int(entry['tripleCount']) for entry in named_graphs)
     app_logger.info(
         'Constructed statistics list for dataset: "/{0}".'.format(
             self.dataset))
     return summary
    def _do_bulk_index(self, output_list):
        """Send a bulk "replace" request to the indexing service.

        ``output_list`` is a mapping of document type to the input for that
        type; every value is passed through ``str()`` and submitted with
        ``inputType`` set to "URI".

        Returns whatever the RPC call on the index queue returns.
        """
        source_data = [
            {
                "useBulk": True,
                "docType": doc_type,
                "inputType": "URI",
                "input": str(uri),
            }
            # items() works on Python 2 and 3; iteritems() only exists on
            # Python 2 and raises AttributeError on Python 3.
            for doc_type, uri in output_list.items()
        ]
        message = {
            "provenance": {},
            "payload": {
                "indexingServiceInput": {
                    "task": "replace",
                    "targetAlias": [prov_alias],
                    "sourceData": source_data,
                },
            },
        }

        index_rpc = RpcClient(broker['host'], broker['user'], broker['pass'],
                              broker['indexqueue'])
        # Serialise once and reuse the text for both the log and the call.
        serialized = json.dumps(message)
        app_logger.info('Index service message: {0}'.format(serialized))
        return index_rpc.call(serialized)
 def _prov_activity(self, base_uri, wf_base_uri):
     """Add the triples describing an Activity to the provenance graph.

     The activity URI is created from ``base_uri`` and the agent ID.
     Type, title, description, status and configuration are added only
     when the activity description holds a truthy value for them.
     Association, time, communication, usage and generation are delegated
     to the matching ``_prov_*`` helpers.
     """
     activity = self.prov_object['activity']
     act_uri = create_uri(ATTXBase, base_uri,
                          str(self.prov_object['agent']['ID']))
     self.graph.add((act_uri, RDF.type, PROV.Activity))

     activity_type = activity.get('type')
     if activity_type:
         self.graph.add(
             (act_uri, RDF.type, create_uri(ATTXOnto, activity_type)))
     self._prov_association(act_uri, wf_base_uri)

     title = activity.get('title')
     if title:
         # The title is published both as dcterms:title and rdfs:label.
         for predicate in (DCTERMS.title, RDFS.label):
             self.graph.add((act_uri, predicate, Literal(title)))

     description = activity.get('description')
     if description:
         self.graph.add(
             (act_uri, DCTERMS.description, Literal(description)))

     status = activity.get('status')
     if status:
         self.graph.add((act_uri, ATTXOnto.hasStatus, Literal(status)))

     configuration = activity.get('configuration')
     if configuration:
         self.graph.add(
             (act_uri, ATTXOnto.hasConfig, Literal(configuration)))

     self._prov_time(act_uri)
     if activity.get('communication'):
         self._prov_communication(act_uri, wf_base_uri, base_uri)

     used = self.prov_object.get('input')
     if used:
         self._prov_usage(base_uri, act_uri, used)

     generated = self.prov_object.get('output')
     if generated:
         self._prov_generation(base_uri, act_uri, generated)

     app_logger.info(
         'Constructed provenance for Activity with URI: attx:{0}.'.format(
             base_uri))
 def _construct_provenance(self):
     """Parse the provenance object, build its graph and store it.

     The base identifier ``workflow<ID>_activity<ID>[_step<ID>]`` is built
     from the ``context`` section of the provenance object.  Dataset
     provenance is constructed when the activity type is
     "DescribeStepExecution", activity provenance otherwise.

     Returns the graph serialised as Turtle after it has been stored.
     Any exception raised while parsing is logged and re-raised.
     """
     bind_prefix(self.graph)
     try:
         context = self.prov_object['context']
         activity_id = 'activity' + str(context['activityID'])
         workflow_id = 'workflow' + str(context['workflowID'])
         # An activity without a step ID is a WorkflowExecution.
         step_id = None
         if context.get('stepID'):
             step_id = 'step' + str(context['stepID'])
         base_uri = "_".join(
             part for part in (workflow_id, activity_id, step_id) if part)
         wf_base_uri = "{0}_{1}".format(workflow_id, activity_id)
         app_logger.info('Constructed base ID: {0}'.format(base_uri))
         if self.prov_object['activity']['type'] == "DescribeStepExecution":
             self._prov_dataset(base_uri)
         else:
             self._prov_activity(base_uri, wf_base_uri)
     except Exception as error:
         app_logger.error(
             'Something is wrong with parsing the prov_object: {0}'.format(
                 error))
         # Bare raise keeps the original traceback (``raise error`` drops
         # it on Python 2) and matches the other Graph Store helpers.
         raise
     else:
         self._store_provenance(wf_base_uri)
         self._store_provenance_graph()
         return self.graph.serialize(format='turtle')
 def on_post(self, req, resp, parsed):
     """Handle POST on the SPARQL query endpoint.

     Runs ``parsed['query']`` against ``parsed['namedGraph']`` through the
     Graph Store and returns the stringified result with a 200 status.
     """
     store = GraphStore()
     query_result = store._graph_sparql(parsed['namedGraph'],
                                        parsed['query'])
     body = str(query_result)
     resp.status = falcon.HTTP_200
     resp.content_type = 'application/xml'  # the only type served for now
     resp.data = body
     app_logger.info('Finished operations on /graph/query POST Request.')
 def on_post(self, req, resp, parsed):
     """Handle POST on the graph update endpoint.

     Adds ``parsed['triples']`` to ``parsed['namedGraph']`` through the
     Graph Store and returns the store's answer as JSON with a 200 status.
     """
     store = GraphStore()
     store_reply = store._graph_add(parsed['namedGraph'], parsed['triples'])
     body = json.dumps(store_reply)
     resp.status = falcon.HTTP_200
     resp.content_type = 'application/json'
     resp.data = body
     app_logger.info('Finished operations on /graph/update POST Request.')
 def on_delete(self, req, resp):
     """Handle DELETE of a named graph.

     The graph to drop is read from the required ``uri`` query parameter
     and removed through the Graph Store.  Responds with a 200 status.
     """
     # required=True makes Falcon reject the request when ``uri`` is
     # missing, instead of sending "DROP GRAPH <None>" to the Graph Store.
     graph_uri = req.get_param('uri', required=True)
     fuseki = GraphStore()
     fuseki._drop_graph(graph_uri)
     # 'plain/text' is not a valid media type; the correct one is
     # 'text/plain'.
     resp.content_type = 'text/plain'
     app_logger.info(
         'Deleted/DELETE graph with URI: {0}.'.format(graph_uri))
     resp.status = falcon.HTTP_200
 def on_get(self, req, resp):
     """Handle GET of a named graph.

     The graph named by the ``uri`` query parameter is fetched from the
     Graph Store and returned as ``text/turtle`` with a 200 status.
     Raises ``falcon.HTTPGone`` when the store returns ``None``.
     """
     graph_uri = req.get_param('uri')
     graph_data = GraphStore()._graph_retrieve(graph_uri)
     if graph_data is None:
         raise falcon.HTTPGone()

     resp.data = str(graph_data)
     resp.content_type = 'text/turtle'
     app_logger.info('Retrieved: {0}.'.format(graph_uri))
     resp.status = falcon.HTTP_200
 def on_get(self, req, resp, task_id):
     """Handle GET on the queued task status endpoint.

     Looks up ``task_id`` with ``AsyncResult`` and answers with a JSON
     object holding the lower-cased ``status`` and the stringified
     ``output`` of the task.
     """
     task = AsyncResult(task_id)
     status = task.status.lower()
     output = str(task.result)
     resp.status = falcon.HTTP_200
     resp.body = json.dumps({'status': status, 'output': output})
     app_logger.info(
         'Finished operations on /status/task/{0} GET Request.'.format(
             task_id))
Exemple #12
0
 def on_post(self, req, resp):
     """Handle POST on the provenance indexing endpoint.

     Starts the indexing task and answers with ``{"taskID": <id>}`` as
     JSON and a 200 status.  Any failure is logged and reported to the
     client as ``falcon.HTTPBadGateway``.
     """
     try:
         response = execute_indexing()
         result = {'taskID': response.id}
         resp.body = json.dumps(result)
         resp.content_type = 'application/json'
         resp.status = falcon.HTTP_200
     except Exception as error:
         # Record the real cause: the 502 sent to the client only carries
         # a generic message, so the error would otherwise be lost.
         app_logger.error('Something is wrong: {0}'.format(error))
         raise falcon.HTTPBadGateway(
             'Services not found',
             'Could not find Services for either ldFrame, esIndexing or both.'
         )
     app_logger.info('Accepted POST Request for /index/prov.')
Exemple #13
0
 def _graph_health(self):
     """Ping the Graph Store and report whether the request went through.

     Returns ``True`` when the ping request completes without raising;
     the HTTP status of the answer is not inspected.  When the request
     raises, the error is logged and a ``ConnectionError`` is raised.
     """
     try:
         response = requests.get("{0}ping".format(self.server_address))
     except Exception as error:
         app_logger.error('Something is wrong: {0}'.format(error))
         raise ConnectionError(
             'Tried getting graph health, with error {}'.format(error))

     app_logger.info('Response from Graph Store is {0}'.format(response))
     return True
Exemple #14
0
 def _graph_add(self, named_graph, data):
     """Post Turtle data to a named graph in the Graph Store.

     ``data`` is sent as ``text/turtle`` to the ``/data`` endpoint with
     ``named_graph`` as the ``graph`` query argument.

     Returns the text of the Graph Store response.  An exception raised
     by the HTTP call is logged and re-raised.
     """
     try:
         response = requests.post(
             "{0}/data?graph={1}".format(self.request_address, named_graph),
             data=data,
             headers={
                 'content-type': "text/turtle",
                 'cache-control': "no-cache"
             })
     except Exception as error:
         app_logger.error('Something is wrong: {0}'.format(error))
         raise

     app_logger.info('Updated named graph: {0}.'.format(named_graph))
     reply = response.text
     app_logger.info('Graph Store response: {0}.'.format(reply))
     return reply
Exemple #15
0
 def _graph_sparql(self, named_graph, query):
     """Run a SPARQL query against the Graph Store ``/query`` endpoint.

     ``named_graph`` is registered as default graph for the query.

     Returns the converted query result rendered with ``toxml()``.  An
     exception raised while querying is logged and re-raised.
     """
     endpoint = "{0}/query".format(self.request_address)
     try:
         client = SPARQLWrapper(endpoint)
         # The default graph could also be given in the query string.
         client.addDefaultGraph(named_graph)
         client.setQuery(query)
         converted = client.query().convert()
     except Exception as error:
         app_logger.error('Something is wrong: {0}'.format(error))
         raise

     app_logger.info(
         'Execture SPARQL query on named graph: {0}.'.format(named_graph))
     return converted.toxml()
Exemple #16
0
 def _drop_graph(self, named_graph):
     """Drop a named graph from the Graph Store.

     Sends a URL-quoted ``DROP GRAPH <named_graph>`` statement as a form
     encoded ``update`` to the ``/update`` endpoint.

     Returns the text of the Graph Store response.  An exception raised
     by the HTTP call is logged and re-raised.
     """
     drop_query = quote(" DROP GRAPH <{0}>".format(named_graph))
     payload = "update={0}".format(drop_query)
     headers = {
         'content-type': "application/x-www-form-urlencoded",
         'cache-control': "no-cache"
     }
     try:
         request = requests.post("{0}/update".format(self.request_address),
                                 data=payload,
                                 headers=headers)
     except Exception as error:
         app_logger.error('Something is wrong: {0}'.format(error))
         # Bare raise keeps the original traceback (``raise error`` drops
         # it on Python 2) and matches the other Graph Store helpers.
         raise
     else:
         app_logger.info('Deleted named graph: {0}.'.format(named_graph))
         return request.text
 def handle_message(self, message):
     """Queue provenance task(s) for an incoming broker message.

     ``message.body`` is expected to be JSON: either one object with the
     keys ``provenance`` and ``payload``, or a list of such objects.  One
     ``prov_task`` is queued per object and the task id(s) are logged.

     Failures, including a body that is not valid JSON or has an
     unsupported shape, are logged instead of being raised.
     """
     try:
         # Decoding happens inside the try so that a malformed body is
         # logged like any other failure instead of escaping the handler.
         prov = json.loads(message.body)
         if isinstance(prov, dict):
             response = prov_task.delay(prov["provenance"], prov["payload"])
             result = {'task_id': response.id}
         elif isinstance(prov, list):
             tasks = [
                 prov_task.delay(obj["provenance"], obj["payload"]).id
                 for obj in prov
             ]
             result = {'task_id': tasks}
         else:
             # Without this branch ``result`` was unbound and the log
             # only showed a confusing NameError.
             app_logger.error(
                 'Something went wrong: unsupported provenance message '
                 'of type {0}.'.format(type(prov).__name__))
             return
         app_logger.info(
             'Processed provenance message with result {0}.'.format(result))
     except Exception as error:
         app_logger.error('Something went wrong: {0}'.format(error))
Exemple #18
0
 def _graph_retrieve(self, named_graph):
     """Fetch the content of a named graph from the Graph Store.

     Returns the response text when the store answers 200.  Returns
     ``None`` when it answers 404 (which is logged) or any other status.
     An exception raised by the HTTP call is logged and re-raised.
     """
     try:
         response = requests.get("{0}/data?graph={1}".format(
             self.request_address, named_graph))
     except Exception as error:
         app_logger.error('Something is wrong: {0}'.format(error))
         raise

     if response.status_code == 200:
         app_logger.info('Retrived named graph: {0}.'.format(named_graph))
         return response.text
     if response.status_code == 404:
         app_logger.info(
             'Retrived named graph: {0} does not exist.'.format(
                 named_graph))
     return None
 def on_post(self, req, resp, parsed):
     """Handle POST on the provenance endpoint by queueing tasks.

     ``parsed`` is either one object with the keys ``provenance`` and
     ``payload`` or a list of such objects.  One ``prov_task`` is queued
     per object and the task id (or list of ids) is returned as JSON
     under ``taskID``.  Any other input gets a 200 status without a body.
     """
     queued = True
     if isinstance(parsed, dict):
         task_ids = prov_task.delay(parsed["provenance"],
                                    parsed["payload"]).id
     elif isinstance(parsed, list):
         task_ids = [
             prov_task.delay(item["provenance"], item["payload"]).id
             for item in parsed
         ]
     else:
         queued = False

     if queued:
         resp.body = json.dumps({'taskID': task_ids})
         resp.content_type = 'application/json'
     # A synchronous variant that returned the Turtle graph directly
     # (construct_provenance, 'text/turtle') used to live here.
     resp.status = falcon.HTTP_200
     app_logger.info('Accepted POST Request for /prov.')
    def _get_framed_provenance(self, graph, prov_doc_type):
        """Ask the framing service to frame one provenance graph.

        The request carries ``prov_doc_type`` as document type, the
        module-level ``prov_ld_frame`` as frame and ``str(graph)`` as a
        single source of input type "Graph".

        Returns whatever the RPC call on the frame queue returns.
        """
        message = {
            "provenance": {},
            "payload": {
                "framingServiceInput": {
                    "docType": prov_doc_type,
                    "ldFrame": prov_ld_frame,
                    "sourceData": [
                        {"inputType": "Graph", "input": str(graph)},
                    ],
                },
            },
        }

        rpc_client = RpcClient(broker['host'], broker['user'],
                               broker['pass'], broker['framequeue'])
        serialized = json.dumps(message)
        app_logger.info('Frame service message: {0}'.format(serialized))
        return rpc_client.call(serialized)
    def start(self):
        """Start the Consumers.

        Creates a connection when there is none, then loops forever:
        open a channel, declare ``self.queue``, register this object with
        ``basic.consume`` and call ``start_consuming``.  An
        ``amqpstorm.AMQPError`` is logged and the connection is created
        again before the next iteration; a ``KeyboardInterrupt`` closes
        the connection and ends the loop.

        :return:
        """
        if not self.connection:
            self.create_connection()
        while True:
            try:
                channel = self.connection.channel()
                channel.queue.declare(self.queue)
                # ``self`` is handed to basic.consume -- presumably as the
                # message callback; confirm against the class definition.
                channel.basic.consume(self, self.queue, no_ack=False)
                app_logger.info('Connected to queue {0}'.format(self.queue))
                channel.start_consuming(to_tuple=False)
                # Reached once start_consuming returns: close the channel
                # when it has no consumer tags left.
                if not channel.consumer_tags:
                    channel.close()
            except amqpstorm.AMQPError as error:
                app_logger.error('Something went wrong: {0}'.format(error))
                # Re-establish the connection, then retry from the top.
                self.create_connection()
            except KeyboardInterrupt:
                self.connection.close()
                break
    def create_connection(self):
        """Create a connection.

        Tries to open a ``Connection`` with the configured hostname,
        username and password until it succeeds.  After a failed attempt
        it sleeps ``min(attempts * 2, 30)`` seconds before trying again.
        It gives up once ``self.max_retries`` is truthy and the number of
        attempts exceeds it, or on ``KeyboardInterrupt``.

        NOTE(review): when it gives up, the method returns without raising
        and ``self.connection`` is left as it was -- callers should check.

        :return:
        """
        attempts = 0
        while True:
            attempts += 1
            try:
                self.connection = Connection(self.hostname, self.username,
                                             self.password)
                app_logger.info(
                    'Established connection with AMQP server {0}'.format(
                        self.connection))
                break
            except amqpstorm.AMQPError as error:
                app_logger.error('Something went wrong: {0}'.format(error))
                # A falsy max_retries (e.g. 0 or None) means retry forever.
                if self.max_retries and attempts > self.max_retries:
                    break
                # Linear back-off, capped at 30 seconds.
                time.sleep(min(attempts * 2, 30))
            except KeyboardInterrupt:
                break
Exemple #23
0
 def _prov_list(self):
     """List the provenance named graphs held by the Graph Store.

     Runs a SPARQL query selecting every graph whose URI matches
     ``ATTXPROVURL`` against the ``/sparql`` endpoint.

     Returns a dict with ``graphsCount`` and ``graphs`` (the list of
     graph URIs).  An exception raised by the HTTP call is logged and
     re-raised.
     """
     encoded_query = quote(
         "select ?g {{graph ?g {{?s ?p ?o}} filter(regex(str(?g), '{0}'))}} group by ?g"
         .format(ATTXPROVURL))
     try:
         response = requests.get("{0}/sparql?query={1}".format(
             self.request_address, encoded_query))
     except Exception as error:
         app_logger.error('Something is wrong: {0}'.format(error))
         raise

     bindings = response.json()['results']['bindings']
     listing = {
         'graphsCount': len(bindings),
         'graphs': [binding['g']['value'] for binding in bindings],
     }
     app_logger.info(
         'Constructed list of prov Named Graphs from "/{0}" dataset.'.
         format(self.dataset))
     return listing
Exemple #24
0
 def _graph_list(self):
     """List every named graph in the Graph Store with its triple count.

     Runs a grouped SPARQL count query against the ``/sparql`` endpoint.

     Returns a dict with ``graphsCount`` and ``graphs``, a list of dicts
     holding ``graphURI`` and ``tripleCount`` as given in the response.
     An exception raised by the HTTP call is logged and re-raised.
     """
     encoded_query = quote(
         "select ?g (count(*) as ?count) {graph ?g {?s ?p ?o}} group by ?g")
     try:
         response = requests.get("{0}/sparql?query={1}".format(
             self.request_address, encoded_query))
     except Exception as error:
         app_logger.error('Something is wrong: {0}'.format(error))
         raise

     bindings = response.json()['results']['bindings']
     listing = {
         'graphsCount': len(bindings),
         'graphs': [
             {
                 'graphURI': binding['g']['value'],
                 'tripleCount': binding['count']['value'],
             }
             for binding in bindings
         ],
     }
     app_logger.info(
         'Constructed list of Named Graphs from "/{0}" dataset.'.format(
             self.dataset))
     return listing
    def _index_prov(self):
        """Frame every provenance graph and send the results for indexing.

        Each graph listed by ``GraphStore._prov_list()`` is framed through
        ``_get_framed_provenance``; its document type is the part of the
        graph URI after "http://data.hulib.helsinki.fi/prov_".  The framed
        outputs, keyed by document type, go to ``_do_bulk_index``.

        Raises ``AssertionError`` when a framing response does not report
        the status "success".  Only logs a warning when no graphs exist.
        """
        graphs = GraphStore()._prov_list()['graphs']
        if not graphs:
            app_logger.warning('There are no provenance graphs.')
            return

        framed_outputs = {}
        for graph in graphs:
            prov_doc_type = str(graph).split(
                "http://data.hulib.helsinki.fi/prov_", 1)[1]
            frame_data = json.loads(
                self._get_framed_provenance(graph, prov_doc_type))
            if str(frame_data["payload"]["status"]).lower() != "success":
                raise AssertionError("Frame operation did not succeed.")
            framed_outputs[prov_doc_type] = frame_data["payload"][
                "framingServiceOutput"]["output"]

        self._do_bulk_index(framed_outputs)
        # The message names the last document type that was processed.
        app_logger.info(
            'Indexed documents with doc type: {0}'.format(prov_doc_type))
 def on_get(self, req, resp, provID):
     """Handle GET for a single provenance resource.

     Only acknowledges the request with a 200 status and logs it; no
     response body is produced.
     """
     log_line = 'Finished operations on /prov/{0} GET Request.'.format(
         provID)
     resp.status = falcon.HTTP_200
     app_logger.info(log_line)