def get_graph_of_user(self, username):
        """List every graph whose creator is the given user

        Parameters
        ----------
        username : string
            Username

        Returns
        -------
        list
            List of graph URIs
        """
        launcher = SparqlQueryLauncher(self.app, self.session)
        builder = SparqlQuery(self.app, self.session)

        query = """
        SELECT DISTINCT ?graph
        WHERE {{
            ?graph dc:creator <{}> .
        }}
        """.format(username)

        _, rows = launcher.process_query(builder.prefix_query(query))

        # Keep only the graph URI of each result row
        return [row["graph"] for row in rows]
Beispiel #2
0
    def load_graph(self, rdf_graph, tmp_file_name):
        """Load a rdflib graph into the triplestore

        Serialize the graph into a tmp file inside the ttl directory, then
        ask the triplestore to fetch it with a LOAD request.

        Parameters
        ----------
        rdf_graph : Graph
            rdf graph to load
        tmp_file_name : string
            Path to a tmp file
        """
        launcher = SparqlQueryLauncher(self.app, self.session)

        tmp_path = '{}/{}'.format(self.ttl_dir, tmp_file_name)

        # 'nt' serialization takes no explicit encoding
        rdf_graph.serialize(
            format=self.serialization_format,
            encoding='utf-8' if self.serialization_format != 'nt' else None,
            destination=tmp_path)

        # Send the LOAD request for this chunk
        launcher.load_data(tmp_file_name, self.file_graph, self.host_url)

        # Keep the tmp file only when ttl debugging is enabled
        if not self.settings.getboolean('askomics', 'debug_ttl'):
            os.remove(tmp_path)
Beispiel #3
0
    def toggle_public(self, graph, public):
        """Change public status of data into the triplestore

        Parameters
        ----------
        graph : string
            Graph to update public status
        public : string
            true or false (string)
        """
        # SPARQL 1.1 Update grammar is `WITH <iri>`, not `WITH GRAPH <iri>`;
        # the extra GRAPH keyword made the update syntactically invalid
        query = '''
        WITH <{graph}>
        DELETE {{
            <{graph}> :public ?public .
        }}
        INSERT {{
            <{graph}> :public <{public}> .
        }}
        WHERE {{
            <{graph}> :public ?public .
        }}
        '''.format(graph=graph, public=public)

        query_launcher = SparqlQueryLauncher(self.app, self.session)
        query_launcher.execute_query(self.prefix_query(query))
Beispiel #4
0
    def get_all_graphs(self):
        """Get all graphs of all users

        Returns
        -------
        list
            List of graph URIs
        """

        query_launcher = SparqlQueryLauncher(self.app, self.session)
        query_builder = SparqlQuery(self.app, self.session)

        # This string is never passed through .format(), so braces must be
        # single: the original `{{ }}` escapes were sent verbatim to the
        # triplestore, producing an invalid SPARQL query
        query = """
        SELECT DISTINCT ?graph
        WHERE {
            ?graph dc:creator ?user .
        }
        """

        header, data = query_launcher.process_query(
            query_builder.prefix_query(query))

        return [result["graph"] for result in data]
Beispiel #5
0
    def set_graphs_and_endpoints(self,
                                 entities=None,
                                 graphs=None,
                                 endpoints=None):
        """Get all public and private graphs containing the given entities

        Sets self.graphs, self.endpoints and self.federated as side effects.

        Parameters
        ----------
        entities : list, optional
            list of entity uri
        graphs : list, optional
            If given, only keep graphs present in this list
        endpoints : list, optional
            If given, only keep endpoints present in this list
        """
        # Build a FILTER keeping only graphs that contain one of the entities
        substrlst = []
        filter_entity_string = ''
        if entities:
            for entity in entities:
                substrlst.append("?entity_uri = <{}>".format(entity))
            filter_entity_string = 'FILTER (' + ' || '.join(substrlst) + ')'

        # Unlogged users see public graphs only; logged users also see theirs
        filter_public_string = 'FILTER (?public = <true>)'
        if 'user' in self.session:
            filter_public_string = 'FILTER (?public = <true> || ?creator = <{}>)'.format(
                self.session["user"]["username"])

        query = '''
        SELECT DISTINCT ?graph ?endpoint
        WHERE {{
          ?graph :public ?public .
          ?graph dc:creator ?creator .
          GRAPH ?graph {{
            ?graph prov:atLocation ?endpoint .
            ?entity_uri a :entity .
          }}
          {}
          {}
        }}
        '''.format(filter_public_string, filter_entity_string)

        query_launcher = SparqlQueryLauncher(self.app, self.session)
        header, results = query_launcher.process_query(
            self.prefix_query(query))
        self.graphs = []
        self.endpoints = []
        for res in results:
            if not graphs or res["graph"] in graphs:
                self.graphs.append(res["graph"])

            # If the local triplestore url is not accessible by the federated
            # query engine, substitute the configured external endpoint url
            if res["endpoint"] == self.settings.get(
                    'triplestore',
                    'endpoint') and self.local_endpoint_f is not None:
                endpoint = self.local_endpoint_f
            else:
                endpoint = res["endpoint"]

            if not endpoints or endpoint in endpoints:
                self.endpoints.append(endpoint)

        # More than one distinct endpoint means the query must be federated
        self.endpoints = Utils.unique(self.endpoints)
        self.federated = len(self.endpoints) > 1
Beispiel #6
0
def query(self, session, info):
    """Save the query results in filesystem and db

    Parameters
    ----------
    session : dict
        AskOmics session
    info : dict
        Query info (contains the JSON graph state under "graph_state")

    Returns
    -------
    dict
        error: True if error, else False
        errorMessage: the error message of error, else an empty string
    """
    # Keep result defined so the except handler never hits a NameError
    # when the Result() constructor itself raises
    result = None
    try:
        info["celery_id"] = self.request.id
        result = Result(app, session, info, force_no_db=True)

        # Save job in database
        result.set_celery_id(self.request.id)
        result.update_db_status("started",
                                update_celery=True,
                                update_date=True)

        # launch query
        query = SparqlQuery(app, session, info["graph_state"])

        query.build_query_from_json(for_editor=False)

        headers = query.selects
        results = []
        if query.graphs:
            query_launcher = SparqlQueryLauncher(app,
                                                 session,
                                                 get_result_query=True,
                                                 federated=query.federated,
                                                 endpoints=query.endpoints)
            headers, results = query_launcher.process_query(query.sparql,
                                                            isql_api=True)

        # write result to a file
        file_size = result.save_result_in_file(headers, results)

        # Update database status
        result.update_db_status("success", size=file_size)

    except Exception as e:
        traceback.print_exc(file=sys.stdout)
        trace = traceback.format_exc()
        if result is not None:
            result.update_db_status("error",
                                    error=True,
                                    error_message=str(e),
                                    traceback=trace)
            result.rollback()
        # Re-raise so celery marks the task as failed (the original
        # `return` after this raise was unreachable and has been removed)
        raise e
    return {'error': False, 'errorMessage': ''}
Beispiel #7
0
    def create_result(self, has_form=False):
        """Create a result entry in db

        Returns
        -------
        dict
            Result info
        """
        # Query: transcript concerned by DE and included in QTL
        if has_form:
            json_path = "tests/data/graphState_simple_query_form.json"
        else:
            json_path = "tests/data/graphState_simple_query.json"

        with open(json_path, "r") as file:
            json_query = json.loads(file.read())

        # Build the query, endpoints and graphs from the graph state
        query = SparqlQuery(self.app, self.session, json_query)
        query_launcher = SparqlQueryLauncher(self.app, self.session)
        query.build_query_from_json(preview=False, for_editor=False)

        info = {
            "graph_state": json_query,
            "query": query.sparql,
            "celery_id": '00000000-0000-0000-0000-000000000000',
            "graphs": query.graphs,
            "endpoints": query.endpoints
        }

        # Save job in database
        result = Result(self.app, self.session, info)
        result.save_in_db()

        # Execute query and write result to file
        headers, results = query_launcher.process_query(query.sparql)
        file_size = result.save_result_in_file(headers, results)

        # Update database status
        result.update_db_status("success", size=file_size)

        return {
            "id": result.id,
            "path": result.file_path,
            "start": result.start,
            "end": result.end,
            "size": file_size
        }
Beispiel #8
0
def get_preview():
    """Get a preview of query

    Returns
    -------
    json
        resultsPreview: Preview of the query results
        headerPreview: Header of the results table
        error: True if error, else False
        errorMessage: the error message of error, else an empty string
    """
    try:
        # When public data is protected, unlogged users get an empty preview
        if "user" not in session and current_app.iniconfig.getboolean("askomics", "protect_public"):
            preview = []
            header = []
        else:
            data = request.get_json()
            # graphState is mandatory: reject the request early if missing
            if not (data and data.get("graphState")):
                return jsonify({
                    'resultsPreview': [],
                    'headerPreview': [],
                    'error': True,
                    'errorMessage': "Missing graphState parameter"
                }), 400

            query = SparqlQuery(current_app, session, data["graphState"], get_graphs=False)
            query.build_query_from_json(preview=True, for_editor=False)

            header = query.selects
            preview = []
            if query.graphs:
                launcher = SparqlQueryLauncher(
                    current_app,
                    session,
                    get_result_query=True,
                    federated=query.federated,
                    endpoints=query.endpoints)
                header, preview = launcher.process_query(query.sparql)

    except Exception as e:
        traceback.print_exc(file=sys.stdout)
        return jsonify({
            'resultsPreview': [],
            'headerPreview': [],
            'error': True,
            'errorMessage': str(e)
        }), 500
    return jsonify({
        'resultsPreview': preview,
        'headerPreview': header,
        'error': False,
        'errorMessage': ''
    })
Beispiel #9
0
    def set_triples_number(self):
        """Count the triples of the file graph and store it in self.ntriples"""
        query = """
        SELECT count(*) AS ?count
        FROM <{}>
        WHERE {{
            ?s ?p ?o .
        }}
        """.format(self.file_graph)

        launcher = SparqlQueryLauncher(self.app, self.session)
        # process_query returns (header, data); only the data is needed
        _, rows = launcher.process_query(query)
        self.ntriples = rows[0]["count"]
Beispiel #10
0
def sparql_query(self, session, info):
    """Save the sparql query results in filesystem and db

    Parameters
    ----------
    session : dict
        AskOmics session
    info : dict
        sparql query

    Returns
    -------
    dict
        error: True if error, else False
        errorMessage: the error message of error, else an empty string
    """
    # Keep result defined so the except handler never hits a NameError
    # when the Result() constructor itself raises
    result = None
    try:
        # NOTE(review): the sibling task stores this under "celery_id" —
        # confirm which key Result actually reads
        info["celery"] = self.request.id
        result = Result(app, session, info, force_no_db=True)

        # Save job in db
        result.set_celery_id(self.request.id)
        result.update_db_status("started", update_celery=True)

        query_launcher = SparqlQueryLauncher(app,
                                             session,
                                             get_result_query=True,
                                             federated=info["federated"],
                                             endpoints=info["endpoints"])
        header, data = query_launcher.process_query(info["sparql_query"],
                                                    isql_api=True)

        # Write results in file
        file_size = result.save_result_in_file(header, data)

        # Update database status
        result.update_db_status("success", size=file_size)

    except Exception as e:
        traceback.print_exc(file=sys.stdout)
        trace = traceback.format_exc()
        if result is not None:
            result.update_db_status("error",
                                    error=True,
                                    error_message=str(e),
                                    traceback=trace)
        # Re-raise so celery marks the task as failed (the original
        # `return` after this raise was unreachable and has been removed)
        raise e
    return {'error': False, 'errorMessage': ''}
Beispiel #11
0
    def clean_triplestore(self):
        """Drop every graph whose URI starts with the default test graph prefix"""
        query = '''
        SELECT ?graph
        WHERE {{
            GRAPH ?graph {{ ?s ?p ?o . }}
            FILTER (strStarts(str(?graph), "{}"))

        }}
        '''.format(self.get_config("triplestore", "default_graph"))

        # Empty session: this runs outside any user context
        launcher = SparqlQueryLauncher(self.app, {})

        _, rows = launcher.process_query(query)
        for row in rows:
            launcher.drop_dataset(row["graph"])
Beispiel #12
0
    def integrate(self, public=False):
        """Integrate the file into the triplestore

        Parses the file into an RDF graph, extracts and inserts its metadata,
        then loads the data either via a LOAD request (through a tmp file) or
        via INSERT queries, depending on the configured upload method.

        Parameters
        ----------
        public : bool, optional
            Integrate in private or public graph
        """
        sparql = SparqlQueryLauncher(self.app, self.session)
        tse = TriplestoreExplorer(self.app, self.session)

        self.public = public

        method = self.settings.get('triplestore', 'upload_method')

        # Load file into a RDF graph
        self.graph_chunk.parse(self.path, format=self.type_dict[self.type])

        # get metadata
        self.set_metadata()

        # Remove metadata from data
        self.delete_metadata_location()

        # insert metadata
        sparql.insert_data(self.graph_metadata, self.file_graph, metadata=True)

        if method == "load":
            # write rdf into a tmpfile and load it
            temp_file_name = 'tmp_{}_{}.{}'.format(Utils.get_random_string(5),
                                                   self.name,
                                                   self.rdf_extention)

            # Try to load data. On failure, wait 5 sec and retry 5 times
            Utils.redo_if_failure(self.log, 5, 5, self.load_graph,
                                  self.graph_chunk, temp_file_name)

        else:
            # Insert
            # Try to insert data. On failure, wait 5 sec and retry 5 times
            Utils.redo_if_failure(self.log, 5, 5, sparql.insert_data,
                                  self.graph_chunk, self.file_graph)

        # Remove cached abstraction so it is rebuilt with the new data
        tse.uncache_abstraction(public=self.public)

        self.set_triples_number()
Beispiel #13
0
def get_preview():
    """Get a preview of query

    Returns
    -------
    json
        resultsPreview: Preview of the query results
        headerPreview: Header of the results table
        error: True if error, else False
        errorMessage: the error message of error, else an empty string
    """
    try:
        data = request.get_json()

        builder = SparqlQueryBuilder(current_app, session)

        # Build a preview query (not meant for the editor) from the graph state
        query = builder.build_query_from_json(data["graphState"],
                                              preview=True,
                                              for_editor=False)
        endpoints = builder.endpoints
        federated = builder.federated

        header = builder.selects
        preview = []
        if builder.graphs:
            launcher = SparqlQueryLauncher(current_app,
                                           session,
                                           get_result_query=True,
                                           federated=federated,
                                           endpoints=endpoints)
            header, preview = launcher.process_query(query)

    except Exception as e:
        traceback.print_exc(file=sys.stdout)
        return jsonify({
            'resultsPreview': [],
            'headerPreview': [],
            'error': True,
            'errorMessage': str(e)
        }), 500
    return jsonify({
        'resultsPreview': preview,
        'headerPreview': header,
        'error': False,
        'errorMessage': ''
    })
Beispiel #14
0
 def delete_datasets(self):
     """delete the datasets from the database and the triplestore"""
     launcher = SparqlQueryLauncher(self.app, self.session)
     for dataset in self.datasets:
         # Drop the graph from the triplestore (3 tries, 1 sec apart)
         if dataset.graph_name:
             Utils.redo_if_failure(self.log, 3, 1, launcher.drop_dataset,
                                   dataset.graph_name)
         # Then remove the dataset entry from the db
         dataset.delete_from_db()
Beispiel #15
0
    def update_base_url(self, graph, old_url, new_url):
        """Replace old_url by new_url in every URI of a graph

        Each subject, predicate or object URI containing old_url is deleted
        and re-inserted with the base url rewritten.

        Parameters
        ----------
        graph : string
            Graph to update
        old_url : string
            Old base_url
        new_url : string
            New base_url
        """
        launcher = SparqlQueryLauncher(self.app, self.session)
        builder = SparqlQuery(self.app, self.session)

        query = """
        WITH <{0}>
        DELETE{{
            ?s ?p ?o
        }}
        INSERT{{
            ?s2 ?p2 ?o2
        }}
        WHERE {{
            ?s ?p ?o
            FILTER(REGEX(?s, '{1}', 'i') || REGEX(?p, '{1}', 'i') || REGEX(?o, '{1}', 'i')) .
            BIND(IF (isURI(?o), URI(REPLACE(STR(?o), '{1}', '{2}')), ?o) AS ?o2) .
            BIND(IF (isURI(?s), URI(REPLACE(STR(?s), '{1}', '{2}')), ?s) AS ?s2) .
            BIND(IF (isURI(?p), URI(REPLACE(STR(?p), '{1}', '{2}')), ?p) AS ?p2) .
        }}
        """.format(graph, old_url, new_url)

        # The update returns no useful data; results are discarded
        launcher.process_query(builder.prefix_query(query))
Beispiel #16
0
    def delete_datasets(self):
        """delete the datasets from the database and the triplestore"""
        launcher = SparqlQueryLauncher(self.app, self.session)
        explorer = TriplestoreExplorer(self.app, self.session)

        for dataset in self.datasets:
            # Drop the graph from the triplestore (3 tries, 1 sec apart)
            if dataset.graph_name:
                Utils.redo_if_failure(self.log, 3, 1, launcher.drop_dataset,
                                      dataset.graph_name)
            # Remove the dataset entry from the db
            dataset.delete_from_db()

            # Invalidate the cached abstraction for this visibility level
            explorer.uncache_abstraction(public=dataset.public)
Beispiel #17
0
    def delete_user_rdf(self, username):
        """Delete a user rdf graphs

        Delete in DB, TS and filesystem

        Parameters
        ----------
        username : string
            Username to delete
        """
        explorer = TriplestoreExplorer(self.app, self.session)
        launcher = SparqlQueryLauncher(self.app, self.session)
        # Drop every graph owned by the user (3 tries, 1 sec apart each)
        for graph in explorer.get_graph_of_user(username):
            Utils.redo_if_failure(self.log, 3, 1, launcher.drop_dataset, graph)
Beispiel #18
0
    def integrate(self, public=False):
        """Integrate the file into the triplestore

        Parameters
        ----------
        public : bool, optional
            Integrate in private or public graph
        """
        launcher = SparqlQueryLauncher(self.app, self.session)

        self.public = public

        upload_method = self.settings.get('triplestore', 'upload_method')

        # insert metadata
        launcher.insert_data(self.get_metadata(), self.file_graph, metadata=True)

        if upload_method == "load":
            # Copy the file into the ttl dir and LOAD it from there
            tmp_file_name = 'tmp_{}_{}.ttl'.format(
                Utils.get_random_string(5),
                self.name,
            )
            temp_file_path = '{}/{}'.format(self.ttl_dir, tmp_file_name)
            copyfile(self.path, temp_file_path)
            # Load the chunk
            launcher.load_data(tmp_file_name, self.file_graph, self.host_url)

            # Keep the tmp file only when ttl debugging is enabled
            if not self.settings.getboolean('askomics', 'debug_ttl'):
                os.remove(temp_file_path)
        else:
            # INSERT the raw ttl content directly
            # NOTE(review): target graph is user_graph here but file_graph in
            # the load branch — confirm this asymmetry is intended
            with open(self.path) as ttl_file:
                launcher.insert_ttl_string(ttl_file.read(), self.user_graph)

        self.set_triples_number()
Beispiel #19
0
    def get_abstraction_relations(self):
        """Get user abstraction relations from the triplestore

        Queries all visible graphs for AskomicsRelation properties and
        deduplicates them by (property, domain, range), merging the list
        of graphs each relation appears in.

        Returns
        -------
        list
            Relation dicts: uri, label, graphs, source, target
        """
        # NOTE(review): the literal below was redacted to "******" in this
        # snippet — presumably an extra FILTER restricting to the logged
        # user's own graphs; confirm against the original source
        filter_user = ""
        if self.logged_user():
            filter_user = "******".format(
                self.session["user"]["username"])

        query_launcher = SparqlQueryLauncher(self.app, self.session)
        query_builder = SparqlQueryBuilder(self.app, self.session)

        query = '''
        SELECT DISTINCT ?graph ?entity_uri ?entity_faldo ?entity_label ?node_type ?attribute_uri ?attribute_faldo ?attribute_label ?attribute_range ?property_uri ?property_faldo ?property_label ?range_uri ?category_value_uri ?category_value_label
        WHERE {{
            # Graphs
            ?graph :public ?public .
            ?graph dc:creator ?creator .
            GRAPH ?graph {{
                # Property (relations and categories)
                ?property_uri a owl:ObjectProperty .
                ?property_uri a :AskomicsRelation .
                ?property_uri rdfs:label ?property_label .
                ?property_uri rdfs:domain ?entity_uri .
                ?property_uri rdfs:range ?range_uri .
            }}
            FILTER (
                ?public = <true>{}
            )
        }}
        '''.format(filter_user)

        header, data = query_launcher.process_query(
            query_builder.prefix_query(query))

        # relations_list holds (property, source, target) keys; relations
        # holds the corresponding dicts at the same index
        relations_list = []
        relations = []

        for result in data:
            # Relation
            if "property_uri" in result:
                rel_tpl = (result["property_uri"], result["entity_uri"],
                           result["range_uri"])
                if rel_tpl not in relations_list:
                    relations_list.append(rel_tpl)
                    relation = {
                        "uri": result["property_uri"],
                        "label": result["property_label"],
                        "graphs": [
                            result["graph"],
                        ],
                        "source": result["entity_uri"],
                        "target": result["range_uri"]
                    }
                    relations.append(relation)
                else:
                    # Known relation seen in a new graph: record the graph
                    index_relation = relations_list.index(rel_tpl)
                    if result["graph"] not in relations[index_relation][
                            "graphs"]:
                        relations[index_relation]["graphs"].append(
                            result["graph"])

        return relations
Beispiel #20
0
    def get_startpoints(self):
        """Get public and user startpoints

        Queries all visible graphs for entities marked as startPoint and
        aggregates, per entity, the graphs and endpoints it appears in.

        Returns
        -------
        list
            Startpoints
        """
        # NOTE(review): the literal below was redacted to "******" in this
        # snippet — presumably an extra FILTER restricting to the logged
        # user's own graphs; confirm against the original source
        filter_user = ""
        if self.logged_user():
            filter_user = "******".format(
                self.session["user"]["username"])

        query_launcher = SparqlQueryLauncher(self.app, self.session)
        query_builder = SparqlQueryBuilder(self.app, self.session)

        query = '''
        SELECT DISTINCT ?endpoint ?graph ?entity ?entity_label ?creator ?public
        WHERE {{
            ?graph :public ?public .
            ?graph dc:creator ?creator .
            GRAPH ?graph {{
                ?graph prov:atLocation ?endpoint .
                ?entity a :entity .
                ?entity a :startPoint .
                ?entity rdfs:label ?entity_label .
            }}
            FILTER (
                ?public = <true>{}
            )
        }}
        '''.format(filter_user)

        header, data = query_launcher.process_query(
            query_builder.prefix_query(query))

        # entities holds seen entity uris; startpoints the dicts at the
        # same index
        startpoints = []
        entities = []

        for result in data:

            # Display "local" for the configured triplestore endpoint,
            # otherwise derive a short name from the endpoint url
            if result["endpoint"] == self.settings.get("triplestore",
                                                       "endpoint"):
                endpoint_name = "local"
            else:
                try:
                    endpoint_name = tld.get_fld(
                        result["endpoint"]).split('.')[0]
                except Exception:
                    endpoint_name = urlparse(result["endpoint"]).netloc

            if result["entity"] not in entities:
                # new entity
                entities.append(result['entity'])
                startpoint = {
                    "entity":
                    result["entity"],
                    "entity_label":
                    result["entity_label"],
                    "graphs": [{
                        "uri": result["graph"],
                        "public": result["public"],
                        "creator": result["creator"]
                    }],
                    "endpoints": [{
                        "url": result["endpoint"],
                        "name": endpoint_name
                    }],
                    "public":
                    self.str_to_bool(result["public"]),
                    "private":
                    not self.str_to_bool(result["public"])
                }
                startpoints.append(startpoint)
            else:
                # update existing entity
                index = entities.index(result['entity'])
                graph = {
                    "uri": result["graph"],
                    "public": result["public"],
                    "creator": result["creator"]
                }
                startpoints[index]["graphs"].append(graph)
                startpoints[index]["endpoints"].append({
                    "url":
                    result["endpoint"],
                    "name":
                    endpoint_name
                })
                # An entity is public and/or private depending on the graphs
                # it appears in
                if self.str_to_bool(result["public"]):
                    startpoints[index]["public"] = True
                else:
                    startpoints[index]["private"] = True

        return startpoints
Beispiel #21
0
    def get_abstraction_attributes(self):
        """Get user abstraction attributes from the triplestore

        Queries all visible graphs for DatatypeProperty / AskomicsCategory
        attributes and deduplicates them by (attribute, entity), merging
        graphs and category values.

        Returns
        -------
        list
            AskOmics attributes
        """
        # NOTE(review): the literal below was redacted to "******" in this
        # snippet — presumably an extra FILTER restricting to the logged
        # user's own graphs; confirm against the original source
        filter_user = ""
        if self.logged_user():
            filter_user = "******".format(
                self.session["user"]["username"])

        # Ranges treated as plain literal attributes (non-category)
        litterals = ("http://www.w3.org/2001/XMLSchema#string",
                     "http://www.w3.org/2001/XMLSchema#decimal")

        query_launcher = SparqlQueryLauncher(self.app, self.session)
        query_builder = SparqlQueryBuilder(self.app, self.session)

        query = '''
        SELECT DISTINCT ?graph ?entity_uri ?attribute_uri ?attribute_type ?attribute_faldo ?attribute_label ?attribute_range ?category_value_uri ?category_value_label
        WHERE {{
            # Graphs
            ?graph :public ?public .
            ?graph dc:creator ?creator .
            GRAPH ?graph {{
                ?attribute_uri a ?attribute_type .
                VALUES ?attribute_type {{ owl:DatatypeProperty :AskomicsCategory }}
                ?attribute_uri rdfs:label ?attribute_label .
                ?attribute_uri rdfs:domain ?entity_uri .
                ?attribute_uri rdfs:range ?attribute_range .
                # Faldo
                OPTIONAL {{
                    ?attribute_uri a ?attribute_faldo .
                    VALUES ?attribute_faldo {{ askomics:faldoStart askomics:faldoEnd askomics:faldoStrand askomics:faldoReference }}
                }}
                # Categories (DK)
                OPTIONAL {{
                    ?attribute_range askomics:category ?category_value_uri .
                    ?category_value_uri rdfs:label ?category_value_label .
                }}
            }}
            FILTER (
                ?public = <true>{}
            )
        }}
        '''.format(filter_user)

        header, data = query_launcher.process_query(
            query_builder.prefix_query(query))
        # attributes_list holds (attribute, entity) keys; attributes holds
        # the corresponding dicts at the same index
        attributes_list = []

        attributes = []

        for result in data:
            # Attributes: literal-ranged, non-category properties
            if "attribute_uri" in result and "attribute_label" in result and result[
                    "attribute_type"] != "{}AskomicsCategory".format(
                        self.settings.get("triplestore", "prefix")
                    ) and result["attribute_range"] in litterals:
                attr_tpl = (result["attribute_uri"], result["entity_uri"])
                if attr_tpl not in attributes_list:
                    attributes_list.append(attr_tpl)
                    attribute = {
                        "uri":
                        result["attribute_uri"],
                        "label":
                        result["attribute_label"],
                        "graphs": [
                            result["graph"],
                        ],
                        "entityUri":
                        result["entity_uri"],
                        "type":
                        result["attribute_range"],
                        "faldo":
                        result["attribute_faldo"]
                        if "attribute_faldo" in result else None,
                        "categories": []
                    }
                    attributes.append(attribute)
                else:
                    # Known attribute seen in a new graph: record the graph
                    index_attribute = attributes_list.index(attr_tpl)
                    if result["graph"] not in attributes[index_attribute][
                            "graphs"]:
                        attributes[index_attribute]["graphs"].append(
                            result["graph"])

                index_attribute = attributes_list.index(attr_tpl)

            # Categories: AskomicsCategory-typed properties with values
            if "attribute_uri" in result and result[
                    "attribute_type"] == "{}AskomicsCategory".format(
                        self.settings.get("triplestore", "prefix")):
                attr_tpl = (result["attribute_uri"], result["entity_uri"])
                if attr_tpl not in attributes_list:
                    attributes_list.append(attr_tpl)
                    attribute = {
                        "uri":
                        result["attribute_uri"],
                        "label":
                        result["attribute_label"],
                        "graphs": [
                            result["graph"],
                        ],
                        "entityUri":
                        result["entity_uri"],
                        "type":
                        result["attribute_type"],
                        "faldo":
                        result["attribute_faldo"]
                        if "attribute_faldo" in result else None,
                        "categories": [{
                            "uri": result["category_value_uri"],
                            "label": result["category_value_label"]
                        }]
                    }
                    attributes.append(attribute)
                else:
                    # Known category seen in a new graph: record the graph
                    index_attribute = attributes_list.index(attr_tpl)
                    if result["graph"] not in attributes[index_attribute][
                            "graphs"]:
                        attributes[index_attribute]["graphs"].append(
                            result["graph"])
                    # Store value if new
                    value = {
                        "uri": result["category_value_uri"],
                        "label": result["category_value_label"]
                    }
                    if value not in attributes[index_attribute]["categories"]:
                        attributes[index_attribute]["categories"].append(value)

        return attributes
Beispiel #22
0
    def get_abstraction_entities(self):
        """Get abstraction entities

        Query the triplestore for every entity (:entity or :bnode) visible
        to the current user: entities in public graphs, plus entities in the
        user's own graphs when logged in.

        Returns
        -------
        list
            List of entity dicts (uri, type, label, instancesHaveLabels,
            faldo, endpoints, graphs)
        """
        # Restrict results to public graphs, or to graphs owned by the
        # logged user. The extra clause is appended inside the FILTER below.
        # (The previous literal was corrupted ("******") so the username was
        # silently dropped and private graphs were never matched.)
        filter_user = ""
        if self.logged_user():
            filter_user = "|| ?creator = <{}>".format(
                self.session["user"]["username"])

        query_launcher = SparqlQueryLauncher(self.app, self.session)
        query_builder = SparqlQueryBuilder(self.app, self.session)

        query = '''
        SELECT DISTINCT ?endpoint ?graph ?entity_uri ?entity_type ?entity_faldo ?entity_label ?have_no_label
        WHERE {{
            ?graph :public ?public .
            ?graph dc:creator ?creator .
            GRAPH ?graph {{
                ?graph prov:atLocation ?endpoint .
                ?entity_uri a ?entity_type .
                VALUES ?entity_type {{ :entity :bnode }} .
                # Faldo
                OPTIONAL {{
                    ?entity_uri a ?entity_faldo .
                    VALUES ?entity_faldo {{ :faldo }} .
                }}
                # Label
                OPTIONAL {{ ?entity_uri rdfs:label ?entity_label . }}
                OPTIONAL {{ ?entity_uri :instancesHaveNoLabels ?have_no_label . }}
            }}
            FILTER (
                ?public = <true>{}
            )
        }}
        '''.format(filter_user)

        header, data = query_launcher.process_query(
            query_builder.prefix_query(query))

        entities_list = []  # list of entity uri
        entities = []  # list of entity dict

        # Entities typed with <prefix>bnode are blank nodes
        bnode_uri = "{}bnode".format(
            self.settings.get("triplestore", "prefix"))

        for result in data:
            if result["entity_uri"] not in entities_list:
                # New entity: build its dict
                entities_list.append(result["entity_uri"])
                entity = {
                    "uri": result["entity_uri"],
                    "type": "bnode"
                    if result["entity_type"] == bnode_uri else "node",
                    # Label is optional in the query
                    "label": result["entity_label"]
                    if "entity_label" in result else "",
                    # False only when :instancesHaveNoLabels is explicitly "1"
                    "instancesHaveLabels": not (
                        "have_no_label" in result
                        and result["have_no_label"] == "1"),
                    "faldo": "entity_faldo" in result,
                    "endpoints": [result["endpoint"]],
                    "graphs": [result["graph"]],
                }
                entities.append(entity)
            else:
                # Known entity: accumulate any new graph and endpoint
                index_entity = entities_list.index(result["entity_uri"])
                if result["graph"] not in entities[index_entity]["graphs"]:
                    entities[index_entity]["graphs"].append(result["graph"])
                if result["endpoint"] not in entities[index_entity][
                        "endpoints"]:
                    entities[index_entity]["endpoints"].append(
                        result["endpoint"])

        return entities
Beispiel #23
0
    def integrate(self, dataset_id=None):
        """Integrate the file into the triplestore.

        Inserts metadata, then streams the generated RDF content in chunks
        (either LOADed from a tmp file or INSERTed directly, depending on
        ``self.method``), and finally inserts abstraction/domain knowledge.

        Parameters
        ----------
        dataset_id : int, optional
            When given, integration progress is written to the database.
        """
        sparql = SparqlQueryLauncher(self.app, self.session)

        # Metadata first
        sparql.insert_data(self.get_metadata(), self.file_graph, metadata=True)

        def flush_chunk(chunk_index):
            """Send the current rdf chunk, retrying 5 times with 5s waits."""
            if self.method == 'load':
                # Write rdf into a tmp file and LOAD it
                tmp_name = 'tmp_{}_{}_chunk_{}.{}'.format(
                    Utils.get_random_string(5), self.name, chunk_index,
                    self.rdf_extention)
                Utils.redo_if_failure(self.log, 5, 5, self.load_graph,
                                      self.graph_chunk, tmp_name)
            else:
                # Direct INSERT
                Utils.redo_if_failure(self.log, 5, 5, sparql.insert_data,
                                      self.graph_chunk, self.file_graph)

        chunk_index = 0
        for _ in self.generate_rdf_content():
            # Keep accumulating until the chunk is big enough
            if self.graph_chunk.ntriple < self.max_chunk_size:
                continue

            if self.graph_chunk.percent and dataset_id:
                self.update_percent_in_db(self.graph_chunk.percent, dataset_id)

            flush_chunk(chunk_index)
            chunk_index += 1
            self.graph_chunk = RdfGraph(self.app, self.session)

        # Flush the last (possibly partial) chunk
        if self.graph_chunk.percent and dataset_id:
            self.update_percent_in_db(100, dataset_id)
        flush_chunk(chunk_index)

        # Content is inserted, now insert abstraction and domain_knowledge
        self.set_rdf_abstraction_domain_knowledge()

        if self.method == 'load':
            tmp_name = 'tmp_{}_{}_abstraction_domain_knowledge.{}'.format(
                Utils.get_random_string(5), self.name, self.rdf_extention)
            self.load_graph(self.graph_abstraction_dk, tmp_name)
        else:
            sparql.insert_data(self.graph_abstraction_dk, self.file_graph)

        self.set_triples_number()
Beispiel #24
0
 def rollback(self):
     """Drop the dataset graph from the triplestore (error recovery)."""
     launcher = SparqlQueryLauncher(self.app, self.session)
     launcher.drop_dataset(self.file_graph)
Beispiel #25
0
def query():
    """Perform a sparql query

    Returns
    -------
    json
        query results
    """
    # Parse the payload once and validate it, instead of indexing
    # request.get_json() three times: a missing key previously raised
    # KeyError and surfaced as an opaque 500. Mirrors the admin route.
    payload = request.get_json()
    if not (payload and payload.get("query")):
        return jsonify({
            'error': True,
            'errorMessage': "Missing query parameter",
            'header': [],
            'data': []
        }), 400

    q = payload['query']
    graphs = payload.get('graphs', [])
    endpoints = payload.get('endpoints', [])

    # Local endpoint: prefer the federation one when configured
    local_endpoint_f = current_app.iniconfig.get('triplestore', 'endpoint')
    try:
        local_endpoint_f = current_app.iniconfig.get('federation',
                                                     'local_endpoint')
    except Exception:
        pass

    # No graph selected in local TS
    if not graphs and local_endpoint_f in endpoints:
        return jsonify({
            'error': True,
            'errorMessage': "No graph selected in local triplestore",
            'header': [],
            'data': []
        }), 500

    # No endpoint selected
    if not endpoints:
        return jsonify({
            'error': True,
            'errorMessage': "No endpoint selected",
            'header': [],
            'data': []
        }), 500

    try:
        query_builder = SparqlQueryBuilder(current_app, session)

        query_builder.set_graphs_and_endpoints(graphs=graphs,
                                               endpoints=endpoints)

        federated = query_builder.is_federated()
        replace_froms = query_builder.replace_froms()

        query = query_builder.format_query(q,
                                           replace_froms=replace_froms,
                                           federated=federated)
        header = query_builder.selects
        data = []
        # Only launch the query when something is actually selected
        if query_builder.graphs or query_builder.endpoints:
            query_launcher = SparqlQueryLauncher(current_app,
                                                 session,
                                                 get_result_query=True,
                                                 federated=federated,
                                                 endpoints=endpoints)
            header, data = query_launcher.process_query(query)

    except Exception as e:
        # Route boundary: log the error and return it as a JSON payload
        current_app.logger.error(str(e).replace('\\n', '\n'))
        traceback.print_exc(file=sys.stdout)
        return jsonify({
            'error': True,
            'errorMessage': str(e).replace('\\n', '\n'),
            'header': [],
            'data': []
        }), 500

    return jsonify({'header': header, 'data': data})
Beispiel #26
0
def query():
    """Perform a sparql query

    Returns
    -------
    json
        query results
    """
    # Admin-only endpoint
    if not can_access(session['user']):
        return jsonify({"error": True, "errorMessage": "Admin required"}), 401

    payload = request.get_json()
    if not (payload and payload.get("query")):
        return jsonify({
            'error': True,
            'errorMessage': "Missing query parameter",
            'header': [],
            'data': []
        }), 400

    raw_query = payload['query']
    graphs = payload.get('graphs', [])
    endpoints = payload.get('endpoints', [])

    # Prefer the federation local endpoint when one is configured
    local_endpoint_f = current_app.iniconfig.get('triplestore', 'endpoint')
    try:
        local_endpoint_f = current_app.iniconfig.get('federation',
                                                     'local_endpoint')
    except Exception:
        pass

    # The local triplestore is targeted but no graph was picked
    if not graphs and local_endpoint_f in endpoints:
        return jsonify({
            'error': True,
            'errorMessage': "No graph selected in local triplestore",
            'header': [],
            'data': []
        }), 400

    # Nothing to query at all
    if not endpoints:
        return jsonify({
            'error': True,
            'errorMessage': "No endpoint selected",
            'header': [],
            'data': []
        }), 400

    try:
        builder = SparqlQuery(current_app, session, get_graphs=False)
        builder.set_graphs_and_endpoints(graphs=graphs, endpoints=endpoints)

        federated = builder.is_federated()
        replace_froms = builder.replace_froms()

        formatted = builder.format_query(raw_query,
                                         replace_froms=replace_froms,
                                         federated=federated)

        header = builder.selects
        rows = []
        # Only launch the query when something is actually selected
        if builder.graphs or builder.endpoints:
            launcher = SparqlQueryLauncher(current_app,
                                           session,
                                           get_result_query=True,
                                           federated=federated,
                                           endpoints=endpoints)
            header, rows = launcher.process_query(formatted)

    except Exception as e:
        # Route boundary: log the error and return it as a JSON payload
        current_app.logger.error(str(e).replace('\\n', '\n'))
        traceback.print_exc(file=sys.stdout)
        return jsonify({
            'error': True,
            'errorMessage': str(e).replace('\\n', '\n'),
            'header': [],
            'data': []
        }), 500

    return jsonify({'header': header, 'data': rows})