Example #1
    def insert_metadatas(self, accessL):
        """
        Insert the metadata into the parent graph
        """

        self.log.debug('--- insert_metadatas ---')

        sqb = SparqlQueryBuilder(self.settings, self.session)
        query_launcher = QueryLauncher(self.settings, self.session)

        access_level = 'public' if accessL else 'private'

        ttl = '<' + self.graph + '> prov:generatedAtTime "' + self.timestamp + '"^^xsd:dateTime .\n'
        ttl += '<' + self.graph + '> dc:creator "' + self.session['username'] + '" .\n'
        ttl += '<' + self.graph + '> :accessLevel "' + access_level + '" .\n'
        ttl += '<' + self.graph + '> foaf:Group "' + self.session['group'] + '" .\n'
        ttl += '<' + self.graph + '> prov:wasDerivedFrom "' + self.name + '" .\n'
        ttl += '<' + self.graph + '> dc:hasVersion "' + get_distribution('Askomics').version + '" .\n'
        ttl += '<' + self.graph + '> prov:describesService "' + os.uname()[1] + '" .\n'

        if self.is_defined("askomics.endpoint"):
            ttl += '<' + self.graph + '> prov:atLocation "' + self.get_param("askomics.endpoint") + '" .\n'
        else:
            raise ValueError("askomics.endpoint does not exit.")

        sparql_header = sqb.header_sparql_config('')

        query_launcher.insert_data(ttl, self.graph, sparql_header)
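
For orientation, here is a minimal sketch of the kind of SPARQL update that insert_data(ttl, graph, header) plausibly issues; build_insert_query is a hypothetical stand-in for the real QueryLauncher internals, which are not shown in these examples:

    # Hypothetical reconstruction: prefix header first, then an INSERT DATA
    # block scoped to the target named graph.
    def build_insert_query(ttl, graph, header):
        return header + '\nINSERT DATA {\n  GRAPH <' + graph + '> {\n' + ttl + '  }\n}'

    query = build_insert_query(
        '<urn:example:graph> dc:creator "jdoe" .\n',     # hypothetical payload
        'urn:example:graph',                             # hypothetical graph URI
        'PREFIX dc: <http://purl.org/dc/elements/1.1/>')
    print(query)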
Example #2
    def create_user_graph(self):
        """
        Create a subgraph for the user. All of the user's data will be inserted into this subgraph
        """

        query_launcher = QueryLauncher(self.settings, self.session)
        sqa = SparqlQueryAuth(self.settings, self.session)

        ttl = '<' + self.settings['askomics.graph'] + ':' + self.username + \
            '> rdfg:subGraphOf <' + self.settings['askomics.graph'] + '>'

        header_ttl = sqa.header_sparql_config(ttl)
        query_launcher.insert_data(ttl, self.settings["askomics.graph"], header_ttl)
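
For reference, with hypothetical settings the ttl string built above evaluates to a single rdfg:subGraphOf triple linking the user's graph to the main graph:

    # Hypothetical values; the real ones come from settings and the session.
    settings = {'askomics.graph': 'urn:sparql:askomics'}
    username = 'jdoe'
    ttl = ('<' + settings['askomics.graph'] + ':' + username +
           '> rdfg:subGraphOf <' + settings['askomics.graph'] + '>')
    print(ttl)
    # <urn:sparql:askomics:jdoe> rdfg:subGraphOf <urn:sparql:askomics>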
Example #3
    def get_metadatas(self):
        """
        Create metadata and insert it into the AskOmics main graph.
        """
        self.log.debug("====== INSERT METADATAS ======")
        sqb = SparqlQueryBuilder(self.settings, self.session)
        ql = QueryLauncher(self.settings, self.session)

        ttlMetadatas = "<" + self.metadatas['graphName'] + "> " + "prov:generatedAtTime " + '"' + self.metadatas['loadDate'] + '"^^xsd:dateTime .\n'
        ttlMetadatas += "<" + self.metadatas['graphName'] + "> " + "dc:creator " + '"' + self.metadatas['username'] + '"^^xsd:string  .\n'
        ttlMetadatas += "<" + self.metadatas['graphName'] + "> " + "prov:wasDerivedFrom " + '"' + self.metadatas['fileName'] + '"^^xsd:string .\n'
        ttlMetadatas += "<" + self.metadatas['graphName'] + "> " + "dc:hasVersion " + '"' + self.metadatas['version'] + '"^^xsd:string .\n'
        ttlMetadatas += "<" + self.metadatas['graphName'] + "> " + "prov:describesService " + '"' + self.metadatas['server'] + '"^^xsd:string .'

        sparqlHeader = sqb.header_sparql_config("")

        ql.insert_data(ttlMetadatas, self.get_param("askomics.graph"), sparqlHeader)
Example #4
    def persist_user(self, host_url):
        """
        Persist all user info in the triplestore (TS)
        """
        query_launcher = QueryLauncher(self.settings, self.session)
        sqa = SparqlQueryAuth(self.settings, self.session)

        # Check if this user is the first one; if so, make them an unblocked admin
        if self.get_number_of_users() == 0:
            admin = 'true'
            blocked = 'false'
            self.set_admin(True)
            self.set_blocked(False)
        else:
            admin = 'false'
            blocked = 'true'
            self.set_admin(False)
            self.set_blocked(True)

        chunk = ':' + self.username + ' rdf:type foaf:Person ;\n'
        indent = len(self.username) * ' ' + ' '
        chunk += indent + 'foaf:name \"' + self.username + '\" ;\n'
        chunk += indent + ':password \"' + self.sha256_pw + '\" ;\n'
        chunk += indent + 'foaf:mbox <mailto:' + self.email + '> ;\n'
        chunk += indent + ':isadmin \"' + admin + '\"^^xsd:boolean ;\n'
        chunk += indent + ':isblocked \"' + blocked + '\"^^xsd:boolean ;\n'
        chunk += indent + ':randomsalt \"' + self.randomsalt + '\" .\n'

        header_ttl = sqa.header_sparql_config(chunk)
        query_launcher.insert_data(chunk, self.settings["askomics.users_graph"],
                                   header_ttl)

        emails = self.get_admins_emails()

        # Send a mail to all admins
        body = 'Hello,\n'
        body += 'User \'' + self.username + '\' just created an account on Askomics.\n'
        body += 'Log into the admin interface in order to unblock this user, or contact him '
        body += 'at ' + self.email + '.\n\n\n'
        body += host_url + '\n\n'

        self.send_mails(host_url, emails,
                        '[AskOmics@' + host_url + '] New account created',
                        body)
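
The indent computed above is len(username) + 1 spaces, so each continuation line starts just under the subject token. A minimal demonstration with a hypothetical username (Examples #11 through #13 below show fully expanded user chunks of this shape):

    username = 'jdoe'                       # hypothetical
    indent = len(username) * ' ' + ' '      # len(username) + 1 spaces
    chunk = ':' + username + ' rdf:type foaf:Person ;\n'
    chunk += indent + 'foaf:name "' + username + '" .\n'
    print(chunk)
    # :jdoe rdf:type foaf:Person ;
    #      foaf:name "jdoe" .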
Example #5
    def load_data_from_file(self, fp, urlbase):
        """
        Load a locally created TTL file into the triplestore, either over HTTP (with load_data(url)) or by file name for Fuseki (with fuseki_load_data(fp.name)).

        :param fp: a file handle for the file to load
        :param urlbase: the base URL of the current askomics instance. It is used to let triplestores access some askomics temporary ttl files over http.
        :return: a dictionary with information on the success or failure of the operation
        """
        if not fp.closed:
            fp.flush() # Required: otherwise the data might not actually be written to the file before it is sent to the triplestore

        sqb = SparqlQueryBuilder(self.settings, self.session)
        ql = QueryLauncher(self.settings, self.session)
        graphName = "askomics:graph:" + self.name + '_' + self.timestamp
        self.metadatas['graphName'] = graphName
        ttlNamedGraph = "<" + graphName + "> " + "rdfg:subGraphOf" + " <" + self.get_param("askomics.graph") + "> ."
        sparqlHeader = sqb.header_sparql_config("")
        ql.insert_data(ttlNamedGraph, self.get_param("askomics.graph"), sparqlHeader)

        url = urlbase+"/ttl/"+os.path.basename(fp.name)
        self.log.debug(url)
        data = {}
        try:
            if self.is_defined("askomics.file_upload_url"):
                queryResults = ql.upload_data(fp.name, graphName)
                self.metadatas['server'] = queryResults.headers['Server']
                self.metadatas['loadDate'] = self.timestamp
            else:
                queryResults = ql.load_data(url, graphName)
                self.metadatas['server'] = queryResults.info()['server']
                self.metadatas['loadDate'] = self.timestamp
            data['status'] = 'ok'
        except Exception as e:
            self._format_exception(e, data=data)
        finally:
            if self.settings["askomics.debug"]:
                data['url'] = url
            else:
                os.remove(fp.name) # Remove the temp file (this finally clause also runs when the load failed)

        self.get_metadatas()

        return data
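
With a hypothetical file name and timestamp, the graph naming scheme above yields a named-graph URI and registration triple like the following (the parent graph urn:sparql:askomics is also made up):

    name, timestamp = 'personne.tsv', '1507300000.12'   # hypothetical
    graphName = 'askomics:graph:' + name + '_' + timestamp
    ttlNamedGraph = '<' + graphName + '> rdfg:subGraphOf <urn:sparql:askomics> .'
    print(ttlNamedGraph)
    # <askomics:graph:personne.tsv_1507300000.12> rdfg:subGraphOf <urn:sparql:askomics> .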
Example #6
    def importMoSate(self, mo, state):
        '''
            Import into the TS all triples necessary to define an AskOmics module
        '''

        rdf = ":"+self.escape['entity'](mo['module'])+" rdfs:label " + self.escape['text'](mo['module'])+";\n"
        rdf += " rdfs:comment " + self.escape['text'](mo['comment'])+";\n"
        rdf += " :module_version " + self.escape['text'](mo['version'])+";\n"
        rdf += " :module_state " + self.escape['text'](state)+""
        if state == 'ok':
            rdf += ";\n :module_graph " + '<' + mo['graph'] + '>.\n'
        else:
            rdf += ".\n"

        sqb = SparqlQueryBuilder(self.settings, self.session)
        ql = QueryLauncher(self.settings, self.session)
        sh = sqb.header_sparql_config('')

        ql.insert_data(rdf, self.graph_modules, sh)
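
To make the string assembly above concrete, here is a worked sketch with the escape helpers stubbed out (the stubs and the module description are hypothetical; only the assembly mirrors the method):

    escape = {
        'entity': lambda s: s.replace(' ', '_'),               # hypothetical stub
        'text': lambda s: '"' + s.replace('"', '\\"') + '"',   # hypothetical stub
    }
    mo = {'module': 'demo', 'comment': 'a demo module',
          'version': '1.0', 'graph': 'urn:test:demo'}          # hypothetical
    state = 'ok'

    rdf = ':' + escape['entity'](mo['module']) + ' rdfs:label ' + escape['text'](mo['module']) + ';\n'
    rdf += ' rdfs:comment ' + escape['text'](mo['comment']) + ';\n'
    rdf += ' :module_version ' + escape['text'](mo['version']) + ';\n'
    rdf += ' :module_state ' + escape['text'](state)
    rdf += ';\n :module_graph <' + mo['graph'] + '>.\n'
    print(rdf)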
Example #7
    def load_data_from_file(self, fp, urlbase):
        """
        Load a locally created TTL file into the triplestore, either over HTTP (with load_data(url)) or by file name for Fuseki (with fuseki_load_data(fp.name)).

        :param fp: a file handle for the file to load
        :param urlbase: the base URL of the current askomics instance. It is used to let triplestores access some askomics temporary ttl files over http.
        :return: a dictionary with information on the success or failure of the operation
        """
        if not fp.closed:
            fp.flush() # Required: otherwise the data might not actually be written to the file before it is sent to the triplestore

        sqb = SparqlQueryBuilder(self.settings, self.session)
        ql = QueryLauncher(self.settings, self.session)
        graphName = "urn:sparql:" + self.name + '_' + self.timestamp
        self.metadatas['graphName'] = graphName
        ttlNamedGraph = "<" + graphName + "> " + "rdfg:subGraphOf" + " <" + self.get_param("askomics.graph") + "> ."
        sparqlHeader = sqb.header_sparql_config("")
        ql.insert_data(ttlNamedGraph, self.get_param("askomics.graph"), sparqlHeader)

        url = urlbase+"/ttl/"+os.path.basename(fp.name)
        self.log.debug(url)
        data = {}
        try:
            if self.is_defined("askomics.file_upload_url"):
                queryResults = ql.upload_data(fp.name, graphName)
                self.metadatas['server'] = queryResults.headers['Server']
                self.metadatas['loadDate'] = self.timestamp
            else:
                queryResults = ql.load_data(url, graphName)
                self.metadatas['server'] = queryResults.info()['server']
                self.metadatas['loadDate'] = self.timestamp
            data['status'] = 'ok'
        except Exception as e:
            self._format_exception(e, data=data)
        finally:
            if self.settings["askomics.debug"]:
                data['url'] = url
            else:
                os.remove(fp.name) # Remove the temp file (this finally clause also runs when the load failed)

        self.get_metadatas()

        return data
Example #8
    def test_statistics(self):

        # Load test files
        self.it.empty()
        self.it.load_test2()

        ql = QueryLauncher(self.settings, self.request.session)

        queryResults = ql.insert_data(':sujet :predicat :objet .', 'test',
                                      'prefix :<test>')
        server = queryResults.info()['server']
        self.request.json_body = {'namedGraphs': ['test']}

        self.askview.delete_graph()

        data = self.askview.statistics()

        assert data['ntriples'] == 279
        assert data['nclasses'] == '6'
        assert data['nentities'] == '19'
        assert data['ngraphs'] == '5'
        assert data['class'] == {
            'Personne': {
                'count': '7'
            },
            'Sexe': {
                'count': '2'
            },
            'Instrument': {
                'count': '2'
            }
        }

        for key in data['metadata'].keys():
            self.assertRegexpMatches(
                key,
                r'^urn:sparql:(instrument|enseigne|connait|joue|personne)\.tsv_[0-9]+\.[0-9]+$'
            )
            for key2 in data['metadata'][key]:
                self.assertRegexpMatches(
                    key2, r'^(version|username|filename|loadDate|server)$')
                if key2 == 'version':
                    assert data['metadata'][key][key2] == '2.0'
                elif key2 == 'username':
                    assert data['metadata'][key][key2] == getpass.getuser()
                elif key2 == 'filename':
                    self.assertRegexpMatches(
                        data['metadata'][key][key2],
                        r'^(instrument|enseigne|connait|joue|personne)\.tsv$')
                elif key2 == 'loadDate':
                    self.assertRegexpMatches(data['metadata'][key][key2],
                                             r'^[0-9]+\.[0-9]+$')
                elif key2 == 'server':
                    assert data['metadata'][key][key2] == server
Example #9
    def persist_user(self, host_url):
        """
        Persist all user info in the triplestore (TS)
        """
        query_launcher = QueryLauncher(self.settings, self.session)
        sqa = SparqlQueryAuth(self.settings, self.session)

        # Check if this user is the first one; if so, make them an unblocked admin
        if self.get_number_of_users() == 0:
            admin = 'true'
            blocked = 'false'
            self.set_admin(True)
            self.set_blocked(False)
        else:
            admin = 'false'
            blocked = 'true'
            self.set_admin(False)
            self.set_blocked(True)

        chunk = ':' + self.username + ' rdf:type foaf:Person ;\n'
        indent = len(self.username) * ' ' + ' '
        chunk += indent + 'foaf:name \"' + self.username + '\" ;\n'
        chunk += indent + ':password \"' + self.sha256_pw + '\" ;\n'
        chunk += indent + 'foaf:mbox <mailto:' + self.email + '> ;\n'
        chunk += indent + ':isadmin \"' + admin + '\"^^xsd:boolean ;\n'
        chunk += indent + ':isblocked \"' + blocked + '\"^^xsd:boolean ;\n'
        chunk += indent + ':randomsalt \"' + self.randomsalt + '\" .\n'

        header_ttl = sqa.header_sparql_config(chunk)
        query_launcher.insert_data(chunk, self.settings["askomics.users_graph"], header_ttl)

        emails = self.get_admins_emails()

        # Send a mail to all admins
        body = 'Hello,\n'
        body += 'User \'' + self.username + '\' just created an account on Askomics.\n'
        body += 'Log into the admin interface in order to unblock this user, or contact him '
        body += 'at ' + self.email + '.\n\n\n'
        body += host_url + '\n\n'

        self.send_mails(host_url, emails, '[AskOmics@' + host_url + '] New account created', body)
Example #10
    def importMoSate(self, mo, state):
        '''
            Import into the TS all triples necessary to define an AskOmics module
        '''

        rdf = ":" + self.escape['entity'](
            mo['module']) + " rdfs:label " + self.escape['text'](
                mo['module']) + ";\n"
        rdf += " rdfs:comment " + self.escape['text'](mo['comment']) + ";\n"
        rdf += " :module_version " + self.escape['text'](mo['version']) + ";\n"
        rdf += " :module_state " + self.escape['text'](state) + ""
        if state == 'ok':
            rdf += ";\n :module_graph " + '<' + mo['graph'] + '>.\n'
        else:
            rdf += ".\n"

        sqb = SparqlQueryBuilder(self.settings, self.session)
        ql = QueryLauncher(self.settings, self.session)
        sh = sqb.header_sparql_config('')

        ql.insert_data(rdf, self.graph_modules, sh)
Example #11
    def add_jdoe_in_users(self):
        """Insert a John Doe User

        username is jdoe
        mail is [email protected]
        password is iamjohndoe
        not admin and not blocked
        """

        query_launcher = QueryLauncher(self.settings, self.request.session)
        sqa = SparqlQueryAuth(self.settings, self.request.session)
        chunk = ':jdoe rdf:type foaf:Person ;\n'
        indent = len('jdoe') * ' ' + ' '
        chunk += indent + 'foaf:name \"jdoe\" ;\n'
        chunk += indent + ':password \"23df582b51c3482b677c8eac54872b8bd0a49bfadc853628b8b8bd4806147b54\" ;\n' #iamjohndoe
        chunk += indent + 'foaf:mbox <mailto:[email protected]> ;\n'
        chunk += indent + ':isadmin \"false\"^^xsd:boolean ;\n'
        chunk += indent + ':isblocked \"false\"^^xsd:boolean ;\n'
        chunk += indent + ':randomsalt \"00000000000000000000\" .\n'

        header_ttl = sqa.header_sparql_config(chunk)
        query_launcher.insert_data(chunk, 'urn:sparql:test_askomics:users', header_ttl)
Example #12
    def add_jsmith_in_users(self):
        """Insert a Jane Smith User

        username is jsmith
        mail is [email protected]
        password is iamjanesmith
        not admin and not blocked
        """

        query_launcher = QueryLauncher(self.settings, self.request.session)
        sqa = SparqlQueryAuth(self.settings, self.request.session)
        chunk = ':jsmith rdf:type foaf:Person ;\n'
        indent = len('jsmith') * ' ' + ' '
        chunk += indent + 'foaf:name \"jsmith\" ;\n'
        chunk += indent + ':password \"db64872417dcc1488a72b034cbe75268f52eb2486807af096dd2f4c620694efc\" ;\n' #iamjanesmith
        chunk += indent + 'foaf:mbox <mailto:[email protected]> ;\n'
        chunk += indent + ':isadmin \"false\"^^xsd:boolean ;\n'
        chunk += indent + ':isblocked \"false\"^^xsd:boolean ;\n'
        chunk += indent + ':randomsalt \"00000000000000000000\" .\n'

        header_ttl = sqa.header_sparql_config(chunk)
        query_launcher.insert_data(chunk, 'urn:sparql:test_askomics:users', header_ttl)
Example #13
    def add_another_admin_in_users(self):
        """Insert an admin User

        username is otheradmin
        mail is [email protected]
        password is iamadmin
        admin and not blocked
        """

        query_launcher = QueryLauncher(self.settings, self.request.session)
        sqa = SparqlQueryAuth(self.settings, self.request.session)
        chunk = ':otheradmin rdf:type foaf:Person ;\n'
        indent = len('otheradmin') * ' ' + ' '
        chunk += indent + 'foaf:name \"otheradmin\" ;\n'
        chunk += indent + ':password \"682cf6a90d94758bdedcf854e8d784e3d5d360a36cd65a2c49eaff214998c23a\" ;\n' #iamadmin
        chunk += indent + 'foaf:mbox <mailto:[email protected]> ;\n'
        chunk += indent + ':isadmin \"true\"^^xsd:boolean ;\n'
        chunk += indent + ':isblocked \"false\"^^xsd:boolean ;\n'
        chunk += indent + ':randomsalt \"00000000000000000000\" .\n'

        header_ttl = sqa.header_sparql_config(chunk)
        query_launcher.insert_data(chunk, 'urn:sparql:test_askomics:users', header_ttl)
Example #14
    def test_statistics(self):

        # Load test files
        self.it.empty()
        self.it.load_test2()

        ql = QueryLauncher(self.settings, self.request.session)

        queryResults = ql.insert_data(':sujet :predicat :objet .', 'test', 'prefix :<test>')
        server = queryResults.info()['server']
        self.request.json_body = {'namedGraphs': ['test']}

        self.askview.delete_graph()

        data = self.askview.statistics()

        assert data['ntriples'] == 279
        assert data['nclasses'] == '6'
        assert data['nentities'] == '19'
        assert data['ngraphs'] == '5'
        assert data['class'] == {
            'Personne': {'count': '7'},
            'Sexe': {'count': '2'},
            'Instrument': {'count': '2'}
        }

        for key in data['metadata'].keys():
            self.assertRegexpMatches(key, r'^urn:sparql:(instrument|enseigne|connait|joue|personne)\.tsv_[0-9]+\.[0-9]+$')
            for key2 in data['metadata'][key]:
                self.assertRegexpMatches(key2, r'^(version|username|filename|loadDate|server)$')
                if key2 == 'version':
                    assert data['metadata'][key][key2] == '2.0'
                elif key2 == 'username':
                    assert data['metadata'][key][key2] == getpass.getuser()
                elif key2 == 'filename':
                    self.assertRegexpMatches(data['metadata'][key][key2], r'^(instrument|enseigne|connait|joue|personne)\.tsv$')
                elif key2 == 'loadDate':
                    self.assertRegexpMatches(data['metadata'][key][key2], r'^[0-9]+\.[0-9]+$')
                elif key2 == 'server':
                    assert data['metadata'][key][key2] == server
Example #15
    def persist(self, urlbase, public):
        """
        Insert the TTL source file into the triplestore (TS)
        """
        pathttl = self.get_rdf_user_directory()
        shutil.copy(self.path, pathttl)
        data = None

        method = 'load'
        if self.get_param("askomics.upload_user_data_method"):
            method = self.get_param("askomics.upload_user_data_method")

        if method == 'load':
            # Use a context manager so the file handle is closed once the load is done
            with open(pathttl + '/' + os.path.basename(self.path)) as fil_open:
                data = self.load_data_from_file(fil_open, urlbase)

        else:
            chunk = self.file_get_contents(pathttl + '/' + os.path.basename(self.path))
            query_launcher = QueryLauncher(self.settings, self.session)
            data = query_launcher.insert_data(chunk, self.graph, '')

        self.insert_metadatas(public)
        return data
Example #16
    def persist(self, urlbase, public):
        """
        Store the current source file in the triple store

        :param urlbase: the base URL of current askomics instance. It is used to let triple stores access some askomics temporary ttl files using http.
        :return: a dictionary with information on the success or failure of the operation
        :rtype: Dict
        """
        self.insert_metadatas(public)

        content_ttl = self.get_turtle()
        ql = QueryLauncher(self.settings, self.session)

        # Use INSERT DATA instead of the SPARQL load procedure when the dataset is small
        total_triple_count = 0
        chunk_count = 1
        chunk = ""
        pathttl = self.get_rdf_user_directory()

        method = 'load'
        if self.get_param("askomics.upload_user_data_method"):
            method = self.get_param("askomics.upload_user_data_method")

        if method == 'load':

            fp = None

            triple_count = 0
            for triple in content_ttl:
                chunk += triple + '\n'
                triple_count += 1

                # with open('/tmp/DEBUGTTL' + str(triple_count), 'w') as debug_file:
                #     debug_file.write(chunk)

                if triple_count > int(self.settings[
                        'askomics.max_content_size_to_update_database']):
                    # Temp file must be accessed by http so we place it in askomics/ttl/ dir
                    fp = tempfile.NamedTemporaryFile(dir=pathttl,
                                                     prefix="tmp_" +
                                                     self.alphanum_name,
                                                     suffix=".ttl",
                                                     mode="w",
                                                     delete=False)
                    # We have reached the maximum chunk size, load it and then we will start a new chunk
                    self.log.debug("Loading ttl chunk %s file %s" %
                                   (chunk_count, fp.name))
                    header_ttl = self.get_turtle_template(chunk)
                    fp.write(header_ttl + '\n')
                    fp.write(chunk)
                    fp.close()
                    data = self.load_data_from_file(fp, urlbase)
                    if data['status'] == 'failed':
                        return data

                    chunk = ""
                    total_triple_count += triple_count
                    triple_count = 0
                    chunk_count += 1

            # Load the last chunk
            if triple_count > 0:
                self.log.debug("Loading ttl chunk %s (last)" % (chunk_count))
                fp = tempfile.NamedTemporaryFile(dir=pathttl,
                                                 prefix="tmp_" +
                                                 self.alphanum_name,
                                                 suffix=".ttl",
                                                 mode="w",
                                                 delete=False)
                header_ttl = self.get_turtle_template(chunk)
                fp.write(header_ttl + '\n')
                fp.write(chunk)
                fp.close()
                data = self.load_data_from_file(fp, urlbase)
                if data['status'] == 'failed':
                    return data

            total_triple_count += triple_count

            # Data is inserted, now insert the abstraction

            # We build the abstraction only now because the whole file must be parsed first to collect the category_values
            abstraction_ttl = self.get_abstraction()
            domain_knowledge_ttl = self.get_domain_knowledge()
            header_ttl = self.get_turtle_template(abstraction_ttl + "\n" +
                                                  domain_knowledge_ttl)

            fp = tempfile.NamedTemporaryFile(dir=pathttl,
                                             prefix="tmp_" +
                                             self.alphanum_name,
                                             suffix=".ttl",
                                             mode="w",
                                             delete=False)
            fp.write(header_ttl + '\n')
            fp.write(abstraction_ttl + '\n')
            fp.write(domain_knowledge_ttl + '\n')

            self.log.debug("Loading ttl abstraction file %s" % (fp.name))
            fp.close()
            data = self.load_data_from_file(fp, urlbase)
            if data['status'] == 'failed':
                return data
            data['total_triple_count'] = total_triple_count

        else:

            sqb = SparqlQueryBuilder(self.settings, self.session)

            triple_count = 0
            chunk = ""
            for triple in content_ttl:

                chunk += triple + '\n'

                triple_count += 1

                if triple_count > int(
                        self.settings[
                            'askomics.max_content_size_to_update_database']
                ) / 10:  # FIXME the limit is much lower than for load
                    # We have reached the maximum chunk size, load it and then we will start a new chunk
                    self.log.debug("Inserting ttl chunk %s" % (chunk_count))
                    try:
                        header_ttl = sqb.header_sparql_config(chunk)
                        queryResults = ql.insert_data(chunk, self.graph,
                                                      header_ttl)
                    except Exception as e:
                        return self._format_exception(e)

                    chunk = ""
                    total_triple_count += triple_count
                    triple_count = 0
                    chunk_count += 1

            # Load the last chunk
            if triple_count > 0:
                self.log.debug("Inserting ttl chunk %s (last)" % (chunk_count))

                try:
                    header_ttl = sqb.header_sparql_config(chunk)
                    queryResults = ql.insert_data(chunk, self.graph,
                                                  header_ttl)
                except Exception as e:
                    return self._format_exception(e)

            total_triple_count += triple_count

            # Data is inserted, now insert the abstraction

            # We build the abstraction only now because the whole file must be parsed first to collect the category_values
            abstraction_ttl = self.get_abstraction()
            domain_knowledge_ttl = self.get_domain_knowledge()

            chunk += abstraction_ttl + '\n'
            chunk += domain_knowledge_ttl + '\n'

            self.log.debug("Inserting ttl abstraction")
            try:
                header_ttl = sqb.header_sparql_config(chunk)
                ql.insert_data(chunk, self.graph, header_ttl)
            except Exception as e:
                return self._format_exception(e)

            self.metadatas['graphName'] = self.graph
            sparqlHeader = sqb.header_sparql_config("")

            data = {}

            if 'server' in queryResults.info():
                self.metadatas['server'] = queryResults.info()['server']
            else:
                self.metadatas['server'] = 'unknown'

            data['status'] = 'ok'
            data['total_triple_count'] = total_triple_count

        data['expected_lines_number'] = self.get_number_of_lines()

        return data
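
The load branch above follows a buffer-and-flush pattern: accumulate triples until a threshold is crossed, write the chunk out, then flush whatever remains. A standalone distillation, where max_triples and flush stand in for askomics.max_content_size_to_update_database and the temp-file load:

    def persist_in_chunks(triples, max_triples, flush):
        # Buffer triples; flush a chunk whenever the buffer exceeds the
        # threshold, then flush the final partial chunk.
        chunk, count, total = [], 0, 0
        for triple in triples:
            chunk.append(triple)
            count += 1
            if count > max_triples:
                flush('\n'.join(chunk))
                total += count
                chunk, count = [], 0
        if count > 0:
            flush('\n'.join(chunk))
            total += count
        return total

    total = persist_in_chunks(['<urn:s> <urn:p> "%d" .' % i for i in range(10)], 3, print)
    assert total == 10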
Example #17
    def test_generateAbstractAskomicsRDF(self):
        import os

        from askomics.libaskomics.rdfdb.SparqlQueryBuilder import SparqlQueryBuilder
        from askomics.libaskomics.rdfdb.QueryLauncher import QueryLauncher

        m = ModulesManager(self.settings, self.request.session)

        sqb = SparqlQueryBuilder(self.settings, self.request.session)
        ql = QueryLauncher(self.settings, self.request.session)
        sh = sqb.header_sparql_config('')

        rdf = """
              <http://bidon/relationTest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#ObjectProperty> ;
              <http://www.w3.org/2000/01/rdf-schema#label> "relationBidon" ;
              <http://www.w3.org/2000/01/rdf-schema#domain> <http://bidon/Type1> ;
              <http://www.w3.org/2000/01/rdf-schema#range> <http://bidon/Type2>.

              <http://bidon/Type1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class>.
              <http://bidon/Type2> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class>.
              """

        ql.insert_data(rdf, "urn:test:askomics", sh)
        m.generateAbstractAskomicsRDF("urn:test:askomics")

        rdf = """
              <http://bidon/relationTest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#ObjectProperty> ;
              <http://www.w3.org/2000/01/rdf-schema#label> "relationBidon" ;
              <http://www.w3.org/2000/01/rdf-schema#domain> <http://bidon/Type1> ;
              <http://www.w3.org/2000/01/rdf-schema#range> <http://bidon/Type2>.

              <http://bidon/Type1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class>.
              <http://bidon/Type2> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class>.

              <http://bidon/Type1> <http://www.w3.org/2000/01/rdf-schema#label> "Type1".
              <http://bidon/Type2> <http://www.w3.org/2000/01/rdf-schema#label> "Type2".

              <http://bidon/Attribute1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#DatatypeProperty> ;
              <http://www.w3.org/2000/01/rdf-schema#label> "Attribute1";
              <http://www.w3.org/2000/01/rdf-schema#domain> <http://bidon/Type1> ;
              <http://www.w3.org/2000/01/rdf-schema#range> <http://www.w3.org/2001/XMLSchema#int>.
              """
        ql.insert_data(rdf, "urn:test:askomics2", sh)
        m.generateAbstractAskomicsRDF("urn:test:askomics2")

        rdf = """
              <http://bidon/relationTest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#ObjectProperty> ;
              <http://www.w3.org/2000/01/rdf-schema#label> "relationBidon" ;
              <http://www.w3.org/2000/01/rdf-schema#domain> <http=bidon=Type1> ;
              <http://www.w3.org/2000/01/rdf-schema#range> <http=bidon=Type2>.

              <http=bidon=Type1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class>.
              <http=bidon=Type2> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class>.

              <http=bidon=Type1> <http://www.w3.org/2000/01/rdf-schema#label> "Type1".
              <http=bidon=Type2> <http://www.w3.org/2000/01/rdf-schema#label> "Type2".

              <http://bidon/Attribute1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#DatatypeProperty> ;
              <http://www.w3.org/2000/01/rdf-schema#label> "Attribute1";
              <http://www.w3.org/2000/01/rdf-schema#domain> <http=bidon=Type1> ;
              <http://www.w3.org/2000/01/rdf-schema#range> <http://www.w3.org/2001/XMLSchema#int>.
              """
        ql.insert_data(rdf, "urn:test:askomics3", sh)
        m.generateAbstractAskomicsRDF("urn:test:askomics3")
Example #18
    def persist(self, urlbase, method):
        """
        Store the current source file in the triple store

        :param urlbase: the base URL of current askomics instance. It is used to let triple stores access some askomics temporary ttl files using http.
        :return: a dictionary with information on the success or failure of the operation
        :rtype: Dict
        """
        content_ttl = self.get_turtle()

        ql = QueryLauncher(self.settings, self.session)

        # Use INSERT DATA instead of the SPARQL load procedure when the dataset is small
        total_triple_count = 0
        chunk_count = 1
        chunk = ""
        pathttl = self.get_ttl_directory()
        if method == 'load':

            fp = None

            triple_count = 0
            for triple in content_ttl:
                chunk += triple + '\n'
                triple_count += 1

                if triple_count > int(self.settings['askomics.max_content_size_to_update_database']):
                    # Temp file must be accessed by http so we place it in askomics/ttl/ dir
                    fp = tempfile.NamedTemporaryFile(dir=pathttl, prefix="tmp_"+self.metadatas['fileName'], suffix=".ttl", mode="w", delete=False)
                    # We have reached the maximum chunk size, load it and then we will start a new chunk
                    self.log.debug("Loading ttl chunk %s file %s" % (chunk_count, fp.name))
                    header_ttl = self.get_turtle_template(chunk)
                    fp.write(header_ttl + '\n')
                    fp.write(chunk)
                    fp.close()
                    data = self.load_data_from_file(fp, urlbase)
                    if data['status'] == 'failed':
                        return data

                    chunk = ""
                    total_triple_count += triple_count
                    triple_count = 0
                    chunk_count += 1

            # Load the last chunk
            if triple_count > 0:
                self.log.debug("Loading ttl chunk %s (last)" % (chunk_count))
                fp = tempfile.NamedTemporaryFile(dir=pathttl, prefix="tmp_"+self.metadatas['fileName'], suffix=".ttl", mode="w", delete=False)
                header_ttl = self.get_turtle_template(chunk)
                fp.write(header_ttl + '\n')
                fp.write(chunk)
                fp.close()
                data = self.load_data_from_file(fp, urlbase)
                if data['status'] == 'failed':
                    return data
                os.remove(fp.name) # Everything ok, remove previous temp file

            total_triple_count += triple_count

            # Data is inserted, now insert the abstraction

            # We build the abstraction only now because the whole file must be parsed first to collect the category_values
            abstraction_ttl = self.get_abstraction()
            domain_knowledge_ttl = self.get_domain_knowledge()
            header_ttl = self.get_turtle_template(abstraction_ttl+"\n"+domain_knowledge_ttl)

            fp = tempfile.NamedTemporaryFile(dir=pathttl, prefix="tmp_"+self.metadatas['fileName'], suffix=".ttl", mode="w", delete=False)
            fp.write(header_ttl + '\n')
            fp.write(abstraction_ttl + '\n')
            fp.write(domain_knowledge_ttl + '\n')

            self.log.debug("Loading ttl abstraction file %s" % (fp.name))
            fp.close()
            data = self.load_data_from_file(fp, urlbase)
            if data['status'] == 'failed':
                return data
            data['total_triple_count'] = total_triple_count
            os.remove(fp.name)

        else:

            sqb = SparqlQueryBuilder(self.settings, self.session)


            graphName = "askomics:graph:" + self.name + '_' + self.timestamp

            triple_count = 0
            chunk = ""
            for triple in content_ttl:

                chunk += triple + '\n'

                triple_count += 1

                if triple_count > int(self.settings['askomics.max_content_size_to_update_database']) / 10: # FIXME the limit is much lower than for load
                    # We have reached the maximum chunk size, load it and then we will start a new chunk
                    self.log.debug("Inserting ttl chunk %s" % (chunk_count))
                    try:
                        header_ttl = sqb.header_sparql_config(chunk)
                        queryResults = ql.insert_data(chunk, graphName, header_ttl)
                    except Exception as e:
                        return self._format_exception(e)

                    chunk = ""
                    total_triple_count += triple_count
                    triple_count = 0
                    chunk_count += 1

            # Load the last chunk
            if triple_count > 0:
                self.log.debug("Inserting ttl chunk %s (last)" % (chunk_count))

                try:
                    header_ttl = sqb.header_sparql_config(chunk)
                    queryResults = ql.insert_data(chunk, graphName, header_ttl)
                except Exception as e:
                    return self._format_exception(e)

            total_triple_count += triple_count

            # Data is inserted, now insert the abstraction

            # We build the abstraction only now because the whole file must be parsed first to collect the category_values
            abstraction_ttl = self.get_abstraction()
            domain_knowledge_ttl = self.get_domain_knowledge()

            chunk += abstraction_ttl + '\n'
            chunk += domain_knowledge_ttl + '\n'

            self.log.debug("Inserting ttl abstraction")
            try:
                header_ttl = sqb.header_sparql_config(chunk)
                ql.insert_data(chunk, graphName, header_ttl)
            except Exception as e:
                return self._format_exception(e)

            ttlNamedGraph = "<" + graphName + "> " + "rdfg:subGraphOf" + " <" + self.get_param("askomics.graph") + "> ."
            self.metadatas['graphName'] = graphName
            sparqlHeader = sqb.header_sparql_config("")
            ql.insert_data(ttlNamedGraph, self.get_param("askomics.graph"), sparqlHeader)

            data = {}

            self.metadatas['server'] = queryResults.info()['server']
            self.metadatas['loadDate'] = self.timestamp

            data['status'] = 'ok'
            data['total_triple_count'] = total_triple_count
            self.get_metadatas()

        data['expected_lines_number'] = self.get_number_of_lines()

        return data