Example #1
0
    def integrate(self, public=False):
        """Integrate the file into the triplestore

        Parameters
        ----------
        public : bool, optional
            Integrate in private or public graph
        """
        sparql = SparqlQueryLauncher(self.app, self.session)
        tse = TriplestoreExplorer(self.app, self.session)

        self.public = public

        method = self.settings.get('triplestore', 'upload_method')

        # Load file into a RDF graph
        self.graph_chunk.parse(self.path, format=self.type_dict[self.type])

        # get metadata
        self.set_metadata()

        # Remove metadata from data
        self.delete_metadata_location()

        # insert metadata
        sparql.insert_data(self.graph_metadata, self.file_graph, metadata=True)

        if method == "load":
            # write rdf into a tmpfile and load it
            temp_file_name = 'tmp_{}_{}.{}'.format(Utils.get_random_string(5),
                                                   self.name,
                                                   self.rdf_extention)

            # Try to load data. if failure, wait 5 sec and retry 5 time
            Utils.redo_if_failure(self.log, 5, 5, self.load_graph,
                                  self.graph_chunk, temp_file_name)

        else:
            # Insert
            # Try to insert data. if failure, wait 5 sec and retry 5 time
            Utils.redo_if_failure(self.log, 5, 5, sparql.insert_data,
                                  self.graph_chunk, self.file_graph)

        # Remove chached abstraction
        tse.uncache_abstraction(public=self.public)

        self.set_triples_number()
Example #2
0
    def integrate(self, public=False):
        """Integrate the file into the triplestore

        Parameters
        ----------
        public : bool, optional
            Integrate in private or public graph
        """
        sparql = SparqlQueryLauncher(self.app, self.session)

        self.public = public

        method = self.settings.get('triplestore', 'upload_method')

        # insert metadata
        sparql.insert_data(self.get_metadata(), self.file_graph, metadata=True)

        if method == "load":
            # cp file into ttl dir
            tmp_file_name = 'tmp_{}_{}.ttl'.format(
                Utils.get_random_string(5),
                self.name,
            )
            temp_file_path = '{}/{}'.format(self.ttl_dir, tmp_file_name)
            copyfile(self.path, temp_file_path)
            # Load the chunk
            sparql.load_data(tmp_file_name, self.file_graph, self.host_url)

            # Remove tmp file
            if not self.settings.getboolean('askomics', 'debug_ttl'):
                os.remove(temp_file_path)
        else:

            with open(self.path) as ttl_file:
                ttl_content = ttl_file.read()
                sparql.insert_ttl_string(ttl_content, self.user_graph)

        self.set_triples_number()
Example #3
0
    def integrate(self, dataset_id=None):
        """Integrate the file into the triplestore"""
        sparql = SparqlQueryLauncher(self.app, self.session)

        # insert metadata
        sparql.insert_data(self.get_metadata(), self.file_graph, metadata=True)

        content_generator = self.generate_rdf_content()

        # Insert content
        chunk_number = 0

        for _ in content_generator:

            if self.graph_chunk.ntriple >= self.max_chunk_size:

                if self.graph_chunk.percent and dataset_id:
                    self.update_percent_in_db(self.graph_chunk.percent,
                                              dataset_id)

                if self.method == 'load':

                    # write rdf into a tmpfile and load it
                    temp_file_name = 'tmp_{}_{}_chunk_{}.{}'.format(
                        Utils.get_random_string(5), self.name, chunk_number,
                        self.rdf_extention)

                    # Try to load data. if failure, wait 5 sec and retry 5 time
                    Utils.redo_if_failure(self.log, 5, 5, self.load_graph,
                                          self.graph_chunk, temp_file_name)
                else:
                    # Insert
                    # Try to insert data. if failure, wait 5 sec and retry 5 time
                    Utils.redo_if_failure(self.log, 5, 5, sparql.insert_data,
                                          self.graph_chunk, self.file_graph)

                chunk_number += 1
                self.graph_chunk = RdfGraph(self.app, self.session)

        # Load the last chunk
        if self.graph_chunk.percent and dataset_id:
            self.update_percent_in_db(100, dataset_id)

        if self.method == 'load':
            temp_file_name = 'tmp_{}_{}_chunk_{}.{}'.format(
                Utils.get_random_string(5), self.name, chunk_number,
                self.rdf_extention)

            # Try to load data. if failure, wait 5 sec and retry 5 time
            Utils.redo_if_failure(self.log, 5, 5, self.load_graph,
                                  self.graph_chunk, temp_file_name)
        else:
            # Insert
            # Try to insert data. if failure, wait 5 sec and retry 5 time
            Utils.redo_if_failure(self.log, 5, 5, sparql.insert_data,
                                  self.graph_chunk, self.file_graph)

        # Content is inserted, now insert abstraction and domain_knowledge
        self.set_rdf_abstraction_domain_knowledge()

        if self.method == 'load':

            temp_file_name = 'tmp_{}_{}_abstraction_domain_knowledge.{}'.format(
                Utils.get_random_string(5), self.name, self.rdf_extention)

            self.load_graph(self.graph_abstraction_dk, temp_file_name)
        else:
            # Insert
            sparql.insert_data(self.graph_abstraction_dk, self.file_graph)

        self.set_triples_number()