Esempio n. 1
0
    def generate_release(self):
        '''
        Harmonise every sheet to be processed into one data cube and load it
        into the release graph.

        Steps, in order:
          1. build one task description per sheet from _get_sheets_list();
          2. run generate_release_thread over the tasks in a process pool;
          3. push the content of the release path to the release graph;
          4. ask CubeMaker to generate the DSD file;
          5. upload the DSD file (with a ".bz2" suffix when compression is
             enabled) to the release graph.

        An exception raised by the pool is propagated to the caller, but only
        after the worker processes have been shut down.
        '''
        conf = self._conf

        # These settings are identical for every sheet and for the DSD step,
        # so they are read once instead of once per task.
        # NOTE(review): assumes the configuration getters are plain lookups
        # without side effects -- confirm.
        release_path = conf.get_path('release')
        release_graph = conf.get_graph_name('release')
        raw_data_graph = conf.get_graph_name('raw-data')
        rules_graph = conf.get_graph_name('rules')
        endpoint = conf.get_SPARQL()
        compress = conf.isCompress()
        target = conf.get_namespace('data')
        measure = conf.get_measure()

        # Prepare a task list, one entry per sheet
        tasks = [
            {
                'sheet_name': sheet_name,
                'output_file': release_path + sheet_name + '.ttl',
                'endpoint': endpoint,
                'compress': compress,
                'target': target,
                'release_graph': release_graph,
                'raw_data_graph': raw_data_graph,
                'rules_graph': rules_graph,
                'measure': measure,
            }
            for sheet_name in self._get_sheets_list()
        ]

        # Call cube in parallel, avoid hammering the store too much
        pool = multiprocessing.Pool(
            processes=min(4, multiprocessing.cpu_count()))
        try:
            pool.map(generate_release_thread, tasks)
        finally:
            # Always release the worker processes, even when map() raises;
            # close() must precede join().
            pool.close()
            pool.join()

        # Push all the data to the triple store
        self._push_to_graph(release_graph, release_path)

        # Create an instance of CubeMaker
        cubeMaker = CubeMaker(endpoint, release_graph, raw_data_graph,
                              rules_graph)
        cubeMaker.set_target_namespace(target)
        cubeMaker.set_compress(compress)

        # Update the DSD
        dsd_file_name = release_path + 'dsd.ttl'
        log.info("Asking CubeMaker to generate the DSD")
        cubeMaker.generate_dsd(conf.get_cube_title(),
                               measure,
                               conf.get_measureunit(),
                               conf.get_slices(),
                               dsd_file_name)

        # Load the DSD
        pusher = Pusher(conf.get_SPARUL(), conf.get_user(),
                        conf.get_secret())
        log.info("[{}] Adding the content of the DSD".format(release_graph))
        if compress:
            # With compression enabled the file to upload carries a .bz2
            # suffix.
            dsd_file_name = dsd_file_name + ".bz2"
        pusher.upload_file(release_graph, dsd_file_name)
Esempio n. 2
0
    def generate_release(self):
        '''
        Take the list of data sets to be processed and try to harmonise them
        into one big data cube.

        One task is built per sheet returned by _get_sheets_list() and the
        tasks are handed to generate_release_thread through a process pool.
        Afterwards the release path is pushed to the release graph, CubeMaker
        generates the DSD file, and that file (suffixed with ".bz2" when
        compression is enabled) is uploaded to the release graph.

        If the pool raises, the worker processes are still closed and joined
        before the exception reaches the caller.
        '''
        conf = self._conf
        release_graph = conf.get_graph_name('release')
        release_path = conf.get_path('release')
        compress = conf.isCompress()

        # Settings shared by every task; read once rather than per sheet.
        # NOTE(review): assumes the configuration getters have no side
        # effects -- confirm.
        shared = {'endpoint'       : conf.get_SPARQL(),
                  'compress'       : compress,
                  'target'         : conf.get_namespace('data'),
                  'release_graph'  : release_graph,
                  'raw_data_graph' : conf.get_graph_name('raw-data'),
                  'rules_graph'    : conf.get_graph_name('rules'),
                  'measure'        : conf.get_measure()}

        # Prepare a task list: each task is its own dict, made of the shared
        # settings plus the per-sheet entries
        tasks = [dict(shared,
                      sheet_name=sheet_name,
                      output_file=release_path + sheet_name + '.ttl')
                 for sheet_name in self._get_sheets_list()]

        # Call cube in parallel, avoid hammering the store too much
        cpu_count = multiprocessing.cpu_count()
        pool = multiprocessing.Pool(processes=min(4, cpu_count))
        try:
            pool.map(generate_release_thread, tasks)
        finally:
            # Shut the workers down on both the success and the error path;
            # join() requires a prior close().
            pool.close()
            pool.join()

        # Push all the data to the triple store
        self._push_to_graph(release_graph, release_path)

        # Create an instance of CubeMaker
        cubeMaker = CubeMaker(shared['endpoint'],
                              release_graph,
                              shared['raw_data_graph'],
                              shared['rules_graph'])
        cubeMaker.set_target_namespace(shared['target'])
        cubeMaker.set_compress(compress)

        # Update the DSD
        dsd_file_name = release_path + 'dsd.ttl'
        log.info("Asking CubeMaker to generate the DSD")
        cubeMaker.generate_dsd(conf.get_cube_title(),
                               shared['measure'],
                               conf.get_measureunit(),
                               conf.get_slices(),
                               dsd_file_name)

        # Load the DSD
        pusher = Pusher(conf.get_SPARUL(),
                        conf.get_user(),
                        conf.get_secret())
        log.info("[{}] Adding the content of the DSD".format(release_graph))
        if compress:
            # The file to upload carries a .bz2 suffix when compression is on
            dsd_file_name = dsd_file_name + ".bz2"
        pusher.upload_file(release_graph, dsd_file_name)
Esempio n. 3
0
def generate_release_thread(parameters):
    '''
    Pool worker for generate_release: run CubeMaker on a single sheet.

    parameters is a mapping providing 'sheet_name', 'output_file',
    'endpoint', 'release_graph', 'raw_data_graph', 'rules_graph', 'target',
    'compress' and 'measure'.

    Any Exception raised while setting up or running CubeMaker is logged and
    not re-raised. The 'sheet_name' and 'output_file' lookups happen before
    that guard, so a missing one of those two keys still raises KeyError.
    '''
    sheet = parameters['sheet_name']
    destination = parameters['output_file']
    log.info("[{}] Calling CubeMaker".format(sheet))
    try:
        # Looked up inside the guard, in constructor-argument order
        constructor_args = [parameters[key] for key in (
            'endpoint', 'release_graph', 'raw_data_graph', 'rules_graph')]
        maker = CubeMaker(*constructor_args)
        maker.set_target_namespace(parameters['target'])
        maker.set_compress(parameters['compress'])
        maker.process(parameters['measure'], sheet, destination)
    except Exception as error:
        log.error("[{}] Error in CubeMaker: {}".format(sheet, error))
Esempio n. 4
0
def generate_release_thread(parameters):
    '''
    Process one sheet with CubeMaker on behalf of generate_release.

    parameters is the task mapping for the sheet; the keys read here are
    'sheet_name', 'output_file', 'endpoint', 'release_graph',
    'raw_data_graph', 'rules_graph', 'target', 'compress' and 'measure'.

    Failures inside the CubeMaker set-up or processing are caught, reported
    through log.error and swallowed. Reading 'sheet_name' and 'output_file'
    is done outside that handler.
    '''
    name = parameters['sheet_name']
    target_file = parameters['output_file']
    start_message = "[{}] Calling CubeMaker".format(name)
    log.info(start_message)
    try:
        endpoint = parameters['endpoint']
        release_graph = parameters['release_graph']
        raw_data_graph = parameters['raw_data_graph']
        rules_graph = parameters['rules_graph']
        builder = CubeMaker(endpoint, release_graph, raw_data_graph,
                            rules_graph)

        namespace = parameters['target']
        builder.set_target_namespace(namespace)

        compress = parameters['compress']
        builder.set_compress(compress)

        measure = parameters['measure']
        builder.process(measure, name, target_file)
    except Exception as failure:
        # One failing sheet is only reported, never propagated
        failure_message = "[{}] Error in CubeMaker: {}".format(name, failure)
        log.error(failure_message)