Example #1
    def persist_services(self, io_conf, chain=None):
        """Persist process services in files.

        :param dict io_conf: I/O config as returned by ConfigObject.io_conf
        :param str chain: name of chain for which data is persisted
        """
        # parse I/O config
        io_conf = ConfigObject.IoConfig(**io_conf)

        # parse specified chain
        if chain:
            # prepend underscore for output directory
            chain = '_{}'.format(chain)
        else:
            # use default directory if chain not specified
            chain = 'default'

        # get chain path and set link of latest data
        base_path = persistence.io_dir('proc_service_data', io_conf)
        chain_path = '{0:s}/{1:s}'.format(base_path, chain)
        persistence.create_dir(chain_path)
        self.logger.debug('Persisting process services in "{path}".', path=chain_path)
        try:
            # remove old symlink
            os.remove('{}/latest'.format(base_path))
        except OSError:
            pass
        try:
            # create new symlink
            os.symlink(chain, '{}/latest'.format(base_path))
        except OSError as exc:
            self.logger.fatal('Unable to create symlink to latest version of services: <{path}/latest>.',
                              path=base_path)
            raise exc

        # remove old data
        service_paths = glob.glob('{}/*.pkl'.format(chain_path))
        try:
            for path in service_paths:
                os.remove(path)
        except Exception as exc:
            self.logger.fatal('Unable to remove previously persisted process services.')
            raise exc

        # persist services
        for cls in self.get_services():
            self.service(cls).persist_in_file('{0:s}/{1!s}.pkl'.format(chain_path, cls))
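A minimal usage sketch of this method, assuming an Eskapade-style setup where process_manager is the singleton that defines persist_services and ConfigObject.io_conf() returns the I/O config dict mentioned in the docstring; the chain name 'Data' and the import path are illustrative assumptions:

from eskapade import process_manager, ConfigObject

# fetch the I/O configuration from the settings service (assumed API, see docstring above)
settings = process_manager.service(ConfigObject)
io_conf = settings.io_conf()

# persist all registered process services for the chain named 'Data';
# files end up under <proc_service_data dir>/_Data/<ServiceClass>.pkl and
# the 'latest' symlink next to that directory is refreshed
process_manager.persist_services(io_conf, chain='Data')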
Example #2
    def execute(self):
        """Execute the link.

        Pick up the dataframe and write to disk.
        """
        ds = process_manager.service(DataStore)
        settings = process_manager.service(ConfigObject)

        # check that all dataframes are present
        assert all(k in ds for k in self.path_map), 'key(s) not in DataStore.'

        # check that all ds items are dataframes
        assert all(isinstance(ds[k], pd.DataFrame) for k in self.path_map), \
            'key(s) is not a pandas DataFrame.'

        # Kwarg for numpy and feather writers
        self.store_index = self.kwargs.pop('store_index', True)

        # collect writer and store the dataframes
        for k, path in self.path_map.items():
            df = ds[k]
            if self.add_counter_to_name:
                # execution index
                ex_str = '_p' + str(self._counter)
                # fork index
                fidx = settings.get('fork_index', 0)
                fi_str = '_f' + str(fidx) if settings.get('fork', False) else ''
                # updated path
                ps = os.path.splitext(path)
                path = ps[0] + fi_str + ex_str + ps[1]
            writer = get_writer(path, self.writer)
            folder = os.path.dirname(path)
            persistence.create_dir(folder)
            self.logger.debug('Checking for directory <{dir}>.', dir=folder)
            if not os.path.exists(folder):
                self.logger.fatal('Path given is invalid.')
            self.logger.info('Writing file "{path}".', path=path)
            if writer == numpy_writer or writer == feather_writer:
                writer(df, path, self.store_index)
            else:
                writer(df, path, **self.kwargs)

        self._counter += 1
        return StatusCode.Success
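A sketch of how such a dataframe-writer link could be driven; the link class name DfWriterLink is a placeholder, and the attributes set here (path_map, writer, kwargs, add_counter_to_name) simply mirror those used in execute() above:

import pandas as pd
from eskapade import process_manager, DataStore

# place a dataframe in the DataStore under the key the link will look up
ds = process_manager.service(DataStore)
ds['transactions'] = pd.DataFrame({'id': [1, 2, 3], 'amount': [9.5, 3.2, 7.8]})

# hypothetical link configuration: map DataStore keys to output file paths
link = DfWriterLink(name='write_transactions')  # placeholder class name
link.path_map = {'transactions': 'output/transactions.csv'}
link.writer = 'csv'               # resolved by get_writer() into a pandas writer
link.kwargs = {}                  # extra keyword arguments passed to the writer
link.add_counter_to_name = False  # skip the '_f<fork>_p<counter>' filename suffix
link.execute()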
Example #3
    def _process_results_path(self):
        """Process results_path argument."""
        if not self.results_path:
            self.results_path = persistence.io_path('results_data', 'report')
        persistence.create_dir(self.results_path)
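For context, a hedged sketch of what the fallback branch amounts to when results_path is left empty, assuming the persistence module used above is importable (the exact import path, e.g. eskapade.core.persistence, may differ per version):

from eskapade.core import persistence  # import path is an assumption

# mirror of the fallback: results go to the standard 'results_data' area
# under a 'report' sub-directory, which is then created if missing
default_path = persistence.io_path('results_data', 'report')
persistence.create_dir(default_path)
print(default_path)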