Beispiel #1
0
    def init_parameter(self, parameter_context, bricking_scheme):
        """
        Set up persistence for one parameter and hand back its storage object.

        Registers a ParameterManager for the parameter, creates its group in
        the master manager, prepares an empty rtree index plus the brick domain
        bookkeeping, wraps everything in a PersistedStorage, expands the domain
        and finally flushes both managers.

        @param parameter_context    ParameterContext object describing the parameter to initialize
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @return A PersistedStorage object
        @raise IOError  if this layer's mode is 'r'
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        name = parameter_context.name
        self.global_bricking_scheme = bricking_scheme

        param_dir = os.path.join(self.root_dir, self.guid, name)
        pm = ParameterManager(param_dir, name)
        self.parameter_metadata[name] = pm
        pm.parameter_context = parameter_context

        log.debug('Initialize %s', name)
        self.master_manager.create_group(name)

        log.debug('Performing Rtree dict setup')
        total_extents = parameter_context.dom.total_extents
        # Brick and chunk extents remain the same for each parameter
        brick_extents, chunk_extents = self.calculate_brick_size(total_extents, bricking_scheme)

        # Verify domain is Rtree friendly: a rank of 1 is bumped up to 2
        rank = len(brick_extents)
        log.debug('tree_rank: %s', rank)
        if rank == 1:
            rank = 2
        log.debug('tree_rank: %s', rank)

        tree_props = rtree.index.Property()
        tree_props.dimension = rank
        tree = rtree.index.Index(properties=tree_props)

        pm.brick_list = {}
        param_type = parameter_context.param_type
        # Function and constant types have constant storage and never expand
        has_constant_storage = isinstance(param_type, (FunctionType, ConstantType))
        pm.brick_domains = (
            [(1,), (1,), (1,), bricking_scheme]
            if has_constant_storage
            else [total_extents, brick_extents, chunk_extents, bricking_scheme]
        )

        pm.tree_rank = rank
        pm.brick_tree = tree

        storage = PersistedStorage(
            pm,
            self.brick_dispatcher,
            dtype=param_type.storage_encoding,
            fill_value=param_type.fill_value,
            mode=self.mode,
            inline_data_writes=self.inline_data_writes,
            auto_flush=self.auto_flush_values,
        )
        self.value_list[name] = storage

        self.expand_domain(parameter_context)

        # CBM TODO: Consider making this optional and bulk-flushing from the coverage after all parameters have been initialized
        # Both managers were just modified, so flush without checking for dirtiness
        pm.flush()
        self.master_manager.flush()

        return storage
Beispiel #2
0
    def init_parameter(self, parameter_context, bricking_scheme):
        """
        Set up persistence for one parameter and hand back its storage object.

        Registers a ParameterManager for the parameter, creates its group in
        the master manager, prepares an empty rtree index plus the brick domain
        bookkeeping, wraps everything in a PersistedStorage and expands the
        domain. Each manager is flushed only if it reports itself dirty.

        @param parameter_context    ParameterContext object describing the parameter to initialize
        @param bricking_scheme  Bricking scheme passed on to calculate_brick_size and stored with the brick domains
        @return A PersistedStorage object
        """
        name = parameter_context.name
        self.global_bricking_scheme = bricking_scheme

        param_dir = os.path.join(self.root_dir, self.guid, name)
        pm = ParameterManager(param_dir, name)
        self.parameter_metadata[name] = pm
        pm.parameter_context = parameter_context

        log.debug('Initialize %s', name)
        self.master_manager.create_group(name)

        log.debug('Performing Rtree dict setup')
        total_extents = parameter_context.dom.total_extents
        # Brick and chunk extents remain the same for each parameter
        brick_extents, chunk_extents = self.calculate_brick_size(total_extents, bricking_scheme)

        # Verify domain is Rtree friendly: a rank of 1 is bumped up to 2
        rank = len(brick_extents)
        log.debug('tree_rank: %s', rank)
        if rank == 1:
            rank = 2
        log.debug('tree_rank: %s', rank)

        tree_props = rtree.index.Property()
        tree_props.dimension = rank
        tree = rtree.index.Index(properties=tree_props)

        pm.brick_list = {}
        param_type = parameter_context.param_type
        # Function and constant types have constant storage and never expand
        has_constant_storage = isinstance(param_type, (FunctionType, ConstantType))
        pm.brick_domains = (
            [(1,), (1,), (1,), bricking_scheme]
            if has_constant_storage
            else [total_extents, brick_extents, chunk_extents, bricking_scheme]
        )

        pm.tree_rank = rank
        pm.brick_tree = tree

        storage = PersistedStorage(
            pm,
            self.brick_dispatcher,
            dtype=param_type.storage_encoding,
            fill_value=param_type.fill_value,
            auto_flush=self.auto_flush_values,
        )
        self.value_list[name] = storage

        self.expand_domain(parameter_context)

        # Only write out metadata that actually changed
        if pm.is_dirty():
            pm.flush()

        master = self.master_manager
        if master.is_dirty():
            master.flush()

        return storage
Beispiel #3
0
    def init_parameter(self, parameter_context, bricking_scheme):
        """
        Set up persistence for one parameter and hand back its storage object.

        Creates a writable ParameterManager for the parameter, registers its
        group with the master manager, builds the matching storage object
        (sparse or regular), flushes the parameter metadata, switches the
        ParameterManager to read-only and links every brick already known to
        the master manager to the new parameter.

        @param parameter_context    ParameterContext object describing the parameter to initialize
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @return A PersistedStorage object (a SparsePersistedStorage when the parameter type's
                _value_class is 'SparseConstantValue')
        @raise IOError  if this layer's mode is 'r'
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        name = parameter_context.name
        self.global_bricking_scheme = bricking_scheme

        param_dir = os.path.join(self.root_dir, self.guid, name)
        pm = ParameterManager(param_dir, name, read_only=False)
        self.parameter_metadata[name] = pm
        pm.parameter_context = parameter_context

        log.debug('Initialize %s', name)
        self.master_manager.create_group(name)

        # Both storage flavours take the same arguments; only the class differs
        param_type = parameter_context.param_type
        if param_type._value_class == 'SparseConstantValue':
            storage_cls = SparsePersistedStorage
        else:
            storage_cls = PersistedStorage
        storage = storage_cls(
            pm,
            self.master_manager,
            self.brick_dispatcher,
            dtype=param_type.storage_encoding,
            fill_value=param_type.fill_value,
            mode=self.mode,
            inline_data_writes=self.inline_data_writes,
            auto_flush=self.auto_flush_values,
        )
        self.value_list[name] = storage

        # CBM TODO: Consider making this optional and bulk-flushing from the coverage after all parameters have been initialized
        # The parameter manager was just modified, so flush without checking for dirtiness
        pm.flush()

        # Put the pm into read_only mode
        pm.read_only = True

        # If there are already bricks, ensure there are appropriate links for this new parameter
        for existing_guid in self.master_manager.brick_list:
            self._add_brick_link(name, existing_guid, '{0}.hdf5'.format(existing_guid))

        self.master_manager.flush()

        return storage
    def __init__(self, total_domain=(10, 10), brick_size=5, use_hdf=False,
                 root_dir="test_data/multi_dim_trials", guid=None, dtype="int16"):
        """
        Build the bricks for ``total_domain`` and index them in an RTreeProxy.

        @param total_domain  Extents of the full domain, one entry per dimension
        @param brick_size    Brick edge length, applied to every dimension
        @param use_hdf       If true, set up a metadata manager and a parameter manager under root_dir
        @param root_dir      Directory used when use_hdf is true; created if missing
        @param guid          Identifier used in the storage directory name; generated via create_guid() if falsy
        @param dtype         Anything accepted by np.dtype; its canonical name is stored
        """
        self.total_domain = total_domain
        self.brick_sizes = tuple(brick_size for _ in total_domain)
        self.use_hdf = use_hdf
        self.dtype = np.dtype(dtype).name

        if self.use_hdf:
            self.guid = guid or create_guid()
            name = "%s_%s" % (self.guid, self.dtype)
            self.root_dir = root_dir
            if not os.path.exists(self.root_dir):
                os.makedirs(self.root_dir)

            # Remove whatever an earlier run with the same guid/dtype left behind
            storage_dir = os.path.join(self.root_dir, name)
            if os.path.exists(storage_dir):
                shutil.rmtree(storage_dir)

            # The metadata manager comes from the factory rather than a direct MasterManager(...) call
            manager_label = "md_test_{0}".format(name)
            self.master_manager = MetadataManagerFactory.buildMetadataManager(
                self.root_dir, name, name=manager_label)
            self.master_manager.flush()

            context = ParameterContext("test_param", param_type=QuantityType(self.dtype), fill_value=-1)
            param_dir = os.path.join(self.root_dir, name, context.name)
            self.param_manager = ParameterManager(param_dir, context.name)
            self.param_manager.parameter_context = context
            self.master_manager.create_group(context.name)
            self.param_manager.flush()

        self.bricks = {}

        self.brick_origins = bricking_utils.calc_brick_origins(self.total_domain, self.brick_sizes)
        extents = bricking_utils.calc_brick_and_rtree_extents(self.brick_origins, self.brick_sizes)
        self.brick_extents, self.rtree_extents = extents
        self.build_bricks()

        self.rtree = RTreeProxy()
        for entry in BrickingAssessor.rtree_populator(self.rtree_extents, self.brick_extents):
            self.rtree.insert(*entry)
class BrickingAssessor(object):
    """
    Harness that splits an n-dimensional domain into equally sized bricks and
    reads/writes values through them.

    Bricks are either in-memory numpy arrays (``use_hdf=False``) or paths to
    per-brick HDF5 files opened through HDFLockingFile (``use_hdf=True``).
    Every brick starts out filled with -1.
    """

    def __init__(self, total_domain=(10, 10), brick_size=5, use_hdf=False, root_dir='test_data/multi_dim_trials',
                 guid=None, dtype='int16'):
        """
        Build the bricks for ``total_domain`` and index them in an RTreeProxy.

        @param total_domain  Extents of the full domain, one entry per dimension
        @param brick_size    Brick edge length, applied to every dimension
        @param use_hdf       If true, set up a MasterManager and a ParameterManager under root_dir
        @param root_dir      Directory used when use_hdf is true; created if missing
        @param guid          Identifier used in the storage directory name; generated via create_guid() if falsy
        @param dtype         Anything accepted by np.dtype; its canonical name is stored
        """
        self.total_domain = total_domain
        self.brick_sizes = tuple(brick_size for _ in total_domain)
        self.use_hdf = use_hdf
        self.dtype = np.dtype(dtype).name
        if self.use_hdf:
            self.guid = guid or create_guid()
            name = '%s_%s' % (self.guid, self.dtype)
            self.root_dir = root_dir
            if not os.path.exists(self.root_dir):
                os.makedirs(self.root_dir)

            # Remove whatever an earlier run with the same guid/dtype left behind
            storage_dir = os.path.join(self.root_dir, name)
            if os.path.exists(storage_dir):
                shutil.rmtree(storage_dir)

            self.master_manager = MasterManager(self.root_dir, name, name='md_test_{0}'.format(name))

            self.master_manager.flush()

            pc = ParameterContext('test_param', param_type=QuantityType(self.dtype), fill_value=-1)
            self.param_manager = ParameterManager(os.path.join(self.root_dir, name, pc.name), pc.name)
            self.param_manager.parameter_context = pc
            self.master_manager.create_group(pc.name)

            self.param_manager.flush()

        self.bricks = {}

        self.brick_origins = bricking_utils.calc_brick_origins(self.total_domain, self.brick_sizes)
        self.brick_extents, self.rtree_extents = bricking_utils.calc_brick_and_rtree_extents(self.brick_origins,
                                                                                             self.brick_sizes)
        self.build_bricks()

        self.rtree = RTreeProxy()
        for entry in BrickingAssessor.rtree_populator(self.rtree_extents, self.brick_extents):
            self.rtree.insert(*entry)

    @classmethod
    def rtree_populator(cls, rtree_extents, brick_extents):
        """
        Yield ``(index, rtree_extent, brick_extent)`` for every rtree extent.

        @param rtree_extents  Sequence of extents to iterate over
        @param brick_extents  Sequence indexed in parallel with rtree_extents
        """
        for i, extent in enumerate(rtree_extents):
            yield i, extent, brick_extents[i]

    def _get_numpy_array(self, shape):
        """
        Return an array of ``shape`` holding 0..N-1 in this assessor's dtype.

        @param shape  Tuple, or any iterable convertible to a tuple
        """
        if not isinstance(shape, tuple):
            shape = tuple(shape)

        return np.arange(utils.prod(shape), dtype=self.dtype).reshape(shape)

    def build_bricks(self):
        """
        Create one brick per entry in ``self.brick_origins``, keyed by its index.

        Without HDF the brick is a numpy array filled with -1. With HDF the
        brick is the path of its ``<index>.hdf5`` file, and an external link
        to that file is registered with the master manager.
        """
        for brick_id in range(len(self.brick_origins)):
            if not self.use_hdf:
                brick = np.empty(self.brick_sizes, dtype=self.dtype)
                brick.fill(-1)
                self.bricks[brick_id] = brick
            else:
                brick_name = str(brick_id)
                file_name = '{0}.hdf5'.format(brick_name)
                param_root = self.param_manager.root_dir
                pth = os.path.join(param_root, file_name)
                # Path of the brick file with the master manager's root replaced by '.'
                relpth = os.path.join(param_root.replace(self.master_manager.root_dir, '.'), file_name)
                lnpth = '/{0}/{1}'.format(self.param_manager.parameter_name, brick_name)

                self.master_manager.add_external_link(lnpth, relpth, brick_name)
                self.bricks[brick_id] = pth

    def reset_bricks(self):
        """
        Set every value of every brick back to -1.

        In HDF mode the dataset inside each file is addressed by the brick id
        (the key in ``self.bricks``), the same name put_values_to_bricks and
        get_values_from_bricks use, rather than by iteration position.
        """
        for brick_id, brick in self.bricks.items():
            if not self.use_hdf:
                brick.fill(-1)
            else:
                with HDFLockingFile(brick, mode='a') as f:
                    ds = f.require_dataset(str(brick_id), shape=self.brick_sizes, dtype=self.dtype, chunks=None,
                                           fillvalue=-1)
                    ds[:] = -1

    def put_values_to_bricks(self, slice_, values):
        """
        Write ``values`` into the region of the domain selected by ``slice_``.

        @param slice_  Slice into the total domain; normalised with utils.fix_slice
        @param values  Scalar (broadcast to the whole slice) or array-like whose shape matches the
                       slice shape, optionally with the slice's singleton dimensions dropped
        @raise IndexError  if the shape of values is not compatible with slice_
        """
        slice_ = utils.fix_slice(slice_, self.total_domain)
        # This is a list of tuples [(b_id, (bounds...),), ...]
        bricks = bricking_utils.get_bricks_from_slice(slice_, self.rtree, self.total_domain)

        values = np.asanyarray(values)
        v_shp = values.shape
        log.debug('value_shape: %s', v_shp)
        s_shp = utils.slice_shape(slice_, self.total_domain)
        log.debug('slice_shape: %s', s_shp)

        is_broadcast = v_shp == ()
        # Recomputed per brick below unless the value is a broadcast scalar
        value_slice = None
        if is_broadcast:
            log.debug('Broadcast!!')
            value_slice = ()
        elif v_shp != s_shp:
            if v_shp == tuple(i for i in s_shp if i != 1):  # Missing dimensions are singleton, just reshape to fit
                values = values.reshape(s_shp)
                v_shp = values.shape
            else:
                raise IndexError(
                    'Shape of \'value\' is not compatible with \'slice_\': slice_ shp == {0}\tvalue shp == {1}'.format(
                        s_shp, v_shp))

        log.debug('value_shape: %s', v_shp)

        for b in bricks:
            # b is (brick_id, (brick_bounds per dim...),)
            bid, bbnds = b
            log.debug('Determining slice for brick: %s', b)
            # Second entry of each per-dimension bound, shifted from index to size
            bexts = tuple(bnd[1] + 1 for bnd in bbnds)
            log.debug('bid=%s, bbnds=%s, bexts=%s', bid, bbnds, bexts)

            brick_slice, brick_mm = bricking_utils.get_brick_slice_nd(slice_, bbnds)

            if None in brick_slice:  # Brick does not contain any of the requested indices
                log.debug('Brick does not contain any of the requested indices: Move to next brick')
                continue

            try:
                brick_slice = utils.fix_slice(brick_slice, bexts)
            except IndexError:
                log.debug('Malformed brick_slice: move to next brick')
                continue

            if not is_broadcast:
                value_slice = bricking_utils.get_value_slice_nd(slice_, v_shp, bbnds, brick_slice, brick_mm)

                try:
                    value_slice = utils.fix_slice(value_slice, v_shp)
                except IndexError:
                    log.debug('Malformed value_slice: move to next brick')
                    continue

            log.debug('\nbrick %s:\n\tbrick_slice %s=%s\n\tmin/max=%s\n\tvalue_slice %s=%s', b,
                      utils.slice_shape(brick_slice, bexts), brick_slice, brick_mm,
                      utils.slice_shape(value_slice, v_shp), value_slice)
            v = values[value_slice]
            log.debug('\nvalues %s=\n%s', v.shape, v)
            if not self.use_hdf:
                self.bricks[bid][brick_slice] = v
            else:
                fi = self.bricks[bid]
                with HDFLockingFile(fi, 'a') as f:
                    ds = f.require_dataset(str(bid), shape=self.brick_sizes, dtype=self.dtype, chunks=None,
                                           fillvalue=-1)
                    ds[brick_slice] = v

    def get_values_from_bricks(self, slice_):
        """
        Read the region of the domain selected by ``slice_`` back from the bricks.

        @param slice_  Slice into the total domain; normalised with utils.fix_slice
        @return The squeezed result array, or a numpy scalar when exactly one value was selected
        """
        slice_ = utils.fix_slice(slice_, self.total_domain)
        # This is a list of tuples [(b_id, (bounds...),), ...]
        bricks = bricking_utils.get_bricks_from_slice(slice_, self.rtree, self.total_domain)

        ret_shp = utils.slice_shape(slice_, self.total_domain)
        # NOTE(review): np.empty leaves the buffer uninitialised, so any part of the
        # result that no brick writes to keeps arbitrary values -- confirm every
        # requested index is always covered by a brick.
        ret_arr = np.empty(ret_shp, dtype=self.dtype)

        for b in bricks:
            bid, bbnds = b
            brick_slice, brick_mm = bricking_utils.get_brick_slice_nd(slice_, bbnds)

            if None in brick_slice:  # Brick does not contain any of the requested indices
                continue

            ret_slice = bricking_utils.get_value_slice_nd(slice_, ret_shp, bbnds, brick_slice, brick_mm)

            if not self.use_hdf:
                ret_vals = self.bricks[bid][brick_slice]
            else:
                fi = self.bricks[bid]
                with HDFLockingFile(fi) as f:
                    ds = f.require_dataset(str(bid), shape=self.brick_sizes, dtype=self.dtype, chunks=None,
                                           fillvalue=-1)
                    ret_vals = ds[brick_slice]

            ret_arr[ret_slice] = ret_vals

        ret_arr = ret_arr.squeeze()

        # After squeeze() a single-element array is always 0-d; unwrap it to a scalar
        if ret_arr.ndim == 0:
            ret_arr = ret_arr[()]

        return ret_arr