def _set_values_to_brick(self, brick_guid, brick_slice, values, value_slice=None):
        brick_file_path = os.path.join(self.brick_path, '{0}.hdf5'.format(brick_guid))
        log.trace('Brick slice to fill: %s', brick_slice)
        log.trace('Value slice to extract: %s', value_slice)

        # Create the HDF5 dataset that represents one brick
        bD = tuple(self.brick_domains[1])
        cD = self.brick_domains[2]
        if value_slice is not None:
            vals = values[value_slice]
        else:
            vals = values

        if values.ndim == 0 and np.iterable(vals): # Prevent single-value strings from being iterated
            vals = [vals]

        # Dataset dtype and fill value
        data_type = self.dtype
        fv = self.fill_value

        # Check for object type
        if data_type == '|O8':
            if np.iterable(vals):
                vals = [pack(x) for x in vals]
            else:
                vals = pack(vals)

        if self.inline_data_writes:
            if data_type == '|O8':
                data_type = h5py.special_dtype(vlen=str)
            if 0 in cD or 1 in cD:
                cD = True
            with h5py.File(brick_file_path, 'a') as f:
                # TODO: Due to usage concerns, currently locking chunking to "auto"
                f.require_dataset(brick_guid, shape=bD, dtype=data_type, chunks=None, fillvalue=fv)
                f[brick_guid][brick_slice] = vals
        else:
            work_key = brick_guid
            work = (brick_slice, vals)
            work_metrics = (brick_file_path, bD, cD, data_type, fv)
            log.trace('Work key: %s', work_key)
            log.trace('Work metrics: %s', work_metrics)
            log.trace('Work[0]: %s', work[0])

            # If the brick file doesn't exist, 'touch' it to make sure it's immediately available
            if not os.path.exists(brick_file_path):
                if data_type == '|O8':
                    data_type = h5py.special_dtype(vlen=str)
                if 0 in cD or 1 in cD:
                    cD = True
                with h5py.File(brick_file_path, 'a') as f:
                    # TODO: Due to usage concerns, currently locking chunking to "auto"
                    f.require_dataset(brick_guid, shape=bD, dtype=data_type, chunks=None, fillvalue=fv)

            if self.auto_flush:
                # Immediately submit work to the dispatcher
                self.brick_dispatcher.put_work(work_key, work_metrics, work)
            else:
                # Queue the work for later flushing
                self._queue_work(work_key, work_metrics, work)
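# A minimal, self-contained sketch of the inline write path above: require_dataset
# creates the brick dataset on first use, then the slice assignment fills it.
# The file path, shape, dtype, and fill value below are illustrative assumptions,
# not values from the source.
import h5py
import numpy as np

brick_guid = 'example_brick'  # hypothetical GUID
with h5py.File('/tmp/{0}.hdf5'.format(brick_guid), 'a') as f:
    ds = f.require_dataset(brick_guid, shape=(10,), dtype='f8', fillvalue=-9999.0)
    ds[2:5] = np.array([1.0, 2.0, 3.0])  # brick_slice <- vals
    print(ds[:])  # fill value everywhere except indices 2-4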
    def _set_values_to_brick(self, brick_guid, brick_slice, values, value_slice=None):
        brick_file_path = os.path.join(self.brick_path, '{0}.hdf5'.format(brick_guid))
        log.trace('Brick slice to fill: %s', brick_slice)
        log.trace('Value slice to extract: %s', value_slice)

        # Create the HDF5 dataset that represents one brick
        bD = tuple(self.brick_domains[1])
        cD = self.brick_domains[2]
        if value_slice is not None:
            vals = values[value_slice]
        else:
            vals = values

        if values.ndim == 0 and np.iterable(vals): # Prevent single-value strings from being iterated
            vals = [vals]

        # Dataset dtype and fill value
        data_type = self.dtype
        fv = self.fill_value

        # Check for object type
        if data_type == '|O8':
            if np.iterable(vals):
                vals = [pack(x) for x in vals]
            else:
                vals = pack(vals)

        if self.inline_data_writes:
            if data_type == '|O8':
                data_type = h5py.special_dtype(vlen=str)
            if 0 in cD or 1 in cD:
                cD = True
            with HDFLockingFile(brick_file_path, 'a') as f:
                # TODO: Due to usage concerns, currently locking chunking to "auto"
                f.require_dataset(brick_guid, shape=bD, dtype=data_type, chunks=None, fillvalue=fv)
                f[brick_guid][brick_slice] = vals
        else:
            work_key = brick_guid
            work = (brick_slice, vals)
            work_metrics = (brick_file_path, bD, cD, data_type, fv)
            log.trace('Work key: %s', work_key)
            log.trace('Work metrics: %s', work_metrics)
            log.trace('Work[0]: %s', work[0])

            # If the brick file doesn't exist, 'touch' it to make sure it's immediately available
            if not os.path.exists(brick_file_path):
                if data_type == '|O8':
                    data_type = h5py.special_dtype(vlen=str)
                if 0 in cD or 1 in cD:
                    cD = True
                with HDFLockingFile(brick_file_path, 'a') as f:
                    # TODO: Due to usage concerns, currently locking chunking to "auto"
                    f.require_dataset(brick_guid, shape=bD, dtype=data_type, chunks=None, fillvalue=fv)

            if self.auto_flush:
                # Immediately submit work to the dispatcher
                self.brick_dispatcher.put_work(work_key, work_metrics, work)
            else:
                # Queue the work for later flushing
                self._queue_work(work_key, work_metrics, work)
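# HDFLockingFile itself is not shown in this listing. Below is a hypothetical
# sketch of the kind of advisory-locking wrapper it presumably provides, using
# POSIX fcntl locks on a sidecar file; the class name and locking strategy are
# assumptions.
import fcntl
import h5py

class LockedH5File(object):  # hypothetical stand-in for HDFLockingFile
    def __init__(self, path, mode='a'):
        self._lockf = open(path + '.lock', 'w')
        fcntl.flock(self._lockf, fcntl.LOCK_EX)  # block until we hold the lock
        self._h5 = h5py.File(path, mode)

    def __enter__(self):
        return self._h5

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._h5.close()
        fcntl.flock(self._lockf, fcntl.LOCK_UN)
        self._lockf.close()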
    def flush(self, deep=True):
        if self.is_dirty(deep):
            try:
                # package for storage
                insert_dict = {}
                for k in list(self._dirty):
                    v = getattr(self, k)
                    log.trace('FLUSH: key=%s  v=%s', k, v)
                    if isinstance(v, Dictable):
                        prefix = 'DICTABLE|{0}:{1}|'.format(v.__module__, v.__class__.__name__)
                        value = prefix + pack(v.dump())
                    elif k == 'brick_tree':
                        if hasattr(self, 'brick_tree') and isinstance(self.brick_tree, RTreeProxy):
                            val = self.brick_tree.serialize()
                            if val != '':
                                insert_dict['brick_tree'] = val
                        # Nothing else to store for the tree; skip the generic insert below
                        continue
                    elif k == 'parameter_metadata':
                        value = pack_parameter_manager_dict(v)
                    else:
                        value = pack(v)

                    insert_dict[k] = value

                    # Update the hash_value in _hmap
                    self._hmap[k] = hash_any(v)

                dirty_spans = self.span_collection.get_dirty_spans()
                if len(dirty_spans) > 0:
                    val = str(self.span_collection)
                    log.trace("Span tuple: %s", val)
                    value = pack(val)
                    insert_dict['span_collection'] = value


                DBFactory.get_db().insert(self.guid, insert_dict, dirty_spans)

                for span in dirty_spans:
                    span.is_dirty = False
                self._dirty.clear()

            except IOError as ex:
                if "unable to create file (File accessability: Unable to open file)" in ex.message:
                    log.info('Issue writing to hdf file during master_manager.flush - this is not likely a huge problem: %s', ex.message)
                else:
                    raise

            super(DbBackedMetadataManager, self).__setattr__('_is_dirty',False)
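# On the read side, the 'DICTABLE|module:class|' prefix written by flush() has
# to be parsed back apart. A sketch of that parse, assuming only the prefix
# format visible above; the helper name and the caller-side unpack/rehydrate
# steps are assumptions.
import importlib

def parse_dictable(stored):  # hypothetical helper
    assert stored.startswith('DICTABLE|')
    header, _, packed = stored[len('DICTABLE|'):].partition('|')
    module_name, _, class_name = header.partition(':')
    cls = getattr(importlib.import_module(module_name), class_name)
    # caller unpacks `packed` and rehydrates an instance of cls from the dict
    return cls, packed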
Example #4
    def __serialize(self, payload):
        from coverage_model.basic_types import Dictable
        if isinstance(payload, Dictable):
            prefix = 'DICTABLE|{0}:{1}|'.format(payload.__module__, payload.__class__.__name__)
            payload = prefix + pack(payload.dump())

        return payload
    def pack(self):
        pack_dict = {}
        for k, v in self.__dict__.iteritems():
            if k in self._ignore or k.startswith('_'):
                continue
            if isinstance(v, Dictable):
                prefix = 'DICTABLE|{0}:{1}|'.format(v.__module__, v.__class__.__name__)
                v = prefix + pack(v.dump())
            pack_dict[k] = v

        return msgpack.packb(pack_dict)
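# Round-trip sketch for the msgpack-based pack() above. unpackb restores the
# plain-dict state; DICTABLE entries come back as prefixed strings and still
# need the rehydration step. Depending on your msgpack version you may need
# raw=False (or encoding='utf-8') to get text keys back.
import msgpack

state = msgpack.packb({'name': 'temp', 'fill_value': -9999})
restored = msgpack.unpackb(state)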
Example #7
    def msgpack(self, compressors=None):
        _dict = {'id': self.id, 'ingest_time': self.ingest_time, 'ingest_time_dict': self.ingest_times,
                 'coverage_id': self.coverage_id, 'mutable': self.mutable}
        if compressors is None:
            compressors = self.compressors
        data_dict = {}
        for param, data in self.param_dict.iteritems():
            if param == self.ingest_time_str:
                continue
            if compressors is not None:
                data_dict[param] = compressors[param].compress(data)
            else:
                data_dict[param] = [data.tolist(), str(data.dtype), data.shape]
        _dict['params'] = data_dict
        from coverage_model.persistence_helpers import pack
        js = pack(_dict)
        return js
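# Sketch of reconstructing an array from the uncompressed triple stored above,
# i.e. [values-as-list, dtype-string, shape]. The function name is an
# assumption; the triple layout comes from the code.
import numpy as np

def restore_param(triple):
    values, dtype_str, shape = triple
    return np.asarray(values, dtype=np.dtype(dtype_str)).reshape(shape)

arr = restore_param([[1.5, 2.5, 3.5, 4.5], 'float64', (2, 2)])  # 2x2 float64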
    def __setitem__(self, slice_, value):
        # Always storing in first slot - ignore slice
        bid = 'sparse_value_brick'

        bD = (1,)
        cD = None
        brick_file_path = '{0}/{1}.hdf5'.format(self.brick_path, bid)

        vals = [self.__serialize(v) for v in value]

        vals = pack(vals)

        set_arr = np.empty(1, dtype=object)
        set_arr[0] = vals

        data_type = h5py.special_dtype(vlen=str)

        if self.inline_data_writes:
            with HDFLockingFile(brick_file_path, 'a') as f:
                f.require_dataset(bid, shape=bD, dtype=data_type, chunks=cD, fillvalue=None)
                f[bid][0] = set_arr
        else:
            work_key = bid
            work = ((0,), set_arr)
            work_metrics = (brick_file_path, bD, cD, data_type, None)

            # If the brick file doesn't exist, 'touch' it to make sure it's immediately available
            if not os.path.exists(brick_file_path):
                with HDFLockingFile(brick_file_path, 'a') as f:
                    # TODO: Due to usage concerns, currently locking chunking to "auto"
                    f.require_dataset(bid, shape=bD, dtype=data_type, chunks=cD, fillvalue=None)

            if self.auto_flush:
                # Immediately submit work to the dispatcher
                self.brick_dispatcher.put_work(work_key, work_metrics, work)
            else:
                # Queue the work for later flushing
                self._queue_work(work_key, work_metrics, work)
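# Mirror-image read path for the sparse brick above: the single slot holds one
# packed string containing the serialized value list. The path is illustrative
# and unpack() (the inverse of pack()) is assumed, not shown in this listing.
import h5py

with h5py.File('/tmp/sparse_value_brick.hdf5', 'r') as f:  # illustrative path
    raw = f['sparse_value_brick'][0]
# vals = unpack(raw)  # assumed inverse of pack(); yields the stored value list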
    def __setitem__(self, slice_, value):
        # Always storing in first slot - ignore slice
        bid = 'sparse_value_brick'

        bD = (1,)
        cD = None
        brick_file_path = '{0}/{1}.hdf5'.format(self.brick_path, bid)

        vals = [self.__serialize(v) for v in value]

        vals = pack(vals)

        set_arr = np.empty(1, dtype=object)
        set_arr[0] = vals

        data_type = h5py.special_dtype(vlen=str)

        if self.inline_data_writes:
            with h5py.File(brick_file_path, 'a') as f:
                f.require_dataset(bid, shape=bD, dtype=data_type, chunks=cD, fillvalue=None)
                f[bid][0] = set_arr
        else:
            work_key = bid
            work = ((0,), set_arr)
            work_metrics = (brick_file_path, bD, cD, data_type, None)

            # If the brick file doesn't exist, 'touch' it to make sure it's immediately available
            if not os.path.exists(brick_file_path):
                with h5py.File(brick_file_path, 'a') as f:
                    # TODO: Due to usage concerns, currently locking chunking to "auto"
                    f.require_dataset(bid, shape=bD, dtype=data_type, chunks=cD, fillvalue=None)

            if self.auto_flush:
                # Immediately submit work to the dispatcher
                self.brick_dispatcher.put_work(work_key, work_metrics, work)
            else:
                # Queue the work for later flushing
                self._queue_work(work_key, work_metrics, work)
Example #10
    def __setitem__(self, slice_, value):
        # Always storing in first slot - ignore slice
        bid = 'sparse_value_brick'

        bD = (1,)
        cD = None
        brick_file_path = '{0}/{1}.hdf5'.format(self.brick_path, bid)

        vals = [self.__serialize(v) for v in value]

        vals = pack(vals)

        set_arr = np.empty(1, dtype=object)
        set_arr[0] = vals

        data_type = h5py.special_dtype(vlen=str)

        if self.inline_data_writes:
            with HDFLockingFile(brick_file_path, 'a') as f:
                f.require_dataset(bid, shape=bD, dtype=data_type, chunks=cD, fillvalue=None)
                f[bid][0] = set_arr
        else:
            work_key = bid
            work = ((0,), set_arr)
            work_metrics = (brick_file_path, bD, cD, data_type, None)

            # If the brick file doesn't exist, 'touch' it to make sure it's immediately available
            if not os.path.exists(brick_file_path):
                with HDFLockingFile(brick_file_path, 'a') as f:
                    # TODO: Due to usage concerns, currently locking chunking to "auto"
                    f.require_dataset(bid, shape=bD, dtype=data_type, chunks=cD, fillvalue=None)

            if self.auto_flush:
                # Immediately submit work to the dispatcher
                self.brick_dispatcher.put_work(work_key, work_metrics, work)
            else:
                # Queue the work for later flushing
                self._queue_work(work_key, work_metrics, work)

        if self.parameter_manager.parameter_name in self.master_manager.param_groups:
            try:
                log.trace("Parameter: %s , Values: %s , Fill: %s", self.parameter_manager.parameter_name, value, self.fill_value)
                min_val = min(value)
                max_val = max(value)
                log.trace("Value type: %s %s", type(value[0]), len(value))
                if len(value) > 0 and isinstance(value[0], Span):
                    min_val = self.fill_value
                    max_val = self.fill_value
                    mins = []
                    maxes = []
                    for span in value:
                        log.trace("Span min/max %s", span)
                        if isinstance(span, Span):
                            tup = span.tuplize()
                            tup = [x for x in tup if x is not None and x != self.fill_value and isinstance(x, numbers.Number)]
                            if len(tup) > 0:
                                mins.append(min(tup))
                                maxes.append(max(tup))
                    if len(mins) > 0:
                        min_val = min(mins)
                    if len(maxes) > 0:
                        max_val = max(maxes)
                if max_val is not None and min_val is not None:
                    log.trace("SparsePersistedStorage saving %s min/max %s/%s", self.parameter_manager.parameter_name, min_val, max_val)
                    self.master_manager.track_data_written_to_brick(bid, 0, self.parameter_manager.parameter_name, min_val, max_val)
            except TypeError as e:
                log.debug("Don't store extents for types for which extents are meaningless: %s", type(v))
                raise
            except ValueError as e:
                log.warning("Values stored for extents were invalid for brick=%s, slice=%s, param=%s min/max=%s",
                            bid, 0, self.parameter_manager.parameter_name, str( (min(value), max(value)) ))
                raise
            except Exception as e:
                log.warning("Could not store Span extents for %s.  Unexpected error %s",
                            str( (bid, 0, self.parameter_manager.parameter_name)), e.message )
                raise
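# Standalone sketch of the extent computation above: collect the numeric,
# non-fill endpoints of each span tuple, then reduce to a global min/max.
# The function name is an assumption.
import numbers

def span_extents(span_tuples, fill_value):
    mins, maxes = [], []
    for tup in span_tuples:
        nums = [x for x in tup
                if x is not None and x != fill_value and isinstance(x, numbers.Number)]
        if nums:
            mins.append(min(nums))
            maxes.append(max(nums))
    return (min(mins) if mins else fill_value,
            max(maxes) if maxes else fill_value)

print(span_extents([(0.0, 5.0), (None, 7.5), (-9999.0,)], -9999.0))  # (0.0, 7.5)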
Example #11
    def __setitem__(self, slice_, value):
        """
        Called to implement assignment of self[slice_, value].

        Not implemented by the abstract class

        @param slice    A set of valid constraints - int, [int,], (int,), or slice
        @param value    The value to assign to the storage at location slice_
        @raise  ValueError when brick contains no values for specified slice
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        if not isinstance(slice_, (list,tuple)):
            slice_ = [slice_]
        log.debug('setitem slice_: %s', slice_)
        val = np.asanyarray(value)
        val_origin = [0 for x in range(val.ndim)]

        brick_origin_offset = 0

        bricks = self._bricks_from_slice(slice_)
        log.trace('Slice %s indicates bricks: %s', slice_, bricks)

        for idx, brick_guid in bricks:
            # Figuring out which part of brick to set values
            try:
                brick_slice, value_slice, brick_origin_offset = self._calc_slices(slice_, brick_guid, val, val_origin, brick_origin_offset)
                log.trace('brick_slice: %s, value_slice: %s, brick_origin_offset: %s', brick_slice, value_slice, brick_origin_offset)
                if brick_slice is None:
                    raise ValueError('Brick contains no values for specified slice')
            except ValueError as ve:
                log.warn(ve.message + '; moving to next brick')
                continue

            brick_file_path = os.path.join(self.brick_path, '{0}.hdf5'.format(brick_guid))
            log.trace('Brick slice to fill: %s', brick_slice)
            log.trace('Value slice to extract: %s', value_slice)

            # Create the HDF5 dataset that represents one brick
            bD = tuple(self.brick_domains[1])
            cD = self.brick_domains[2]
            v = val[value_slice]
            if val.ndim == 0 and np.iterable(v): # Prevent single-value strings from being iterated
                v = [v]

            # Dataset dtype and fill value
            data_type = self.dtype
            fv = self.fill_value

            # Check for object type
            if data_type == '|O8':
                if np.iterable(v):
                    v = [pack(x) for x in v]
                else:
                    v = pack(v)

            work_key = brick_guid
            work = (brick_slice, v)
            work_metrics = (brick_file_path, bD, cD, data_type, fv)
            log.trace('Work key: %s', work_key)
            log.trace('Work metrics: %s', work_metrics)
            log.trace('Work[0]: %s', work[0])

            if self.inline_data_writes:
                if data_type == '|O8':
                    data_type = h5py.special_dtype(vlen=str)
                if 0 in cD or 1 in cD:
                    cD = True
                with h5py.File(brick_file_path, 'a') as f:
                    # TODO: Due to usage concerns, currently locking chunking to "auto"
                    f.require_dataset(brick_guid, shape=bD, dtype=data_type, chunks=True, fillvalue=fv)
                    if isinstance(brick_slice, tuple):
                        brick_slice = list(brick_slice)

                    f[brick_guid].__setitem__(*brick_slice, val=v)
            else:
                # If the brick file doesn't exist, 'touch' it to make sure it's immediately available
                if not os.path.exists(brick_file_path):
                    if data_type == '|O8':
                        data_type = h5py.special_dtype(vlen=str)
                    if 0 in cD or 1 in cD:
                        cD = True
                    with h5py.File(brick_file_path, 'a') as f:
                        # TODO: Due to usage concerns, currently locking chunking to "auto"
                        f.require_dataset(brick_guid, shape=bD, dtype=data_type, chunks=True, fillvalue=fv)

                if self.auto_flush:
                    # Immediately submit work to the dispatcher
                    self.brick_dispatcher.put_work(work_key, work_metrics, work)
                else:
                    # Queue the work for later flushing
                    self._queue_work(work_key, work_metrics, work)
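# _bricks_from_slice is not shown here. An illustrative 1-D sketch of what it
# must do: map a global slice onto (brick index, local slice) pairs for
# fixed-size bricks. Assumes step == 1; the helper name and brick layout are
# assumptions.
def bricks_from_slice_1d(sl, brick_size, total_extent):
    start, stop, _ = sl.indices(total_extent)
    pairs = []
    for i in range(start // brick_size, (stop - 1) // brick_size + 1):
        lo = max(start, i * brick_size) - i * brick_size
        hi = min(stop, (i + 1) * brick_size) - i * brick_size
        pairs.append((i, slice(lo, hi)))
    return pairs

print(bricks_from_slice_1d(slice(3, 12), 5, 20))
# [(0, slice(3, 5)), (1, slice(0, 5)), (2, slice(0, 2))]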
Example #12
    def _get_master_attribute(self, att):
        if att in ("inline_data_writes", "auto_flush_values"):
            return pack(True)
        else:
            raise NotImplementedError("Not sure what to do with attribute: {0}".format(att))
Example #13
    def _get_master_attribute(self, att):
        if att in ('inline_data_writes', 'auto_flush_values'):
            return pack(True)
        else:
            raise NotImplementedError(
                'Not sure what to do with attribute: {0}'.format(att))
Example #14
    def __setitem__(self, slice_, value):
        if not isinstance(slice_, (list,tuple)):
            slice_ = [slice_]
        log.debug('setitem slice_: %s', slice_)
        val = np.asanyarray(value)
        val_origin = [0 for x in range(val.ndim)]

        brick_origin_offset = 0

        bricks = self._bricks_from_slice(slice_)
        log.trace('Slice %s indicates bricks: %s', slice_, bricks)

        for idx, brick_guid in bricks:
            # Figuring out which part of brick to set values
            try:
                brick_slice, value_slice, brick_origin_offset = self._calc_slices(slice_, brick_guid, val, val_origin, brick_origin_offset)
                log.trace('brick_slice: %s, value_slice: %s, brick_origin_offset: %s', brick_slice, value_slice, brick_origin_offset)
                if brick_slice is None:
                    raise ValueError('Brick contains no values for specified slice')
            except ValueError as ve:
                log.warn(ve.message + '; moving to next brick')
                continue

            brick_file_path = os.path.join(self.brick_path, '{0}.hdf5'.format(brick_guid))
            log.trace('Brick slice to fill: %s', brick_slice)
            log.trace('Value slice to extract: %s', value_slice)

            # Create the HDF5 dataset that represents one brick
            bD = tuple(self.brick_domains[1])
            cD = self.brick_domains[2]
            v = val[value_slice]
            if val.ndim == 0 and np.iterable(v): # Prevent single-value strings from being iterated
                v = [v]

            # Dataset dtype and fill value
            data_type = self.dtype
            fv = self.fill_value

            # Check for object type
            if data_type == '|O8':
                if np.iterable(v):
                    v = [pack(x) for x in v]
                else:
                    v = pack(v)

            work_key = brick_guid
            work = (brick_slice, v)
            work_metrics = (brick_file_path, bD, cD, data_type, fv)
            log.trace('Work key: %s', work_key)
            log.trace('Work metrics: %s', work_metrics)
            log.trace('Work[0]: %s', work[0])

            # If the brick file doesn't exist, 'touch' it to make sure it's immediately available
            if not os.path.exists(brick_file_path):
                if data_type == '|O8':
                    data_type = h5py.special_dtype(vlen=str)
                # Turn 0 & 1 chunk sizes into True (auto-chunking)
                if 0 in cD or 1 in cD:
                    cD = True
                with h5py.File(brick_file_path, 'a') as f:
                    # TODO: Due to usage concerns, currently locking chunking to "auto"
                    f.require_dataset(brick_guid, shape=bD, dtype=data_type, chunks=True, fillvalue=fv)

            #region FOR TESTING WITHOUT OUT-OF-BAND WRITES - IN-LINE WRITING OF VALUES
#            if data_type == '|O8':
#                data_type = h5py.special_dtype(vlen=str)
#                # TODO: Uncomment this to properly turn 0 & 1 chunking into True
#            if 0 in cD or 1 in cD:
#                cD = True
#            with h5py.File(brick_file_path, 'a') as f:
#                # TODO: Due to usage concerns, currently locking chunking to "auto"
#                f.require_dataset(brick_guid, shape=bD, dtype=data_type, chunks=True, fillvalue=fv)
#            if isinstance(brick_slice, tuple):
#                brick_slice = list(brick_slice)
#
#            f[brick_guid].__setitem__(*brick_slice, val=v)
            #endregion

            if self.auto_flush:
                # Immediately submit work to the dispatcher
                self.brick_dispatcher.put_work(work_key, work_metrics, work)
            else:
                # Queue the work for later flushing
                self._queue_work(work_key, work_metrics, work)
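# Minimal sketch of the auto_flush dispatch pattern used throughout these
# examples: work is either handed straight to a dispatcher (auto_flush) or
# queued per key and flushed later. Class and method names are assumptions
# standing in for brick_dispatcher / _queue_work.
from collections import defaultdict

class WorkQueue(object):  # hypothetical stand-in for the brick dispatcher
    def __init__(self):
        self._pending = defaultdict(list)

    def put_work(self, key, metrics, work):
        brick_slice, values = work
        print('writing {0} slice={1}'.format(key, brick_slice))

    def queue_work(self, key, metrics, work):
        self._pending[key].append((metrics, work))

    def flush(self):
        for key, items in self._pending.items():
            for metrics, work in items:
                self.put_work(key, metrics, work)
        self._pending.clear()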