def _set_values_to_brick(self, brick_guid, brick_slice, values, value_slice=None):
    brick_file_path = os.path.join(self.brick_path, '{0}.hdf5'.format(brick_guid))
    log.trace('Brick slice to fill: %s', brick_slice)
    log.trace('Value slice to extract: %s', value_slice)

    # Create the HDF5 dataset that represents one brick
    bD = tuple(self.brick_domains[1])
    cD = self.brick_domains[2]

    if value_slice is not None:
        vals = values[value_slice]
    else:
        vals = values

    if values.ndim == 0 and len(values.shape) == 0 and np.iterable(vals):
        # Prevent single-value strings from being iterated
        vals = [vals]

    data_type = self.dtype
    fv = self.fill_value

    # Check for object type
    if data_type == '|O8':
        if np.iterable(vals):
            vals = [pack(x) for x in vals]
        else:
            vals = pack(vals)

    if self.inline_data_writes:
        if data_type == '|O8':
            data_type = h5py.special_dtype(vlen=str)
        if 0 in cD or 1 in cD:
            cD = True
        with h5py.File(brick_file_path, 'a') as f:
            # TODO: Due to usage concerns, currently locking chunking to "auto"
            f.require_dataset(brick_guid, shape=bD, dtype=data_type, chunks=None, fillvalue=fv)
            f[brick_guid][brick_slice] = vals
    else:
        work_key = brick_guid
        work = (brick_slice, vals)
        work_metrics = (brick_file_path, bD, cD, data_type, fv)
        log.trace('Work key: %s', work_key)
        log.trace('Work metrics: %s', work_metrics)
        log.trace('Work[0]: %s', work[0])

        # If the brick file doesn't exist, 'touch' it to make sure it's immediately available
        if not os.path.exists(brick_file_path):
            if data_type == '|O8':
                data_type = h5py.special_dtype(vlen=str)
            if 0 in cD or 1 in cD:
                cD = True
            with h5py.File(brick_file_path, 'a') as f:
                # TODO: Due to usage concerns, currently locking chunking to "auto"
                f.require_dataset(brick_guid, shape=bD, dtype=data_type, chunks=None, fillvalue=fv)

        if self.auto_flush:
            # Immediately submit work to the dispatcher
            self.brick_dispatcher.put_work(work_key, work_metrics, work)
        else:
            # Queue the work for later flushing
            self._queue_work(work_key, work_metrics, work)

def _set_values_to_brick(self, brick_guid, brick_slice, values, value_slice=None):
    brick_file_path = os.path.join(self.brick_path, '{0}.hdf5'.format(brick_guid))
    log.trace('Brick slice to fill: %s', brick_slice)
    log.trace('Value slice to extract: %s', value_slice)

    # Create the HDF5 dataset that represents one brick
    bD = tuple(self.brick_domains[1])
    cD = self.brick_domains[2]

    if value_slice is not None:
        vals = values[value_slice]
    else:
        vals = values

    if values.ndim == 0 and len(values.shape) == 0 and np.iterable(vals):
        # Prevent single-value strings from being iterated
        vals = [vals]

    data_type = self.dtype
    fv = self.fill_value

    # Check for object type
    if data_type == '|O8':
        if np.iterable(vals):
            vals = [pack(x) for x in vals]
        else:
            vals = pack(vals)

    if self.inline_data_writes:
        if data_type == '|O8':
            data_type = h5py.special_dtype(vlen=str)
        if 0 in cD or 1 in cD:
            cD = True
        with HDFLockingFile(brick_file_path, 'a') as f:
            # TODO: Due to usage concerns, currently locking chunking to "auto"
            f.require_dataset(brick_guid, shape=bD, dtype=data_type, chunks=None, fillvalue=fv)
            f[brick_guid][brick_slice] = vals
    else:
        work_key = brick_guid
        work = (brick_slice, vals)
        work_metrics = (brick_file_path, bD, cD, data_type, fv)
        log.trace('Work key: %s', work_key)
        log.trace('Work metrics: %s', work_metrics)
        log.trace('Work[0]: %s', work[0])

        # If the brick file doesn't exist, 'touch' it to make sure it's immediately available
        if not os.path.exists(brick_file_path):
            if data_type == '|O8':
                data_type = h5py.special_dtype(vlen=str)
            if 0 in cD or 1 in cD:
                cD = True
            with HDFLockingFile(brick_file_path, 'a') as f:
                # TODO: Due to usage concerns, currently locking chunking to "auto"
                f.require_dataset(brick_guid, shape=bD, dtype=data_type, chunks=None, fillvalue=fv)

        if self.auto_flush:
            # Immediately submit work to the dispatcher
            self.brick_dispatcher.put_work(work_key, work_metrics, work)
        else:
            # Queue the work for later flushing
            self._queue_work(work_key, work_metrics, work)

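# Illustrative sketch (standalone, with hypothetical file and dataset names -
# not part of the original module): the minimal h5py pattern both variants
# above rely on. require_dataset() opens the brick's dataset if it already
# exists, or creates it with the brick shape, dtype and fill value, and the
# slice assignment writes only the targeted region of the brick.
import h5py
import numpy as np

def write_brick_region_sketch(path='example_brick.hdf5', name='brick0'):
    with h5py.File(path, 'a') as f:
        dset = f.require_dataset(name, shape=(10,), dtype='f8', fillvalue=-9999.0)
        dset[2:5] = np.array([1.0, 2.0, 3.0])  # untouched slots keep the fill value
        return dset[:]
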
def flush(self, deep=True):
    if self.is_dirty(deep):
        try:
            # Package for storage
            insert_dict = {}
            for k in list(self._dirty):
                v = getattr(self, k)
                log.trace('FLUSH: key=%s v=%s', k, v)
                if isinstance(v, Dictable):
                    prefix = 'DICTABLE|{0}:{1}|'.format(v.__module__, v.__class__.__name__)
                    value = prefix + pack(v.dump())
                elif k == 'brick_tree':
                    if hasattr(self, 'brick_tree') and isinstance(self.brick_tree, RTreeProxy):
                        val = self.brick_tree.serialize()
                        if val != '':
                            insert_dict['brick_tree'] = val
                    continue
                elif k == 'parameter_metadata':
                    value = pack_parameter_manager_dict(v)
                else:
                    value = pack(v)

                insert_dict[k] = value

                # Update the hash_value in _hmap
                self._hmap[k] = hash_any(v)

            dirty_spans = self.span_collection.get_dirty_spans()
            if len(dirty_spans) > 0:
                val = str(self.span_collection)
                log.trace("Span tuple: %s", val)
                value = pack(val)
                insert_dict['span_collection'] = value

            DBFactory.get_db().insert(self.guid, insert_dict, dirty_spans)

            for span in dirty_spans:
                span.is_dirty = False
            self._dirty.clear()
        except IOError as ex:
            if "unable to create file (File accessability: Unable to open file)" in ex.message:
                log.info('Issue writing to hdf file during master_manager.flush - this is not likely a huge problem: %s', ex.message)
            else:
                raise

    super(DbBackedMetadataManager, self).__setattr__('_is_dirty', False)

def __serialize(self, payload):
    from coverage_model.basic_types import Dictable
    if isinstance(payload, Dictable):
        prefix = 'DICTABLE|{0}:{1}|'.format(payload.__module__, payload.__class__.__name__)
        payload = prefix + pack(payload.dump())
    return payload

def pack(self):
    pack_dict = {}
    for k, v in self.__dict__.iteritems():
        if k in self._ignore or k.startswith('_'):
            continue
        if isinstance(v, Dictable):
            prefix = 'DICTABLE|{0}:{1}|'.format(v.__module__, v.__class__.__name__)
            v = prefix + pack(v.dump())
        pack_dict[k] = v
    return msgpack.packb(pack_dict)

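# Illustrative decoder sketch (an assumption, not the project's actual read
# path): the 'DICTABLE|<module>:<class>|' prefix written by the serializers
# above carries enough information to locate the class again on load. Here
# 'unpack' stands in for the inverse of pack(), and rebuilding the object
# from the dumped dict via constructor kwargs is assumed.
import importlib

def decode_dictable_sketch(payload, unpack):
    if isinstance(payload, str) and payload.startswith('DICTABLE|'):
        _, clspath, packed = payload.split('|', 2)
        module_name, class_name = clspath.split(':')
        cls = getattr(importlib.import_module(module_name), class_name)
        return cls(**unpack(packed))  # assumes dump() produced constructor kwargs
    return payload
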
def msgpack(self, compressors=None):
    _dict = {
        'id': self.id,
        'ingest_time': self.ingest_time,
        'ingest_time_dict': self.ingest_times,
        'coverage_id': self.coverage_id,
        'mutable': self.mutable,
    }
    if compressors is None:
        compressors = self.compressors
    data_dict = {}
    for param, data in self.param_dict.iteritems():
        if param == self.ingest_time_str:
            continue
        if compressors is not None:
            data_dict[param] = compressors[param].compress(data)
        else:
            data_dict[param] = [data.tolist(), str(data.dtype), data.shape]
    _dict['params'] = data_dict
    from coverage_model.persistence_helpers import pack
    js = pack(_dict)
    return js

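# Illustrative round-trip sketch (not from the source): the uncompressed
# branch above stores each parameter as [values, dtype, shape], which
# round-trips cleanly through msgpack and back into a numpy array.
import msgpack
import numpy as np

def roundtrip_param_sketch(data):
    encoded = msgpack.packb([data.tolist(), str(data.dtype), list(data.shape)])
    values, dtype, shape = msgpack.unpackb(encoded)
    return np.asarray(values, dtype=dtype).reshape(shape)
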
def __setitem__(self, slice_, value):
    # Always storing in first slot - ignore slice
    bid = 'sparse_value_brick'
    bD = (1,)
    cD = None
    brick_file_path = '{0}/{1}.hdf5'.format(self.brick_path, bid)

    vals = [self.__serialize(v) for v in value]
    vals = pack(vals)
    set_arr = np.empty(1, dtype=object)
    set_arr[0] = vals

    data_type = h5py.special_dtype(vlen=str)

    if self.inline_data_writes:
        with HDFLockingFile(brick_file_path, 'a') as f:
            f.require_dataset(bid, shape=bD, dtype=data_type, chunks=cD, fillvalue=None)
            f[bid][0] = set_arr
    else:
        work_key = bid
        work = ((0,), set_arr)
        work_metrics = (brick_file_path, bD, cD, data_type, None)

        # If the brick file doesn't exist, 'touch' it to make sure it's immediately available
        if not os.path.exists(brick_file_path):
            with HDFLockingFile(brick_file_path, 'a') as f:
                # TODO: Due to usage concerns, currently locking chunking to "auto"
                f.require_dataset(bid, shape=bD, dtype=data_type, chunks=cD, fillvalue=None)

        if self.auto_flush:
            # Immediately submit work to the dispatcher
            self.brick_dispatcher.put_work(work_key, work_metrics, work)
        else:
            # Queue the work for later flushing
            self._queue_work(work_key, work_metrics, work)

def __setitem__(self, slice_, value):
    # Always storing in first slot - ignore slice
    bid = 'sparse_value_brick'
    bD = (1,)
    cD = None
    brick_file_path = '{0}/{1}.hdf5'.format(self.brick_path, bid)

    vals = [self.__serialize(v) for v in value]
    vals = pack(vals)
    set_arr = np.empty(1, dtype=object)
    set_arr[0] = vals

    data_type = h5py.special_dtype(vlen=str)

    if self.inline_data_writes:
        with h5py.File(brick_file_path, 'a') as f:
            f.require_dataset(bid, shape=bD, dtype=data_type, chunks=cD, fillvalue=None)
            f[bid][0] = set_arr
    else:
        work_key = bid
        work = ((0,), set_arr)
        work_metrics = (brick_file_path, bD, cD, data_type, None)

        # If the brick file doesn't exist, 'touch' it to make sure it's immediately available
        if not os.path.exists(brick_file_path):
            with h5py.File(brick_file_path, 'a') as f:
                # TODO: Due to usage concerns, currently locking chunking to "auto"
                f.require_dataset(bid, shape=bD, dtype=data_type, chunks=cD, fillvalue=None)

        if self.auto_flush:
            # Immediately submit work to the dispatcher
            self.brick_dispatcher.put_work(work_key, work_metrics, work)
        else:
            # Queue the work for later flushing
            self._queue_work(work_key, work_metrics, work)

def __setitem__(self, slice_, value):
    # Always storing in first slot - ignore slice
    bid = 'sparse_value_brick'
    bD = (1,)
    cD = None
    brick_file_path = '{0}/{1}.hdf5'.format(self.brick_path, bid)

    vals = [self.__serialize(v) for v in value]
    vals = pack(vals)
    set_arr = np.empty(1, dtype=object)
    set_arr[0] = vals

    data_type = h5py.special_dtype(vlen=str)

    if self.inline_data_writes:
        with HDFLockingFile(brick_file_path, 'a') as f:
            f.require_dataset(bid, shape=bD, dtype=data_type, chunks=cD, fillvalue=None)
            f[bid][0] = set_arr
    else:
        work_key = bid
        work = ((0,), set_arr)
        work_metrics = (brick_file_path, bD, cD, data_type, None)

        # If the brick file doesn't exist, 'touch' it to make sure it's immediately available
        if not os.path.exists(brick_file_path):
            with HDFLockingFile(brick_file_path, 'a') as f:
                # TODO: Due to usage concerns, currently locking chunking to "auto"
                f.require_dataset(bid, shape=bD, dtype=data_type, chunks=cD, fillvalue=None)

        if self.auto_flush:
            # Immediately submit work to the dispatcher
            self.brick_dispatcher.put_work(work_key, work_metrics, work)
        else:
            # Queue the work for later flushing
            self._queue_work(work_key, work_metrics, work)

    if self.parameter_manager.parameter_name in self.master_manager.param_groups:
        try:
            log.trace("Parameter: %s , Values: %s , Fill: %s", self.parameter_manager.parameter_name, value, self.fill_value)
            min_val = min(value)
            max_val = max(value)
            log.trace("Value type: %s %s", type(value[0]), len(value))
            if len(value) > 0 and isinstance(value[0], Span):
                min_val = self.fill_value
                max_val = self.fill_value
                mins = []
                maxes = []
                for span in value:
                    log.trace("Span min/max %s", span)
                    if isinstance(span, Span):
                        tup = span.tuplize()
                        tup = [x for x in tup if x is not None and x is not self.fill_value and isinstance(x, numbers.Number)]
                        if len(tup) > 0:
                            mins.append(min(tup))
                            maxes.append(max(tup))
                if len(mins) > 0:
                    min_val = min(mins)
                if len(maxes) > 0:
                    max_val = max(maxes)
            if max_val is not None and min_val is not None:
                log.trace("SparsePersistedStorage saving %s min/max %s/%s", self.parameter_manager.parameter_name, min_val, max_val)
                self.master_manager.track_data_written_to_brick(bid, 0, self.parameter_manager.parameter_name, min_val, max_val)
        except TypeError:
            log.debug("Don't store extents for types for which extents are meaningless: %s", type(value))
            raise
        except ValueError:
            log.warning("Values stored for extents were invalid for brick=%s, slice=%s, param=%s min/max=%s",
                        bid, 0, self.parameter_manager.parameter_name, str((min(value), max(value))))
            raise
        except Exception as e:
            log.warning("Could not store Span extents for %s. Unexpected error %s",
                        str((bid, 0, self.parameter_manager.parameter_name)), e.message)
            raise

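# Illustrative sketch (standalone, hypothetical file name): the sparse-brick
# variants above store one packed blob per slot in a variable-length string
# dataset. h5py needs the special vlen dtype for that; a fixed-width string
# dtype would truncate the payload.
import h5py

def write_vlen_blob_sketch(path='sparse_example.hdf5'):
    dt = h5py.special_dtype(vlen=str)
    with h5py.File(path, 'a') as f:
        dset = f.require_dataset('sparse_value_brick', shape=(1,), dtype=dt, fillvalue=None)
        dset[0] = 'packed-payload-of-any-length'  # always the first (only) slot
    return path
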
def __setitem__(self, slice_, value):
    """
    Called to implement assignment of self[slice_] = value.
    Not implemented by the abstract class.

    @param slice_  A set of valid constraints - int, [int,], (int,), or slice
    @param value   The value to assign to the storage at location slice_
    @raise ValueError when a brick contains no values for the specified slice
    """
    if self.mode == 'r':
        raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

    if not isinstance(slice_, (list, tuple)):
        slice_ = [slice_]
    log.debug('setitem slice_: %s', slice_)
    val = np.asanyarray(value)
    val_origin = [0 for x in range(val.ndim)]

    brick_origin_offset = 0
    bricks = self._bricks_from_slice(slice_)
    log.trace('Slice %s indicates bricks: %s', slice_, bricks)

    for idx, brick_guid in bricks:
        # Figure out which part of the brick to set values in
        try:
            brick_slice, value_slice, brick_origin_offset = self._calc_slices(slice_, brick_guid, val, val_origin, brick_origin_offset)
            log.trace('brick_slice: %s, value_slice: %s, brick_origin_offset: %s', brick_slice, value_slice, brick_origin_offset)
            if brick_slice is None:
                raise ValueError('Brick contains no values for specified slice')
        except ValueError as ve:
            log.warn(ve.message + '; moving to next brick')
            continue

        brick_file_path = os.path.join(self.brick_path, '{0}.hdf5'.format(brick_guid))
        log.trace('Brick slice to fill: %s', brick_slice)
        log.trace('Value slice to extract: %s', value_slice)

        # Create the HDF5 dataset that represents one brick
        bD = tuple(self.brick_domains[1])
        cD = self.brick_domains[2]

        v = val[value_slice]
        if val.ndim == 0 and len(val.shape) == 0 and np.iterable(v):
            # Prevent single-value strings from being iterated
            v = [v]

        data_type = self.dtype
        fv = self.fill_value

        # Check for object type
        if data_type == '|O8':
            if np.iterable(v):
                v = [pack(x) for x in v]
            else:
                v = pack(v)

        work_key = brick_guid
        work = (brick_slice, v)
        work_metrics = (brick_file_path, bD, cD, data_type, fv)
        log.trace('Work key: %s', work_key)
        log.trace('Work metrics: %s', work_metrics)
        log.trace('Work[0]: %s', work[0])

        if self.inline_data_writes:
            if data_type == '|O8':
                data_type = h5py.special_dtype(vlen=str)
            if 0 in cD or 1 in cD:
                cD = True
            with h5py.File(brick_file_path, 'a') as f:
                # TODO: Due to usage concerns, currently locking chunking to "auto"
                f.require_dataset(brick_guid, shape=bD, dtype=data_type, chunks=True, fillvalue=fv)
                f[brick_guid][brick_slice] = v
        else:
            # If the brick file doesn't exist, 'touch' it to make sure it's immediately available
            if not os.path.exists(brick_file_path):
                if data_type == '|O8':
                    data_type = h5py.special_dtype(vlen=str)
                if 0 in cD or 1 in cD:
                    cD = True
                with h5py.File(brick_file_path, 'a') as f:
                    # TODO: Due to usage concerns, currently locking chunking to "auto"
                    f.require_dataset(brick_guid, shape=bD, dtype=data_type, chunks=True, fillvalue=fv)

            if self.auto_flush:
                # Immediately submit work to the dispatcher
                self.brick_dispatcher.put_work(work_key, work_metrics, work)
            else:
                # Queue the work for later flushing
                self._queue_work(work_key, work_metrics, work)

def _get_master_attribute(self, att):
    if att in ("inline_data_writes", "auto_flush_values"):
        return pack(True)
    else:
        raise NotImplementedError("Not sure what to do with attribute: {0}".format(att))

def _get_master_attribute(self, att):
    if att in ('inline_data_writes', 'auto_flush_values'):
        return pack(True)
    else:
        raise NotImplementedError(
            'Not sure what to do with attribute: {0}'.format(att))

def __setitem__(self, slice_, value):
    if not isinstance(slice_, (list, tuple)):
        slice_ = [slice_]
    log.debug('setitem slice_: %s', slice_)
    val = np.asanyarray(value)
    val_origin = [0 for x in range(val.ndim)]

    brick_origin_offset = 0
    bricks = self._bricks_from_slice(slice_)
    log.trace('Slice %s indicates bricks: %s', slice_, bricks)

    for idx, brick_guid in bricks:
        # Figure out which part of the brick to set values in
        try:
            brick_slice, value_slice, brick_origin_offset = self._calc_slices(slice_, brick_guid, val, val_origin, brick_origin_offset)
            log.trace('brick_slice: %s, value_slice: %s, brick_origin_offset: %s', brick_slice, value_slice, brick_origin_offset)
            if brick_slice is None:
                raise ValueError('Brick contains no values for specified slice')
        except ValueError as ve:
            log.warn(ve.message + '; moving to next brick')
            continue

        brick_file_path = os.path.join(self.brick_path, '{0}.hdf5'.format(brick_guid))
        log.trace('Brick slice to fill: %s', brick_slice)
        log.trace('Value slice to extract: %s', value_slice)

        # Create the HDF5 dataset that represents one brick
        bD = tuple(self.brick_domains[1])
        cD = self.brick_domains[2]

        v = val[value_slice]
        if val.ndim == 0 and len(val.shape) == 0 and np.iterable(v):
            # Prevent single-value strings from being iterated
            v = [v]

        data_type = self.dtype
        fv = self.fill_value

        # Check for object type
        if data_type == '|O8':
            if np.iterable(v):
                v = [pack(x) for x in v]
            else:
                v = pack(v)

        work_key = brick_guid
        work = (brick_slice, v)
        work_metrics = (brick_file_path, bD, cD, data_type, fv)
        log.trace('Work key: %s', work_key)
        log.trace('Work metrics: %s', work_metrics)
        log.trace('Work[0]: %s', work[0])

        # If the brick file doesn't exist, 'touch' it to make sure it's immediately available
        if not os.path.exists(brick_file_path):
            if data_type == '|O8':
                data_type = h5py.special_dtype(vlen=str)
            # TODO: Uncomment this to properly turn 0 & 1 chunking into True
            if 0 in cD or 1 in cD:
                cD = True
            with h5py.File(brick_file_path, 'a') as f:
                # TODO: Due to usage concerns, currently locking chunking to "auto"
                f.require_dataset(brick_guid, shape=bD, dtype=data_type, chunks=True, fillvalue=fv)

        # region FOR TESTING WITHOUT OUT-OF-BAND WRITES - IN-LINE WRITING OF VALUES
        # if data_type == '|O8':
        #     data_type = h5py.special_dtype(vlen=str)
        # # TODO: Uncomment this to properly turn 0 & 1 chunking into True
        # if 0 in cD or 1 in cD:
        #     cD = True
        # with h5py.File(brick_file_path, 'a') as f:
        #     # TODO: Due to usage concerns, currently locking chunking to "auto"
        #     f.require_dataset(brick_guid, shape=bD, dtype=data_type, chunks=True, fillvalue=fv)
        #     if isinstance(brick_slice, tuple):
        #         brick_slice = list(brick_slice)
        #     f[brick_guid].__setitem__(*brick_slice, val=v)
        # endregion

        if self.auto_flush:
            # Immediately submit work to the dispatcher
            self.brick_dispatcher.put_work(work_key, work_metrics, work)
        else:
            # Queue the work for later flushing
            self._queue_work(work_key, work_metrics, work)

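# Illustrative sketch (hypothetical names - not the project's dispatcher):
# the auto_flush branch above hands (work_key, work_metrics, work) to a
# dispatcher immediately, while the other branch queues it so several writes
# to the same brick can be flushed together later. A minimal in-process
# version of that queue-then-flush pattern:
from collections import defaultdict

class MiniWorkQueue(object):
    def __init__(self, do_write):
        self._pending = defaultdict(list)  # work_key -> [(work_metrics, work), ...]
        self._do_write = do_write

    def queue_work(self, work_key, work_metrics, work):
        # Defer the write; nothing touches the brick file yet
        self._pending[work_key].append((work_metrics, work))

    def flush(self):
        # Drain everything queued so far, grouped per brick (work_key)
        for work_key, items in self._pending.items():
            for work_metrics, work in items:
                self._do_write(work_key, work_metrics, work)
        self._pending.clear()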