def _check_dset_shape(self, dset_data): """ Check to ensure that dataset array is of the proper shape Parameters ---------- dset_data : ndarray Dataset data array """ dset_shape = dset_data.shape if len(dset_shape) == 1: shape = len(self) if shape: shape = (shape,) if dset_shape != shape: raise HandlerValueError("data is not of the proper shape:" " {}".format(shape)) else: raise HandlerRuntimeError("'meta' has not been loaded") else: shape = self.shape if shape: if dset_shape != shape: raise HandlerValueError("data is not of the proper shape:" " {}".format(shape)) else: raise HandlerRuntimeError("'meta' and 'time_index' have not " "been loaded")
def _compute_stdev(self, dset_out, means=None): """ Compute multi-year standard deviation for given dataset Parameters ---------- dset_out : str Multi-year stdev dataset name means : ndarray Array of pre-computed means Returns ------- my_stdev : ndarray Array of multi-year standard deviations """ if means is None: means = self._compute_means("{}-means".format(dset_out)) source_dsets = self._get_source_dsets(dset_out) my_stdev = np.zeros(means.shape, dtype='float32') for ds in source_dsets: if self.h5[ds].shape == my_stdev.shape: my_stdev += (self[ds] - means)**2 else: raise HandlerRuntimeError("{} shape {} should be {}" .format(ds, self.h5[ds].shape, my_stdev.shape)) my_stdev = np.sqrt(my_stdev / len(source_dsets)) self._update_dset(dset_out, my_stdev) return my_stdev
def _compute_means(self, dset_out):
    """
    Compute multi-year means for given dataset

    Parameters
    ----------
    dset_out : str
        Multi-year means dataset name

    Returns
    -------
    my_means : ndarray
        Array of multi-year means
    """
    dsets = self._get_source_dsets(dset_out)
    logger.debug('\t- Computing {} from {}'.format(dset_out, dsets))

    my_means = np.zeros(len(self), dtype='float32')
    for name in dsets:
        src_shape = self.h5[name].shape
        if src_shape != my_means.shape:
            raise HandlerRuntimeError("{} shape {} should be {}"
                                      .format(name, src_shape,
                                              my_means.shape))

        my_means += self[name]

    # Average of the annual values across all source years
    my_means /= len(dsets)

    self._update_dset(dset_out, my_means)

    return my_means
def _check_data_dtype(data, dtype, scale_factor=1): """ Check data dtype and scale if needed Parameters ---------- data : ndarray Data to be written to disc dtype : str dtype of data on disc scale_factor : int Scale factor to scale data to integer (if needed) Returns ------- data : ndarray Data ready for writing to disc: - Scaled and converted to dtype """ if not np.issubdtype(data.dtype, np.dtype(dtype)): if scale_factor == 1: raise HandlerRuntimeError("A scale_factor is needed to" "scale data to {}.".format(dtype)) # apply scale factor and dtype data = np.multiply(data, scale_factor) if np.issubdtype(dtype, np.integer): data = np.round(data) data = data.astype(dtype) return data
def _copy_dset(self, source_h5, dset, meta=None): """ Copy dset_in from source_h5 to multiyear .h5 Parameters ---------- source_h5 : str Path to source .h5 file to copy data from dset : str Dataset to copy meta : pandas.DataFrame If provided confirm that source meta matches given meta """ dset_out = self._create_dset_name(source_h5, dset) if dset_out not in self.datasets: logger.debug("- Collecting {} from {}".format( dset, os.path.basename(source_h5))) with Outputs(source_h5, unscale=False, mode='r') as f_in: if meta is not None: cols = ['latitude', 'longitude'] source_meta = f_in.meta if not meta[cols].equals(source_meta[cols]): raise HandlerRuntimeError('Coordinates do not match') _, ds_dtype, ds_chunks = f_in.get_dset_properties(dset) ds_attrs = f_in.get_attrs(dset=dset) ds_data = f_in[dset] self._create_dset(dset_out, ds_data.shape, ds_dtype, chunks=ds_chunks, attrs=ds_attrs, data=ds_data)
def available_capacity(self, gid):
    """
    Get available capacity for given line

    Parameters
    ----------
    gid : int
        Unique id of feature of interest

    Returns
    -------
    avail_cap : float
        Available capacity = capacity * available fraction
        default = 10%
    """
    feature = self[gid]

    if 'avail_cap' in feature:
        # Feature carries a pre-computed available capacity
        return feature['avail_cap']

    if 'lines' in feature:
        # Substation: aggregate available capacity over connected lines
        return self._substation_capacity(feature['lines'])

    msg = ('Could not parse available capacity from feature: {}'
           .format(feature))
    logger.error(msg)
    raise HandlerRuntimeError(msg)
def _create_dset(self, ds_name, shape, dtype, chunks=None, attrs=None, data=None, replace=True): """ Initialize dataset Parameters ---------- ds_name : str Dataset name shape : tuple Dataset shape dtype : str Dataset numpy dtype chunks : tuple Dataset chunk size attrs : dict Dataset attributes data : ndarray Dataset data array replace : bool If previous dataset exists with the same name, it will be replaced. """ if self.writable: if ds_name in self.datasets and replace: del self.h5[ds_name] elif ds_name in self.datasets: old_shape, old_dtype, _ = self.get_dset_properties(ds_name) if old_shape != shape or old_dtype != dtype: e = ('Trying to create dataset "{}", but already exists ' 'with mismatched shape and dtype. New shape/dtype ' 'is {}/{}, previous shape/dtype is {}/{}' .format(ds_name, shape, dtype, old_shape, old_dtype)) logger.error(e) raise HandlerRuntimeError(e) if ds_name not in self.datasets: chunks = self._check_chunks(chunks, data=data) ds = self.h5.create_dataset(ds_name, shape=shape, dtype=dtype, chunks=chunks) if attrs is not None: for key, value in attrs.items(): ds.attrs[key] = value if data is not None: ds[...] = data
def _check_group(self, group): """ Ensure group is in .h5 file Parameters ---------- group : str Group of interest """ if group is not None: if group not in self._h5: try: if self.writable: self._h5.create_group(group) except Exception as ex: msg = ('Cannot create group {}: {}' .format(group, ex)) raise HandlerRuntimeError(msg) return group
def _set_ds_array(self, ds_name, arr, ds_slice):
    """
    Write ds to disk

    Parameters
    ----------
    ds_name : str
        Dataset name
    arr : ndarray
        Dataset data array
    ds_slice : tuple
        Dataset slicing that corresponds to arr
    """
    if ds_name not in self.datasets:
        msg = '{} must be initialized!'.format(ds_name)
        raise HandlerRuntimeError(msg)

    dtype = self.h5[ds_name].dtype
    scale_factor = self.get_scale(ds_name)
    ds_slice = parse_slice(ds_slice)

    # Scale/cast to the on-disk dtype before writing the slice
    data = self._check_data_dtype(arr, dtype, scale_factor)
    self.h5[ds_name][ds_slice] = data
def _copy_dset(self, source_h5, dset, meta=None, pass_through=False): """ Copy dset_in from source_h5 to multiyear .h5 Parameters ---------- source_h5 : str Path to source .h5 file to copy data from dset : str Dataset to copy meta : pandas.DataFrame If provided confirm that source meta matches given meta pass_through : bool Flag to just pass through dataset without name modifications (no differences between years, no means or stdevs) """ if pass_through: dset_out = dset else: dset_out = self._create_dset_name(source_h5, dset) if dset_out not in self.datasets: logger.debug("- Collecting {} from {}" .format(dset, os.path.basename(source_h5))) with Outputs(source_h5, unscale=False, mode='r') as f_in: if meta is not None: cols = get_lat_lon_cols(meta) source_meta = f_in.meta if not meta[cols].equals(source_meta[cols]): raise HandlerRuntimeError('Coordinates do not match') _, ds_dtype, ds_chunks = f_in.get_dset_properties(dset) ds_attrs = f_in.get_attrs(dset=dset) ds_data = f_in[dset] self._create_dset(dset_out, ds_data.shape, ds_dtype, chunks=ds_chunks, attrs=ds_attrs, data=ds_data)