class AggFileHandler(AbstractAggFileHandler):
    """
    Framework to handle aggregation file context manager:
    - exclusions .h5 file
    - h5 file to be aggregated
    """

    def __init__(self, excl_fpath, h5_fpath, excl_dict=None,
                 area_filter_kernel='queen', min_area=None,
                 check_excl_layers=False):
        """
        Parameters
        ----------
        excl_fpath : str
            Filepath to exclusions h5 with techmap dataset.
        h5_fpath : str
            Filepath to .h5 file to be aggregated
        excl_dict : dict | None
            Dictionary of exclusion LayerMask arguments
            {layer: {kwarg: value}}
        area_filter_kernel : str
            Contiguous area filter method to use on final exclusions mask
        min_area : float | None
            Minimum required contiguous area filter in sq-km
        check_excl_layers : bool
            Run a pre-flight check on each exclusion layer to ensure they
            contain un-excluded values
        """
        super().__init__(excl_fpath, excl_dict=excl_dict,
                         area_filter_kernel=area_filter_kernel,
                         min_area=min_area,
                         check_excl_layers=check_excl_layers)

        self._h5 = Resource(h5_fpath)

    @property
    def h5(self):
        """
        Get the h5 file handler object.

        Returns
        -------
        _h5 : rex.Resource
            Open Resource handler for the .h5 file to be aggregated.
        """
        return self._h5

    def close(self):
        """Close all file handlers."""
        self._excl.close()
        self._h5.close()
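# Hedged usage sketch (not from the source): shows the intended
# context-manager pattern for AggFileHandler so both file handlers are
# closed. The file paths, the 'some_layer' name, and the 'exclude_values'
# kwarg below are hypothetical placeholders.
def _example_agg_file_handler_usage(excl_fpath='exclusions.h5',
                                    h5_fpath='gen_out.h5'):
    # hypothetical exclusion layer name and LayerMask kwargs
    excl_dict = {'some_layer': {'exclude_values': [1]}}
    with AggFileHandler(excl_fpath, h5_fpath, excl_dict=excl_dict) as fh:
        # Resource handler for the .h5 file being aggregated
        meta = fh.h5.meta

    return meta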
def _preflight_check(self):
    """
    Check time_index and coordinates across files
    """
    time_index = None
    lat_lon = None
    bad_files = []
    for file in self.h5_files:
        with Resource(file) as f:
            if 'time_index' in f:
                ti = f.time_index
                if time_index is None:
                    time_index = ti.copy()
                else:
                    check = time_index.equals(ti)
                    if not check:
                        bad_files.append(file)

            ll = f.lat_lon
            if lat_lon is None:
                lat_lon = ll.copy()
            else:
                check = np.allclose(lat_lon, ll)
                if not check:
                    bad_files.append(file)

    bad_files = list(set(bad_files))
    if bad_files:
        msg = ("The following files' coordinates and time-index do not "
               "match:\n{}".format(bad_files))
        raise ResourceRuntimeError(msg)
def _parse_gen_index(h5_fpath):
    """Parse gen outputs for an array of generation gids corresponding to
    the resource gids.

    Parameters
    ----------
    h5_fpath : str
        Filepath to reV compliant .h5 file

    Returns
    -------
    gen_index : np.ndarray
        Array of generation gids with array index equal to resource gid.
        Array value is -1 if the resource index was not used in the
        generation run.
    """
    with Resource(h5_fpath) as f:
        gen_index = f.meta

    if 'gid' in gen_index:
        gen_index = gen_index.rename(columns={'gid': 'res_gids'})
        gen_index['gen_gids'] = gen_index.index
        gen_index = gen_index[['res_gids', 'gen_gids']]
        gen_index = gen_index.set_index(keys='res_gids')
        gen_index = \
            gen_index.reindex(range(int(gen_index.index.max() + 1)))
        gen_index = gen_index['gen_gids'].values
        gen_index[np.isnan(gen_index)] = -1
        gen_index = gen_index.astype(np.int32)
    else:
        gen_index = None

    return gen_index
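# Runnable toy illustration (not from the source) of the gen_index mapping
# built above: a small meta table with non-contiguous resource gids is
# reindexed so that gen_index[res_gid] gives the generation gid, or -1 if
# that resource gid was not used in the generation run.
def _example_gen_index_mapping():
    import numpy as np
    import pandas as pd

    meta = pd.DataFrame({'gid': [0, 2, 5]})  # resource gids used in a run
    gen_index = meta.rename(columns={'gid': 'res_gids'})
    gen_index['gen_gids'] = gen_index.index
    gen_index = gen_index.set_index('res_gids')
    gen_index = gen_index.reindex(range(int(gen_index.index.max() + 1)))
    gen_index = gen_index['gen_gids'].values
    gen_index[np.isnan(gen_index)] = -1

    return gen_index.astype(np.int32)  # array([0, -1, 1, -1, -1, 2])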
def test_agg_profile():
    """Test aggregation of weighted meanoid profile for each SC point."""
    gen_fpath = os.path.join(TESTDATADIR, 'offshore/ri_offshore_baseline.h5')
    rev_sc_fpath = os.path.join(TESTDATADIR, 'sc_out/ri_wind_farm_sc.csv')

    rev_summary = pd.read_csv(rev_sc_fpath, index_col=0).iloc[0:2]
    profiles = AggregatedRepProfiles.run(gen_fpath, rev_summary,
                                         cf_dset='cf_profile',
                                         scaled_precision=False,
                                         max_workers=None)[0]

    for index in rev_summary.index:
        gen_gids = json.loads(rev_summary.loc[index, 'gen_gids'])
        weights = np.array(json.loads(rev_summary.loc[index, 'gid_counts']))

        with Resource(gen_fpath) as res:
            raw_profiles = res['cf_profile', :, gen_gids]
            last = res['cf_profile', :, gen_gids[-1]]

        assert np.allclose(raw_profiles[:, -1], last)

        truth = raw_profiles * weights
        assert len(truth) == 8760
        truth = truth.sum(axis=1)
        assert len(truth) == 8760
        truth = truth / weights.sum()

        assert np.allclose(profiles[0][:, index], truth)
def _combine_dataset(self, dset_name, dset_attrs, process_size=None):
    """
    Load data from ds_in to ds_out

    Parameters
    ----------
    dset_name : str
        Name of dataset to initialize
    dset_attrs : dict
        Dictionary of dataset properties and attributes
    process_size : int, optional
        Amount of data to be transferred at a time, by default None
    """
    logger.info('Combining {}'.format(dset_name))
    ds_comb = self._init_dataset(dset_name, dset_attrs)
    start = 0
    for h5_path in self.source_h5:
        logger.debug('Transferring data from {}'
                     .format(os.path.basename(h5_path)))
        with Resource(h5_path) as f:
            ds_in = f.h5[dset_name]
            self._load_data(ds_in, ds_comb, start,
                            process_size=process_size)
            start += ds_in.shape[self._axis]
def get_sites_per_worker(res_file, default=100):
    """Get the nominal sites per worker (x-chunk size) for a given file.

    This is based on the concept that it is most efficient for one core to
    perform one read on one chunk of resource data, such that chunks will
    not have to be read into memory twice and no sites will be read
    redundantly.

    Parameters
    ----------
    res_file : str
        Filepath to single resource file, multi-h5 directory,
        or /h5_dir/prefix*suffix
    default : int
        Sites to be analyzed on a single core if the chunk size cannot be
        determined from res_file.

    Returns
    -------
    sites_per_worker : int
        Nominal sites to be analyzed per worker. This is set to the x-axis
        chunk size for windspeed and dni datasets for the WTK and NSRDB
        data, respectively.
    """
    if not res_file or not os.path.isfile(res_file):
        return default

    with Resource(res_file) as res:
        if 'wtk' in res_file.lower():
            for dset in res.datasets:
                if 'speed' in dset:
                    # take nominal WTK chunks from windspeed
                    _, _, chunks = res.get_dset_properties(dset)
                    break
        elif 'nsrdb' in res_file.lower():
            # take nominal NSRDB chunks from dni
            _, _, chunks = res.get_dset_properties('dni')
        else:
            warn('Could not infer dataset chunk size as the resource type '
                 'could not be determined from the filename: {}'
                 .format(res_file))
            chunks = None

    if chunks is None:
        # if chunks not set, go to default
        sites_per_worker = default
        logger.debug('Sites per worker being set to {} (default) based on '
                     'no set chunk size in {}.'
                     .format(sites_per_worker, res_file))
    else:
        sites_per_worker = chunks[1]
        logger.debug('Sites per worker being set to {} based on chunk '
                     'size of {}.'.format(sites_per_worker, res_file))

    return sites_per_worker
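# Hedged sketch (not from the source): shows how the x-axis chunk size of a
# dataset maps to sites_per_worker. The file path and the 'windspeed_100m'
# dataset name are hypothetical placeholders; get_dset_properties and the
# default of 100 come from the function above.
def _example_sites_per_worker(res_file='wtk_file.h5'):
    with Resource(res_file) as res:
        # (shape, dtype, chunks) for a hypothetical windspeed dataset
        _, _, chunks = res.get_dset_properties('windspeed_100m')

    # chunks is (time_chunk, x_chunk) or None; fall back to the default
    return chunks[1] if chunks is not None else 100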
def test_meanoid():
    """Test the simple meanoid method"""
    sites = np.arange(100)
    rev_summary = pd.DataFrame({'gen_gids': sites,
                                'res_gids': sites})
    r = RegionRepProfile(GEN_FPATH, rev_summary)
    meanoid = RepresentativeMethods.meanoid(r.source_profiles)

    with Resource(GEN_FPATH) as res:
        truth_profiles = res['cf_profile', :, sites]

    truth = truth_profiles.mean(axis=1).reshape(meanoid.shape)
    assert np.allclose(meanoid, truth)
def _combine_coordinates(self):
    """
    Combine coordinates
    """
    logger.info('Combining coordinates')
    if 'coordinates' in self.datasets:
        with Resource(self.source_h5[0]) as f:
            chunks = f.get_dset_properties('coordinates')[-1]
            attrs = f.get_attrs('coordinates')
    else:
        chunks = None
        attrs = {}

    if self._axis == 1:
        coords = None
        for h5_path in self.source_h5:
            with Resource(h5_path) as f:
                c = f.lat_lon

            if coords is None:
                coords = c
            else:
                coords = np.append(coords, c, axis=0)
    else:
        with Resource(self.source_h5[0]) as f:
            coords = f.lat_lon

    logger.debug('Combined coordinates have:\n'
                 'shape: {}\n'
                 'dtype: {}\n'
                 'chunks: {}'.format(coords.shape, coords.dtype, chunks))
    ds = self._dst_h5.create_dataset('coordinates', shape=coords.shape,
                                     dtype=coords.dtype, chunks=chunks,
                                     data=coords)
    if attrs:
        for k, v in attrs.items():
            logger.debug("- Transferring attr {}: {}".format(k, v))
            ds.attrs[k] = v
def save_agg_to_h5(self, out_fpath, aggregation):
    """
    Save aggregated data to disc in .h5 format

    Parameters
    ----------
    out_fpath : str
        Output .h5 file path
    aggregation : dict
        Aggregated values for each aggregation dataset
    """
    agg_out = aggregation.copy()
    meta = agg_out.pop('meta')
    for c in meta.columns:
        try:
            meta[c] = pd.to_numeric(meta[c])
        except (ValueError, TypeError):
            pass

    dsets = []
    shapes = {}
    attrs = {}
    chunks = {}
    dtypes = {}
    time_index = None
    with Resource(self._h5_fpath) as f:
        for dset, data in agg_out.items():
            dsets.append(dset)
            shape = data.shape
            shapes[dset] = shape
            if len(data.shape) == 2:
                if ('time_index' in f) and (shape[0] == f.shape[0]):
                    if time_index is None:
                        time_index = f.time_index

            attrs[dset] = f.get_attrs(dset=dset)
            _, dtype, chunk = f.get_dset_properties(dset)
            chunks[dset] = chunk
            dtypes[dset] = dtype

    Outputs.init_h5(out_fpath, dsets, shapes, attrs, chunks, dtypes,
                    meta, time_index=time_index)

    with Outputs(out_fpath, mode='a') as out:
        for dset, data in agg_out.items():
            out[dset] = data
def _combine_time_index(self):
    """
    Combine time_index
    """
    logger.info('Combining time_index')
    if self._axis == 0:
        time_index = None
        chunks = None
        attrs = {}
        for h5_path in self.source_h5:
            with Resource(h5_path) as f:
                ti = f.h5['time_index'][...]
                attrs.update(f.get_attrs('time_index'))
                if time_index is None:
                    time_index = ti
                    chunks = f.get_dset_properties('time_index')[-1]
                else:
                    time_index = np.append(time_index, ti)
    else:
        with Resource(self.source_h5[0]) as f:
            time_index = f.h5['time_index'][...]
            attrs = f.get_attrs('time_index')
            chunks = f.get_dset_properties('time_index')[-1]

    logger.debug('Combined time_index has:\n'
                 'shape: {}\n'
                 'dtype: {}\n'
                 'chunks: {}'.format(time_index.shape, time_index.dtype,
                                     chunks))
    ds = self._dst_h5.create_dataset('time_index',
                                     shape=time_index.shape,
                                     dtype=time_index.dtype,
                                     chunks=chunks,
                                     data=time_index)
    if attrs:
        for k, v in attrs.items():
            logger.debug("- Transferring attr {}: {}".format(k, v))
            ds.attrs[k] = v
def _transfer_global_attrs(self):
    """
    Transfer global attributes
    """
    global_attrs = {}
    for h5_path in self.source_h5:
        with Resource(h5_path) as f:
            global_attrs.update(f.get_attrs())

    if global_attrs:
        logger.info('Transferring global attributes')
        for k, v in global_attrs.items():
            logger.debug("- Transferring {}: {}".format(k, v))
            self._dst_h5.attrs[k] = v
def _combine_meta(self):
    """
    Combine meta
    """
    logger.info('Combining meta')
    if self._axis == 1:
        meta = None
        chunks = None
        attrs = {}
        for h5_path in self.source_h5:
            with Resource(h5_path) as f:
                m = f.h5['meta'][...]
                attrs.update(f.get_attrs('meta'))
                if meta is None:
                    meta = m
                    chunks = f.get_dset_properties('meta')[-1]
                else:
                    meta = np.append(meta, m)
    else:
        with Resource(self.source_h5[0]) as f:
            meta = f.h5['meta'][...]
            attrs = f.get_attrs('meta')
            chunks = f.get_dset_properties('meta')[-1]

    logger.debug('Combined meta has:\n'
                 'shape: {}\n'
                 'dtype: {}\n'
                 'chunks: {}'.format(meta.shape, meta.dtype, chunks))
    ds = self._dst_h5.create_dataset('meta', shape=meta.shape,
                                     dtype=meta.dtype, chunks=chunks,
                                     data=meta)
    if attrs:
        for k, v in attrs.items():
            logger.debug("- Transferring attr {}: {}".format(k, v))
            ds.attrs[k] = v
def compute_mean_wind_dirs(res_path, dset, gids, fracs):
    """
    Compute mean wind directions for given dset and gids
    """
    with Resource(res_path) as f:
        wind_dirs = np.radians(f[dset, :, gids])

    sin = np.mean(np.sin(wind_dirs) * fracs, axis=1)
    cos = np.mean(np.cos(wind_dirs) * fracs, axis=1)

    mean_wind_dirs = np.degrees(np.arctan2(sin, cos))
    mask = mean_wind_dirs < 0
    mean_wind_dirs[mask] += 360

    return mean_wind_dirs
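# Runnable toy check (not from the source) of the weighted circular mean
# used above: directions straddling north (350 and 10 degrees with equal
# weights) should average to roughly due north, not 180 degrees.
def _example_circular_mean():
    import numpy as np

    wind_dirs = np.radians(np.array([[350.0, 10.0]]))  # shape (1, n_gids)
    fracs = np.array([0.5, 0.5])

    sin = np.mean(np.sin(wind_dirs) * fracs, axis=1)
    cos = np.mean(np.cos(wind_dirs) * fracs, axis=1)
    mean_dir = np.degrees(np.arctan2(sin, cos))
    mean_dir[mean_dir < 0] += 360

    return mean_dir  # ~0 or ~360 degrees (due north), not 180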
def test_to_records_array():
    """
    Test conversion of pandas DataFrame to numpy records array for .h5
    ingestion
    """
    path = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5')
    with Resource(path) as f:
        meta = f.meta
        truth = f.h5['meta'][...]

    test = to_records_array(meta)

    for c in truth.dtype.names:
        msg = "{} did not get converted properly!".format(c)
        assert np.all(test[c] == truth[c]), msg
def datasets(self):
    """
    Datasets to combine

    Returns
    -------
    list
    """
    if self._datasets is None:
        datasets = []
        for h5_path in self._source_h5:
            with Resource(h5_path) as f:
                datasets.append(f.datasets)

        self._datasets = list(set(datasets[0]).intersection(*datasets[1:]))

    return self._datasets
def _parse_sites(points, res_file=None):
    """Parse project points from list or slice

    Parameters
    ----------
    points : str | pd.DataFrame | slice | list
        Slice specifying project points, string pointing to a project
        points csv, or a dataframe containing the effective csv contents.
    res_file : str | NoneType
        Optional resource file to find maximum length of project points if
        points slice stop is None.

    Returns
    -------
    df : pd.DataFrame
        DataFrame mapping sites (gids) to SAM technology (config)
    """
    df = pd.DataFrame(columns=['gid', 'config'])
    if isinstance(points, (list, tuple)):
        # explicit site list, set directly
        df['gid'] = points
    elif isinstance(points, slice):
        stop = points.stop
        if stop is None:
            if res_file is None:
                raise ValueError('Must supply a resource file if '
                                 'points is a slice of type '
                                 'slice(*, None, *)')

            multi_h5_res, _ = check_res_file(res_file)
            if multi_h5_res:
                stop = MultiFileResource(res_file).shape[1]
            else:
                stop = Resource(res_file).shape[1]

        df['gid'] = list(range(*points.indices(stop)))
    else:
        raise TypeError('Project Points sites needs to be set as a list, '
                        'tuple, or slice, but was set as: {}'
                        .format(type(points)))

    df['config'] = None

    return df
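# Runnable illustration (not from the source) of how a project-points slice
# is expanded once the stop value is known: slice.indices(stop) fills in the
# implicit start/stop/step before the gid list is built.
def _example_parse_slice_points(points=slice(None, None, 2), stop=10):
    gids = list(range(*points.indices(stop)))

    return gids  # [0, 2, 4, 6, 8]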
def test_sc_points():
    """Test rep profiles for each SC point."""
    sites = np.arange(10)
    timezone = np.random.choice([-4, -5, -6, -7], 10)
    rev_summary = pd.DataFrame({'sc_gid': sites,
                                'gen_gids': sites,
                                'res_gids': sites,
                                'timezone': timezone})

    p1 = RepProfiles.run(GEN_FPATH, rev_summary, 'sc_gid', weight=None,
                         max_workers=1)[0]

    with Resource(GEN_FPATH) as res:
        truth = res['cf_profile', :, slice(0, 10)]

    assert np.allclose(p1[0], truth)
def check_agg(agg_out, baseline_h5):
    """
    Compare agg_out to baseline data

    Parameters
    ----------
    agg_out : dict
        Aggregation data
    baseline_h5 : str
        h5 file containing baseline data
    """
    with Resource(baseline_h5) as f:
        for dset, test in agg_out.items():
            truth = f[dset]
            if dset == 'meta':
                truth.index.name = None
                for c in ['source_gids', 'gid_counts']:
                    test[c] = test[c].astype(str)

                assert_frame_equal(truth, test, check_dtype=False,
                                   rtol=0.0001)
            else:
                assert np.allclose(truth, test, rtol=RTOL, atol=ATOL)
def _check_files(self):
    """Do a preflight check on input files"""

    if not os.path.exists(self._excl_fpath):
        raise FileNotFoundError('Could not find required exclusions file: '
                                '{}'.format(self._excl_fpath))

    if not os.path.exists(self._h5_fpath):
        raise FileNotFoundError('Could not find required h5 file: '
                                '{}'.format(self._h5_fpath))

    with h5py.File(self._excl_fpath, 'r') as f:
        if self._tm_dset not in f:
            raise FileInputError('Could not find techmap dataset "{}" '
                                 'in exclusions file: {}'
                                 .format(self._tm_dset, self._excl_fpath))

    with Resource(self._h5_fpath) as f:
        for dset in self._agg_dsets:
            if dset not in f:
                raise FileInputError('Could not find provided dataset "{}"'
                                     ' in h5 file: {}'
                                     .format(dset, self._h5_fpath))
def test_write_to_file():
    """Test rep profiles with file write."""
    sites = np.arange(100)
    zeros = np.zeros((100,))
    regions = (['r0'] * 7) + (['r1'] * 33) + (['r2'] * 60)
    timezone = np.random.choice([-4, -5, -6, -7], 100)
    rev_summary = pd.DataFrame({'gen_gids': sites,
                                'res_gids': sites,
                                'res_class': zeros,
                                'region': regions,
                                'timezone': timezone})
    fout = os.path.join(TESTDATADIR, 'sc_out/temp_rep_profiles.h5')
    p1, m1, _ = RepProfiles.run(GEN_FPATH, rev_summary, 'region',
                                fout=fout, n_profiles=3, weight=None)

    with Resource(fout) as res:
        disk_profiles = res['rep_profiles_0']
        disk_meta = res.meta
        assert 'rep_profiles_2' in res.datasets
        assert not np.array_equal(res['rep_profiles_0'],
                                  res['rep_profiles_1'])

    assert np.allclose(p1[0], disk_profiles)
    assert len(disk_meta) == 3

    for i in m1.index:
        v1 = json.loads(m1.loc[i, 'rep_gen_gid'])
        v2 = json.loads(disk_meta.loc[i, 'rep_gen_gid'])
        assert v1 == v2

    if PURGE_OUT:
        os.remove(fout)
def test_file_options():
    """Test rep profile file write options (scaled precision, no
    rev_summary dataset)."""
    sites = np.arange(100)
    zeros = np.zeros((100,))
    regions = (['r0'] * 7) + (['r1'] * 33) + (['r2'] * 60)
    timezone = np.random.choice([-4, -5, -6, -7], 100)
    rev_summary = pd.DataFrame({'gen_gids': sites,
                                'res_gids': sites,
                                'res_class': zeros,
                                'region': regions,
                                'timezone': timezone})
    fout = os.path.join(TESTDATADIR, 'sc_out/temp_rep_profiles.h5')
    p1, _, _ = RepProfiles.run(GEN_FPATH, rev_summary, 'region',
                               fout=fout, n_profiles=3,
                               save_rev_summary=False,
                               scaled_precision=True, weight=None)

    with Resource(fout) as res:
        dtype = res.get_dset_properties('rep_profiles_0')[1]
        attrs = res.get_attrs('rep_profiles_0')
        disk_profiles = res['rep_profiles_0']
        disk_dsets = res.datasets

    assert np.issubdtype(dtype, np.integer)
    assert attrs['scale_factor'] == 1000
    assert np.allclose(p1[0], disk_profiles)
    assert 'rev_summary' not in disk_dsets

    if PURGE_OUT:
        os.remove(fout)
def distance_upper_bound(self):
    """Get the upper bound on NN distance between excl and res points.

    Returns
    -------
    distance_upper_bound : float
        Estimate of the upper bound distance based on the distance between
        resource points. Calculated as half of the diagonal between
        closest resource points, with an extra 5% margin.
    """
    if self._distance_upper_bound is None:
        with Resource(self._res_fpath, str_decode=False) as res:
            lats = res.get_meta_arr('latitude')
            dists = np.abs(lats - np.roll(lats, 1))
            dists = dists[(dists != 0)]
            self._distance_upper_bound = 1.05 * (2 ** 0.5) * (dists.min() / 2)

        logger.info('Distance upper bound was inferred to be: {}'
                    .format(self._distance_upper_bound))

    return self._distance_upper_bound
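# Runnable toy example (not from the source) of the heuristic above: for
# resource points on a regular 0.02-degree latitude grid, the inferred NN
# upper bound works out to ~1.05 * sqrt(2) * (0.02 / 2).
def _example_distance_upper_bound():
    import numpy as np

    # duplicate each latitude across several longitudes, as in a meta table
    lats = np.repeat(np.arange(40.0, 41.0, 0.02), 3)
    dists = np.abs(lats - np.roll(lats, 1))
    dists = dists[dists != 0]

    return 1.05 * (2 ** 0.5) * (dists.min() / 2)  # ~0.01485 degrees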
class ExclusionLayers:
    """
    Handler of .h5 file and techmap for Exclusion Layers
    """

    def __init__(self, h5_file, hsds=False):
        """
        Parameters
        ----------
        h5_file : str
            .h5 file containing exclusion layers and techmap
        hsds : bool
            Boolean flag to use h5pyd to handle .h5 'files' hosted on AWS
            behind HSDS
        """
        self.h5_file = h5_file
        self._h5 = Resource(h5_file, hsds=hsds)
        self._iarr = None

    def __repr__(self):
        msg = "{} for {}".format(self.__class__.__name__, self.h5_file)

        return msg

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

        if type is not None:
            raise

    def __len__(self):
        return len(self.layers)

    def __getitem__(self, keys):
        ds, ds_slice = parse_keys(keys)

        if ds.lower().startswith('lat'):
            out = self._get_latitude(*ds_slice)
        elif ds.lower().startswith('lon'):
            out = self._get_longitude(*ds_slice)
        else:
            out = self._get_layer(ds, *ds_slice)

        return out

    def close(self):
        """
        Close h5 instance
        """
        self._h5.close()

    @property
    def h5(self):
        """
        Open h5py File instance.

        Returns
        -------
        h5 : rex.Resource
        """
        return self._h5

    @property
    def iarr(self):
        """Get an array of 1D index values for the flattened h5 excl extent.

        Returns
        -------
        iarr : np.ndarray
            Uint array with same shape as exclusion extent, representing
            the 1D index values if the geotiff extent was flattened
            (with default flatten order 'C')
        """
        if self._iarr is None:
            N = self.shape[0] * self.shape[1]
            self._iarr = np.arange(N, dtype=np.uint32)
            self._iarr = self._iarr.reshape(self.shape)

        return self._iarr

    @property
    def profile(self):
        """
        GeoTiff profile for exclusions

        Returns
        -------
        profile : dict
        """
        return json.loads(self.h5.attrs['profile'])

    @property
    def crs(self):
        """
        GeoTiff projection crs

        Returns
        -------
        str
        """
        return self.profile['crs']

    @property
    def pixel_area(self):
        """Get pixel area in km2 from the transform profile of the excl file.

        Returns
        -------
        area : float
            Exclusion pixel area in km2. Will return None if the
            appropriate transform attribute is not found.
        """
        area = None
        if 'transform' in self.profile:
            transform = self.profile['transform']
            area = np.abs(transform[0] * transform[4])
            area /= 1000 ** 2

        return area

    @property
    def layers(self):
        """
        Available exclusions layers

        Returns
        -------
        layers : list
        """
        layers = self.h5.datasets

        return layers

    @property
    def shape(self):
        """
        Exclusion shape (latitude, longitude)

        Returns
        -------
        shape : tuple
        """
        shape = self.h5.attrs.get('shape', None)
        if shape is None:
            shape = self.h5['latitude'].shape

        return tuple(shape)

    @property
    def chunks(self):
        """
        Default chunk size of the exclusion layers

        Returns
        -------
        chunks : tuple | None
            Chunk size of exclusion layers
        """
        chunks = self.h5.attrs.get('chunks', None)
        if chunks is None:
            chunks = self.h5['latitude'].chunks

        if isinstance(chunks, dict):
            chunks = tuple(chunks.get('dims', None))

        return chunks

    @property
    def latitude(self):
        """
        Latitude coordinates array

        Returns
        -------
        ndarray
        """
        return self['latitude']

    @property
    def longitude(self):
        """
        Longitude coordinates array

        Returns
        -------
        ndarray
        """
        return self['longitude']

    def get_layer_profile(self, layer):
        """
        Get profile for a specific exclusion layer

        Parameters
        ----------
        layer : str
            Layer to get profile for

        Returns
        -------
        profile : dict | None
            GeoTiff profile for single exclusion layer
        """
        profile = self.h5.get_attrs(dset=layer).get('profile', None)
        if profile is not None:
            profile = json.loads(profile)

        return profile

    def get_layer_crs(self, layer):
        """
        Get crs for a specific exclusion layer

        Parameters
        ----------
        layer : str
            Layer to get crs for

        Returns
        -------
        crs : str | None
            GeoTiff projection crs
        """
        profile = self.get_layer_profile(layer)
        if profile is not None:
            crs = profile['crs']
        else:
            crs = None

        return crs

    def get_layer_values(self, layer):
        """
        Get values for given layer in Geotiff format (bands, y, x)

        Parameters
        ----------
        layer : str
            Layer to get values for

        Returns
        -------
        values : ndarray
            GeoTiff values for single exclusion layer
        """
        values = self.h5[layer]

        return values

    def get_layer_description(self, layer):
        """
        Get description for given layer

        Parameters
        ----------
        layer : str
            Layer to get description for

        Returns
        -------
        description : str
            Description of layer
        """
        description = self.h5.get_attrs(dset=layer).get('description', None)

        return description

    def get_nodata_value(self, layer):
        """
        Get the nodata value for a given layer

        Parameters
        ----------
        layer : str
            Layer to get nodata value for

        Returns
        -------
        nodata : int | float | None
            nodata value for layer or None if not found
        """
        profile = self.get_layer_profile(layer)
        nodata = profile.get('nodata', None)

        return nodata

    def _get_latitude(self, *ds_slice):
        """
        Extract latitude coordinates

        Parameters
        ----------
        ds_slice : tuple of int | list | slice
            Pandas slicing describing which sites and columns to extract

        Returns
        -------
        lat : ndarray
            Latitude coordinates
        """
        if 'latitude' not in self.h5:
            msg = ('"latitude" is missing from {}'
                   .format(self.h5_file))
            logger.error(msg)
            raise HandlerKeyError(msg)

        ds_slice = ('latitude', ) + ds_slice

        lat = self.h5[ds_slice]

        return lat

    def _get_longitude(self, *ds_slice):
        """
        Extract longitude coordinates

        Parameters
        ----------
        ds_slice : tuple of int | list | slice
            Pandas slicing describing which sites and columns to extract

        Returns
        -------
        lon : ndarray
            Longitude coordinates
        """
        if 'longitude' not in self.h5:
            msg = ('"longitude" is missing from {}'
                   .format(self.h5_file))
            logger.error(msg)
            raise HandlerKeyError(msg)

        ds_slice = ('longitude', ) + ds_slice

        lon = self.h5[ds_slice]

        return lon

    def _get_layer(self, layer_name, *ds_slice):
        """
        Extract data from given dataset

        Parameters
        ----------
        layer_name : str
            Exclusion layer to extract
        ds_slice : tuple of int | list | slice
            Tuple describing slice of layer array to extract

        Returns
        -------
        layer_data : ndarray
            Array of exclusion data
        """
        if layer_name not in self.layers:
            msg = ('{} not in available layers: {}'
                   .format(layer_name, self.layers))
            logger.error(msg)
            raise HandlerKeyError(msg)

        shape = self.h5.get_dset_properties(layer_name)[0]
        if len(shape) == 3:
            ds_slice = (layer_name, 0) + ds_slice
        else:
            ds_slice = (layer_name, ) + ds_slice

        layer_data = self.h5[ds_slice]

        return layer_data
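# Hedged usage sketch (not from the source): ExclusionLayers used as a
# context manager. The file path and the 'some_layer' name are hypothetical
# placeholders for whatever layers the exclusions .h5 actually contains.
def _example_exclusion_layers_usage(excl_h5='exclusions.h5'):
    with ExclusionLayers(excl_h5) as excl:
        layers = excl.layers   # available exclusion layers
        shape = excl.shape     # (latitude, longitude) extent
        window = excl['some_layer', 0:10, 0:10]  # hypothetical layer slice

    return layers, shape, window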
def _check_dset_properties(self, dset_name):
    """
    Check to ensure dataset is in all source files and extract dataset
    attributes

    Parameters
    ----------
    dset_name : str
        Dataset to check

    Returns
    -------
    attrs : dict
        Dataset attributes {k: v}
    shape : tuple
        Dataset shape
    dtype : str | np.dtype
        Dataset dtype
    chunks : tuple | None
        Dataset chunk size
    """
    attrs = {}
    shape = None
    dtype = None
    chunks = None
    for h5_path in self.source_h5:
        with Resource(h5_path) as f:
            if dset_name in f:
                dset_attrs = f.get_attrs(dset=dset_name)
                dset_shape, dset_dtype, dset_chunks = \
                    f.get_dset_properties(dset_name)
            else:
                msg = '{} not in {}'.format(dset_name, h5_path)
                logger.error(msg)
                raise ValueError(msg)

        attrs.update(dset_attrs)
        if shape is None:
            shape = list(dset_shape)
            chunks = dset_chunks
            dtype = dset_dtype
        else:
            if dset_chunks != chunks:
                msg = ("{} chunks ({} != {}) do not match between source "
                       "files!".format(dset_name, chunks, dset_chunks))
                logger.error(msg)
                raise RuntimeError(msg)

            if dset_dtype != dtype:
                msg = ("{} dtypes ({} != {}) do not match between source "
                       "files!".format(dset_name, dtype, dset_dtype))
                logger.error(msg)
                raise RuntimeError(msg)

            for i, s in enumerate(dset_shape):
                # pylint: disable=unsubscriptable-object
                if i != self._axis and s != shape[i]:
                    msg = ("{} shape ({} != {}) does not match between "
                           "source files!".format(dset_name, dset_shape,
                                                  shape))
                    logger.error(msg)
                    raise RuntimeError(msg)

            if self._axis <= len(shape):
                # pylint: disable=unsupported-assignment-operation
                shape[self._axis] += dset_shape[self._axis]

    return attrs, tuple(shape), dtype, chunks
def map_resource_gids(cls, gids, excl_fpath, res_fpath,
                      distance_upper_bound, map_chunk, margin=0.1):
    """Map exclusion gids to the resource meta.

    Parameters
    ----------
    gids : np.ndarray
        Supply curve gids with tech exclusion points to map to the
        resource meta points.
    excl_fpath : str
        Filepath to exclusions h5 (tech layer). dset will be created in
        excl_fpath.
    res_fpath : str
        Filepath to .h5 resource file that we're mapping to.
    distance_upper_bound : float | None
        Upper boundary distance for KNN lookup between exclusion points
        and resource points.
    map_chunk : int
        Calculation chunk used for the tech mapping calc.
    margin : float
        Margin when reducing the resource lat/lon.

    Returns
    -------
    ind : list
        List of arrays of index values from the NN. List entries
        correspond to input gids.
    coords : np.ndarray
        List of arrays of the un-projected latitude, longitude array of
        tech exclusion points. List entries correspond to input gids.
    """
    logger.debug('Getting tech layer coordinates for chunks {} through {}'
                 .format(gids[0], gids[-1]))

    ind_out = []
    coord_labels = ['latitude', 'longitude']
    with SupplyCurveExtent(excl_fpath, resolution=map_chunk) as sc:
        coords_out, lat_range, lon_range = cls._unpack_coords(
            gids, sc, excl_fpath, coord_labels=coord_labels)

    with Resource(res_fpath, str_decode=False) as res:
        res_meta = np.vstack((res.get_meta_arr(coord_labels[0]),
                              res.get_meta_arr(coord_labels[1]))).T

    mask = ((res_meta[:, 0] > lat_range[0] - margin)
            & (res_meta[:, 0] < lat_range[1] + margin)
            & (res_meta[:, 1] > lon_range[0] - margin)
            & (res_meta[:, 1] < lon_range[1] + margin))

    # pylint: disable-msg=C0121
    mask_ind = np.where(mask == True)[0]  # noqa: E712

    if np.sum(mask) > 0:
        # pylint: disable=not-callable
        res_tree = cKDTree(res_meta[mask, :])

        logger.debug('Running tech mapping for chunks {} through {}'
                     .format(gids[0], gids[-1]))
        for i, _ in enumerate(gids):
            dist, ind = res_tree.query(coords_out[i])
            ind = mask_ind[ind]
            ind[(dist > distance_upper_bound)] = -1
            ind_out.append(ind)
    else:
        logger.debug('No close res points for chunks {} through {}'
                     .format(gids[0], gids[-1]))
        for _ in gids:
            ind_out.append(-1)

    return ind_out, coords_out
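# Runnable toy example (not from the source) of the masked KDTree lookup
# used above: the tree is built on a spatial subset of the resource meta, so
# query indices must be remapped to full-meta indices via mask_ind, and
# points beyond the distance bound get -1.
def _example_masked_kdtree_query():
    import numpy as np
    from scipy.spatial import cKDTree

    res_meta = np.array([[40.0, -105.0], [40.5, -105.5], [60.0, -150.0]])
    mask = res_meta[:, 0] < 50  # keep only the first two points
    mask_ind = np.where(mask)[0]
    tree = cKDTree(res_meta[mask, :])

    coords = np.array([[40.01, -105.01], [45.0, -120.0]])
    dist, ind = tree.query(coords)
    ind = mask_ind[ind]          # remap subset indices to full-meta indices
    ind[dist > 1.0] = -1         # analogue of distance_upper_bound

    return ind  # array([0, -1])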