Example 1
    def __init__(self, excl_fpath, h5_fpath, excl_dict=None,
                 area_filter_kernel='queen', min_area=None,
                 check_excl_layers=False):
        """
        Parameters
        ----------
        excl_fpath : str
            Filepath to exclusions h5 with techmap dataset.
        h5_fpath : str
            Filepath to .h5 file to be aggregated
        excl_dict : dict | None
            Dictionary of exclusion LayerMask arguments {layer: {kwarg: value}}
        area_filter_kernel : str
            Contiguous area filter method to use on final exclusions mask
        min_area : float | None
            Minimum required contiguous area filter in sq-km
        check_excl_layers : bool
            Run a pre-flight check on each exclusion layer to ensure they
            contain un-excluded values
        """
        super().__init__(excl_fpath, excl_dict=excl_dict,
                         area_filter_kernel=area_filter_kernel,
                         min_area=min_area,
                         check_excl_layers=check_excl_layers)

        self._h5 = Resource(h5_fpath)
Example 2
class AggFileHandler(AbstractAggFileHandler):
    """
    Framework to handle aggregation file context managers:
    - exclusions .h5 file
    - h5 file to be aggregated
    """
    def __init__(self,
                 excl_fpath,
                 h5_fpath,
                 excl_dict=None,
                 area_filter_kernel='queen',
                 min_area=None,
                 check_excl_layers=False):
        """
        Parameters
        ----------
        excl_fpath : str
            Filepath to exclusions h5 with techmap dataset.
        h5_fpath : str
            Filepath to .h5 file to be aggregated
        excl_dict : dict | None
            Dictionary of exclusion LayerMask arguments {layer: {kwarg: value}}
        area_filter_kernel : str
            Contiguous area filter method to use on final exclusions mask
        min_area : float | None
            Minimum required contiguous area filter in sq-km
        check_excl_layers : bool
            Run a pre-flight check on each exclusion layer to ensure they
            contain un-excluded values
        """
        super().__init__(excl_fpath,
                         excl_dict=excl_dict,
                         area_filter_kernel=area_filter_kernel,
                         min_area=min_area,
                         check_excl_layers=check_excl_layers)

        self._h5 = Resource(h5_fpath)

    @property
    def h5(self):
        """
        Get the h5 file handler object.

        Returns
        -------
        _h5 : rex.Resource
            Open Resource handler for the .h5 file to be aggregated.
        """
        return self._h5

    def close(self):
        """Close all file handlers."""
        self._excl.close()
        self._h5.close()
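The handler pairs two open file handlers and is meant to be used as a context manager so both are closed together. A minimal usage sketch (hypothetical file paths, assuming the abstract parent wires __enter__/__exit__ to close()):

# Hypothetical paths, for illustration only
excl_fpath = './exclusions.h5'
h5_fpath = './gen_outputs.h5'

with AggFileHandler(excl_fpath, h5_fpath, min_area=0.1) as fh:
    dsets = fh.h5.datasets  # rex.Resource handler for the file being aggregated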
Example 3
    def __init__(self, h5_file, hsds=False):
        """
        Parameters
        ----------
        h5_file : str
            .h5 file containing exclusion layers and techmap
        hsds : bool
            Boolean flag to use h5pyd to handle .h5 'files' hosted on AWS
            behind HSDS
        """
        self.h5_file = h5_file
        self._h5 = Resource(h5_file, hsds=hsds)

        self._iarr = None
Example 4
    def _preflight_check(self):
        """
        Check time_index and coordinates across files
        """
        time_index = None
        lat_lon = None

        bad_files = []
        for file in self.h5_files:
            with Resource(file) as f:
                if 'time_index' in f:
                    ti = f.time_index
                    if time_index is None:
                        time_index = ti.copy()
                    else:
                        check = time_index.equals(ti)
                        if not check:
                            bad_files.append(file)

                ll = f.lat_lon
                if lat_lon is None:
                    lat_lon = ll.copy()
                else:
                    check = np.allclose(lat_lon, ll)
                    if not check:
                        bad_files.append(file)

        bad_files = list(set(bad_files))
        if bad_files:
            msg = ("The following files' coordinates and time-index do not "
                   "match:\n{}".format(bad_files))
            raise ResourceRuntimeError(msg)
Example 5
    def _parse_gen_index(h5_fpath):
        """Parse gen outputs for an array of generation gids corresponding to
        the resource gids.

        Parameters
        ----------
        h5_fpath : str
            Filepath to reV compliant .h5 file

        Returns
        -------
        gen_index : np.ndarray
            Array of generation gids with array index equal to resource gid.
            Array value is -1 if the resource index was not used in the
            generation run.
        """

        with Resource(h5_fpath) as f:
            gen_index = f.meta

        if 'gid' in gen_index:
            gen_index = gen_index.rename(columns={'gid': 'res_gids'})
            gen_index['gen_gids'] = gen_index.index
            gen_index = gen_index[['res_gids', 'gen_gids']]
            gen_index = gen_index.set_index(keys='res_gids')
            gen_index = \
                gen_index.reindex(range(int(gen_index.index.max() + 1)))
            gen_index = gen_index['gen_gids'].values
            gen_index[np.isnan(gen_index)] = -1
            gen_index = gen_index.astype(np.int32)
        else:
            gen_index = None

        return gen_index
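The reindex step is the heart of _parse_gen_index; a self-contained toy (not reV code) shows how resource gids that never appear in the generation meta map to -1:

import numpy as np
import pandas as pd

# Toy generation meta: gen gids 0-2 ran on resource gids 0, 2 and 5
gen_index = pd.DataFrame({'res_gids': [0, 2, 5]})
gen_index['gen_gids'] = gen_index.index
gen_index = gen_index.set_index(keys='res_gids')
gen_index = gen_index.reindex(range(int(gen_index.index.max() + 1)))
gen_index = gen_index['gen_gids'].values
gen_index[np.isnan(gen_index)] = -1
gen_index = gen_index.astype(np.int32)

assert list(gen_index) == [0, -1, 1, -1, -1, 2]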
Example 6
def test_agg_profile():
    """Test aggregation of weighted meanoid profile for each SC point."""

    gen_fpath = os.path.join(TESTDATADIR, 'offshore/ri_offshore_baseline.h5')

    rev_sc_fpath = os.path.join(TESTDATADIR, 'sc_out/ri_wind_farm_sc.csv')
    rev_summary = pd.read_csv(rev_sc_fpath, index_col=0).iloc[0:2]

    profiles = AggregatedRepProfiles.run(gen_fpath,
                                         rev_summary,
                                         cf_dset='cf_profile',
                                         scaled_precision=False,
                                         max_workers=None)[0]

    for index in rev_summary.index:

        gen_gids = json.loads(rev_summary.loc[index, 'gen_gids'])
        weights = np.array(json.loads(rev_summary.loc[index, 'gid_counts']))

        with Resource(gen_fpath) as res:
            raw_profiles = res['cf_profile', :, gen_gids]
            last = res['cf_profile', :, gen_gids[-1]]

        assert np.allclose(raw_profiles[:, -1], last)

        truth = raw_profiles * weights
        assert len(truth) == 8760
        truth = truth.sum(axis=1)
        assert len(truth) == 8760
        truth = truth / weights.sum()

        assert np.allclose(profiles[0][:, index], truth)
Example 7
    def _combine_dataset(self, dset_name, dset_attrs, process_size=None):
        """
        Load data from each source h5 into the combined output dataset

        Parameters
        ----------
        dset_name : str
            Name of dataset to initialize
        dset_attrs : dict
            Dictionary of dataset properties and attributes
        process_size : int, optional
            Amount of data to be transferred at a time, by default None
        """
        logger.info('Combining {}'.format(dset_name))
        ds_comb = self._init_dataset(dset_name, dset_attrs)
        start = 0
        for h5_path in self.source_h5:
            logger.debug('Transferring data from {}'.format(
                os.path.basename(h5_path)))
            with Resource(h5_path) as f:
                ds_in = f.h5[dset_name]
                self._load_data(ds_in,
                                ds_comb,
                                start,
                                process_size=process_size)
                start += ds_in.shape[self._axis]
Example 8
    def get_sites_per_worker(res_file, default=100):
        """Get the nominal sites per worker (x-chunk size) for a given file.

        This is based on the concept that it is most efficient for one core to
        perform one read on one chunk of resource data, such that chunks will
        not have to be read into memory twice and no sites will be read
        redundantly.

        Parameters
        ----------
        res_file : str
            Filepath to single resource file, multi-h5 directory,
            or /h5_dir/prefix*suffix
        default : int
            Sites to be analyzed on a single core if the chunk size cannot be
            determined from res_file.

        Returns
        -------
        sites_per_worker : int
            Nominal sites to be analyzed per worker. This is set to the x-axis
            chunk size for windspeed and dni datasets for the WTK and NSRDB
            data, respectively.
        """
        if not res_file or not os.path.isfile(res_file):
            return default

        with Resource(res_file) as res:
            chunks = None  # guard: stays None if no matching dataset is found
            if 'wtk' in res_file.lower():
                for dset in res.datasets:
                    if 'speed' in dset:
                        # take nominal WTK chunks from windspeed
                        _, _, chunks = res.get_dset_properties(dset)
                        break
            elif 'nsrdb' in res_file.lower():
                # take nominal NSRDB chunks from dni
                _, _, chunks = res.get_dset_properties('dni')
            else:
                warn('Could not infer dataset chunk size as the resource type '
                     'could not be determined from the filename: {}'
                     .format(res_file))

        if chunks is None:
            # if chunks not set, go to default
            sites_per_worker = default
            logger.debug('Sites per worker being set to {} (default) based on '
                         'no set chunk size in {}.'
                         .format(sites_per_worker, res_file))
        else:
            sites_per_worker = chunks[1]
            logger.debug('Sites per worker being set to {} based on chunk '
                         'size of {}.'.format(sites_per_worker, res_file))

        return sites_per_worker
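get_dset_properties returns (shape, dtype, chunks), and chunks[1] is the site-axis chunk size used above. A self-contained sketch with plain h5py showing where that number comes from (file name and shapes are illustrative):

import h5py
import numpy as np

# Illustrative file: 8760 timesteps x 500 sites, chunked (1000, 100)
with h5py.File('./tiny_wtk.h5', 'w') as f:
    f.create_dataset('windspeed_100m', shape=(8760, 500),
                     chunks=(1000, 100), dtype=np.float32)

with h5py.File('./tiny_wtk.h5', 'r') as f:
    chunks = f['windspeed_100m'].chunks

# chunks[1] would become sites_per_worker
assert chunks == (1000, 100)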
Example 9
def test_meanoid():
    """Test the simple meanoid method"""
    sites = np.arange(100)
    rev_summary = pd.DataFrame({'gen_gids': sites, 'res_gids': sites})
    r = RegionRepProfile(GEN_FPATH, rev_summary)

    meanoid = RepresentativeMethods.meanoid(r.source_profiles)

    with Resource(GEN_FPATH) as res:
        truth_profiles = res['cf_profile', :, sites]
    truth = truth_profiles.mean(axis=1).reshape(meanoid.shape)
    assert np.allclose(meanoid, truth)
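The meanoid is just the element-wise mean across site profiles; a sketch with random stand-in data (the (8760, 1) output shape is inferred from the reshape in the test above):

import numpy as np

profiles = np.random.rand(8760, 100)  # stand-in for cf_profile data

# Mean across sites (axis=1) yields a single representative profile
meanoid = profiles.mean(axis=1).reshape(-1, 1)

assert meanoid.shape == (8760, 1)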
Example 10
    def _combine_coordinates(self):
        """
        Combine coordinates
        """
        logger.info('Combining coordinates')
        if 'coordinates' in self.datasets:
            with Resource(self.source_h5[0]) as f:
                chunks = f.get_dset_properties('coordinates')[-1]
                attrs = f.get_attrs('coordinates')
        else:
            chunks = None
            attrs = {}

        if self._axis == 1:
            coords = None
            for h5_path in self.source_h5:
                with Resource(h5_path) as f:
                    c = f.lat_lon
                    if coords is None:
                        coords = c
                    else:
                        coords = np.append(coords, c, axis=0)
        else:
            with Resource(self.source_h5[0]) as f:
                coords = f.lat_lon

        logger.debug('Combined coordinates have:\n'
                     'shape: {}\n'
                     'dtype: {}\n'
                     'chunks: {}'.format(coords.shape, coords.dtype, chunks))
        ds = self._dst_h5.create_dataset('coordinates',
                                         shape=coords.shape,
                                         dtype=coords.dtype,
                                         chunks=chunks,
                                         data=coords)

        if attrs:
            for k, v in attrs.items():
                logger.debug("- Transfering attr {}: {}".format(k, v))
                ds.attrs[k] = v
Example 11
    def save_agg_to_h5(self, out_fpath, aggregation):
        """
        Save aggregated data to disk in .h5 format

        Parameters
        ----------
        out_fpath : str
            Output .h5 file path
        aggregation : dict
            Aggregated values for each aggregation dataset
        """
        agg_out = aggregation.copy()
        meta = agg_out.pop('meta')
        for c in meta.columns:
            try:
                meta[c] = pd.to_numeric(meta[c])
            except (ValueError, TypeError):
                pass

        dsets = []
        shapes = {}
        attrs = {}
        chunks = {}
        dtypes = {}
        time_index = None
        with Resource(self._h5_fpath) as f:
            for dset, data in agg_out.items():
                dsets.append(dset)
                shape = data.shape
                shapes[dset] = shape
                if len(data.shape) == 2:
                    if ('time_index' in f) and (shape[0] == f.shape[0]):
                        if time_index is None:
                            time_index = f.time_index

                attrs[dset] = f.get_attrs(dset=dset)
                _, dtype, chunk = f.get_dset_properties(dset)
                chunks[dset] = chunk
                dtypes[dset] = dtype

        Outputs.init_h5(out_fpath,
                        dsets,
                        shapes,
                        attrs,
                        chunks,
                        dtypes,
                        meta,
                        time_index=time_index)

        with Outputs(out_fpath, mode='a') as out:
            for dset, data in agg_out.items():
                out[dset] = data
Example 12
    def _combine_time_index(self):
        """
        Combine time_index
        """
        logger.info('Combining time_index')
        if self._axis == 0:
            time_index = None
            chunks = None
            attrs = {}
            for h5_path in self.source_h5:
                with Resource(h5_path) as f:
                    ti = f.h5['time_index'][...]
                    attrs.update(f.get_attrs('time_index'))
                    if time_index is None:
                        time_index = ti
                        chunks = f.get_dset_properties('time_index')[-1]
                    else:
                        time_index = np.append(time_index, ti)
        else:
            with Resource(self.source_h5[0]) as f:
                time_index = f.h5['time_index'][...]
                attrs = f.get_attrs('time_index')
                chunks = f.get_dset_properties('time_index')[-1]

        logger.debug('Combined time_index has:\n'
                     'shape: {}\n'
                     'dtype: {}\n'
                     'chunks: {}'.format(time_index.shape, time_index.dtype,
                                         chunks))
        ds = self._dst_h5.create_dataset('time_index',
                                         shape=time_index.shape,
                                         dtype=time_index.dtype,
                                         chunks=chunks,
                                         data=time_index)
        if attrs:
            for k, v in attrs.items():
                logger.debug("- Transfering attr {}: {}".format(k, v))
                ds.attrs[k] = v
Example 13
    def _transfer_global_attrs(self):
        """
        Transfer global attributes
        """
        global_attrs = {}
        for h5_path in self.source_h5:
            with Resource(h5_path) as f:
                global_attrs.update(f.get_attrs())

        if global_attrs:
            logger.info('Transferring global attributes')
            for k, v in global_attrs.items():
                logger.debug("- Transferring {}: {}".format(k, v))
                self._dst_h5.attrs[k] = v
Example 14
    def _combine_meta(self):
        """
        Combine meta
        """
        logger.info('Combining meta')
        if self._axis == 1:
            meta = None
            chunks = None
            attrs = {}
            for h5_path in self.source_h5:
                with Resource(h5_path) as f:
                    m = f.h5['meta'][...]
                    attrs.update(f.get_attrs('meta'))
                    if meta is None:
                        meta = m
                        chunks = f.get_dset_properties('meta')[-1]
                    else:
                        meta = np.append(meta, m)
        else:
            with Resource(self.source_h5[0]) as f:
                meta = f.h5['meta'][...]
                attrs = f.get_attrs('meta')
                chunks = f.get_dset_properties('meta')[-1]

        logger.debug('Combined meta has:\n'
                     'shape: {}\n'
                     'dtype: {}\n'
                     'chunks: {}'.format(meta.shape, meta.dtype, chunks))
        ds = self._dst_h5.create_dataset('meta',
                                         shape=meta.shape,
                                         dtype=meta.dtype,
                                         chunks=chunks,
                                         data=meta)
        if attrs:
            for k, v in attrs.items():
                logger.debug("- Transfering attr {}: {}".format(k, v))
                ds.attrs[k] = v
Example 15
def compute_mean_wind_dirs(res_path, dset, gids, fracs):
    """
    Compute fraction-weighted circular (vector) mean wind directions for the
    given dset and gids
    """
    with Resource(res_path) as f:
        wind_dirs = np.radians(f[dset, :, gids])

    sin = np.mean(np.sin(wind_dirs) * fracs, axis=1)
    cos = np.mean(np.cos(wind_dirs) * fracs, axis=1)
    mean_wind_dirs = np.degrees(np.arctan2(sin, cos))

    mask = mean_wind_dirs < 0
    mean_wind_dirs[mask] += 360

    return mean_wind_dirs
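The sin/cos treatment above is a circular (vector) mean; a naive arithmetic mean breaks at the 0/360 wrap-around. A self-contained check (not reV code):

import numpy as np

dirs = np.radians(np.array([350.0, 10.0]))  # straddles north

sin = np.mean(np.sin(dirs))
cos = np.mean(np.cos(dirs))
mean_dir = np.degrees(np.arctan2(sin, cos))
if mean_dir < 0:
    mean_dir += 360

# The vector mean is due north (0 deg), not the naive mean of 180 deg
assert np.isclose(mean_dir, 0.0)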
Example 16
def test_to_records_array():
    """
    Test conversion of pandas DataFrame to numpy records array for .h5
    ingestion
    """
    path = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5')
    with Resource(path) as f:
        meta = f.meta
        truth = f.h5['meta'][...]

    test = to_records_array(meta)

    for c in truth.dtype.names:
        msg = "{} did not get converted propertly!".format(c)
        assert np.all(test[c] == truth[c]), msg
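For intuition, pandas' built-in to_records performs a comparable DataFrame-to-structured-array conversion (shown here as a stand-in for the project's to_records_array helper):

import numpy as np
import pandas as pd

meta = pd.DataFrame({'latitude': [41.0, 41.5],
                     'longitude': [-71.0, -71.5]})

# Structured (records) array: one compound dtype field per column
arr = meta.to_records(index=False)

assert arr.dtype.names == ('latitude', 'longitude')
assert np.allclose(arr['latitude'], meta['latitude'].values)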
Example 17
    def datasets(self):
        """
        Datasets to combine

        Returns
        -------
        list
        """
        if self._datasets is None:
            datasets = []
            for h5_path in self._source_h5:
                with Resource(h5_path) as f:
                    datasets.append(f.datasets)

            self._datasets = list(set(datasets[0]).intersection(*datasets[1:]))

        return self._datasets
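The intersection keeps only datasets present in every source file; the pattern in isolation:

datasets = [['meta', 'time_index', 'cf_profile'],
            ['meta', 'time_index', 'windspeed']]

common = list(set(datasets[0]).intersection(*datasets[1:]))

assert sorted(common) == ['meta', 'time_index']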
Example 18
    def _parse_sites(points, res_file=None):
        """Parse project points from list or slice

        Parameters
        ----------
        points : str | pd.DataFrame | slice | list
            Slice specifying project points, string pointing to a project
            points csv, or a dataframe containing the effective csv contents.
        res_file : str | NoneType
            Optional resource file to find maximum length of project points if
            points slice stop is None.

        Returns
        -------
        df : pd.DataFrame
            DataFrame mapping sites (gids) to SAM technology (config)
        """
        df = pd.DataFrame(columns=['gid', 'config'])
        if isinstance(points, (list, tuple)):
            # explicit site list, set directly
            df['gid'] = points
        elif isinstance(points, slice):
            stop = points.stop
            if stop is None:
                if res_file is None:
                    raise ValueError('Must supply a resource file if '
                                     'points is a slice of type '
                                     'slice(*, None, *)')

                multi_h5_res, _ = check_res_file(res_file)
                res_cls = MultiFileResource if multi_h5_res else Resource
                # use a context manager so the handler is closed after
                # reading the shape
                with res_cls(res_file) as res:
                    stop = res.shape[1]

            df['gid'] = list(range(*points.indices(stop)))
        else:
            raise TypeError('Project Points sites needs to be set as a list, '
                            'tuple, or slice, but was set as: {}'.format(
                                type(points)))

        df['config'] = None

        return df
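The slice branch leans on slice.indices, which expands a slice against a known length; a toy illustration with no resource file involved:

points = slice(0, None, 2)

# Suppose the resource file reports 10 sites (shape[1] == 10)
stop = 10
gids = list(range(*points.indices(stop)))

assert gids == [0, 2, 4, 6, 8]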
Example 19
def test_sc_points():
    """Test rep profiles for each SC point."""
    sites = np.arange(10)
    timezone = np.random.choice([-4, -5, -6, -7], 10)
    rev_summary = pd.DataFrame({
        'sc_gid': sites,
        'gen_gids': sites,
        'res_gids': sites,
        'timezone': timezone
    })

    p1 = RepProfiles.run(GEN_FPATH,
                         rev_summary,
                         'sc_gid',
                         weight=None,
                         max_workers=1)[0]

    with Resource(GEN_FPATH) as res:
        truth = res['cf_profile', :, slice(0, 10)]

    assert np.allclose(p1[0], truth)
Example 20
def check_agg(agg_out, baseline_h5):
    """
    Compare agg_out to baseline data

    Parameters
    ----------
    agg_out : dict
        Aggregation data
    baseline_h5 : str
        h5 file containing baseline data
    """
    with Resource(baseline_h5) as f:
        for dset, test in agg_out.items():
            truth = f[dset]
            if dset == 'meta':
                truth.index.name = None
                for c in ['source_gids', 'gid_counts']:
                    test[c] = test[c].astype(str)

                assert_frame_equal(truth, test, check_dtype=False, rtol=0.0001)
            else:
                assert np.allclose(truth, test, rtol=RTOL, atol=ATOL)
Example 21
    def _check_files(self):
        """Do a preflight check on input files"""

        if not os.path.exists(self._excl_fpath):
            raise FileNotFoundError('Could not find required exclusions file: '
                                    '{}'.format(self._excl_fpath))

        if not os.path.exists(self._h5_fpath):
            raise FileNotFoundError('Could not find required h5 file: '
                                    '{}'.format(self._h5_fpath))

        with h5py.File(self._excl_fpath, 'r') as f:
            if self._tm_dset not in f:
                raise FileInputError('Could not find techmap dataset "{}" '
                                     'in exclusions file: {}'.format(
                                         self._tm_dset, self._excl_fpath))

        with Resource(self._h5_fpath) as f:
            for dset in self._agg_dsets:
                if dset not in f:
                    raise FileInputError('Could not find provided dataset "{}"'
                                         ' in h5 file: {}'.format(
                                             dset, self._h5_fpath))
Example 22
def test_write_to_file():
    """Test rep profiles with file write."""

    sites = np.arange(100)
    zeros = np.zeros((100, ))
    regions = (['r0'] * 7) + (['r1'] * 33) + (['r2'] * 60)
    timezone = np.random.choice([-4, -5, -6, -7], 100)
    rev_summary = pd.DataFrame({
        'gen_gids': sites,
        'res_gids': sites,
        'res_class': zeros,
        'region': regions,
        'timezone': timezone
    })
    fout = os.path.join(TESTDATADIR, 'sc_out/temp_rep_profiles.h5')
    p1, m1, _ = RepProfiles.run(GEN_FPATH,
                                rev_summary,
                                'region',
                                fout=fout,
                                n_profiles=3,
                                weight=None)
    with Resource(fout) as res:
        disk_profiles = res['rep_profiles_0']
        disk_meta = res.meta
        assert 'rep_profiles_2' in res.datasets
        assert not np.array_equal(res['rep_profiles_0'], res['rep_profiles_1'])

    assert np.allclose(p1[0], disk_profiles)
    assert len(disk_meta) == 3

    for i in m1.index:
        v1 = json.loads(m1.loc[i, 'rep_gen_gid'])
        v2 = json.loads(disk_meta.loc[i, 'rep_gen_gid'])
        assert v1 == v2

    if PURGE_OUT:
        os.remove(fout)
Example 23
def test_file_options():
    """Test rep profiles with file write."""

    sites = np.arange(100)
    zeros = np.zeros((100, ))
    regions = (['r0'] * 7) + (['r1'] * 33) + (['r2'] * 60)
    timezone = np.random.choice([-4, -5, -6, -7], 100)
    rev_summary = pd.DataFrame({
        'gen_gids': sites,
        'res_gids': sites,
        'res_class': zeros,
        'region': regions,
        'timezone': timezone
    })
    fout = os.path.join(TESTDATADIR, 'sc_out/temp_rep_profiles.h5')
    p1, _, _ = RepProfiles.run(GEN_FPATH,
                               rev_summary,
                               'region',
                               fout=fout,
                               n_profiles=3,
                               save_rev_summary=False,
                               scaled_precision=True,
                               weight=None)
    with Resource(fout) as res:
        dtype = res.get_dset_properties('rep_profiles_0')[1]
        attrs = res.get_attrs('rep_profiles_0')
        disk_profiles = res['rep_profiles_0']
        disk_dsets = res.datasets

    assert np.issubdtype(dtype, np.integer)
    assert attrs['scale_factor'] == 1000
    assert np.allclose(p1[0], disk_profiles)
    assert 'rev_summary' not in disk_dsets

    if PURGE_OUT:
        os.remove(fout)
Example 24
    def distance_upper_bound(self):
        """Get the upper bound on NN distance between excl and res points.

        Returns
        -------
        distance_upper_bound : float
            Estimate of the upper bound distance based on the distance between
            resource points. Calculated as half of the diagonal between
            closest resource points, with an extra 5% margin.
        """

        if self._distance_upper_bound is None:

            with Resource(self._res_fpath, str_decode=False) as res:
                lats = res.get_meta_arr('latitude')

            dists = np.abs(lats - np.roll(lats, 1))
            dists = dists[(dists != 0)]
            self._distance_upper_bound = 1.05 * (2**0.5) * (dists.min() / 2)

            logger.info('Distance upper bound was inferred to be: {}'.format(
                self._distance_upper_bound))

        return self._distance_upper_bound
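A worked numeric version of that estimate with toy latitudes: the smallest non-zero latitude spacing defines the grid cell, and the bound is half of its diagonal plus a 5% margin.

import numpy as np

# Toy latitudes on a regular grid with 0.02 degree spacing
lats = np.array([41.00, 41.00, 41.02, 41.04])

dists = np.abs(lats - np.roll(lats, 1))
dists = dists[(dists != 0)]
bound = 1.05 * (2 ** 0.5) * (dists.min() / 2)

# Half-diagonal of a 0.02 x 0.02 cell, padded by 5%
assert np.isclose(bound, 1.05 * np.sqrt(2) * 0.01)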
Example 25
class ExclusionLayers:
    """
    Handler of .h5 file and techmap for Exclusion Layers
    """
    def __init__(self, h5_file, hsds=False):
        """
        Parameters
        ----------
        h5_file : str
            .h5 file containing exclusion layers and techmap
        hsds : bool
            Boolean flag to use h5pyd to handle .h5 'files' hosted on AWS
            behind HSDS
        """
        self.h5_file = h5_file
        self._h5 = Resource(h5_file, hsds=hsds)

        self._iarr = None

    def __repr__(self):
        msg = "{} for {}".format(self.__class__.__name__, self.h5_file)

        return msg

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

        if type is not None:
            raise

    def __len__(self):
        return len(self.layers)

    def __getitem__(self, keys):
        ds, ds_slice = parse_keys(keys)

        if ds.lower().startswith('lat'):
            out = self._get_latitude(*ds_slice)
        elif ds.lower().startswith('lon'):
            out = self._get_longitude(*ds_slice)
        else:
            out = self._get_layer(ds, *ds_slice)

        return out

    def close(self):
        """
        Close h5 instance
        """
        self._h5.close()

    @property
    def h5(self):
        """
        Open rex.Resource instance.

        Returns
        -------
        h5 : rex.Resource
        """
        return self._h5

    @property
    def iarr(self):
        """Get an array of 1D index values for the flattened h5 excl extent.

        Returns
        -------
        iarr : np.ndarray
            Uint array with same shape as exclusion extent, representing the 1D
            index values if the geotiff extent was flattened
            (with default flatten order 'C')
        """
        if self._iarr is None:
            N = self.shape[0] * self.shape[1]
            self._iarr = np.arange(N, dtype=np.uint32)
            self._iarr = self._iarr.reshape(self.shape)

        return self._iarr

    @property
    def profile(self):
        """
        GeoTiff profile for exclusions

        Returns
        -------
        profile : dict
        """
        return json.loads(self.h5.attrs['profile'])

    @property
    def crs(self):
        """
        GeoTiff projection crs

        Returns
        -------
        str
        """
        return self.profile['crs']

    @property
    def pixel_area(self):
        """Get pixel area in km2 from the transform profile of the excl file.

        Returns
        -------
        area : float
            Exclusion pixel area in km2. Will return None if the
            appropriate transform attribute is not found.
        """

        area = None
        if 'transform' in self.profile:
            transform = self.profile['transform']
            area = np.abs(transform[0] * transform[4])
            area /= 1000 ** 2

        return area

    @property
    def layers(self):
        """
        Available exclusions layers

        Returns
        -------
        layers : list
        """
        layers = self.h5.datasets

        return layers

    @property
    def shape(self):
        """
        Exclusion shape (latitude, longitude)

        Returns
        -------
        shape : tuple
        """
        shape = self.h5.attrs.get('shape', None)
        if shape is None:
            shape = self.h5['latitude'].shape

        return tuple(shape)

    @property
    def chunks(self):
        """
        Exclusion layers chunks default chunk size

        Returns
        -------
        chunks : tuple | None
            Chunk size of exclusion layers
        """
        chunks = self.h5.attrs.get('chunks', None)
        if chunks is None:
            chunks = self.h5['latitude'].chunks

        if isinstance(chunks, dict):
            chunks = tuple(chunks.get('dims', None))

        return chunks

    @property
    def latitude(self):
        """
        Latitude coordinates array

        Returns
        -------
        ndarray
        """
        return self['latitude']

    @property
    def longitude(self):
        """
        Longitude coordinates array

        Returns
        -------
        ndarray
        """
        return self['longitude']

    def get_layer_profile(self, layer):
        """
        Get profile for a specific exclusion layer

        Parameters
        ----------
        layer : str
            Layer to get profile for

        Returns
        -------
        profile : dict | None
            GeoTiff profile for single exclusion layer
        """
        profile = self.h5.get_attrs(dset=layer).get('profile', None)
        if profile is not None:
            profile = json.loads(profile)

        return profile

    def get_layer_crs(self, layer):
        """
        Get crs for a specific exclusion layer

        Parameters
        ----------
        layer : str
            Layer to get profile for

        Returns
        -------
        crs : str | None
            GeoTiff projection crs
        """
        profile = self.get_layer_profile(layer)
        if profile is not None:
            crs = profile['crs']
        else:
            crs = None

        return crs

    def get_layer_values(self, layer):
        """
        Get values for given layer in Geotiff format (bands, y, x)

        Parameters
        ----------
        layer : str
            Layer to get values for

        Returns
        -------
        values : ndarray
            GeoTiff values for single exclusion layer
        """
        values = self.h5[layer]

        return values

    def get_layer_description(self, layer):
        """
        Get description for given layer

        Parameters
        ----------
        layer : str
            Layer to get description for

        Returns
        -------
        description : str
            Description of layer
        """
        description = self.h5.get_attrs(dset=layer).get('description', None)

        return description

    def get_nodata_value(self, layer):
        """
        Get the nodata value for a given layer

        Parameters
        ----------
        layer : str
            Layer to get nodata value for

        Returns
        -------
        nodata : int | float | None
            nodata value for layer or None if not found
        """
        profile = self.get_layer_profile(layer)
        nodata = None
        if profile is not None:
            nodata = profile.get('nodata', None)

        return nodata

    def _get_latitude(self, *ds_slice):
        """
        Extract latitude coordinates

        Parameters
        ----------
        ds_slice : tuple of int | list | slice
            Slicing to apply to the latitude coordinate array

        Returns
        -------
        lat : ndarray
            Latitude coordinates
        """
        if 'latitude' not in self.h5:
            msg = ('"latitude" is missing from {}'
                   .format(self.h5_file))
            logger.error(msg)
            raise HandlerKeyError(msg)

        ds_slice = ('latitude', ) + ds_slice

        lat = self.h5[ds_slice]

        return lat

    def _get_longitude(self, *ds_slice):
        """
        Extract longitude coordinates

        Parameters
        ----------
        ds_slice : tuple of int | list | slice
            Slicing to apply to the longitude coordinate array

        Returns
        -------
        lon : ndarray
            Longitude coordinates
        """
        if 'longitude' not in self.h5:
            msg = ('"longitude" is missing from {}'
                   .format(self.h5_file))
            logger.error(msg)
            raise HandlerKeyError(msg)

        ds_slice = ('longitude', ) + ds_slice

        lon = self.h5[ds_slice]

        return lon

    def _get_layer(self, layer_name, *ds_slice):
        """
        Extract data from given dataset

        Parameters
        ----------
        layer_name : str
            Exclusion layer to extract
        ds_slice : tuple of int | list | slice
            tuple describing slice of layer array to extract

        Returns
        -------
        layer_data : ndarray
            Array of exclusion data
        """
        if layer_name not in self.layers:
            msg = ('{} not in available layers: {}'
                   .format(layer_name, self.layers))
            logger.error(msg)
            raise HandlerKeyError(msg)

        shape = self.h5.get_dset_properties(layer_name)[0]
        if len(shape) == 3:
            ds_slice = (layer_name, 0) + ds_slice
        else:
            ds_slice = (layer_name, ) + ds_slice

        layer_data = self.h5[ds_slice]

        return layer_data
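A minimal usage sketch for ExclusionLayers (hypothetical file path; the available layers depend on the file's contents):

# Hypothetical exclusions file
with ExclusionLayers('./exclusions.h5') as excl:
    print(excl.shape)    # (n_lat, n_lon) exclusion extent
    print(excl.layers)   # available exclusion layer names
    lat_block = excl['latitude', 0:10, 0:10]  # slice of the latitude array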
Example 26
    def _check_dset_properties(self, dset_name):
        """
        Check to ensure the dataset is in all source files and extract
        dataset attributes

        Parameters
        ----------
        dset_name : str
            Dataset to check

        Returns
        -------
        attrs : dict
            Dataset attributes {k: v}
        shape : tuple
            Dataset shape
        dtype : str | np.dtype
            Dataset dtype
        chunks : tuple | None
            Dataset chunk size
        """
        attrs = {}
        shape = None
        dtype = None
        chunks = None
        for h5_path in self.source_h5:
            with Resource(h5_path) as f:
                if dset_name in f:
                    dset_attrs = f.get_attrs(dset=dset_name)
                    dset_shape, dset_dtype, dset_chunks = \
                        f.get_dset_properties(dset_name)
                else:
                    msg = '{} not in {}'.format(dset_name, h5_path)
                    logger.error(msg)
                    raise ValueError(msg)

            attrs.update(dset_attrs)

            if shape is None:
                shape = list(dset_shape)
                chunks = dset_chunks
                dtype = dset_dtype
            else:
                if dset_chunks != chunks:
                    msg = ("{} chunks ({} != {}) do not match between source "
                           "files!".format(dset_name, chunks, dset_chunks))
                    logger.error(msg)
                    raise RuntimeError(msg)

                if dset_dtype != dtype:
                    msg = ("{} dtypes ({} != {}) do not match between source "
                           "files!".format(dset_name, dtype, dset_dtype))
                    logger.error(msg)
                    raise RuntimeError(msg)

                for i, s in enumerate(dset_shape):
                    # pylint: disable=unsubscriptable-object
                    if i != self._axis and s != shape[i]:
                        msg = ("{} shape ({} != {}) does not match between "
                               "source files!".format(dset_name, dset_shape,
                                                      shape))
                        logger.error(msg)
                        raise RuntimeError(msg)

                if self._axis < len(shape):
                    # pylint: disable=unsupported-assignment-operation
                    shape[self._axis] += dset_shape[self._axis]

        return attrs, tuple(shape), dtype, chunks
Example 27
    def map_resource_gids(cls,
                          gids,
                          excl_fpath,
                          res_fpath,
                          distance_upper_bound,
                          map_chunk,
                          margin=0.1):
        """Map exclusion gids to the resource meta.

        Parameters
        ----------
        gids : np.ndarray
            Supply curve gids with tech exclusion points to map to the
            resource meta points.
        excl_fpath : str
            Filepath to exclusions h5 (tech layer). The tech mapping dataset
            will be created in excl_fpath.
        res_fpath : str
            Filepath to .h5 resource file that we're mapping to.
        distance_upper_bound : float | None
            Upper boundary distance for KNN lookup between exclusion points and
            resource points.
        map_chunk : int
            Chunk size used for the tech mapping calculation.
        margin : float
            Margin when reducing the resource lat/lon.

        Returns
        -------
        ind : list
            List of arrays of index values from the NN. List entries correspond
            to input gids.
        coords : list
            List of arrays of the un-projected latitude/longitude coordinates
            of tech exclusion points. List entries correspond to input gids.
        """

        logger.debug(
            'Getting tech layer coordinates for chunks {} through {}'.format(
                gids[0], gids[-1]))

        ind_out = []
        coord_labels = ['latitude', 'longitude']

        with SupplyCurveExtent(excl_fpath, resolution=map_chunk) as sc:
            coords_out, lat_range, lon_range = cls._unpack_coords(
                gids, sc, excl_fpath, coord_labels=coord_labels)

        with Resource(res_fpath, str_decode=False) as res:
            res_meta = np.vstack((res.get_meta_arr(coord_labels[0]),
                                  res.get_meta_arr(coord_labels[1]))).T

        mask = ((res_meta[:, 0] > lat_range[0] - margin)
                & (res_meta[:, 0] < lat_range[1] + margin)
                & (res_meta[:, 1] > lon_range[0] - margin)
                & (res_meta[:, 1] < lon_range[1] + margin))

        # pylint: disable-msg=C0121
        mask_ind = np.where(mask == True)[0]  # noqa: E712

        if np.sum(mask) > 0:
            # pylint: disable=not-callable
            res_tree = cKDTree(res_meta[mask, :])

            logger.debug(
                'Running tech mapping for chunks {} through {}'.format(
                    gids[0], gids[-1]))
            for i, _ in enumerate(gids):
                dist, ind = res_tree.query(coords_out[i])
                ind = mask_ind[ind]
                ind[(dist > distance_upper_bound)] = -1
                ind_out.append(ind)
        else:
            logger.debug('No close res points for chunks {} through {}'.format(
                gids[0], gids[-1]))
            for _ in gids:
                ind_out.append(-1)

        return ind_out, coords_out
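The nearest-neighbor core of map_resource_gids can be reproduced with SciPy alone; this toy sketch mirrors the query-then-mask pattern, flagging exclusion points beyond distance_upper_bound with -1:

import numpy as np
from scipy.spatial import cKDTree

res_points = np.array([[41.0, -71.0], [41.1, -71.1]])     # toy resource meta
excl_points = np.array([[41.01, -71.01], [45.0, -60.0]])  # toy excl coords

tree = cKDTree(res_points)
dist, ind = tree.query(excl_points)

distance_upper_bound = 0.1
ind[(dist > distance_upper_bound)] = -1

assert list(ind) == [0, -1]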