def _split_locs(placements, groups):
    """Yield `placements` split into `groups` spatial clusters.

    Parameters
    ----------
    placements : pandas.DataFrame
        Placement table whose index can be consumed by geokit.LocationSet
    groups : int
        Number of clusters requested. When equal to 1 the table is yielded
        as-is and no clustering is performed.

    Yields
    ------
    The rows of `placements` (selected with `.loc`) belonging to each group
    produced by `LocationSet.splitKMeans`.
    """
    # Trivial case: nothing to split
    if groups == 1:
        yield placements
        return

    location_set = gk.LocationSet(placements.index)
    for cluster in location_set.splitKMeans(groups=groups):
        yield placements.loc[cluster[:]]
def location_to_tilt(locs, convention="Ryberg2020", **kwargs):
    """Simple system tilt estimator based off latitude and longitude coordinates

    Parameters
    ----------
    locs : geokit.LocationSet or iterable of (lon,lat) pairs
        The locations at which to estimate system tilt angle

    convention : str, optional
        The calculation method used to suggest system tilts
        Options are:
            * "Ryberg2020"
            * A path to an existing raster file
            * A string consumable by 'eval'
                - Can use the variable 'latitude'
                - Ex. "latitude*0.76"
        The options are checked in the order listed above.

    kwargs: Optional keyword arguments to use in geokit.raster.interpolateValues(...).
        Only applies when `convention` is a path to a raster file

    Returns
    -------
    np.ndarray
        Suggested tilt angle at each of the provided `locs`.
        Has the same length as the number of `locs`.
        (For the raster and 'eval' conventions the result is whatever
        `interpolateValues` / the evaluated expression produces.)

    Raises
    ------
    ResError
        If `convention` is neither "Ryberg2020" nor an existing file, and
        evaluating it as an expression fails. The original exception is
        attached as the cause.

    Notes
    -----
    "Ryberg2020"
        When `convention` equals "Ryberg2020", the following equation is followed:

        .. math:: 42.327719357601396 * arctan( 1.5 * radians( abs(latitude) ) )

        i.e. the absolute latitude is converted from degrees to radians
        before being scaled.

        .. [1] TODO: Cite future Ryberg2020 publication

    """
    locs = gk.LocationSet(locs)

    if convention == 'Ryberg2020':
        tilt = 42.327719357601396 * np.arctan(1.5 * np.radians(np.abs(locs.lats)))

    elif isfile(convention):
        tilt = gk.raster.interpolateValues(convention, locs, **kwargs)

    else:
        # NOTE(security): `convention` is evaluated as Python code. Only pass
        # trusted strings here; builtins remain reachable from the expression.
        try:
            tilt = eval(convention, {}, {"latitude": locs.lats})
        except Exception as err:
            # Catch `Exception` (not a bare except) so KeyboardInterrupt and
            # SystemExit still propagate; keep the root cause for debugging.
            raise ResError("Failed to apply tilt convention") from err

    return tilt
def test_SolarWorkflowManager_estimate_azimuth_from_latitude(
        pt_SolarWorkflowManager_initialized):
    """Check `estimate_azimuth_from_latitude` before and after flipping latitudes.

    The fixture's five placements are expected to get an azimuth of 180; once
    every latitude is negated (and `locs` rebuilt to match) the expected
    azimuth is 0.
    """
    manager = pt_SolarWorkflowManager_initialized

    # Latitudes as delivered by the fixture
    manager.estimate_azimuth_from_latitude()
    expected_initial = [180] * 5
    assert np.allclose(manager.placements['azimuth'], expected_initial)

    # Mirror the placements across the equator and keep `locs` in sync
    manager.placements['lat'] *= -1
    mirrored_coords = manager.placements[['lon', 'lat']].values
    manager.locs = gk.LocationSet(mirrored_coords)

    manager.estimate_azimuth_from_latitude()
    expected_mirrored = [0] * 5
    assert np.allclose(manager.placements['azimuth'], expected_mirrored)
def __init__(self, placements: pd.DataFrame):
    """Set up the placements table, location set, extent and empty simulation state.

    Parameters
    ----------
    placements : pandas.DataFrame
        Must contain either a 'geom' column or both 'lon' and 'lat' columns.
        The frame is copied; when 'geom' is present it is replaced in the
        copy by 'lon' and 'lat' columns taken from the resulting LocationSet.

    Raises
    ------
    AssertionError
        If `placements` is not a DataFrame, or if 'lon'/'lat' are missing
        after the optional 'geom' conversion.
    """
    assert isinstance(placements, pd.DataFrame)

    # Work on a private copy so the caller's table is left untouched
    table = placements.copy()
    self.placements = table
    self.locs = None

    # Geometry column given: derive lon/lat from it and drop the geometry
    if 'geom' in placements.columns:
        self.locs = gk.LocationSet(placements.geom)
        table['lon'] = self.locs.lons
        table['lat'] = self.locs.lats
        del table['geom']

    for required_column in ('lon', 'lat'):
        assert required_column in table.columns

    # No geometry column: build the location set from the coordinate columns
    if self.locs is None:
        coordinates = table[['lon', 'lat']].values
        self.locs = gk.LocationSet(coordinates)

    self.ext = gk.Extent.fromLocationSet(self.locs)

    # Containers filled in later by the workflow steps
    self.sim_data = OrderedDict()
    self.time_index = None
    self.workflow_parameters = OrderedDict()
def roughness_from_land_cover_source(source, loc, land_cover_type='clc'):
    """
    Estimate roughness values by sampling a land cover raster at the given locations

    The raster is sampled with geokit.raster.interpolateValues (no-data values
    are not accepted: `noDataOkay=False`) and the sampled classifications are
    translated by `roughness_from_land_cover_classification`.

    Parameters
    ----------
    source : str
        The path to the land cover raster file on the disk.

    loc : Anything acceptable to geokit.LocationSet
        The location(s) to sample, e.g. a str, an OGR point object (must carry
        an SRS) or coordinate tuples. See
        https://github.com/FZJ-IEK3-VSA/geokit/blob/master/geokit/core/location.py
        for the accepted forms.

    land_cover_type : str, optional
        Passed on unchanged to `roughness_from_land_cover_classification`.
        Accepted arguments are 'clc', 'clc-code', 'globCover', 'modis', or 'cci', by default 'clc'
            'clc': Corine Land Cover (CLC)
            'clc-code': Corine Land Cover (CLC) codes
            'globCover': Global Wind Atlas
            'modis': Modis number for "no data" points of Global Wind Atlas (mostly in areas North of 60°)
            'cci': Climate Change Initiative land cover classification

    Returns
    -------
    The roughness length(s) returned by
    `roughness_from_land_cover_classification` for the sampled classifications.

    See Also
    --------
        roughness_from_levels(low_wind_speed, low_height, high_wind_speed, high_height)
        roughness_from_clc(clc_path, loc, window_range)
        roughness_from_land_cover_classification(classification, land_cover_type)
    """
    locations = gk.LocationSet(loc)
    return roughness_from_land_cover_classification(
        gk.raster.interpolateValues(source, locations, noDataOkay=False),
        land_cover_type=land_cover_type,
    )
def distribute_workflow(workflow_function: FunctionType, placements: pd.DataFrame, jobs: int = 2, max_batch_size: int = None, intermediate_output_dir: str = None, **kwargs) -> xarray.Dataset:
    """Distributes a RESKit simulation workflow across multiple CPUs

    Parallelism is achieved by breaking up the placements dataframe into placement groups via KMeans grouping

    Parameters
    ----------
    workflow_function : FunctionType
        The workflow function to be parallelized
        - All RESKit workflow functions should be suitable here
        - If you want to make your own function, the only requirement is that its first argument
          should be a pandas DataFrame in the form of a placements table (i.e. has a 'lat' and
          'lon' column)
        - Don't forget that all inputs required for the workflow function are still required,
          and are passed on as constants through any specified `kwargs`

    placements : pandas.DataFrame
        A DataFrame describing the placements to be simulated
        Must have either a 'geom' column or both 'lon' and 'lat' columns
        For example, if you are simulating wind turbines, the following columns are likely required:
            ['lon','lat','capacity','hub_height','rotor_diam',]
        The given frame is not modified; a copy is re-indexed by location and
        extended with a 'location_id' column before being split up.

    jobs : int, optional
        The number of parallel jobs
        - By default 2

    max_batch_size : int, optional
        If given, limits the maximum number of total placements which are simulated in parallel
        - Use this to reduce the memory requirements of the simulations (in turn increasing
          overall simulation time)
        - By default None, in which case ceil(number of placements / jobs) is used

    intermediate_output_dir : str, optional
        In case of very large outputs (which are too large to be joined into a singular XArray dataset),
          use this to write the individual simulation results to the specified directory
        - When given, each job receives an extra keyword argument `output_netcdf_path` pointing to
          "simulation_group_<group id>.nc" inside this directory
        - By default None

    **kwargs:
        All key word arguments are passed on as constants to each simulation
        - Use these to set the required arguments for the given `workflow_function`

    Returns
    -------
    xarray.Dataset
        An XArray Dataset which contains the combined results of the distributed simulations
        (concatenated along, and sorted by, the "location" dimension).
        If `intermediate_output_dir` is given, the list of per-group results returned by
        `workflow_function` is returned instead.
    """
    import xarray
    from multiprocessing import Pool

    assert isinstance(placements, pd.DataFrame)
    assert ("lon" in placements.columns and "lat" in placements.columns) or ("geom" in placements.columns)

    # Work on a copy: the index and the 'location_id' column are set below and
    # the caller's table should not be altered as a side effect.
    placements = placements.copy()

    # Split placements into groups
    if "geom" in placements.columns:
        locs = gk.LocationSet(placements)
    else:
        locs = gk.LocationSet(
            np.column_stack([placements.lon.values, placements.lat.values]))

    placements.index = locs
    placements['location_id'] = np.arange(placements.shape[0])

    if max_batch_size is None:
        max_batch_size = int(np.ceil(placements.shape[0] / jobs))

    # Two-level split: a first-level group may still hold more rows than
    # `max_batch_size`, so each group is split once more based on its own size.
    kmeans_groups = int(np.ceil(placements.shape[0] / max_batch_size))
    placement_groups = []
    for placement_group in _split_locs(placements, kmeans_groups):
        kmeans_groups_level2 = int(
            np.ceil(placement_group.shape[0] / max_batch_size))
        placement_groups.extend(
            _split_locs(placement_group, kmeans_groups_level2))

    # Do simulations
    pool = Pool(jobs)
    try:
        results = []
        for gid, placement_group in enumerate(placement_groups):
            kwargs_ = kwargs.copy()
            if intermediate_output_dir is not None:
                kwargs_['output_netcdf_path'] = join(
                    intermediate_output_dir,
                    "simulation_group_{:05d}.nc".format(gid))
            results.append(
                pool.apply_async(func=workflow_function,
                                 args=(placement_group, ),
                                 kwds=kwargs_))

        # Collect in submission order; `.get()` re-raises worker exceptions
        xdss = [result.get() for result in results]
        pool.close()
    except BaseException:
        # Do not leave worker processes behind when a job (or the user) aborts
        pool.terminate()
        raise
    finally:
        pool.join()

    if intermediate_output_dir is None:
        return xarray.concat(xdss, dim="location").sortby('location')
    else:
        return xdss
index_col='name')['value'] # INPUT FILES gwaFile = "/data/s-ryberg/data/geography/global_wind_atlas/v3/gwa3_250_wind-speed_100m.tif" placementFile = "output_data/placements.shp" turbineDesign = windpower.TurbineLibrary.loc[PARAMS['turbineDesign']] # OUTPUTS outputFile = "output_data/generation.csv" # Get Placements placements = gk.vector.extractFeatures(placementFile) # Get mean windspeed for each placement placements['ws100m'] = gk.raster.interpolateValues(gwaFile, placements.geom) # Make synthetic wind speed data locs = gk.LocationSet(placements.geom) np.random.seed(0) windspeedValues = pd.DataFrame(np.random.normal(placements['ws100m'], placements['ws100m'] / 4, ( 8760, placements.shape[0], )), columns=locs) # Wind turbine simulation generation = windpower.simulateTurbine(windspeedValues, powerCurve=turbineDesign.PowerCurve) generation.to_csv(outputFile)
def roughness_from_clc(clc_path, loc, window_range=0):
    """
    Estimates a roughness factor according to suggestions by Silva et al. [1] from the
    land cover found at the given locations in the Corine Land Cover dataset [2].

    Parameters
    ----------
    clc_path : str
        The path to the Corine Land Cover (CLC) raster file on the disk.
        This function currently only works for CLC versions before 2018.

    loc : Anything acceptable to geokit.LocationSet
        The locations for which roughness should be estimated.
        Accepted forms include a str, an OGR point object (must carry an SRS) or
        coordinate tuples. See
        https://github.com/FZJ-IEK3-VSA/geokit/blob/master/geokit/core/location.py
        for more information.

    window_range : int; optional
        An extra number of pixels to extract around the indicated locations, by default 0.
        With window_range = 0 only the CLC pixel value at each location is used.
        With window_range > 0 the surrounding window is extracted as well (e.g. a
        window_range of 1 gives a 3x3 window) and the returned roughness is the
        pixel-count-weighted mean of the roughness values found in that window.

    Returns
    -------
    float or numpy.ndarray
        Roughness length factors
        A single value is returned when exactly one result is produced.
        A one-dimensional Numpy array is returned otherwise.

    Notes
    -----
    No-data pixels, and any grid value above 44, are treated as grid value 44
    ("treat nodata as ocean").

    See Also
    --------
        roughness_from_levels(low_wind_speed, low_height, high_wind_speed, high_height)
        roughness_from_land_cover_classification(classification, land_cover_type)
        roughness_from_land_cover_source(source, loc, land_cover_type)

    Sources
    -------
        [1] Silva, J., Ribeiro, C., & Guedes, R. (2007). Roughness length classification of corine land cover classes. European Wind Energy Conference and Exhibition 2007, EWEC 2007.
        [2] Copernicus (European Union's Earth Observation Programme). (2018). Corine Land Cover (CLC) 2000, Version 2018. Copernicus. http://land.copernicus.eu/pan-european/corine-land-cover/clc-2000/view

    Roughness values [1]
    --------------------
        Continuous urban fabric : 1.2
        Broad-leaved forest : 0.75
        Coniferous-leaved forest : 0.75
        Mixed-leaved forest : 0.75
        Green urban areas : 0.6
        Transitional woodland/shrub : 0.6
        Burnt areas : 0.6
        Discontinuous urban fabric : 0.5
        Construction sites : 0.5
        Industrial or commercial units : 0.5
        Sport and leisure facilities : 0.5
        Port areas : 0.5
        Agro-forestry areas : 0.3
        Complex cultivation patterns : 0.3
        Land principally occupied by agriculture, with significant areas of natural vegetation : 0.3
        Annual crops associated with permanent crops : 0.1
        Fruit trees and berry plantations : 0.1
        Vineyard : 0.1
        Olive groves : 0.1
        Road and rail networks and associated land : 0.075
        Non-irrigated arable land : 0.05
        Permanently irrigated land : 0.05
        Rice fields : 0.05
        Inland marshes : 0.05
        Salt marshes : 0.05
        Sclerophyllous vegetation : 0.03
        Moors and heathland : 0.03
        Natural grassland : 0.03
        Pastures : 0.03
        Dump sites : 0.005
        Mineral extraction sites : 0.005
        Airports : 0.005
        Bare rock : 0.005
        Sparsely vegetated areas : 0.005
        Glaciers and perpetual snow : 0.001
        Peatbogs : 0.0005
        Salines : 0.0005
        Intertidal flats : 0.0005
        Beaches, dunes, and sand plains : 0.0003
        Water courses (suspicious) : 0.001
        Water bodies (suspicious) : 0.0005
        Coastal lagoons (suspicious) : 0.0005
        Estuaries (suspicious) : 0.0008
        Sea and ocean (suspicious) : 0.0002

    """
    ocean_grid_value = 44

    def _as_clean_codes(grid):
        # Treat nodata (and anything beyond the last known class) as ocean
        grid[np.isnan(grid)] = ocean_grid_value
        grid[grid > ocean_grid_value] = ocean_grid_value
        return grid.astype(int)

    def _roughness_of(grid_value):
        # grid value -> CLC code -> roughness length
        return clcCodeToRoughess[clcGridToCode_v2006[grid_value]]

    # Ensure location is okay, then sample the raster
    locations = gk.LocationSet(loc)
    pixel_values = gk.raster.interpolateValues(clc_path,
                                               locations,
                                               winRange=window_range,
                                               noDataOkay=True)

    if window_range > 0:
        # One window per location: average the roughness over the window,
        # weighting every distinct class by the number of pixels it covers
        outputs = []
        for window in pixel_values:
            classes, pixel_counts = np.unique(_as_clean_codes(window),
                                              return_counts=True)
            weighted_total = sum(
                count * _roughness_of(grid_value)
                for grid_value, count in zip(classes, pixel_counts))
            outputs.append(weighted_total / pixel_counts.sum())
    else:
        # One pixel per location: direct lookup
        outputs = [
            _roughness_of(grid_value)
            for grid_value in _as_clean_codes(pixel_values)
        ]

    # Unwrap a lone result, otherwise hand back an array
    return outputs[0] if len(outputs) == 1 else np.array(outputs)
def get(self, variable, locations, interpolation='near', force_as_data_frame=False, outside_okay=False, _indicies=None):
    """
    Retrieve a time series for a variable from the source's data library at the given location(s)

    Can also use various interpolation schemes (e.g. near, bilinear, or cubic)

    Parameters:
    -----------
    variable : str
        The variable within the data library to extract

    locations : Anything acceptable by geokit.LocationSet.load( )
        The location(s) to search for
        * geokit.Location, or geokit.LocationSet are best
        * A single tuple with (lon, lat) is acceptable, or a list of such tuples
        * A single point geometry (as long as it has an SRS), or a list of geometries

    interpolation : str, optional
        The interpolation method to use
        * 'near' => For each location, extract the time series from the source's
          closest lat/lon index
        * 'bilinear' => Fit a degree-1 RectBivariateSpline per time step over the
          index window spanning all requested locations, padded by 2 indices on
          every side, and evaluate it at each location
        * 'cubic' => Same as 'bilinear' but with RectBivariateSpline's default
          spline degree and a padding of 4 indices on every side

    force_as_data_frame : bool, optional
        If True, instructs the returned value to always take the form of a
          Pandas DataFrame regardless of how many locations are specified

    outside_okay : bool, optional
        Determines if points which are outside the source's lat/lon grid are allowed
        * Forwarded to `self.loc_to_index`
        * With 'near' interpolation, a location whose index is None yields a
          column of NaN
        * NOTE(review): the 'bilinear'/'cubic' branches read `.yi`/`.xi` from
          every index and do not handle None entries

    _indicies : optional
        Pre-computed index (or list of indices) assumed to match `locations`;
        when given, `self.loc_to_index` is not called

    Returns:
    --------

    If a single location is given: pandas.Series
      * Indexes match to the source's time dimension

    If multiple locations are given (or if `force_as_data_frame` is True): pandas.DataFrame
      * Indexes match to the source's time dimension
      * Columns match to the given order of locations

    Raises:
    -------
    ResError
        * If `interpolation` is not one of 'near', 'cubic' or 'bilinear'
        * If the padded interpolation window leaves the available data
        * If integer indices are supplied for interpolation on a source with
          dependent coordinates

    """
    # Ensure loc is a list
    locations = gk.LocationSet(locations)

    # Get the indicies
    if _indicies is None:
        # Fractional indices are only needed when interpolating in 'index
        # space', i.e. for dependent coordinates with a non-'near' scheme
        as_int = not self.dependent_coordinates or interpolation == 'near'
        indicies = self.loc_to_index(locations, outside_okay, as_int=as_int)
    else:
        # Assume indicies match locations
        indicies = _indicies

    if isinstance(indicies, Index):
        indicies = [
            indicies,
        ]

    # Do interpolation
    if interpolation == 'near':
        # arrange the output data: one column per location
        tmp = []
        for i in indicies:
            if i is not None:
                tmp.append(self.data[variable][:, i.yi, i.xi])
            else:
                # location without an index: fill the whole series with NaN
                tmp.append(np.array([
                    np.nan,
                ] * self.time_index.size))
        output = np.column_stack(tmp)

    elif interpolation == "cubic" or interpolation == "bilinear":
        # set some arguments for later use
        if interpolation == "cubic":
            win = 4
            rbsArgs = dict()
        else:
            win = 2
            rbsArgs = dict(kx=1, ky=1)

        # Set up interpolation arrays: bounding index window plus padding
        yiMin = np.round(min([i.yi for i in indicies]) - win).astype(int)
        yiMax = np.round(max([i.yi for i in indicies]) + win).astype(int)
        xiMin = np.round(min([i.xi for i in indicies]) - win).astype(int)
        xiMax = np.round(max([i.xi for i in indicies]) + win).astype(int)

        # ensure boundaries are okay
        # NOTE(review): yiMax == self._latN (or xiMax == self._lonN) passes this
        # check although the slices below end at yiMax + 1; confirm whether
        # '>=' is intended before changing it
        if yiMin < 0 or xiMin < 0 or yiMax > self._latN or xiMax > self._lonN:
            raise ResError(
                "Insufficient data. Try expanding the boundary of the extracted data"
            )

        ##########
        # TODO: Update interpolation schemes to handle out-of-bounds indices
        ##########

        if self.dependent_coordinates:
            # do interpolations in 'index space'
            if isinstance(indicies[0][0], int):
                raise ResError(
                    "Index must be float type for interpolation")

            gridYVals = np.arange(yiMin, yiMax + 1)
            gridXVals = np.arange(xiMin, xiMax + 1)

            yInterp = [i.yi for i in indicies]
            xInterp = [i.xi for i in indicies]

        else:
            # do interpolation in the expected 'coordinate space'
            gridYVals = self.lats[yiMin:yiMax + 1]
            gridXVals = self.lons[xiMin:xiMax + 1]

            yInterp = [loc.lat for loc in locations]
            xInterp = [loc.lon for loc in locations]

        # Do interpolation, one spline per time step
        values = self.data[variable]
        output = []
        for ts in range(values.shape[0]):
            # set up interpolation
            rbs = RectBivariateSpline(
                gridYVals, gridXVals,
                values[ts, yiMin:yiMax + 1, xiMin:xiMax + 1],
                **rbsArgs)

            # interpolate for each location
            # lat/lon order switched to match index order
            output.append(rbs(yInterp, xInterp, grid=False))

        output = np.stack(output)

    else:
        raise ResError(
            "Interpolation scheme not one of: 'near', 'cubic', or 'bilinear'"
        )

    # Make output as Series objects
    if force_as_data_frame or (len(output.shape) > 1 and output.shape[1] > 1):
        return pd.DataFrame(output,
                            index=self.time_index,
                            columns=locations)
    else:
        try:
            return pd.Series(output[:, 0],
                             index=self.time_index,
                             name=locations[0])
        except IndexError:
            # `output` is one-dimensional: use it directly
            return pd.Series(output,
                             index=self.time_index,
                             name=locations[0])
def loc_to_index(self, loc, outside_okay=False, as_int=True):
    """Returns the closest X and Y indexes corresponding to a given location
    or set of locations

    Parameters:
    -----------
    loc : Anything acceptable by geokit.LocationSet
        The location(s) to search for
        * A single tuple with (lon, lat) is acceptable, or a list of such tuples
        * A single point geometry (as long as it has an SRS), or a list of geometries is okay
        * geokit.Location, or geokit.LocationSet are best!

    outside_okay : bool, optional
        Determines if points which are outside the source's lat/lon grid are allowed
        * If True, points outside this space will return as None
        * If False, an error is raised

    as_int : bool, optional
        * If True, the index of the closest grid cell is returned
        * If False, the offset from that cell (distance divided by the mean
          spacing to its neighbours) is added, giving a fractional index

    Returns:
    --------
    If a single location is given: Index
        * Format: (yIndex, xIndex)
        * y index can be accessed with '.yi'
        * x index can be accessed with '.xi'

    If multiple locations are given: list
        * Format: [ (yIndex1, xIndex1), (yIndex2, xIndex2), ...]
        * Order matches the given order of locations

    Raises:
    -------
    ResError
        If a location is out of bounds and `outside_okay` is False

    Note:
    -----
    The default form of this function (which is the one used here) is not very efficient, ultimately leading to
    much longer look-up than they otherwise need to be. When the weather source has grid cells on a regular lat/lon
    grid then a more efficient form of this function can be configured using the function generator
    "_loc_to_index_rect". In these instances, this is the recommended function to use. For example, if the weather
    source uses a latitude spacing of 0.5, and a longitude spacing of 0.625, then the function generator can be used
    like:

    > source.loc_to_index = source._loc_to_index_rect(lat_step=0.5, lon_step=0.625)

    """
    # Ensure loc is a list
    locations = gk.LocationSet(loc)

    # get closest indices
    idx = []
    for lat, lon in zip(locations.lats, locations.lons):
        # Check the distance
        latDist = lat - self.lats
        lonDist = lon - self.lons

        # Get the best indices
        if self.dependent_coordinates:
            # lats/lons are indexed as [latI, lonI]: pick the cell with the
            # smallest squared coordinate distance
            dist = lonDist * lonDist + latDist * latDist
            latI, lonI = np.unravel_index(np.argmin(dist), dist.shape)

            # Local grid spacing = mean of the steps to the existing neighbours
            latDists = []
            if latI < self._latN - 1:
                latDists.append(
                    (self.lats[latI + 1, lonI] - self.lats[latI, lonI]))
            if latI > 0:
                latDists.append(
                    (self.lats[latI, lonI] - self.lats[latI - 1, lonI]))
            latDistI = latDist[latI, lonI] / np.mean(latDists)

            lonDists = []
            if lonI < self._lonN - 1:
                lonDists.append(
                    (self.lons[latI, lonI + 1] - self.lons[latI, lonI]))
            if lonI > 0:
                lonDists.append(
                    (self.lons[latI, lonI] - self.lons[latI, lonI - 1]))
            lonDistI = lonDist[latI, lonI] / np.mean(lonDists)

        else:
            # lats and lons are indexed independently: search each axis on its own
            lonI = np.argmin(np.abs(lonDist))
            latI = np.argmin(np.abs(latDist))

            latDists = []
            if latI < self._latN - 1:
                latDists.append((self.lats[latI + 1] - self.lats[latI]))
            if latI > 0:
                latDists.append((self.lats[latI] - self.lats[latI - 1]))
            latDistI = latDist[latI] / np.mean(latDists)

            lonDists = []
            # The longitude axis has self._lonN entries; comparing against
            # self._latN here indexed past the end (or skipped the forward
            # step) whenever the grid is not square
            if lonI < self._lonN - 1:
                lonDists.append((self.lons[lonI + 1] - self.lons[lonI]))
            if lonI > 0:
                lonDists.append((self.lons[lonI] - self.lons[lonI - 1]))
            lonDistI = lonDist[lonI] / np.mean(lonDists)

        # Check for out of bounds: offsets are expressed in grid-spacing units
        if np.abs(latDistI) > self._maximal_lat_difference or np.abs(
                lonDistI) > self._maximal_lon_difference:
            if not outside_okay:
                raise ResError("(%f,%f) are outside the boundaries" %
                               (lat, lon))
            else:
                idx.append(None)
                continue

        # As int?
        if not as_int:
            latI = latI + latDistI
            lonI = lonI + lonDistI

        # append
        idx.append(Index(yi=latI, xi=lonI))

    # Make output
    if locations.count == 1:
        return idx[0]
    else:
        return idx
def func(self, loc, outside_okay=False, as_int=True):
    """Returns the closest X and Y indexes corresponding to a given location
    or set of locations

    The indices are computed directly from the offset to the first grid
    coordinate divided by the fixed grid steps `lat_step` and `lon_step`
    (taken from the enclosing scope).

    Parameters:
    -----------
    loc : Anything acceptable by geokit.LocationSet
        The location(s) to search for
        * A single tuple with (lon, lat) is acceptable, or a list of such tuples
        * A single point geometry (as long as it has an SRS), or a list of geometries is okay
        * geokit.Location, or geokit.LocationSet are best!

    outside_okay : bool, optional
        Determines if points which are outside the source's lat/lon grid are allowed
        * If True, points outside this space will return as None
        * If False, the offending locations are printed and an error is raised

    as_int : bool, optional
        * If True, indices are rounded to the nearest integer
        * If False, the fractional indices are returned

    Returns:
    --------
    If a single location is given: Index (or None when out of bounds)
        * Format: (yIndex, xIndex)
        * y index can be accessed with '.yi'
        * x index can be accessed with '.xi'

    If multiple locations are given: list
        * Format: [ (yIndex1, xIndex1), (yIndex2, xIndex2), ...]
        * Out-of-bounds locations are represented by None
        * Order matches the given order of locations

    Raises:
    -------
    ResError
        If any location is out of bounds and `outside_okay` is False

    """
    # Ensure loc is a list
    locations = gk.LocationSet(loc)

    # get closest indices (still fractional at this point)
    latI = (locations.lats - self.lats[0]) / lat_step
    lonI = (locations.lons - self.lons[0]) / lon_step

    # Check for out of bounds
    # NOTE(review): the test is applied before rounding, so a value within half
    # a step outside either edge is judged on its fractional index; confirm
    # this is the intended tolerance
    oob = (latI < 0) | (latI >= self._latN) | (lonI < 0) | (lonI >=
                                                            self._lonN)
    if oob.any() and not outside_okay:
        print("The following locations are out of bounds")
        print(locations[oob])
        raise ResError("Locations are outside the boundaries")

    # As int?
    if as_int:
        latI = np.round(latI).astype(int)
        lonI = np.round(lonI).astype(int)

    # Make output
    if locations.count == 1:
        # `oob[0]` is a NumPy boolean, which is never identical to the Python
        # singleton `True`; test its truth value instead of using `is True`
        if oob[0]:
            return None
        else:
            return Index(yi=latI[0], xi=lonI[0])
    else:
        return [
            None if _oob else Index(yi=y, xi=x)
            for _oob, y, x in zip(oob, latI, lonI)
        ]