def test_opsd2reegis():
    path = os.path.join(os.path.dirname(__file__), 'data')
    cfg.tmp_set('paths', 'opsd', path)
    cfg.tmp_set('paths', 'powerplants', path)
    fn_opsd = opsd.opsd_power_plants()
    fn_reegis = powerplants.pp_opsd2reegis()
    os.remove(fn_opsd)
    filename = str(fn_reegis.split(os.sep)[-1])

    geo_path = cfg.get('paths', 'geometry')
    geo_file = cfg.get('geometry', 'federalstates_polygon')
    gdf = geo.load(path=geo_path, filename=geo_file)
    powerplants.add_regions_to_powerplants(
        gdf, 'fed_states', filename=filename, path=path, dump=True)

    geo_path = cfg.get('paths', 'geometry')
    geo_file = cfg.get('coastdat', 'coastdatgrid_polygon')
    gdf = geo.load(path=geo_path, filename=geo_file)
    pp = powerplants.add_regions_to_powerplants(
        gdf, 'coastdat2', filename=filename, path=path, dump=False)
    os.remove(fn_reegis)

    eq_(int(pp.groupby('fed_states').sum().loc['BE', 'capacity']), 2427)

    year = 2000
    pp = powerplants.get_reegis_powerplants(year, pp=pp)
    eq_(int(pp.groupby('fed_states').sum().loc['BE', 'capacity_2000']), 2391)

    eq_(coastdat.windzone_region_fraction(
        pp, name='fed_states', year=year).round(2).loc['NI', 3], 0.24)
def windzone_region_fraction(pp, name, year=None, dump=False):
    """
    Get the fraction of the installed wind capacity of each wind zone per
    region.

    Parameters
    ----------
    pp : pd.DataFrame
        Power plant table with a 'coastdat2' column.
    year : int
        Year of the capacity column to use (None for the overall capacity).
    name : str
        Name of the region column to group by.
    dump : bool
        Dump the results to a csv file.

    Returns
    -------
    pd.Series

    Examples
    --------
    >>> my_fn = os.path.join(cfg.get('paths', 'powerplants'),
    ...                      cfg.get('powerplants', 'reegis_pp'))
    >>> my_pp = pd.DataFrame(pd.read_hdf(my_fn, 'pp'))  # doctest: +SKIP
    >>> wz = windzone_region_fraction(my_pp, 'federal_states', 2014,
    ...                               dump=False)  # doctest: +SKIP
    >>> round(float(wz.loc['NI', 1]), 2)  # doctest: +SKIP
    0.31
    """
    pp = pp.loc[pp.energy_source_level_2 == "Wind"]
    if year is None:
        capacity_col = "capacity"
    else:
        capacity_col = "capacity_{0}".format(year)

    path = cfg.get("paths", "geometry")
    filename = "windzones_germany.geojson"
    gdf = geometries.load(path=path, filename=filename)
    gdf.set_index("zone", inplace=True)

    geo_path = cfg.get("paths", "geometry")
    geo_file = cfg.get("coastdat", "coastdatgrid_polygon")
    coastdat_geo = geometries.load(path=geo_path, filename=geo_file)
    coastdat_geo["geometry"] = coastdat_geo.centroid

    points = geometries.spatial_join_with_buffer(coastdat_geo, gdf,
                                                 "windzone")

    wz = pd.DataFrame(points["windzone"])
    pp = pd.merge(pp, wz, left_on="coastdat2", right_index=True)
    pp["windzone"].fillna(0, inplace=True)
    pp = pp.groupby([name, "windzone"]).sum()[capacity_col]
    wz_regions = pp.groupby(level=0).apply(lambda x: x / float(x.sum()))

    if dump is True:
        filename = "windzone_{0}.csv".format(name)
        fn = os.path.join(cfg.get("paths", "powerplants"), filename)
        wz_regions.to_csv(fn, header=False)

    return wz_regions
def fetch_id_by_coordinates(latitude, longitude):
    """
    Get the nearest weather data set to a given location.

    Parameters
    ----------
    latitude : float
    longitude : float

    Returns
    -------
    int : coastdat id

    Examples
    --------
    >>> fetch_id_by_coordinates(53.655119, 11.181475)
    1132101
    """
    coastdat_polygons = geometries.load(
        cfg.get("paths", "geometry"),
        cfg.get("coastdat", "coastdatgrid_polygon"),
    )
    location = Point(longitude, latitude)

    cid = coastdat_polygons[coastdat_polygons.contains(location)].index

    if len(cid) == 0:
        msg = "No id found for latitude {0} and longitude {1}."
        logging.warning(msg.format(latitude, longitude))
        return None
    elif len(cid) == 1:
        return cid[0]
def fetch_data_coordinates_by_id(coastdat_id):
    """
    Returns the coordinates of the weather data set.

    Parameters
    ----------
    coastdat_id : int or str
        ID of the coastdat weather data set

    Returns
    -------
    namedtuple : Fields are latitude and longitude

    Examples
    --------
    >>> location = fetch_data_coordinates_by_id(1132101)
    >>> round(location.latitude, 3)
    53.692
    >>> round(location.longitude, 3)
    11.351
    """
    coord = namedtuple("weather_location", "latitude, longitude")
    coastdat_polygons = geometries.load(
        cfg.get("paths", "geometry"),
        cfg.get("coastdat", "coastdatgrid_polygon"),
    )
    c = coastdat_polygons.loc[int(coastdat_id)].geometry.centroid
    return coord(latitude=c.y, longitude=c.x)
def federal_state_average_weather(year, parameter):
    """
    Example for spatial_average_weather() with federal state polygons.

    Parameters
    ----------
    year : int
        Year of the weather data set to average.
    parameter : str
        Name of the parameter of the weather data set (e.g. temperature).

    Returns
    -------
    pd.DataFrame
    """
    federal_states = geometries.load(
        cfg.get('paths', 'geometry'),
        cfg.get('geometry', 'federalstates_polygon'))
    filename = os.path.join(
        cfg.get('paths', 'coastdat'),
        'average_{0}_BB_TH_{1}.csv'.format(parameter, year))
    if not os.path.isfile(filename):
        spatial_average_weather(year, federal_states, parameter,
                                'federal_states', outfile=filename)
    return pd.read_csv(filename, index_col=[0], parse_dates=True)
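A minimal usage sketch, assuming the coastdat weather file for 2014 is available locally; 'temp_air' is one of the coastdat parameter names used elsewhere in this code base, and coastdat temperatures are stored in Kelvin:

# Hypothetical call; requires the coastdat hdf file for 2014.
temp = federal_state_average_weather(2014, 'temp_air')
print(temp.mean())  # mean air temperature (K) per federal state column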
def get_inhabitants(polygon, name):
    """The inhabitants of 2016 are used."""
    table = 'ew'
    cfg_data = cfg.get_dict(table)

    ew_fn = os.path.join(cfg.get('paths', 'fis_broker'), cfg_data['table'],
                         'shp', cfg_data['table'] + '.shp')

    logging.debug("Reading {0}".format(ew_fn))

    if not os.path.isfile(ew_fn):
        ew_fn = download.download_maps(single=table)
    ew = geometries.load(fullname=ew_fn)
    ew['centroid_column'] = ew.representative_point()
    ew = ew.set_geometry('centroid_column')

    neu = geometries.spatial_join_with_buffer(
        ew, polygon, name=name, limit=0,
    )
    grp = neu.groupby(name).sum()
    grp['frac'] = grp['EW'].div(grp.sum()['EW']).multiply(100).round(1)
    return grp
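The 'frac' column holds each region's share of the total 'EW' (inhabitants) count in percent. A self-contained sketch of the same pandas pattern, with made-up numbers:

import pandas as pd

grp = pd.DataFrame({'EW': [250, 750]}, index=['region_a', 'region_b'])
grp['frac'] = grp['EW'].div(grp.sum()['EW']).multiply(100).round(1)
print(grp['frac'])  # region_a: 25.0, region_b: 75.0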
def deflex_power_lines(rmap=None, rtype='lines'):
    if rmap is None:
        rmap = cfg.get('init', 'map')
    name = os.path.join(cfg.get('paths', 'geo_deflex'),
                        cfg.get('geometry', 'powerlines').format(
                            map=rmap, type=rtype))
    lines = geo.load(fullname=name)
    return lines
def federal_states_feedin_example():
    """Get full load hours for renewable sources for the federal states."""
    federal_states = geometries.load(
        cfg.get('paths', 'geometry'),
        cfg.get('geometry', 'federalstates_polygon'))
    get_feedin_per_region(2014, federal_states, 'federal_states')
    return scenario_feedin(2014, 'federal_states')
def get_ew_by_federal_states(year):
    """Get the inhabitants per federal state for a given year."""
    geo = geometries.load(
        cfg.get("paths", "geometry"),
        cfg.get("geometry", "federalstates_polygon"),
    )
    geo.set_index("iso", drop=True, inplace=True)
    geo.drop(["N0", "N1", "O0", "P0"], inplace=True)
    return get_inhabitants_by_region(year, geo, name="federal_states")
def get_ego_demand_by_federal_states(year=None):
    federal_states = geometries.load(
        cfg.get('paths', 'geometry'),
        cfg.get('geometry', 'federalstates_polygon'))
    if year is None:
        return ego_demand_by_region(federal_states, 'federal_states')
    else:
        return get_ego_demand_bmwi_by_region(year, federal_states,
                                             'federal_states')
def calculate_inhabitants_friedrichshagen(year, geo=None):
    if geo is None:
        fhg_fn = os.path.join(cfg.get('paths', 'geo_berlin'),
                              cfg.get('geometry', 'friedrichshagen_block'))
        geo_fhg = geometries.load(fullname=fhg_fn, index_col='BZR_NAME')
    else:
        geo_fhg = geo
    return get_inhabitants(geo_fhg, 'brz_name').loc[geo_fhg.index[0], 'EW']
def get_coastdat_onshore_polygons():
    cstd = geometries.load(
        cfg.get('paths', 'geometry'),
        cfg.get('coastdat', 'coastdatgrid_polygon'),
        index_col='gid')

    de02 = geometries.load(
        cfg.get('paths', 'geo_deflex'),
        cfg.get('geometry', 'deflex_polygon').format(
            type='polygons', map='de02', suffix='.geojson'),
        index_col='region')

    cstd_pt = gpd.GeoDataFrame(cstd.centroid, columns=['geometry'])

    cstd_pt = geometries.spatial_join_with_buffer(
        cstd_pt, de02, 'coastdat', limit=0)
    reduced = cstd.loc[cstd_pt.coastdat == "DE01"]

    return reduced.sort_index()
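The function above assigns coastdat grid cells to a region by joining the cell centroids against the region polygons. A minimal sketch of the same idea with plain geopandas and made-up geometries (geopandas >= 0.10 calls the keyword `predicate`; older releases use `op`):

import geopandas as gpd
from shapely.geometry import Point, Polygon

polys = gpd.GeoDataFrame(
    {'region': ['DE01']},
    geometry=[Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])])
cells = gpd.GeoDataFrame(geometry=[Point(1, 1), Point(3, 3)])
joined = gpd.sjoin(cells, polys, how='left', predicate='within')
print(joined['region'])  # Point (1 1) -> DE01, Point (3 3) -> NaN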
def calculate_inhabitants_districts(year, geo=None):
    if geo is None:
        berlin_district_fn = os.path.join(
            cfg.get('paths', 'geo_berlin'),
            cfg.get('geometry', 'berlin_bezirke'))
        geo_bln = geometries.load(fullname=berlin_district_fn,
                                  index_col='BEZIRK')
    else:
        geo_bln = geo
    return get_inhabitants(geo_bln, 'bezirk')
def deflex_regions(suffix='reegis', rmap=None, rtype='polygon'):
    if rmap is None:
        rmap = cfg.get('init', 'map')
    name = os.path.join(cfg.get('paths', 'geo_deflex'),
                        cfg.get('geometry', 'deflex_polygon').format(
                            suffix=suffix, map=rmap, type=rtype))
    regions = geo.load(fullname=name)

    # Add 'DE' and a leading zero to the index (1 -> 'DE01').
    regions['region'] = regions.index.to_series().astype(str).apply(
        'DE{:0>2}'.format)
    regions = regions.set_index('region')
    return regions
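A quick look at the index formatting used above: the format spec '0>2' pads the value to two characters with leading zeros, so numeric region ids turn into sortable labels:

print(['DE{:0>2}'.format(i) for i in [1, 9, 21]])
# ['DE01', 'DE09', 'DE21']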
def fig_model_regions():
    """Plot one or more model regions in one plot."""
    maps = ["de02", "de17", "de21", "de22"]
    # maps = ["de22"]
    add_title = True

    top = 1
    ax = None
    ax_ar = []

    # Three inches of figure width per map.
    width = len(maps) * 3

    if len(maps) > 1:
        f, ax_ar = plt.subplots(1, len(maps), figsize=(width, 2.5))
    else:
        ax = plt.figure(figsize=(width, 2.5)).add_subplot(1, 1, 1)

    i = 0
    for rmap in maps:
        if len(maps) > 1:
            ax = ax_ar[i]

        de_map = geometries.load(
            cfg.get("paths", "geo_deflex"),
            cfg.get("geometry", "deflex_polygon").format(
                suffix=".geojson", map=rmap, type="polygons"),
        )

        # greyscale colormap
        colormap = LinearSegmentedColormap.from_list(
            "mycmap", [(0, "#dddddd"), (1, "#333333")])
        # colormap = None

        plot.plot_regions(
            rmap=de_map,
            ax=ax,
            legend=False,
            simple=0.005,
            offshore="auto",
            cmap=colormap,
        )
        for spine in plt.gca().spines.values():
            spine.set_visible(False)
        ax.axis("off")
        if add_title is True:
            ax.set_title(rmap)
            top = 0.88
        i += 1

    plt.subplots_adjust(right=1, left=0, wspace=0, bottom=0, top=top)
    return "model_regions", None
def setup_class(cls):
    path = os.path.join(os.path.dirname(__file__), "data")
    cfg.tmp_set("paths_pattern", "opsd", path)
    cfg.tmp_set("paths", "powerplants", path)
    fn_opsd = opsd.opsd_power_plants()
    os.remove(fn_opsd)
    fn_opsd = os.path.join(cfg.get("paths_pattern", "opsd"),
                           cfg.get("opsd", "opsd_prepared"))
    fn_test = fn_opsd.replace(".h5", "_test.h5")
    copyfile(fn_test, fn_opsd)
    fn_reegis = powerplants.pp_opsd2reegis()
    os.remove(fn_opsd)
    filename = str(fn_reegis.split(os.sep)[-1])

    cls.gdf1 = geo.get_federal_states_polygon()
    powerplants.add_regions_to_powerplants(
        cls.gdf1, "fed_states", filename=filename, path=path, dump=True)

    geo_path = cfg.get("paths", "geometry")
    geo_file = cfg.get("coastdat", "coastdatgrid_polygon")
    gdf2 = geo.load(path=geo_path, filename=geo_file)
    cls.pp = powerplants.add_regions_to_powerplants(
        gdf2, "coastdat2", filename=filename, path=path, dump=False)

    year = 2014
    cls.pp2 = powerplants.get_powerplants_by_region(
        cls.gdf1, year, "my_states")

    cls.pp2["efficiency_{0}".format(year)] = cls.pp2[
        "capacity_{0}".format(year)].div(
            cls.pp2["capacity_in_{0}".format(year)])

    cls.pp2.drop(
        ["capacity", "capacity_in", "thermal_capacity"],
        axis=1,
        inplace=True,
    )

    fn_reegis2 = fn_reegis.replace(".h5", "_my_states.h5")
    os.remove(fn_reegis2)
    os.remove(fn_reegis)
    rmtree(os.path.join(path, "messages"))
def deflex_regions(rmap=None, rtype='polygons'):
    """
    Parameters
    ----------
    rmap : str
        Name of the deflex map.
    rtype : str
        Type of the deflex map ('polygons', 'labels').

    Returns
    -------
    GeoDataFrame

    Examples
    --------
    >>> regions = deflex_regions('de17')
    >>> len(regions)
    17
    >>> regions.geometry.iloc[0].geom_type
    'MultiPolygon'
    >>> l = deflex_regions('de21', 'labels').loc['DE04', 'geometry']
    >>> l.geom_type
    'Point'
    >>> l.x
    13.2
    >>> l.y
    51.1
    >>> cfg.tmp_set('init', 'map', 'de22')
    >>> deflex_regions().name
    'de22'
    >>> list(deflex_regions('de02').index)
    ['DE01', 'DE02']
    """
    if rmap is None:
        rmap = cfg.get("init", "map")
    name = os.path.join(
        cfg.get("paths", "geo_deflex"),
        cfg.get("geometry", "deflex_polygon").format(
            suffix=".geojson", map=rmap, type=rtype),
    )
    regions = geo.load(fullname=name)
    regions.set_index("region", inplace=True)
    regions.name = rmap
    return regions
def divide_off_and_onshore(regions):
    """
    Sort regions into onshore and offshore regions. A named tuple with two
    lists of region ids will be returned. Fetch the `onshore` and `offshore`
    attributes of the named tuple to get the lists.

    Parameters
    ----------
    regions : GeoDataFrame
        A region set with the region id in the index.

    Returns
    -------
    named tuple

    Examples
    --------
    >>> reg = deflex_regions('de02')
    >>> divide_off_and_onshore(reg).onshore
    ['DE01']
    >>> reg = deflex_regions('de21')
    >>> divide_off_and_onshore(reg).offshore
    ['DE19', 'DE20', 'DE21']
    """
    region_type = namedtuple("RegionType", "offshore onshore")
    regions_centroid = regions.copy()
    regions_centroid.geometry = regions_centroid.centroid

    germany_onshore = geo.load(cfg.get("paths", "geometry"),
                               cfg.get("geometry", "germany_polygon"))

    gdf = geo.spatial_join_with_buffer(regions_centroid, germany_onshore,
                                       "onshore", limit=0)

    onshore = list(gdf.loc[gdf.onshore == 0].index)
    offshore = list(gdf.loc[gdf.onshore == "unknown"].index)

    return region_type(offshore=offshore, onshore=onshore)
def get_admin_by_region(region):
    """
    Allocate admin keys to the given regions.

    Parameters
    ----------
    region : geopandas.GeoDataFrame

    Returns
    -------
    pd.DataFrame
    """
    fn = os.path.join(cfg.get("paths", "geometry"), "vg1000_geodata.geojson")
    vg = geometries.load(fullname=fn)
    vg.set_index("RS", inplace=True)

    reg2vg = geometries.spatial_join_with_buffer(
        vg.representative_point(), region, "fs", limit=0)

    return pd.DataFrame(reg2vg.drop("geometry", axis=1))
def deflex_power_lines(rmap=None, rtype="lines"):
    """
    Parameters
    ----------
    rmap : str
        Name of the deflex powerline map.
    rtype : str
        Type of the deflex powerline map ('lines', 'labels').

    Returns
    -------
    GeoDataFrame

    Examples
    --------
    >>> lines = deflex_power_lines('de17')
    >>> lines.geometry.iloc[0].geom_type
    'LineString'
    >>> len(lines)
    31
    >>> deflex_power_lines('de02').index[0]
    'DE01-DE02'
    >>> cfg.tmp_set('init', 'map', 'de21')
    >>> deflex_power_lines().name
    'de21'
    """
    if rmap is None:
        rmap = cfg.get("init", "map")
    name = os.path.join(
        cfg.get("paths", "geo_deflex"),
        cfg.get("geometry", "powerlines").format(
            map=rmap, type=rtype, suffix=".geojson"),
    )
    lines = geo.load(fullname=name)
    lines.set_index("name", inplace=True)
    lines.name = rmap
    return lines
def prepare_ego_demand(egofile):
    ego_demand = geometries.create_geo_df(get_ego_data())

    # Add a column with the name of the federal state (Bayern, Berlin, ...).
    federal_states = geometries.load(
        cfg.get('paths', 'geometry'),
        cfg.get('geometry', 'federalstates_polygon'))
    ego_demand = geometries.spatial_join_with_buffer(
        ego_demand, federal_states, 'federal_states')

    # Replace the GeoDataFrame with a plain DataFrame and store the geometry
    # as a string, because the geometry objects are not needed anymore.
    ego_demand = pd.DataFrame(ego_demand)
    ego_demand['geometry'] = ego_demand['geometry'].astype(str)

    # Write out the file (hdf format).
    ego_demand.to_hdf(egofile, 'demand')

    return ego_demand
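Casting the geometry column to `str` stores each shapely object as its WKT text, which serializes cleanly. A self-contained sketch of what `astype(str)` does per element:

from shapely.geometry import Point

point = Point(13.4, 52.5)
print(str(point))  # POINT (13.4 52.5)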
import os

from disaggregator import config, data
from reegis import (geometries as geo, config as rconfig, demand_elec,
                    demand_heat)

nuts_geo_fn = os.path.join(rconfig.get('paths', 'geometry'),
                           'NUTS_RG_03M_2016_4326_LEVL_3_DE.geojson')
nuts_geo = geo.load(fullname=nuts_geo_fn)
nuts_geo.set_index('id', drop=True, inplace=True)
fed_states = geo.get_federal_states_polygon()
nuts_geo = geo.spatial_join_with_buffer(nuts_geo.centroid, fed_states, 'fs')

# Map each federal state to the list of its NUTS3 region ids.
fed_states['nuts'] = '0'
for state in fed_states.index:
    fed_states.loc[state, 'nuts'] = list(
        nuts_geo.loc[nuts_geo['fs'] == state].index)

cfg = config.get_config()

dict_nuts3_name = config.region_id_to_nuts3(nuts3_to_name=True)
df_spatial = data.database_description('spatial')
df_temporal = data.database_description('temporal')
elc_consumption_hh_spat = data.elc_consumption_HH_spatial()
elc_consumption_hh_spattemp = data.elc_consumption_HH_spatiotemporal()

print(elc_consumption_hh_spattemp[fed_states.loc['BB', 'nuts']])
print(elc_consumption_hh_spattemp[fed_states.loc['HH', 'nuts']])

# Testing Disaggregator reegis functions
fed_states_nuts = fed_states.loc['BY', 'nuts']
demand_elec.get_household_powerload_by_NUTS3_profile(2014, fed_states_nuts,
def scenario_elec_demand(year, time_series):
    elec_demand = berlin_hp.electricity.get_electricity_demand(year)
    time_series['electricity', 'demand'] = elec_demand.usage.values * 1000
    return time_series


def create_basic_scenario(regions, year, name):
    table_collection = create_scenario(regions, year, name)
    name = '{0}_{1}_{2}'.format('berlin_hp', year, 'single')
    sce = scenario_tools.Scenario(table_collection=table_collection,
                                  name=name, year=year)
    path = os.path.join(cfg.get('paths', 'scenario'), 'berlin_hp', str(year))
    sce.to_excel(os.path.join(path, name + '.xls'))
    sce.to_csv(os.path.join(path, '{0}_csv'.format(name)))


if __name__ == "__main__":
    logger.define_logging()
    start = datetime.datetime.now()
    berlin_district_fn = os.path.join(cfg.get('paths', 'geo_berlin'),
                                      'berlin.csv')
    reg = geometries.load(fullname=berlin_district_fn, index_col='gid')
    n = 'BE'
    for y in [2014, 2013, 2012]:
        create_basic_scenario(reg, y, n)
        mesg = "Basic scenario for {0} created: {1}"
        logging.info(mesg.format(y, datetime.datetime.now() - start))
    logging.info("Done: {0}".format(datetime.datetime.now() - start))
def spatial_average_weather(year, geo, parameter, name, outpath=None,
                            outfile=None):
    """
    Calculate the mean temperature over all temperature data sets within
    each region for one year.

    Parameters
    ----------
    year : int
        Select the year you want to calculate the average temperature for.
    geo : geometries.Geometry object
        Polygons to calculate the average parameter for.
    outpath : str
        Place to store the output file.
    outfile : str
        Set your own name for the output file.
    parameter : str
        Name of the item (temperature, wind speed, ...) of the weather data
        set.
    name : str
        Name of the regions table to be used as a column name.

    Returns
    -------
    str : Full file name of the created file.
    """
    logging.info("Getting average {0} for {1} in {2} from coastdat2.".format(
        parameter, name, year))
    name = name.replace(' ', '_')
    # Create a Geometry object for the coastdat centroids.
    coastdat_geo = geometries.load(cfg.get('paths', 'geometry'),
                                   cfg.get('coastdat',
                                           'coastdatgrid_polygon'))
    coastdat_geo['geometry'] = coastdat_geo.centroid

    # Join the tables to create a list of coastdat id's for each region.
    coastdat_geo = geometries.spatial_join_with_buffer(coastdat_geo, geo,
                                                       name=name, limit=0)

    # Fix regions with no matches (no match if a region is too small).
    fix = {}
    for reg in set(geo.index) - set(coastdat_geo[name].unique()):
        reg_point = geo.representative_point().loc[reg]
        coastdat_poly = geometries.load(
            cfg.get('paths', 'geometry'),
            cfg.get('coastdat', 'coastdatgrid_polygon'))
        fix[reg] = coastdat_poly.loc[coastdat_poly.intersects(
            reg_point)].index[0]

    # Open the weather file
    weather_file = os.path.join(
        cfg.get('paths', 'coastdat'),
        cfg.get('coastdat', 'file_pattern').format(year=year))
    if not os.path.isfile(weather_file):
        download_coastdat_data(year=year, filename=weather_file)
    weather = pd.HDFStore(weather_file, mode='r')

    # Calculate the average value for each region with more than one id.
    avg_value = pd.DataFrame()
    for region in geo.index:
        cd_ids = coastdat_geo[coastdat_geo[name] == region].index
        number_of_sets = len(cd_ids)
        tmp = pd.DataFrame()
        logging.debug((region, len(cd_ids)))
        for cid in cd_ids:
            try:
                cid = int(cid)
            except ValueError:
                pass
            if isinstance(cid, int):
                key = 'A' + str(cid)
            else:
                key = cid
            tmp[cid] = weather[key][parameter]
        if len(cd_ids) < 1:
            key = 'A' + str(fix[region])
            avg_value[region] = weather[key][parameter]
        else:
            avg_value[region] = tmp.sum(1).div(number_of_sets)
    weather.close()

    # Create the file name and write the table to a file.
    regions = sorted(geo.index)
    if outfile is None:
        out_name = '{0}_{1}'.format(regions[0], regions[-1])
        outfile = os.path.join(
            outpath,
            'average_{parameter}_{type}_{year}.csv'.format(
                year=year, type=out_name, parameter=parameter))

    avg_value.to_csv(outfile)
    logging.info("Average temperature saved to {0}".format(outfile))
    return outfile
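In the loop above, `tmp.sum(1).div(number_of_sets)` is simply the row-wise mean over the matched weather cells (the two agree as long as no values are missing). A self-contained check with made-up series:

import pandas as pd

tmp = pd.DataFrame({'cell_a': [1.0, 2.0], 'cell_b': [3.0, 4.0]})
avg = tmp.sum(1).div(len(tmp.columns))
print(avg.equals(tmp.mean(axis=1)))  # True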
def spatial_average_weather(year, geo, parameter, name, outpath=None,
                            outfile=None):
    """
    Calculate the mean value of a parameter over all data sets within each
    region for one year.

    Parameters
    ----------
    year : int
        Select the year you want to calculate the average temperature for.
    geo : geometries.Geometry object
        Polygons to calculate the average parameter for.
    outpath : str
        Place to store the output file.
    outfile : str
        Set your own name for the output file.
    parameter : str
        Name of the item (temperature, wind speed, ...) of the weather data
        set.
    name : str
        Name of the regions table to be used as a column name.

    Returns
    -------
    str : Full file name of the created file.

    Example
    -------
    >>> germany_geo = geometries.load(
    ...     cfg.get('paths', 'geometry'),
    ...     cfg.get('geometry', 'germany_polygon'))
    >>> fn = spatial_average_weather(2012, germany_geo, 'temp_air', 'deTemp',
    ...                              outpath=os.path.expanduser('~')
    ...                              )  # doctest: +SKIP
    >>> temp = pd.read_csv(fn, index_col=[0], parse_dates=True, squeeze=True
    ...                    )  # doctest: +SKIP
    >>> round(temp.mean() - 273.15, 2)  # doctest: +SKIP
    8.28
    >>> os.remove(fn)  # doctest: +SKIP
    """
    logging.info("Getting average {0} for {1} in {2} from coastdat2.".format(
        parameter, name, year))
    name = name.replace(" ", "_")
    # Create a Geometry object for the coastdat centroids.
    coastdat_geo = geometries.load(
        cfg.get("paths", "geometry"),
        cfg.get("coastdat", "coastdatgrid_polygon"),
    )
    coastdat_geo["geometry"] = coastdat_geo.centroid

    # Join the tables to create a list of coastdat id's for each region.
    coastdat_geo = geometries.spatial_join_with_buffer(coastdat_geo, geo,
                                                       name=name, limit=0)

    # Fix regions with no matches (no match if a region is too small).
    fix = {}
    for reg in set(geo.index) - set(coastdat_geo[name].unique()):
        reg_point = geo.representative_point().loc[reg]
        coastdat_poly = geometries.load(
            cfg.get("paths", "geometry"),
            cfg.get("coastdat", "coastdatgrid_polygon"),
        )
        fix[reg] = coastdat_poly.loc[coastdat_poly.intersects(
            reg_point)].index[0]

    # Open the weather file
    weather_file = os.path.join(
        cfg.get("paths", "coastdat"),
        cfg.get("coastdat", "file_pattern").format(year=year),
    )
    if not os.path.isfile(weather_file):
        download_coastdat_data(year=year, filename=weather_file)
    weather = pd.HDFStore(weather_file, mode="r")

    # Calculate the average value for each region with more than one id.
    avg_value = pd.DataFrame()
    for region in geo.index:
        cd_ids = coastdat_geo[coastdat_geo[name] == region].index
        number_of_sets = len(cd_ids)
        tmp = pd.DataFrame()
        logging.debug((region, len(cd_ids)))
        for cid in cd_ids:
            try:
                cid = int(cid)
            except ValueError:
                pass
            if isinstance(cid, int):
                key = "A" + str(cid)
            else:
                key = cid
            tmp[cid] = weather[key][parameter]
        if len(cd_ids) < 1:
            key = "A" + str(fix[region])
            avg_value[region] = weather[key][parameter]
        else:
            avg_value[region] = tmp.sum(1).div(number_of_sets)
    weather.close()

    # Create the file name and write the table to a file.
    regions = sorted(geo.index)
    if outfile is None:
        out_name = "{0}_{1}".format(regions[0], regions[-1])
        outfile = os.path.join(
            outpath,
            "average_{parameter}_{type}_{year}.csv".format(
                year=year, type=out_name, parameter=parameter),
        )

    avg_value.to_csv(outfile)
    logging.info("Average temperature saved to {0}".format(outfile))
    return outfile
def store_average_weather(
    data_type,
    weather_path=None,
    years=None,
    keys=None,
    out_file_pattern="average_data_{data_type}.csv",
):
    """
    Get the average value of one parameter over all years for each weather
    region. For the wind speed this can be used to select the appropriate
    wind turbine for each region (strong/low wind turbines).

    Parameters
    ----------
    data_type : str
        The data_type of the coastdat weather data: 'dhi', 'dirhi',
        'pressure', 'temp_air', 'v_wind', 'z0'.
    keys : list or None
        List of coastdat keys. If None all available keys will be used.
    years : list or None
        List of one or more years to calculate the average data from. You
        have to make sure that the weather data files for the given years
        exist in the weather path.
    weather_path : str
        Path to the folder that contains all needed files. If None the
        default path defined in the config file will be used.
    out_file_pattern : str or None
        Name of the results file with a placeholder for the data type (e.g.
        ``average_data_{data_type}.csv``). If None no file will be written.

    Examples
    --------
    >>> store_average_weather('temp_air', years=[2014, 2013])  # doctest: +SKIP
    >>> v = store_average_weather('v_wind', years=[2014],
    ...                           out_file_pattern=None, keys=[1132101])
    >>> float(v.loc[1132101].round(2))
    4.39
    """
    logging.info("Calculating the average wind speed...")

    weather_pattern = cfg.get("coastdat", "file_pattern")

    if weather_path is None:
        weather_path = cfg.get("paths", "coastdat")

    # Finding existing weather files.
    data_files = os.listdir(weather_path)

    # Possible time range for the coastdat data set (reegis: 1998-2014).
    check = True
    if years is None:
        years = range(1948, 2017)
        check = False

    used_years = []
    for year in years:
        if weather_pattern.format(year=year) in data_files:
            used_years.append(year)
        elif check is True:
            msg = "File not found: {0}".format(
                weather_pattern.format(year=year))
            raise FileNotFoundError(msg)

    # Loading the coastdat grid as shapely geometries.
    coastdat_polygons = pd.DataFrame(
        geometries.load(
            cfg.get("paths", "geometry"),
            cfg.get("coastdat", "coastdatgrid_polygon"),
        ))
    coastdat_polygons.drop("geometry", axis=1, inplace=True)

    # Opening all weather files
    weather = dict()

    # open hdf files
    for year in used_years:
        weather[year] = pd.HDFStore(
            os.path.join(weather_path, weather_pattern.format(year=year)),
            mode="r",
        )

    if keys is None:
        keys = coastdat_polygons.index

    n = len(list(keys))
    logging.info("Remaining: {0}".format(n))
    for key in keys:
        data_type_avg = pd.Series()
        n -= 1
        if n % 100 == 0:
            logging.info("Remaining: {0}".format(n))
        hdf_id = "/A{0}".format(key)
        for year in used_years:
            ws = weather[year][hdf_id][data_type]
            data_type_avg = data_type_avg.append(ws, verify_integrity=True)

        # Calculate the average value for one grid item.
        coastdat_polygons.loc[
            key, "{0}_avg".format(data_type)] = data_type_avg.mean()

    # Close the hdf files.
    for year in used_years:
        weather[year].close()

    if keys is not None:
        coastdat_polygons.dropna(inplace=True)

    # Write the results to a csv file.
    if out_file_pattern is not None:
        filename = out_file_pattern.format(data_type=data_type)
        fn = os.path.join(weather_path, filename)
        logging.info("Average data saved to {0}".format(fn))
        coastdat_polygons.to_csv(fn)
    return coastdat_polygons
def test_load_csv():
    path = os.path.join(os.path.dirname(__file__), "data")
    filename = "germany_with_awz.csv"
    gdf = geometries.load(path, filename)
    ok_(isinstance(gdf, GeoDataFrame))
def get_feedin_per_region(
    year,
    region,
    name,
    weather_year=None,
    windzones=True,
    subregion=False,
    pp=None,
):
    """
    Aggregate feed-in time series for the given geometry set.

    Parameters
    ----------
    year : int
    region : geopandas.geoDataFrame
    name : str
    weather_year : int
    windzones : bool
    pp : pd.DataFrame or None
    subregion : bool
        Set to True if all region polygons together are a subregion of
        Germany. This will switch off the buffer in the spatial_join
        function.

    Notes
    -----
    The feed-in is calculated per region entry (row of the region CSV /
    GeoDataFrame); the output file will contain one column per region entry
    and generator set entry. E.g. a file with 10 regions and 2 wind
    generators will result in 20 different feed-in time series.

    Example region file: federalstates_polygon.csv

    You may want to use geometries.load() to import a region CSV.
    """
    # Create and dump the reegis basic powerplant table (created from opsd
    # data).
    fn = powerplants.pp_opsd2reegis()
    filename = fn.split(os.sep)[-1]
    path = fn.replace(filename, "")

    # Add a column 'coastdat2' with the id of the coastdat weather cell for
    # each power plant.
    geo_path = cfg.get("paths", "geometry")
    geo_file = cfg.get("coastdat", "coastdatgrid_polygon")
    gdf = geometries.load(path=geo_path, filename=geo_file)
    pp = powerplants.add_regions_to_powerplants(
        gdf, "coastdat2", filename=filename, path=path, pp=pp)

    # Add a column named after the name parameter, adding the region id to
    # each power plant.
    pp = powerplants.add_regions_to_powerplants(
        region, name, filename=filename, path=path, pp=pp,
        subregion=subregion)

    # Get only the power plants that are online in the given year.
    pp = powerplants.get_reegis_powerplants(year, pp=pp)

    if windzones:
        windzone_region_fraction(pp, name, year=year, dump=True)

    # Aggregate the feed-in time series for each region.
    return aggregate_feedin_by_region(year, pp, name,
                                      weather_year=weather_year)
def test_load_wrong_csv():
    path = os.path.join(os.path.dirname(__file__), "data")
    filename = "csv_without_geometry.csv"
    with assert_raises_regexp(
            ValueError,
            "Could not create GeoDataFrame. Missing geometries."):
        geometries.load(path, filename)
def test_load_error():
    path = os.path.join(os.path.dirname(__file__), "data")
    filename = "germany_with_awz.tiff"
    with assert_raises_regexp(
            ValueError, "Cannot load file with a 'tiff' extension."):
        geometries.load(path, filename)