def test_calc_bounding_box_usa():
    """The bounding box of all turbine locations matches known reference values."""
    bbox = calc_bounding_box_usa(load_turbines())
    lat_north, lon_west, lat_south, lon_east = bbox

    # TODO this might be a very specific test, testing also turbines file...
    assert str(lat_north) == "67.839905"
    assert (lon_west, lat_south, lon_east) == (-172.713074, 16.970871, -64.610001)
def main():
    """Calculate the power contained in the wind at turbine locations.

    Runs three variants of the calculation (actual wind, long-term average
    wind, average wind at the reference hub height) and saves each result
    as a NetCDF file in the ``power_in_wind`` output folder.
    """
    turbines = load_turbines()
    output_folder = create_folder("power_in_wind", prefix=OUTPUT_DIR)

    # one entry per calculation variant; height=None presumably means "at each
    # turbine's own hub height" -- TODO confirm against load_wind_speed()
    params_calculations = [
        {
            "name": "p_in",
            "average_wind": False,
            "height": None,
        },
        {
            "name": "p_in_avg",
            "average_wind": True,
            "height": None,
        },
        {
            "name": "p_in_avg80",
            "average_wind": True,
            "height": REFERENCE_HUB_HEIGHT_M,
        },
    ]

    results = []
    fnames = []

    for params in params_calculations:
        with ProgressBar():
            name = params["name"]
            logging.info(f"Starting calculation for {name}...")

            wind_speed = load_wind_speed(YEARS, params["height"])

            logging.info("Calculating power in wind...")
            p_in, p_in_monthly = calc_power_in_wind(
                wind_speed,
                turbines,
                average_wind=params["average_wind"],
            )

            # results and fnames are kept in lock step so they can be
            # zipped together when saving below
            results.append(p_in)
            fnames.append(output_folder / f"{name}.nc")

            if p_in_monthly is not None:
                logging.info("Calculating (lazy) monthly time p_in...")
                results.append(p_in_monthly)
                fnames.append(output_folder / f"{name}_monthly.nc")

    logging.info("Starting compute()...")
    # trigger the (lazy) dask computations one by one
    results_computed = []
    for result in results:
        with ProgressBar():
            results_computed.append(da.compute(result)[0])

    logging.info("Computation done!")

    for data, fname in zip(results_computed, fnames):
        logging.info(f"Saving result to NetCDF at {fname}...")
        data.to_netcdf(fname)
def capacity_growth():
    """Write the total installed capacity (GW) for the years 2010 and 2019."""
    turbines = load_turbines()

    for year in (2010, 2019):
        # select all turbines commissioned in or before the given year;
        # factor 1e-6 converts the capacity sum to GW
        fleet = turbines.sel(turbines=turbines.p_year <= year)
        installed_capacity_gw = fleet.t_cap.sum().values * 1e-6
        write_data_value(
            f"installed_capacity_gw_{year}",
            f"{installed_capacity_gw:.0f}",
        )
def number_of_turbines():
    """Write the number of turbines built by 2010 (start) and by 2019 (end).

    Counts all turbines whose commissioning year ``p_year`` is less than or
    equal to the respective year.
    """
    turbines = load_turbines()

    # removed dead statement: the original computed
    # (turbines.p_year <= 2010).sum().compute() and discarded the result
    write_data_value(
        "number-of-turbines-start",
        f"{(turbines.p_year <= 2010).sum().values:,d}",
    )
    write_data_value(
        "number-of-turbines-end",
        f"{(turbines.p_year <= 2019).sum().values:,d}",
    )
def create_wind_velocity():
    """Generate synthetic ERA5-like wind velocity NetCDF files (test fixture).

    Creates one file per year/month with random-but-reproducible u/v wind
    components on a 0.25 degree grid covering the bounding box of all
    turbines. Refuses to overwrite existing files.

    Raises:
        RuntimeError: if a target file already exists.
    """
    turbines = load_turbines()
    dims = ("time", "latitude", "longitude")

    north, west, south, east = calc_bounding_box_usa(turbines)

    # 0.25 degree step matches the ERA5 grid resolution
    longitude = np.arange(west, east, step=0.25, dtype=np.float32)
    latitude = np.arange(south, north, step=0.25, dtype=np.float32)

    # fixed seed for reproducibility; note the order of the normal() draws
    # below is part of the reproducible output
    np.random.seed(42)

    for year in YEARS:
        for month in MONTHS:
            # four weekly time steps per month
            time_ = pd.date_range(f"{year}-{month}-01", periods=4, freq="7d")

            data = np.ones(
                (len(time_), len(latitude), len(longitude)),
                dtype=np.float32,
            )
            # constant base wind plus per-time-step noise (broadcast over grid)
            wind_velocity = xr.Dataset({
                "longitude": longitude,
                "latitude": latitude,
                "time": time_,
                "u100": (
                    dims,
                    3 * data + np.random.normal(scale=2.5, size=(len(time_), 1, 1)),
                ),
                "v100": (dims, -4 * data),
                "u10": (dims, data + np.random.normal(scale=0.5, size=(len(time_), 1, 1))),
                "v10": (dims, -data + np.random.normal(scale=0.5, size=(len(time_), 1, 1))),
            })

            fname = "wind_velocity_usa_{year}-{month:02d}.nc".format(
                month=month, year=year)

            path = (create_folder(
                "wind_velocity_usa_era5",
                prefix=INPUT_DIR,
            ) / fname)

            if op.exists(path):
                raise RuntimeError(
                    "wind velocity file already exists, won't overwrite, "
                    f"path: {path}")

            wind_velocity.to_netcdf(path)
def check_turbines():
    """Run sanity checks on the raw turbine data (NaN values not replaced)."""
    turbines = load_turbines(replace_nan_values=False)
    num_turbines = turbines.sizes["turbines"]

    # coordinates must be complete
    assert np.isnan(turbines.xlong).sum() == 0
    assert np.isnan(turbines.ylat).sum() == 0

    # possible asserts: dtype, range from to, min np.diff?
    nan_ratio = np.isnan(turbines.p_year).sum() / num_turbines
    assert 0.01 < nan_ratio < 0.1, "wrong number of NaNs in p_year's simulation data"
def specific_power():
    """Write the mean specific power (W/m2) of turbines built in 2010 and 2019."""
    turbines = load_turbines()

    # specific power = rated capacity divided by rotor swept area
    rotor_swept_area = turbines.t_rd**2 / 4 * np.pi
    specific_power_per_turbine = turbines.t_cap * KILO_TO_ONE / rotor_swept_area
    specific_power = specific_power_per_turbine.groupby(turbines.p_year).mean()

    for key, year in (("specific-power-start", 2010), ("specific-power-end", 2019)):
        write_data_value(
            key,
            f"{specific_power.sel(p_year=year).values:.0f}",
        )
def missing_commissioning_year():
    """Write statistics about missing/available metadata in the turbine data.

    Emits data values for: the share of turbines missing the commissioning
    year (overall and relative to the 2010 fleet), the count of available
    decommissioning years, the count of decommissioned turbines, the count
    of old turbines beyond an assumed lifetime, and the share of turbines
    missing rotor diameter or hub height.
    """
    turbines = load_turbines()
    # NOTE(review): empty string passed as replace_nan_values; other call
    # sites pass False -- presumably any falsy value disables replacement,
    # verify against load_turbines()
    turbines_with_nans = load_turbines(replace_nan_values="")

    write_data_value(
        "percentage_missing_commissioning_year",
        f"{nanratio(turbines_with_nans.p_year).values * 100:.1f}",
    )

    # missing commissioning years relative to the number of turbines built
    # in or before 2010
    missing2010 = (np.isnan(turbines_with_nans.p_year).sum()
                   / (turbines_with_nans.p_year <= 2010).sum()).values
    write_data_value(
        "percentage_missing_commissioning_year_2010",
        f"{missing2010 * 100:.1f}",
    )

    write_data_value(
        "num_available_decommissioning_year",
        f"{(~np.isnan(turbines.d_year)).sum().values:,d}",
    )

    write_data_value(
        "num_decommissioned_turbines",
        f"{turbines.is_decomissioned.sum().values:,d}",
    )

    # turbines older than the assumed lifetime that are not marked as
    # decommissioned
    lifetime = 25
    num_further_old_turbines = (
        (turbines.sel(turbines=~turbines.is_decomissioned).p_year
         < (2019 - lifetime)).sum().values)
    write_data_value(
        "num_further_old_turbines",
        f"{num_further_old_turbines:,d}",
    )

    # a NaN in either t_hh or t_rd makes the sum NaN, so this is the ratio
    # of turbines missing hub height or rotor diameter (or both)
    write_data_value(
        "missing_ratio_rd_hh",
        f"{100 * nanratio(turbines_with_nans.t_hh + turbines_with_nans.t_rd).values:.1f}",
    )
def plot_missing_uswtdb_data():
    """Plot the cumulative share (%) of turbines with missing metadata.

    For hub height, rotor diameter and capacity, plots the percentage of
    turbines (cumulative over commissioning years) whose metadata is
    missing, and writes the capacity percentages for 2000 and 2010 as data
    values.

    Returns:
        The matplotlib figure.
    """
    fig, ax = plt.subplots(1, 1, figsize=FIGSIZE)

    turbines = load_turbines(replace_nan_values="")

    is_metadata_missing_hh = np.isnan(turbines.t_hh)
    is_metadata_missing_rd = np.isnan(turbines.t_rd)
    is_metadata_missing_cap = np.isnan(turbines.t_cap)

    num_turbines_per_year = turbines.p_year.groupby(turbines.p_year).count()

    num_missing_hh_per_year = is_metadata_missing_hh.groupby(
        turbines.p_year).sum()
    num_missing_rd_per_year = is_metadata_missing_rd.groupby(
        turbines.p_year).sum()
    num_missing_cap_per_year = is_metadata_missing_cap.groupby(
        turbines.p_year).sum()

    # note: this assumes that a turbine with installation year x is already operating in year x
    (100 * num_missing_hh_per_year.cumsum() / num_turbines_per_year.cumsum()).plot.line(
        label="Hub height",
        color=TURBINE_COLORS[1],
        ax=ax,
    )
    (100 * num_missing_rd_per_year.cumsum() / num_turbines_per_year.cumsum()).plot(
        label="Rotor diameter",
        color=TURBINE_COLORS[3],
        ax=ax,
    )
    percent_missing_cap_per_year = (100 * num_missing_cap_per_year.cumsum()
                                    / num_turbines_per_year.cumsum())
    percent_missing_cap_per_year.plot(
        label="Capacity",
        color=TURBINE_COLORS[4],
        ax=ax,
    )

    for year in (2000, 2010):
        write_data_value(
            f"percent_missing_capacity_per_year{year}",
            f"{percent_missing_cap_per_year.sel(p_year=year).values:.0f}",
        )

    plt.legend()
    plt.ylabel("Turbines with missing metadata (%)")
    plt.xlabel("")
    plt.grid()

    return fig
def rotor_swept_area_avg():
    """Write the average rotor swept area per built turbine for 2010 and 2019.

    Divides the total yearly rotor swept area by the number of turbines
    built at each time step.
    """
    rotor_swept_area = xr.load_dataarray(OUTPUT_DIR / "turbine-time-series"
                                         / "rotor_swept_area_yearly.nc")
    turbines = load_turbines()
    time = rotor_swept_area.time

    is_built = calc_is_built(turbines, time)

    # fix: the original called calc_rotor_swept_area(turbines, time) twice,
    # once with the result discarded -- compute it only once here
    rotor_swept_area_avg = (calc_rotor_swept_area(turbines, time)
                            / is_built.sum(dim="turbines")).compute()

    write_data_value(
        "rotor_swept_area_avg-start",
        f"{int(rotor_swept_area_avg.sel(time='2010').values.round()):,d}",
    )
    write_data_value(
        "rotor_swept_area_avg-end",
        f"{int(rotor_swept_area_avg.sel(time='2019').values.round()):,d}",
    )
def main():
    """Download ERA5 wind velocity data covering all US turbine locations.

    Determines the bounding box of the turbine data and downloads one file
    per year/month in parallel using a multiprocessing pool.
    """
    # API documentation for downloading a subset:
    # https://confluence.ecmwf.int/display/CKB/Global+data%3A+Download+data+from+ECMWF+for+a+particular+area+and+resolution
    # https://retostauffer.org/code/Download-ERA5/
    download_dir = create_folder("wind_velocity_usa_era5", prefix=INPUT_DIR)
    setup_logging()

    turbines = load_turbines()
    north, west, south, east = calc_bounding_box_usa(turbines)

    # Format for downloading ERA5: North/West/South/East
    bounding_box = "{}/{}/{}/{}".format(north, west, south, east)

    logging.info(
        "Downloading bounding_box=%s for years=%s and months=%s",
        bounding_box,
        YEARS,
        MONTHS,
    )

    with Pool(processes=NUM_WORKERS) as pool:
        for year in YEARS:
            for month in MONTHS:
                pool.apply_async(
                    download_one_month,
                    (
                        bounding_box,
                        download_dir,
                        month,
                        year,
                    ),
                )
        # no further tasks will be submitted; wait for all downloads to finish
        pool.close()
        pool.join()
def plot_growth_of_wind_power():
    """Plot growth of installed capacity and yearly power generation.

    Left axis: cumulative installed capacity in GW; right (twin) axis:
    yearly power generation in TWh/year.

    Returns:
        The matplotlib figure.
    """
    turbines = load_turbines()
    generated_energy_gwh_yearly = load_generated_energy_gwh_yearly()
    fig, ax = plt.subplots(1, 1, figsize=FIGSIZE)

    # cumulative capacity per commissioning year; factor 1e-6 converts the
    # sum to GW (presumably t_cap is in kW -- TODO confirm with data source)
    per_year = turbines.t_cap.groupby(turbines.p_year)
    capacity_yearly_gw = per_year.sum(dim="turbines").cumsum() * 1e-6

    # restrict to years for which generation data is available
    capacity_yearly_gw = capacity_yearly_gw.isel(
        p_year=capacity_yearly_gw.p_year >= generated_energy_gwh_yearly.time.dt.year.min())

    capacity_yearly_gw.plot(
        label="Total installed capacity (GW)",
        ax=ax,
        marker="o",
        color="#efc220",
    )
    # years are integers -- avoid fractional tick labels
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.legend()

    plt.xlabel("Year")
    plt.ylabel("Capacity (GW)")
    plt.grid(True)

    # second y-axis for power generation, converted from GWh to TWh
    ax2 = ax.twinx()
    ax2.plot(
        generated_energy_gwh_yearly.time.dt.year,
        generated_energy_gwh_yearly * 1e-3,
        label="Yearly power generation (TWh/year)",
        marker="o",
        color="#0d8085",
    )
    plt.ylabel("Power generation (TWh/year)")
    ax2.legend(loc=1)

    return fig
from src.config import MONTHS
from src.config import YEARS
from src.config import REFERENCE_HUB_HEIGHT_M
from src.util import create_folder
from src.load_data import (
    load_turbines,
    load_generated_energy_gwh,
)
from src.load_data import load_wind_velocity
from src.calculations import calc_wind_speed_at_turbines
from src.logging_config import setup_logging

# Script: interpolate wind speed at turbine locations from ERA5 wind velocity,
# once at each turbine's hub height and once at the reference hub height.

setup_logging()

turbines = load_turbines()
# NOTE(review): generated_energy_gwh is loaded but never used below --
# possibly used in code outside this chunk
generated_energy_gwh = load_generated_energy_gwh()
output_folder = create_folder("wind_speed")

for height in (None, REFERENCE_HUB_HEIGHT_M):
    logging.info(f"Calculating wind speed at turbines with height={height}...")
    # NOTE(review): height_name is unused in this chunk -- presumably used
    # to build output file names further down
    height_name = "hubheight" if height is None else height

    for year in YEARS:
        logging.info(f"year={year}...")
        wind_velocity = load_wind_velocity(year, MONTHS)
        with ProgressBar():
            # NOTE(review): wind_speed is computed but never written to
            # output_folder in this chunk -- confirm whether a to_netcdf()
            # call is missing or lives outside the visible lines
            wind_speed = calc_wind_speed_at_turbines(wind_velocity, turbines, height)
def test_load_turbines():
    """Turbine data loads with complete capacities and the expected year range."""
    turbines = load_data.load_turbines()

    num_missing_capacities = np.isnan(turbines.t_cap).sum()
    assert num_missing_capacities == 0

    # commissioning years must span the full study period
    assert turbines.p_year.min() == 1981
    assert turbines.p_year.max() == 2019
def test_load_turbines_with_nans():
    """Without NaN replacement, the known number of missing capacities remains."""
    raw_turbines = load_data.load_turbines(replace_nan_values=False)
    num_missing_capacities = np.isnan(raw_turbines.t_cap).sum()
    assert num_missing_capacities == 7231