# %% imports
import os

import pandas as pd

import config as cfg
from src.utils.data_processing import medea_path

# %% settings
# Folder holding the raw commercial-schedule files.
# (the original comment also mentions the folder name 'ScheduledCommercialExchanges')
directory = medea_path('data', 'raw', 'TotalCommercialSchedules_12.1.F')

# Empty result frames: one import column and one export column per model zone ...
df_imports = pd.DataFrame(
    columns=[f'imp_{zone}' for zone in cfg.zones])
df_exports = pd.DataFrame(
    columns=[f'exp_{zone}' for zone in cfg.zones])
# ... and one column for every ordered pair of two distinct model zones.
df_X_endo = pd.DataFrame(
    columns=[
        f'{origin}->{destination}'
        for origin in cfg.zones
        for destination in cfg.zones
        if origin != destination
    ])

# Walk through every entry of the raw-data folder; only .csv files are parsed.
for file in os.listdir(directory):
    filename = os.fsdecode(file)
    print(filename)
    if filename.endswith('.csv'):
        # tab-separated, utf-8 encoded file
        csv_path = medea_path(
            'data', 'raw', 'TotalCommercialSchedules_12.1.F', filename)
        df_flows = pd.read_csv(csv_path, sep='\t', encoding='utf-8')
        # parse the 'DateTime' column into pandas timestamps
        df_flows['DateTime'] = pd.to_datetime(df_flows['DateTime'])

        # for each zone: sum import and export flows from / to all other
        # zones except the ones included in model
        # 1) import flows
# general example for reading a GAMS set from the gdx-database into a python set:
# dict_set = {rec.keys[0] for rec in db_input['set_name']}
# ---
# *** read in all parameters to be adjusted ***
# general example for reading parameter 'PARAMETER_NAME' defined over 'set_name' to pandas DataFrame df
# df = gdx2df(db_input, 'PARAMETER_NAME', ['set_name'], [])
# ---
# fuel_thermal = ['Biomass', 'Coal', 'Gas', 'Lignite', 'Nuclear', 'Oil']

# %% generate 'dynamic' parameter variations (modifications that constitute the scenarios, i.e. that change each run)
# -------------------------------------------------------------------------------------------------------------------- #
# ensure that we are in the correct model directory
os.chdir(medea_path('projects', PROJECT_NAME, 'opt'))


def _new_scenario_parameter(name, description):
    """Add parameter `name` to db_input via df2gdx, initialised from a one-cell DataFrame holding 0.

    The 'par' and 0 arguments are the same ones every scenario parameter
    below is created with; the value returned by df2gdx is passed through.
    """
    return df2gdx(db_input, pd.DataFrame(data=[0]), name, 'par', 0, description)


# create empty scenario parameter in GAMS database so that it can be modified subsequently
RE_SHARE = _new_scenario_parameter('RE_SHARE', 'Minimum share of RE generation')
WIND_ON_LIMIT = _new_scenario_parameter('WIND_ON_LIMIT', 'max wind_on capacity addition')
CO2_BUDGET = _new_scenario_parameter('CO2_BUDGET', 'max amount of co2 emitted')
CO2_SCENARIO = _new_scenario_parameter('CO2_SCENARIO', 'CO2 price scenario')
SWITCH_ANCILLARY = _new_scenario_parameter('SWITCH_ANCILLARY', 'switch to activate ancillary service demand')

# modify scenario parameter and solve medea for each scenario (i.e. for each parameter modification)
for campaign, campaign_settings in dict_campaigns.items():
    # campaign dictionary = copy of the base settings, overridden by the campaign's own settings
    dict_camp = dict_base.copy()
    dict_camp.update(campaign_settings)
# %% imports from datetime import datetime import matplotlib.dates as mdates import matplotlib.pyplot as plt import pandas as pd from src.utils.data_processing import medea_path APATH = medea_path('projects', 'asparagus') RPATH = APATH / 'results' FIGPATH = APATH / 'doc' / 'figures' REFUEL_COLORS = ['#c72321', '#0d8085', '#f0c220', '#595959', '#3b68f9', '#7794dd'] ANNUITY_FACTOR = 0.05827816 FLH_PV = 1003.36 FLH_WIND = 1983.16 idx = pd.IndexSlice # %% read data res = pd.read_csv(RPATH / 'hourly_res.csv', sep=';', decimal=',', index_col=[0, 1, 2, 3, 4], header=[0, 1]) thm = pd.read_csv(RPATH / 'hourly_thermal.csv', sep=';', decimal=',', index_col=[0, 1, 2, 3, 4], header=[0, 1, 2, 3]) s_in = pd.read_csv(RPATH / 'hourly_sin.csv', sep=';', decimal=',', index_col=[0, 1, 2, 3, 4], header=[0, 1]) s_out = pd.read_csv(RPATH / 'hourly_sout.csv', sep=';', decimal=',', index_col=[0, 1, 2, 3, 4], header=[0, 1]) nxp = pd.read_csv(RPATH / 'hourly_export.csv', sep=';', decimal=',', index_col=[0, 1, 2, 3, 4], header=[0, 1]) def to_timeindex(df, startdate, enddate, freq, timeset, firstindex, missing_elements=False): """ Converts a string-number time-index (typically from GAMS output) to a DateTimeIndex compatible with pandas. If missing is True, the conversion can handle missing time elements. However, this comes at the cost of drastically reduced speed. You might want to consider solving the issue in GAMS.
import subprocess

import pandas as pd
from gams import *

import config as cfg
from src.templates.settings_template import *
from src.utils.data_processing import medea_path
from src.utils.gams_io import reset_symbol, gdx2df, df2gdx

# -------------------------------------------------------------------------------------------------------------------- #
# %% initialize GAMS, GAMS workspace and load model data
# -------------------------------------------------------------------------------------------------------------------- #
# import base data from gdx
ws = GamsWorkspace(system_directory=cfg.GMS_SYS_DIR)
gdx_input_file = medea_path('projects', project_name, 'opt', 'medea_main_data.gdx')
db_input = ws.add_database_from_gdx(gdx_input_file)


def _first_keys(symbol_name):
    """Return the set of first keys of all records of `symbol_name` in db_input."""
    return {rec.keys[0] for rec in db_input[symbol_name]}


# %% read parameters that change in scenarios (and corresponding sets)
# -------------------------------------------------------------------------------------------------------------------- #
# *** read in all sets over which adjusted parameters are defined ***
# general example for reading a GAMS set from the gdx-database into a python set:
# dict_set = {rec.keys[0] for rec in db_input['set_name']}
# ---
# read sets for calibration of power plant efficiencies
# (despite the dict_ prefix these names hold python sets)
dict_prd = _first_keys('m')
dict_fuel = _first_keys('f')
dict_tec = _first_keys('i')

# *** read in all parameters to be adjusted ***
# general example for reading parameter 'PARAMETER_NAME' defined over 'set_name' to pandas DataFrame df
# df = gdx2df(db_input, 'PARAMETER_NAME', ['set_name'], [])
# %% imports import numpy as np import pandas as pd from src.utils.data_processing import medea_path # --------------------------------------------------------------------------- # # %% settings and initializing # --------------------------------------------------------------------------- # years = range(2012, 2020) STATIC_FNAME = medea_path('data', 'processed', 'data_static.xlsx') idx = pd.IndexSlice # --------------------------------------------------------------------------- # # %% functions # --------------------------------------------------------------------------- # # filter active thermal plants def active_thermal_capacity(db_plant, year, dict_country, dict_id): active_plant = db_plant.loc[ (db_plant['UnitOperOnlineDate'] < pd.Timestamp(year, 1, 1)) & (db_plant['UnitOperRetireDate'] > pd.Timestamp(year, 12, 31)) | np.isnat(db_plant['UnitOperRetireDate'])] active_plant = active_plant.loc[(active_plant['MedeaType'] < 60) | (active_plant['MedeaType'] >= 70)] aggregate_thermal_capacity = active_plant.groupby( ['MedeaType', 'PlantCountry'])['UnitNameplate'].sum().to_frame() / 1000 if dict_country: aggregate_thermal_capacity.rename(index=dict_country, columns={'UnitNameplate': 'cap'}, inplace=True)
# %% imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.signal import savgol_filter

import config as cfg
from src.utils.data_processing import medea_path

# %% settings
FNAME = medea_path('data', 'processed', 'medea_regional_timeseries.csv')

# %% read data
# first csv column becomes the index and is converted to timestamps
df = pd.read_csv(FNAME, index_col=[0])
df.index = pd.to_datetime(df.index)
cols = ['AT-pv-profile', 'AT-wind_on-profile', 'AT-ror-profile']
# keep only the rows of the configured year and the three profile columns
dfs = df.loc[str(cfg.year), cols]

# %% extract trend from hourly time series of PV and wind
# NaN-filled containers shaped like the selected data
trend = pd.DataFrame(data=np.nan, columns=cols, index=dfs.index)
detrend = pd.DataFrame(data=np.nan, columns=cols, index=dfs.index)

# Savitzky-Golay settings: one column per profile, rows 'window_length' and 'poly_order'
savgol_parameter = pd.DataFrame(columns=cols, index=['window_length', 'poly_order'])
savgol_parameter.loc['poly_order', :] = 3
window_lengths = {
    'AT-pv-profile': 673,
    'AT-wind_on-profile': 1161,
    'AT-ror-profile': 561,
}
for profile, length in window_lengths.items():
    savgol_parameter.loc['window_length', profile] = length

# only the first profile in `cols` is smoothed here
for i in cols[:1]:
    trend[i] = savgol_filter(
        dfs[i],
        window_length=savgol_parameter.loc['window_length', i],
        polyorder=savgol_parameter.loc['poly_order', i],
    )
import os

import pandas as pd

import config as cfg
from src.utils.data_processing import medea_path

directory = medea_path('data', 'raw', 'AggregateFillingRateWaterReservoirs')

# Per-file frames are collected in a list and concatenated once after the loop.
# DataFrame.append (used previously) was removed in pandas 2.0 and copied the
# whole accumulated frame on every iteration.
fill_frames = []
for file in os.listdir(directory):
    filename = os.fsdecode(file)
    print(filename)
    if filename.endswith('.csv'):
        # read data (tab-separated, utf-16 encoded)
        df_fill = pd.read_csv(
            medea_path('data', 'raw', 'AggregateFillingRateWaterReservoirs', filename),
            sep='\t', encoding='utf-16')
        df_fill.index = pd.DatetimeIndex(df_fill['DateTime'])
        # one column per model zone, filled with that zone's 'StoredEnergy' values
        df_fillreg = pd.DataFrame(columns=cfg.zones)
        for reg in cfg.zones:
            # NOTE(review): drop_duplicates() removes repeated *values*, not
            # repeated timestamps - confirm this is intended.
            df_fillreg[f'{reg}'] = df_fill.loc[
                df_fill['MapCode'] == reg, 'StoredEnergy'].drop_duplicates()
        fill_frames.append(df_fillreg)

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder contains no csv file (same result the append-based loop produced).
df_resfill = pd.concat(fill_frames) if fill_frames else pd.DataFrame()
df_resfill = df_resfill.sort_index()

# eliminate data errors for Austrian reservoirs filled below 200000
from logging_config import setup_logging from src.utils.data_processing import medea_path, download_energy_balance, resample_index, heat_yr2day, heat_day2hr # ---------------------------------------------------------------------------- # %% settings # ---------------------------------------------------------------------------- YEARS = range(2012, 2019) setup_logging() # ---------------------------------------------------------------------------- # %% download data from sources # ---------------------------------------------------------------------------- # Austrian energy balance as provided by Statistik Austria download_energy_balance('AT') enbal_at = medea_path('data', 'raw', 'enbal_AT.xlsx') ht_enduse_at = pd.read_excel(enbal_at, sheet_name='Fernwärme', header=[438], index_col=[0], nrows=24, na_values=['-']).astype('float') # German energy balance as provided by AGEB download_energy_balance('DE') ht_enduse_de = pd.DataFrame() for yr in [x - 2000 for x in YEARS]: # mix of xlsx and xls files... enebal_de = medea_path('data', 'raw', f'enbal_DE_20{yr}.xlsx') if not os.path.exists(enebal_de): # check if xls file exists...
from datetime import datetime import numpy as np import pandas as pd import config as cfg from src.utils.data_processing import download_file, medea_path, download_energy_balance, process_energy_balance idx = pd.IndexSlice eta_hydro_storage = 0.9 # ====================================================================================================================== # %% download and process opsd time series url_opsd = 'https://data.open-power-system-data.org/time_series/latest/time_series_60min_singleindex.csv' opsd_file = medea_path('data', 'raw', 'opsd_time_series_60min.csv') download_file(url_opsd, opsd_file) ts_opsd = pd.read_csv(opsd_file) # create medea time series dataframe ts_medea = ts_opsd[ ['utc_timestamp', 'cet_cest_timestamp', 'AT_load_actual_entsoe_transparency', 'AT_solar_generation_actual', 'AT_wind_onshore_generation_actual', 'DE_load_actual_entsoe_transparency', 'DE_solar_generation_actual', 'DE_solar_capacity', 'DE_wind_onshore_generation_actual', 'DE_wind_onshore_capacity', 'DE_wind_offshore_generation_actual', 'DE_wind_offshore_capacity', 'DE_LU_price_day_ahead']] del ts_opsd ts_medea = ts_medea.copy() ts_medea.set_index(pd.DatetimeIndex(ts_medea['utc_timestamp']), inplace=True) ts_medea.drop('utc_timestamp', axis=1, inplace=True) ts_medea.rename(columns={'AT_load_actual_entsoe_transparency': 'AT-power-load',
import logging

import pandas as pd
import yaml

from logging_config import setup_logging
from src.utils.data_processing import download_file, medea_path

setup_logging()

# Read the credentials inside a context manager so the file handle is closed
# again; yaml.safe_load is the shorthand for yaml.load(..., Loader=SafeLoader).
with open(medea_path('credentials.yml')) as credentials_file:
    credentials = yaml.safe_load(credentials_file)
api_key = credentials['quandl']['apikey']

# ======================================================================================================================
# IMF commodity price data
url_imf = 'https://www.imf.org/~/media/Files/Research/CommodityPrices/Monthly/ExternalData.ashx'
imf_file = medea_path('data', 'raw', 'imf_price_data.xlsx')

# ECB foreign exchange data
url_fx = 'https://sdw.ecb.europa.eu/quickviewexport.do?SERIES_KEY=120.EXR.D.USD.EUR.SP00.A&type=xls'
fx_file = medea_path('data', 'raw', 'ecb_fx_data.csv')

logging.info(f'downloading monthly commodity prices from {url_imf}')
download_file(url_imf, imf_file)
# first column is the index; rows 1-3 of the sheet are skipped
df_imf = pd.read_excel(imf_file, index_col=[0], skiprows=[1, 2, 3])
# index labels have the form '<year>M<month>'
df_imf.index = pd.to_datetime(df_imf.index, format='%YM%m')

logging.info(f'downloading exchange rates from {url_fx}')
download_file(url_fx, fx_file)
# rows 0, 2, 3 and 4 are skipped, '-' marks missing values, everything is cast to float
df_fx = pd.read_csv(fx_file, header=[0], index_col=[0], skiprows=[0, 2, 3, 4],
                    na_values=['-']).astype('float')
df_fx.index = pd.to_datetime(df_fx.index, format='%Y-%m-%d')
import os
from itertools import compress

import pysftp
import yaml

from logging_config import setup_logging
from src.utils.data_processing import medea_path

# TODO: check file size and download larger files from ftp
# TODO: download zipped csv and unpack after download

setup_logging()

SERVER = 'sftp-transparency.entsoe.eu'
RAW_DATA_DIR = medea_path('data', 'raw')
# data categories referenced by this script
CATEGORIES = [
    'ActualGenerationOutputPerUnit',
    'AggregatedGenerationPerType',
    'AggregateFillingRateWaterReservoirs',
    'ScheduledCommercialExchanges',
]

# Read the credentials inside a context manager so the file handle is closed
# again; yaml.safe_load is the shorthand for yaml.load(..., Loader=SafeLoader).
with open(medea_path('credentials.yml')) as credentials_file:
    credentials = yaml.safe_load(credentials_file)
USER = credentials['entsoe']['user']
PWD = credentials['entsoe']['pwd']

# ======================================================================================================================
# %% sFTP data download
# ----------------------------------------------------------------------------------------------------------------------
import config as cfg
from logging_config import setup_logging
from src.utils.data_processing import download_era_temp, days_in_year, medea_path

setup_logging()

# data is generated using Copernicus Climate Change Service Information
# Dataset citation: Copernicus Climate Change Service (C3S) (2019): C3S ERA5-Land reanalysis.
# Copernicus Climate Change Service, _date of access_. https://cds.climate.copernicus.eu/cdsapp#!/home

# ======================================================================================================================
# %% download era5 temperature (2 m) data
YEARS = range(2012, 2019)
COUNTRY = {
    'AT': 'Austria',
    'DE': 'Germany',
}
ERA_DIR = medea_path('data', 'raw', 'era5')
PPLANT_DB = medea_path('data', 'processed', 'power_plant_db.xlsx')
# format for downloading ERA5: north/west/south/east
BBOX_CWE = [59.8612, -10.8043, 35.8443, 30.3285]

# one netCDF file per year, requested for the bounding box above
for year in YEARS:
    filename = os.path.join(ERA_DIR, f'temperature_europe_{year}.nc')
    download_era_temp(filename, year, BBOX_CWE)

# ======================================================================================================================
# %% calculate weighted mean temperatures for each country
# get coordinates of co-gen plants
db_plants = pd.read_excel(PPLANT_DB)
# setup results df
# %% imports import numpy as np import pandas as pd import config as cfg from src.utils.data_processing import hours_in_year, medea_path # --------------------------------------------------------------------------- # # %% settings and initializing # --------------------------------------------------------------------------- # STATIC_FNAME = medea_path('data', 'processed', 'data_static.xlsx') idx = pd.IndexSlice # --------------------------------------------------------------------------- # # %% read in data # --------------------------------------------------------------------------- # static_data = { 'CAP_R': pd.read_excel(STATIC_FNAME, 'INITIAL_CAP_R', header=[0], index_col=[0, 1]), 'CAPCOST_R': pd.read_excel(STATIC_FNAME, 'CAPITALCOST_R', header=[0], index_col=[0, 1]), 'potentials': pd.read_excel(STATIC_FNAME, 'potentials', header=[0], index_col=[0]), 'tec': pd.read_excel(STATIC_FNAME, 'parameters_G'), 'feasops': pd.read_excel(STATIC_FNAME, 'FEASIBLE_INPUT-OUTPUT'), 'cost_transport': pd.read_excel(STATIC_FNAME, 'COST_TRANSPORT', header=[0], index_col=[0]),
ws = GamsWorkspace(system_directory=cfg.GMS_SYS_DIR)

# specify GAMS symbols to be read from output .gdx-file along with the dimensions of the corresponding output DataFrame
# example: generate pandas DataFrame holding system cost and co2 emissions for each CO2 price scenario
symbols_to_read = {
    'cost_system': ([], ['z']),
    'emission_co2': ([], ['z']),
}

# iterate over all output .gdx-files
df_results = pd.DataFrame()
for price_co2 in range_co2price:
    # generate name of .gdx-file to read
    filename = f'medea_out_{output_naming}.gdx'.format(price_co2)
    # create database of .gdx-data
    db_output = ws.add_database_from_gdx(medea_path('projects', project_name, 'opt', filename))
    # read symbols from database into DataFrames with symbol name as index and CO2-price/zone as multiindex-columns
    df_i = pd.DataFrame(columns=cfg.zones)
    for symbol, sets in symbols_to_read.items():
        df = gdx2df(db_output, symbol, sets[0], sets[1])
        # set index to symbol name when the frame has the single row label 'Value'.
        # The previous test `df.index.any() == 'Value'` depended on any() handing
        # back an index element instead of a bool, which is not guaranteed.
        if len(df.index) == 1 and df.index[0] == 'Value':
            df.index = [symbol]
        # DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows instead
        df_i = pd.concat([df_i, df])
    # set column names to multiindex of CO2-price and market zone
    df_i.columns = pd.MultiIndex.from_product([[f'PCO2_{price_co2}'], df_i.columns])
    df_results = pd.concat([df_results, df_i], axis=1)

# write data to disk
df_results = df_results.replace(False, np.nan)
# %% imports import pandas as pd import config as cfg from src.utils.data_processing import hours_in_year, medea_path # --------------------------------------------------------------------------- # # %% settings and initializing # --------------------------------------------------------------------------- # STATIC_FNAME = medea_path('data', 'processed', 'data_static.xlsx') idx = pd.IndexSlice # --------------------------------------------------------------------------- # # %% Read Data plant_data = { 'technology': pd.read_excel(STATIC_FNAME, 'Technologies', header=[2], index_col=[2]).dropna(axis=0, how='all'), 'chp': pd.read_excel(STATIC_FNAME, 'FEASIBLE_INPUT-OUTPUT', header=[0], index_col=[0, 1, 2]), 'installed': pd.read_excel(STATIC_FNAME, 'Capacities', header=[0], index_col=[0, 1, 2], skiprows=[0, 1, 2]), 'CAP_X': pd.read_excel(STATIC_FNAME, 'ATC', index_col=[0]),