Python medea_path Examples, src.utils.data_processing.medea_path Python Examples

Example #1

0

Show file

# %% imports
import os

import pandas as pd

import config as cfg
from src.utils.data_processing import medea_path

# %% settings
# folder holding the raw commercial-schedule files that are read below
directory = medea_path('data', 'raw', 'TotalCommercialSchedules_12.1.F')  # 'ScheduledCommercialExchanges')

# empty result frames: one 'imp_<zone>' / 'exp_<zone>' column per configured zone ...
import_columns = [f'imp_{zn}' for zn in cfg.zones]
export_columns = [f'exp_{zn}' for zn in cfg.zones]
# ... and one '<zone>-><other zone>' column for every ordered pair of two different zones
pair_columns = [
    f'{zn}->{zzn}'
    for zn in cfg.zones
    for zzn in cfg.zones
    if zn != zzn
]
df_imports = pd.DataFrame(columns=import_columns)
df_exports = pd.DataFrame(columns=export_columns)
df_X_endo = pd.DataFrame(columns=pair_columns)

for file in os.listdir(directory):
    filename = os.fsdecode(file)
    print(filename)
    if filename.endswith('.csv'):
        # the files are tab-separated and utf-8 encoded
        flow_file = medea_path('data', 'raw', 'TotalCommercialSchedules_12.1.F', filename)
        df_flows = pd.read_csv(flow_file, sep='\t', encoding='utf-8')
        # parse the timestamp column so it can be used as datetime data
        df_flows['DateTime'] = pd.to_datetime(df_flows['DateTime'])
        # for each zone: sum import and export flows from / to all other zones except the ones included in model
        # 1) import flows

Example #2

0

Show file

# general example for reading a GAMS set from the gdx-database to a python set:
# dict_set = {rec.keys[0] for rec in db_input['set_name']}
# ---

# *** read in all parameters to be adjusted ***
# general example for reading parameter 'PARAMETER_NAME' defined over 'set_name' to pandas DataFrame df
# df = gdx2df(db_input, 'PARAMETER_NAME', ['set_name'], [])
# ---

# fuel_thermal = ['Biomass', 'Coal', 'Gas', 'Lignite', 'Nuclear', 'Oil']


# %% generate 'dynamic' parameter variations (modifications that constitute the scenarios, i.e. that change each run)
# -------------------------------------------------------------------------------------------------------------------- #
# ensure that we are in the correct model directory
model_dir = medea_path('projects', PROJECT_NAME, 'opt')
os.chdir(model_dir)


def _add_zero_parameter(symbol_name, description):
    """Pass a fresh one-cell DataFrame holding 0 to df2gdx under `symbol_name` and return its result."""
    placeholder = pd.DataFrame(data=[0])
    return df2gdx(db_input, placeholder, symbol_name, 'par', 0, description)


# create empty scenario parameter in GAMS database so that it can be modified subsequently
RE_SHARE = _add_zero_parameter('RE_SHARE', 'Minimum share of RE generation')
WIND_ON_LIMIT = _add_zero_parameter('WIND_ON_LIMIT', 'max wind_on capacity addition')
CO2_BUDGET = _add_zero_parameter('CO2_BUDGET', 'max amount of co2 emitted')
CO2_SCENARIO = _add_zero_parameter('CO2_SCENARIO', 'CO2 price scenario')
SWITCH_ANCILLARY = _add_zero_parameter('SWITCH_ANCILLARY', 'switch to activate ancillary service demand')

# modify scenario parameter and solve medea for each scenario (i.e. for each parameter modification)

for campaign, campaign_overrides in dict_campaigns.items():
    # start from a copy of the base settings and overlay the campaign-specific entries
    dict_camp = dict_base.copy()
    dict_camp.update(campaign_overrides)

Example #3

0

Show file

File: monthly_dispatch.py Project: gallauneru/medea

# %% imports
from datetime import datetime

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd

from src.utils.data_processing import medea_path

# project folders: results are read from RPATH; FIGPATH points at the figure folder
APATH = medea_path('projects', 'asparagus')
RPATH = APATH / 'results'
FIGPATH = APATH / 'doc' / 'figures'
REFUEL_COLORS = [
    '#c72321',
    '#0d8085',
    '#f0c220',
    '#595959',
    '#3b68f9',
    '#7794dd',
]
ANNUITY_FACTOR = 0.05827816
# NOTE(review): FLH presumably stands for full-load hours -- confirm
FLH_PV = 1003.36
FLH_WIND = 1983.16
idx = pd.IndexSlice


def _read_hourly(csv_name, header_rows):
    """Read one hourly result file from RPATH (';' separator, ',' decimal, first five columns as index)."""
    return pd.read_csv(
        RPATH / csv_name,
        sep=';',
        decimal=',',
        index_col=[0, 1, 2, 3, 4],
        header=header_rows,
    )


# %% read data
# all files share the same layout except for the number of header rows
res = _read_hourly('hourly_res.csv', [0, 1])
thm = _read_hourly('hourly_thermal.csv', [0, 1, 2, 3])
s_in = _read_hourly('hourly_sin.csv', [0, 1])
s_out = _read_hourly('hourly_sout.csv', [0, 1])
nxp = _read_hourly('hourly_export.csv', [0, 1])


def to_timeindex(df, startdate, enddate, freq, timeset, firstindex, missing_elements=False):
    """
    Converts a string-number time-index (typically from GAMS output) to a DateTimeIndex compatible with pandas.
    If missing is True, the conversion can handle missing time elements. However, this comes at the cost of drastically
    reduced speed. You might want to consider solving the issue in GAMS.

Example #4

0

Show file

import subprocess

import pandas as pd
from gams import *

import config as cfg
from src.templates.settings_template import *
from src.utils.data_processing import medea_path
from src.utils.gams_io import reset_symbol, gdx2df, df2gdx

# -------------------------------------------------------------------------------------------------------------------- #
# %% initialize GAMS, GAMS workspace and load model data
# -------------------------------------------------------------------------------------------------------------------- #
# import base data from gdx
ws = GamsWorkspace(system_directory=cfg.GMS_SYS_DIR)
gdx_base_data = medea_path('projects', project_name, 'opt', 'medea_main_data.gdx')
db_input = ws.add_database_from_gdx(gdx_base_data)


def _first_keys(symbol_name):
    """Return the set of first key entries of all records stored under `symbol_name` in db_input."""
    return {rec.keys[0] for rec in db_input[symbol_name]}


# %% read parameters that change in scenarios (and corresponding sets)
# -------------------------------------------------------------------------------------------------------------------- #
# *** read in all sets over which adjusted parameters are defined ***
# general example for reading a GAMS set from the gdx-database to a python set:
# set_elements = {rec.keys[0] for rec in db_input['set_name']}
# ---
# read sets for calibration of power plant efficiencies
# (despite the `dict_` prefix these names hold python sets)
dict_prd = _first_keys('m')
dict_fuel = _first_keys('f')
dict_tec = _first_keys('i')

# *** read in all parameters to be adjusted ***
# general example for reading parameter 'PARAMETER_NAME' defined over 'set_name' to pandas DataFrame df
# df = gdx2df(db_input, 'PARAMETER_NAME', ['set_name'], [])

Example #5

0

Show file

# %% imports
import numpy as np
import pandas as pd

from src.utils.data_processing import medea_path

# --------------------------------------------------------------------------- #
# %% settings and initializing
# --------------------------------------------------------------------------- #
# years 2012 through 2019 (the end of a range is exclusive)
years = range(2012, 2020)
# location of the workbook 'data_static.xlsx' in the processed-data folder
STATIC_FNAME = medea_path('data', 'processed', 'data_static.xlsx')
# shorthand for pandas' IndexSlice helper used in multi-index .loc selections
idx = pd.IndexSlice


# --------------------------------------------------------------------------- #
# %% functions
# --------------------------------------------------------------------------- #
# filter active thermal plants
def active_thermal_capacity(db_plant, year, dict_country, dict_id):
    active_plant = db_plant.loc[
        (db_plant['UnitOperOnlineDate'] < pd.Timestamp(year, 1, 1)) &
        (db_plant['UnitOperRetireDate'] > pd.Timestamp(year, 12, 31))
        | np.isnat(db_plant['UnitOperRetireDate'])]
    active_plant = active_plant.loc[(active_plant['MedeaType'] < 60) |
                                    (active_plant['MedeaType'] >= 70)]
    aggregate_thermal_capacity = active_plant.groupby(
        ['MedeaType', 'PlantCountry'])['UnitNameplate'].sum().to_frame() / 1000
    if dict_country:
        aggregate_thermal_capacity.rename(index=dict_country,
                                          columns={'UnitNameplate': 'cap'},
                                          inplace=True)

Example #6

0

Show file

# %% imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.signal import savgol_filter

import config as cfg
from src.utils.data_processing import medea_path

# %% settings
# csv file with the regional time series that are analysed below
FNAME = medea_path('data', 'processed', 'medea_regional_timeseries.csv')

# %% read data
# the first csv column becomes the index and is converted to datetimes
df = pd.read_csv(FNAME, index_col=[0])
df.index = pd.to_datetime(df.index)
cols = ['AT-pv-profile', 'AT-wind_on-profile', 'AT-ror-profile']
# keep only the rows selected by the label str(cfg.year) and the three profile columns
dfs = df.loc[str(cfg.year), cols]

# %% extract trend from hourly time series of PV and wind
# NaN-filled containers with the same index and columns as the selected data
trend = pd.DataFrame(data=np.nan, columns=cols, index=dfs.index)
detrend = pd.DataFrame(data=np.nan, columns=cols, index=dfs.index)
# per-column settings that are handed to savgol_filter: window length and polynomial order
savgol_parameter = pd.DataFrame(columns=cols,
                                index=['window_length', 'poly_order'])
savgol_parameter.loc['poly_order', :] = 3
savgol_parameter.loc['window_length', 'AT-pv-profile'] = 673
savgol_parameter.loc['window_length', 'AT-wind_on-profile'] = 1161
savgol_parameter.loc['window_length', 'AT-ror-profile'] = 561

# NOTE(review): only the first column ('AT-pv-profile') is iterated here although
# parameters are defined for all three columns -- confirm this is intended
for i in [cols[0]]:
    # savgol_filter receives the series, its window length and its polynomial order (positional)
    trend[i] = savgol_filter(dfs[i], savgol_parameter.loc['window_length', i],
                             savgol_parameter.loc['poly_order', i])

Example #7

0

Show file

import os

import pandas as pd

import config as cfg
from src.utils.data_processing import medea_path

# folder with the raw reservoir filling-rate files
directory = medea_path('data', 'raw', 'AggregateFillingRateWaterReservoirs')

# per-file frames are collected here and concatenated once after the loop;
# DataFrame.append, which was used for this before, no longer exists in pandas >= 2.0
resfill_frames = []

for file in os.listdir(directory):
    filename = os.fsdecode(file)
    print(filename)
    if filename.endswith('.csv'):
        # read data (tab-separated, utf-16 encoded)
        df_fill = pd.read_csv(medea_path(
            'data', 'raw', 'AggregateFillingRateWaterReservoirs', filename),
                              sep='\t',
                              encoding='utf-16')
        df_fill.index = pd.DatetimeIndex(df_fill['DateTime'])
        # one column per configured zone holding the de-duplicated 'StoredEnergy'
        # values of the rows whose 'MapCode' equals that zone
        df_fillreg = pd.DataFrame(columns=cfg.zones)
        for reg in cfg.zones:
            df_fillreg[f'{reg}'] = df_fill.loc[
                df_fill['MapCode'] == reg, 'StoredEnergy'].drop_duplicates()

        resfill_frames.append(df_fillreg)

# pd.concat raises on an empty list, so fall back to an empty frame when no csv file was found
df_resfill = pd.concat(resfill_frames) if resfill_frames else pd.DataFrame()
df_resfill = df_resfill.sort_index()

# eliminate data errors for Austrian reservoirs filled below 200000

Example #8

0

Show file

from logging_config import setup_logging
from src.utils.data_processing import medea_path, download_energy_balance, resample_index, heat_yr2day, heat_day2hr

# ----------------------------------------------------------------------------
# %% settings
# ----------------------------------------------------------------------------
# years 2012 through 2018 (the end of a range is exclusive)
YEARS = range(2012, 2019)

setup_logging()

# ----------------------------------------------------------------------------
# %% download data from sources
# ----------------------------------------------------------------------------
# Austrian energy balance as provided by Statistik Austria
download_energy_balance('AT')
enbal_at = medea_path('data', 'raw', 'enbal_AT.xlsx')
# read 24 rows from sheet 'Fernwärme' (German for district heating); '-' cells are
# treated as missing and the result is cast to float
# NOTE(review): header row 438 and nrows=24 are tied to the layout of the workbook -- verify
# against the downloaded file
ht_enduse_at = pd.read_excel(enbal_at,
                             sheet_name='Fernwärme',
                             header=[438],
                             index_col=[0],
                             nrows=24,
                             na_values=['-']).astype('float')

# German energy balance as provided by AGEB
download_energy_balance('DE')
# empty container for the German data
ht_enduse_de = pd.DataFrame()
for yr in [x - 2000 for x in YEARS]:
    # mix of xlsx and xls files...
    enebal_de = medea_path('data', 'raw', f'enbal_DE_20{yr}.xlsx')
    if not os.path.exists(enebal_de):
        # check if xls file exists...

Example #9

0

Show file

from datetime import datetime

import numpy as np
import pandas as pd

import config as cfg
from src.utils.data_processing import download_file, medea_path, download_energy_balance, process_energy_balance

# shorthand for pandas' IndexSlice helper used in multi-index .loc selections
idx = pd.IndexSlice

# NOTE(review): presumably the efficiency assumed for hydro storage -- confirm
eta_hydro_storage = 0.9
# ======================================================================================================================
# %% download and process opsd time series

# fetch the 60-minute single-index time series file and load it completely
url_opsd = 'https://data.open-power-system-data.org/time_series/latest/time_series_60min_singleindex.csv'
opsd_file = medea_path('data', 'raw', 'opsd_time_series_60min.csv')
download_file(url_opsd, opsd_file)
ts_opsd = pd.read_csv(opsd_file)

# create medea time series dataframe
# keep only the timestamp columns and the AT / DE load, generation, capacity and price columns
ts_medea = ts_opsd[
    ['utc_timestamp', 'cet_cest_timestamp', 'AT_load_actual_entsoe_transparency', 'AT_solar_generation_actual',
     'AT_wind_onshore_generation_actual', 'DE_load_actual_entsoe_transparency',
     'DE_solar_generation_actual', 'DE_solar_capacity', 'DE_wind_onshore_generation_actual', 'DE_wind_onshore_capacity',
     'DE_wind_offshore_generation_actual', 'DE_wind_offshore_capacity', 'DE_LU_price_day_ahead']]
# the full table is not needed any longer
del ts_opsd

# work on an explicit copy before modifying the frame in place
ts_medea = ts_medea.copy()
# index by the UTC timestamp, then drop the now redundant column
ts_medea.set_index(pd.DatetimeIndex(ts_medea['utc_timestamp']), inplace=True)
ts_medea.drop('utc_timestamp', axis=1, inplace=True)
ts_medea.rename(columns={'AT_load_actual_entsoe_transparency': 'AT-power-load',

Example #10

0

Show file

import logging

import pandas as pd
import yaml

from logging_config import setup_logging
from src.utils.data_processing import download_file, medea_path

setup_logging()

# read the credentials file; the context manager closes the file handle as soon as
# parsing is done (a bare open() inside yaml.load would leave it open)
with open(medea_path('credentials.yml')) as credentials_file:
    credentials = yaml.load(credentials_file, Loader=yaml.SafeLoader)
api_key = credentials['quandl']['apikey']

# ======================================================================================================================
# IMF commodity price data
url_imf = 'https://www.imf.org/~/media/Files/Research/CommodityPrices/Monthly/ExternalData.ashx'
imf_file = medea_path('data', 'raw', 'imf_price_data.xlsx')
# ECB foreign exchange data
url_fx = 'https://sdw.ecb.europa.eu/quickviewexport.do?SERIES_KEY=120.EXR.D.USD.EUR.SP00.A&type=xls'
fx_file = medea_path('data', 'raw', 'ecb_fx_data.csv')

logging.info(f'downloading monthly commodity prices from {url_imf}')
download_file(url_imf, imf_file)
# rows 1-3 of the sheet are skipped; the first column becomes the index
df_imf = pd.read_excel(imf_file, index_col=[0], skiprows=[1, 2, 3])
# index labels have the form '<year>M<month>'
df_imf.index = pd.to_datetime(df_imf.index, format='%YM%m')

logging.info(f'downloading exchange rates from {url_fx}')
download_file(url_fx, fx_file)
# rows 0, 2, 3 and 4 of the file are skipped; '-' cells are treated as missing values
df_fx = pd.read_csv(fx_file, header=[0], index_col=[0], skiprows=[0, 2, 3, 4], na_values=['-']).astype('float')
df_fx.index = pd.to_datetime(df_fx.index, format='%Y-%m-%d')

Example #11

0

Show file

import os
from itertools import compress

import pysftp
import yaml

from logging_config import setup_logging
from src.utils.data_processing import medea_path

# TODO: check file size and download larger files from ftp
# TODO: download zipped csv and unpack after download

setup_logging()

# sFTP host and the local folder for raw data
SERVER = 'sftp-transparency.entsoe.eu'
RAW_DATA_DIR = medea_path('data', 'raw')

# data categories handled by this script
CATEGORIES = [
    'ActualGenerationOutputPerUnit', 'AggregatedGenerationPerType',
    'AggregateFillingRateWaterReservoirs', 'ScheduledCommercialExchanges'
]

# read the credentials file; the context manager closes the file handle as soon as
# parsing is done (a bare open() inside yaml.load would leave it open)
with open(medea_path('credentials.yml')) as credentials_file:
    credentials = yaml.load(credentials_file, Loader=yaml.SafeLoader)
USER = credentials['entsoe']['user']
PWD = credentials['entsoe']['pwd']


# ======================================================================================================================
# %% sFTP data download
# ----------------------------------------------------------------------------------------------------------------------

Example #12

0

Show file

import config as cfg
from logging_config import setup_logging
from src.utils.data_processing import download_era_temp, days_in_year, medea_path

setup_logging()

# data is generated using Copernicus Climate Change Service Information
# Dataset citation: Copernicus Climate Change Service (C3S) (2019): C3S ERA5-Land reanalysis.
# Copernicus Climate Change Service, _date of access_. https://cds.climate.copernicus.eu/cdsapp#!/home

# ======================================================================================================================
# %% download era5 temperature (2 m) data

# years 2012 through 2018 (the end of a range is exclusive)
YEARS = range(2012, 2019, 1)
# country code -> country name
COUNTRY = {'AT': 'Austria', 'DE': 'Germany'}
# folder for the ERA5 files and location of the power plant workbook
ERA_DIR = medea_path('data', 'raw', 'era5')
PPLANT_DB = medea_path('data', 'processed', 'power_plant_db.xlsx')

# format for downloading ERA5: north/west/south/east
BBOX_CWE = [59.8612, -10.8043, 35.8443, 30.3285]

# one netCDF file per year, named 'temperature_europe_<year>.nc' inside ERA_DIR
for year in YEARS:
    filename = os.path.join(ERA_DIR, f'temperature_europe_{year}.nc')
    download_era_temp(filename, year, BBOX_CWE)

# ======================================================================================================================
# %% calculate weighted mean temperatures for each country
# get coordinates of co-gen plants
db_plants = pd.read_excel(PPLANT_DB)

# setup results df

Example #13

0

Show file

File: prepare_data.py Project: gallauneru/medea

# %% imports

import numpy as np
import pandas as pd

import config as cfg
from src.utils.data_processing import hours_in_year, medea_path

# --------------------------------------------------------------------------- #
# %% settings and initializing
# --------------------------------------------------------------------------- #
# location of the workbook 'data_static.xlsx' in the processed-data folder
STATIC_FNAME = medea_path('data', 'processed', 'data_static.xlsx')
# shorthand for pandas' IndexSlice helper used in multi-index .loc selections
idx = pd.IndexSlice

# --------------------------------------------------------------------------- #
# %% read in data
# --------------------------------------------------------------------------- #

static_data = {
    'CAP_R':
    pd.read_excel(STATIC_FNAME, 'INITIAL_CAP_R', header=[0], index_col=[0, 1]),
    'CAPCOST_R':
    pd.read_excel(STATIC_FNAME, 'CAPITALCOST_R', header=[0], index_col=[0, 1]),
    'potentials':
    pd.read_excel(STATIC_FNAME, 'potentials', header=[0], index_col=[0]),
    'tec':
    pd.read_excel(STATIC_FNAME, 'parameters_G'),
    'feasops':
    pd.read_excel(STATIC_FNAME, 'FEASIBLE_INPUT-OUTPUT'),
    'cost_transport':
    pd.read_excel(STATIC_FNAME, 'COST_TRANSPORT', header=[0], index_col=[0]),

Example #14

0

Show file

ws = GamsWorkspace(system_directory=cfg.GMS_SYS_DIR)

# specify GAMS symbols to be read from output .gdx-file along with the dimensions of the corresponding output DataFrame
# example: generate pandas DataFrame holding system cost and co2 emissions for each CO2 price scenario
symbols_to_read = {
    'cost_system': ([], ['z']),
    'emission_co2': ([], ['z'])
}

# iterate over all output .gdx-files
df_results = pd.DataFrame()
for price_co2 in range_co2price:
    # generate name of .gdx-file to read
    # NOTE(review): the f-string inserts output_naming and .format() then fills any '{}'
    # placeholder contained in it with the price -- confirm output_naming carries one
    filename = f'medea_out_{output_naming}.gdx'.format(price_co2)
    # create database of .gdx-data
    db_output = ws.add_database_from_gdx(medea_path('projects', project_name, 'opt', filename))

    # read symbols from database into DataFrames with symbol name as index and CO2-price/zone as multiindex-columns
    df_i = pd.DataFrame(columns=cfg.zones)
    for symbol, sets in symbols_to_read.items():
        df = gdx2df(db_output, symbol, sets[0], sets[1])
        # set index to symbol name; the explicit check on a single row labelled 'Value'
        # replaces `df.index.any() == 'Value'`, which is always False whenever any() returns a bool
        if len(df.index) == 1 and df.index[0] == 'Value':
            df.index = [symbol]
        # DataFrame.append no longer exists in pandas >= 2.0; pd.concat stacks the rows instead
        df_i = pd.concat([df_i, df])
    # set column names to multiindex of CO2-price and market zone -- done once after all
    # symbols are stacked, so that every symbol is appended to the same plain zone columns
    df_i.columns = pd.MultiIndex.from_product([[f'PCO2_{price_co2}'], df_i.columns])
    df_results = pd.concat([df_results, df_i], axis=1)

# write data to disk
# entries equal to False are replaced by NaN beforehand
df_results = df_results.replace(False, np.nan)

Example #15

0

Show file

File: preprocess_data.py Project: gallauneru/medea

# %% imports
import pandas as pd

import config as cfg
from src.utils.data_processing import hours_in_year, medea_path

# --------------------------------------------------------------------------- #
# %% settings and initializing
# --------------------------------------------------------------------------- #
# location of the workbook 'data_static.xlsx' in the processed-data folder
STATIC_FNAME = medea_path('data', 'processed', 'data_static.xlsx')
# shorthand for pandas' IndexSlice helper used in multi-index .loc selections
idx = pd.IndexSlice

# --------------------------------------------------------------------------- #
# %% Read Data
plant_data = {
    'technology':
    pd.read_excel(STATIC_FNAME, 'Technologies', header=[2],
                  index_col=[2]).dropna(axis=0, how='all'),
    'chp':
    pd.read_excel(STATIC_FNAME,
                  'FEASIBLE_INPUT-OUTPUT',
                  header=[0],
                  index_col=[0, 1, 2]),
    'installed':
    pd.read_excel(STATIC_FNAME,
                  'Capacities',
                  header=[0],
                  index_col=[0, 1, 2],
                  skiprows=[0, 1, 2]),
    'CAP_X':
    pd.read_excel(STATIC_FNAME, 'ATC', index_col=[0]),