Example #1
    def build_data(self,
                   use_ex_cap: bool,
                   min_cap_pot: List[float] = None,
                   compute_load: bool = True,
                   regions_shapes: pd.DataFrame = None):
        """Preprocess data.

        Parameters
        ----------
        use_ex_cap: bool
            Whether to compute existing capacity and use it in the optimization.
        min_cap_pot: List[float] (default: None)
            List of thresholds per technology. Points with a capacity potential under this threshold are removed.
        compute_load: bool (default: True)
            Whether to compute the load time series for each region.
        regions_shapes: pd.DataFrame (default: None)
            DataFrame (indexed by region) with columns 'onshore' and 'offshore' giving each region's shape.
            If None, shapes are retrieved from each region's subregions.
        """

        # TODO: this function should take as argument a vector specifying which data it must compute
        # Compute total load (in GWh) for each region
        load_df = pd.DataFrame(0., index=self.timestamps, columns=self.regions)
        if compute_load:
            load_df = get_load(timestamps=self.timestamps,
                               regions=self.regions,
                               missing_data='interpolate')

        # Get shape of regions and list of subregions
        onshore_technologies = [
            get_config_values(tech, ["onshore"]) for tech in self.technologies
        ]
        if regions_shapes is None:
            regions_shapes = pd.DataFrame(columns=["onshore", "offshore"],
                                          index=self.regions)
            all_subregions = []
            for region in self.regions:
                subregions = get_subregions(region)
                all_subregions.extend(subregions)
                shapes = get_shapes(subregions, save=True)
                if any(onshore_technologies):
                    regions_shapes.loc[region, "onshore"] = unary_union(
                        shapes[~shapes['offshore']]['geometry'])
                if not all(onshore_technologies):
                    regions_shapes.loc[region, "offshore"] = unary_union(
                        shapes[shapes['offshore']]['geometry'])
        else:
            all_subregions = self.regions

        # Divide the union of all regions shapes into grid cells of a given spatial resolution
        # TODO: this is limited because different technologies cannot be added in separate regions
        grid_cells_ds = get_grid_cells(self.technologies, self.spatial_res,
                                       regions_shapes["onshore"].dropna(),
                                       regions_shapes["offshore"].dropna())

        # Compute capacities potential
        tech_config = get_config_dict(self.technologies,
                                      ['filters', 'power_density'])
        cap_potential_ds = pd.Series(index=grid_cells_ds.index, dtype=float)
        for tech in self.technologies:
            cap_potential_ds[tech] = \
                get_capacity_potential_for_shapes(grid_cells_ds[tech].values, tech_config[tech]["filters"],
                                                  tech_config[tech]["power_density"])

        # Compute legacy capacity
        existing_cap_ds = pd.Series(0., index=cap_potential_ds.index)
        if use_ex_cap:
            for tech in self.technologies:
                tech_existing_cap_ds = \
                    get_legacy_capacity_in_regions(tech, grid_cells_ds.loc[tech].reset_index(drop=True),
                                                   all_subregions, raise_error=False)
                existing_cap_ds[tech] = tech_existing_cap_ds.values

        # Update capacity potential if existing capacity is bigger
        underestimated_capacity_indexes = existing_cap_ds > cap_potential_ds
        cap_potential_ds[underestimated_capacity_indexes] = existing_cap_ds[
            underestimated_capacity_indexes]

        # Remove sites that have a potential capacity under the desired value or equal to 0
        if min_cap_pot is None:
            min_cap_pot = [0] * len(self.technologies)
        assert len(min_cap_pot) == len(self.technologies), \
            "Error: If you specify threshold on capacity potentials, you need to specify it for each technology."
        min_cap_pot_dict = dict(zip(self.technologies, min_cap_pot))
        sites_to_drop = pd.DataFrame(cap_potential_ds).apply(
            lambda x: x.iloc[0] < min_cap_pot_dict[x.name[0]] or x.iloc[0] == 0, axis=1)
        # Don't drop sites with existing capacity
        # TODO: this could probably be done more cleanly
        sites_to_drop = pd.DataFrame(sites_to_drop).apply(
            lambda x: (existing_cap_ds[x.name] == 0 and x.iloc[0]), axis=1)
        cap_potential_ds = cap_potential_ds[~sites_to_drop]
        existing_cap_ds = existing_cap_ds[~sites_to_drop]
        grid_cells_ds = grid_cells_ds[~sites_to_drop]

        # Compute capacity factors for each site
        tech_points_dict = {}
        techs = set(grid_cells_ds.index.get_level_values(0))
        for tech in techs:
            tech_points_dict[tech] = list(grid_cells_ds[tech].index)
        cap_factor_df = compute_capacity_factors(tech_points_dict,
                                                 self.spatial_res,
                                                 self.timestamps)

        # Associate coordinates with regions
        tech_points_regions_ds = pd.Series(index=grid_cells_ds.index, dtype=object)
        sites_index = tech_points_regions_ds.index
        for tech in set(sites_index.get_level_values(0)):
            on_off = 'onshore' if get_config_values(tech, ['onshore']) else 'offshore'
            tech_sites_index = sites_index[sites_index.get_level_values(0) == tech]
            points = list(zip(tech_sites_index.get_level_values(1),
                              tech_sites_index.get_level_values(2)))
            tech_points_regions_ds[tech] = match_points_to_regions(
                points, regions_shapes[on_off].dropna()).values

        # Compute the capacity credit of each (tech, point) from the load and capacity factors
        cap_credit_ds = compute_capacity_credit_from_potential(
            load_df, cap_factor_df, tech_points_regions_ds)

        # Save all data in object
        self.use_ex_cap = use_ex_cap
        self.min_cap_pot_dict = min_cap_pot_dict
        self.tech_points_tuples = grid_cells_ds.index.values
        self.tech_points_dict = tech_points_dict
        self.initial_sites_ds = grid_cells_ds
        self.tech_points_regions_ds = tech_points_regions_ds
        self.data_dict["load"] = load_df
        self.data_dict["cap_potential_ds"] = cap_potential_ds.round(3)
        self.data_dict["existing_cap_ds"] = existing_cap_ds.round(3)
        self.data_dict["cap_factor_df"] = cap_factor_df.round(3)
        self.data_dict["capacity_credit_ds"] = cap_credit_ds.round(3)
Example #2
def get_capacity_potential_at_points(tech_points_dict: Dict[str, List[Tuple[float, float]]],
                                     spatial_resolution: float, countries: List[str],
                                     existing_capacity_ds: pd.Series = None) -> pd.Series:
    """
    Compute the potential capacity at a series of points for different technologies.

    Parameters
    ----------
    tech_points_dict : Dict[str, List[Tuple[float, float]]]
        Dictionary associating to each tech a list of points.
    spatial_resolution : float
        Spatial resolution of the points.
    countries: List[str]
        List of ISO codes of countries in which the points are situated.
    existing_capacity_ds: pd.Series (default: None)
        Data series giving for each (tech, point) tuple the existing capacity.

    Returns
    -------
    capacity_potential_ds : pd.Series
        Gives for each (technology, point) pair the associated capacity potential in GW.
    """

    accepted_techs = ['wind_onshore', 'wind_offshore', 'wind_floating', 'pv_utility', 'pv_residential']
    for tech, points in tech_points_dict.items():
        assert tech in accepted_techs, f"Error: tech {tech} is not in {accepted_techs}"
        assert len(points) != 0, f"Error: List of points for tech {tech} is empty."
        assert all(map(lambda point: int(point[0]/spatial_resolution) == point[0]/spatial_resolution
                   and int(point[1]/spatial_resolution) == point[1]/spatial_resolution, points)), \
            f"Error: Some points do not have the correct resolution {spatial_resolution}"

    pop_density_array = load_population_density_data(spatial_resolution)

    # Create a modified copy of regions to deal with UK and EL
    iso_to_nuts0 = {"GB": "UK", "GR": "EL"}
    nuts0_regions = [iso_to_nuts0[c] if c in iso_to_nuts0 else c for c in countries]

    # Get NUTS2 and EEZ shapes
    nuts2_regions_list = get_available_regions("nuts2")
    codes = [code for code in nuts2_regions_list if code[:2] in nuts0_regions]

    region_shapes_dict = {"nuts2": get_shapes(codes, which='onshore')["geometry"],
                          "eez": get_shapes(countries, which='offshore', save=True)["geometry"]}
    region_shapes_dict["eez"].index = [f"EZ{code}" for code in region_shapes_dict["eez"].index]

    tech_points_tuples = sorted([(tech, point[0], point[1]) for tech, points in tech_points_dict.items()
                                 for point in points])
    capacity_potential_ds = pd.Series(0., index=pd.MultiIndex.from_tuples(tech_points_tuples))

    # Check that existing capacity is only given for points under consideration
    if existing_capacity_ds is not None:
        missing_existing_points = set(existing_capacity_ds.index) - set(capacity_potential_ds.index)
        assert not missing_existing_points, \
            f"Error: Existing capacity is given for points not under consideration: {missing_existing_points}"

    for tech, points in tech_points_dict.items():

        # Compute potential for each NUTS2 or EEZ
        potential_per_region_ds = read_capacity_potential(tech, nuts_type='nuts2')

        # Find the geographical region code associated to each point
        if tech in ['wind_offshore', 'wind_floating']:
            region_shapes = region_shapes_dict["eez"]
        else:
            region_shapes = region_shapes_dict["nuts2"]

        point_regions_ds = match_points_to_regions(points, region_shapes).dropna()
        points = list(point_regions_ds.index)
        points_info_df = pd.DataFrame(point_regions_ds.values, point_regions_ds.index, columns=["region"])

        if tech in ['wind_offshore', 'wind_floating']:

            # For offshore sites, divide the total potential of the region by the number of points
            # associated with that region

            # Get how many points we have in each region and the potential capacity of those regions
            region_freq_ds = points_info_df.groupby(['region'])['region'].count()
            regions = region_freq_ds.index
            region_cap_pot_ds = potential_per_region_ds[regions]
            region_info_df = pd.concat([region_freq_ds, region_cap_pot_ds], axis=1)
            region_info_df.columns = ["freq", "cap_pot"]

            # Assign these values to each point depending on which region it falls in
            points_info_df = \
                points_info_df.merge(region_info_df, left_on='region', right_index=True)

            # Compute potential of each point by dividing the region potential by the number of points it contains
            cap_pot_per_point = points_info_df["cap_pot"]/points_info_df["freq"]

        else:  # tech in ['wind_onshore', 'pv_utility', 'pv_residential']:

            # For onshore sites, divide the regional total inversely proportionally to population
            # (or proportionally for residential PV).
            # We actually use population density, which is proportional to population, since each
            # point is considered to be associated with an equivalent area.
            points_info_df['pop_dens'] = np.clip(pop_density_array.sel(locations=points).values, a_min=1., a_max=None)
            if tech in ['wind_onshore', 'pv_utility']:
                points_info_df['pop_dens'] = 1./points_info_df['pop_dens']

            # Aggregate per region and get capacity potential for regions in which the points fall
            regions_info_df = points_info_df.groupby(['region']).sum()
            regions_info_df["cap_pot"] = potential_per_region_ds[regions_info_df.index]
            regions_info_df.columns = ['sum_pop_dens', 'cap_pot']

            # Assign these values to each point depending on which region it falls in
            points_info_df = points_info_df.merge(regions_info_df,
                                                  left_on='region', right_index=True)
            # Compute potential
            cap_pot_per_point = points_info_df['pop_dens'] * points_info_df['cap_pot'] / points_info_df['sum_pop_dens']

        capacity_potential_ds.loc[tech, cap_pot_per_point.index] = cap_pot_per_point.values

    # Update capacity potential where existing capacity exceeds it
    if existing_capacity_ds is not None:
        underestimated_capacity = existing_capacity_ds[capacity_potential_ds.index] > capacity_potential_ds
        capacity_potential_ds[underestimated_capacity] = existing_capacity_ds[underestimated_capacity]

    return capacity_potential_ds
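
The sketch below shows a possible call. Point coordinates are (lon, lat) tuples and must be multiples of spatial_resolution (the assertion at the top enforces this); the NUTS2/EEZ shape and potential files behind read_capacity_potential and get_shapes are assumed to be available locally.

tech_points = {
    "wind_offshore": [(3.0, 51.5), (3.5, 51.5)],
    "pv_utility": [(4.5, 50.5)],
}
cap_pot = get_capacity_potential_at_points(tech_points,
                                           spatial_resolution=0.5,
                                           countries=["BE", "NL"])
# cap_pot is a pd.Series indexed by (tech, lon, lat), with values in GW.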
Example #3
def get_legacy_capacity_in_regions_from_non_open(
        tech: str,
        regions_shapes: pd.Series,
        countries: List[str],
        match_distance: float = 50.,
        raise_error: bool = True) -> pd.Series:
    """
    Return the total existing capacity (in GW) for the given tech for a set of geographical regions.

    This function uses proprietary data.

    Parameters
    ----------
    tech: str
        Technology name.
    regions_shapes: pd.Series [Union[Polygon, MultiPolygon]]
        Geographical regions
    countries: List[str]
        List of ISO codes of countries in which the regions are situated
    match_distance: float (default: 50)
        Distance threshold (in km) used when associating points to shapes.
    raise_error: bool (default: True)
        Whether to raise an error if no legacy data is available for this technology.

    Returns
    -------
    capacities: pd.Series
        Legacy capacities (in GW) of technology 'tech' for each region

    """

    path_legacy_data = f"{data_path}generation/vres/legacy/source/"

    capacities = pd.Series(0., index=regions_shapes.index)
    plant, plant_type = get_config_values(tech, ["plant", "type"])
    if (plant, plant_type) in [("Wind", "Onshore"), ("Wind", "Offshore"),
                               ("PV", "Utility")]:

        if plant == "Wind":

            data = pd.read_excel(
                f"{path_legacy_data}Windfarms_Europe_20200127.xls",
                sheet_name='Windfarms',
                header=0,
                usecols=[2, 5, 9, 10, 18, 23],
                skiprows=[1],
                na_values='#ND')
            data = data.dropna(subset=['Latitude', 'Longitude', 'Total power'])
            data = data[data['Status'] != 'Dismantled']
            if countries is not None:
                data = data[data['ISO code'].isin(countries)]

            if len(data) == 0:
                return capacities

            # Converting from kW to GW
            data['Total power'] *= 1e-6
            data["Location"] = data[["Longitude", "Latitude"
                                     ]].apply(lambda x:
                                              (x.Longitude, x.Latitude),
                                              axis=1)

            # Keep only onshore or offshore points depending on technology
            if plant_type == 'Onshore':
                data = data[data['Area'] != 'Offshore']
            else:  # Offshore
                data = data[data['Area'] == 'Offshore']

            if len(data) == 0:
                return capacities

        else:  # plant == "PV":

            data = pd.read_excel(
                f"{path_legacy_data}Solarfarms_Europe_20200208.xlsx",
                sheet_name='ProjReg_rpt',
                header=0,
                usecols=[0, 4, 8])
            data = data[pd.notnull(data['Coords'])]
            data["Location"] = data["Coords"].apply(
                lambda x: (float(x.split(',')[1]), float(x.split(',')[0])))
            if countries is not None:
                data['Country'] = convert_country_codes(
                    data['Country'].values, 'name', 'alpha_2')
                data = data[data['Country'].isin(countries)]

            if len(data) == 0:
                return capacities

            # Converting from MW to GW
            data['Total power'] = data['MWac'] * 1e-3

        data = data[["Location", "Total power"]]

        points_region = match_points_to_regions(
            data["Location"].values,
            regions_shapes,
            distance_threshold=match_distance).dropna()

        for region in regions_shapes.index:
            points_in_region = points_region[points_region == region].index.values
            capacities[region] = \
                data[data["Location"].isin(points_in_region)]["Total power"].sum()

    elif (plant, plant_type) == ("PV", "Residential"):

        legacy_capacity_fn = join(path_legacy_data,
                                  'SolarEurope_Residential_deployment.xlsx')
        data = pd.read_excel(legacy_capacity_fn,
                             header=0,
                             index_col=0,
                             usecols=[0, 4]).squeeze("columns").sort_index()
        data = data[data.index.isin(countries)]

        if len(data) == 0:
            return capacities

        # Get countries shapes
        countries_shapes = get_shapes(data.index.values,
                                      which='onshore',
                                      save=True)["geometry"]

        for region_id, region_shape in regions_shapes.items():
            for country_id, country_shape in countries_shapes.items():
                capacities[region_id] += \
                    (region_shape.intersection(country_shape).area/country_shape.area) * data[country_id]

    else:
        if raise_error:
            raise ValueError(
                f"Error: No legacy data exists for tech {tech} with plant {plant} and type {plant_type}."
            )
        else:
            warnings.warn(f"Warning: No legacy data exists for tech {tech}.")

    return capacities
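
A hedged usage sketch: it assumes the proprietary Windfarms/Solarfarms workbooks exist under the legacy source directory and that get_shapes is importable from the same package.

# get_shapes is assumed to be in scope (same package as this function).
shapes = get_shapes(["BE", "NL"], which="onshore")["geometry"]
legacy_gw = get_legacy_capacity_in_regions_from_non_open(
    "wind_onshore", shapes, countries=["BE", "NL"], match_distance=50.)
# legacy_gw: pd.Series of existing capacity in GW, indexed like shapes.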
Example #4
def get_legacy_capacity_in_regions(tech: str,
                                   regions_shapes: pd.Series,
                                   countries: List[str],
                                   match_distance: float = 50.,
                                   raise_error: bool = True) -> pd.Series:
    """
    Return the total existing capacity (in GW) for the given tech for a set of geographical regions.

    Parameters
    ----------
    tech: str
        Technology name.
    regions_shapes: pd.Series [Union[Polygon, MultiPolygon]]
        Geographical regions
    countries: List[str]
        List of ISO codes of countries in which the regions are situated.
    match_distance: float (default: 50)
        Distance threshold (in km) used when associating points to shape.
    raise_error: bool (default: True)
        Whether to raise an error if no legacy data is available for this technology.

    Returns
    -------
    capacities: pd.Series
        Legacy capacities (in GW) of technology 'tech' for each region

    """

    # Read per grid cell capacity file
    legacy_dir = f"{data_path}generation/vres/legacy/generated/"
    capacities_df = pd.read_csv(f"{legacy_dir}aggregated_capacity.csv",
                                index_col=[0, 1])

    plant, plant_type = get_config_values(tech, ["plant", "type"])
    available_plant_types = set(capacities_df.index)
    if (plant, plant_type) not in available_plant_types:
        if raise_error:
            raise ValueError(
                f"Error: no legacy data exists for tech {tech} with plant {plant} and type {plant_type}."
            )
        else:
            warnings.warn(f"Warning: No legacy data exists for tech {tech}.")
            return pd.Series(0.,
                             name="Legacy capacity (GW)",
                             index=regions_shapes.index,
                             dtype=float)

    # Get only capacity for the desired technology and desired countries
    capacities_df = capacities_df.loc[(plant, plant_type)]
    capacities_df = capacities_df[capacities_df.ISO2.isin(countries)]
    if len(capacities_df) == 0:
        return pd.Series(0.,
                         name="Legacy capacity (GW)",
                         index=regions_shapes.index,
                         dtype=float)

    # Aggregate capacity per region by adding capacity of points falling in those regions
    capacities_df["Location"] = capacities_df[["Longitude",
                                               "Latitude"]].apply(lambda x:
                                                                  (x[0], x[1]),
                                                                  axis=1)
    points_region = match_points_to_regions(
        capacities_df["Location"].values,
        regions_shapes,
        distance_threshold=match_distance).dropna()
    capacities_ds = pd.Series(0.,
                              name="Legacy capacity (GW)",
                              index=regions_shapes.index,
                              dtype=float)
    for region in regions_shapes.index:
        points_in_region = points_region[points_region == region].index.values
        capacities_ds[region] = \
            capacities_df[capacities_df["Location"].isin(points_in_region)]["Capacity (GW)"].sum()

    return capacities_ds
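
Usage mirrors the non-open variant above but reads the pre-generated aggregated_capacity.csv; this sketch assumes that file and get_shapes are available.

shapes = get_shapes(["DE"], which="onshore")["geometry"]
legacy_gw = get_legacy_capacity_in_regions("pv_utility", shapes,
                                           countries=["DE"],
                                           raise_error=False)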
Example #5
def match_powerplants_to_regions(
        pp_df: pd.DataFrame,
        shapes_ds: gpd.GeoSeries,
        shapes_countries: Optional[List[str]] = None,
        dist_threshold: Optional[float] = 5.) -> pd.Series:
    """
    Match each power plant to a region defined by its geographical shape.

    Parameters
    ----------
    pp_df: pd.DataFrame
        Power plant frame with columns ISO2, lon and lat.
    shapes_ds: gpd.GeoSeries
        GeoSeries containing the shapes to which plants are to be mapped.
    shapes_countries: List[str] (default: None)
        If relevant, indicates to which country each shape belongs.
        Ensures that points are not assigned to shapes belonging to a different country.
    dist_threshold: Optional[float] (default: 5.)
        Maximal distance (in km) from a shape for a point outside all shapes to still be assigned to it.

    Returns
    -------
    pd.Series
        Indicates for each element in the input dataframe to which shape it belongs.
    """

    for col in ["ISO2", "lat", "lon"]:
        assert col in pp_df.columns, f"Error: Dataframe missing column {col}."
    assert all(
        len(c) == 2
        for c in pp_df["ISO2"]), "Error: ISO2 codes must be of length 2."
    assert shapes_countries is None or all(len(c) == 2 for c in shapes_countries), \
        "Error: Shapes countries must be given as ISO2 codes of length 2."

    def add_region(lon, lat):
        try:
            region_code = matched_locs[lon, lat]
            # Some points lie at exactly the same position, in which case matched_locs
            # returns a Series; keep the first match.
            return region_code if isinstance(region_code, (str, float, int)) \
                else region_code.iloc[0]
        except (AttributeError, KeyError):
            return None

    # Find to which region each plant belongs
    if shapes_countries is None:
        plants_locs = pp_df[["lon", "lat"]].apply(
            lambda xy: (xy["lon"], xy["lat"]), axis=1).values
        matched_locs = match_points_to_regions(
            plants_locs, shapes_ds,
            distance_threshold=dist_threshold).dropna()
        plants_region_ds = pp_df[["lon", "lat"]].apply(
            lambda x: add_region(x["lon"], x["lat"]), axis=1)
    else:
        unique_countries = sorted(set(pp_df["ISO2"]))
        plants_region_ds = pd.Series(index=pp_df.index, dtype=object)
        for country in unique_countries:
            pp_df_in_country = pp_df[pp_df["ISO2"] == country]
            plants_locs = pp_df_in_country[["lon", "lat"]].apply(
                lambda xy: (xy["lon"], xy["lat"]), axis=1).values
            shapes_in_country = shapes_ds[[c == country for c in shapes_countries]]
            matched_locs = match_points_to_regions(
                plants_locs, shapes_in_country,
                distance_threshold=dist_threshold)
            plants_region_ds.loc[pp_df_in_country.index] = \
                pp_df_in_country[["lon", "lat"]].apply(lambda x: add_region(x["lon"], x["lat"]), axis=1)

    return plants_region_ds
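
Below is a self-contained toy example with hypothetical coordinates and crude rectangular 'country' shapes, showing the expected inputs:

import geopandas as gpd
import pandas as pd
from shapely.geometry import box

# Two plants (near Brussels and Amsterdam) and two bounding-box shapes.
pp_df = pd.DataFrame({"ISO2": ["BE", "NL"],
                      "lon": [4.35, 4.90],
                      "lat": [50.85, 52.37]})
shapes_ds = gpd.GeoSeries({"BE": box(2.5, 49.5, 6.4, 51.5),
                           "NL": box(3.3, 50.7, 7.2, 53.6)})
regions = match_powerplants_to_regions(pp_df, shapes_ds,
                                       shapes_countries=["BE", "NL"])
# regions: pd.Series mapping each row of pp_df to 'BE' or 'NL'.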
Example #6
def generate_eu_hydro_files(resolution: float, topology_unit: str,
                            timestamps: pd.DatetimeIndex):
    """
     Generating hydro files, i.e., capacities and inflows.

     Parameters
     ----------
     resolution: float
         Runoff data spatial resolution.
     topology_unit: str
         Topology in use ('countries', 'NUTS2', 'NUTS3').
     timestamps: pd.DatetimeIndex
         Time horizon for which inflows are computed.

     """

    assert topology_unit in ["countries", "NUTS2", "NUTS3"], \
        "Error: requested topology_unit not available."

    # Load shapes based on topology
    if topology_unit == 'countries':
        shapes = get_natural_earth_shapes()
    else:  # topology in ['NUTS2', 'NUTS3']
        shapes = get_nuts_shapes(topology_unit[-1:])
    shapes_countries = replace_iso2_codes([code[:2] for code in shapes.index])
    countries = sorted(list(set(shapes_countries)))

    tech_dir = f"{data_path}technologies/"
    with open(join(tech_dir, 'tech_config.yml')) as fh:
        tech_config = yaml.load(fh, Loader=yaml.FullLoader)

    # Runoff data
    runoff_dataset = read_runoff_data(resolution, timestamps)

    # Find to which region each runoff point belongs
    runoff_points_region_ds = \
        match_points_to_regions(runoff_dataset.locations.values, shapes, keep_outside=False).dropna()
    logger.info('Runoff measurement points mapped to regions shapes.')

    def add_region_code(pp_df: pd.DataFrame):
        if topology_unit == "countries":
            pp_df['region_code'] = pp_df["ISO2"]
        else:
            pp_df['region_code'] = match_powerplants_to_regions(
                pp_df, shapes, shapes_countries)
            pp_df = pp_df[~pp_df['region_code'].isnull()]
        return pp_df

    # Build ROR data
    # Get all ROR powerplants in the countries of interest and add region name
    logger.info('Building ROR data')
    ror_plants_df = get_powerplants('ror', countries)
    ror_plants_df = add_region_code(ror_plants_df)
    # Get capacity and inflow per region (for which inflow data exists)
    ror_capacity_ds, ror_inflows_df = build_ror_data(
        ror_plants_df.set_index(["region_code"])["Capacity"], timestamps,
        runoff_dataset, runoff_points_region_ds)

    # Build STO data
    logger.info('Building STO data')
    sto_plants_df = get_powerplants('sto', countries)
    sto_plants_df = add_region_code(sto_plants_df)
    sto_capacity_df, sto_inflows_df, sto_multipliers_ds = \
        build_sto_data(sto_plants_df.set_index(["region_code"])["Capacity"], timestamps,
                       runoff_dataset, runoff_points_region_ds, ror_capacity_ds, ror_inflows_df)

    # Build PHS data
    logger.info('Building PHS data')
    default_phs_duration = tech_config['phs']['default_duration']

    phs_plants_df = get_powerplants('phs', countries)
    phs_plants_df = add_region_code(phs_plants_df)
    phs_capacity_df = build_phs_data(phs_plants_df, default_phs_duration)

    # Merge capacity data into a single DataFrame.
    capacities_df = pd.concat(
        [ror_capacity_ds, sto_capacity_df, phs_capacity_df], axis=1,
        sort=True).round(3)
    capacities_df.columns = [
        'ROR_CAP [GW]', 'STO_CAP [GW]', 'STO_EN_CAP [GWh]', 'PSP_CAP [GW]',
        'PSP_EN_CAP [GWh]'
    ]
    capacities_df.replace(0., np.nan, inplace=True)
    capacities_df.dropna(how='all', inplace=True)
    ror_inflows_df = ror_inflows_df[
        capacities_df['ROR_CAP [GW]'].dropna().index]
    sto_inflows_df = sto_inflows_df[
        capacities_df['STO_CAP [GW]'].dropna().index]

    # Saving files
    save_dir = f"{data_path}hydro/generated/"
    capacities_df.to_csv(f"{save_dir}hydro_capacities_per_{topology_unit}.csv")
    ror_inflows_df.to_csv(
        f"{save_dir}hydro_ror_time_series_per_{topology_unit}_pu.csv")
    sto_inflows_df.to_csv(
        f"{save_dir}hydro_sto_inflow_time_series_per_{topology_unit}_GWh.csv")
    sto_multipliers_ds.to_csv(
        f"{save_dir}hydro_sto_multipliers_per_{topology_unit}.csv",
        header=['multiplier'])
    logger.info('Files saved to disk.')
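
A sketch of a typical invocation, assuming the runoff, power-plant and shape data under data_path are in place:

import pandas as pd

# Country-level hydro capacities and inflows for 2018 at 0.5-degree resolution.
timestamps = pd.date_range("2018-01-01 00:00", "2018-12-31 23:00", freq="H")
generate_eu_hydro_files(resolution=0.5, topology_unit="countries",
                        timestamps=timestamps)
# Writes hydro_capacities_per_countries.csv and the ROR/STO time series
# to the hydro/generated/ directory under data_path.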