Ejemplo n.º 1
    def build_data(self,
                   use_ex_cap: bool,
                   min_cap_pot: List[float] = None,
                   compute_load: bool = True,
                   regions_shapes: pd.DataFrame = None):
        """Preprocess data.

        use_ex_cap: bool
            Whether to compute or not existing capacity and use it in optimization.
        min_cap_pot: List[float] (default: None)
            List of thresholds per technology. Points with capacity potential under this threshold will be removed.

        # NOTE(review): this excerpt looks truncated - the closing docstring delimiter is missing above and
        # several multi-line calls below are never closed. Restore the missing lines from the upstream file
        # before running or refactoring; the comments below only describe what is visible here.
        # TODO: this function needs to take as argument a vector data specifying which data it must compute
        # Compute total load (in GWh) for each region
        # Start from an all-zero frame (timestamps x regions); it is replaced only when compute_load is set.
        load_df = pd.DataFrame(0., index=self.timestamps, columns=self.regions)
        if compute_load:
            load_df = get_load(timestamps=self.timestamps,

        # Get shape of regions and list of subregions
        # One 'onshore' config value per technology; any()/all() on this list decide below which
        # of the onshore/offshore shapes get built.
        onshore_technologies = [
            get_config_values(tech, ["onshore"]) for tech in self.technologies
        if regions_shapes is None:
            regions_shapes = pd.DataFrame(columns=["onshore", "offshore"],
            all_subregions = []
            for region in self.regions:
                subregions = get_subregions(region)
                shapes = get_shapes(subregions, save=True)
                if any(onshore_technologies):
                    regions_shapes.loc[region, "onshore"] = unary_union(
                if not all(onshore_technologies):
                    regions_shapes.loc[region, "offshore"] = unary_union(
            # NOTE(review): presumably this assignment sat under an `else:` (caller-supplied regions_shapes)
            # that is missing from the excerpt - confirm against the original.
            all_subregions = self.regions

        # Divide the union of all regions shapes into grid cells of a given spatial resolution
        # TODO: this is shitty because you cannot add different technologies in separate regions
        grid_cells_ds = get_grid_cells(self.technologies, self.spatial_res,

        # Compute capacities potential
        tech_config = get_config_dict(self.technologies,
                                      ['filters', 'power_density'])
        # Series sharing the grid cells' index, filled technology by technology in the loop below.
        cap_potential_ds = pd.Series(index=grid_cells_ds.index)
        for tech in self.technologies:
            cap_potential_ds[tech] = \
                get_capacity_potential_for_shapes(grid_cells_ds[tech].values, tech_config[tech]["filters"],

        # Compute legacy capacity
        # Defaults to 0 everywhere; only overwritten when use_ex_cap is set.
        existing_cap_ds = pd.Series(0., index=cap_potential_ds.index)
        if use_ex_cap:
            for tech in self.technologies:
                tech_existing_cap_ds = \
                    get_legacy_capacity_in_regions(tech, grid_cells_ds.loc[tech].reset_index(drop=True),
                                                   all_subregions, raise_error=False)
                existing_cap_ds[tech] = tech_existing_cap_ds.values

        # Update capacity potential if existing capacity is bigger
        underestimated_capacity_indexes = existing_cap_ds > cap_potential_ds
        cap_potential_ds[underestimated_capacity_indexes] = existing_cap_ds[

        # Remove sites that have a potential capacity under the desired value or equal to 0
        # Without explicit thresholds, use 0 for every technology.
        if min_cap_pot is None:
            min_cap_pot = [0] * len(self.technologies)
        assert len(min_cap_pot) == len(self.technologies), \
            "Error: If you specify threshold on capacity potentials, you need to specify it for each technology."
        min_cap_pot_dict = dict(zip(self.technologies, min_cap_pot))
        # Row-wise flag: potential below the threshold of the row's technology (first element of the
        # row label is used as the technology key) or exactly 0.
        sites_to_drop = pd.DataFrame(cap_potential_ds).apply(
            lambda x: x[0] < min_cap_pot_dict[x.name[0]] or x[0] == 0, axis=1)
        # Don't drop sites with existing capacity
        # TODO: this is probably a shitty way to do it
        # A site stays flagged only if it was flagged above AND its existing capacity is 0.
        sites_to_drop = pd.DataFrame(sites_to_drop).apply(
            lambda x: (existing_cap_ds[x.name] == 0 and x[0]), axis=1)
        # Apply the same mask to all three series so their indexes stay aligned.
        cap_potential_ds = cap_potential_ds[~sites_to_drop]
        existing_cap_ds = existing_cap_ds[~sites_to_drop]
        grid_cells_ds = grid_cells_ds[~sites_to_drop]

        # Compute capacity factors for each site
        # Map each technology still present after filtering to the list of its remaining index entries.
        tech_points_dict = {}
        techs = set(grid_cells_ds.index.get_level_values(0))
        for tech in techs:
            tech_points_dict[tech] = list(grid_cells_ds[tech].index)
        cap_factor_df = compute_capacity_factors(tech_points_dict,

        # Associating coordinates to regions
        tech_points_regions_ds = pd.Series(index=grid_cells_ds.index)
        sites_index = tech_points_regions_ds.index
        for tech in set(sites_index.get_level_values(0)):
            # Pick the regions_shapes column matching the technology's 'onshore' config value.
            on_off = 'onshore' if get_config_values(
                tech, ['onshore']) else 'offshore'
            # NOTE(review): the next two statements are cut off in this excerpt.
            tech_sites_index = sites_index[sites_index.get_level_values(0) ==
            points = list(
            tech_points_regions_ds[tech] = match_points_to_regions(
                points, regions_shapes[on_off].dropna()).values

        cap_credit_ds = compute_capacity_credit_from_potential(
            load_df, cap_factor_df, tech_points_regions_ds)

        # Save all data in object
        self.use_ex_cap = use_ex_cap
        self.min_cap_pot_dict = min_cap_pot_dict
        self.tech_points_tuples = grid_cells_ds.index.values
        self.tech_points_dict = tech_points_dict
        self.initial_sites_ds = grid_cells_ds
        self.tech_points_regions_ds = tech_points_regions_ds
        # Numeric results are stored rounded to 3 decimals; the load frame is stored as is.
        self.data_dict["load"] = load_df
        self.data_dict["cap_potential_ds"] = cap_potential_ds.round(3)
        self.data_dict["existing_cap_ds"] = existing_cap_ds.round(3)
        self.data_dict["cap_factor_df"] = cap_factor_df.round(3)
        self.data_dict["capacity_credit_ds"] = cap_credit_ds.round(3)
Ejemplo n.º 2
def get_capacity_potential_at_points(tech_points_dict: Dict[str, List[Tuple[float, float]]],
                                     spatial_resolution: float, countries: List[str],
                                     existing_capacity_ds: pd.Series = None) -> pd.Series:
    # NOTE(review): the triple-quote delimiters around the description below are missing in this
    # excerpt, and a few code lines further down are cut off. Restore them from the upstream file
    # before running; the comments added here only describe what is visible.
    Compute the potential capacity at a series of points for different technologies.

    tech_points_dict : Dict[str, Dict[str, List[Tuple[float, float]]]
        Dictionary associating to each tech a list of points.
    spatial_resolution : float
        Spatial resolution of the points.
    countries: List[str]
        List of ISO codes of countries in which the points are situated
    existing_capacity_ds: pd.Series (default: None)
        Data series given for each tuple of (tech, point) the existing capacity.

    capacity_potential_ds : pd.Series
        Gives for each pair of technology - point the associated capacity potential in GW

    # Input validation: known technology, non-empty point list, and every coordinate an exact
    # multiple of the spatial resolution (coordinate / resolution must be an integer).
    accepted_techs = ['wind_onshore', 'wind_offshore', 'wind_floating', 'pv_utility', 'pv_residential']
    for tech, points in tech_points_dict.items():
        assert tech in accepted_techs, f"Error: tech {tech} is not in {accepted_techs}"
        assert len(points) != 0, f"Error: List of points for tech {tech} is empty."
        assert all(map(lambda point: int(point[0]/spatial_resolution) == point[0]/spatial_resolution
                   and int(point[1]/spatial_resolution) == point[1]/spatial_resolution, points)), \
            f"Error: Some points do not have the correct resolution {spatial_resolution}"

    pop_density_array = load_population_density_data(spatial_resolution)

    # Create a modified copy of regions to deal with UK and EL
    iso_to_nuts0 = {"GB": "UK", "GR": "EL"}
    nuts0_regions = [iso_to_nuts0[c] if c in iso_to_nuts0 else c for c in countries]

    # Get NUTS2 and EEZ shapes
    nuts2_regions_list = get_available_regions("nuts2")
    # Keep the NUTS2 codes whose first two characters are one of the requested NUTS0 codes.
    codes = [code for code in nuts2_regions_list if code[:2] in nuts0_regions]

    region_shapes_dict = {"nuts2": get_shapes(codes, which='onshore')["geometry"],
                          "eez": get_shapes(countries, which='offshore', save=True)["geometry"]}
    # Prefix every EEZ index entry with "EZ" - presumably to match the region codes used by
    # read_capacity_potential; confirm against that helper.
    region_shapes_dict["eez"].index = [f"EZ{code}" for code in region_shapes_dict["eez"].index]

    # Result series: one zero-initialised entry per sorted (tech, point[0], point[1]) tuple.
    tech_points_tuples = sorted([(tech, point[0], point[1]) for tech, points in tech_points_dict.items()
                                 for point in points])
    capacity_potential_ds = pd.Series(0., index=pd.MultiIndex.from_tuples(tech_points_tuples))

    # Check that if existing capacity is defined for every point
    # NOTE(review): the difference below yields entries present in existing_capacity_ds but absent from
    # the requested points, which is the opposite direction of what the comment above and the error
    # message describe - confirm which check is intended.
    if existing_capacity_ds is not None:
        missing_existing_points = set(existing_capacity_ds.index) - set(capacity_potential_ds.index)
        assert not missing_existing_points, \
            f"Error: Missing following points in existing capacity series: {missing_existing_points}"

    for tech, points in tech_points_dict.items():

        # Compute potential for each NUTS2 or EEZ
        potential_per_region_ds = read_capacity_potential(tech, nuts_type='nuts2')

        # Find the geographical region code associated to each point
        # NOTE(review): an `else:` line appears to be missing between the two assignments below; as
        # written, the second assignment would always overwrite the first.
        if tech in ['wind_offshore', 'wind_floating']:
            region_shapes = region_shapes_dict["eez"]
            region_shapes = region_shapes_dict["nuts2"]

        # Points that match no region are dropped here and therefore keep their initial potential of 0.
        point_regions_ds = match_points_to_regions(points, region_shapes).dropna()
        points = list(point_regions_ds.index)
        points_info_df = pd.DataFrame(point_regions_ds.values, point_regions_ds.index, columns=["region"])

        if tech in ['wind_offshore', 'wind_floating']:

            # For offshore sites, divide the total potential of the region by the number of points
            # associated to that region

            # Get how many points we have in each region and the potential capacity of those regions
            region_freq_ds = points_info_df.groupby(['region'])['region'].count()
            regions = region_freq_ds.index
            region_cap_pot_ds = potential_per_region_ds[regions]
            region_info_df = pd.concat([region_freq_ds, region_cap_pot_ds], axis=1)
            region_info_df.columns = ["freq", "cap_pot"]

            # Assign these values to each points depending on which region they fall in
            # NOTE(review): right_on is passed together with right_index=True; presumably only the
            # index is meant to be used on the right side - confirm with the pinned pandas version.
            points_info_df = \
                points_info_df.merge(region_info_df, left_on='region', right_on='region', right_index=True)

            # Compute potential of each point by dividing the region potential by the number of points it contains
            cap_pot_per_point = points_info_df["cap_pot"]/points_info_df["freq"]

        else:  # tech in ['wind_onshore', 'pv_utility', 'pv_residential']:

            # For onshore sites, divide the total anti-proportionally (or proportionally for residential PV)
            # to population
            # Here were actually using population density, which is proportional to population because we consider
            # that each point is associated to an equivalent area.
            # Densities are clipped to a minimum of 1, which also keeps the inversion below finite.
            points_info_df['pop_dens'] = np.clip(pop_density_array.sel(locations=points).values, a_min=1., a_max=None)
            if tech in ['wind_onshore', 'pv_utility']:
                points_info_df['pop_dens'] = 1./points_info_df['pop_dens']

            # Aggregate per region and get capacity potential for regions in which the points fall
            regions_info_df = points_info_df.groupby(['region']).sum()
            regions_info_df["cap_pot"] = potential_per_region_ds[regions_info_df.index]
            regions_info_df.columns = ['sum_pop_dens', 'cap_pot']

            # Assign these values to each points depending on which region they fall in
            # NOTE(review): the merge call below is not closed in this excerpt.
            points_info_df = points_info_df.merge(regions_info_df, left_on='region', right_on='region',
            # Compute potential
            # Each point's share of the regional potential is its weight over the region's summed weights.
            cap_pot_per_point = points_info_df['pop_dens'] * points_info_df['cap_pot'] / points_info_df['sum_pop_dens']

        capacity_potential_ds.loc[tech, cap_pot_per_point.index] = cap_pot_per_point.values

    # Update capacity potential with existing potential if present
    # Wherever existing capacity exceeds the computed potential, the existing value replaces it.
    if existing_capacity_ds is not None:
        underestimated_capacity = existing_capacity_ds[capacity_potential_ds.index] > capacity_potential_ds
        capacity_potential_ds[underestimated_capacity] = existing_capacity_ds[underestimated_capacity]

    return capacity_potential_ds
Ejemplo n.º 3
def get_legacy_capacity_in_regions_from_non_open(
        tech: str,
        regions_shapes: pd.Series,
        countries: List[str],
        match_distance: float = 50.,
        raise_error: bool = True) -> pd.Series:
    # NOTE(review): this excerpt is truncated - the docstring delimiters are missing, both read_excel
    # calls lack their path argument, several calls are never closed and the final `else:` branches
    # are gone. Restore the missing lines from the upstream file before running.
    Return the total existing capacity (in GW) for the given tech for a set of geographical regions.

    This function is using proprietary data.

    tech: str
        Technology name.
    regions_shapes: pd.Series [Union[Polygon, MultiPolygon]]
        Geographical regions
    countries: List[str]
        List of ISO codes of countries in which the regions are situated
    match_distance: float (default: 50)
        Distance threshold (in km) used when associating points to shape.
    raise_error: bool (default: True)
        Whether to raise an error if no legacy data is available for this technology.

    capacities: pd.Series
        Legacy capacities (in GW) of technology 'tech' for each region


    path_legacy_data = f"{data_path}generation/vres/legacy/source/"

    # Result defaults to 0 for every region; the early returns below hand back this series.
    capacities = pd.Series(0., index=regions_shapes.index)
    plant, plant_type = get_config_values(tech, ["plant", "type"])
    # Point-based data sets: each plant has a location that is matched to a region.
    if (plant, plant_type) in [("Wind", "Onshore"), ("Wind", "Offshore"),
                               ("PV", "Utility")]:

        if plant == "Wind":

            # NOTE(review): the file path argument and the closing of this call are missing here.
            data = pd.read_excel(
                usecols=[2, 5, 9, 10, 18, 23],
            # Drop rows without coordinates or power, then rows whose status is 'Dismantled'.
            data = data.dropna(subset=['Latitude', 'Longitude', 'Total power'])
            data = data[data['Status'] != 'Dismantled']
            if countries is not None:
                data = data[data['ISO code'].isin(countries)]

            if len(data) == 0:
                return capacities

            # Converting from kW to GW
            data['Total power'] *= 1e-6
            # Locations are stored as (Longitude, Latitude) tuples.
            data["Location"] = data[["Longitude", "Latitude"
                                     ]].apply(lambda x:
                                              (x.Longitude, x.Latitude),

            # Keep only onshore or offshore point depending on technology
            if plant_type == 'Onshore':
                data = data[data['Area'] != 'Offshore']
            else:  # Offshore
                data = data[data['Area'] == 'Offshore']

            if len(data) == 0:
                return capacities

        else:  # plant == "PV":

            # NOTE(review): the file path argument of this call is missing in the excerpt.
            data = pd.read_excel(
                usecols=[0, 4, 8])
            data = data[pd.notnull(data['Coords'])]
            # 'Coords' is a comma-separated string; the two parts are swapped into the tuple
            # (presumably "lat,lon" -> (lon, lat), to match the wind branch - confirm).
            data["Location"] = data["Coords"].apply(
                lambda x: (float(x.split(',')[1]), float(x.split(',')[0])))
            if countries is not None:
                data['Country'] = convert_country_codes(
                    data['Country'].values, 'name', 'alpha_2')
                data = data[data['Country'].isin(countries)]

            if len(data) == 0:
                return capacities

            # Converting from MW to GW
            data['Total power'] = data['MWac'] * 1e-3

        data = data[["Location", "Total power"]]

        # NOTE(review): the arguments of the call below and the end of the comparison in the loop are
        # cut off in this excerpt.
        points_region = match_points_to_regions(

        # Sum the power of all plants whose location was matched to each region.
        for region in regions_shapes.index:
            points_in_region = points_region[points_region ==
            capacities[region] = data[data["Location"].isin(
                points_in_region)]["Total power"].sum()

    # Country-level data set: capacity is spread over regions by area share (see loop below).
    elif (plant, plant_type) == ("PV", "Residential"):

        # NOTE(review): the next two calls are not closed in this excerpt.
        legacy_capacity_fn = join(path_legacy_data,
        data = pd.read_excel(legacy_capacity_fn,
                             usecols=[0, 4],
        data = data[data.index.isin(countries)]

        if len(data) == 0:
            return capacities

        # Get countries shapes
        countries_shapes = get_shapes(data.index.values,

        # Each region receives the country's value weighted by the fraction of the country's area
        # that the region overlaps.
        for region_id, region_shape in regions_shapes.items():
            for country_id, country_shape in countries_shapes.items():
                capacities[region_id] += \
                    (region_shape.intersection(country_shape).area/country_shape.area) * data[country_id]

        # NOTE(review): presumably an outer `else:` (unsupported plant/type) and an inner `else:` before
        # the warning are missing here; as written, this check sits inside the PV-residential branch.
        if raise_error:
            raise ValueError(
                f"Error: No legacy data exists for tech {tech} with plant {plant} and type {plant_type}."
            warnings.warn(f"Warning: No legacy data exists for tech {tech}.")

    return capacities
Ejemplo n.º 4
def get_legacy_capacity_in_regions(tech: str,
                                   regions_shapes: pd.Series,
                                   countries: List[str],
                                   match_distance: float = 50.,
                                   raise_error: bool = True) -> pd.Series:
    # NOTE(review): this excerpt is truncated - the docstring delimiters are missing and several calls
    # (raise, pd.Series, apply, match_points_to_regions) are never closed. Restore the missing lines
    # from the upstream file before running.
    Return the total existing capacity (in GW) for the given tech for a set of geographical regions.

    tech: str
        Technology name.
    regions_shapes: pd.Series [Union[Polygon, MultiPolygon]]
        Geographical regions
    countries: List[str]
        List of ISO codes of countries in which the regions are situated.
    match_distance: float (default: 50)
        Distance threshold (in km) used when associating points to shape.
    raise_error: bool (default: True)
        Whether to raise an error if no legacy data is available for this technology.

    capacities: pd.Series
        Legacy capacities (in GW) of technology 'tech' for each region


    # Read per grid cell capacity file
    # The first two CSV columns form the index; they are looked up as (plant, plant_type) below.
    legacy_dir = f"{data_path}generation/vres/legacy/generated/"
    capacities_df = pd.read_csv(f"{legacy_dir}aggregated_capacity.csv",
                                index_col=[0, 1])

    plant, plant_type = get_config_values(tech, ["plant", "type"])
    available_plant_types = set(capacities_df.index)
    if (plant, plant_type) not in available_plant_types:
        # NOTE(review): presumably an `else:` preceded the warning/return pair below - it is missing in
        # this excerpt, as are the closing lines of the raise and of the pd.Series call.
        if raise_error:
            raise ValueError(
                f"Error: no legacy data exists for tech {tech} with plant {plant} and type {plant_type}."
            warnings.warn(f"Warning: No legacy data exists for tech {tech}.")
            return pd.Series(0.,
                             name="Legacy capacity (GW)",

    # Get only capacity for the desired technology and desired countries
    capacities_df = capacities_df.loc[(plant, plant_type)]
    capacities_df = capacities_df[capacities_df.ISO2.isin(countries)]
    # Nothing left for these countries: a zero-valued series is returned (call truncated below).
    if len(capacities_df) == 0:
        return pd.Series(0.,
                         name="Legacy capacity (GW)",

    # Aggregate capacity per region by adding capacity of points falling in those regions
    # Locations are built from the Longitude and Latitude columns, in that order.
    capacities_df["Location"] = capacities_df[["Longitude",
                                               "Latitude"]].apply(lambda x:
                                                                  (x[0], x[1]),
    points_region = match_points_to_regions(
    capacities_ds = pd.Series(0.,
                              name="Legacy capacity (GW)",
    # For each region, sum "Capacity (GW)" over the rows whose location was matched to that region.
    for region in regions_shapes.index:
        points_in_region = points_region[points_region == region].index.values
        capacities_ds[region] = capacities_df[capacities_df["Location"].isin(
            points_in_region)]["Capacity (GW)"].sum()

    return capacities_ds
Ejemplo n.º 5
def match_powerplants_to_regions(
        pp_df: pd.DataFrame,
        shapes_ds: gpd.GeoSeries,
        shapes_countries: Optional[List[str]] = None,
        dist_threshold: Optional[float] = 5.) -> pd.Series:
    # NOTE(review): this excerpt is truncated - the docstring delimiters, the `try:` line of the nested
    # helper, an `else:` branch and the closing lines of several calls are missing. Restore them from
    # the upstream file before running.
    Match each power plant to a region defined by its geographical shape.

    pp_df: pd.DataFrame
        Power plant frame with columns ISO2, lon and lat.
    shapes_ds: gpd.GeoSeries
        GeoDataFrame containing shapes union to which plants are to be mapped.
    shapes_countries: List[str] (default: None)
        If relevant, indicates to which country each shape belongs too.
        Allows to make sure that points are not assigned to shapes which are not part of the same country.
    dist_threshold: Optional[float] (default: 5.)
        Maximal distance (km) from one shape for points outside of all shapes to be accepted.

        Indicates for each element in the input dataframe to which shape it belongs.

    # Input validation: required columns are present and all country codes are two characters long.
    for col in ["ISO2", "lat", "lon"]:
        assert col in pp_df.columns, f"Error: Dataframe missing column {col}."
    assert all(
        len(c) == 2
        for c in pp_df["ISO2"]), "Error: ISO2 codes must be of length 2."
    assert shapes_countries is None or all(len(c) == 2 for c in shapes_countries), \
        "Error: Shapes countries must be given as ISO2 codes of length 2."

    # Helper: look up the region matched to one (lon, lat) pair in `matched_locs`, a name taken from
    # the enclosing scope at call time (so it sees whichever value was assigned last).
    def add_region(lon, lat):
            # NOTE(review): the `try:` line that pairs with the `except` below is missing here.
            region_code = matched_locs[lon, lat]
            # Need the if because some points are exactly at the same position
            # A scalar result is returned as is; otherwise the first element of the result is used.
            return region_code if (
                isinstance(region_code, str) or isinstance(region_code, float)
                or isinstance(region_code, int)) else region_code.iloc[0]
        except (AttributeError, KeyError):
            # Lookup failed for this point: report it as unmatched.
            return None

    # Find to which region each plant belongs
    if shapes_countries is None:
        # No country information: match all plants against all shapes at once.
        plants_locs = pp_df[["lon", "lat"]].apply(lambda xy: (xy[0], xy[1]),
        matched_locs = match_points_to_regions(
            plants_locs, shapes_ds,
        plants_region_ds = pp_df[["lon", "lat"
                                  ]].apply(lambda x: add_region(x[0], x[1]),
        # NOTE(review): presumably an `else:` line is missing here; the statements below handle the
        # per-country case (shapes_countries given).
        unique_countries = sorted(list(set(pp_df["ISO2"])))
        plants_region_ds = pd.Series(index=pp_df.index)
        for country in unique_countries:
            # Restrict both the plants and the shapes to the current country before matching.
            pp_df_in_country = pp_df[pp_df["ISO2"] == country]
            plants_locs = pp_df_in_country[["lon",
                                            "lat"]].apply(lambda xy:
                                                          (xy[0], xy[1]),
            shapes_in_country = shapes_ds[[
                c == country for c in shapes_countries
            matched_locs = match_points_to_regions(
            plants_region_ds.loc[pp_df_in_country.index] = \
                pp_df_in_country[["lon", "lat"]].apply(lambda x: add_region(x[0], x[1]), axis=1)

    return plants_region_ds
Ejemplo n.º 6
def generate_eu_hydro_files(resolution: float, topology_unit: str,
                            timestamps: pd.DatetimeIndex):
    # NOTE(review): this excerpt is truncated - the docstring delimiters are missing, several calls are
    # never closed, an `else:` is missing in the nested helper, and no save calls are visible after
    # save_dir. Restore the missing lines from the upstream file before running.
     Generating hydro files, i.e., capacities and inflows.

     resolution: float
         Runoff data spatial resolution.
     topology_unit: str
         Topology in use ('countries', 'NUTS2', 'NUTS3').
     timestamps: pd.DatetimeIndex
         Time horizon for which inflows are computed.


    assert topology_unit in ["countries", "NUTS2", "NUTS3"
                             ], "Error: requested topology_unit not available."

    # Load shapes based on topology
    if topology_unit == 'countries':
        shapes = get_natural_earth_shapes()
    else:  # topology in ['NUTS2', 'NUTS3']
        # The last character of the topology name ("2" or "3") is passed on as a string.
        shapes = get_nuts_shapes(topology_unit[-1:])
    # The country of each shape is taken from the first two characters of its index code.
    shapes_countries = replace_iso2_codes([code[:2] for code in shapes.index])
    countries = sorted(list(set(shapes_countries)))

    tech_dir = f"{data_path}technologies/"
    # NOTE(review): the call below is cut off (its remaining arguments are not visible) and the file
    # handle returned by open() is never explicitly closed.
    tech_config = yaml.load(open(join(tech_dir, 'tech_config.yml')),

    # Runoff data
    runoff_dataset = read_runoff_data(resolution, timestamps)

    # Find to which nuts region each of the runoff points belong
    runoff_points_region_ds = \
        match_points_to_regions(runoff_dataset.locations.values, shapes, keep_outside=False).dropna()
    # NOTE(review): this function calls both `logger.info` and `logging.info`; confirm that both names
    # exist at module level and that the mix is intentional.
    logger.info('Runoff measurement points mapped to regions shapes.')

    # Helper: add a 'region_code' column to a power plant frame and return the frame.
    def add_region_code(pp_df: pd.DataFrame):
        if topology_unit == "countries":
            pp_df['region_code'] = pp_df["ISO2"]
            # NOTE(review): presumably an `else:` line is missing here; the next two statements look
            # like the NUTS case (match plants to shapes, then drop plants without a region).
            pp_df['region_code'] = match_powerplants_to_regions(
                pp_df, shapes, shapes_countries)
            pp_df = pp_df[~pp_df['region_code'].isnull()]
        return pp_df

    # Build ROR data
    # Get all ROR powerplants in the countries of interest and add region name
    logging.info('Building ROR data')
    ror_plants_df = get_powerplants('ror', countries)
    ror_plants_df = add_region_code(ror_plants_df)
    # Get capacity and inflow per region (for which inflow data exists)
    ror_capacity_ds, ror_inflows_df = build_ror_data(
        ror_plants_df.set_index(["region_code"])["Capacity"], timestamps,
        runoff_dataset, runoff_points_region_ds)

    # Build STO data
    # The STO builder also receives the ROR capacities and inflows computed above.
    logging.info('Building STO data')
    sto_plants_df = get_powerplants('sto', countries)
    sto_plants_df = add_region_code(sto_plants_df)
    sto_capacity_df, sto_inflows_df, sto_multipliers_ds = \
        build_sto_data(sto_plants_df.set_index(["region_code"])["Capacity"], timestamps,
                       runoff_dataset, runoff_points_region_ds, ror_capacity_ds, ror_inflows_df)

    # Build PHS data
    logging.info('Building PHS data')
    default_phs_duration = tech_config['phs']['default_duration']

    phs_plants_df = get_powerplants('phs', countries)
    phs_plants_df = add_region_code(phs_plants_df)
    phs_capacity_df = build_phs_data(phs_plants_df, default_phs_duration)

    # Merge capacities DataFrame.
    # NOTE(review): the concat call and the column list below are not closed in this excerpt.
    capacities_df = pd.concat(
        [ror_capacity_ds, sto_capacity_df, phs_capacity_df], axis=1,
    capacities_df.columns = [
        'ROR_CAP [GW]', 'STO_CAP [GW]', 'STO_EN_CAP [GWh]', 'PSP_CAP [GW]',
        'PSP_EN_CAP [GWh]'
    # Turn zeros into NaN, then drop rows in which every column is NaN.
    capacities_df.replace(0., np.nan, inplace=True)
    capacities_df.dropna(how='all', inplace=True)
    # Keep inflow columns only for the index entries that still have a ROR / STO capacity value.
    ror_inflows_df = ror_inflows_df[
        capacities_df['ROR_CAP [GW]'].dropna().index]
    sto_inflows_df = sto_inflows_df[
        capacities_df['STO_CAP [GW]'].dropna().index]

    # Saving files
    # NOTE(review): no write calls are visible between save_dir and the log message below - they appear
    # to have been dropped from this excerpt.
    save_dir = f"{data_path}hydro/generated/"
    logger.info('Files saved to disk.')