コード例 #1
0
def write_poly_shapefile():
    """Convert ReEDS wind resource csv-format file to a shapefile.

    The polygon vertices and the region hierarchy are read from the paths held in
    ``const``. Only rows whose country is *'usa'* and that are not flagged as holes
    are kept. One Polygon feature is written per *group* value, with the region *id*
    as its single attribute.

    .. note:: *gis_rs.csv* is from ReEDS open-source: */bokehpivot/in/gis_rs.csv*,
        *hierarchy.csv* is from: */bokehpivot/in/reeds2/hierarchy.csv*.
    """
    fiona = _check_import("fiona")
    geometry = _check_import("shapely.geometry")
    make_polygon = geometry.Polygon
    to_mapping = geometry.mapping

    shapefile_dir = const.reeds_wind_shapefile_path
    os.makedirs(shapefile_dir, exist_ok=True)

    vertices = pd.read_csv(
        const.reeds_wind_csv_path,
        sep=",",
        dtype={"id": object, "group": object},
    )
    regions = pd.read_csv(const.reeds_mapping_hierarchy_path)
    vertices = vertices.merge(regions, left_on="id", right_on="rs", how="left")
    vertices = vertices[vertices["country"] == "usa"]

    # Discard the vertices that describe holes, then the flag itself
    not_hole = vertices["hole"] == False  # noqa: E712
    vertices = vertices[not_hole].drop("hole", axis=1)

    # Each feature is a polygon carrying a single string attribute
    schema = {"geometry": "Polygon", "properties": {"id": "str"}}

    with fiona.open(shapefile_dir, "w", "ESRI Shapefile", schema) as sink:
        # One feature per group, in order of first appearance
        for group in vertices["group"].drop_duplicates():
            members = vertices[vertices["group"] == group]
            region_id = members["id"].drop_duplicates().to_numpy()[0]
            ring = list(zip(members["long"], members["lat"]))
            feature = {
                "geometry": to_mapping(make_polygon(ring)),
                "properties": {"id": region_id},
            }
            sink.write(feature)
コード例 #2
0
def convert_shapefile_to_latlon_dict(filename, key):
    """Converts a shapefile to a dictionary of lat/lon data.

    :param str filename: the location of the shapefile to interpret.
    :param str key: the shapefile column values used as dictionary keys.
    :return: (*dict*) -- dictionary with keys from the specified shapefile column,
        values are dict with keys of {"lats", "lons"}, values are coordinates, padded
        by nan values to indicate the end of each polygon before the start of the next
        one.
    :raises ValueError: if the specified key is not present in the shapefile, or the
        shapefile contains at least one polygon with a hole.
    """
    gpd = _check_import("geopandas")
    shapes = gpd.read_file(filename)
    if key not in shapes.columns:
        raise ValueError("key must be present in the columns of the shapefile")
    exploded_shapes = shapes.explode()
    if any(len(g.interiors) > 0 for g in exploded_shapes.geometry):
        raise ValueError("Cannot convert shapes with holes")

    # A single (nan, nan) point, inserted between consecutive polygons
    separator = np.full((1, 2), np.nan)
    keys_to_latlon_dicts = {}
    for i in shapes.index:
        # NOTE(review): iterating ``.loc[i]`` assumes explode() produced a
        # (original index, part) MultiIndex -- confirm for the geopandas version in use
        rings = [np.array(g.xy).T for g in exploded_shapes.exterior.loc[i]]
        last = len(rings) - 1
        # Join individual arrays, padding in between with the (nan, nan) point
        nanpadded_array = np.concatenate(
            [
                np.concatenate([ring, separator]) if n < last else ring
                for n, ring in enumerate(rings)
            ]
        ).T
        # Row 0 holds the x coordinates (longitude), row 1 the y coordinates (latitude)
        latlon_dict = {"lats": nanpadded_array[1], "lons": nanpadded_array[0]}
        keys_to_latlon_dicts[shapes.loc[i, key]] = latlon_dict
    return keys_to_latlon_dicts
コード例 #3
0
def points_to_polys(df, name, shpfile, search_dist=0.04):
    """Map each point of a data frame to the closest region of a shapefile.

    :param pandas.DataFrame df: includes an index, and 'lat' and 'lon' columns.
    :param str name: what to name the id (bus, plant, substation, etc). The id column
        of the result is named ``name + "_id"``.
    :param str shpfile: name of shapefile containing a collection Polygon/Multipolygon
        shapes with region IDs.
    :param float/int search_dist: distance to search from point for nearest polygon.
    :raises ValueError: if some points are dropped because too far away from polys.
    :return: (*geopandas.GeoDataFrame*) --
        columns: index id, (point) geometry, [region, other properties of region]
    """
    gpd = _check_import("geopandas")
    target_crs = "EPSG:4326"

    # Regions without a crs get the target one assigned, any other crs is converted
    regions = gpd.read_file(shpfile)
    if regions.crs is None:
        regions.crs = target_crs
    elif regions.crs != target_crs:
        regions = regions.to_crs(target_crs)

    # Build the Points geodataframe, keeping the original index as the id column
    id_column = name + "_id"
    points = gpd.GeoDataFrame(
        pd.DataFrame({id_column: df.index}),
        geometry=gpd.points_from_xy(df.lon, df.lat),
        crs=target_crs,
    )

    # Region containing each point, or the closest one within search_dist
    matched = sjoin_nearest(
        left_df=points, right_df=regions, search_dist=search_dist
    )
    matched = matched.drop("index_right", axis=1)

    if len(points) <= len(matched):
        return matched

    # Fewer rows came back than went in: report the ids that were lost
    is_mapped = points[id_column].isin(matched[id_column])
    dropped = points[~is_mapped][id_column].to_list()
    raise ValueError(
        "Some points dropped because could not be mapped to regions. "
        "Check your lat/lon values to be sure it's in the US. "
        f"Or increase search_dist if close. Problem ids: {dropped}")
コード例 #4
0
def points_to_polys(df, name, shpfile, search_dist=0.04):
    """Assign every node to its closest region.

    :param pandas.DataFrame df: data frame with node id as index and *'lat'* and
        *'lon'* as columns.
    :param str name: name of node, e.g., bus, plant, substation, etc. The id column of
        the result is named ``name + "_id"``.
    :param str shpfile: shapefile enclosing Polygon/Multipolygon with region id.
    :param float/int search_dist: radius around point to detect polygons.
    :raises ValueError: if some points are dropped because too far away from polygons.
    :return: (*geopandas.GeoDataFrame*) -- columns: id name, (point) geometry,
        region and properties of region.
    """
    gpd = _check_import("geopandas")
    target_crs = "EPSG:4326"

    # Regions without a crs get the target one assigned, any other crs is converted
    regions = gpd.read_file(shpfile)
    if regions.crs is None:
        regions.crs = target_crs
    elif regions.crs != target_crs:
        regions = regions.to_crs(target_crs)

    # Build the Points geodataframe, keeping the node ids in their own column
    id_column = name + "_id"
    nodes = gpd.GeoDataFrame(
        pd.DataFrame({id_column: df.index}),
        geometry=gpd.points_from_xy(df.lon, df.lat),
        crs=target_crs,
    )

    # Region containing each node, or the closest one within search_dist
    matched = sjoin_nearest(
        left_df=nodes, right_df=regions, search_dist=search_dist
    )
    matched = matched.drop("index_right", axis=1)

    if len(nodes) <= len(matched):
        return matched

    # Fewer rows came back than went in: report the ids that were lost
    is_mapped = nodes[id_column].isin(matched[id_column])
    dropped = nodes[~is_mapped][id_column].to_list()
    raise ValueError(
        "Some points dropped because could not be mapped to regions. "
        "Check your lat/lon values to be sure it's in the US. "
        f"Or increase search_dist. ids dropped: {dropped}")
コード例 #5
0
def sjoin_nearest(left_df, right_df, search_dist=0.06):
    """Perform a spatial join between two input layers.

    If a geometry in left_df falls outside (all) geometries in right_df, the data from
    nearest Polygon will be used as a result. To make queries faster, change
    "search_dist."

    :param geopandas.GeoDataFrame left_df: A dataframe of Points.
    :param geopandas.GeoDataFrame right_df: A dataframe of Polygons/Multipolygons
    :param float/int search_dist: parameter (specified in map units) is used to limit
        the search area for geometries around source points. Smaller -> faster runtime.
    :return: (*geopandas.GeoDataFrame*) -- A dataframe of Points mapped to each polygon
        in right_df, with a 'dist' column holding the distance to the matched polygon
        (0 for points inside a polygon).
    :raises ValueError: if either input already has a 'dist' column, or if a point
        lying outside every polygon has no polygon within *search_dist*.
    """
    # Local import: pandas.concat replaces DataFrame.append, removed in pandas 2.0
    import pandas as pd

    def _find_nearest(series, polygons, search_dist):
        """Given a row with a bus id and a Point, find the closest polygon.

        :param pandas.Series series: point to map.
        :param geopandas.geodataframe.GeoDataFrame polygons: polygons to select from.
        :param float search_dist: radius around point to detect polygons in.
        :return: (*pandas.Series*) -- the point row joined with the attributes of the
            closest polygon (geometry excluded) and the distance to it.
        :raises ValueError: if no polygon intersects the search buffer.
        """
        geom = series[left_df.geometry.name]
        # Get geometries within search distance
        candidates = polygons.loc[polygons.intersects(
            geom.buffer(search_dist))]

        if len(candidates) == 0:
            raise ValueError(
                f"No polygons found within {search_dist} of {series.name}")

        # Select the closest Polygon, measured to its exterior ring
        distances = candidates.apply(
            lambda x: geom.distance(x[candidates.geometry.name].exterior),
            axis=1)
        # idxmin must be *called*: handing the bound method to .loc makes pandas
        # treat it as a callable indexer, which only worked by accident
        closest_poly = polygons.loc[distances.idxmin()].to_frame().T

        # Reset index so the one-row frames align on 0 when joined
        series = series.to_frame().T.reset_index(drop=True)

        # Drop geometry from closest polygon
        closest_poly = closest_poly.drop(polygons.geometry.name, axis=1)
        closest_poly = closest_poly.reset_index(drop=True)

        # Join values
        join = series.join(closest_poly, lsuffix="_left", rsuffix="_right")

        # Record the distance to the closest geometry
        join["dist"] = distances.min()

        return join.squeeze()

    gpd = _check_import("geopandas")

    if "dist" in (set(left_df.columns) | set(right_df.columns)):
        raise ValueError(
            "neither series nor polygons can contain a 'dist' column")

    # Explode possible MultiGeometries. This is a major speedup!
    right_df = right_df.explode()
    right_df = right_df.reset_index(drop=True)

    # Make spatial join between points that fall inside the Polygons
    # NOTE(review): newer geopandas releases rename ``op`` to ``predicate`` --
    # confirm against the geopandas version pinned by the project
    points_in_regions = gpd.sjoin(left_df=left_df,
                                  right_df=right_df,
                                  op="intersects")
    points_in_regions["dist"] = 0

    # Find closest Polygons, for points that don't fall within any.
    # A boolean mask is used because recent pandas rejects sets as indexers.
    is_missing = ~left_df.index.isin(points_in_regions.index)
    points_not_in_regions = left_df.loc[is_missing]
    closest_geometries = points_not_in_regions.apply(_find_nearest,
                                                     args=(right_df,
                                                           search_dist),
                                                     axis=1)

    # Merge everything together
    closest_geometries = gpd.GeoDataFrame(closest_geometries)
    result = pd.concat([points_in_regions, closest_geometries],
                       ignore_index=True,
                       sort=False)
    return result
コード例 #6
0
def write_poly_shapefile():
    """Convert a ReEDS csv-format file to a shapefile.

    Shouldn't need to run again unless there is new source data. Right now, the
    ReEDS wind resource regions (labelled rs) are hard-coded as the input.

    gis_rs.csv is from ReEDS open-source: "/bokehpivot/in/gis_rs.csv"
    hierarchy.csv is from: "/bokehpivot/in/reeds2/hierarchy.csv"
    writes out the shapefile in "rs/rs.shp"

    Note: These ReEDS wind resource region shapes are approximate. Thus, there are
        probably some mistakes, but this is currently only used for mapping plant
        regional multipliers, which are approximate anyway, so it should be fine.
    """
    fiona = _check_import("fiona")
    geometry = _check_import("shapely.geometry")
    make_polygon = geometry.Polygon
    to_mapping = geometry.mapping

    shapefile_dir = const.reeds_wind_shapefile_path
    os.makedirs(shapefile_dir, exist_ok=True)

    vertices = pd.read_csv(
        const.reeds_wind_csv_path,
        sep=",",
        dtype={"id": object, "group": object},
    )
    regions = pd.read_csv(const.reeds_mapping_hierarchy_path)
    vertices = vertices.merge(regions, left_on="id", right_on="rs", how="left")
    vertices = vertices[vertices["country"] == "usa"]

    # Discard the vertices that describe holes, then the flag itself
    not_hole = vertices["hole"] == False  # noqa: E712
    vertices = vertices[not_hole].drop("hole", axis=1)

    # Each feature is a polygon carrying a single string attribute
    schema = {"geometry": "Polygon", "properties": {"id": "str"}}

    with fiona.open(shapefile_dir, "w", "ESRI Shapefile", schema) as sink:
        # One feature per group, in order of first appearance
        for group in vertices["group"].drop_duplicates():
            members = vertices[vertices["group"] == group]
            region_id = members["id"].drop_duplicates().to_numpy()[0]
            ring = list(zip(members["long"], members["lat"]))
            feature = {
                "geometry": to_mapping(make_polygon(ring)),
                "properties": {"id": region_id},
            }
            sink.write(feature)
コード例 #7
0
def sjoin_nearest(left_df, right_df, search_dist=0.06):
    """Perform a spatial join between two input layers.

    :param geopandas.GeoDataFrame left_df: A dataframe of Points.
    :param geopandas.GeoDataFrame right_df: A dataframe of Polygons/Multipolygons.
    :param float/int search_dist: radius (in map units) around point to detect polygons.
    :return: (*geopandas.GeoDataFrame*) -- data frame of Points mapped to each Polygon,
        with a 'dist' column holding the distance to the matched polygon (0 for points
        inside a polygon).
    :raises ValueError: if either input already has a 'dist' column, or if a point
        lying outside every polygon has no polygon within *search_dist*.

    .. note:: data from nearest Polygon/Multipolygon will be used as a result if a
        Point falls outside all available Polygon/Multipolygons.
    """
    # Local import: pandas.concat replaces DataFrame.append, removed in pandas 2.0
    import pandas as pd

    def _find_nearest(series, polygons, search_dist):
        """Find the closest polygon.

        :param pandas.Series series: point to map.
        :param geopandas.geodataframe.GeoDataFrame polygons: polygons to select from.
        :param float search_dist: radius around point to detect polygons.
        :return: (*pandas.Series*) -- the point row joined with the attributes of the
            closest polygon (geometry excluded) and the distance to it.
        :raises ValueError: if no polygon intersects the search buffer.
        """
        geom = series[left_df.geometry.name]
        # Get geometries within search distance
        candidates = polygons.loc[polygons.intersects(
            geom.buffer(search_dist))]

        if len(candidates) == 0:
            raise ValueError(
                f"No polygons found within {search_dist} of {series.name}")

        # Select the closest Polygon, measured to its exterior ring
        distances = candidates.apply(
            lambda x: geom.distance(x[candidates.geometry.name].exterior),
            axis=1)
        # idxmin must be *called*: handing the bound method to .loc makes pandas
        # treat it as a callable indexer, which only worked by accident
        closest_poly = polygons.loc[distances.idxmin()].to_frame().T

        # Reset index so the one-row frames align on 0 when joined
        series = series.to_frame().T.reset_index(drop=True)

        # Drop geometry from closest polygon
        closest_poly = closest_poly.drop(polygons.geometry.name, axis=1)
        closest_poly = closest_poly.reset_index(drop=True)

        # Join values
        join = series.join(closest_poly, lsuffix="_left", rsuffix="_right")

        # Record the distance to the closest geometry
        join["dist"] = distances.min()

        return join.squeeze()

    gpd = _check_import("geopandas")

    if "dist" in (set(left_df.columns) | set(right_df.columns)):
        raise ValueError(
            "neither series nor polygons can contain a 'dist' column")

    # Explode possible MultiGeometries. This is a major speedup!
    right_df = right_df.explode()
    right_df = right_df.reset_index(drop=True)

    # Make spatial join between points that fall inside the Polygons
    # NOTE(review): newer geopandas releases rename ``op`` to ``predicate`` --
    # confirm against the geopandas version pinned by the project
    points_in_regions = gpd.sjoin(left_df=left_df,
                                  right_df=right_df,
                                  op="intersects")
    points_in_regions["dist"] = 0

    # Since polygons may overlap, there can be duplicated buses that we want to filter
    # NOTE(review): this section requires 'bus_id' and 'name_abbr' columns
    duplicated = points_in_regions.loc[points_in_regions.index.duplicated(
        keep=False)]
    to_drop = set()
    for bus in set(duplicated["bus_id"]):
        entries = duplicated.query("bus_id == @bus")
        # First duped entry, only point
        coords = entries["geometry"].iloc[0].coords[0]
        regions = set(entries["name_abbr"])  # noqa: F841
        # Unambiguous points lying in any of the competing regions
        candidates = points_in_regions.query(
            "index not in @duplicated.index and name_abbr in @regions")
        # The region of the nearest unambiguous point wins
        neighbor = candidates.apply(lambda x: haversine(
            (x.geometry.x, x.geometry.y), coords),
                                    axis=1).idxmin()
        closest_region = candidates.loc[neighbor, "name_abbr"]  # noqa: F841
        # There may be more than two overlapping geometries, capture all but the closest
        drop_regions = set(
            entries.query("name_abbr != @closest_region")["name_abbr"])
        # Since indices are duplicated, we need to drop via two-column tuples
        to_drop |= {(bus, d) for d in drop_regions}

    points_in_regions = points_in_regions.loc[~points_in_regions.set_index(
        ["bus_id", "name_abbr"]).index.isin(to_drop)]

    # Find closest Polygons, for points that don't fall within any.
    # A boolean mask is used because recent pandas rejects sets as indexers.
    is_missing = ~left_df.index.isin(points_in_regions.index)
    points_not_in_regions = left_df.loc[is_missing]
    closest_geometries = points_not_in_regions.apply(_find_nearest,
                                                     args=(right_df,
                                                           search_dist),
                                                     axis=1)

    # Merge everything together
    closest_geometries = gpd.GeoDataFrame(closest_geometries)
    result = pd.concat([points_in_regions, closest_geometries],
                       ignore_index=True,
                       sort=False)
    return result
コード例 #8
0
def plot_capacity_vs_price(
    grid, num_segments, area, gen_type, area_type=None, plot=True
):
    """Plots the generator capacity vs. the generator price for a specified area
        and generation type.

    :param powersimdata.input.grid.Grid grid: Grid object.
    :param int num_segments: The number of segments into which the piecewise linear
        cost curve is split.
    :param str area: Either the load zone, state name, state abbreviation, or
        interconnect.
    :param str gen_type: Generation type.
    :param str area_type: one of: *'loadzone'*, *'state'*, *'state_abbr'*,
        *'interconnect'*. Defaults to None, which allows
        :func:`powersimdata.network.model.area_to_loadzone` to infer the type.
    :param bool plot: If True, the supply curve plot is shown. If False, the plot is
        not shown.
    :return: (*None*) -- The capacity vs. price plot is displayed according to the user.
    :raises TypeError: if a powersimdata.input.grid.Grid object is not input, or if
        the number of segments is not an int.
    :raises ValueError: if the specified area or generator type is not applicable.
    """

    plt = _check_import("matplotlib.pyplot")

    # Validate the argument types
    if not isinstance(grid, Grid):
        raise TypeError("A Grid object must be input.")
    if not isinstance(num_segments, int):
        raise TypeError(
            "The number of linearized cost curve segments must be input as an int."
        )

    # Fetch and check the generator cost and plant information data
    data = get_supply_data(grid, num_segments)
    check_supply_data(data, num_segments)

    # The generation type must exist somewhere in the grid
    if gen_type not in data["type"].unique():
        raise ValueError(f"{gen_type} is not a valid generation type.")

    # Load zones that correspond to the specified area and area_type
    returned_zones = area_to_loadzone(grid.get_grid_model(), area, area_type)

    # Keep the generators of the desired area and generation type
    in_area = data.zone_name.isin(returned_zones)
    of_type = data["type"] == gen_type
    data = data.loc[in_area & of_type]

    # Remove generators that have no capacity (e.g., Maine coal generators)
    data = data.dropna(subset=["slope1"])

    # Nothing to show if the area has no generator of the specified type
    if data.empty:
        return

    # Stack the (p_diff, slope) pairs of every cost segment under common column names
    segments = []
    for n in range(1, num_segments + 1):
        p_col, s_col = f"p_diff{n}", f"slope{n}"
        segment = data.loc[:, [p_col, s_col]]
        segments.append(segment.rename(columns={p_col: "p_diff", s_col: "slope"}))
    df = pd.concat(segments, axis=0).reset_index(drop=True)

    # Capacity-weighted average price, 0 when there is no capacity at all
    total_cap = df["p_diff"].sum()
    data_avg = 0 if total_cap == 0 else (df["slope"] * df["p_diff"]).sum() / total_cap

    if not plot:
        return

    # Scatter of the segments, with the weighted average drawn as a red line
    ax = df.plot.scatter(
        x="p_diff", y="slope", s=50, figsize=[20, 10], grid=True, fontsize=20
    )
    plt.title(f"Capacity vs. Price for {gen_type} generators in {area}", fontsize=20)
    plt.xlabel("Segment Capacity (MW)", fontsize=20)
    plt.ylabel("Segment Price ($/MW)", fontsize=20)
    ax.plot(df["p_diff"], [data_avg] * len(df.index), c="red")
    plt.show()
コード例 #9
0
def plot_c1_vs_c2(
    grid,
    area,
    gen_type,
    area_type=None,
    plot=True,
    zoom=False,
    num_sd=3,
    alpha=0.1,
):
    """Compares the c1 and c2 parameters from the quadratic generator cost curves.

    :param powersimdata.input.grid.Grid grid: Grid object.
    :param str area: Either the load zone, state name, state abbreviation, or
        interconnect.
    :param str gen_type: Generation type.
    :param str area_type: one of: *'loadzone'*, *'state'*, *'state_abbr'*,
        *'interconnect'*. Defaults to None, which allows
        :func:`powersimdata.network.model.area_to_loadzone` to infer the type.
    :param bool plot: If True, the c1 vs. c2 plot is shown. If False, the plot is not
        shown.
    :param bool zoom: If True, filters out c2 outliers to enable better visualization.
        If False, there is no filtering.
    :param float/int num_sd: The number of standard deviations used to filter out c2
        outliers.
    :param float alpha: The alpha blending value for the scatter plot; takes values
        between 0 (transparent) and 1 (opaque).
    :return: (*None*) -- The c1 vs. c2 plot is displayed according to the user.
    :raises TypeError: if a powersimdata.input.grid.Grid object is not input.
    :raises ValueError: if the specified area or generator type is not applicable.
    """

    plt = _check_import("matplotlib.pyplot")

    if not isinstance(grid, Grid):
        raise TypeError("A Grid object must be input.")

    # Work on a private copy of the Grid object
    grid = copy.deepcopy(grid)
    gencost_df = grid.gencost["before"]
    plant_df = grid.plant

    # Plant information next to the cost columns not already covered or not needed
    plant_columns = ["type", "interconnect", "zone_name", "Pmin", "Pmax"]
    cost_columns = gencost_df.columns.difference(
        ["type", "startup", "shutdown", "n", "interconnect"], sort=False
    )
    data = pd.concat([plant_df[plant_columns], gencost_df[cost_columns]], axis=1)

    # The generation type must exist somewhere in the grid
    if gen_type not in data["type"].unique():
        raise ValueError(f"{gen_type} is not a valid generation type.")

    # Load zones that correspond to the specified area and area_type
    returned_zones = area_to_loadzone(grid.get_grid_model(), area, area_type)

    # Keep the generators of the desired area and generation type
    in_area = data.zone_name.isin(returned_zones)
    of_type = data["type"] == gen_type
    data = data.loc[in_area & of_type]

    # Remove generators that have no capacity (e.g., Maine coal generators)
    data = data[data["Pmin"] != data["Pmax"]]

    # Nothing to show if the area has no generator of the specified type
    if data.empty:
        return

    # Optionally drop large c2 outliers so the overall trend is easier to see
    zoom_name = ""
    if zoom:
        cutoff = np.mean(data["c2"]) + num_sd * np.std(data["c2"])
        # Zooming only takes effect when at least one value exceeds the cutoff
        zoom = len(data[data["c2"] > cutoff]) > 0
        if zoom:
            data = data[data["c2"] <= cutoff]
            max_ylim = np.max(data["c2"] + 0.01)
            min_ylim = np.min(data["c2"] - 0.01)
            max_xlim = np.max(data["c1"] + 1)
            min_xlim = np.min(data["c1"] - 1)
            zoom_name = "(zoomed)"

    if not plot:
        return

    # Scatter of c1 vs. c2, marker size and color both driven by capacity
    fig, ax = plt.subplots()
    fig.set_size_inches(20, 10)
    plt.scatter(
        data["c1"],
        data["c2"],
        s=np.sqrt(data["Pmax"]) * 10,
        alpha=alpha,
        c=data["Pmax"],
        cmap="plasma",
    )
    plt.grid()
    plt.title(f"c1 vs. c2 for {gen_type} generators in {area} {zoom_name}", fontsize=20)
    if zoom:
        plt.ylim([min_ylim, max_ylim])
        plt.xlim([min_xlim, max_xlim])
    plt.xlabel("c1", fontsize=20)
    plt.ylabel("c2", fontsize=20)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    cbar = plt.colorbar()
    cbar.set_label("Capacity (MW)", fontsize=20)
    cbar.ax.tick_params(labelsize=20)
    plt.show()
コード例 #10
0
def ks_test(P1, F1, P2, F2, area=None, gen_type=None, plot=True):
    """Runs a test that is similar to the Kolmogorov-Smirnov test. This function takes
    two supply curves as inputs and returns the greatest difference in price between
    the two supply curves. This function requires that the supply curves offer the same
    amount of capacity.

    :param list P1: List of capacity values for the first supply curve.
    :param list F1: List of price values for the first supply curve.
    :param list P2: List of capacity values for the second supply curve.
    :param list F2: List of price values for the second supply curve.
    :param str area: Either the load zone, state name, state abbreviation, or
        interconnect. Defaults to None because it's not essential.
    :param str gen_type: Generation type. Defaults to None because it's not essential.
    :param bool plot: If True, the supply curve plot is shown. If False, the plot is
        not shown.
    :return: (*float*) -- The maximum price difference between the two supply curves.
    :raises TypeError: if the capacity and price inputs are not provided as lists.
    :raises ValueError: if the supply curves do not offer the same amount of capacity.
    """

    def _price_at(capacity, capacities, prices):
        """Return the price of a supply curve at the given capacity value."""
        # The last capacity value maps to the last price of the curve
        if capacity == capacities[-1]:
            return prices[-1]
        return prices[lower_bound_index(capacity, capacities)]

    # All four inputs must be lists
    if not all(isinstance(i, list) for i in [P1, F1, P2, F2]):
        raise TypeError("P1, F1, P2, and F2 must be input as lists.")

    # Both curves must end at the same total capacity
    if max(P1) != max(P2):
        raise ValueError(
            "The two supply curves do not offer the same amount of capacity (MW)."
        )

    # Every capacity value at which either supply curve steps up, in ascending order
    breakpoints = sorted(set(P1) | set(P2))

    # Price of each curve at every breakpoint, then their absolute differences
    price_pairs = [
        [_price_at(p, P1, F1), _price_at(p, P2, F2)] for p in breakpoints
    ]
    price_gaps = [abs(first - second) for first, second in price_pairs]
    max_diff = max(price_gaps)

    # Plot the two supply curves overlaid
    if plot:
        plt = _check_import("matplotlib.pyplot")
        plt.figure(figsize=[20, 10])
        plt.plot(P1, F1)
        plt.plot(P2, F2)
        if None in {area, gen_type}:
            plt.title("Supply Curve Comparison", fontsize=20)
        else:
            plt.title(
                f"Supply curve comparison for {gen_type} generators in {area}",
                fontsize=20,
            )
        plt.xlabel("Capacity (MW)", fontsize=20)
        plt.ylabel("Price ($/MW)", fontsize=20)
        plt.xticks(fontsize=20)
        plt.yticks(fontsize=20)
        plt.show()

    # The maximum price difference corresponds to the K-S statistic
    return max_diff
コード例 #11
0
def build_supply_curve(grid, num_segments, area, gen_type, area_type=None, plot=True):
    """Builds a supply curve for a specified area and generation type.

    :param powersimdata.input.grid.Grid grid: Grid object.
    :param int num_segments: The number of segments into which the piecewise linear
        cost curve is split.
    :param str area: Either the load zone, state name, state abbreviation, or
        interconnect.
    :param str gen_type: Generation type.
    :param str area_type: one of: *'loadzone'*, *'state'*, *'state_abbr'*,
        *'interconnect'*. Defaults to None, which allows
        :func:`powersimdata.network.model.area_to_loadzone` to infer the type.
    :param bool plot: If True, the supply curve plot is shown. If False, the plot is
        not shown.
    :return: (*tuple*) -- First element is a list of capacity (MW) amounts needed
        to create supply curve. Second element is a list of bids ($/MW) in the supply
        curve. Both lists are empty if the area has no generator of the given type.
    :raises TypeError: if a powersimdata.input.grid.Grid object is not input, or if
        the number of segments is not an int.
    :raises ValueError: if the specified area or generator type is not applicable.
    """

    # Validate the argument types
    if not isinstance(grid, Grid):
        raise TypeError("A Grid object must be input.")
    if not isinstance(num_segments, int):
        raise TypeError(
            "The number of linearized cost curve segments must be input as an int."
        )

    # Fetch and check the generator cost and plant information data
    data = get_supply_data(grid, num_segments)
    check_supply_data(data, num_segments)

    # The generation type must exist somewhere in the grid
    if gen_type not in data["type"].unique():
        raise ValueError(f"{gen_type} is not a valid generation type.")

    # Load zones that correspond to the specified area and area_type
    returned_zones = area_to_loadzone(grid.get_grid_model(), area, area_type)

    # Keep the generators of the desired area and generation type
    in_area = data.zone_name.isin(returned_zones)
    of_type = data["type"] == gen_type
    data = data.loc[in_area & of_type]

    # Remove generators that have no capacity (e.g., Maine coal generators)
    data = data.dropna(subset=["slope1"])

    # No curve to build if the area has no generator of the specified type
    if data.empty:
        return [], []

    # Stack the (p_diff, slope) pairs of every cost segment under common column names
    segments = []
    for n in range(1, num_segments + 1):
        p_col, s_col = f"p_diff{n}", f"slope{n}"
        segment = data.loc[:, [p_col, s_col]]
        segments.append(segment.rename(columns={p_col: "p_diff", s_col: "slope"}))

    # Cheapest segments first
    df = pd.concat(segments, axis=0).sort_values(by="slope").reset_index(drop=True)

    # Each segment contributes a horizontal step: two points sharing the same price
    P = []
    F = []
    offset = 0
    for width, price in zip(df["p_diff"].to_numpy(), df["slope"].to_numpy()):
        P.extend((offset, width + offset))
        F.extend((price, price))
        offset += width

    # Plot the curve
    if plot:
        plt = _check_import("matplotlib.pyplot")
        plt.figure(figsize=[20, 10])
        plt.plot(P, F)
        plt.title(f"Supply curve for {gen_type} generators in {area}", fontsize=20)
        plt.xlabel("Capacity (MW)", fontsize=20)
        plt.ylabel("Price ($/MW)", fontsize=20)
        plt.xticks(fontsize=20)
        plt.yticks(fontsize=20)
        plt.show()

    # Return the capacity and bid amounts
    return P, F
コード例 #12
0
import shutil
from zipfile import ZipFile

from powersimdata.network.constants.region.europe import (
    abv2country,
    abv2timezone,
    interconnect2abv,
)
from powersimdata.network.helpers import (
    check_and_format_interconnect,
    interconnect_to_name,
)
from powersimdata.network.model import ModelImmutables
from powersimdata.utility.helpers import _check_import

# Third-party packages are resolved through the project's ``_check_import``
# helper rather than plain ``import`` statements.
# NOTE(review): presumably the helper raises an informative error when the
# package is not installed -- confirm in powersimdata.utility.helpers.
pypsa = _check_import("pypsa")
zenodo_get = _check_import("zenodo_get")


class TUB:
    """PyPSA Europe network.

    :param str/iterable interconnect: interconnect name(s).
    :param int reduction: reduction parameter (number of nodes in network). If None,
        the full network is loaded.
    :param bool overwrite: if True, the existing dataset is deleted and a new dataset
        is downloaded from zenodo.
    """
    def __init__(self, interconnect, reduction=None, overwrite=False):
        """Constructor.

        NOTE(review): only ``grid_model`` is assigned in the code visible here;
        ``interconnect``, ``reduction`` and ``overwrite`` are accepted but not
        used -- confirm whether the remainder of the constructor is missing.
        """
        # Name of the grid model this network class represents.
        self.grid_model = "europe_tub"
コード例 #13
0
def build_supply_curve(grid,
                       num_segments,
                       area,
                       gen_type,
                       area_type=None,
                       plot=True):
    """Builds a supply curve for a specified area and generation type.

    The cost-curve segments of every matching generator are pooled, sorted by
    price (slope) and stacked, which yields a step function of cumulative
    capacity versus price.

    :param powersimdata.input.grid.Grid grid: Grid object.
    :param int num_segments: The number of segments into which the piecewise linear
        cost curve is split.
    :param str area: Either the load zone, state name, state abbreviation, or
        interconnect.
    :param str/iterable gen_type: Generation type(s). A single type may be given
        as a string; any other iterable of strings (list, tuple, set, ...) is
        accepted as well.
    :param str area_type: one of *'loadzone'*, *'state'*, *'state_abbr'*,
        *'interconnect'*. If set to None, type will be inferred.
    :param bool plot: If True, the supply curve plot is shown. If False, the plot is
        not shown.
    :return: (*tuple*) -- First element is a list of capacity (MW) amounts needed
        to create supply curve. Second element is a list of bids ($/MW) in the supply
        curve. Both lists are empty when the area contains no generator of the
        requested type(s).
    :raises TypeError: if a powersimdata.input.grid.Grid object is not input or if
        the number of segments is not an int.
    :raises ValueError: if the specified area or generator type is not applicable.
    """

    # Check that a Grid object is input
    if not isinstance(grid, Grid):
        raise TypeError("A Grid object must be input.")

    # Check that the desired number of linearized cost curve segments is an int
    if not isinstance(num_segments, int):
        raise TypeError(
            "The number of linearized cost curve segments must be input as an int."
        )

    # Normalize the generation type(s) to a set. A bare string must be wrapped
    # (otherwise it would be split into characters); any other iterable is
    # converted so that the set operations below work for lists and tuples too.
    gen_type = {gen_type} if isinstance(gen_type, str) else set(gen_type)

    # Obtain the desired generator cost and plant information data
    supply_data = get_supply_data(grid, num_segments)

    # Check the input supply data
    check_supply_data(supply_data, num_segments)

    # Check to make sure the generator type is valid
    if len(gen_type - set(supply_data["type"].unique())) > 0:
        raise ValueError(f"{gen_type} contains invalid generation type.")

    # Identify the load zones that correspond to the specified area and area_type
    returned_zones = grid.model_immutables.area_to_loadzone(
        area, area_type=area_type)

    # Trim the DataFrame to only be of the desired area and generation type
    in_area = supply_data["zone_name"].isin(returned_zones)
    of_type = supply_data["type"].isin(gen_type)
    supply_data = supply_data.loc[in_area & of_type]

    # Remove generators that have no capacity (e.g., Maine coal generators).
    # A new frame is built instead of mutating the slice in place, which avoids
    # pandas' chained-assignment pitfalls.
    supply_data = supply_data.dropna(subset=["slope1"])

    # Check if the area contains generators of the specified type
    if supply_data.empty:
        return [], []

    # Combine the p_diff and slope information for each cost segment: every
    # segment contributes one (p_diff, slope) row per generator.
    segments = [
        supply_data[[f"p_diff{i}", f"slope{i}"]].rename(
            columns={f"p_diff{i}": "p_diff", f"slope{i}": "slope"})
        for i in range(1, num_segments + 1)
    ]
    supply_df = pd.concat(segments, axis=0)

    # Sort the trimmed DataFrame by slope
    supply_df = supply_df.sort_values(by="slope").reset_index(drop=True)

    # Determine the points that comprise the supply curve. Each segment adds
    # two points at the same price: where its capacity block starts and ends.
    capacity_data = []
    price_data = []
    capacity_diff_sum = 0
    for p_diff, slope in zip(supply_df["p_diff"].to_numpy(),
                             supply_df["slope"].to_numpy()):
        capacity_data.extend([capacity_diff_sum, capacity_diff_sum + p_diff])
        price_data.extend([slope, slope])
        capacity_diff_sum += p_diff

    # Plot the curve
    if plot:
        plt = _check_import("matplotlib.pyplot")
        plt.figure(figsize=[20, 10])
        plt.plot(capacity_data, price_data)
        plt.title(f"Supply curve for selected generators in {area}",
                  fontsize=20)
        # Sort the types so the legend text is deterministic (set order is not)
        plt.legend(["Generation types:\n{}".format("\n".join(sorted(gen_type)))],
                   loc="best")
        plt.xlabel("Capacity (MW)", fontsize=20)
        plt.ylabel("Price ($/MW)", fontsize=20)
        plt.xticks(fontsize=20)
        plt.yticks(fontsize=20)
        plt.show()

    # Return the capacity and bid amounts
    return capacity_data, price_data
コード例 #14
0
def export_to_pypsa(
    scenario_or_grid,
    add_all_columns=False,
    add_substations=False,
    add_load_shedding=True,
):
    """Export a Scenario/Grid instance to a PyPSA network.

    .. note::
        This function does not export storages yet.

    :param powersimdata.scenario.scenario.Scenario/
        powersimdata.input.grid.Grid scenario_or_grid: input object. If a Grid instance
        is passed, operational values will be used for the single snapshot "now".
        If a Scenario instance is passed, all available time-series will be
        imported.
    :param bool add_all_columns: whether to add all columns of the
        corresponding component. If true, this will also import columns
        that PyPSA does not process. The default is False.
    :param bool add_substations: whether to export substations. If set
        to True, artificial links of infinite capacity are added from each bus
        to its substation. This is necessary as the substations are imported
        as regular buses in pypsa and thus require a connection to the network.
        If set to False, the substations will not be exported. This is
        helpful when there are no branches or dclinks connecting the
        substations. Note that the voltage level of the substation buses is set
        to the first bus connected to that substation. The default is False.
    :param bool add_load_shedding: whether to add artificial load shedding
        generators to the exported pypsa network. This ensures feasibility when
        optimizing the exported pypsa network as is. The default is True.
    :return: (*pypsa.Network*) -- the newly created PyPSA network.
    :raises TypeError: if ``scenario_or_grid`` is neither a Grid nor a Scenario.
    """
    pypsa = _check_import("pypsa")

    if isinstance(scenario_or_grid, Grid):
        grid = scenario_or_grid
        scenario = None
    elif isinstance(scenario_or_grid, Scenario):
        grid = scenario_or_grid.get_grid()
        scenario = scenario_or_grid
    else:
        # Report the type of the *argument*; the local ``scenario`` is not
        # bound on this path and referencing it would mask the TypeError.
        raise TypeError(
            "Expected type powersimdata.Grid or powersimdata.Scenario, "
            f"got {type(scenario_or_grid)}."
        )

    drop_cols = []

    # BUS, LOAD & SUBSTATION
    bus_rename = pypsa_const["bus"]["rename"]
    bus_rename_t = pypsa_const["bus"]["rename_t"]

    if not add_all_columns:
        # Copy the defaults: extending the shared constant in place would leak
        # the time-dependent columns into every subsequent call.
        drop_cols = list(pypsa_const["bus"]["default_drop_cols"])
        if scenario:
            drop_cols += list(bus_rename_t)

    buses = grid.bus.rename(columns=bus_rename)
    # Assign the result back instead of a chained in-place replace, which is a
    # no-op under pandas copy-on-write.
    buses["control"] = buses["control"].replace(
        [1, 2, 3, 4], ["PQ", "PV", "slack", ""]
    )
    buses["zone_name"] = buses.zone_id.map({v: k for k, v in grid.zone2id.items()})
    buses["substation"] = "sub" + grid.bus2sub["sub_id"].astype(str)

    # ensure compatibility with substations (these are imported later)
    buses["is_substation"] = False
    buses["interconnect_sub_id"] = -1
    buses["name"] = ""

    loads = {"proportionality_factor": buses["Pd"]}

    shunts = {k: buses.pop(k) for k in ["b_pu", "g_pu"]}

    substations = grid.sub.copy().rename(columns={"lat": "y", "lon": "x"})
    substations.index = "sub" + substations.index.astype(str)
    substations["is_substation"] = True
    substations["substation"] = substations.index
    # Voltage level of a substation bus = that of the first bus attached to it
    v_nom = buses.groupby("substation").v_nom.first().reindex(substations.index)
    substations["v_nom"] = v_nom

    buses = buses.drop(columns=drop_cols, errors="ignore").sort_index(axis=1)

    # now time-dependent
    if scenario:
        buses_t = {}
        loads_t = {"p_set": scenario.get_bus_demand()}
    else:
        # Single snapshot "now": turn each operational column into a 1-row frame
        buses_t = {v: buses.pop(k).to_frame("now").T for k, v in bus_rename_t.items()}
        buses_t["v_ang"] = np.deg2rad(buses_t["v_ang"])

        loads_t = {"p": buses_t.pop("p"), "q": buses_t.pop("q")}

    # GENERATOR & COSTS
    generator_rename = pypsa_const["generator"]["rename"]
    generator_rename_t = pypsa_const["generator"]["rename_t"]

    if not add_all_columns:
        drop_cols = list(pypsa_const["generator"]["default_drop_cols"])
        if scenario:
            drop_cols += list(generator_rename_t)

    generators = grid.plant.rename(columns=generator_rename)
    # Express the minimum output per unit of p_nom; a divisor of 1 is used for
    # zero-capacity plants to avoid dividing by zero.
    generators.p_min_pu /= generators.p_nom.where(generators.p_nom != 0, 1)
    generators["committable"] = generators.p_min_pu > 0
    generators["ramp_limit_down"] = generators.ramp_limit.replace(0, np.nan)
    generators["ramp_limit_up"] = generators.ramp_limit.replace(0, np.nan)
    generators.drop(columns=drop_cols + ["ramp_limit"], inplace=True)

    gencost = grid.gencost["before"].copy()
    # Linearize quadratic curves as applicable. Assuming a cost of the form
    # c2 * p**2 + c1 * p + c0, c1 + c2 * (Pmax + Pmin) is the slope of the
    # secant between Pmin and Pmax. Plants with Pmin == Pmax keep their c1.
    fixed = grid.plant["Pmin"] == grid.plant["Pmax"]
    linearized = gencost.loc[~fixed, "c1"] + gencost.loc[~fixed, "c2"] * (
        grid.plant.loc[~fixed, "Pmax"] + grid.plant.loc[~fixed, "Pmin"]
    )
    gencost["c1"] = linearized.combine_first(gencost["c1"])
    gencost = gencost.rename(columns=pypsa_const["cost"]["rename"])
    gencost = gencost[pypsa_const["cost"]["rename"].values()]

    carriers = pd.DataFrame(index=generators.carrier.unique(), dtype=object)

    cars = carriers.index
    constants = grid.model_immutables.plants
    carriers["color"] = pd.Series(constants["type2color"]).reindex(cars)
    carriers["nice_name"] = pd.Series(constants["type2label"]).reindex(cars)
    carriers["co2_emissions"] = (
        pd.Series(constants["carbon_per_mwh"]).div(1e3)
        * pd.Series(constants["efficiency"])
    ).reindex(cars, fill_value=0)
    generators["efficiency"] = generators.carrier.map(constants["efficiency"]).fillna(0)

    # now time-dependent
    if scenario:
        dfs = [scenario.get_wind(), scenario.get_solar(), scenario.get_hydro()]
        p_max_pu = pd.concat(dfs, axis=1)
        p_nom = generators.p_nom[p_max_pu.columns]
        p_max_pu = p_max_pu / p_nom.where(p_nom != 0, 1)
        generators_t = {"p_max_pu": p_max_pu}
        # drop p_nom_min of renewables, make them non-committable
        generators.loc[p_max_pu.columns, "p_min_pu"] = 0
        generators.loc[p_max_pu.columns, "committable"] = False
    else:
        generators_t = {
            v: generators.pop(k).to_frame("now").T
            for k, v in generator_rename_t.items()
        }

    # BRANCHES
    branch_rename = pypsa_const["branch"]["rename"]
    branch_rename_t = pypsa_const["branch"]["rename_t"]

    if not add_all_columns:
        drop_cols = list(pypsa_const["branch"]["default_drop_cols"])
        if scenario:
            drop_cols += list(branch_rename_t)

    branches = grid.branch.rename(columns=branch_rename).drop(columns=drop_cols)
    branches["v_nom"] = branches.bus0.map(buses.v_nom)
    # BE model assumes a 100 MVA base, pypsa "assumes" a 1 MVA base
    branches[["x_pu", "r_pu"]] /= 100
    branches["x"] = branches.x_pu * branches.v_nom**2
    branches["r"] = branches.r_pu * branches.v_nom**2

    lines = branches.query("branch_device_type == 'Line'")
    lines = lines.drop(columns="branch_device_type")

    transformers = branches.query(
        "branch_device_type in ['TransformerWinding', 'Transformer']"
    )

    if scenario:
        lines_t = {}
        transformers_t = {}
    else:
        lines_t = {
            v: lines.pop(k).to_frame("now").T for k, v in branch_rename_t.items()
        }
        transformers_t = {
            v: transformers.pop(k).to_frame("now").T for k, v in branch_rename_t.items()
        }

    # DC LINES
    link_rename = pypsa_const["link"]["rename"]
    link_rename_t = pypsa_const["link"]["rename_t"]

    if not add_all_columns:
        drop_cols = list(pypsa_const["link"]["default_drop_cols"])
        if scenario:
            drop_cols += list(link_rename_t)

    links = grid.dcline.rename(columns=link_rename).drop(columns=drop_cols)
    links.p_min_pu /= links.p_nom.where(links.p_nom != 0, 1)

    # SUBSTATION CONNECTORS
    # One bidirectional (p_min_pu=-1) link of infinite capacity per bus,
    # connecting the bus to its substation.
    sublinks = dict(
        bus0=buses.index, bus1=buses.substation.values, p_nom=np.inf, p_min_pu=-1
    )
    index = "sub" + pd.RangeIndex(len(buses)).astype(str)
    sublinks = pd.DataFrame(sublinks, index=index)

    if scenario:
        links_t = {}
    else:
        links_t = {v: links.pop(k).to_frame("now").T for k, v in link_rename_t.items()}

    # TODO: add storage export
    if not grid.storage["gen"].empty:
        warnings.warn("The export of storages are not implemented yet.")

    # Import everything to a new pypsa network
    n = pypsa.Network()
    if scenario:
        n.snapshots = loads_t["p_set"].index
    n.madd("Bus", buses.index, **buses, **buses_t)
    n.madd("Load", buses.index, bus=buses.index, **loads, **loads_t)
    n.madd("ShuntImpedance", buses.index, bus=buses.index, **shunts)
    n.madd("Generator", generators.index, **generators, **gencost, **generators_t)
    n.madd("Carrier", carriers.index, **carriers)
    n.madd("Line", lines.index, **lines, **lines_t)
    n.madd("Transformer", transformers.index, **transformers, **transformers_t)
    n.madd("Link", links.index, **links, **links_t)

    if add_substations:
        n.madd("Bus", substations.index, **substations)
        n.madd("Link", sublinks.index, **sublinks)

    if add_load_shedding:
        # Load shedding is modelled by very costly generators whose power output
        # is measured in kW (see the factor `sign`). This keeps the coefficient
        # range in the LOPF low.
        n.madd(
            "Generator",
            buses.index,
            suffix=" load shedding",
            bus=buses.index,
            sign=1e-3,
            marginal_cost=1e2,
            p_nom=1e9,
            carrier="load",
        )
        n.add("Carrier", "load", nice_name="Load Shedding", color="red")

    n.name = ", ".join([grid.data_loc] + grid.interconnect)
    return n