def write_poly_shapefile():
    """Build the ReEDS wind resource region shapefile from its csv description.

    The vertex table is read from ``const.reeds_wind_csv_path`` and left-joined
    to the hierarchy table read from ``const.reeds_mapping_hierarchy_path``. Only
    rows whose country is *'usa'* and which are not flagged as holes are kept.
    One Polygon feature is written per distinct value of the *'group'* column,
    carrying the region *'id'* as its single attribute.

    .. note::
        *gis_rs.csv* is from ReEDS open-source: */bokehpivot/in/gis_rs.csv*,
        *hierarchy.csv* is from: */bokehpivot/in/reeds2/hierarchy.csv*.
    """
    fiona = _check_import("fiona")
    geometry_module = _check_import("shapely.geometry")

    target_dir = const.reeds_wind_shapefile_path
    os.makedirs(target_dir, exist_ok=True)

    vertices = pd.read_csv(
        const.reeds_wind_csv_path,
        sep=",",
        dtype={"id": object, "group": object},
    )
    region_hierarchy = pd.read_csv(const.reeds_mapping_hierarchy_path)
    vertices = vertices.merge(
        region_hierarchy, left_on="id", right_on="rs", how="left"
    )
    vertices = vertices[vertices["country"] == "usa"]
    # Discard the vertices that describe holes, then the flag itself
    vertices = vertices[vertices["hole"] == False]  # noqa: E712
    vertices = vertices.drop("hole", axis=1)

    # A polygon feature geometry with a single string attribute
    feature_schema = {"geometry": "Polygon", "properties": {"id": "str"}}

    with fiona.open(target_dir, "w", "ESRI Shapefile", feature_schema) as sink:
        # One feature per group of vertices, in order of first appearance
        for group_label in vertices.group.drop_duplicates():
            members = vertices[vertices["group"] == group_label]
            region_id = members["id"].drop_duplicates().to_numpy()[0]
            ring = [
                (members.loc[row, "long"], members.loc[row, "lat"])
                for row in members.index
            ]
            feature = {
                "geometry": geometry_module.mapping(geometry_module.Polygon(ring)),
                "properties": {"id": region_id},
            }
            sink.write(feature)
def convert_shapefile_to_latlon_dict(filename, key):
    """Convert a shapefile to a dictionary of lat/lon data.

    :param str filename: the location of the shapefile to interpret.
    :param str key: the shapefile column values used as dictionary keys.
    :return: (*dict*) -- dictionary with keys from the specified shapefile column,
        values are dict with keys of {"lats", "lons"}, values are coordinate
        arrays, padded by nan values to indicate the end of each polygon before
        the start of the next one (no padding after the last polygon).
    :raises ValueError: if the specified key is not present in the shapefile, or
        the shapefile contains at least one polygon with a hole.
    """
    gpd = _check_import("geopandas")
    shapes = gpd.read_file(filename)
    if key not in shapes.columns:
        raise ValueError("key must be present in the columns of the shapefile")

    # Split every shape into its single-part polygons by hand. Multi-part
    # geometries expose their members through ``geoms``; a plain polygon is its
    # own single part. This avoids depending on the index layout returned by
    # ``GeoDataFrame.explode``, which differs between geopandas versions.
    parts_per_shape = [
        list(getattr(geometry, "geoms", [geometry])) for geometry in shapes.geometry
    ]
    if any(len(part.interiors) > 0 for parts in parts_per_shape for part in parts):
        raise ValueError("Cannot convert shapes with holes")

    nan_separator = np.full((1, 2), np.nan)
    keys_to_latlon_dicts = {}
    for label, parts in zip(shapes[key], parts_per_shape):
        # Alternate (n, 2) exterior ring arrays with a (nan, nan) separator row
        pieces = []
        for part in parts:
            pieces.append(np.array(part.exterior.xy).T)
            pieces.append(nan_separator)
        # The last separator is dropped: padding only goes in between polygons
        nanpadded_array = np.concatenate(pieces[:-1]).T
        # Row 0 holds x (longitude), row 1 holds y (latitude)
        keys_to_latlon_dicts[label] = {
            "lats": nanpadded_array[1],
            "lons": nanpadded_array[0],
        }

    return keys_to_latlon_dicts
def points_to_polys(df, name, shpfile, search_dist=0.04):
    """Map each row of a data frame to the closest region of a shapefile.

    :param pandas.DataFrame df: includes an index, and 'lat' and 'lon' columns.
    :param str name: what to name the id (bus, plant, substation, etc); the id
        column of the result is called ``name + "_id"``.
    :param str shpfile: name of shapefile containing a collection of
        Polygon/Multipolygon shapes with region IDs.
    :param float/int search_dist: distance to search from point for nearest polygon.
    :raises ValueError: if some points are dropped because too far away from polys.
    :return: (*geopandas.GeoDataFrame*) -- columns: index id, (point) geometry,
        [region, other properties of region]
    """
    gpd = _check_import("geopandas")
    target_crs = "EPSG:4326"

    regions = gpd.read_file(shpfile)
    # A shapefile without a crs is tagged with the target crs; one with a
    # different crs is converted to it.
    if regions.crs is None:
        regions.crs = target_crs
    elif regions.crs != target_crs:
        regions = regions.to_crs(target_crs)

    # Turn the lat/lon columns into a Points geodataframe keyed by the df index
    id_column = name + "_id"
    id_frame = pd.DataFrame({id_column: df.index})
    point_geometry = gpd.points_from_xy(df.lon, df.lat)
    points = gpd.GeoDataFrame(id_frame, geometry=point_geometry, crs=target_crs)

    # Attach to each point the region it lies in, or else the nearest one
    matched = sjoin_nearest(left_df=points, right_df=regions, search_dist=search_dist)
    matched = matched.drop("index_right", axis=1)

    if len(points) <= len(matched):
        return matched

    was_matched = points[id_column].isin(matched[id_column])
    dropped = points.loc[~was_matched, id_column].to_list()
    raise ValueError(
        "Some points dropped because could not be mapped to regions. "
        "Check your lat/lon values to be sure it's in the US. "
        f"Or increase search_dist if close. Problem ids: {dropped}"
    )
def points_to_polys(df, name, shpfile, search_dist=0.04):
    """Map node to closest region.

    :param pandas.DataFrame df: data frame with node id as index and *'lat'* and
        *'lon'* as columns.
    :param str name: name of node, e.g., bus, plant, substation, etc. The id
        column of the returned data frame is named ``name + "_id"``.
    :param str shpfile: shapefile enclosing Polygon/Multipolygon with region id.
    :param float/int search_dist: radius around point to detect polygons.
    :raises ValueError: if some points are dropped because too far away from
        polygons.
    :return: (*geopandas.GeoDataFrame*) -- columns: id name, (point) geometry,
        region and properties of region.
    """
    gpd = _check_import("geopandas")
    wanted_crs = "EPSG:4326"
    node_id = name + "_id"

    shapes = gpd.read_file(shpfile)
    # Assign the crs when the file has none, reproject when it has another one
    if shapes.crs is None:
        shapes.crs = wanted_crs
    elif shapes.crs != wanted_crs:
        shapes = shapes.to_crs(wanted_crs)

    # Nodes as a geodataframe of Points
    nodes = gpd.GeoDataFrame(
        pd.DataFrame({node_id: df.index}),
        geometry=gpd.points_from_xy(df.lon, df.lat),
        crs=wanted_crs,
    )

    # Region enclosing each node, or the closest one when no region encloses it
    joined = sjoin_nearest(
        left_df=nodes, right_df=shapes, search_dist=search_dist
    ).drop("index_right", axis=1)

    some_nodes_lost = len(nodes) > len(joined)
    if some_nodes_lost:
        lost_mask = ~nodes[node_id].isin(joined[node_id])
        dropped = nodes[lost_mask][node_id].to_list()
        err_msg = (
            "Some points dropped because could not be mapped to regions. "
            "Check your lat/lon values to be sure it's in the US. "
            f"Or increase search_dist. ids dropped: {dropped}"
        )
        raise ValueError(err_msg)
    return joined
def sjoin_nearest(left_df, right_df, search_dist=0.06):
    """Perform a spatial join between two input layers.

    If a geometry in left_df falls outside (all) geometries in right_df, the data
    from nearest Polygon will be used as a result. To make queries faster, change
    "search_dist."

    :param geopandas.GeoDataFrame left_df: A dataframe of Points.
    :param geopandas.GeoDataFrame right_df: A dataframe of Polygons/Multipolygons
    :param float/int search_dist: parameter (specified in map units) is used to
        limit the search area for geometries around source points. Smaller ->
        faster runtime.
    :return: (*geopandas.GeoDataFrame*) -- A dataframe of Points mapped to each
        polygon in right_df, with an extra 'dist' column (0 for points lying
        inside a polygon, distance to the polygon exterior otherwise).
    :raises ValueError: if either input already has a 'dist' column, or if a
        point lying outside every polygon has no polygon within *search_dist*.
    """
    # Local import so that this function does not depend on a module-level alias
    import pandas as pd

    def _find_nearest(series, polygons, search_dist):
        """Given a row with a bus id and a Point, find the closest polygon.

        :param pandas.Series series: point to map.
        :param geopandas.geodataframe.GeoDataFrame polygons: polygons to select
            from.
        :param float search_dist: radius around point to detect polygons in.
        :return: (*pandas.Series*) -- the point row joined with the attributes of
            the closest polygon and the distance to it.
        :raises ValueError: if no polygon intersects the search area.
        """
        geom = series[left_df.geometry.name]
        # Get geometries within search distance
        candidates = polygons.loc[polygons.intersects(geom.buffer(search_dist))]
        if len(candidates) == 0:
            raise ValueError(f"No polygons found within {search_dist} of {series.name}")
        # Select the closest Polygon
        distances = candidates.apply(
            lambda x: geom.distance(x[candidates.geometry.name].exterior), axis=1
        )
        # idxmin must be *called*: indexing .loc with the bound method itself
        # makes pandas treat it as a callable indexer instead of a label.
        closest_poly = polygons.loc[distances.idxmin()].to_frame().T
        # Reset index
        series = series.to_frame().T.reset_index(drop=True)
        # Drop geometry from closest polygon
        closest_poly = closest_poly.drop(polygons.geometry.name, axis=1)
        closest_poly = closest_poly.reset_index(drop=True)
        # Join values
        join = series.join(closest_poly, lsuffix="_left", rsuffix="_right")
        # Add information about distance to closest geometry
        join["dist"] = distances.min()
        return join.squeeze()

    gpd = _check_import("geopandas")

    if "dist" in (set(left_df.columns) | set(right_df.columns)):
        raise ValueError("neither series nor polygons can contain a 'dist' column")

    # Explode possible MultiGeometries. This is a major speedup!
    right_df = right_df.explode()
    right_df = right_df.reset_index(drop=True)

    # Make spatial join between points that fall inside the Polygons
    # NOTE(review): newer geopandas releases name this argument `predicate`
    # instead of `op` -- confirm against the pinned geopandas version.
    points_in_regions = gpd.sjoin(left_df=left_df, right_df=right_df, op="intersects")
    points_in_regions["dist"] = 0

    # Find closest Polygons, for points that don't fall within any. A boolean
    # mask keeps the original row order and avoids indexing .loc with a set.
    outside_any_region = ~left_df.index.isin(points_in_regions.index)
    points_not_in_regions = left_df.loc[outside_any_region]
    closest_geometries = points_not_in_regions.apply(
        _find_nearest, args=(right_df, search_dist), axis=1
    )

    # Merge everything together
    closest_geometries = gpd.GeoDataFrame(closest_geometries)
    result = pd.concat(
        [points_in_regions, closest_geometries], ignore_index=True, sort=False
    )
    return result
def write_poly_shapefile():
    """Convert the ReEDS wind resource region csv file to a shapefile.

    Shouldn't need to run again unless new source data.
    Right now, hard-coded read ReEDS wind resource regions (labelled rs).
    gis_rs.csv is from ReEDS open-source: "/bokehpivot/in/gis_rs.csv"
    hierarchy.csv is from: "/bokehpivot/in/reeds2/hierarchy.csv"
    writes out the shapefile in "rs/rs.shp"

    Only rows whose country is 'usa' and that are not flagged as holes are
    used; one Polygon with a single 'id' attribute is written per 'group'.

    Note: These ReEDS wind resource region shapes are approximate. Thus, there
    are probably some mistakes, but this is currently only used for mapping
    plant regional multipliers, which are approximate anyway, so it should be
    fine.
    """
    fiona = _check_import("fiona")
    shapely_geometry = _check_import("shapely.geometry")
    make_polygon = shapely_geometry.Polygon
    to_geojson = shapely_geometry.mapping

    shapefile_dir = const.reeds_wind_shapefile_path
    os.makedirs(shapefile_dir, exist_ok=True)

    csv_dtypes = {"id": object, "group": object}
    table = pd.read_csv(const.reeds_wind_csv_path, sep=",", dtype=csv_dtypes)
    table = table.merge(
        pd.read_csv(const.reeds_mapping_hierarchy_path),
        left_on="id",
        right_on="rs",
        how="left",
    )
    is_usa = table["country"] == "usa"
    table = table[is_usa]

    # Remove holes
    not_hole = table["hole"] == False  # noqa: E712
    table = table[not_hole].drop("hole", axis=1)

    # Polygon features carrying one attribute
    schema = {
        "geometry": "Polygon",
        "properties": {"id": "str"},
    }

    # Write a new Shapefile, one record per group in order of first appearance
    group_labels = table.group.drop_duplicates()
    with fiona.open(shapefile_dir, "w", "ESRI Shapefile", schema) as shapefile:
        for label in group_labels:
            group_rows = table[table["group"] == label]
            first_id = group_rows["id"].drop_duplicates().to_numpy()[0]
            outline = list(
                zip(group_rows["long"].to_numpy(), group_rows["lat"].to_numpy())
            )
            shapefile.write(
                {
                    "geometry": to_geojson(make_polygon(outline)),
                    "properties": {"id": first_id},
                }
            )
def sjoin_nearest(left_df, right_df, search_dist=0.06):
    """Perform a spatial join between two input layers.

    :param geopandas.GeoDataFrame left_df: A dataframe of Points. The overlap
        filtering below reads its *'bus_id'* and *'geometry'* columns.
    :param geopandas.GeoDataFrame right_df: A dataframe of
        Polygons/Multipolygons. The overlap filtering below reads its
        *'name_abbr'* column.
    :param float/int search_dist: radius (in map units) around point to detect
        polygons.
    :return: (*geopandas.GeoDataFrame*) -- data frame of Points mapped to each
        Polygon, with an extra *'dist'* column (0 for points lying inside a
        polygon, distance to the polygon exterior otherwise).
    :raises ValueError: if either input already has a *'dist'* column, or if a
        point lying outside every polygon has no polygon within *search_dist*.

    .. note:: data from nearest Polygon/Multipolygon will be used as a result if
        a Point falls outside all available Polygon/Multipolygons.
    """
    # Local import so that this function does not depend on a module-level alias
    import pandas as pd

    def _find_nearest(series, polygons, search_dist):
        """Find the closest polygon.

        :param pandas.Series series: point to map.
        :param geopandas.geodataframe.GeoDataFrame polygons: polygons to select
            from.
        :param float search_dist: radius around point to detect polygons.
        :return: (*pandas.Series*) -- the point row joined with the attributes of
            the closest polygon and the distance to it.
        :raises ValueError: if no polygon intersects the search area.
        """
        geom = series[left_df.geometry.name]
        # Get geometries within search distance
        candidates = polygons.loc[polygons.intersects(geom.buffer(search_dist))]
        if len(candidates) == 0:
            raise ValueError(f"No polygons found within {search_dist} of {series.name}")
        # Select the closest Polygon
        distances = candidates.apply(
            lambda x: geom.distance(x[candidates.geometry.name].exterior), axis=1
        )
        # idxmin must be *called*: indexing .loc with the bound method itself
        # makes pandas treat it as a callable indexer instead of a label.
        closest_poly = polygons.loc[distances.idxmin()].to_frame().T
        # Reset index
        series = series.to_frame().T.reset_index(drop=True)
        # Drop geometry from closest polygon
        closest_poly = closest_poly.drop(polygons.geometry.name, axis=1)
        closest_poly = closest_poly.reset_index(drop=True)
        # Join values
        join = series.join(closest_poly, lsuffix="_left", rsuffix="_right")
        # Add information about distance to closest geometry
        join["dist"] = distances.min()
        return join.squeeze()

    gpd = _check_import("geopandas")

    if "dist" in (set(left_df.columns) | set(right_df.columns)):
        raise ValueError("neither series nor polygons can contain a 'dist' column")

    # Explode possible MultiGeometries. This is a major speedup!
    right_df = right_df.explode()
    right_df = right_df.reset_index(drop=True)

    # Make spatial join between points that fall inside the Polygons
    # NOTE(review): newer geopandas releases name this argument `predicate`
    # instead of `op` -- confirm against the pinned geopandas version.
    points_in_regions = gpd.sjoin(left_df=left_df, right_df=right_df, op="intersects")
    points_in_regions["dist"] = 0

    # Since polygons may overlap, there can be duplicated buses that we want to
    # filter: a point inside several polygons shows up once per polygon.
    duplicated = points_in_regions.loc[points_in_regions.index.duplicated(keep=False)]
    to_drop = set()
    for bus in set(duplicated["bus_id"]):
        entries = duplicated.query("bus_id == @bus")
        # First duped entry, only point
        coords = entries["geometry"].iloc[0].coords[0]
        regions = set(entries["name_abbr"])  # noqa: F841
        # Unambiguous points located in any of the competing regions
        candidates = points_in_regions.query(
            "index not in @duplicated.index and name_abbr in @regions"
        )
        # NOTE(review): both arguments are passed as (x, y); confirm this is the
        # coordinate order the haversine helper expects.
        neighbor = candidates.apply(
            lambda x: haversine((x.geometry.x, x.geometry.y), coords), axis=1
        ).idxmin()
        # The region of the nearest unambiguous point wins
        closest_region = candidates.loc[neighbor, "name_abbr"]  # noqa: F841
        # There may be more than two overlapping geometries, capture all but the
        # closest
        drop_regions = set(entries.query("name_abbr != @closest_region")["name_abbr"])
        # Since indices are duplicated, we need to drop via two-column tuples
        to_drop |= {(bus, d) for d in drop_regions}
    points_in_regions = points_in_regions.loc[
        ~points_in_regions.set_index(["bus_id", "name_abbr"]).index.isin(to_drop)
    ]

    # Find closest Polygons, for points that don't fall within any. A boolean
    # mask keeps the original row order and avoids indexing .loc with a set.
    outside_any_region = ~left_df.index.isin(points_in_regions.index)
    points_not_in_regions = left_df.loc[outside_any_region]
    closest_geometries = points_not_in_regions.apply(
        _find_nearest, args=(right_df, search_dist), axis=1
    )

    # Merge everything together
    closest_geometries = gpd.GeoDataFrame(closest_geometries)
    result = pd.concat(
        [points_in_regions, closest_geometries], ignore_index=True, sort=False
    )
    return result
def plot_capacity_vs_price(
    grid, num_segments, area, gen_type, area_type=None, plot=True
):
    """Scatter the segment capacity against the segment price for the generators
    of a given type located in a given area.

    :param powersimdata.input.grid.Grid grid: Grid object.
    :param int num_segments: The number of segments into which the piecewise
        linear cost curve is split.
    :param str area: Either the load zone, state name, state abbreviation, or
        interconnect.
    :param str gen_type: Generation type.
    :param str area_type: one of: *'loadzone'*, *'state'*, *'state_abbr'*,
        *'interconnect'*. Defaults to None, which allows
        :func:`powersimdata.network.model.area_to_loadzone` to infer the type.
    :param bool plot: If True, the supply curve plot is shown. If False, the plot
        is not shown.
    :return: (*None*) -- The capacity vs. price plot is displayed according to
        the user. Nothing is drawn when the area has no generator of the
        requested type.
    :raises TypeError: if a powersimdata.input.grid.Grid object is not input, or
        if the number of segments is not an int.
    :raises ValueError: if the specified area or generator type is not applicable.
    """
    plt = _check_import("matplotlib.pyplot")

    # Input validation
    if not isinstance(grid, Grid):
        raise TypeError("A Grid object must be input.")
    if not isinstance(num_segments, int):
        raise TypeError(
            "The number of linearized cost curve segments must be input as an int."
        )

    # Generator cost and plant information, validated by the shared checker
    supply = get_supply_data(grid, num_segments)
    check_supply_data(supply, num_segments)

    if gen_type not in supply["type"].unique():
        raise ValueError(f"{gen_type} is not a valid generation type.")

    # Keep the generators of the requested type inside the requested area
    zones_in_area = area_to_loadzone(grid.get_grid_model(), area, area_type)
    supply = supply.loc[supply.zone_name.isin(zones_in_area)]
    supply = supply.loc[supply["type"] == gen_type]

    # Generators without capacity have no slope (e.g., Maine coal generators)
    supply = supply.dropna(subset=["slope1"])

    if supply.empty:
        return

    # Stack the (p_diff, slope) pair of every cost segment under common names
    per_segment = []
    for segment in range(1, num_segments + 1):
        width_col = "p_diff" + str(segment)
        price_col = "slope" + str(segment)
        pair = supply.loc[:, (width_col, price_col)]
        per_segment.append(
            pair.rename(columns={width_col: "p_diff", price_col: "slope"})
        )
    stacked = pd.concat(per_segment, axis=0).reset_index(drop=True)

    # Capacity-weighted average price, 0 when there is no capacity at all
    total_cap = stacked["p_diff"].sum()
    if total_cap != 0:
        data_avg = (stacked["slope"] * stacked["p_diff"]).sum() / total_cap
    else:
        data_avg = 0

    if not plot:
        return

    ax = stacked.plot.scatter(
        x="p_diff", y="slope", s=50, figsize=[20, 10], grid=True, fontsize=20
    )
    plt.title(f"Capacity vs. Price for {gen_type} generators in {area}", fontsize=20)
    plt.xlabel("Segment Capacity (MW)", fontsize=20)
    plt.ylabel("Segment Price ($/MW)", fontsize=20)
    # Horizontal reference line at the average price
    ax.plot(stacked["p_diff"], [data_avg] * len(stacked.index), c="red")
    plt.show()
def plot_c1_vs_c2(
    grid,
    area,
    gen_type,
    area_type=None,
    plot=True,
    zoom=False,
    num_sd=3,
    alpha=0.1,
):
    """Scatter the c1 parameter against the c2 parameter of the quadratic
    generator cost curves.

    :param powersimdata.input.grid.Grid grid: Grid object.
    :param str area: Either the load zone, state name, state abbreviation, or
        interconnect.
    :param str gen_type: Generation type.
    :param str area_type: one of: *'loadzone'*, *'state'*, *'state_abbr'*,
        *'interconnect'*. Defaults to None, which allows
        :func:`powersimdata.network.model.area_to_loadzone` to infer the type.
    :param bool plot: If True, the c1 vs. c2 plot is shown. If False, the plot is
        not shown.
    :param bool zoom: If True, filters out c2 outliers to enable better
        visualization. If False, there is no filtering.
    :param float/int num_sd: The number of standard deviations used to filter out
        c2 outliers.
    :param float alpha: The alpha blending value for the scatter plot; takes
        values between 0 (transparent) and 1 (opaque).
    :return: (*None*) -- The c1 vs. c2 plot is displayed according to the user.
        Nothing is drawn when the area has no generator of the requested type.
    :raises TypeError: if a powersimdata.input.grid.Grid object is not input.
    :raises ValueError: if the specified area or generator type is not applicable.
    """
    plt = _check_import("matplotlib.pyplot")

    if not isinstance(grid, Grid):
        raise TypeError("A Grid object must be input.")

    # Work on a private copy of the Grid object
    grid = copy.deepcopy(grid)
    cost_table = grid.gencost["before"]
    plant_table = grid.plant

    # Plant description columns next to the cost columns that are not excluded
    plant_part = plant_table[["type", "interconnect", "zone_name", "Pmin", "Pmax"]]
    kept_cost_columns = cost_table.columns.difference(
        ["type", "startup", "shutdown", "n", "interconnect"], sort=False
    )
    data = pd.concat([plant_part, cost_table[kept_cost_columns]], axis=1)

    if gen_type not in data["type"].unique():
        raise ValueError(f"{gen_type} is not a valid generation type.")

    # Keep the generators of the requested type inside the requested area
    zones_in_area = area_to_loadzone(grid.get_grid_model(), area, area_type)
    data = data.loc[data.zone_name.isin(zones_in_area)]
    data = data.loc[data["type"] == gen_type]

    # Remove generators that have no capacity (e.g., Maine coal generators)
    data = data[data["Pmin"] != data["Pmax"]]

    if data.empty:
        return

    # Optionally discard large c2 values so the overall trend is easier to see.
    # Axis limits are only set when something was actually discarded.
    zoom_name = ""
    axis_limits = None
    if zoom:
        sd_c2 = np.std(data["c2"])
        mean_c2 = np.mean(data["c2"])
        cutoff = mean_c2 + num_sd * sd_c2
        if (data["c2"] > cutoff).any():
            data = data[data["c2"] <= cutoff]
            max_ylim = np.max(data["c2"] + 0.01)
            min_ylim = np.min(data["c2"] - 0.01)
            max_xlim = np.max(data["c1"] + 1)
            min_xlim = np.min(data["c1"] - 1)
            axis_limits = ([min_xlim, max_xlim], [min_ylim, max_ylim])
            zoom_name = "(zoomed)"

    if not plot:
        return

    fig, ax = plt.subplots()
    fig.set_size_inches(20, 10)
    # Marker size and colour both encode the generator capacity
    plt.scatter(
        data["c1"],
        data["c2"],
        s=np.sqrt(data["Pmax"]) * 10,
        alpha=alpha,
        c=data["Pmax"],
        cmap="plasma",
    )
    plt.grid()
    plt.title(
        f"c1 vs. c2 for {gen_type} generators in {area} {zoom_name}", fontsize=20
    )
    if axis_limits is not None:
        x_limits, y_limits = axis_limits
        plt.ylim(y_limits)
        plt.xlim(x_limits)
    plt.xlabel("c1", fontsize=20)
    plt.ylabel("c2", fontsize=20)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    cbar = plt.colorbar()
    cbar.set_label("Capacity (MW)", fontsize=20)
    cbar.ax.tick_params(labelsize=20)
    plt.show()
def ks_test(P1, F1, P2, F2, area=None, gen_type=None, plot=True):
    """Runs a test that is similar to the Kolmogorov-Smirnov test. This function
    takes two supply curves as inputs and returns the greatest difference in price
    between the two supply curves. This function requires that the supply curves
    offer the same amount of capacity.

    :param list P1: List of capacity values for the first supply curve.
    :param list F1: List of price values for the first supply curve.
    :param list P2: List of capacity values for the second supply curve.
    :param list F2: List of price values for the second supply curve.
    :param str area: Either the load zone, state name, state abbreviation, or
        interconnect. Defaults to None because it's not essential.
    :param str gen_type: Generation type. Defaults to None because it's not
        essential.
    :param bool plot: If True, the supply curve plot is shown. If False, the plot
        is not shown.
    :return: (*float*) -- The maximum price difference between the two supply
        curves.
    :raises TypeError: if the capacity and price inputs are not provided as lists.
    :raises ValueError: if any of the inputs is empty, or if the supply curves do
        not offer the same amount of capacity.
    """
    # Check that input capacities and prices are provided as lists
    if not all(isinstance(i, list) for i in [P1, F1, P2, F2]):
        raise TypeError("P1, F1, P2, and F2 must be input as lists.")

    # An empty curve has no capacity to compare; say so explicitly instead of
    # letting max() fail with an unrelated message.
    if not all([P1, F1, P2, F2]):
        raise ValueError("P1, F1, P2, and F2 must not be empty.")

    # Check that the supply curves offer the same amount of capacity
    if max(P1) != max(P2):
        raise ValueError(
            "The two supply curves do not offer the same amount of capacity (MW)."
        )

    def _price_at(capacity, capacities, prices):
        """Return the price a supply curve asks at the given capacity value."""
        # The final capacity value maps onto the final price of the curve
        if capacity == capacities[-1]:
            return prices[-1]
        return prices[lower_bound_index(capacity, capacities)]

    # Every capacity value at which either supply curve steps up, in order
    P_all = sorted(set(P1) | set(P2))

    # Price difference between the two curves at each of those capacity values
    F_diff = [abs(_price_at(p, P1, F1) - _price_at(p, P2, F2)) for p in P_all]

    # Determine the maximum price difference
    max_diff = max(F_diff)

    # Plot the two supply curves overlaid
    if plot:
        plt = _check_import("matplotlib.pyplot")
        plt.figure(figsize=[20, 10])
        plt.plot(P1, F1)
        plt.plot(P2, F2)
        if None in {area, gen_type}:
            plt.title("Supply Curve Comparison", fontsize=20)
        else:
            plt.title(
                f"Supply curve comparison for {gen_type} generators in {area}",
                fontsize=20,
            )
        plt.xlabel("Capacity (MW)", fontsize=20)
        plt.ylabel("Price ($/MW)", fontsize=20)
        plt.xticks(fontsize=20)
        plt.yticks(fontsize=20)
        plt.show()

    # Return the maximum price difference (this corresponds to the K-S statistic)
    return max_diff
def build_supply_curve(grid, num_segments, area, gen_type, area_type=None, plot=True):
    """Build the supply curve of the generators of a given type in a given area.

    :param powersimdata.input.grid.Grid grid: Grid object.
    :param int num_segments: The number of segments into which the piecewise
        linear cost curve is split.
    :param str area: Either the load zone, state name, state abbreviation, or
        interconnect.
    :param str gen_type: Generation type.
    :param str area_type: one of: *'loadzone'*, *'state'*, *'state_abbr'*,
        *'interconnect'*. Defaults to None, which allows
        :func:`powersimdata.network.model.area_to_loadzone` to infer the type.
    :param bool plot: If True, the supply curve plot is shown. If False, the plot
        is not shown.
    :return: (*tuple*) -- First element is a list of capacity (MW) amounts needed
        to create supply curve. Second element is a list of bids ($/MW) in the
        supply curve. Both lists are empty when the area has no generator of the
        requested type.
    :raises TypeError: if a powersimdata.input.grid.Grid object is not input, or
        if the number of segments is not an int.
    :raises ValueError: if the specified area or generator type is not applicable.
    """
    # Input validation
    if not isinstance(grid, Grid):
        raise TypeError("A Grid object must be input.")
    if not isinstance(num_segments, int):
        raise TypeError(
            "The number of linearized cost curve segments must be input as an int."
        )

    # Generator cost and plant information, validated by the shared checker
    supply = get_supply_data(grid, num_segments)
    check_supply_data(supply, num_segments)

    if gen_type not in supply["type"].unique():
        raise ValueError(f"{gen_type} is not a valid generation type.")

    # Keep the generators of the requested type inside the requested area
    zones_in_area = area_to_loadzone(grid.get_grid_model(), area, area_type)
    supply = supply.loc[supply.zone_name.isin(zones_in_area)]
    supply = supply.loc[supply["type"] == gen_type]

    # Generators without capacity have no slope (e.g., Maine coal generators)
    supply = supply.dropna(subset=["slope1"])

    if supply.empty:
        return [], []

    # Stack the (p_diff, slope) pair of every cost segment under common names
    per_segment = []
    for segment in range(1, num_segments + 1):
        width_col = "p_diff" + str(segment)
        price_col = "slope" + str(segment)
        pair = supply.loc[:, (width_col, price_col)]
        per_segment.append(
            pair.rename(columns={width_col: "p_diff", price_col: "slope"})
        )
    curve = pd.concat(per_segment, axis=0)

    # Cheapest segments come first
    curve = curve.sort_values(by="slope").reset_index(drop=True)

    # Each segment contributes a horizontal step: two points sharing one price,
    # spanning from the capacity accumulated so far to that plus its own width
    P = []
    F = []
    running_capacity = 0
    for width, price in zip(curve["p_diff"].to_numpy(), curve["slope"].to_numpy()):
        P.extend((running_capacity, width + running_capacity))
        F.extend((price, price))
        running_capacity += width

    if plot:
        plt = _check_import("matplotlib.pyplot")
        plt.figure(figsize=[20, 10])
        plt.plot(P, F)
        plt.title(f"Supply curve for {gen_type} generators in {area}", fontsize=20)
        plt.xlabel("Capacity (MW)", fontsize=20)
        plt.ylabel("Price ($/MW)", fontsize=20)
        plt.xticks(fontsize=20)
        plt.yticks(fontsize=20)
        plt.show()

    # Return the capacity and bid amounts
    return P, F
import shutil
from zipfile import ZipFile

from powersimdata.network.constants.region.europe import (
    abv2country,
    abv2timezone,
    interconnect2abv,
)
from powersimdata.network.helpers import (
    check_and_format_interconnect,
    interconnect_to_name,
)
from powersimdata.network.model import ModelImmutables
from powersimdata.utility.helpers import _check_import

# Third-party packages obtained through the project helper when this module is
# imported (presumably so a missing package is reported by the helper rather
# than by a bare import -- confirm against _check_import).
pypsa = _check_import("pypsa")
zenodo_get = _check_import("zenodo_get")


class TUB:
    """PyPSA Europe network.

    :param str/iterable interconnect: interconnect name(s).
    :param int reduction: reduction parameter (number of nodes in network). If
        None, the full network is loaded.
    :param bool overwrite: the existing dataset is deleted and a new dataset is
        downloaded from zenodo.
    """

    def __init__(self, interconnect, reduction=None, overwrite=False):
        """Constructor."""
        # NOTE(review): interconnect, reduction and overwrite are not used in the
        # part of the constructor visible here; it may continue past this excerpt.
        # Identifier of the grid model handled by this class
        self.grid_model = "europe_tub"
def build_supply_curve(grid, num_segments, area, gen_type, area_type=None, plot=True):
    """Builds a supply curve for a specified area and generation type.

    :param powersimdata.input.grid.Grid grid: Grid object.
    :param int num_segments: The number of segments into which the piecewise
        linear cost curve is split.
    :param str area: Either the load zone, state name, state abbreviation, or
        interconnect.
    :param str/iterable gen_type: Generation type(s). A single string is treated
        as one generation type; any other iterable (list, tuple, set, ...) is
        treated as a collection of generation types.
    :param str area_type: one of *'loadzone'*, *'state'*, *'state_abbr'*,
        *'interconnect'*. If set to None, type will be inferred.
    :param bool plot: If True, the supply curve plot is shown. If False, the plot
        is not shown.
    :return: (*tuple*) -- First element is a list of capacity (MW) amounts needed
        to create supply curve. Second element is a list of bids ($/MW) in the
        supply curve. Both lists are empty when the area has no generator of the
        requested type(s).
    :raises TypeError: if a powersimdata.input.grid.Grid object is not input, or
        if the number of segments is not an int.
    :raises ValueError: if the specified area or generator type is not applicable.
    """
    # Check that a Grid object is input
    if not isinstance(grid, Grid):
        raise TypeError("A Grid object must be input.")

    # Check that the desired number of linearized cost curve segments is an int
    if not isinstance(num_segments, int):
        raise TypeError(
            "The number of linearized cost curve segments must be input as an int."
        )

    # Normalize the generation type(s) to a set. A bare string must be wrapped,
    # not iterated; any other iterable is converted so that the set difference
    # below also works for lists and tuples.
    gen_type = {gen_type} if isinstance(gen_type, str) else set(gen_type)

    # Obtain the desired generator cost and plant information data
    supply_data = get_supply_data(grid, num_segments)

    # Check the input supply data
    check_supply_data(supply_data, num_segments)

    # Check to make sure the generator type is valid
    if len(gen_type - set(supply_data["type"].unique())) > 0:
        raise ValueError(f"{gen_type} contains invalid generation type.")

    # Identify the load zones that correspond to the specified area and area_type
    returned_zones = grid.model_immutables.area_to_loadzone(area, area_type=area_type)

    # Trim the DataFrame to only be of the desired area and generation type
    supply_data = supply_data.loc[supply_data.zone_name.isin(returned_zones)]
    supply_data = supply_data.loc[supply_data.type.isin(gen_type)]

    # Remove generators that have no capacity (e.g., Maine coal generators).
    # Done out of place to avoid mutating a slice of the original frame.
    supply_data = supply_data.dropna(subset=["slope1"])

    # Check if the area contains generators of the specified type
    if supply_data.empty:
        return [], []

    # Combine the p_diff and slope information for each cost segment
    supply_df_cols = []
    for segment in range(1, num_segments + 1):
        p_diff_col = "p_diff" + str(segment)
        slope_col = "slope" + str(segment)
        supply_df_cols.append(
            supply_data.loc[:, (p_diff_col, slope_col)].rename(
                columns={p_diff_col: "p_diff", slope_col: "slope"}
            )
        )
    supply_df = pd.concat(supply_df_cols, axis=0)

    # Sort the trimmed DataFrame by slope
    supply_df = supply_df.sort_values(by="slope")
    supply_df = supply_df.reset_index(drop=True)

    # Determine the points that comprise the supply curve: every segment is a
    # horizontal step from the capacity accumulated so far to that plus its width
    capacity_data = []
    price_data = []
    capacity_diff_sum = 0
    for p_diff, slope in zip(
        supply_df["p_diff"].to_numpy(), supply_df["slope"].to_numpy()
    ):
        capacity_data.append(capacity_diff_sum)
        price_data.append(slope)
        capacity_data.append(p_diff + capacity_diff_sum)
        price_data.append(slope)
        capacity_diff_sum += p_diff

    # Plot the curve
    if plot:
        plt = _check_import("matplotlib.pyplot")
        plt.figure(figsize=[20, 10])
        plt.plot(capacity_data, price_data)
        plt.title(f"Supply curve for selected generators in {area}", fontsize=20)
        # Sorted so that the legend does not depend on set iteration order
        plt.legend(
            ["Generation types:\n{}".format("\n".join(sorted(gen_type)))],
            loc="best",
        )
        plt.xlabel("Capacity (MW)", fontsize=20)
        plt.ylabel("Price ($/MW)", fontsize=20)
        plt.xticks(fontsize=20)
        plt.yticks(fontsize=20)
        plt.show()

    # Return the capacity and bid amounts
    return capacity_data, price_data
def export_to_pypsa(
    scenario_or_grid,
    add_all_columns=False,
    add_substations=False,
    add_load_shedding=True,
):
    """Export a Scenario/Grid instance to a PyPSA network.

    .. note::
        This function does not export storages yet.

    :param powersimdata.scenario.scenario.Scenario/
        powersimdata.input.grid.Grid scenario_or_grid: input object. If a Grid
        instance is passed, operational values will be used for the single snapshot
        "now". If a Scenario instance is passed, all available time-series will be
        imported.
    :param bool add_all_columns: whether to add all columns of the corresponding
        component. If true, this will also import columns that PyPSA does not
        process. The default is False.
    :param bool add_substations: whether to export substations. If set to True,
        artificial links of infinite capacity are added from each bus to its
        substation. This is necessary as the substations are imported as regular
        buses in pypsa and thus require a connection to the network. If set to
        False, the substations will not be exported. This is helpful when there are
        no branches or dclinks connecting the substations. Note that the voltage
        level of the substation buses is set to the first bus connected to that
        substation. The default is False.
    :param bool add_load_shedding: whether to add artificial load shedding
        generators to the exported pypsa network. This ensures feasibility when
        optimizing the exported pypsa network as is. The default is True.
    :return: (*pypsa.Network*) -- network populated with the exported buses, loads,
        shunt impedances, generators, carriers, lines, transformers and links.
    :raises TypeError: if ``scenario_or_grid`` is neither a Grid nor a Scenario.
    """
    pypsa = _check_import("pypsa")

    if isinstance(scenario_or_grid, Grid):
        grid = scenario_or_grid
        scenario = None
    elif isinstance(scenario_or_grid, Scenario):
        grid = scenario_or_grid.get_grid()
        scenario = scenario_or_grid
    else:
        # Report the type of the object actually received; `scenario` is not
        # bound on this branch.
        raise TypeError(
            "Expected type powersimdata.Grid or powersimdata.Scenario, "
            f"get {type(scenario_or_grid)}."
        )

    drop_cols = []

    # BUS, LOAD & SUBSTATION
    bus_rename = pypsa_const["bus"]["rename"]
    bus_rename_t = pypsa_const["bus"]["rename_t"]

    if not add_all_columns:
        # Work on a copy so that the shared constants are never extended in place
        drop_cols = list(pypsa_const["bus"]["default_drop_cols"])
        if scenario:
            drop_cols += list(bus_rename_t)

    buses = grid.bus.rename(columns=bus_rename)
    # Assign the result back instead of replacing in place on a column accessor,
    # which is not guaranteed to propagate to the parent data frame.
    buses["control"] = buses.control.replace(
        [1, 2, 3, 4], ["PQ", "PV", "slack", ""]
    )
    buses["zone_name"] = buses.zone_id.map({v: k for k, v in grid.zone2id.items()})
    buses["substation"] = "sub" + grid.bus2sub["sub_id"].astype(str)

    # ensure compatibility with substations (these are imported later)
    buses["is_substation"] = False
    buses["interconnect_sub_id"] = -1
    buses["name"] = ""

    loads = {"proportionality_factor": buses["Pd"]}

    shunts = {k: buses.pop(k) for k in ["b_pu", "g_pu"]}

    substations = grid.sub.copy().rename(columns={"lat": "y", "lon": "x"})
    substations.index = "sub" + substations.index.astype(str)
    substations["is_substation"] = True
    substations["substation"] = substations.index
    # Voltage level of a substation is the one of its first connected bus
    v_nom = buses.groupby("substation").v_nom.first().reindex(substations.index)
    substations["v_nom"] = v_nom

    buses = buses.drop(columns=drop_cols, errors="ignore").sort_index(axis=1)

    # now time-dependent
    if scenario:
        buses_t = {}
        loads_t = {"p_set": scenario.get_bus_demand()}
    else:
        buses_t = {v: buses.pop(k).to_frame("now").T for k, v in bus_rename_t.items()}
        buses_t["v_ang"] = np.deg2rad(buses_t["v_ang"])
        loads_t = {"p": buses_t.pop("p"), "q": buses_t.pop("q")}

    # GENERATOR & COSTS
    generator_rename = pypsa_const["generator"]["rename"]
    generator_rename_t = pypsa_const["generator"]["rename_t"]

    if not add_all_columns:
        drop_cols = list(pypsa_const["generator"]["default_drop_cols"])
        if scenario:
            drop_cols += list(generator_rename_t)

    generators = grid.plant.rename(columns=generator_rename)
    # Divide by 1 where p_nom is zero to avoid a division by zero
    generators.p_min_pu /= generators.p_nom.where(generators.p_nom != 0, 1)
    generators["committable"] = np.where(generators.p_min_pu > 0, True, False)
    generators["ramp_limit_down"] = generators.ramp_limit.replace(0, np.nan)
    generators["ramp_limit_up"] = generators.ramp_limit.replace(0, np.nan)
    generators.drop(columns=drop_cols + ["ramp_limit"], inplace=True)

    gencost = grid.gencost["before"].copy()
    # Linearize quadratic curves as applicable
    fixed = grid.plant["Pmin"] == grid.plant["Pmax"]
    linearized = gencost.loc[~fixed, "c1"] + gencost.loc[~fixed, "c2"] * (
        grid.plant.loc[~fixed, "Pmax"] + grid.plant.loc[~fixed, "Pmin"]
    )
    gencost["c1"] = linearized.combine_first(gencost["c1"])
    gencost = gencost.rename(columns=pypsa_const["cost"]["rename"])
    gencost = gencost[pypsa_const["cost"]["rename"].values()]

    carriers = pd.DataFrame(index=generators.carrier.unique(), dtype=object)

    cars = carriers.index
    constants = grid.model_immutables.plants
    carriers["color"] = pd.Series(constants["type2color"]).reindex(cars)
    carriers["nice_name"] = pd.Series(constants["type2label"]).reindex(cars)
    carriers["co2_emissions"] = (
        pd.Series(constants["carbon_per_mwh"]).div(1e3)
        * pd.Series(constants["efficiency"])
    ).reindex(cars, fill_value=0)
    generators["efficiency"] = generators.carrier.map(constants["efficiency"]).fillna(0)

    # now time-dependent
    if scenario:
        dfs = [scenario.get_wind(), scenario.get_solar(), scenario.get_hydro()]
        p_max_pu = pd.concat(dfs, axis=1)
        p_nom = generators.p_nom[p_max_pu.columns]
        p_max_pu = p_max_pu / p_nom.where(p_nom != 0, 1)
        generators_t = {"p_max_pu": p_max_pu}
        # drop p_nom_min of renewables, make them non-committable
        generators.loc[p_max_pu.columns, "p_min_pu"] = 0
        generators.loc[p_max_pu.columns, "committable"] = False
    else:
        generators_t = {
            v: generators.pop(k).to_frame("now").T
            for k, v in generator_rename_t.items()
        }

    # BRANCHES
    branch_rename = pypsa_const["branch"]["rename"]
    branch_rename_t = pypsa_const["branch"]["rename_t"]

    if not add_all_columns:
        drop_cols = list(pypsa_const["branch"]["default_drop_cols"])
        if scenario:
            drop_cols += list(branch_rename_t)

    branches = grid.branch.rename(columns=branch_rename).drop(columns=drop_cols)
    branches["v_nom"] = branches.bus0.map(buses.v_nom)
    # BE model assumes a 100 MVA base, pypsa "assumes" a 1 MVA base
    branches[["x_pu", "r_pu"]] /= 100
    branches["x"] = branches.x_pu * branches.v_nom**2
    branches["r"] = branches.r_pu * branches.v_nom**2

    lines = branches.query("branch_device_type == 'Line'")
    lines = lines.drop(columns="branch_device_type")

    transformers = branches.query(
        "branch_device_type in ['TransformerWinding', 'Transformer']"
    )

    if scenario:
        lines_t = {}
        transformers_t = {}
    else:
        lines_t = {
            v: lines.pop(k).to_frame("now").T for k, v in branch_rename_t.items()
        }
        transformers_t = {
            v: transformers.pop(k).to_frame("now").T
            for k, v in branch_rename_t.items()
        }

    # DC LINES
    link_rename = pypsa_const["link"]["rename"]
    link_rename_t = pypsa_const["link"]["rename_t"]

    if not add_all_columns:
        drop_cols = list(pypsa_const["link"]["default_drop_cols"])
        if scenario:
            drop_cols += list(link_rename_t)

    links = grid.dcline.rename(columns=link_rename).drop(columns=drop_cols)
    # Divide by 1 where p_nom is zero to avoid a division by zero
    links.p_min_pu /= links.p_nom.where(links.p_nom != 0, 1)

    # SUBSTATION CONNECTORS
    sublinks = dict(
        bus0=buses.index, bus1=buses.substation.values, p_nom=np.inf, p_min_pu=-1
    )
    index = "sub" + pd.RangeIndex(len(buses)).astype(str)
    sublinks = pd.DataFrame(sublinks, index=index)

    if scenario:
        links_t = {}
    else:
        links_t = {v: links.pop(k).to_frame("now").T for k, v in link_rename_t.items()}

    # TODO: add storage export
    if not grid.storage["gen"].empty:
        warnings.warn("The export of storages are not implemented yet.")

    # Import everything to a new pypsa network
    n = pypsa.Network()
    if scenario:
        n.snapshots = loads_t["p_set"].index
    n.madd("Bus", buses.index, **buses, **buses_t)
    n.madd("Load", buses.index, bus=buses.index, **loads, **loads_t)
    n.madd("ShuntImpedance", buses.index, bus=buses.index, **shunts)
    n.madd("Generator", generators.index, **generators, **gencost, **generators_t)
    n.madd("Carrier", carriers.index, **carriers)
    n.madd("Line", lines.index, **lines, **lines_t)
    n.madd("Transformer", transformers.index, **transformers, **transformers_t)
    n.madd("Link", links.index, **links, **links_t)

    if add_substations:
        n.madd("Bus", substations.index, **substations)
        n.madd("Link", sublinks.index, **sublinks)

    if add_load_shedding:
        # Load shedding is modelled by very costly generators whose power output
        # is measured in kW (see the factor `sign`). This keeps the coefficient
        # range in the LOPF low.
        n.madd(
            "Generator",
            buses.index,
            suffix=" load shedding",
            bus=buses.index,
            sign=1e-3,
            marginal_cost=1e2,
            p_nom=1e9,
            carrier="load",
        )
        n.add("Carrier", "load", nice_name="Load Shedding", color="red")

    n.name = ", ".join([grid.data_loc] + grid.interconnect)
    return n