def decompose_plant_data_frame_into_areas(df, areas, grid):
    """Take a plant-column data frame and decompose it into plant-column data frames
    for areas.

    :param pandas.DataFrame df: data frame, columns are plant id in grid.
    :param dict areas: areas to use for decomposition. Keys are area types
        ('*loadzone*', '*state*', or '*interconnect*'), values are
        str/list/tuple/set of areas.
    :param powersimdata.input.grid.Grid grid: Grid instance.
    :return: (*dict*) -- keys are areas, values are plant-column data frames.
        Interconnect keys are formatted as ``"<name> interconnect"``, with any
        underscore in the interconnect name replaced by ``" - "``.
    """
    _check_data_frame(df, "PG")
    plant_id = set(df.columns)
    _check_plants_are_in_grid(plant_id, grid)
    areas = _check_areas_are_in_grid_and_format(areas, grid)

    def _select(ids_in_area):
        # Only keep plants that are both in the area and present in ``df``.
        # The intersection is turned into a list because pandas >= 2.0 refuses
        # set indexers; older versions performed the same conversion
        # internally, so the selected columns are unchanged.
        return df[list(ids_in_area & plant_id)]

    df_areas = {}
    for area_type, area_names in areas.items():
        if area_type == "interconnect":
            for interconnect in area_names:
                label = "%s interconnect" % interconnect.replace("_", " - ")
                df_areas[label] = _select(
                    get_plant_id_in_interconnects(interconnect, grid)
                )
        elif area_type == "state":
            for state in area_names:
                df_areas[state] = _select(get_plant_id_in_states(state, grid))
        elif area_type == "loadzone":
            for loadzone in area_names:
                df_areas[loadzone] = _select(
                    get_plant_id_in_loadzones(loadzone, grid)
                )

    return df_areas
def calculate_branch_difference(branch1, branch2):
    """Calculate the capacity differences between two branch data frames. If
    capacity in ``branch2`` is larger than capacity in ``branch1``, the
    difference will be positive.

    :param pandas.DataFrame branch1: first branch data frame.
    :param pandas.DataFrame branch2: second branch data frame.
    :return: (*pandas.DataFrame*) -- outer merge of the two branch data frames
        on their index. Columns of ``branch2`` whose name also exists in
        ``branch1`` carry a ``_2`` suffix. A ``diff`` column holds
        ``rateA_2 - rateA``, a missing capacity on either side counting as 0.
    :raises ValueError: if either data frame has no 'rateA' column.

    .. note:: the four ``from_lat``/``from_lon``/``to_lat``/``to_lon`` columns
        are looked up on both sides of the merge, so both inputs are expected
        to provide them.
    """
    _check_data_frame(branch1, "branch1")
    _check_data_frame(branch2, "branch2")
    # Both frames need a capacity column. The check must fail as soon as one
    # of them is missing it, not only when branch1 alone lacks it.
    if "rateA" not in branch1.columns or "rateA" not in branch2.columns:
        raise ValueError("branch1 and branch2 both must have 'rateA' columns")
    branch1, branch2 = _reindex_as_necessary(
        branch1, branch2, ["from_bus_id", "to_bus_id"]
    )
    branch_merge = branch1.merge(
        branch2, how="outer", right_index=True, left_index=True, suffixes=(None, "_2")
    )
    # A branch present in only one frame has NaN capacity on the other side;
    # treat that as zero capacity so the difference is the full rating.
    branch_merge["diff"] = branch_merge.rateA_2.fillna(0) - branch_merge.rateA.fillna(0)
    # Ensure that lats & lons get filled in as necessary from branch2 entries.
    # Assign the result back instead of an in-place fill on the selected
    # column: the latter does not update the frame under pandas Copy-on-Write.
    for col in ["from_lat", "from_lon", "to_lat", "to_lon"]:
        branch_merge[col] = branch_merge[col].fillna(branch_merge[f"{col}_2"])
    return branch_merge
def test_check_data_frame_argument_value():
    """``_check_data_frame`` must raise ``ValueError`` for empty data frames."""
    empty_frames = (
        # two columns, zero rows
        (pd.DataFrame({"California": [], "Texas": []}), "row"),
        # no columns at all
        (pd.DataFrame({}), "col"),
    )
    for frame, label in empty_frames:
        with pytest.raises(ValueError):
            _check_data_frame(frame, label)
def test_check_data_frame_argument_type():
    """``_check_data_frame`` must raise ``TypeError`` for wrongly typed arguments."""
    valid_frame = pd.DataFrame({"California": [1, 2, 3], "Texas": [4, 5, 6]})
    wrongly_typed = (
        # first argument is not a data frame
        (1, "int"),
        ("homer", "str"),
        ({"homer", "marge", "bart", "lida"}, "set"),
        # valid data frame, but the label is an integer
        (valid_frame, 123456),
    )
    for candidate, label in wrongly_typed:
        with pytest.raises(TypeError):
            _check_data_frame(candidate, label)
def summarize_plant_to_location(df, grid):
    """Sum the columns of a plant-column data frame that share a location.

    :param pandas.DataFrame df: dataframe, columns are plant id in grid.
    :param powersimdata.input.grid.Grid grid: Grid instance.
    :return: (*pandas.DataFrame*) -- index: df index, columns: location tuples.
    """
    _check_data_frame(df, "PG")
    _check_grid_type(grid)
    _check_plants_are_in_grid(df.columns.to_list(), grid)

    # One (lat, lon) record per column of ``df``, in column order, used as the
    # grouping key for the columns.
    plant_coordinates = grid.plant[["lat", "lon"]].loc[df.columns]
    location_keys = plant_coordinates.to_records(index=False)
    return df.groupby(location_keys, axis=1).sum()
def decompose_plant_data_frame_into_resources(df, resources, grid):
    """Take a plant-column data frame and decompose it into plant-column data frames
    for each resource.

    :param pandas.DataFrame df: data frame, columns are plant id in grid.
    :param str/list/tuple/set resources: resource(s) to use for decomposition.
    :param powersimdata.input.grid.Grid grid: Grid instance.
    :return: (*dict*) -- keys are resources, values are plant-column data frames
        with columns sorted.
    """
    _check_data_frame(df, "PG")
    plant_id = set(df.columns)
    _check_plants_are_in_grid(plant_id, grid)
    resources = _check_resources_are_in_grid_and_format(resources, grid)

    df_resources = {}
    for resource in resources:
        # Plants of this resource that are actually present in ``df``. The
        # intersection is materialised as a list because pandas >= 2.0 refuses
        # set indexers; column order is fixed by the sort afterwards.
        selected = list(get_plant_id_for_resources(resource, grid) & plant_id)
        df_resources[resource] = df[selected].sort_index(axis=1)
    return df_resources
def summarize_plant_to_bus(df, grid, all_buses=False):
    """Sum the columns of a plant-column data frame that share a bus.

    :param pandas.DataFrame df: dataframe, columns are plant id in grid.
    :param powersimdata.input.grid.Grid grid: Grid instance.
    :param boolean all_buses: return all buses in grid, not just plant buses.
    :return: (*pandas.DataFrame*) -- index as df input, columns are buses.
    """
    _check_data_frame(df, "PG")
    _check_grid_type(grid)
    _check_plants_are_in_grid(df.columns.to_list(), grid)

    # Bus id of every plant appearing as a column of ``df``.
    bus_of_plant = grid.plant["bus_id"].loc[df.columns]
    # Group the transposed frame (plants as rows) by bus, then flip it back.
    summed_by_bus = df.T.groupby(bus_of_plant).sum().T

    if not all_buses:
        return summed_by_bus

    # Expand to every bus of the grid; buses without a plant in ``df`` get 0.
    return pd.DataFrame(
        summed_by_bus, columns=grid.bus.index, index=df.index
    ).fillna(0.0)
def summarize_hist_gen(
    hist_gen_raw: pd.DataFrame, all_resources: list, grid_model="usa_tamu"
) -> pd.DataFrame:
    """Get the total historical generation for each generator type and state
    combination, adding totals for interconnects and for all states.

    :param pandas.DataFrame hist_gen_raw: historical generation data frame. Columns
        are resources and indices are either state or load zone.
    :param list all_resources: list of resources.
    :param str grid_model: grid_model
    :return: (*pandas.DataFrame*) -- historical generation per resource. Rows are
        states followed by 'Eastern interconnection', 'Western interconnection',
        'Texas interconnection' and 'All'.
    """
    _check_data_frame(hist_gen_raw, "PG")
    filtered_colnames = _check_resources_and_format(
        all_resources, grid_model=grid_model
    )

    mi = ModelImmutables(grid_model)
    zones = mi.zones

    result = hist_gen_raw.copy()

    def _states_and_loadzones(interconnect: str) -> set:
        """Collect the state names and load zones of an interconnect.

        :param str interconnect: interconnect name.
        :return: (*set*) -- state names and load zones in the interconnect.
        """
        states = {
            zones["abv2state"][abv] for abv in zones["interconnect2abv"][interconnect]
        }
        return states | zones["interconnect2loadzone"][interconnect]

    def _sum_over_areas(frame: pd.DataFrame, areas) -> pd.Series:
        """Sum, per resource, the rows of a frame whose index is in the given areas.

        :param pandas.DataFrame frame: data frame indexed by area.
        :param set areas: areas to keep.
        :return: (*pandas.Series*) -- column-wise sum over the matching rows.
        """
        return frame.loc[frame.index.isin(areas)].sum()

    # Interconnection totals are computed on the raw (state / load zone) index,
    # before load zones are folded into their state below.
    eastern = _sum_over_areas(result, _states_and_loadzones("Eastern"))
    # Texas is matched through its load zones only.
    ercot = _sum_over_areas(result, zones["interconnect2loadzone"]["Texas"])
    western = _sum_over_areas(result, _states_and_loadzones("Western"))

    # State
    def _groupby_state(index: str) -> str:
        """Use state as a dict key if index is a smaller region (e.g. Texas East),
        otherwise use the given index.

        :param str index: either a state name or region within a state.
        :return: (*str*) -- the corresponding state name.
        """
        return (
            zones["loadzone2state"][index]
            if index in zones["loadzone2state"]
            else index
        )

    # ``.sum()`` is what ``aggregate(np.sum)`` resolves to, without the
    # FutureWarning recent pandas emits for the callable form.
    result = result.groupby(by=_groupby_state).sum()

    # Summary
    # Grand total over states only, taken before the interconnect rows are added.
    grand_total = result.sum()
    result.loc["Eastern interconnection"] = eastern
    result.loc["Western interconnection"] = western
    result.loc["Texas interconnection"] = ercot
    result.loc["All"] = grand_total

    # Materialise the selector as a list: a no-op for list-likes, and it keeps
    # the lookup working if the helper returns a set (rejected as an indexer
    # by pandas >= 2.0).
    result = result.loc[:, list(filtered_colnames)]
    return result.rename(columns=mi.plants["type2label"])
def test_check_data_frame():
    """``_check_data_frame`` must accept a populated data frame and a string label."""
    populated_frame = pd.DataFrame({"California": [1, 2, 3], "Texas": [4, 5, 6]})
    _check_data_frame(populated_frame, "pandas.DataFrame")