def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # Pull the project ramp-rate inputs and load them into a DataFrame
    ramp_rate_inputs = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )
    ramp_df = cursor_to_df(ramp_rate_inputs)

    # Ramp rates are percent-fractions, so flag values outside [0, 1]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars",
        severity="Mid",
        errors=validate_values(ramp_df, ["lf_reserves_up_ramp_rate"],
                               min=0, max=1),
    )
def validate_required_subscenario_ids(scenario_id, subscenarios, conn):
    """
    Check whether the required subscenario_ids are specified in the db

    :param subscenarios:
    :param conn:
    :return: boolean, True if all required subscenario_ids are specified
    """
    # The "core" subscenarios must always be set in the 'scenarios' table
    core_ids = determine_subscenarios_by_feature(conn)["core"]

    validation_errors = [
        "'{}' is a required input in the 'scenarios' table".format(sc_id)
        for sc_id in core_ids
        if getattr(subscenarios, sc_id) is None
    ]

    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id="N/A",
        stage_id="N/A",
        gridpath_module="N/A",
        db_table="scenarios",
        severity="High",
        errors=validation_errors,
    )

    # Return True if all required subscenario_ids are valid (list is empty)
    return not validation_errors
def validate_data_dependent_subscenario_ids(scenario_id, subscenarios, conn):
    """
    Check that the subscenario_ids required by the capacity types present
    in the project portfolio are specified in the 'scenarios' table.

    :param subscenarios:
    :param conn:
    :return: boolean, True if all required subscenario_ids are specified
    """
    assert subscenarios.PROJECT_PORTFOLIO_SCENARIO_ID is not None

    req_cap_types = set(
        get_required_capacity_types_from_database(conn, scenario_id))

    # Bug fix: "gen_new_lin," previously had a stray trailing comma inside
    # the string literal, so portfolios with gen_new_lin projects never
    # triggered the PROJECT_NEW_COST_SCENARIO_ID requirement
    new_build_types = {
        "gen_new_lin",
        "gen_new_bin",
        "stor_new_lin",
        "stor_new_bin",
        "dr_new",
    }
    existing_build_types = {
        "gen_spec",
        "gen_ret_bin",
        "gen_ret_lin",
        "stor_spec",
    }
    dr_types = {"dr_new"}

    # Determine required subscenario_ids from the capacity types present
    sc_id_type = []
    if req_cap_types & new_build_types:
        sc_id_type.append(("PROJECT_NEW_COST_SCENARIO_ID", "New Build"))
    if req_cap_types & existing_build_types:
        sc_id_type.append(
            ("PROJECT_SPECIFIED_CAPACITY_SCENARIO_ID", "Existing"))
        sc_id_type.append(
            ("PROJECT_SPECIFIED_FIXED_COST_SCENARIO_ID", "Existing"))
    if req_cap_types & dr_types:
        sc_id_type.append(
            ("PROJECT_NEW_POTENTIAL_SCENARIO_ID", "Demand Response (DR)"))

    # Check whether required subscenario_ids are present
    errors = []
    for sc_id, build_type in sc_id_type:
        if getattr(subscenarios, sc_id) is None:
            errors.append(
                "'{}' is a required input in the 'scenarios' table if there "
                "are '{}' resources in the portfolio".format(
                    sc_id, build_type))

    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id="N/A",
        stage_id="N/A",
        gridpath_module="N/A",
        db_table="scenarios",
        severity="High",
        errors=errors,
    )

    # Return True if all required subscenario_ids are valid (list is empty)
    return not bool(errors)
def validate_hydro_opchars(scenario_id, subscenarios, subproblem, stage,
                           conn, op_type):
    """
    Validate the hydro operational characteristics inputs.

    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :param op_type:
    :return: errors from the fraction-range check (callers may use them to
        warn about out-of-range values)
    """
    hydro_inputs = get_hydro_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn, op_type
    )

    # Convert input data into pandas DataFrame
    hydro_df = cursor_to_df(hydro_inputs)

    fraction_cols = [
        "min_power_fraction",
        "average_power_fraction",
        "max_power_fraction",
    ]

    # Keyword arguments shared by all three validations below
    common_kwargs = dict(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_hydro_operational_chars",
    )

    # Check for missing inputs
    write_validation_to_database(
        severity="High",
        errors=validate_missing_inputs(
            hydro_df, fraction_cols, ["project", "horizon"]),
        **common_kwargs
    )

    # Check for sign (should be percent fraction)
    fraction_errors = write_validation_to_database(
        severity="Low",
        errors=validate_values(hydro_df, fraction_cols, min=0, max=1),
        **common_kwargs
    )

    # Check min <= avg <= max
    write_validation_to_database(
        severity="Mid",
        errors=validate_column_monotonicity(
            df=hydro_df, cols=fraction_cols, idx_col=["project", "horizon"]
        ),
        **common_kwargs
    )

    return fraction_errors
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # Validate operational chars table inputs
    validate_opchars(scenario_id, subscenarios, subproblem, stage, conn,
                     "gen_simple")

    # Other module specific validations

    # Fetch the heat-rate load points of the gen_simple projects in the
    # portfolio
    cursor = conn.cursor()
    heat_rates = cursor.execute(
        """
        SELECT project, load_point_fraction
        FROM inputs_project_portfolios
        INNER JOIN
        (SELECT project, operational_type, heat_rate_curves_scenario_id
        FROM inputs_project_operational_chars
        WHERE project_operational_chars_scenario_id = {}
        AND operational_type = '{}') AS op_char
        USING(project)
        INNER JOIN
        (SELECT project, heat_rate_curves_scenario_id, load_point_fraction
        FROM inputs_project_heat_rate_curves) as heat_rates
        USING(project, heat_rate_curves_scenario_id)
        WHERE project_portfolio_scenario_id = {}
        """.format(
            subscenarios.PROJECT_OPERATIONAL_CHARS_SCENARIO_ID,
            "gen_simple",
            subscenarios.PROJECT_PORTFOLIO_SCENARIO_ID,
        )
    )

    # Convert inputs to dataframe
    heat_rate_df = cursor_to_df(heat_rates)

    # Check that there is only one load point (constant heat rate)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_heat_rate_curves",
        severity="Mid",
        errors=validate_single_input(
            df=heat_rate_df,
            msg="gen_simple can only have one load point "
                "(constant heat rate).",
        ),
    )
def validate_opchars(scenario_id, subscenarios, subproblem, stage, conn,
                     op_type):
    """
    Validate the operational-chars inputs for the given operational type
    and return them for further op-type-specific validations.

    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :param op_type:
    :return: DataFrame of the op-type's operational chars inputs
    """
    # TODO: deal with fuel and variable O&M column, which are nor optional
    #  nor required for any?

    # Get the opchar inputs for this operational type
    opchar_df = get_optype_inputs_from_db(
        scenario_id, subscenarios, conn, op_type)

    # Get the required, optional, and other columns with their types
    (required_columns_types,
     optional_columns_types,
     other_columns_types) = get_optype_param_requirements(op_type=op_type)

    # Check that required inputs are present
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars",
        severity="High",
        errors=validate_req_cols(
            opchar_df, required_columns_types.keys(), True, op_type),
    )

    # Check that other (not required or optional) inputs are not present
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars",
        severity="Low",
        errors=validate_req_cols(
            opchar_df, other_columns_types.keys(), False, op_type),
    )

    # TODO: do data-type and numeric non-negativity checking here rather
    #  than in project.init?

    # Return the opchar df (sometimes used for further validations)
    return opchar_df
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    project_zones = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)

    # Convert input data into pandas DataFrame
    zone_df = cursor_to_df(project_zones)
    zones_w_project = zone_df["prm_zone"].unique()

    # Get the required PRM zones
    # TODO: make this into a function similar to get_projects()?
    #  could eventually centralize all these db query functions in one place
    cursor = conn.cursor()
    req_zones = [
        z[0] for z in cursor.execute(
            """SELECT prm_zone FROM inputs_geography_prm_zones
            WHERE prm_zone_scenario_id = {}
            """.format(subscenarios.PRM_ZONE_SCENARIO_ID)
        )
    ]

    # Check that each PRM zone has at least one project assigned to it
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_prm_zones",
        severity="High",
        errors=validate_idxs(
            actual_idxs=zones_w_project,
            req_idxs=req_zones,
            idx_label="prm_zone",
            msg="Each PRM zone needs at least 1 project assigned to it.",
        ),
    )

    # Make sure PRM type is specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_elcc_chars",
        severity="High",
        errors=validate_missing_inputs(zone_df, "prm_type"),
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn): """ Get inputs from database and validate the inputs :param subscenarios: SubScenarios object with all subscenario info :param subproblem: :param stage: :param conn: database connection :return: """ # timepoints = get_inputs_from_database( # scenario_id, subscenarios, subproblem, stage, conn) # validate timepoint inputs # TODO: could gather the timepoints from the previous stage (if any) and # check that the previous stage timepoint map inputs are valid c = conn.cursor() validation_data = c.execute(""" SELECT period, sum(number_of_hours_in_timepoint*timepoint_weight) as hours_in_period_timepoints FROM inputs_temporal WHERE temporal_scenario_id = {} AND spinup_or_lookahead = 0 AND stage_id = {} GROUP BY period;""".format(subscenarios.TEMPORAL_SCENARIO_ID, stage)).fetchall() for row in validation_data: period = row[0] hours_in_period_timepoints = row[1] if hours_in_period_timepoints not in [8760, 8766, 8784]: msg = """ Check timepoint parameters for period {}. Your timepoint parameters currently sum up to {}. In each period, regardless of period param values, the total number of hours in timepoints adjusted for timepoint weight and duration and excluding spinup and lookahead timepoints should be the number of hours in a year (8760, 8766, or 8784). This is to ensure consistent weighting of timepoint-level and period-level costs. """.format(str(period), str(hours_in_period_timepoints)) # Check values of hours_in_period_timepoints write_validation_to_database( conn=conn, scenario_id=scenario_id, subproblem_id=subproblem, stage_id=stage, gridpath_module=__name__, db_table="inputs_temporal", severity="High", errors=[msg], )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # TODO: check that hours in full period is within x and y
    #  ("within" check or "validate" check in param definition returns
    #  obscure error message that isn't helpful).

    periods = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)
    period_df = cursor_to_df(periods)

    # Get expected dtypes; hours_in_period_timepoints is hard-coded to be
    # treated as numeric
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_temporal_periods"])
    expected_dtypes["hours_in_period_timepoints"] = "numeric"

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(period_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_periods",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    valid_numeric_columns = {
        col for col in period_df.columns
        if expected_dtypes[col] == "numeric"
    } - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_periods",
        severity="Mid",
        errors=validate_values(period_df, valid_numeric_columns, "period",
                               min=0),
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    (min_max_builds,
     supply_curve_count,
     supply_curve_id,
     supply_curve) = get_model_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    dr_portfolio_projects = get_projects(
        conn, scenario_id, subscenarios, "capacity_type", "dr_new")

    # Convert input data into pandas DataFrame
    potential_df = cursor_to_df(min_max_builds)
    supply_curve_df = cursor_to_df(supply_curve)
    projects_w_curve = supply_curve_df["project"].unique()

    # Check for missing project potential inputs
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_potential",
        severity="High",
        errors=validate_missing_inputs(
            potential_df,
            ["min_cumulative_new_build_mwh", "max_cumulative_new_build_mwh"],
        ),
    )

    # Check for missing supply curve inputs
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_shiftable_load_supply_curve",
        severity="High",
        errors=validate_idxs(
            actual_idxs=projects_w_curve,
            req_idxs=dr_portfolio_projects,
            idx_label="project",
        ),
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # Validate operational chars table inputs
    opchar_df = validate_opchars(
        scenario_id, subscenarios, subproblem, stage, conn,
        "gen_var_must_take"
    )

    # Validate var profiles input table; out-of-range cap factors are
    # allowed but trigger a warning
    if validate_var_profiles(scenario_id, subscenarios, subproblem, stage,
                             conn, "gen_var_must_take"):
        warnings.warn(
            """
            Found gen_var_must_take cap factors that are <0 or >1. This is
            allowed but this warning is here to make sure it is intended.
            """
        )

    # Other module specific validations

    # gen_var_must_take cannot provide reserves, so projects of this type
    # should not show up in any of the inputs_project_reserve_bas tables
    projects_by_reserve = get_projects_by_reserve(
        scenario_id, subscenarios, conn)
    for reserve, projects_w_ba in projects_by_reserve.items():
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_" + reserve + "_bas",
            severity="Mid",
            errors=validate_idxs(
                actual_idxs=opchar_df["project"],
                invalid_idxs=projects_w_ba,
                msg="gen_var_must_take cannot provide {}.".format(reserve),
            ),
        )
def validate_feature_subscenario_ids(scenario_id, subscenarios,
                                     optional_features, conn):
    """
    Check that subscenario_ids tied to requested optional features are
    specified in the 'scenarios' table.

    :param subscenarios:
    :param optional_features:
    :param conn:
    :return: boolean, True if no validation errors were found
    """
    subscenario_ids_by_feature = determine_subscenarios_by_feature(conn)
    feature_list = optional_features.get_active_features()

    errors = {"High": [], "Low": []}  # errors by severity
    for feature, subscenario_ids in subscenario_ids_by_feature.items():
        # "core"/"optional"/"data_dependent" groups are validated elsewhere
        if feature not in ["core", "optional", "data_dependent"]:
            for sc_id in subscenario_ids:
                # If the feature is requested, and the associated
                # subscenarios are not specified, raise a validation error
                # NOTE(review): this compares against the string "NULL",
                # whereas other validators in this file compare against
                # None -- confirm which representation is correct here
                if feature in feature_list and getattr(subscenarios, sc_id) == "NULL":
                    errors["High"].append(
                        "Requested feature '{}' requires an input for '{}'".
                        format(feature, sc_id))
                # If the feature is not requested, and the associated
                # subscenarios are specified, raise a validation error
                # TODO: need to add handling of subscenarios shared among
                #  features; commenting out for now
                # elif feature not in feature_list and \
                #         getattr(subscenarios, sc_id) != "NULL":
                #     errors["Low"].append(
                #         "Detected inputs for '{}' while related feature '{}' "
                #         "is not requested".format(sc_id, feature)
                #     )

    # One write per severity bucket
    for severity, error_list in errors.items():
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id="N/A",
            stage_id="N/A",
            gridpath_module="N/A",
            db_table="scenarios",
            severity=severity,
            errors=error_list,
        )

    # Return True if all required subscenario_ids are valid (list is empty)
    return not bool(sum(errors.values(), []))
def validate_hours_in_subproblem_period(scenario_id, subscenarios, conn):
    """
    Check that the weighted timepoint hours in each (stage, period) match
    hours_in_full_period.

    :param subscenarios:
    :param conn:
    :return:
    """
    sql = """
        SELECT stage_id, period, n_hours, hours_in_full_period
        FROM
        (SELECT temporal_scenario_id, stage_id, period,
        sum(number_of_hours_in_timepoint * timepoint_weight) as n_hours
        FROM inputs_temporal
        WHERE spinup_or_lookahead IS NULL
        AND temporal_scenario_id = {}
        GROUP BY temporal_scenario_id, stage_id, period
        ) AS tmp_table
        INNER JOIN
        (SELECT temporal_scenario_id, period, hours_in_full_period
        FROM inputs_temporal_periods) AS period_tbl
        USING (temporal_scenario_id, period)
        """.format(subscenarios.TEMPORAL_SCENARIO_ID)
    hours_df = pd.read_sql(sql, con=conn)

    # Flag any (stage, period) where the two hour totals disagree
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id="N/A",
        stage_id="N/A",
        gridpath_module="N/A",
        db_table="inputs_temporal",
        severity="Mid",
        errors=validate_cols_equal(
            df=hours_df,
            col1="n_hours",
            col2="hours_in_full_period",
            idx_col=["stage_id", "period"],
            msg="""Total number of hours in timepoints adjusted for
            timepoint weight and duration and excluding spinup and
            lookahead timepoints should match hours_in_full_period.""",
        ),
    )
def validate_var_profiles(scenario_id, subscenarios, subproblem, stage,
                          conn, op_type):
    """
    Validate the variable-generator profile inputs.

    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :param op_type:
    :return: errors from the cap-factor range check (callers may use them
        to warn about out-of-range values)
    """
    profile_inputs = get_var_profile_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn, op_type
    )

    # Convert input data into pandas DataFrame
    profile_df = cursor_to_df(profile_inputs)

    # Check for missing inputs
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_variable_generator_profiles",
        severity="High",
        errors=validate_missing_inputs(
            profile_df, ["cap_factor"], ["project", "timepoint"]),
    )

    # Check for sign (should be percent fraction); keep the returned errors
    cap_factor_errors = write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_variable_generator_profiles",
        severity="Low",
        errors=validate_values(profile_df, ["cap_factor"], min=0, max=1),
    )

    return cap_factor_errors
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    project_zone_dur = get_model_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)
    duration_df = cursor_to_df(project_zone_dur)
    duration_cols = ["min_duration_for_full_capacity_credit_hours"]

    # Make sure param sign is as expected (strictly positive)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_elcc_chars",
        severity="High",
        errors=validate_values(duration_df, duration_cols, min=0,
                               strict_min=True),
    )

    # Make sure param is specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_elcc_chars",
        severity="High",
        errors=validate_missing_inputs(duration_df, duration_cols),
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the exogenous availability
    inputs.

    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :return:
    """
    availabilities = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)

    df = cursor_to_df(availabilities)
    idx_cols = ["project", "timepoint"]
    value_cols = ["availability_derate"]

    # Check data types availability
    expected_dtypes = get_expected_dtypes(conn, [
        "inputs_project_availability",
        "inputs_project_availability_exogenous"
    ])
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_exogenous",
        severity="High",
        errors=dtype_errors,
    )

    # Check for missing inputs
    msg = ("If not specified, availability is assumed to be 100%. If you "
           "don't want to specify any availability derates, simply leave "
           "the exogenous_availability_scenario_id empty and this message "
           "will disappear.")
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_exogenous",
        severity="Low",
        errors=validate_missing_inputs(df, value_cols, idx_cols, msg),
    )

    # Check for correct sign, but only if the derate column passed the
    # dtype check
    # Bug fix: the guard previously tested for "availability" in
    # error_columns, but the validated column is named
    # "availability_derate", so the range check also ran when that column
    # had dtype errors
    if "availability_derate" not in error_columns:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_availability_exogenous",
            severity="High",
            errors=validate_values(df, value_cols, min=0, max=1),
        )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem,
                                    stage, conn):
    """
    Get inputs from database and validate the endogenous availability
    inputs.

    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :return:
    """
    params = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)
    avail_df = cursor_to_df(params)

    # Check data types availability
    expected_dtypes = get_expected_dtypes(conn, [
        "inputs_project_availability",
        "inputs_project_availability_endogenous"
    ])
    dtype_errors, error_columns = validate_dtypes(avail_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_endogenous",
        severity="High",
        errors=dtype_errors,
    )

    # Check for missing inputs
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_endogenous",
        severity="Low",
        errors=validate_missing_inputs(
            avail_df,
            ["unavailable_hours_per_period",
             "unavailable_hours_per_event_min",
             "available_hours_between_events_min"],
            "project",
            "",
        ),
    )

    # Hours per event must not exceed hours per period
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_endogenous",
        severity="High",
        errors=validate_column_monotonicity(
            df=avail_df,
            cols=["unavailable_hours_per_event_min",
                  "unavailable_hours_per_period"],
            idx_col=["project"],
        ),
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    project_fractions = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)
    fraction_df = cursor_to_df(project_fractions)

    # Make sure the fraction is between 0 and 1
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_elcc_chars",
        severity="High",
        errors=validate_values(fraction_df, ["elcc_simple_fraction"],
                               min=0, max=1),
    )

    # Make sure the fraction is specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_elcc_chars",
        severity="High",
        errors=validate_missing_inputs(fraction_df, "elcc_simple_fraction"),
    )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem,
                                    stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    tx_cost = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)
    tx_lines = get_tx_lines(
        conn, scenario_id, subscenarios, "capacity_type", "tx_new_lin")

    # Convert input data into pandas DataFrame and get the lines with costs
    cost_df = cursor_to_df(tx_cost)
    tx_lines_w_cost = cost_df["transmission_line"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_transmission_new_cost"])

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(cost_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_new_cost",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    valid_numeric_columns = {
        col for col in cost_df.columns
        if expected_dtypes[col] == "numeric"
    } - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_new_cost",
        severity="High",
        errors=validate_values(
            cost_df, valid_numeric_columns, "transmission_line", min=0),
    )

    # Check that all binary new build tx lines are available in >=1 vintage
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_new_cost",
        severity="Mid",
        errors=validate_idxs(
            actual_idxs=tx_lines_w_cost,
            req_idxs=tx_lines,
            idx_label="transmission_line",
            msg="Expected cost data for at least one vintage.",
        ),
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    hrzs, hrz_tmps = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)

    c = conn.cursor()
    periods_horizons = c.execute("""
        SELECT balancing_type_horizon, period, horizon
        FROM periods_horizons
        WHERE temporal_scenario_id = {}
        AND subproblem_id = {}
        and stage_id = {}
        """.format(subscenarios.TEMPORAL_SCENARIO_ID, subproblem, stage))

    df_hrzs = cursor_to_df(hrzs)
    df_hrz_tmps = cursor_to_df(hrz_tmps)
    df_periods_hrzs = cursor_to_df(periods_horizons)

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=["inputs_temporal_horizons",
                "inputs_temporal_horizon_timepoints"])

    # Check dtypes horizons
    # Bug fix: keep the two tables' dtype error-columns in separate
    # variables; previously `error_columns` was overwritten by the
    # horizon_timepoints dtype check before the horizons numeric check
    # used it
    hrz_dtype_errors, hrz_error_columns = validate_dtypes(
        df_hrzs, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizons",
        severity="High",
        errors=hrz_dtype_errors)

    # Check dtypes horizon_timepoints
    hrz_tmp_dtype_errors, hrz_tmp_error_columns = validate_dtypes(
        df_hrz_tmps, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="High",
        errors=hrz_tmp_dtype_errors)

    # Check valid numeric columns are non-negative - horizons
    numeric_columns = [
        col for col in df_hrzs.columns if expected_dtypes[col] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(hrz_error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizons",
        severity="Mid",
        errors=validate_values(df_hrzs, valid_numeric_columns, "horizon",
                               min=0))

    # Check valid numeric columns are non-negative - horizon_timepoints
    # Bug fix: iterate df_hrz_tmps.columns here; previously df_hrzs.columns
    # was (re)used, so horizon_timepoints-only columns were never checked
    numeric_columns = [
        col for col in df_hrz_tmps.columns
        if expected_dtypes[col] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(hrz_tmp_error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="Mid",
        errors=validate_values(df_hrz_tmps, valid_numeric_columns,
                               ["horizon", "timepoint"], min=0))

    # One horizon cannot straddle multiple periods
    msg = "All timepoints within a horizon should belong to the same period."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="High",
        errors=validate_single_input(
            df=df_periods_hrzs,
            idx_col=["balancing_type_horizon", "horizon"],
            msg=msg))

    # Make sure there are no missing horizon inputs
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="High",
        errors=validate_missing_inputs(
            df=df_hrz_tmps,
            col="horizon",
            idx_col=["balancing_type_horizon", "timepoint"]))
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem,
                                    stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # Get the binary build generator inputs
    new_gen_costs, new_build_size = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)
    projects = get_projects(conn, scenario_id, subscenarios,
                            "capacity_type", "gen_new_bin")

    # Convert input data into pandas DataFrame
    cost_df = cursor_to_df(new_gen_costs)
    bld_size_df = cursor_to_df(new_build_size)

    # get the project lists
    cost_projects = cost_df["project"].unique()
    # NOTE(review): unlike cost_projects, this is not de-duplicated with
    # .unique() -- presumably each project has exactly one build-size row;
    # confirm
    bld_size_projects = bld_size_df["project"]

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=["inputs_project_new_cost",
                "inputs_project_new_binary_build_size"]
    )

    # Check dtypes - cost_df
    dtype_errors, error_columns = validate_dtypes(cost_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=dtype_errors
    )

    # Check valid numeric columns are non-negative - cost_df
    numeric_columns = [c for c in cost_df.columns
                       if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=validate_values(cost_df, valid_numeric_columns, min=0)
    )

    # Check dtypes - bld_size_df
    dtype_errors, error_columns = validate_dtypes(bld_size_df,
                                                  expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=dtype_errors
    )

    # Check valid numeric columns are non-negative - bld_size_df
    numeric_columns = [c for c in bld_size_df.columns
                       if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_values(bld_size_df, valid_numeric_columns, min=0)
    )

    # Check that all binary new build projects are available in >=1 vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="Mid",
        errors=validate_idxs(actual_idxs=cost_projects,
                             req_idxs=projects,
                             idx_label="project",
                             msg=msg)
    )

    # Check that all binary new build projects have build size specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_idxs(actual_idxs=bld_size_projects,
                             req_idxs=projects,
                             idx_label="project")
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the project-level inputs.

    Validations performed (each result is written to the validation table
    via write_validation_to_database):
      * dtypes of the joined project inputs
      * non-negativity of the numeric columns
      * no invalid capacity-type/operational-type combinations
      * capacity type and operational type are among the supported types
      * every portfolio project has an availability type, an operational
        type + balancing type, and a load zone specified

    :param scenario_id: scenario ID
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem: subproblem ID
    :param stage: stage ID
    :param conn: database connection
    :return:
    """
    c = conn.cursor()

    # Get the project inputs
    projects = get_inputs_from_database(scenario_id, subscenarios, subproblem,
                                        stage, conn)

    # Convert input data into pandas DataFrame
    df = cursor_to_df(projects)

    # Check data types:
    expected_dtypes = get_expected_dtypes(
        conn,
        ["inputs_project_portfolios",
         "inputs_project_availability",
         "inputs_project_load_zones",
         "inputs_project_operational_chars"]
    )
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars, inputs_project_portfolios",
        severity="High",
        errors=dtype_errors
    )

    # Check valid numeric columns are non-negative
    # (columns that already failed the dtype check above are excluded;
    # the comprehension variable does not leak and so does not clobber the
    # cursor `c` in Python 3)
    numeric_columns = [c for c in df.columns
                       if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars",
        severity="High",
        errors=validate_values(df, valid_numeric_columns, min=0)
    )

    # Check that we're not combining incompatible cap-types and op-types
    cols = ["capacity_type", "operational_type"]
    invalid_combos = c.execute(
        """
        SELECT {} FROM mod_capacity_and_operational_type_invalid_combos
        """.format(",".join(cols))
    ).fetchall()
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars, inputs_project_portfolios",
        severity="High",
        errors=validate_columns(df, cols, invalids=invalid_combos)
    )

    # Check that capacity type is valid
    # Note: foreign key already ensures this!
    valid_cap_types = c.execute(
        """SELECT capacity_type from mod_capacity_types"""
    ).fetchall()
    valid_cap_types = [v[0] for v in valid_cap_types]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_portfolios",
        severity="High",
        errors=validate_columns(df, "capacity_type", valids=valid_cap_types)
    )

    # Check that operational type is valid
    # Note: foreign key already ensures this!
    valid_op_types = c.execute(
        """SELECT operational_type from mod_operational_types"""
    ).fetchall()
    valid_op_types = [v[0] for v in valid_op_types]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_portfolios",
        severity="High",
        errors=validate_columns(df, "operational_type", valids=valid_op_types)
    )

    # Check that all portfolio projects are present in the availability inputs
    msg = "All projects in the portfolio should have an availability type " \
          "specified in the inputs_project_availability table."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability",
        severity="High",
        errors=validate_missing_inputs(df, "availability_type", msg=msg)
    )

    # Check that all portfolio projects are present in the opchar inputs
    msg = "All projects in the portfolio should have an operational type " \
          "and balancing type specified in the " \
          "inputs_project_operational_chars table."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars",
        severity="High",
        errors=validate_missing_inputs(df,
                                       ["operational_type",
                                        "balancing_type_project"],
                                       msg=msg)
    )

    # Check that all portfolio projects are present in the load zone inputs
    msg = "All projects in the portfolio should have a load zone " \
          "specified in the inputs_project_load_zones table."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_load_zones",
        severity="High",
        errors=validate_missing_inputs(df, "load_zone", msg=msg)
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn): """ Get inputs from database and validate the inputs :param subscenarios: SubScenarios object with all subscenario info :param subproblem: :param stage: :param conn: database connection :return: """ # Get the fuel input data fuels, fuel_prices = get_inputs_from_database(scenario_id, subscenarios, subproblem, stage, conn) # Get the projects fuels c1 = conn.cursor() projects = c1.execute("""SELECT project, fuel FROM inputs_project_portfolios INNER JOIN (SELECT project, fuel FROM inputs_project_operational_chars WHERE project_operational_chars_scenario_id = {} AND fuel IS NOT NULL) AS op_char USING (project) WHERE project_portfolio_scenario_id = {}""".format( subscenarios.PROJECT_OPERATIONAL_CHARS_SCENARIO_ID, subscenarios.PROJECT_PORTFOLIO_SCENARIO_ID)) # Get the relevant periods and months c2 = conn.cursor() periods_months = c2.execute("""SELECT DISTINCT period, month FROM inputs_temporal WHERE temporal_scenario_id = {} AND subproblem_id = {} AND stage_id = {};""".format(subscenarios.TEMPORAL_SCENARIO_ID, subproblem, stage)) # Convert input data into pandas DataFrame fuels_df = cursor_to_df(fuels) fuel_prices_df = cursor_to_df(fuel_prices) prj_df = cursor_to_df(projects) # Get relevant lists fuels = fuels_df["fuel"].to_list() actual_fuel_periods_months = list( fuel_prices_df[["fuel", "period", "month"]].itertuples(index=False, name=None)) req_fuel_periods_months = [(f, p, m) for (p, m) in periods_months for f in fuels] # Check data types expected_dtypes = get_expected_dtypes( conn, ["inputs_project_fuels", "inputs_project_fuel_prices"]) dtype_errors, error_columns = validate_dtypes(fuels_df, expected_dtypes) write_validation_to_database(conn=conn, scenario_id=scenario_id, subproblem_id=subproblem, stage_id=stage, gridpath_module=__name__, db_table="inputs_project_fuels", severity="High", errors=dtype_errors) dtype_errors, error_columns = validate_dtypes(fuel_prices_df, expected_dtypes) 
write_validation_to_database(conn=conn, scenario_id=scenario_id, subproblem_id=subproblem, stage_id=stage, gridpath_module=__name__, db_table="inputs_project_fuel_prices", severity="High", errors=dtype_errors) # TODO: couldn't this be a simple foreign key or is NULL not allowed then? # TODO: should this check be in projects.init instead? # Check that fuels specified for projects are valid fuels write_validation_to_database(conn=conn, scenario_id=scenario_id, subproblem_id=subproblem, stage_id=stage, gridpath_module=__name__, db_table="inputs_project_operational_chars", severity="High", errors=validate_columns(prj_df, "fuel", valids=fuels)) # Check that fuel prices exist for the period and month write_validation_to_database(conn=conn, scenario_id=scenario_id, subproblem_id=subproblem, stage_id=stage, gridpath_module=__name__, db_table="inputs_project_fuel_prices", severity="High", errors=validate_idxs( actual_idxs=actual_fuel_periods_months, req_idxs=req_fuel_periods_months, idx_label="(fuel, period, month)"))
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the transmission hurdle-rate
    inputs.

    Validations performed (each result is written to the validation table
    via write_validation_to_database):
      * dtypes of the hurdle-rate inputs
      * non-negativity of the numeric columns
      * hurdle rates are specified for every (transmission_line, period)

    :param scenario_id: scenario ID
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem: subproblem ID
    :param stage: stage ID
    :param conn: database connection
    :return:
    """
    hurdle_rates = get_inputs_from_database(scenario_id, subscenarios,
                                            subproblem, stage, conn)

    df = cursor_to_df(hurdle_rates)

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_transmission_hurdle_rates"])

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_hurdle_rates",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    # (columns that already failed the dtype check above are excluded)
    numeric_columns = [
        c for c in df.columns if expected_dtypes[c] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_hurdle_rates",
        severity="High",
        # "transmission_line" is passed positionally -- presumably the index
        # column used to label errors; confirm against validate_values'
        # signature
        errors=validate_values(df, valid_numeric_columns,
                               "transmission_line", min=0),
    )

    # Check that hurdle rates are specified for each tx line and period
    # (previous comment about "binary new build tx lines ... vintage" was a
    # copy-paste from another module; the msg below describes the real check)
    msg = ("Expected hurdle rates specified for each modeling period when "
           "transmission hurdle rates feature is on.")
    cols = [
        "hurdle_rate_positive_direction_per_mwh",
        "hurdle_rate_negative_direction_per_mwh",
    ]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_hurdle_rates",
        severity="Low",
        errors=validate_missing_inputs(df=df,
                                       col=cols,
                                       idx_col=["transmission_line",
                                                "period"],
                                       msg=msg),
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the transmission inputs.

    Validations performed (each result is written to the validation table
    via write_validation_to_database):
      * dtypes of the joined transmission inputs
      * non-negativity of the numeric columns
      * no invalid tx capacity-type/operational-type combinations
      * reactance is strictly positive
      * every portfolio tx line has an operational type and load zones
        from/to specified
      * the tx load zones are among the active load zones

    Fix: the db_table label on the invalid-combos check previously read
    "inputs_tranmission_portfolios" (typo), which put a nonexistent table
    name into the validation results; corrected to
    "inputs_transmission_portfolios".

    :param scenario_id: scenario ID
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem: subproblem ID
    :param stage: stage ID
    :param conn: database connection
    :return:
    """
    c = conn.cursor()

    # Get the transmission inputs
    transmission_lines = get_inputs_from_database(scenario_id, subscenarios,
                                                  subproblem, stage, conn)

    # Convert input data into pandas DataFrame
    df = cursor_to_df(transmission_lines)

    # Check data types:
    expected_dtypes = get_expected_dtypes(
        conn,
        [
            "inputs_transmission_portfolios",
            "inputs_transmission_availability",
            "inputs_transmission_load_zones",
            "inputs_transmission_operational_chars",
        ],
    )
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_portfolios, "
                 "inputs_transmission_load_zones, "
                 "inputs_transmission_operational_chars",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    # (columns that already failed the dtype check above are excluded)
    numeric_columns = [
        c for c in df.columns if expected_dtypes[c] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_operational_chars",
        severity="High",
        errors=validate_values(df, valid_numeric_columns, min=0),
    )

    # Ensure we're not combining incompatible capacity and operational types
    cols = ["capacity_type", "operational_type"]
    invalid_combos = c.execute("""
        SELECT {} FROM mod_tx_capacity_and_tx_operational_type_invalid_combos
        """.format(",".join(cols))).fetchall()
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        # typo fixed: was "inputs_tranmission_portfolios"
        db_table="inputs_transmission_operational_chars, "
                 "inputs_transmission_portfolios",
        severity="High",
        errors=validate_columns(df, cols, invalids=invalid_combos),
    )

    # Check reactance > 0
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_operational_chars",
        severity="High",
        errors=validate_values(df, ["reactance_ohms"], min=0,
                               strict_min=True),
    )

    # Check that all portfolio tx lines are present in the opchar inputs
    msg = ("All tx lines in the portfolio should have an operational type "
           "specified in the inputs_transmission_operational_chars table.")
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_operational_chars",
        severity="High",
        errors=validate_missing_inputs(df, ["operational_type"],
                                       idx_col="transmission_line",
                                       msg=msg),
    )

    # Check that all portfolio tx lines are present in the load zone inputs
    msg = ("All tx lines in the portfolio should have a load zone from/to "
           "specified in the inputs_transmission_load_zones table.")
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_load_zones",
        severity="High",
        errors=validate_missing_inputs(df,
                                       ["load_zone_from", "load_zone_to"],
                                       idx_col="transmission_line",
                                       msg=msg),
    )

    # Check that all tx load zones are part of the active load zones
    load_zones = get_load_zones(conn, subscenarios)
    for col in ["load_zone_from", "load_zone_to"]:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_transmission_load_zones",
            severity="High",
            errors=validate_columns(df, col, valids=load_zones),
        )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs for the tx_spec
    (specified transmission) capacity type.

    Validations performed (each result is written to the validation table
    via write_validation_to_database):
      * dtypes of the specified-capacity inputs
      * every tx_spec line has capacity specified in at least one period
      * no missing min_mw/max_mw values in existing rows
      * min_mw <= max_mw for each (transmission_line, period)

    Fix: the min<=max check previously used
    db_table="inputs_project_new_potential" and
    idx_col=["project", "period"] -- a copy-paste from a project-side
    module. This df is indexed by transmission_line (there is no "project"
    column here), and the data comes from
    inputs_transmission_specified_capacity; both values corrected.

    :param scenario_id: scenario ID
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem: subproblem ID
    :param stage: stage ID
    :param conn: database connection
    :return:
    """
    tx_capacities = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)

    tx_lines = get_tx_lines(conn, scenario_id, subscenarios, "capacity_type",
                            "tx_spec")

    # Convert input data into pandas DataFrame and extract data
    df = cursor_to_df(tx_capacities)
    spec_tx_lines = df["transmission_line"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_transmission_specified_capacity"])

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=dtype_errors)

    # Ensure tx_line capacity is specified in at least 1 period
    msg = "Expected specified capacity for at least one period."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_idxs(actual_idxs=spec_tx_lines,
                             req_idxs=tx_lines,
                             idx_label="transmission_line",
                             msg=msg))

    # Check for missing values (vs. missing row entries above)
    cols = ["min_mw", "max_mw"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_missing_inputs(df, cols))

    # check that min <= max
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_column_monotonicity(
            df=df,
            cols=cols,
            idx_col=["transmission_line", "period"]))
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs for the stor_new_lin
    (linear new-build storage) capacity type.

    Validations performed (each result is written to the validation table
    via write_validation_to_database):
      * dtypes of the new-cost inputs
      * non-negativity of the numeric columns
      * every stor_new_lin project has cost data for at least one vintage
      * max cumulative new build (MW and MWh) does not decrease over
        vintages
      * min cumulative new build <= max cumulative new build (MW and MWh)

    :param scenario_id: scenario ID
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem: subproblem ID
    :param stage: stage ID
    :param conn: database connection
    :return:
    """
    # NOTE(review): sibling modules call this helper
    # get_module_specific_inputs_from_database -- confirm
    # get_model_inputs_from_database is the intended name here
    new_stor_costs = get_model_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    projects = get_projects(
        conn, scenario_id, subscenarios, "capacity_type", "stor_new_lin"
    )

    # Convert input data into pandas DataFrame
    cost_df = cursor_to_df(new_stor_costs)
    df_cols = cost_df.columns

    # get the project lists
    cost_projects = cost_df["project"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=["inputs_project_new_cost", "inputs_project_new_potential"]
    )

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(cost_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    # (columns that already failed the dtype check above are excluded)
    numeric_columns = [c for c in cost_df.columns
                       if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=validate_values(cost_df, valid_numeric_columns, min=0),
    )

    # Check that all new-build storage projects have cost data for at
    # least one vintage ("binary" in the original comment was a copy-paste
    # from the stor_new_bin module; this is the linear new-build type)
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="Mid",
        errors=validate_idxs(
            actual_idxs=cost_projects,
            req_idxs=projects,
            idx_label="project",
            msg=msg
        ),
    )

    cols = ["min_cumulative_new_build_mw", "max_cumulative_new_build_mw"]
    # Check that maximum new build doesn't decrease
    # (the potential columns are only present if a new-potential
    # subscenario is included, hence the membership checks)
    if cols[1] in df_cols:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="Mid",
            errors=validate_row_monotonicity(
                df=cost_df,
                col=cols[1],
                rank_col="vintage"
            ),
        )

    # check that min build <= max build
    if set(cols).issubset(set(df_cols)):
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="High",
            errors=validate_column_monotonicity(
                df=cost_df,
                cols=cols,
                idx_col=["project", "vintage"]
            ),
        )

    cols = ["min_cumulative_new_build_mwh", "max_cumulative_new_build_mwh"]
    # Check that maximum new build doesn't decrease - MWh
    if cols[1] in df_cols:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="Mid",
            errors=validate_row_monotonicity(
                df=cost_df,
                col=cols[1],
                rank_col="vintage"
            ),
        )

    # check that min build <= max build - MWh
    if set(cols).issubset(set(df_cols)):
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="High",
            errors=validate_column_monotonicity(
                df=cost_df,
                cols=cols,
                idx_col=["project", "vintage"]
            ),
        )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs for the stor_new_bin
    (binary new-build storage) capacity type.

    Validations performed (each result is written to the validation table
    via write_validation_to_database):
      * dtypes of the new-cost and binary-build-size inputs
      * non-negativity of the numeric columns in both tables
      * every stor_new_bin project has cost data for at least one vintage
      * every stor_new_bin project has a build size specified

    :param scenario_id: scenario ID
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem: subproblem ID
    :param stage: stage ID
    :param conn: database connection
    :return:
    """
    # TODO: check that there are no minimum duration inputs for this type
    #  (duration is specified by specifying the build size in mw and mwh)
    #  Maybe also check all other required / not required inputs?
    #  --> see example in gen_must_run operational_type. Seems very verbose
    #  and hard to maintain. Is there a way to generalize this?

    # Get the binary build generator inputs
    new_stor_costs, new_stor_build_size = \
        get_module_specific_inputs_from_database(
            scenario_id, subscenarios, subproblem, stage, conn)

    # Projects of the "stor_new_bin" capacity type in the portfolio
    projects = get_projects(conn, scenario_id, subscenarios, "capacity_type",
                            "stor_new_bin")

    # Convert input data into pandas DataFrame
    cost_df = cursor_to_df(new_stor_costs)
    bld_size_df = cursor_to_df(new_stor_build_size)

    # get the project lists
    cost_projects = cost_df["project"].unique()
    bld_size_projects = bld_size_df["project"]

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=[
            "inputs_project_new_cost",
            "inputs_project_new_binary_build_size"
        ])

    # Check dtypes - cost_df
    dtype_errors, error_columns = validate_dtypes(cost_df, expected_dtypes)
    write_validation_to_database(conn=conn,
                                 scenario_id=scenario_id,
                                 subproblem_id=subproblem,
                                 stage_id=stage,
                                 gridpath_module=__name__,
                                 db_table="inputs_project_new_cost",
                                 severity="High",
                                 errors=dtype_errors)

    # Check valid numeric columns are non-negative - cost_df
    # (columns that already failed the dtype check above are excluded)
    numeric_columns = [
        c for c in cost_df.columns if expected_dtypes[c] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(conn=conn,
                                 scenario_id=scenario_id,
                                 subproblem_id=subproblem,
                                 stage_id=stage,
                                 gridpath_module=__name__,
                                 db_table="inputs_project_new_cost",
                                 severity="High",
                                 errors=validate_values(cost_df,
                                                        valid_numeric_columns,
                                                        min=0))

    # Check dtypes - bld_size_df
    dtype_errors, error_columns = validate_dtypes(bld_size_df,
                                                  expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=dtype_errors)

    # Check valid numeric columns are non-negative - bld_size_df
    numeric_columns = [
        c for c in bld_size_df.columns if expected_dtypes[c] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_values(bld_size_df, valid_numeric_columns, min=0))

    # Check that all binary new build projects are available in >=1 vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(conn=conn,
                                 scenario_id=scenario_id,
                                 subproblem_id=subproblem,
                                 stage_id=stage,
                                 gridpath_module=__name__,
                                 db_table="inputs_project_new_cost",
                                 severity="High",
                                 errors=validate_idxs(
                                     actual_idxs=cost_projects,
                                     req_idxs=projects,
                                     idx_label="project",
                                     msg=msg))

    # Check that all binary new build projects have build size specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_idxs(actual_idxs=bld_size_projects,
                             req_idxs=projects,
                             idx_label="project"))
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs for the gen_ret_bin
    (binary-retirement generator) capacity type.

    Validations performed (each result is written to the validation table
    via write_validation_to_database):
      * dtypes of the specified-capacity and fixed-cost inputs
      * non-negativity of the numeric columns
      * every gen_ret_bin project has capacity & fixed cost in at least
        one period
      * no missing capacity/fixed-cost values in existing rows

    :param scenario_id: scenario ID
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem: subproblem ID
    :param stage: stage ID
    :param conn: database connection
    :return:
    """
    gen_ret_bin_params = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)

    projects = get_projects(conn, scenario_id, subscenarios, "capacity_type",
                            "gen_ret_bin")

    # Convert input data into pandas DataFrame and extract data
    df = cursor_to_df(gen_ret_bin_params)
    spec_projects = df["project"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=[
            "inputs_project_specified_capacity",
            "inputs_project_specified_fixed_cost"
        ])

    # Every validation below targets the same tables with the same severity,
    # so share those keyword arguments
    shared_kwargs = dict(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_specified_capacity, "
                 "inputs_project_specified_fixed_cost",
        severity="High",
    )

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(errors=dtype_errors, **shared_kwargs)

    # Check valid numeric columns (i.e. those that passed the dtype check)
    # are non-negative
    numeric_columns = {
        col for col in df.columns if expected_dtypes[col] == "numeric"
    }
    valid_numeric_columns = numeric_columns - set(error_columns)
    write_validation_to_database(
        errors=validate_values(df, valid_numeric_columns, min=0),
        **shared_kwargs)

    # Ensure project capacity & fixed cost is specified in at least 1 period
    msg = "Expected specified capacity & fixed costs for at least one period."
    write_validation_to_database(
        errors=validate_idxs(actual_idxs=spec_projects,
                             req_idxs=projects,
                             idx_label="project",
                             msg=msg),
        **shared_kwargs)

    # Check for missing values (vs. missing row entries above)
    cols = ["specified_capacity_mw", "annual_fixed_cost_per_mw_year"]
    write_validation_to_database(
        errors=validate_missing_inputs(df, cols), **shared_kwargs)
def generic_validate_project_bas(scenario_id, subscenarios, subproblem, stage, conn, reserve_type, project_ba_subscenario_id, ba_subscenario_id):
    """
    Validate the project-to-BA assignments for the given reserve type.

    Validations performed (each result is written to the validation table
    via write_validation_to_database):
      * every reserve BA has at least one project assigned to it
      * every project with a reserve derate also has a BA specified

    Fix: the BA query previously hard-coded
    subscenarios.REGULATION_UP_BA_SCENARIO_ID, so for any reserve type
    other than regulation-up the wrong subscenario's BAs were fetched
    (defeating the purpose of this generic function); it now uses the
    ba_subscenario_id argument, consistent with the
    generic_get_inputs_from_database call above.

    :param scenario_id: scenario ID
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem: subproblem ID
    :param stage: stage ID
    :param conn: database connection
    :param reserve_type: the reserve type (used to build table/column names)
    :param project_ba_subscenario_id: project-BA subscenario ID
    :param ba_subscenario_id: BA subscenario ID for this reserve type
    :return:
    """
    # TODO: is this actually needed?
    subproblem = 1 if subproblem == "" else subproblem
    stage = 1 if stage == "" else stage

    project_bas, prj_derates = generic_get_inputs_from_database(
        scenario_id=scenario_id,
        subscenarios=subscenarios,
        subproblem=subproblem,
        stage=stage,
        conn=conn,
        reserve_type=reserve_type,
        project_ba_subscenario_id=project_ba_subscenario_id,
        ba_subscenario_id=ba_subscenario_id)

    # Convert input data into pandas DataFrame
    df = cursor_to_df(project_bas)
    df_derate = cursor_to_df(prj_derates).dropna()
    bas_w_project = df["{}_ba".format(reserve_type)].unique()
    projects_w_ba = df["project"].unique()
    projects_w_derate = df_derate["project"].unique()

    # Get the required reserve bas
    # (use ba_subscenario_id, not a hard-coded regulation-up subscenario)
    c = conn.cursor()
    bas = c.execute("""SELECT {}_ba FROM inputs_geography_{}_bas
        WHERE {}_ba_scenario_id = {}
        """.format(reserve_type, reserve_type, reserve_type,
                   ba_subscenario_id))
    bas = [b[0] for b in bas]  # convert to list

    # Check that each reserve BA has at least one project assigned to it
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_{}_bas".format(reserve_type),
        severity="High",
        errors=validate_idxs(actual_idxs=bas_w_project,
                             req_idxs=bas,
                             idx_label="{}_ba".format(reserve_type),
                             msg="Each reserve BA needs at least 1 "
                                 "project assigned to it."))

    # Check that all projects w derates have a BA specified
    msg = "Project has a reserve derate specified but no relevant BA."
    write_validation_to_database(conn=conn,
                                 scenario_id=scenario_id,
                                 subproblem_id=subproblem,
                                 stage_id=stage,
                                 gridpath_module=__name__,
                                 db_table="inputs_project_operational_chars",
                                 severity="Low",
                                 errors=validate_idxs(
                                     actual_idxs=projects_w_ba,
                                     req_idxs=projects_w_derate,
                                     idx_label="project",
                                     msg=msg))