def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # Fetch the ramp rate inputs and turn them into a DataFrame
    ramp_rate_inputs = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )
    ramp_df = cursor_to_df(ramp_rate_inputs)

    # The ramp rate is a fraction, so flag values outside [0, 1]
    fraction_errors = validate_values(
        ramp_df, ["lf_reserves_up_ramp_rate"], min=0, max=1
    )
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars",
        severity="Mid",
        errors=fraction_errors,
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get exogenous availability inputs from the database and validate them.

    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :return:
    """
    availabilities = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    df = cursor_to_df(availabilities)
    idx_cols = ["project", "timepoint"]
    value_cols = ["availability_derate"]

    # Check data types availability
    expected_dtypes = get_expected_dtypes(
        conn,
        ["inputs_project_availability", "inputs_project_availability_exogenous"],
    )
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_exogenous",
        severity="High",
        errors=dtype_errors,
    )

    # Check for missing inputs
    msg = ("If not specified, availability is assumed to be 100%. If you "
           "don't want to specify any availability derates, simply leave the "
           "exogenous_availability_scenario_id empty and this message will "
           "disappear.")
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_exogenous",
        severity="Low",
        errors=validate_missing_inputs(df, value_cols, idx_cols, msg),
    )

    # Check for correct sign
    # Bug fix: the value column is named "availability_derate", not
    # "availability", so the old guard ("availability" not in error_columns)
    # could never skip this check when the column had dtype errors.
    if "availability_derate" not in error_columns:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_availability_exogenous",
            severity="High",
            errors=validate_values(df, value_cols, min=0, max=1),
        )
def validate_hydro_opchars(scenario_id, subscenarios, subproblem, stage, conn, op_type):
    """
    Get hydro operational characteristics from the database and validate them:
    missing inputs, value ranges, and min <= avg <= max ordering.

    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :param op_type: operational type whose hydro inputs are being validated
    :return: whatever write_validation_to_database returns for the
        fraction-range check (presumably the validation errors; the caller
        uses this — TODO confirm the helper's return contract)
    """
    hydro_chars = get_hydro_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn, op_type
    )

    # Convert input data into pandas DataFrame
    df = cursor_to_df(hydro_chars)
    value_cols = ["min_power_fraction", "average_power_fraction", "max_power_fraction"]

    # Check for missing inputs (one row per project-horizon expected)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_hydro_operational_chars",
        severity="High",
        errors=validate_missing_inputs(df, value_cols, ["project", "horizon"]),
    )

    # Check for sign (should be percent fraction, i.e. within [0, 1])
    hydro_opchar_fraction_error = write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_hydro_operational_chars",
        severity="Low",
        errors=validate_values(df, value_cols, min=0, max=1),
    )

    # Check min <= avg <= max across the three fraction columns
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_hydro_operational_chars",
        severity="Mid",
        errors=validate_column_monotonicity(
            df=df, cols=value_cols, idx_col=["project", "horizon"]
        ),
    )

    return hydro_opchar_fraction_error
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # TODO: check that hours in full period is within x and y
    #  ("within" check or "validate" check in param definition returns obscure
    #  error message that isn't helpful).
    period_inputs = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )
    periods_df = cursor_to_df(period_inputs)

    # Expected dtypes from the table definition; hours_in_period_timepoints
    # is hard-coded as numeric
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_temporal_periods"]
    )
    expected_dtypes["hours_in_period_timepoints"] = "numeric"

    # Flag columns whose values don't match the expected data type
    dtype_errors, error_columns = validate_dtypes(periods_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_periods",
        severity="High",
        errors=dtype_errors,
    )

    # Numeric columns that passed the dtype check must be non-negative
    numeric_cols = [
        col for col in periods_df.columns if expected_dtypes[col] == "numeric"
    ]
    checkable_cols = set(numeric_cols) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_periods",
        severity="Mid",
        errors=validate_values(periods_df, checkable_cols, "period", min=0),
    )
def validate_var_profiles(scenario_id, subscenarios, subproblem, stage, conn, op_type):
    """
    Fetch variable generator profiles for the given operational type and
    validate them.

    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :param op_type:
    :return: whatever write_validation_to_database returns for the
        cap-factor range check
    """
    profile_inputs = get_var_profile_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn, op_type
    )

    profile_df = cursor_to_df(profile_inputs)
    value_cols = ["cap_factor"]

    # Every project-timepoint should have a cap factor specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_variable_generator_profiles",
        severity="High",
        errors=validate_missing_inputs(
            profile_df, value_cols, ["project", "timepoint"]
        ),
    )

    # Cap factors are fractions, so flag values outside [0, 1]
    cap_factor_validation_error = write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_variable_generator_profiles",
        severity="Low",
        errors=validate_values(profile_df, ["cap_factor"], min=0, max=1),
    )

    return cap_factor_validation_error
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    duration_inputs = get_model_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )
    duration_df = cursor_to_df(duration_inputs)
    param_cols = ["min_duration_for_full_capacity_credit_hours"]

    # The duration parameter must be strictly positive
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_elcc_chars",
        severity="High",
        errors=validate_values(duration_df, param_cols, min=0, strict_min=True),
    )

    # The duration parameter must be specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_elcc_chars",
        severity="High",
        errors=validate_missing_inputs(duration_df, param_cols),
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    project_fractions = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    df = cursor_to_df(project_fractions)

    # Make sure the fraction is within [0, 1]
    # (fixed comment: this is the range check, not the missing-input check)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_elcc_chars",
        severity="High",
        errors=validate_values(df, ["elcc_simple_fraction"], min=0, max=1),
    )

    # Make sure fraction is specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_elcc_chars",
        severity="High",
        errors=validate_missing_inputs(df, "elcc_simple_fraction"),
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get new-build storage cost/potential inputs from the database and
    validate them: dtypes, signs, cost coverage, and min/max potential
    consistency.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    new_stor_costs = get_model_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    projects = get_projects(
        conn, scenario_id, subscenarios, "capacity_type", "stor_new_lin"
    )

    # Convert input data into pandas DataFrame
    cost_df = cursor_to_df(new_stor_costs)
    df_cols = cost_df.columns

    # get the project lists
    cost_projects = cost_df["project"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=["inputs_project_new_cost", "inputs_project_new_potential"]
    )

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(cost_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    # (columns that failed the dtype check are excluded)
    numeric_columns = [c for c in cost_df.columns if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=validate_values(cost_df, valid_numeric_columns, min=0),
    )

    # Check that all new-build storage projects have cost data in >= 1 vintage
    # (comment fixed: these are stor_new_lin projects, not binary new build)
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="Mid",
        errors=validate_idxs(
            actual_idxs=cost_projects, req_idxs=projects, idx_label="project",
            msg=msg
        ),
    )

    cols = ["min_cumulative_new_build_mw", "max_cumulative_new_build_mw"]
    # Check that maximum new build doesn't decrease over vintages (MW)
    if cols[1] in df_cols:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="Mid",
            errors=validate_row_monotonicity(
                df=cost_df, col=cols[1], rank_col="vintage"
            ),
        )

    # check that min build <= max build (MW)
    if set(cols).issubset(set(df_cols)):
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="High",
            errors=validate_column_monotonicity(
                df=cost_df, cols=cols, idx_col=["project", "vintage"]
            ),
        )

    cols = ["min_cumulative_new_build_mwh", "max_cumulative_new_build_mwh"]
    # Check that maximum new build doesn't decrease - MWh
    if cols[1] in df_cols:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="Mid",
            errors=validate_row_monotonicity(
                df=cost_df, col=cols[1], rank_col="vintage"
            ),
        )

    # check that min build <= max build - MWh
    if set(cols).issubset(set(df_cols)):
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="High",
            errors=validate_column_monotonicity(
                df=cost_df, cols=cols, idx_col=["project", "vintage"]
            ),
        )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get transmission inputs from the database and validate them: dtypes,
    signs, cap-type/op-type combos, reactance, and load zone coverage.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    c = conn.cursor()

    # Get the transmission inputs
    transmission_lines = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    # Convert input data into pandas DataFrame
    df = cursor_to_df(transmission_lines)

    # Check data types:
    expected_dtypes = get_expected_dtypes(
        conn,
        [
            "inputs_transmission_portfolios",
            "inputs_transmission_availability",
            "inputs_transmission_load_zones",
            "inputs_transmission_operational_chars",
        ],
    )
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_portfolios, "
                 "inputs_transmission_load_zones, "
                 "inputs_transmission_operational_chars",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    # (renamed the comprehension variable so it no longer shadows cursor "c")
    numeric_columns = [
        col for col in df.columns if expected_dtypes[col] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_operational_chars",
        severity="High",
        errors=validate_values(df, valid_numeric_columns, min=0),
    )

    # Ensure we're not combining incompatible capacity and operational types
    cols = ["capacity_type", "operational_type"]
    invalid_combos = c.execute(
        """
        SELECT {}
        FROM mod_tx_capacity_and_tx_operational_type_invalid_combos
        """.format(",".join(cols))
    ).fetchall()
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        # Bug fix: was "inputs_tranmission_portfolios" (typo), which would
        # have been written verbatim into the validation results table
        db_table="inputs_transmission_operational_chars, "
                 "inputs_transmission_portfolios",
        severity="High",
        errors=validate_columns(df, cols, invalids=invalid_combos),
    )

    # Check reactance > 0
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_operational_chars",
        severity="High",
        errors=validate_values(df, ["reactance_ohms"], min=0, strict_min=True),
    )

    # Check that all portfolio tx lines are present in the opchar inputs
    msg = ("All tx lines in the portfolio should have an operational type "
           "specified in the inputs_transmission_operational_chars table.")
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_operational_chars",
        severity="High",
        errors=validate_missing_inputs(df, ["operational_type"],
                                       idx_col="transmission_line", msg=msg),
    )

    # Check that all portfolio tx lines are present in the load zone inputs
    msg = ("All tx lines in the portfolio should have a load zone from/to "
           "specified in the inputs_transmission_load_zones table.")
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_load_zones",
        severity="High",
        errors=validate_missing_inputs(df, ["load_zone_from", "load_zone_to"],
                                       idx_col="transmission_line", msg=msg),
    )

    # Check that all tx load zones are part of the active load zones
    load_zones = get_load_zones(conn, subscenarios)
    for col in ["load_zone_from", "load_zone_to"]:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_transmission_load_zones",
            severity="High",
            errors=validate_columns(df, col, valids=load_zones),
        )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get project inputs from the database and validate them: dtypes, signs,
    valid cap-type/op-type combinations, and coverage of portfolio projects
    in the availability, opchar, and load zone tables.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    c = conn.cursor()

    # Get the project inputs
    projects = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    # Convert input data into pandas DataFrame
    df = cursor_to_df(projects)

    # Check data types:
    expected_dtypes = get_expected_dtypes(
        conn,
        ["inputs_project_portfolios", "inputs_project_availability",
         "inputs_project_load_zones", "inputs_project_operational_chars"]
    )
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars, inputs_project_portfolios",
        severity="High",
        errors=dtype_errors
    )

    # Check valid numeric columns are non-negative
    # (columns that failed the dtype check are excluded)
    numeric_columns = [c for c in df.columns if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars",
        severity="High",
        errors=validate_values(df, valid_numeric_columns, min=0)
    )

    # Check that we're not combining incompatible cap-types and op-types
    cols = ["capacity_type", "operational_type"]
    invalid_combos = c.execute(
        """
        SELECT {} FROM mod_capacity_and_operational_type_invalid_combos
        """.format(",".join(cols))
    ).fetchall()
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars, inputs_project_portfolios",
        severity="High",
        errors=validate_columns(df, cols, invalids=invalid_combos)
    )

    # Check that capacity type is valid
    # Note: foreign key already ensures this!
    valid_cap_types = c.execute(
        """SELECT capacity_type from mod_capacity_types"""
    ).fetchall()
    # Flatten the list of 1-tuples returned by fetchall
    valid_cap_types = [v[0] for v in valid_cap_types]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_portfolios",
        severity="High",
        errors=validate_columns(df, "capacity_type", valids=valid_cap_types)
    )

    # Check that operational type is valid
    # Note: foreign key already ensures this!
    valid_op_types = c.execute(
        """SELECT operational_type from mod_operational_types"""
    ).fetchall()
    valid_op_types = [v[0] for v in valid_op_types]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_portfolios",
        severity="High",
        errors=validate_columns(df, "operational_type", valids=valid_op_types)
    )

    # Check that all portfolio projects are present in the availability inputs
    msg = "All projects in the portfolio should have an availability type " \
          "specified in the inputs_project_availability table."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability",
        severity="High",
        errors=validate_missing_inputs(df, "availability_type", msg=msg)
    )

    # Check that all portfolio projects are present in the opchar inputs
    msg = "All projects in the portfolio should have an operational type " \
          "and balancing type specified in the " \
          "inputs_project_operational_chars table."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars",
        severity="High",
        errors=validate_missing_inputs(df,
                                       ["operational_type",
                                        "balancing_type_project"],
                                       msg=msg)
    )

    # Check that all portfolio projects are present in the load zone inputs
    msg = "All projects in the portfolio should have a load zone " \
          "specified in the inputs_project_load_zones table."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_load_zones",
        severity="High",
        errors=validate_missing_inputs(df, "load_zone", msg=msg)
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    hurdle_rate_inputs = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )
    hurdle_df = cursor_to_df(hurdle_rate_inputs)

    # Expected dtypes for the hurdle rates table
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_transmission_hurdle_rates"]
    )

    # Flag columns whose values don't match the expected data type
    dtype_errors, error_columns = validate_dtypes(hurdle_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_hurdle_rates",
        severity="High",
        errors=dtype_errors,
    )

    # Numeric columns that passed the dtype check must be non-negative
    numeric_cols = [
        col for col in hurdle_df.columns if expected_dtypes[col] == "numeric"
    ]
    checkable_cols = set(numeric_cols) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_hurdle_rates",
        severity="High",
        errors=validate_values(
            hurdle_df, checkable_cols, "transmission_line", min=0
        ),
    )

    # Hurdle rates should be specified for every tx line and period when the
    # feature is on
    msg = ("Expected hurdle rates specified for each modeling period when "
           "transmission hurdle rates feature is on.")
    rate_cols = [
        "hurdle_rate_positive_direction_per_mwh",
        "hurdle_rate_negative_direction_per_mwh",
    ]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_hurdle_rates",
        severity="Low",
        errors=validate_missing_inputs(
            df=hurdle_df,
            col=rate_cols,
            idx_col=["transmission_line", "period"],
            msg=msg,
        ),
    )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    tx_cost_inputs = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )
    new_build_tx_lines = get_tx_lines(
        conn, scenario_id, subscenarios, "capacity_type", "tx_new_lin"
    )

    # Convert input data into pandas DataFrame
    cost_df = cursor_to_df(tx_cost_inputs)

    # Tx lines for which cost data exists
    tx_lines_w_cost = cost_df["transmission_line"].unique()

    # Expected dtypes for the new-cost table
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_transmission_new_cost"]
    )

    # Flag columns whose values don't match the expected data type
    dtype_errors, error_columns = validate_dtypes(cost_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_new_cost",
        severity="High",
        errors=dtype_errors,
    )

    # Numeric columns that passed the dtype check must be non-negative
    numeric_cols = [
        col for col in cost_df.columns if expected_dtypes[col] == "numeric"
    ]
    checkable_cols = set(numeric_cols) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_new_cost",
        severity="High",
        errors=validate_values(
            cost_df, checkable_cols, "transmission_line", min=0
        ),
    )

    # Every new-build tx line needs cost data for at least one vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_new_cost",
        severity="Mid",
        errors=validate_idxs(
            actual_idxs=tx_lines_w_cost,
            req_idxs=new_build_tx_lines,
            idx_label="transmission_line",
            msg=msg,
        ),
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get horizon and horizon-timepoint inputs from the database and validate
    them: dtypes, signs, one-period-per-horizon, and missing horizons.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    hrzs, hrz_tmps = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    c = conn.cursor()
    periods_horizons = c.execute(
        """
        SELECT balancing_type_horizon, period, horizon
        FROM periods_horizons
        WHERE temporal_scenario_id = {}
        AND subproblem_id = {}
        and stage_id = {}
        """.format(subscenarios.TEMPORAL_SCENARIO_ID, subproblem, stage)
    )

    df_hrzs = cursor_to_df(hrzs)
    df_hrz_tmps = cursor_to_df(hrz_tmps)
    df_periods_hrzs = cursor_to_df(periods_horizons)

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=["inputs_temporal_horizons", "inputs_temporal_horizon_timepoints"]
    )

    # Check dtypes horizons
    # Bug fix: keep separate (errors, columns) results per DataFrame; the old
    # code overwrote error_columns with the horizon_timepoints result before
    # the horizons numeric check used it.
    hrz_dtype_errors, hrz_error_columns = validate_dtypes(
        df_hrzs, expected_dtypes
    )
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizons",
        severity="High",
        errors=hrz_dtype_errors,
    )

    # Check dtypes horizon_timepoints
    hrz_tmp_dtype_errors, hrz_tmp_error_columns = validate_dtypes(
        df_hrz_tmps, expected_dtypes
    )
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="High",
        errors=hrz_tmp_dtype_errors,
    )

    # Check valid numeric columns are non-negative - horizons
    numeric_columns = [
        col for col in df_hrzs.columns if expected_dtypes[col] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(hrz_error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizons",
        severity="Mid",
        errors=validate_values(df_hrzs, valid_numeric_columns, "horizon",
                               min=0),
    )

    # Check valid numeric columns are non-negative - horizon_timepoints
    # Bug fix: build the column list from df_hrz_tmps (the DataFrame being
    # validated), not df_hrzs as before.
    numeric_columns = [
        col for col in df_hrz_tmps.columns if expected_dtypes[col] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(hrz_tmp_error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="Mid",
        errors=validate_values(df_hrz_tmps, valid_numeric_columns,
                               ["horizon", "timepoint"], min=0),
    )

    # One horizon cannot straddle multiple periods
    msg = "All timepoints within a horizon should belong to the same period."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="High",
        errors=validate_single_input(
            df=df_periods_hrzs,
            idx_col=["balancing_type_horizon", "horizon"],
            msg=msg,
        ),
    )

    # Make sure there are no missing horizon inputs
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="High",
        errors=validate_missing_inputs(
            df=df_hrz_tmps,
            col="horizon",
            idx_col=["balancing_type_horizon", "timepoint"],
        ),
    )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get binary-build generator inputs from the database and validate them:
    dtypes and signs of the cost and build-size tables, cost coverage, and
    build-size coverage.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # Get the binary build generator inputs
    new_gen_costs, new_build_size = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)

    projects = get_projects(conn, scenario_id, subscenarios, "capacity_type",
                            "gen_new_bin")

    # Convert input data into pandas DataFrame
    cost_df = cursor_to_df(new_gen_costs)
    bld_size_df = cursor_to_df(new_build_size)

    # get the project lists
    cost_projects = cost_df["project"].unique()
    bld_size_projects = bld_size_df["project"]

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=["inputs_project_new_cost",
                "inputs_project_new_binary_build_size"]
    )

    # Check dtypes - cost_df
    dtype_errors, error_columns = validate_dtypes(cost_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=dtype_errors
    )

    # Check valid numeric columns are non-negative - cost_df
    numeric_columns = [c for c in cost_df.columns
                       if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=validate_values(cost_df, valid_numeric_columns, min=0)
    )

    # Check dtypes - bld_size_df
    # (dtype_errors/error_columns are deliberately reassigned for this table)
    dtype_errors, error_columns = validate_dtypes(bld_size_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=dtype_errors
    )

    # Check valid numeric columns are non-negative - bld_size_df
    numeric_columns = [c for c in bld_size_df.columns
                       if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_values(bld_size_df, valid_numeric_columns, min=0)
    )

    # Check that all binary new build projects are available in >=1 vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="Mid",
        errors=validate_idxs(actual_idxs=cost_projects,
                             req_idxs=projects,
                             idx_label="project",
                             msg=msg)
    )

    # Check that all binary new build projects have build size specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_idxs(actual_idxs=bld_size_projects,
                             req_idxs=projects,
                             idx_label="project")
    )
def test_validate_values(self):
    """
    Exercise validate_values against lower/upper bounds, strict
    inequalities, and multi-column error reporting.
    """
    cols = ["project", "load_point_fraction",
            "average_heat_rate_mmbtu_per_mwh"]
    cols_to_check = ["load_point_fraction",
                     "average_heat_rate_mmbtu_per_mwh"]
    idx_col = "project"

    in_range_df = pd.DataFrame(
        columns=cols,
        data=[["gas_ct", 0, 10.5],
              ["gas_ct", 1, 9],
              ["coal_plant", 0.5, 10]],
    )

    test_cases = {
        # Correct inputs should not be flagged
        1: {"df": in_range_df,
            "min": 0, "max": np.inf,
            "strict_min": False, "strict_max": False,
            "result": []},
        # A strict lower bound flags the boundary value itself
        2: {"df": in_range_df,
            "min": 0, "max": np.inf,
            "strict_min": True, "strict_max": False,
            "result": ["project(s) 'gas_ct': Expected 0 < 'load_point_fraction' <= inf"]},
        # Errors in different columns produce separate messages
        3: {"df": pd.DataFrame(
                columns=cols,
                data=[["gas_ct", 0, -10.5],
                      ["gas_ct", -1, 9],
                      ["coal_plant", -0.5, 10]],
            ),
            "min": 0, "max": np.inf,
            "strict_min": False, "strict_max": False,
            "result": ["project(s) 'gas_ct, coal_plant': Expected 0 <= 'load_point_fraction' <= inf",
                       "project(s) 'gas_ct': Expected 0 <= 'average_heat_rate_mmbtu_per_mwh' <= inf"]},
        # Upper bounds flag values above the maximum
        4: {"df": pd.DataFrame(
                columns=cols,
                data=[["gas_ct", 0.2, 1],
                      ["gas_ct", 0.5, 0.9],
                      ["coal_plant", 1, 1.9]],
            ),
            "min": 0, "max": 1,
            "strict_min": False, "strict_max": False,
            "result": ["project(s) 'coal_plant': Expected 0 <= 'average_heat_rate_mmbtu_per_mwh' <= 1"]},
    }

    for case_id, case in test_cases.items():
        actual_list = module_to_test.validate_values(
            df=case["df"],
            col=cols_to_check,
            idx_col=idx_col,
            min=case["min"],
            max=case["max"],
            strict_min=case["strict_min"],
            strict_max=case["strict_max"],
        )
        self.assertListEqual(case["result"], actual_list)
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem,
                                    stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # All validations in this function target the same pair of input
    # tables with the same severity, so factor the write call out once
    def _write_errors(errors):
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_specified_capacity, "
                     "inputs_project_specified_fixed_cost",
            severity="High",
            errors=errors,
        )

    gen_ret_bin_params = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)

    projects = get_projects(conn, scenario_id, subscenarios,
                            "capacity_type", "gen_ret_bin")

    # Convert input data into pandas DataFrame and extract data
    df = cursor_to_df(gen_ret_bin_params)
    spec_projects = df["project"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=["inputs_project_specified_capacity",
                "inputs_project_specified_fixed_cost"]
    )

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    _write_errors(dtype_errors)

    # Check valid numeric columns are non-negative (skip columns that
    # already failed the dtype check)
    numeric_columns = [
        c for c in df.columns if expected_dtypes[c] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    _write_errors(validate_values(df, valid_numeric_columns, min=0))

    # Ensure project capacity & fixed cost is specified in at least 1 period
    _write_errors(validate_idxs(
        actual_idxs=spec_projects,
        req_idxs=projects,
        idx_label="project",
        msg="Expected specified capacity & fixed costs for at least one "
            "period."
    ))

    # Check for missing values (vs. missing row entries above)
    _write_errors(validate_missing_inputs(
        df, ["specified_capacity_mw", "annual_fixed_cost_per_mw_year"]
    ))
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem,
                                    stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # TODO: check that there are no minimum duration inputs for this type
    #  (duration is specified by specifying the build size in mw and mwh)
    #  Maybe also check all other required / not required inputs?
    #  --> see example in gen_must_run operational_type. Seems very verbose
    #  and hard to maintain. Is there a way to generalize this?

    # Get the binary build storage inputs
    new_stor_costs, new_stor_build_size = \
        get_module_specific_inputs_from_database(
            scenario_id, subscenarios, subproblem, stage, conn)

    projects = get_projects(conn, scenario_id, subscenarios,
                            "capacity_type", "stor_new_bin")

    # Convert input data into pandas DataFrame
    cost_df = cursor_to_df(new_stor_costs)
    bld_size_df = cursor_to_df(new_stor_build_size)

    # Get the project lists.
    # Use unique() for both lists so duplicate rows don't feed duplicate
    # indices into validate_idxs (bld_size previously skipped the dedup,
    # inconsistent with cost_projects)
    cost_projects = cost_df["project"].unique()
    bld_size_projects = bld_size_df["project"].unique()

    # Get expected dtypes (combined over both input tables)
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=["inputs_project_new_cost",
                "inputs_project_new_binary_build_size"]
    )

    # For each input table: check dtypes, then check that the numeric
    # columns that passed the dtype check are non-negative
    for df, table in [(cost_df, "inputs_project_new_cost"),
                      (bld_size_df, "inputs_project_new_binary_build_size")]:
        dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table=table,
            severity="High",
            errors=dtype_errors
        )

        numeric_columns = [
            c for c in df.columns if expected_dtypes[c] == "numeric"
        ]
        valid_numeric_columns = set(numeric_columns) - set(error_columns)
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table=table,
            severity="High",
            errors=validate_values(df, valid_numeric_columns, min=0)
        )

    # Check that all binary new build projects are available in >=1 vintage
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=validate_idxs(
            actual_idxs=cost_projects,
            req_idxs=projects,
            idx_label="project",
            msg="Expected cost data for at least one vintage."
        )
    )

    # Check that all binary new build projects have build size specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_idxs(
            actual_idxs=bld_size_projects,
            req_idxs=projects,
            idx_label="project"
        )
    )