def test_validate_missing_inputs(self):
    """
    Exercise validate_missing_inputs() with a single value column and with
    a list of value columns, for both a single index column and a list of
    index columns, and compare against the expected error messages.
    :return:
    """
    cols = ["project", "capacity_type", "operational_type"]
    test_cases = {
        # Make sure a case with only basic inputs doesn't throw errors
        1: {
            "df": pd.DataFrame(
                columns=cols,
                data=[["gas_ct", "cap1", "op1"],
                      ["gas_ct2", "cap2", "op1"]]
            ),
            "idx_col": "project",
            "result_cap_col": [],
            "result_both_cols": []
        },
        # Make sure missing inputs are detected
        2: {
            "df": pd.DataFrame(
                columns=cols,
                data=[["gas_ct", "cap1", "op1"],
                      ["gas_ct2", None, None]]
            ),
            "idx_col": "project",
            "result_cap_col": [
                "Missing capacity_type inputs for project(s): "
                "['gas_ct2']. "
            ],
            "result_both_cols": [
                "Missing capacity_type inputs for project(s): "
                "['gas_ct2']. ",
                "Missing operational_type inputs for project(s): "
                "['gas_ct2']. "
            ]
        },
        # Make sure idx_col with list of cols works
        3: {
            "df": pd.DataFrame(
                columns=cols,
                data=[["gas_ct", "cap1", "op1"],
                      ["gas_ct2", "cap1", None]]
            ),
            "idx_col": ["project", "capacity_type"],
            "result_cap_col": [],
            "result_both_cols": [
                "Missing operational_type inputs for "
                "['project', 'capacity_type'](s): [['gas_ct2' 'cap1']]. "
            ]
        }
    }

    for case in test_cases.values():
        # Run once with a single column, once with multiple columns
        col_specs = [
            ("capacity_type", case["result_cap_col"]),
            (["capacity_type", "operational_type"],
             case["result_both_cols"]),
        ]
        for col, expected_list in col_specs:
            actual_list = module_to_test.validate_missing_inputs(
                df=case["df"],
                idx_col=case["idx_col"],
                col=col
            )
            self.assertListEqual(expected_list, actual_list)
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get exogenous availability inputs from the database and validate them.
    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :return:
    """
    availabilities = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )
    av_df = cursor_to_df(availabilities)

    idx_cols = ["project", "timepoint"]
    value_cols = ["availability_derate"]

    # Check data types availability
    expected_dtypes = get_expected_dtypes(
        conn,
        ["inputs_project_availability",
         "inputs_project_availability_exogenous"]
    )
    dtype_errors, error_columns = validate_dtypes(av_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_exogenous",
        severity="High",
        errors=dtype_errors,
    )

    # Check for missing inputs; informational only, since a missing derate
    # just means 100% availability
    msg = (
        "If not specified, availability is assumed to be 100%. If you "
        "don't want to specify any availability derates, simply leave the "
        "exogenous_availability_scenario_id empty and this message will "
        "disappear."
    )
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_exogenous",
        severity="Low",
        errors=validate_missing_inputs(av_df, value_cols, idx_cols, msg),
    )

    # Check for correct sign (percent fraction), but only if the dtype
    # check passed for the column
    if "availability" not in error_columns:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_availability_exogenous",
            severity="High",
            errors=validate_values(av_df, value_cols, min=0, max=1),
        )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get endogenous availability inputs from the database and validate them.
    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :return:
    """
    params = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )
    endog_df = cursor_to_df(params)

    # Check data types availability
    expected_dtypes = get_expected_dtypes(
        conn,
        ["inputs_project_availability",
         "inputs_project_availability_endogenous"]
    )
    dtype_errors, error_columns = validate_dtypes(endog_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_endogenous",
        severity="High",
        errors=dtype_errors)

    # Check for missing inputs
    msg = ""
    value_cols = [
        "unavailable_hours_per_period",
        "unavailable_hours_per_event_min",
        "available_hours_between_events_min"
    ]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_endogenous",
        severity="Low",
        errors=validate_missing_inputs(endog_df, value_cols, "project", msg))

    # The minimum event duration cannot exceed the total unavailable hours
    # per period
    cols = ["unavailable_hours_per_event_min", "unavailable_hours_per_period"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_endogenous",
        severity="High",
        errors=validate_column_monotonicity(
            df=endog_df, cols=cols, idx_col=["project"]))
def validate_hydro_opchars(scenario_id, subscenarios, subproblem, stage, conn, op_type):
    """
    Get hydro operational characteristics from the database and validate
    them.
    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :param op_type:
    :return:
    """
    hydro_chars = get_hydro_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn, op_type
    )

    # Convert input data into pandas DataFrame
    hydro_df = cursor_to_df(hydro_chars)
    value_cols = [
        "min_power_fraction",
        "average_power_fraction",
        "max_power_fraction"
    ]

    # Check for missing inputs
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_hydro_operational_chars",
        severity="High",
        errors=validate_missing_inputs(
            hydro_df, value_cols, ["project", "horizon"]
        ),
    )

    # Check for sign (should be percent fraction)
    hydro_opchar_fraction_error = write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_hydro_operational_chars",
        severity="Low",
        errors=validate_values(hydro_df, value_cols, min=0, max=1),
    )

    # Check min <= avg <= max
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_hydro_operational_chars",
        severity="Mid",
        errors=validate_column_monotonicity(
            df=hydro_df, cols=value_cols, idx_col=["project", "horizon"]
        ),
    )

    return hydro_opchar_fraction_error
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    project_zones = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    # Convert input data into pandas DataFrame
    df = cursor_to_df(project_zones)
    zones_w_project = df["prm_zone"].unique()

    # Get the required PRM zones
    # TODO: make this into a function similar to get_projects()?
    #  could eventually centralize all these db query functions in one place
    c = conn.cursor()
    zones = c.execute(
        """SELECT prm_zone FROM inputs_geography_prm_zones
        WHERE prm_zone_scenario_id = {}
        """.format(subscenarios.PRM_ZONE_SCENARIO_ID)
    )
    zones = [z[0] for z in zones]  # convert to list

    # Check that each PRM zone has at least one project assigned to it
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_prm_zones",
        severity="High",
        errors=validate_idxs(
            actual_idxs=zones_w_project,
            req_idxs=zones,
            idx_label="prm_zone",
            msg="Each PRM zone needs at least 1 project "
                "assigned to it.",
        ),
    )

    # Make sure PRM type is specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_elcc_chars",
        severity="High",
        errors=validate_missing_inputs(df, "prm_type"),
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    (
        min_max_builds,
        supply_curve_count,
        supply_curve_id,
        supply_curve,
    ) = get_model_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    projects = get_projects(
        conn, scenario_id, subscenarios, "capacity_type", "dr_new"
    )

    # Convert input data into pandas DataFrame
    df = cursor_to_df(min_max_builds)
    df_sc = cursor_to_df(supply_curve)
    dr_projects = df_sc["project"].unique()

    # Check for missing project potential inputs
    cols = ["min_cumulative_new_build_mwh", "max_cumulative_new_build_mwh"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_potential",
        severity="High",
        errors=validate_missing_inputs(df, cols),
    )

    # Check for missing supply curve inputs: every dr_new project must
    # appear in the supply curve table
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_shiftable_load_supply_curve",
        severity="High",
        errors=validate_idxs(
            actual_idxs=dr_projects, req_idxs=projects, idx_label="project"
        ),
    )
def validate_var_profiles(scenario_id, subscenarios, subproblem, stage, conn, op_type):
    """
    Get variable generator profiles from the database and validate them.
    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :param op_type:
    :return:
    """
    var_profiles = get_var_profile_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn, op_type
    )

    # Convert input data into pandas DataFrame
    profiles_df = cursor_to_df(var_profiles)
    value_cols = ["cap_factor"]

    # Check for missing inputs
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_variable_generator_profiles",
        severity="High",
        errors=validate_missing_inputs(
            profiles_df, value_cols, ["project", "timepoint"]
        ),
    )

    # Check for sign (should be percent fraction)
    cap_factor_validation_error = write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_variable_generator_profiles",
        severity="Low",
        errors=validate_values(profiles_df, value_cols, min=0, max=1),
    )

    return cap_factor_validation_error
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    project_zone_dur = get_model_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    df = cursor_to_df(project_zone_dur)
    cols = ["min_duration_for_full_capacity_credit_hours"]

    # Make sure param sign is as expected (strictly positive)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_elcc_chars",
        severity="High",
        errors=validate_values(df, cols, min=0, strict_min=True),
    )

    # Make sure param is specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_elcc_chars",
        severity="High",
        errors=validate_missing_inputs(df, cols),
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    project_fractions = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    fractions_df = cursor_to_df(project_fractions)

    # Make sure fraction is a percent fraction (between 0 and 1)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_elcc_chars",
        severity="High",
        errors=validate_values(
            fractions_df, ["elcc_simple_fraction"], min=0, max=1
        ),
    )

    # Make sure fraction is specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_elcc_chars",
        severity="High",
        errors=validate_missing_inputs(fractions_df, "elcc_simple_fraction"),
    )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    gen_ret_bin_params = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    projects = get_projects(
        conn, scenario_id, subscenarios, "capacity_type", "gen_ret_bin"
    )

    # Convert input data into pandas DataFrame and extract data
    df = cursor_to_df(gen_ret_bin_params)
    spec_projects = df["project"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=[
            "inputs_project_specified_capacity",
            "inputs_project_specified_fixed_cost"
        ]
    )

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_specified_capacity, "
                 "inputs_project_specified_fixed_cost",
        severity="High",
        errors=dtype_errors
    )

    # Check valid numeric columns are non-negative
    numeric_columns = [
        col for col in df.columns if expected_dtypes[col] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_specified_capacity, "
                 "inputs_project_specified_fixed_cost",
        severity="High",
        errors=validate_values(df, valid_numeric_columns, min=0)
    )

    # Ensure project capacity & fixed cost is specified in at least 1 period
    msg = "Expected specified capacity & fixed costs for at least one period."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_specified_capacity, "
                 "inputs_project_specified_fixed_cost",
        severity="High",
        errors=validate_idxs(
            actual_idxs=spec_projects,
            req_idxs=projects,
            idx_label="project",
            msg=msg
        )
    )

    # Check for missing values (vs. missing row entries above)
    cols = ["specified_capacity_mw", "annual_fixed_cost_per_mw_year"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_specified_capacity, "
                 "inputs_project_specified_fixed_cost",
        severity="High",
        errors=validate_missing_inputs(df, cols)
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    c = conn.cursor()

    # Get the transmission inputs
    transmission_lines = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    # Convert input data into pandas DataFrame
    df = cursor_to_df(transmission_lines)

    # Check data types:
    expected_dtypes = get_expected_dtypes(
        conn,
        [
            "inputs_transmission_portfolios",
            "inputs_transmission_availability",
            "inputs_transmission_load_zones",
            "inputs_transmission_operational_chars",
        ],
    )
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_portfolios, "
                 "inputs_transmission_load_zones, "
                 "inputs_transmission_operational_chars",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    # Note: use a comprehension variable other than "c", which is the
    # cursor we still need below
    numeric_columns = [
        col for col in df.columns if expected_dtypes[col] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_operational_chars",
        severity="High",
        errors=validate_values(df, valid_numeric_columns, min=0),
    )

    # Ensure we're not combining incompatible capacity and operational types
    cols = ["capacity_type", "operational_type"]
    invalid_combos = c.execute(
        """
        SELECT {} FROM mod_tx_capacity_and_tx_operational_type_invalid_combos
        """.format(",".join(cols))
    ).fetchall()
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        # Fixed typo in table label: was "inputs_tranmission_portfolios"
        db_table="inputs_transmission_operational_chars, "
                 "inputs_transmission_portfolios",
        severity="High",
        errors=validate_columns(df, cols, invalids=invalid_combos),
    )

    # Check reactance > 0
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_operational_chars",
        severity="High",
        errors=validate_values(df, ["reactance_ohms"], min=0, strict_min=True),
    )

    # Check that all portfolio tx lines are present in the opchar inputs
    msg = ("All tx lines in the portfolio should have an operational type "
           "specified in the inputs_transmission_operational_chars table.")
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_operational_chars",
        severity="High",
        errors=validate_missing_inputs(df, ["operational_type"],
                                       idx_col="transmission_line",
                                       msg=msg),
    )

    # Check that all portfolio tx lines are present in the load zone inputs
    msg = ("All tx lines in the portfolio should have a load zone from/to "
           "specified in the inputs_transmission_load_zones table.")
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_load_zones",
        severity="High",
        errors=validate_missing_inputs(df, ["load_zone_from", "load_zone_to"],
                                       idx_col="transmission_line",
                                       msg=msg),
    )

    # Check that all tx load zones are part of the active load zones
    load_zones = get_load_zones(conn, subscenarios)
    for col in ["load_zone_from", "load_zone_to"]:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_transmission_load_zones",
            severity="High",
            errors=validate_columns(df, col, valids=load_zones),
        )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    c = conn.cursor()

    # Get the project inputs
    projects = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    # Convert input data into pandas DataFrame
    df = cursor_to_df(projects)

    # Check data types:
    expected_dtypes = get_expected_dtypes(
        conn,
        ["inputs_project_portfolios",
         "inputs_project_availability",
         "inputs_project_load_zones",
         "inputs_project_operational_chars"]
    )
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars, inputs_project_portfolios",
        severity="High",
        errors=dtype_errors
    )

    # Check valid numeric columns are non-negative
    numeric_columns = [
        col for col in df.columns if expected_dtypes[col] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars",
        severity="High",
        errors=validate_values(df, valid_numeric_columns, min=0)
    )

    # Check that we're not combining incompatible cap-types and op-types
    cols = ["capacity_type", "operational_type"]
    invalid_combos = c.execute(
        """
        SELECT {} FROM mod_capacity_and_operational_type_invalid_combos
        """.format(",".join(cols))
    ).fetchall()
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars, inputs_project_portfolios",
        severity="High",
        errors=validate_columns(df, cols, invalids=invalid_combos)
    )

    # Check that capacity type is valid
    # Note: foreign key already ensures this!
    valid_cap_types = c.execute(
        """SELECT capacity_type from mod_capacity_types"""
    ).fetchall()
    valid_cap_types = [v[0] for v in valid_cap_types]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_portfolios",
        severity="High",
        errors=validate_columns(df, "capacity_type", valids=valid_cap_types)
    )

    # Check that operational type is valid
    # Note: foreign key already ensures this!
    valid_op_types = c.execute(
        """SELECT operational_type from mod_operational_types"""
    ).fetchall()
    valid_op_types = [v[0] for v in valid_op_types]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_portfolios",
        severity="High",
        errors=validate_columns(df, "operational_type", valids=valid_op_types)
    )

    # Check that all portfolio projects are present in the availability inputs
    msg = "All projects in the portfolio should have an availability type " \
          "specified in the inputs_project_availability table."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability",
        severity="High",
        errors=validate_missing_inputs(df, "availability_type", msg=msg)
    )

    # Check that all portfolio projects are present in the opchar inputs
    msg = "All projects in the portfolio should have an operational type " \
          "and balancing type specified in the " \
          "inputs_project_operational_chars table."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars",
        severity="High",
        errors=validate_missing_inputs(
            df, ["operational_type", "balancing_type_project"], msg=msg
        )
    )

    # Check that all portfolio projects are present in the load zone inputs
    msg = "All projects in the portfolio should have a load zone " \
          "specified in the inputs_project_load_zones table."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_load_zones",
        severity="High",
        errors=validate_missing_inputs(df, "load_zone", msg=msg)
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    hurdle_rates = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )
    df = cursor_to_df(hurdle_rates)

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_transmission_hurdle_rates"]
    )

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_hurdle_rates",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    numeric_columns = [
        col for col in df.columns if expected_dtypes[col] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_hurdle_rates",
        severity="High",
        errors=validate_values(df, valid_numeric_columns,
                               "transmission_line", min=0),
    )

    # Check for missing hurdle rate inputs in any modeling period
    msg = ("Expected hurdle rates specified for each modeling period when "
           "transmission hurdle rates feature is on.")
    cols = [
        "hurdle_rate_positive_direction_per_mwh",
        "hurdle_rate_negative_direction_per_mwh",
    ]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_hurdle_rates",
        severity="Low",
        errors=validate_missing_inputs(
            df=df, col=cols, idx_col=["transmission_line", "period"], msg=msg
        ),
    )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    tx_capacities = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)

    tx_lines = get_tx_lines(conn, scenario_id, subscenarios,
                            "capacity_type", "tx_spec")

    # Convert input data into pandas DataFrame and extract data
    df = cursor_to_df(tx_capacities)
    spec_tx_lines = df["transmission_line"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_transmission_specified_capacity"])

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=dtype_errors)

    # Ensure tx_line capacity is specified in at least 1 period
    msg = "Expected specified capacity for at least one period."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_idxs(actual_idxs=spec_tx_lines,
                             req_idxs=tx_lines,
                             idx_label="transmission_line",
                             msg=msg))

    # Check for missing values (vs. missing row entries above)
    cols = ["min_mw", "max_mw"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_missing_inputs(df, cols))

    # Check that min_mw <= max_mw
    # Bug fix: this df is indexed by transmission_line (see spec_tx_lines
    # above), not project, and the inputs come from
    # inputs_transmission_specified_capacity — the previous
    # idx_col=["project", "period"] / db_table="inputs_project_new_potential"
    # were copy-paste leftovers from a project capacity module
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_column_monotonicity(
            df=df, cols=cols, idx_col=["transmission_line", "period"]))
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    hrzs, hrz_tmps = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)

    c = conn.cursor()
    periods_horizons = c.execute(
        """SELECT balancing_type_horizon, period, horizon
        FROM periods_horizons
        WHERE temporal_scenario_id = {}
        AND subproblem_id = {}
        and stage_id = {}
        """.format(subscenarios.TEMPORAL_SCENARIO_ID, subproblem, stage)
    )

    df_hrzs = cursor_to_df(hrzs)
    df_hrz_tmps = cursor_to_df(hrz_tmps)
    df_periods_hrzs = cursor_to_df(periods_horizons)

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=[
            "inputs_temporal_horizons", "inputs_temporal_horizon_timepoints"
        ])

    # Check dtypes horizons
    # Bug fix: keep each table's error columns in its own variable; the
    # original overwrote error_columns with the horizon_timepoints results
    # before using it for the horizons numeric check below
    hrz_dtype_errors, hrz_error_columns = validate_dtypes(
        df_hrzs, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizons",
        severity="High",
        errors=hrz_dtype_errors)

    # Check dtypes horizon_timepoints
    hrz_tmp_dtype_errors, hrz_tmp_error_columns = validate_dtypes(
        df_hrz_tmps, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="High",
        errors=hrz_tmp_dtype_errors)

    # Check valid numeric columns are non-negative - horizons
    numeric_columns = [
        col for col in df_hrzs.columns if expected_dtypes[col] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(hrz_error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizons",
        severity="Mid",
        errors=validate_values(df_hrzs, valid_numeric_columns,
                               "horizon", min=0))

    # Check valid numeric columns are non-negative - horizon_timepoints
    # Bug fix: derive the numeric columns from df_hrz_tmps (the original
    # re-used df_hrzs.columns here), and subtract the horizon_timepoints
    # dtype-error columns
    numeric_columns = [
        col for col in df_hrz_tmps.columns
        if expected_dtypes[col] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(hrz_tmp_error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="Mid",
        errors=validate_values(df_hrz_tmps, valid_numeric_columns,
                               ["horizon", "timepoint"], min=0))

    # One horizon cannot straddle multiple periods
    msg = "All timepoints within a horizon should belong to the same period."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="High",
        errors=validate_single_input(
            df=df_periods_hrzs,
            idx_col=["balancing_type_horizon", "horizon"],
            msg=msg))

    # Make sure there are no missing horizon inputs
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="High",
        errors=validate_missing_inputs(
            df=df_hrz_tmps,
            col="horizon",
            idx_col=["balancing_type_horizon", "timepoint"]))