def test_validate_column_monotonicity(self):
    """Test validate_column_monotonicity: values must be non-decreasing
    across the given columns, None values are ignored, and idx_col may be
    a single column name or a list of column names.
    """
    cols = ["project", "period", "min_mw", "avg_mw", "max_mw"]
    test_cases = {
        # Make sure a case with only basic inputs doesn't throw errors
        1: {"df": pd.DataFrame(
                columns=cols,
                data=[["gas_ct", 2020, 10, 15, 20],
                      ["gas_ct", 2030, 10, 15, 20],
                      ["coal", 2030, 20, 20, 20]]),
            "cols": ["min_mw", "avg_mw", "max_mw"],
            "idx_col": "project",
            "result": []
            },
        # Make sure erroneous inputs are properly caught
        2: {"df": pd.DataFrame(
                columns=cols,
                data=[["gas_ct", 2020, 10, 15, 20],
                      ["gas_ct", 2030, 21, 15, 20],
                      ["coal", 2030, 20, 20, 20]]),
            "cols": ["min_mw", "avg_mw", "max_mw"],
            "idx_col": "project",
            "result": ["project(s) ['gas_ct']: Values cannot decrease "
                       "between ['min_mw', 'avg_mw', 'max_mw']. "]
            },
        # None values are ignored
        3: {"df": pd.DataFrame(
                columns=cols,
                data=[["gas_ct", 2020, 10, 15, 20],
                      ["gas_ct", 2030, None, 15, 20],
                      ["coal", 2030, 20, 20, 20]]),
            "cols": ["min_mw", "max_mw"],
            "idx_col": "project",
            "result": []
            },
        # idx_col can be list of strings
        4: {"df": pd.DataFrame(
                columns=cols,
                data=[["gas_ct", 2020, 10, 15, 20],
                      ["gas_ct", 2030, 21, 15, 20],
                      ["coal", 2030, 20, 20, 20]]),
            "cols": ["min_mw", "avg_mw", "max_mw"],
            "idx_col": ["project", "period"],
            "result": ["['project', 'period'](s) [['gas_ct' 2030]]: "
                       "Values cannot decrease between "
                       "['min_mw', 'avg_mw', 'max_mw']. "]
            }
    }

    # Iterate items directly rather than keys + repeated dict indexing
    for case_id, case in test_cases.items():
        expected_list = case["result"]
        actual_list = module_to_test.validate_column_monotonicity(
            df=case["df"],
            cols=case["cols"],
            idx_col=case["idx_col"],
        )
        self.assertListEqual(expected_list, actual_list)
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem,
                                    stage, conn):
    """Validate the endogenous-availability inputs fetched from the database.

    Writes any validation errors (dtype mismatches, missing inputs,
    min-event hours exceeding hours per period) to the database.

    :param scenario_id:
    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :return:
    """
    av_df = cursor_to_df(
        get_inputs_from_database(scenario_id, subscenarios, subproblem,
                                 stage, conn)
    )

    # Validate data types against the schema of the availability tables
    expected_dtypes = get_expected_dtypes(conn, [
        "inputs_project_availability",
        "inputs_project_availability_endogenous"
    ])
    dtype_errors, error_columns = validate_dtypes(av_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_endogenous",
        severity="High",
        errors=dtype_errors)

    # Flag projects that are missing any of the required availability inputs
    msg = ""
    value_cols = [
        "unavailable_hours_per_period",
        "unavailable_hours_per_event_min",
        "available_hours_between_events_min"
    ]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_endogenous",
        severity="Low",
        errors=validate_missing_inputs(av_df, value_cols, "project", msg))

    # The minimum hours per event must not exceed the hours per period
    cols = ["unavailable_hours_per_event_min", "unavailable_hours_per_period"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_endogenous",
        severity="High",
        errors=validate_column_monotonicity(df=av_df, cols=cols,
                                            idx_col=["project"]))
def validate_hydro_opchars(scenario_id, subscenarios, subproblem, stage,
                           conn, op_type):
    """Validate the hydro operational-characteristic inputs.

    Writes validation errors (missing inputs, out-of-range fractions,
    min > avg or avg > max) to the database.

    :param scenario_id:
    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :param op_type:
    :return:
    """
    hydro_chars = get_hydro_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn, op_type
    )

    # Convert the query result into a pandas DataFrame
    hydro_df = cursor_to_df(hydro_chars)
    fraction_cols = ["min_power_fraction", "average_power_fraction",
                     "max_power_fraction"]

    # Check for missing inputs
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_hydro_operational_chars",
        severity="High",
        errors=validate_missing_inputs(
            hydro_df, fraction_cols, ["project", "horizon"]
        ),
    )

    # Values should be percent fractions, i.e. within [0, 1]
    # NOTE(review): this function returns the result of this write call --
    # confirm write_validation_to_database returns the error list as intended
    hydro_opchar_fraction_error = write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_hydro_operational_chars",
        severity="Low",
        errors=validate_values(hydro_df, fraction_cols, min=0, max=1),
    )

    # Check min <= avg <= max
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_hydro_operational_chars",
        severity="Mid",
        errors=validate_column_monotonicity(
            df=hydro_df, cols=fraction_cols, idx_col=["project", "horizon"]
        ),
    )

    return hydro_opchar_fraction_error
def _validate_potential_limits(scenario_id, subproblem, stage, conn,
                               cost_df, df_cols, min_col, max_col):
    """Validate min/max cumulative new-build limits for one unit (MW or MWh).

    Checks (when the relevant columns are present) that the maximum limit
    does not decrease over vintages and that min <= max for every
    project-vintage.

    :param scenario_id:
    :param subproblem:
    :param stage:
    :param conn: database connection
    :param cost_df: DataFrame with the new-cost/new-potential inputs
    :param df_cols: the columns present in cost_df
    :param min_col: name of the minimum-cumulative-new-build column
    :param max_col: name of the maximum-cumulative-new-build column
    :return:
    """
    # Check that maximum new build doesn't decrease
    if max_col in df_cols:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="Mid",
            errors=validate_row_monotonicity(
                df=cost_df, col=max_col, rank_col="vintage"
            ),
        )

    # Check that min build <= max build
    if {min_col, max_col}.issubset(set(df_cols)):
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="High",
            errors=validate_column_monotonicity(
                df=cost_df, cols=[min_col, max_col],
                idx_col=["project", "vintage"]
            ),
        )


def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param scenario_id:
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    new_stor_costs = get_model_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    projects = get_projects(
        conn, scenario_id, subscenarios, "capacity_type", "stor_new_lin"
    )

    # Convert input data into pandas DataFrame
    cost_df = cursor_to_df(new_stor_costs)
    df_cols = cost_df.columns

    # get the project lists
    cost_projects = cost_df["project"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_project_new_cost",
                           "inputs_project_new_potential"]
    )

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(cost_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    # (columns that already failed the dtype check are excluded)
    numeric_columns = [c for c in cost_df.columns
                       if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=validate_values(cost_df, valid_numeric_columns, min=0),
    )

    # Check that all new build storage projects have cost data for at
    # least one vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="Mid",
        errors=validate_idxs(
            actual_idxs=cost_projects, req_idxs=projects,
            idx_label="project", msg=msg
        ),
    )

    # Validate the MW and MWh cumulative new-build limits with a shared
    # helper (the two checks were previously duplicated verbatim)
    for min_col, max_col in [
        ("min_cumulative_new_build_mw", "max_cumulative_new_build_mw"),
        ("min_cumulative_new_build_mwh", "max_cumulative_new_build_mwh"),
    ]:
        _validate_potential_limits(
            scenario_id, subproblem, stage, conn,
            cost_df, df_cols, min_col, max_col
        )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem,
                                    stage, conn):
    """
    Get inputs from database and validate the inputs

    :param scenario_id:
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    tx_capacities = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)

    tx_lines = get_tx_lines(conn, scenario_id, subscenarios,
                            "capacity_type", "tx_spec")

    # Convert input data into pandas DataFrame and extract data
    df = cursor_to_df(tx_capacities)
    spec_tx_lines = df["transmission_line"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_transmission_specified_capacity"])

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=dtype_errors)

    # Ensure tx_line capacity is specified in at least 1 period
    msg = "Expected specified capacity for at least one period."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_idxs(actual_idxs=spec_tx_lines,
                             req_idxs=tx_lines,
                             idx_label="transmission_line",
                             msg=msg))

    # Check for missing values (vs. missing row entries above)
    cols = ["min_mw", "max_mw"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_missing_inputs(df, cols))

    # Check that min <= max
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        # BUG FIX: was "inputs_project_new_potential" -- a copy-paste from
        # the project new-potential validation; this is transmission data
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_column_monotonicity(
            df=df,
            cols=cols,
            # BUG FIX: was ["project", "period"], but this DataFrame is
            # keyed by transmission_line (see spec_tx_lines above); there
            # is no "project" column here
            idx_col=["transmission_line", "period"]))