def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    project_zones = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    # Convert input data into pandas DataFrame
    df = cursor_to_df(project_zones)
    zones_w_project = df["prm_zone"].unique()

    # Get the required PRM zones
    # TODO: make this into a function similar to get_projects()?
    #  could eventually centralize all these db query functions in one place
    c = conn.cursor()
    zones = c.execute(
        """SELECT prm_zone FROM inputs_geography_prm_zones
        WHERE prm_zone_scenario_id = {}
        """.format(subscenarios.PRM_ZONE_SCENARIO_ID)
    )
    zones = [z[0] for z in zones]  # convert to list

    # Check that each PRM zone has at least one project assigned to it
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_prm_zones",
        severity="High",
        errors=validate_idxs(
            actual_idxs=zones_w_project,
            req_idxs=zones,
            idx_label="prm_zone",
            msg="Each PRM zone needs at least 1 project "
            "assigned to it.",
        ),
    )

    # Make sure PRM type is specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_elcc_chars",
        severity="High",
        errors=validate_missing_inputs(df, "prm_type"),
    )

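# All of these validation functions funnel raw database cursors through
# cursor_to_df before running checks. For reference, a minimal sketch of
# such a helper, assuming a standard DB-API cursor (the actual GridPath
# implementation may differ):
import pandas as pd


def cursor_to_df(cursor):
    """Convert a DB-API cursor's result set into a pandas DataFrame,
    taking column names from the cursor's description attribute."""
    return pd.DataFrame(
        data=cursor.fetchall(),
        columns=[d[0] for d in cursor.description],
    )
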
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    (
        min_max_builds,
        supply_curve_count,
        supply_curve_id,
        supply_curve,
    ) = get_model_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    projects = get_projects(conn, scenario_id, subscenarios, "capacity_type", "dr_new")

    # Convert input data into pandas DataFrame
    df = cursor_to_df(min_max_builds)
    df_sc = cursor_to_df(supply_curve)
    dr_projects = df_sc["project"].unique()

    # Check for missing project potential inputs
    cols = ["min_cumulative_new_build_mwh", "max_cumulative_new_build_mwh"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_potential",
        severity="High",
        errors=validate_missing_inputs(df, cols),
    )

    # Check for missing supply curve inputs
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_shiftable_load_supply_curve",
        severity="High",
        errors=validate_idxs(
            actual_idxs=dr_projects, req_idxs=projects, idx_label="project"
        ),
    )

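# validate_missing_inputs is called above both with a single column name
# and with a list of columns. A minimal sketch of the expected behavior --
# flagging rows with NULL/NaN values in the required columns -- with the
# signature and message format assumed rather than confirmed:
def validate_missing_inputs(df, cols, idx_col="project"):
    """Report indexes with missing (NaN/None) values in the given columns."""
    cols = [cols] if isinstance(cols, str) else cols
    results = []
    for col in cols:
        invalids = sorted(df.loc[df[col].isna(), idx_col].unique())
        if invalids:
            results.append(
                "Missing {} inputs for {}: {}".format(col, idx_col, invalids)
            )
    return results
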
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # Validate operational chars table inputs
    opchar_df = validate_opchars(
        scenario_id, subscenarios, subproblem, stage, conn, "gen_var_must_take"
    )

    # Validate var profiles input table
    cap_factor_validation_error = validate_var_profiles(
        scenario_id, subscenarios, subproblem, stage, conn, "gen_var_must_take"
    )
    if cap_factor_validation_error:
        warnings.warn(
            """
            Found gen_var_must_take cap factors that are <0 or >1. This is
            allowed but this warning is here to make sure it is intended.
            """
        )

    # Other module specific validations

    # Check that the project does not show up in any of the
    # inputs_project_reserve_bas tables since gen_var_must_take can't
    # provide any reserves
    projects_by_reserve = get_projects_by_reserve(scenario_id, subscenarios, conn)
    for reserve, projects_w_ba in projects_by_reserve.items():
        table = "inputs_project_" + reserve + "_bas"
        reserve_errors = validate_idxs(
            actual_idxs=opchar_df["project"],
            invalid_idxs=projects_w_ba,
            msg="gen_var_must_take cannot provide {}.".format(reserve),
        )
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table=table,
            severity="Mid",
            errors=reserve_errors,
        )

def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    gen_ret_bin_params = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    projects = get_projects(
        conn, scenario_id, subscenarios, "capacity_type", "gen_ret_bin"
    )

    # Convert input data into pandas DataFrame and extract data
    df = cursor_to_df(gen_ret_bin_params)
    spec_projects = df["project"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=[
            "inputs_project_specified_capacity",
            "inputs_project_specified_fixed_cost",
        ],
    )

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_specified_capacity, "
        "inputs_project_specified_fixed_cost",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    numeric_columns = [c for c in df.columns if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_specified_capacity, "
        "inputs_project_specified_fixed_cost",
        severity="High",
        errors=validate_values(df, valid_numeric_columns, min=0),
    )

    # Ensure project capacity & fixed cost is specified in at least 1 period
    msg = "Expected specified capacity & fixed costs for at least one period."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_specified_capacity, "
        "inputs_project_specified_fixed_cost",
        severity="High",
        errors=validate_idxs(
            actual_idxs=spec_projects, req_idxs=projects, idx_label="project", msg=msg
        ),
    )

    # Check for missing values (vs. missing row entries above)
    cols = ["specified_capacity_mw", "annual_fixed_cost_per_mw_year"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_specified_capacity, "
        "inputs_project_specified_fixed_cost",
        severity="High",
        errors=validate_missing_inputs(df, cols),
    )

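# Several of these functions pair get_expected_dtypes with validate_dtypes
# and then reuse the returned error columns to skip the non-negativity
# check for columns that already failed. A sketch of a compatible
# validate_dtypes, assuming "numeric"/"string" type labels (the real
# helper may recognize more types and word its messages differently):
import pandas as pd


def validate_dtypes(df, expected_dtypes):
    """Return (error_messages, error_columns) for columns whose pandas
    dtype does not match the expected type label."""
    results, error_columns = [], []
    for col in df.columns:
        expected = expected_dtypes.get(col)
        if expected == "numeric" and not pd.api.types.is_numeric_dtype(df[col]):
            results.append(
                "Invalid data type for column '{}'; expected numeric".format(col)
            )
            error_columns.append(col)
        elif expected == "string" and pd.api.types.is_numeric_dtype(df[col]):
            results.append(
                "Invalid data type for column '{}'; expected string".format(col)
            )
            error_columns.append(col)
    return results, error_columns
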
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    new_stor_costs = get_model_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    projects = get_projects(
        conn, scenario_id, subscenarios, "capacity_type", "stor_new_lin"
    )

    # Convert input data into pandas DataFrame
    cost_df = cursor_to_df(new_stor_costs)
    df_cols = cost_df.columns

    # get the project lists
    cost_projects = cost_df["project"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_project_new_cost", "inputs_project_new_potential"]
    )

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(cost_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    numeric_columns = [c for c in cost_df.columns if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=validate_values(cost_df, valid_numeric_columns, min=0),
    )

    # Check that all new build storage projects have cost data for >=1 vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="Mid",
        errors=validate_idxs(
            actual_idxs=cost_projects, req_idxs=projects, idx_label="project", msg=msg
        ),
    )

    cols = ["min_cumulative_new_build_mw", "max_cumulative_new_build_mw"]
    # Check that maximum new build doesn't decrease
    if cols[1] in df_cols:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="Mid",
            errors=validate_row_monotonicity(
                df=cost_df, col=cols[1], rank_col="vintage"
            ),
        )

    # check that min build <= max build
    if set(cols).issubset(set(df_cols)):
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="High",
            errors=validate_column_monotonicity(
                df=cost_df, cols=cols, idx_col=["project", "vintage"]
            ),
        )

    cols = ["min_cumulative_new_build_mwh", "max_cumulative_new_build_mwh"]
    # Check that maximum new build doesn't decrease - MWh
    if cols[1] in df_cols:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="Mid",
            errors=validate_row_monotonicity(
                df=cost_df, col=cols[1], rank_col="vintage"
            ),
        )

    # check that min build <= max build - MWh
    if set(cols).issubset(set(df_cols)):
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="High",
            errors=validate_column_monotonicity(
                df=cost_df, cols=cols, idx_col=["project", "vintage"]
            ),
        )

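# The two monotonicity helpers used above enforce related but distinct
# conditions: validate_row_monotonicity checks that a column does not
# decrease across vintages within a project, while
# validate_column_monotonicity checks min <= max within each row. Minimal
# sketches, with signatures and message formats assumed:
def validate_row_monotonicity(df, col, rank_col, idx_col="project"):
    """Report groups where `col` decreases as `rank_col` increases."""
    results = []
    for idx, group in df.sort_values(rank_col).groupby(idx_col):
        if not group[col].dropna().is_monotonic_increasing:
            results.append(
                "{}: {} cannot decrease with increasing {}".format(idx, col, rank_col)
            )
    return results


def validate_column_monotonicity(df, cols, idx_col):
    """Report rows where values are not non-decreasing from left to right
    across `cols` (e.g. a min column exceeding its max column)."""
    results = []
    # diff along columns; any negative step means a decrease left-to-right
    mask = df[cols].diff(axis=1).iloc[:, 1:].lt(0).any(axis=1)
    for _, row in df[mask].iterrows():
        results.append(
            "{}: values must be non-decreasing across {}".format(
                tuple(row[idx_col]), cols
            )
        )
    return results
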
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # Get the fuel input data
    fuels, fuel_prices = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    # Get the projects fuels
    c1 = conn.cursor()
    projects = c1.execute(
        """SELECT project, fuel
        FROM inputs_project_portfolios
        INNER JOIN
        (SELECT project, fuel
        FROM inputs_project_operational_chars
        WHERE project_operational_chars_scenario_id = {}
        AND fuel IS NOT NULL) AS op_char
        USING (project)
        WHERE project_portfolio_scenario_id = {}""".format(
            subscenarios.PROJECT_OPERATIONAL_CHARS_SCENARIO_ID,
            subscenarios.PROJECT_PORTFOLIO_SCENARIO_ID,
        )
    )

    # Get the relevant periods and months
    c2 = conn.cursor()
    periods_months = c2.execute(
        """SELECT DISTINCT period, month
        FROM inputs_temporal
        WHERE temporal_scenario_id = {}
        AND subproblem_id = {}
        AND stage_id = {};""".format(
            subscenarios.TEMPORAL_SCENARIO_ID, subproblem, stage
        )
    )

    # Convert input data into pandas DataFrame
    fuels_df = cursor_to_df(fuels)
    fuel_prices_df = cursor_to_df(fuel_prices)
    prj_df = cursor_to_df(projects)

    # Get relevant lists
    fuels = fuels_df["fuel"].to_list()
    actual_fuel_periods_months = list(
        fuel_prices_df[["fuel", "period", "month"]].itertuples(index=False, name=None)
    )
    req_fuel_periods_months = [(f, p, m) for (p, m) in periods_months for f in fuels]

    # Check data types
    expected_dtypes = get_expected_dtypes(
        conn, ["inputs_project_fuels", "inputs_project_fuel_prices"]
    )

    dtype_errors, error_columns = validate_dtypes(fuels_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_fuels",
        severity="High",
        errors=dtype_errors,
    )

    dtype_errors, error_columns = validate_dtypes(fuel_prices_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_fuel_prices",
        severity="High",
        errors=dtype_errors,
    )

    # TODO: couldn't this be a simple foreign key or is NULL not allowed then?
    # TODO: should this check be in projects.init instead?
    # Check that fuels specified for projects are valid fuels
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars",
        severity="High",
        errors=validate_columns(prj_df, "fuel", valids=fuels),
    )

    # Check that fuel prices exist for the period and month
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_fuel_prices",
        severity="High",
        errors=validate_idxs(
            actual_idxs=actual_fuel_periods_months,
            req_idxs=req_fuel_periods_months,
            idx_label="(fuel, period, month)",
        ),
    )

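# validate_columns above checks project fuels against the list of valid
# fuels. A minimal sketch of that helper, with its signature and message
# format assumed:
def validate_columns(df, col, valids, idx_col="project"):
    """Report indexes whose value in `col` is not among `valids`."""
    invalids = sorted(df.loc[~df[col].isin(valids), idx_col].unique())
    if invalids:
        return ["Invalid {} inputs for {}: {}".format(col, idx_col, invalids)]
    return []
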
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    tx_capacities = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    tx_lines = get_tx_lines(conn, scenario_id, subscenarios, "capacity_type", "tx_spec")

    # Convert input data into pandas DataFrame and extract data
    df = cursor_to_df(tx_capacities)
    spec_tx_lines = df["transmission_line"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_transmission_specified_capacity"]
    )

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=dtype_errors,
    )

    # Ensure tx_line capacity is specified in at least 1 period
    msg = "Expected specified capacity for at least one period."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_idxs(
            actual_idxs=spec_tx_lines,
            req_idxs=tx_lines,
            idx_label="transmission_line",
            msg=msg,
        ),
    )

    # Check for missing values (vs. missing row entries above)
    cols = ["min_mw", "max_mw"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_missing_inputs(df, cols),
    )

    # check that min <= max
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_column_monotonicity(
            df=df, cols=cols, idx_col=["transmission_line", "period"]
        ),
    )

def generic_validate_project_bas(
    scenario_id,
    subscenarios,
    subproblem,
    stage,
    conn,
    reserve_type,
    project_ba_subscenario_id,
    ba_subscenario_id,
):
    """
    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :param reserve_type:
    :param project_ba_subscenario_id:
    :param ba_subscenario_id:
    :return:
    """
    # TODO: is this actually needed?
    subproblem = 1 if subproblem == "" else subproblem
    stage = 1 if stage == "" else stage

    project_bas, prj_derates = generic_get_inputs_from_database(
        scenario_id=scenario_id,
        subscenarios=subscenarios,
        subproblem=subproblem,
        stage=stage,
        conn=conn,
        reserve_type=reserve_type,
        project_ba_subscenario_id=project_ba_subscenario_id,
        ba_subscenario_id=ba_subscenario_id,
    )

    # Convert input data into pandas DataFrame
    df = cursor_to_df(project_bas)
    df_derate = cursor_to_df(prj_derates).dropna()
    bas_w_project = df["{}_ba".format(reserve_type)].unique()
    projects_w_ba = df["project"].unique()
    projects_w_derate = df_derate["project"].unique()

    # Get the required reserve bas
    c = conn.cursor()
    bas = c.execute(
        """SELECT {}_ba FROM inputs_geography_{}_bas
        WHERE {}_ba_scenario_id = {}
        """.format(reserve_type, reserve_type, reserve_type, ba_subscenario_id)
    )
    bas = [b[0] for b in bas]  # convert to list

    # Check that each reserve BA has at least one project assigned to it
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_{}_bas".format(reserve_type),
        severity="High",
        errors=validate_idxs(
            actual_idxs=bas_w_project,
            req_idxs=bas,
            idx_label="{}_ba".format(reserve_type),
            msg="Each reserve BA needs at least 1 "
            "project assigned to it.",
        ),
    )

    # Check that all projects w derates have a BA specified
    msg = "Project has a reserve derate specified but no relevant BA."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars",
        severity="Low",
        errors=validate_idxs(
            actual_idxs=projects_w_ba,
            req_idxs=projects_w_derate,
            idx_label="project",
            msg=msg,
        ),
    )

def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # TODO: check that there are no minimum duration inputs for this type
    #  (duration is specified by specifying the build size in mw and mwh)
    #  Maybe also check all other required / not required inputs?
    #  --> see example in gen_must_run operational_type. Seems very verbose
    #  and hard to maintain. Is there a way to generalize this?

    # Get the binary build generator inputs
    new_stor_costs, new_stor_build_size = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    projects = get_projects(
        conn, scenario_id, subscenarios, "capacity_type", "stor_new_bin"
    )

    # Convert input data into pandas DataFrame
    cost_df = cursor_to_df(new_stor_costs)
    bld_size_df = cursor_to_df(new_stor_build_size)

    # get the project lists
    cost_projects = cost_df["project"].unique()
    bld_size_projects = bld_size_df["project"]

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=["inputs_project_new_cost", "inputs_project_new_binary_build_size"],
    )

    # Check dtypes - cost_df
    dtype_errors, error_columns = validate_dtypes(cost_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative - cost_df
    numeric_columns = [c for c in cost_df.columns if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=validate_values(cost_df, valid_numeric_columns, min=0),
    )

    # Check dtypes - bld_size_df
    dtype_errors, error_columns = validate_dtypes(bld_size_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative - bld_size_df
    numeric_columns = [
        c for c in bld_size_df.columns if expected_dtypes[c] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_values(bld_size_df, valid_numeric_columns, min=0),
    )

    # Check that all binary new build projects are available in >=1 vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=validate_idxs(
            actual_idxs=cost_projects, req_idxs=projects, idx_label="project", msg=msg
        ),
    )

    # Check that all binary new build projects have build size specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_idxs(
            actual_idxs=bld_size_projects, req_idxs=projects, idx_label="project"
        ),
    )

def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    tx_cost = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    tx_lines = get_tx_lines(
        conn, scenario_id, subscenarios, "capacity_type", "tx_new_lin"
    )

    # Convert input data into pandas DataFrame
    df = cursor_to_df(tx_cost)

    # get the tx lines lists
    tx_lines_w_cost = df["transmission_line"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_transmission_new_cost"]
    )

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_new_cost",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    numeric_columns = [c for c in df.columns if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_new_cost",
        severity="High",
        errors=validate_values(df, valid_numeric_columns, "transmission_line", min=0),
    )

    # Check that all new build tx lines have cost data for >=1 vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_new_cost",
        severity="Mid",
        errors=validate_idxs(
            actual_idxs=tx_lines_w_cost,
            req_idxs=tx_lines,
            idx_label="transmission_line",
            msg=msg,
        ),
    )

def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # Get the binary build generator inputs
    new_gen_costs, new_build_size = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    projects = get_projects(
        conn, scenario_id, subscenarios, "capacity_type", "gen_new_bin"
    )

    # Convert input data into pandas DataFrame
    cost_df = cursor_to_df(new_gen_costs)
    bld_size_df = cursor_to_df(new_build_size)

    # get the project lists
    cost_projects = cost_df["project"].unique()
    bld_size_projects = bld_size_df["project"]

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=["inputs_project_new_cost", "inputs_project_new_binary_build_size"],
    )

    # Check dtypes - cost_df
    dtype_errors, error_columns = validate_dtypes(cost_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative - cost_df
    numeric_columns = [c for c in cost_df.columns if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=validate_values(cost_df, valid_numeric_columns, min=0),
    )

    # Check dtypes - bld_size_df
    dtype_errors, error_columns = validate_dtypes(bld_size_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative - bld_size_df
    numeric_columns = [
        c for c in bld_size_df.columns if expected_dtypes[c] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_values(bld_size_df, valid_numeric_columns, min=0),
    )

    # Check that all binary new build projects are available in >=1 vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="Mid",
        errors=validate_idxs(
            actual_idxs=cost_projects, req_idxs=projects, idx_label="project", msg=msg
        ),
    )

    # Check that all binary new build projects have build size specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_idxs(
            actual_idxs=bld_size_projects, req_idxs=projects, idx_label="project"
        ),
    )

def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # Validate operational chars table inputs
    opchar_df = validate_opchars(
        scenario_id, subscenarios, subproblem, stage, conn, "gen_must_run"
    )

    # Other module specific validations
    c = conn.cursor()
    heat_rates = c.execute(
        """
        SELECT project, load_point_fraction
        FROM inputs_project_portfolios
        INNER JOIN
        (SELECT project, operational_type, heat_rate_curves_scenario_id
        FROM inputs_project_operational_chars
        WHERE project_operational_chars_scenario_id = {}
        AND operational_type = '{}') AS op_char
        USING(project)
        INNER JOIN
        (SELECT project, heat_rate_curves_scenario_id, load_point_fraction
        FROM inputs_project_heat_rate_curves) as heat_rates
        USING(project, heat_rate_curves_scenario_id)
        WHERE project_portfolio_scenario_id = {}
        """.format(
            subscenarios.PROJECT_OPERATIONAL_CHARS_SCENARIO_ID,
            "gen_must_run",
            subscenarios.PROJECT_PORTFOLIO_SCENARIO_ID,
        )
    )

    # Convert inputs to data frame
    hr_df = cursor_to_df(heat_rates)

    # Check that there is only one load point (constant heat rate)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_heat_rate_curves",
        severity="Mid",
        errors=validate_single_input(
            df=hr_df,
            msg="gen_must_run can only have one load "
            "point (constant heat rate).",
        ),
    )

    # Check that the project does not show up in any of the
    # inputs_project_reserve_bas tables since gen_must_run can't provide any
    # reserves
    projects_by_reserve = get_projects_by_reserve(scenario_id, subscenarios, conn)
    for reserve, projects_w_ba in projects_by_reserve.items():
        table = "inputs_project_" + reserve + "_bas"
        reserve_errors = validate_idxs(
            actual_idxs=opchar_df["project"],
            invalid_idxs=projects_w_ba,
            msg="gen_must_run cannot provide {}.".format(reserve),
        )
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table=table,
            severity="Mid",
            errors=reserve_errors,
        )

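# validate_single_input above flags projects with more than one heat rate
# load point. A sketch of the expected behavior, with the signature and
# message format assumed:
def validate_single_input(df, idx_col="project", msg=""):
    """Report indexes that appear in more than one row of df."""
    counts = df[idx_col].value_counts()
    return [
        "{}: too many inputs. {}".format(idx, msg)
        for idx in sorted(counts[counts > 1].index)
    ]
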
def test_validate_idxs(self):
    test_cases = {
        # Make sure correct inputs don't throw error
        1: {
            "actual_idxs": ["gas_ct", "coal_plant"],
            "req_idxs": ["gas_ct"],
            "invalid_idxs": ["gen_gas_ct2"],
            "idx_label": "project",
            "msg": "",
            "result": [],
        },
        # Make sure missing required indexes are detected
        2: {
            "actual_idxs": [],
            "req_idxs": ["gas_ct"],
            "invalid_idxs": [],
            "idx_label": "project",
            "msg": "",
            "result": ["Missing required inputs for project: ['gas_ct']. "],
        },
        # Make sure missing required tuple indexes are properly detected
        3: {
            "actual_idxs": [],
            "req_idxs": [("gas_ct", 2020)],
            "invalid_idxs": [],
            "idx_label": "(project, period)",
            "msg": "",
            "result": [
                "Missing required inputs for (project, period): "
                "[('gas_ct', 2020)]. "
            ],
        },
        # Make sure multiple missing required tuple indexes are properly
        # detected (results are sorted!)
        4: {
            "actual_idxs": [],
            "req_idxs": [("gas_ct", 2020), ("coal_plant", 2020)],
            "invalid_idxs": [],
            "idx_label": "(project, period)",
            "msg": "",
            "result": [
                "Missing required inputs for (project, period): "
                "[('coal_plant', 2020), ('gas_ct', 2020)]. "
            ],
        },
        # Make sure invalid idxs are detected and error message is added
        5: {
            "actual_idxs": ["gas_ct", "btm_solar"],
            "req_idxs": [],
            "invalid_idxs": ["btm_solar"],
            "idx_label": "project",
            "msg": "gen_var_must_take cannot provide lf_down.",
            "result": [
                "Invalid inputs for project: ['btm_solar']. "
                "gen_var_must_take cannot provide lf_down."
            ],
        },
        # Make sure multiple invalid idxs are detected correctly
        6: {
            "actual_idxs": ["gas_ct", "btm_solar", "btm_wind"],
            "req_idxs": [],
            "invalid_idxs": ["btm_solar", "btm_wind"],
            "idx_label": "project",
            "msg": "gen_var_must_take cannot provide lf_down.",
            "result": [
                "Invalid inputs for project: ['btm_solar', 'btm_wind']. "
                "gen_var_must_take cannot provide lf_down."
            ],
        },
    }

    for test_case in test_cases.keys():
        expected_list = test_cases[test_case]["result"]
        actual_list = module_to_test.validate_idxs(
            actual_idxs=test_cases[test_case]["actual_idxs"],
            req_idxs=test_cases[test_case]["req_idxs"],
            invalid_idxs=test_cases[test_case]["invalid_idxs"],
            idx_label=test_cases[test_case]["idx_label"],
            msg=test_cases[test_case]["msg"],
        )
        self.assertListEqual(expected_list, actual_list)

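# For context, a minimal implementation of validate_idxs that satisfies
# the test cases above (the production function may handle additional
# edge cases):
def validate_idxs(actual_idxs, req_idxs=None, invalid_idxs=None,
                  idx_label="project", msg=""):
    """Check indexes against required and invalid index lists and return
    a list of error strings (empty if everything checks out)."""
    results = []

    # Required indexes that are absent from the actual indexes
    missing = sorted(set(req_idxs or []) - set(actual_idxs))
    if missing:
        results.append(
            "Missing required inputs for {}: {}. {}".format(idx_label, missing, msg)
        )

    # Actual indexes that are explicitly invalid
    invalids = sorted(set(actual_idxs) & set(invalid_idxs or []))
    if invalids:
        results.append(
            "Invalid inputs for {}: {}. {}".format(idx_label, invalids, msg)
        )

    return results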