def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    (
        min_max_builds,
        supply_curve_count,
        supply_curve_id,
        supply_curve,
    ) = get_model_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    projects = get_projects(conn, scenario_id, subscenarios, "capacity_type", "dr_new")

    # Convert input data into pandas DataFrame
    df = cursor_to_df(min_max_builds)
    df_sc = cursor_to_df(supply_curve)
    dr_projects = df_sc["project"].unique()

    # Check for missing project potential inputs
    cols = ["min_cumulative_new_build_mwh", "max_cumulative_new_build_mwh"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_potential",
        severity="High",
        errors=validate_missing_inputs(df, cols),
    )

    # Check for missing supply curve inputs
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_shiftable_load_supply_curve",
        severity="High",
        errors=validate_idxs(
            actual_idxs=dr_projects, req_idxs=projects, idx_label="project"
        ),
    )
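# The cursor_to_df helper used throughout these validations converts an
# executed sqlite3 cursor into a pandas DataFrame. A minimal sketch of that
# behavior, assuming the cursor has already run a SELECT; this is
# illustrative, not the GridPath implementation:
import pandas as pd


def _cursor_to_df_sketch(cursor):
    """Build a DataFrame from an executed sqlite3 cursor, taking column
    names from cursor.description (first element of each 7-tuple)."""
    columns = [col[0] for col in cursor.description]
    return pd.DataFrame(cursor.fetchall(), columns=columns)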
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    prj_ramp_rates = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    df = cursor_to_df(prj_ramp_rates)

    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars",
        severity="Mid",
        errors=validate_values(df, ["lf_reserves_up_ramp_rate"], min=0, max=1),
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :return:
    """
    availabilities = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    df = cursor_to_df(availabilities)
    idx_cols = ["project", "timepoint"]
    value_cols = ["availability_derate"]

    # Check data types of the availability inputs
    expected_dtypes = get_expected_dtypes(
        conn,
        ["inputs_project_availability", "inputs_project_availability_exogenous"],
    )
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_exogenous",
        severity="High",
        errors=dtype_errors,
    )

    # Check for missing inputs
    msg = (
        "If not specified, availability is assumed to be 100%. If you "
        "don't want to specify any availability derates, simply leave the "
        "exogenous_availability_scenario_id empty and this message will "
        "disappear."
    )
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_exogenous",
        severity="Low",
        errors=validate_missing_inputs(df, value_cols, idx_cols, msg),
    )

    # Check that the derate is a fraction between 0 and 1; skip the check if
    # the column already failed the dtype check above
    if "availability_derate" not in error_columns:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_availability_exogenous",
            severity="High",
            errors=validate_values(df, value_cols, min=0, max=1),
        )
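# A minimal sketch of the range check that validate_values performs above,
# assuming it returns a list of human-readable error strings for values
# outside [min, max]. The helper name, keyword names, and return format
# mirror the calls in this file but are assumptions, not the GridPath code:
import pandas as pd


def _validate_values_sketch(df, value_cols, min=None, max=None):
    """Return an error string per column with values outside the allowed
    range; NaNs are ignored here (the missing-inputs check catches them)."""
    errors = []
    for col in value_cols:
        values = df[col].dropna()
        if min is not None and (values < min).any():
            errors.append("Column '{}' has values below {}".format(col, min))
        if max is not None and (values > max).any():
            errors.append("Column '{}' has values above {}".format(col, max))
    return errors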
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :return:
    """
    params = get_inputs_from_database(scenario_id, subscenarios, subproblem, stage, conn)

    df = cursor_to_df(params)

    # Check data types of the availability inputs
    expected_dtypes = get_expected_dtypes(
        conn,
        ["inputs_project_availability", "inputs_project_availability_endogenous"],
    )
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_endogenous",
        severity="High",
        errors=dtype_errors,
    )

    # Check for missing inputs
    msg = ""
    value_cols = [
        "unavailable_hours_per_period",
        "unavailable_hours_per_event_min",
        "available_hours_between_events_min",
    ]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_endogenous",
        severity="Low",
        errors=validate_missing_inputs(df, value_cols, "project", msg),
    )

    # Check that the minimum unavailable hours per event do not exceed the
    # unavailable hours per period
    cols = ["unavailable_hours_per_event_min", "unavailable_hours_per_period"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_endogenous",
        severity="High",
        errors=validate_column_monotonicity(df=df, cols=cols, idx_col=["project"]),
    )
def validate_hydro_opchars(
    scenario_id, subscenarios, subproblem, stage, conn, op_type
):
    """
    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :param op_type:
    :return:
    """
    hydro_chars = get_hydro_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn, op_type
    )

    # Convert input data into pandas DataFrame
    df = cursor_to_df(hydro_chars)
    value_cols = ["min_power_fraction", "average_power_fraction", "max_power_fraction"]

    # Check for missing inputs
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_hydro_operational_chars",
        severity="High",
        errors=validate_missing_inputs(df, value_cols, ["project", "horizon"]),
    )

    # Check that the power fractions are between 0 and 1
    hydro_opchar_fraction_error = write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_hydro_operational_chars",
        severity="Low",
        errors=validate_values(df, value_cols, min=0, max=1),
    )

    # Check min <= avg <= max
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_hydro_operational_chars",
        severity="Mid",
        errors=validate_column_monotonicity(
            df=df, cols=value_cols, idx_col=["project", "horizon"]
        ),
    )

    return hydro_opchar_fraction_error
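# A minimal sketch of the min <= avg <= max check that
# validate_column_monotonicity performs above: within each row, the values in
# cols must be non-decreasing from left to right. The helper name and return
# format are assumptions based on how it is called here:
import pandas as pd


def _validate_column_monotonicity_sketch(df, cols, idx_col):
    """Return one error string per row where the listed columns are not
    non-decreasing left to right (e.g. min > avg or avg > max). NaN
    comparisons are never True, so rows with missing data are skipped."""
    errors = []
    for _, row in df.iterrows():
        values = [row[c] for c in cols]
        if any(a > b for a, b in zip(values, values[1:])):
            idx = tuple(row[c] for c in idx_col)
            errors.append(
                "Index {}: expected {} to be non-decreasing".format(idx, cols)
            )
    return errors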
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    project_zones = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    # Convert input data into pandas DataFrame
    df = cursor_to_df(project_zones)
    zones_w_project = df["prm_zone"].unique()

    # Get the required PRM zones
    # TODO: make this into a function similar to get_projects()?
    #  could eventually centralize all these db query functions in one place
    c = conn.cursor()
    zones = c.execute(
        """SELECT prm_zone FROM inputs_geography_prm_zones
        WHERE prm_zone_scenario_id = {}
        """.format(
            subscenarios.PRM_ZONE_SCENARIO_ID
        )
    )
    zones = [z[0] for z in zones]  # convert to list

    # Check that each PRM zone has at least one project assigned to it
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_prm_zones",
        severity="High",
        errors=validate_idxs(
            actual_idxs=zones_w_project,
            req_idxs=zones,
            idx_label="prm_zone",
            msg="Each PRM zone needs at least 1 project assigned to it.",
        ),
    )

    # Make sure PRM type is specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_elcc_chars",
        severity="High",
        errors=validate_missing_inputs(df, "prm_type"),
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # Validate operational chars table inputs
    validate_opchars(scenario_id, subscenarios, subproblem, stage, conn, "gen_simple")

    # Other module specific validations
    c = conn.cursor()
    heat_rates = c.execute(
        """
        SELECT project, load_point_fraction
        FROM inputs_project_portfolios
        INNER JOIN
        (SELECT project, operational_type, heat_rate_curves_scenario_id
        FROM inputs_project_operational_chars
        WHERE project_operational_chars_scenario_id = {}
        AND operational_type = '{}') AS op_char
        USING(project)
        INNER JOIN
        (SELECT project, heat_rate_curves_scenario_id, load_point_fraction
        FROM inputs_project_heat_rate_curves) as heat_rates
        USING(project, heat_rate_curves_scenario_id)
        WHERE project_portfolio_scenario_id = {}
        """.format(
            subscenarios.PROJECT_OPERATIONAL_CHARS_SCENARIO_ID,
            "gen_simple",
            subscenarios.PROJECT_PORTFOLIO_SCENARIO_ID,
        )
    )

    # Convert inputs to dataframe
    hr_df = cursor_to_df(heat_rates)

    # Check that there is only one load point (constant heat rate)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_heat_rate_curves",
        severity="Mid",
        errors=validate_single_input(
            df=hr_df,
            msg="gen_simple can only have one load point (constant heat rate).",
        ),
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # TODO: check that hours in full period is within x and y
    #  ("within" check or "validate" check in param definition returns obscure
    #  error message that isn't helpful).

    periods = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    df = cursor_to_df(periods)

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_temporal_periods"]
    )
    # Hard-code data type for hours_in_period_timepoints
    expected_dtypes["hours_in_period_timepoints"] = "numeric"

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_periods",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    numeric_columns = [c for c in df.columns if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_periods",
        severity="Mid",
        errors=validate_values(df, valid_numeric_columns, "period", min=0),
    )
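# A minimal sketch of the dtype check above, assuming validate_dtypes
# compares each DataFrame column's inferred pandas dtype against the expected
# 'numeric'/'string' category and returns (errors, offending_columns). The
# name and return format mirror the calls in this file but are assumptions:
import pandas as pd
from pandas.api.types import is_numeric_dtype


def _validate_dtypes_sketch(df, expected_dtypes):
    """Flag columns whose inferred pandas dtype disagrees with the expected
    'numeric' or 'string' category."""
    errors, error_columns = [], []
    for col in df.columns:
        expected = expected_dtypes.get(col)
        if expected == "numeric" and not is_numeric_dtype(df[col]):
            errors.append("Column '{}' should be numeric".format(col))
            error_columns.append(col)
        elif expected == "string" and is_numeric_dtype(df[col]):
            errors.append("Column '{}' should be a string".format(col))
            error_columns.append(col)
    return errors, error_columns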
def get_expected_dtypes(conn, tables):
    """
    Goes through each listed table and creates a dictionary that maps each
    column name to an expected datatype. If the tables have duplicate column
    names, the last table will define the expected datatype (generally,
    datatypes are the same for columns in different tables with the same
    name, so this shouldn't be an issue).

    :param conn: database connection
    :param tables: list of database tables for which to collect datatypes
    :return: dictionary with table columns and expected datatype category
        ('numeric' or 'string')
    """
    # Map SQLITE types to either numeric or string
    # Based on '3.1 Determination of column affinity':
    # https://www.sqlite.org/datatype3.html
    numeric_types = [
        "BOOLEAN",
        "DATE",
        "DATETIME",
        "DECIMAL",
        "DOUB",
        "FLOA",
        "INT",
        "NUMERIC",
        "REAL",
        "TIME",
    ]
    string_types = ["BLOB", "CHAR", "CLOB", "STRING", "TEXT"]

    def get_type_category(detailed_type):
        if any(numeric_type in detailed_type for numeric_type in numeric_types):
            return "numeric"
        elif any(string_type in detailed_type for string_type in string_types):
            return "string"
        else:
            raise ValueError(
                "Encountered unknown SQLite type: type {}".format(detailed_type)
            )

    expected_dtypes = {}
    for table in tables:
        # Get the expected datatypes from the table info (pragma)
        table_info = conn.execute("""PRAGMA table_info({})""".format(table))
        df = cursor_to_df(table_info)
        df["type_category"] = df["type"].map(get_type_category)
        dtypes_dict = dict(zip(df.name, df.type_category))
        expected_dtypes.update(dtypes_dict)

    return expected_dtypes
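# A short, self-contained usage example of the PRAGMA-based mapping above,
# run against an in-memory SQLite table. The table and column names are made
# up for illustration, and the inline type mapping is a simplified version
# of the numeric_types/string_types lookup in get_expected_dtypes:
import sqlite3


def _get_expected_dtypes_demo():
    conn = sqlite3.connect(":memory:")
    conn.execute(
        "CREATE TABLE demo_inputs (project TEXT, capacity_mw FLOAT, period INTEGER)"
    )
    # PRAGMA table_info returns (cid, name, type, notnull, dflt_value, pk)
    table_info = conn.execute("PRAGMA table_info(demo_inputs)").fetchall()
    # Map each declared type to 'numeric' or 'string' as get_expected_dtypes does
    numeric_markers = ("INT", "FLOA", "REAL", "DOUB", "NUMERIC", "DECIMAL")
    mapping = {
        name: "numeric"
        if any(marker in col_type.upper() for marker in numeric_markers)
        else "string"
        for _, name, col_type, _, _, _ in table_info
    }
    # {'project': 'string', 'capacity_mw': 'numeric', 'period': 'numeric'}
    return mapping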
def validate_var_profiles(
    scenario_id, subscenarios, subproblem, stage, conn, op_type
):
    """
    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :param op_type:
    :return:
    """
    var_profiles = get_var_profile_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn, op_type
    )

    # Convert input data into pandas DataFrame
    df = cursor_to_df(var_profiles)
    value_cols = ["cap_factor"]

    # Check for missing inputs
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_variable_generator_profiles",
        severity="High",
        errors=validate_missing_inputs(df, value_cols, ["project", "timepoint"]),
    )

    # Check that cap factors are fractions between 0 and 1
    cap_factor_validation_error = write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_variable_generator_profiles",
        severity="Low",
        errors=validate_values(df, ["cap_factor"], min=0, max=1),
    )

    return cap_factor_validation_error
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    project_zone_dur = get_model_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    df = cursor_to_df(project_zone_dur)
    cols = ["min_duration_for_full_capacity_credit_hours"]

    # Make sure param sign is as expected
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_elcc_chars",
        severity="High",
        errors=validate_values(df, cols, min=0, strict_min=True),
    )

    # Make sure param is specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_elcc_chars",
        severity="High",
        errors=validate_missing_inputs(df, cols),
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    project_fractions = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    df = cursor_to_df(project_fractions)

    # Make sure fraction is between 0 and 1
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_elcc_chars",
        severity="High",
        errors=validate_values(df, ["elcc_simple_fraction"], min=0, max=1),
    )

    # Make sure fraction is specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_elcc_chars",
        severity="High",
        errors=validate_missing_inputs(df, "elcc_simple_fraction"),
    )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    gen_ret_bin_params = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    projects = get_projects(
        conn, scenario_id, subscenarios, "capacity_type", "gen_ret_bin"
    )

    # Convert input data into pandas DataFrame and extract data
    df = cursor_to_df(gen_ret_bin_params)
    spec_projects = df["project"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=[
            "inputs_project_specified_capacity",
            "inputs_project_specified_fixed_cost",
        ],
    )

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_specified_capacity, "
        "inputs_project_specified_fixed_cost",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    numeric_columns = [c for c in df.columns if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_specified_capacity, "
        "inputs_project_specified_fixed_cost",
        severity="High",
        errors=validate_values(df, valid_numeric_columns, min=0),
    )

    # Ensure project capacity & fixed cost is specified in at least 1 period
    msg = "Expected specified capacity & fixed costs for at least one period."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_specified_capacity, "
        "inputs_project_specified_fixed_cost",
        severity="High",
        errors=validate_idxs(
            actual_idxs=spec_projects, req_idxs=projects, idx_label="project", msg=msg
        ),
    )

    # Check for missing values (vs. missing row entries above)
    cols = ["specified_capacity_mw", "annual_fixed_cost_per_mw_year"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_specified_capacity, "
        "inputs_project_specified_fixed_cost",
        severity="High",
        errors=validate_missing_inputs(df, cols),
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    new_stor_costs = get_model_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    projects = get_projects(
        conn, scenario_id, subscenarios, "capacity_type", "stor_new_lin"
    )

    # Convert input data into pandas DataFrame
    cost_df = cursor_to_df(new_stor_costs)
    df_cols = cost_df.columns

    # Get the project lists
    cost_projects = cost_df["project"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_project_new_cost", "inputs_project_new_potential"]
    )

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(cost_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    numeric_columns = [c for c in cost_df.columns if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=validate_values(cost_df, valid_numeric_columns, min=0),
    )

    # Check that all new-build storage projects have cost data for at least
    # one vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="Mid",
        errors=validate_idxs(
            actual_idxs=cost_projects, req_idxs=projects, idx_label="project", msg=msg
        ),
    )

    cols = ["min_cumulative_new_build_mw", "max_cumulative_new_build_mw"]
    # Check that maximum new build doesn't decrease - MW
    if cols[1] in df_cols:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="Mid",
            errors=validate_row_monotonicity(
                df=cost_df, col=cols[1], rank_col="vintage"
            ),
        )

    # Check that min build <= max build - MW
    if set(cols).issubset(set(df_cols)):
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="High",
            errors=validate_column_monotonicity(
                df=cost_df, cols=cols, idx_col=["project", "vintage"]
            ),
        )

    cols = ["min_cumulative_new_build_mwh", "max_cumulative_new_build_mwh"]
    # Check that maximum new build doesn't decrease - MWh
    if cols[1] in df_cols:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="Mid",
            errors=validate_row_monotonicity(
                df=cost_df, col=cols[1], rank_col="vintage"
            ),
        )

    # Check that min build <= max build - MWh
    if set(cols).issubset(set(df_cols)):
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="High",
            errors=validate_column_monotonicity(
                df=cost_df, cols=cols, idx_col=["project", "vintage"]
            ),
        )
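# A minimal sketch of validate_row_monotonicity as used above: for each
# project, the max cumulative build must not decrease as the vintage
# increases. The helper name, keyword names, and return format are
# assumptions based on the calls in this file:
import pandas as pd


def _validate_row_monotonicity_sketch(df, col, rank_col, id_col="project"):
    """Return an error string per project whose values in `col` decrease
    when rows are ordered by `rank_col` (e.g. max build shrinking across
    vintages). Rows with NaN in `col` are ignored."""
    errors = []
    for project, group in df.dropna(subset=[col]).groupby(id_col):
        ordered = group.sort_values(rank_col)[col]
        if not ordered.is_monotonic_increasing:
            errors.append(
                "Project '{}': column '{}' should be non-decreasing in '{}'".format(
                    project, col, rank_col
                )
            )
    return errors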
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    c = conn.cursor()

    # Get the transmission inputs
    transmission_lines = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    # Convert input data into pandas DataFrame
    df = cursor_to_df(transmission_lines)

    # Check data types
    expected_dtypes = get_expected_dtypes(
        conn,
        [
            "inputs_transmission_portfolios",
            "inputs_transmission_availability",
            "inputs_transmission_load_zones",
            "inputs_transmission_operational_chars",
        ],
    )
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_portfolios, "
        "inputs_transmission_load_zones, "
        "inputs_transmission_operational_chars",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    numeric_columns = [c for c in df.columns if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_operational_chars",
        severity="High",
        errors=validate_values(df, valid_numeric_columns, min=0),
    )

    # Ensure we're not combining incompatible capacity and operational types
    cols = ["capacity_type", "operational_type"]
    invalid_combos = c.execute(
        """
        SELECT {} FROM mod_tx_capacity_and_tx_operational_type_invalid_combos
        """.format(
            ",".join(cols)
        )
    ).fetchall()
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_operational_chars, "
        "inputs_transmission_portfolios",
        severity="High",
        errors=validate_columns(df, cols, invalids=invalid_combos),
    )

    # Check reactance > 0
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_operational_chars",
        severity="High",
        errors=validate_values(df, ["reactance_ohms"], min=0, strict_min=True),
    )

    # Check that all portfolio tx lines are present in the opchar inputs
    msg = (
        "All tx lines in the portfolio should have an operational type "
        "specified in the inputs_transmission_operational_chars table."
    )
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_operational_chars",
        severity="High",
        errors=validate_missing_inputs(
            df, ["operational_type"], idx_col="transmission_line", msg=msg
        ),
    )

    # Check that all portfolio tx lines are present in the load zone inputs
    msg = (
        "All tx lines in the portfolio should have a load zone from/to "
        "specified in the inputs_transmission_load_zones table."
    )
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_load_zones",
        severity="High",
        errors=validate_missing_inputs(
            df,
            ["load_zone_from", "load_zone_to"],
            idx_col="transmission_line",
            msg=msg,
        ),
    )

    # Check that all tx load zones are part of the active load zones
    load_zones = get_load_zones(conn, subscenarios)
    for col in ["load_zone_from", "load_zone_to"]:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_transmission_load_zones",
            severity="High",
            errors=validate_columns(df, col, valids=load_zones),
        )
def write_model_inputs(
    scenario_directory, scenario_id, subscenarios, subproblem, stage, conn
):
    """
    Get inputs from database and write out the model input projects.tab (to
    be precise, amend it) and project_carbon_tax_allowance.tab files.

    :param scenario_directory: string, the scenario directory
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    project_zones, project_carbon_tax_allowance = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    # projects.tab
    # Make a dict for easy access
    prj_zone_dict = dict()
    for (prj, zone) in project_zones:
        prj_zone_dict[str(prj)] = "." if zone is None else str(zone)

    with open(
        os.path.join(
            scenario_directory, str(subproblem), str(stage), "inputs", "projects.tab"
        ),
        "r",
    ) as projects_file_in:
        reader = csv.reader(projects_file_in, delimiter="\t", lineterminator="\n")

        new_rows = list()

        # Append column header
        header = next(reader)
        header.append("carbon_tax_zone")
        new_rows.append(header)

        # Append correct values
        for row in reader:
            # If the project has a carbon tax zone specified, append it
            if row[0] in prj_zone_dict:
                row.append(prj_zone_dict[row[0]])
                new_rows.append(row)
            # If the project is not specified, specify no zone
            else:
                row.append(".")
                new_rows.append(row)

    with open(
        os.path.join(
            scenario_directory, str(subproblem), str(stage), "inputs", "projects.tab"
        ),
        "w",
        newline="",
    ) as projects_file_out:
        writer = csv.writer(projects_file_out, delimiter="\t", lineterminator="\n")
        writer.writerows(new_rows)

    # project_carbon_tax_allowance.tab
    ct_allowance_df = cursor_to_df(project_carbon_tax_allowance)
    if not ct_allowance_df.empty:
        ct_allowance_df = ct_allowance_df.fillna(".")
        fpath = os.path.join(
            scenario_directory,
            str(subproblem),
            str(stage),
            "inputs",
            "project_carbon_tax_allowance.tab",
        )
        ct_allowance_df.to_csv(fpath, index=False, sep="\t")
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # Validate operational chars table inputs
    opchar_df = validate_opchars(
        scenario_id, subscenarios, subproblem, stage, conn, "gen_must_run"
    )

    # Other module specific validations
    c = conn.cursor()
    heat_rates = c.execute(
        """
        SELECT project, load_point_fraction
        FROM inputs_project_portfolios
        INNER JOIN
        (SELECT project, operational_type, heat_rate_curves_scenario_id
        FROM inputs_project_operational_chars
        WHERE project_operational_chars_scenario_id = {}
        AND operational_type = '{}') AS op_char
        USING(project)
        INNER JOIN
        (SELECT project, heat_rate_curves_scenario_id, load_point_fraction
        FROM inputs_project_heat_rate_curves) as heat_rates
        USING(project, heat_rate_curves_scenario_id)
        WHERE project_portfolio_scenario_id = {}
        """.format(
            subscenarios.PROJECT_OPERATIONAL_CHARS_SCENARIO_ID,
            "gen_must_run",
            subscenarios.PROJECT_PORTFOLIO_SCENARIO_ID,
        )
    )

    # Convert inputs to data frame
    hr_df = cursor_to_df(heat_rates)

    # Check that there is only one load point (constant heat rate)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_heat_rate_curves",
        severity="Mid",
        errors=validate_single_input(
            df=hr_df,
            msg="gen_must_run can only have one load point (constant heat rate).",
        ),
    )

    # Check that the project does not show up in any of the
    # inputs_project_reserve_bas tables since gen_must_run can't provide any
    # reserves
    projects_by_reserve = get_projects_by_reserve(scenario_id, subscenarios, conn)
    for reserve, projects_w_ba in projects_by_reserve.items():
        table = "inputs_project_" + reserve + "_bas"
        reserve_errors = validate_idxs(
            actual_idxs=opchar_df["project"],
            invalid_idxs=projects_w_ba,
            msg="gen_must_run cannot provide {}.".format(reserve),
        )
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table=table,
            severity="Mid",
            errors=reserve_errors,
        )
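# A minimal sketch of the invalid-index variant of validate_idxs used above:
# flag any actual index that appears in the invalid set (here, gen_must_run
# projects that show up in a reserve BA table). The helper name, keyword
# names, and return format are assumptions based on the call above:
def _validate_invalid_idxs_sketch(actual_idxs, invalid_idxs, idx_label="project", msg=""):
    """Return an error string listing any actual indexes found in the
    invalid set; an empty list means the check passed."""
    bad = sorted(set(actual_idxs) & set(invalid_idxs))
    if bad:
        return ["Invalid {}(s): {}. {}".format(idx_label, bad, msg)]
    return []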
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # Get the fuel input data
    fuels, fuel_prices = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    # Get the projects' fuels
    c1 = conn.cursor()
    projects = c1.execute(
        """SELECT project, fuel
        FROM inputs_project_portfolios
        INNER JOIN
        (SELECT project, fuel
        FROM inputs_project_operational_chars
        WHERE project_operational_chars_scenario_id = {}
        AND fuel IS NOT NULL) AS op_char
        USING (project)
        WHERE project_portfolio_scenario_id = {}""".format(
            subscenarios.PROJECT_OPERATIONAL_CHARS_SCENARIO_ID,
            subscenarios.PROJECT_PORTFOLIO_SCENARIO_ID,
        )
    )

    # Get the relevant periods and months
    c2 = conn.cursor()
    periods_months = c2.execute(
        """SELECT DISTINCT period, month
        FROM inputs_temporal
        WHERE temporal_scenario_id = {}
        AND subproblem_id = {}
        AND stage_id = {};""".format(
            subscenarios.TEMPORAL_SCENARIO_ID, subproblem, stage
        )
    )

    # Convert input data into pandas DataFrame
    fuels_df = cursor_to_df(fuels)
    fuel_prices_df = cursor_to_df(fuel_prices)
    prj_df = cursor_to_df(projects)

    # Get relevant lists
    fuels = fuels_df["fuel"].to_list()
    actual_fuel_periods_months = list(
        fuel_prices_df[["fuel", "period", "month"]].itertuples(index=False, name=None)
    )
    req_fuel_periods_months = [(f, p, m) for (p, m) in periods_months for f in fuels]

    # Check data types
    expected_dtypes = get_expected_dtypes(
        conn, ["inputs_project_fuels", "inputs_project_fuel_prices"]
    )

    dtype_errors, error_columns = validate_dtypes(fuels_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_fuels",
        severity="High",
        errors=dtype_errors,
    )

    dtype_errors, error_columns = validate_dtypes(fuel_prices_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_fuel_prices",
        severity="High",
        errors=dtype_errors,
    )

    # TODO: couldn't this be a simple foreign key or is NULL not allowed then?
    # TODO: should this check be in projects.init instead?
    # Check that fuels specified for projects are valid fuels
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars",
        severity="High",
        errors=validate_columns(prj_df, "fuel", valids=fuels),
    )

    # Check that fuel prices exist for the period and month
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_fuel_prices",
        severity="High",
        errors=validate_idxs(
            actual_idxs=actual_fuel_periods_months,
            req_idxs=req_fuel_periods_months,
            idx_label="(fuel, period, month)",
        ),
    )
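# A minimal sketch of the required-index check above: every required
# (fuel, period, month) tuple must appear among the actual fuel price rows.
# The helper name, keyword names, and return format are assumptions based on
# the calls in this file:
def _validate_req_idxs_sketch(actual_idxs, req_idxs, idx_label="index", msg=""):
    """Return an error string listing required indexes missing from the
    actual data; an empty list means the check passed."""
    missing = sorted(set(req_idxs) - set(actual_idxs))
    if missing:
        return ["Missing {}(s): {}. {}".format(idx_label, missing, msg)]
    return []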
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    c = conn.cursor()

    # Get the project inputs
    projects = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    # Convert input data into pandas DataFrame
    df = cursor_to_df(projects)

    # Check data types
    expected_dtypes = get_expected_dtypes(
        conn,
        [
            "inputs_project_portfolios",
            "inputs_project_availability",
            "inputs_project_load_zones",
            "inputs_project_operational_chars",
        ],
    )
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars, inputs_project_portfolios",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    numeric_columns = [c for c in df.columns if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars",
        severity="High",
        errors=validate_values(df, valid_numeric_columns, min=0),
    )

    # Check that we're not combining incompatible cap-types and op-types
    cols = ["capacity_type", "operational_type"]
    invalid_combos = c.execute(
        """
        SELECT {} FROM mod_capacity_and_operational_type_invalid_combos
        """.format(
            ",".join(cols)
        )
    ).fetchall()
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars, inputs_project_portfolios",
        severity="High",
        errors=validate_columns(df, cols, invalids=invalid_combos),
    )

    # Check that capacity type is valid
    # Note: foreign key already ensures this!
    valid_cap_types = c.execute(
        """SELECT capacity_type FROM mod_capacity_types"""
    ).fetchall()
    valid_cap_types = [v[0] for v in valid_cap_types]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_portfolios",
        severity="High",
        errors=validate_columns(df, "capacity_type", valids=valid_cap_types),
    )

    # Check that operational type is valid
    # Note: foreign key already ensures this!
    valid_op_types = c.execute(
        """SELECT operational_type FROM mod_operational_types"""
    ).fetchall()
    valid_op_types = [v[0] for v in valid_op_types]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_portfolios",
        severity="High",
        errors=validate_columns(df, "operational_type", valids=valid_op_types),
    )

    # Check that all portfolio projects are present in the availability inputs
    msg = (
        "All projects in the portfolio should have an availability type "
        "specified in the inputs_project_availability table."
    )
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability",
        severity="High",
        errors=validate_missing_inputs(df, "availability_type", msg=msg),
    )

    # Check that all portfolio projects are present in the opchar inputs
    msg = (
        "All projects in the portfolio should have an operational type "
        "and balancing type specified in the "
        "inputs_project_operational_chars table."
    )
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars",
        severity="High",
        errors=validate_missing_inputs(
            df, ["operational_type", "balancing_type_project"], msg=msg
        ),
    )

    # Check that all portfolio projects are present in the load zone inputs
    msg = (
        "All projects in the portfolio should have a load zone "
        "specified in the inputs_project_load_zones table."
    )
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_load_zones",
        severity="High",
        errors=validate_missing_inputs(df, "load_zone", msg=msg),
    )
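# A minimal sketch of the missing-inputs checks above, assuming
# validate_missing_inputs flags NaN entries in the given value column(s) and
# reports them by the index column(s). The helper name, keyword names, and
# return format are assumptions based on the calls in this file:
import pandas as pd


def _validate_missing_inputs_sketch(df, col, idx_col="project", msg=""):
    """Return one error string per value column that has NaN entries,
    listing the offending index values."""
    cols = [col] if isinstance(col, str) else col
    idx_cols = [idx_col] if isinstance(idx_col, str) else idx_col
    errors = []
    for c in cols:
        bad = df.loc[df[c].isna(), idx_cols]
        if not bad.empty:
            errors.append(
                "Missing '{}' inputs for: {}. {}".format(
                    c, bad.to_records(index=False).tolist(), msg
                )
            )
    return errors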
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    tx_capacities = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    tx_lines = get_tx_lines(conn, scenario_id, subscenarios, "capacity_type", "tx_spec")

    # Convert input data into pandas DataFrame and extract data
    df = cursor_to_df(tx_capacities)
    spec_tx_lines = df["transmission_line"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_transmission_specified_capacity"]
    )

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=dtype_errors,
    )

    # Ensure tx_line capacity is specified in at least 1 period
    msg = "Expected specified capacity for at least one period."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_idxs(
            actual_idxs=spec_tx_lines,
            req_idxs=tx_lines,
            idx_label="transmission_line",
            msg=msg,
        ),
    )

    # Check for missing values (vs. missing row entries above)
    cols = ["min_mw", "max_mw"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_missing_inputs(df, cols),
    )

    # Check that min <= max
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_column_monotonicity(
            df=df, cols=cols, idx_col=["transmission_line", "period"]
        ),
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    hurdle_rates = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    df = cursor_to_df(hurdle_rates)

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_transmission_hurdle_rates"]
    )

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_hurdle_rates",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    numeric_columns = [c for c in df.columns if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_hurdle_rates",
        severity="High",
        errors=validate_values(df, valid_numeric_columns, "transmission_line", min=0),
    )

    # Check that hurdle rates are specified for each modeling period when the
    # hurdle rates feature is on
    msg = (
        "Expected hurdle rates specified for each modeling period when "
        "transmission hurdle rates feature is on."
    )
    cols = [
        "hurdle_rate_positive_direction_per_mwh",
        "hurdle_rate_negative_direction_per_mwh",
    ]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_hurdle_rates",
        severity="Low",
        errors=validate_missing_inputs(
            df=df, col=cols, idx_col=["transmission_line", "period"], msg=msg
        ),
    )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # Get the binary build generator inputs
    new_gen_costs, new_build_size = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    projects = get_projects(
        conn, scenario_id, subscenarios, "capacity_type", "gen_new_bin"
    )

    # Convert input data into pandas DataFrame
    cost_df = cursor_to_df(new_gen_costs)
    bld_size_df = cursor_to_df(new_build_size)

    # Get the project lists
    cost_projects = cost_df["project"].unique()
    bld_size_projects = bld_size_df["project"]

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=["inputs_project_new_cost", "inputs_project_new_binary_build_size"],
    )

    # Check dtypes - cost_df
    dtype_errors, error_columns = validate_dtypes(cost_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative - cost_df
    numeric_columns = [c for c in cost_df.columns if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=validate_values(cost_df, valid_numeric_columns, min=0),
    )

    # Check dtypes - bld_size_df
    dtype_errors, error_columns = validate_dtypes(bld_size_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative - bld_size_df
    numeric_columns = [
        c for c in bld_size_df.columns if expected_dtypes[c] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_values(bld_size_df, valid_numeric_columns, min=0),
    )

    # Check that all binary new build projects are available in >=1 vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="Mid",
        errors=validate_idxs(
            actual_idxs=cost_projects, req_idxs=projects, idx_label="project", msg=msg
        ),
    )

    # Check that all binary new build projects have build size specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_idxs(
            actual_idxs=bld_size_projects, req_idxs=projects, idx_label="project"
        ),
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    hrzs, hrz_tmps = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    c = conn.cursor()
    periods_horizons = c.execute(
        """
        SELECT balancing_type_horizon, period, horizon
        FROM periods_horizons
        WHERE temporal_scenario_id = {}
        AND subproblem_id = {}
        AND stage_id = {}
        """.format(
            subscenarios.TEMPORAL_SCENARIO_ID, subproblem, stage
        )
    )

    df_hrzs = cursor_to_df(hrzs)
    df_hrz_tmps = cursor_to_df(hrz_tmps)
    df_periods_hrzs = cursor_to_df(periods_horizons)

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=["inputs_temporal_horizons", "inputs_temporal_horizon_timepoints"],
    )

    # Check dtypes - horizons
    # (keep the error columns for each table separate so that the numeric
    # checks below skip only the columns that failed for that table)
    hrz_dtype_errors, hrz_error_columns = validate_dtypes(df_hrzs, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizons",
        severity="High",
        errors=hrz_dtype_errors,
    )

    # Check dtypes - horizon_timepoints
    hrz_tmp_dtype_errors, hrz_tmp_error_columns = validate_dtypes(
        df_hrz_tmps, expected_dtypes
    )
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="High",
        errors=hrz_tmp_dtype_errors,
    )

    # Check valid numeric columns are non-negative - horizons
    numeric_columns = [c for c in df_hrzs.columns if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(hrz_error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizons",
        severity="Mid",
        errors=validate_values(df_hrzs, valid_numeric_columns, "horizon", min=0),
    )

    # Check valid numeric columns are non-negative - horizon_timepoints
    # (use the horizon_timepoints columns, not the horizons columns)
    numeric_columns = [
        c for c in df_hrz_tmps.columns if expected_dtypes[c] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(hrz_tmp_error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="Mid",
        errors=validate_values(
            df_hrz_tmps, valid_numeric_columns, ["horizon", "timepoint"], min=0
        ),
    )

    # One horizon cannot straddle multiple periods
    msg = "All timepoints within a horizon should belong to the same period."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="High",
        errors=validate_single_input(
            df=df_periods_hrzs,
            idx_col=["balancing_type_horizon", "horizon"],
            msg=msg,
        ),
    )

    # Make sure there are no missing horizon inputs
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="High",
        errors=validate_missing_inputs(
            df=df_hrz_tmps,
            col="horizon",
            idx_col=["balancing_type_horizon", "timepoint"],
        ),
    )
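# A minimal sketch of the duplicate-entry check above, assuming
# validate_single_input flags any index that appears in more than one row
# (here, a horizon mapped to multiple periods). The helper name, keyword
# names, and return format are assumptions based on the calls in this file:
import pandas as pd


def _validate_single_input_sketch(df, idx_col, msg=""):
    """Return an error string listing index values that occur more than once
    in the DataFrame; an empty list means the check passed."""
    counts = df.groupby(idx_col).size()
    dupes = counts[counts > 1].index.tolist()
    if dupes:
        return ["Duplicate entries for {}: {}. {}".format(idx_col, dupes, msg)]
    return []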
def generic_validate_project_bas(
    scenario_id,
    subscenarios,
    subproblem,
    stage,
    conn,
    reserve_type,
    project_ba_subscenario_id,
    ba_subscenario_id,
):
    """
    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :param reserve_type:
    :param project_ba_subscenario_id:
    :param ba_subscenario_id:
    :return:
    """
    # TODO: is this actually needed?
    subproblem = 1 if subproblem == "" else subproblem
    stage = 1 if stage == "" else stage

    project_bas, prj_derates = generic_get_inputs_from_database(
        scenario_id=scenario_id,
        subscenarios=subscenarios,
        subproblem=subproblem,
        stage=stage,
        conn=conn,
        reserve_type=reserve_type,
        project_ba_subscenario_id=project_ba_subscenario_id,
        ba_subscenario_id=ba_subscenario_id,
    )

    # Convert input data into pandas DataFrame
    df = cursor_to_df(project_bas)
    df_derate = cursor_to_df(prj_derates).dropna()
    bas_w_project = df["{}_ba".format(reserve_type)].unique()
    projects_w_ba = df["project"].unique()
    projects_w_derate = df_derate["project"].unique()

    # Get the required reserve BAs
    # (use the passed-in ba_subscenario_id so this works for any reserve
    # type, not just regulation up)
    c = conn.cursor()
    bas = c.execute(
        """SELECT {}_ba FROM inputs_geography_{}_bas
        WHERE {}_ba_scenario_id = {}
        """.format(
            reserve_type, reserve_type, reserve_type, ba_subscenario_id
        )
    )
    bas = [b[0] for b in bas]  # convert to list

    # Check that each reserve BA has at least one project assigned to it
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_{}_bas".format(reserve_type),
        severity="High",
        errors=validate_idxs(
            actual_idxs=bas_w_project,
            req_idxs=bas,
            idx_label="{}_ba".format(reserve_type),
            msg="Each reserve BA needs at least 1 project assigned to it.",
        ),
    )

    # Check that all projects w derates have a BA specified
    msg = "Project has a reserve derate specified but no relevant BA."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars",
        severity="Low",
        errors=validate_idxs(
            actual_idxs=projects_w_ba,
            req_idxs=projects_w_derate,
            idx_label="project",
            msg=msg,
        ),
    )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # TODO: check that there are no minimum duration inputs for this type
    #  (duration is specified by specifying the build size in mw and mwh)
    #  Maybe also check all other required / not required inputs?
    #  --> see example in gen_must_run operational_type. Seems very verbose
    #  and hard to maintain. Is there a way to generalize this?

    # Get the binary build storage inputs
    new_stor_costs, new_stor_build_size = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    projects = get_projects(
        conn, scenario_id, subscenarios, "capacity_type", "stor_new_bin"
    )

    # Convert input data into pandas DataFrame
    cost_df = cursor_to_df(new_stor_costs)
    bld_size_df = cursor_to_df(new_stor_build_size)

    # Get the project lists
    cost_projects = cost_df["project"].unique()
    bld_size_projects = bld_size_df["project"]

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=["inputs_project_new_cost", "inputs_project_new_binary_build_size"],
    )

    # Check dtypes - cost_df
    dtype_errors, error_columns = validate_dtypes(cost_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative - cost_df
    numeric_columns = [c for c in cost_df.columns if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=validate_values(cost_df, valid_numeric_columns, min=0),
    )

    # Check dtypes - bld_size_df
    dtype_errors, error_columns = validate_dtypes(bld_size_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative - bld_size_df
    numeric_columns = [
        c for c in bld_size_df.columns if expected_dtypes[c] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_values(bld_size_df, valid_numeric_columns, min=0),
    )

    # Check that all binary new build projects have cost data for at least
    # one vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=validate_idxs(
            actual_idxs=cost_projects, req_idxs=projects, idx_label="project", msg=msg
        ),
    )

    # Check that all binary new build projects have a build size specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_idxs(
            actual_idxs=bld_size_projects, req_idxs=projects, idx_label="project"
        ),
    )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    tx_cost = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    tx_lines = get_tx_lines(
        conn, scenario_id, subscenarios, "capacity_type", "tx_new_lin"
    )

    # Convert input data into pandas DataFrame
    df = cursor_to_df(tx_cost)

    # Get the tx lines lists
    tx_lines_w_cost = df["transmission_line"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_transmission_new_cost"]
    )

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_new_cost",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    numeric_columns = [c for c in df.columns if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_new_cost",
        severity="High",
        errors=validate_values(df, valid_numeric_columns, "transmission_line", min=0),
    )

    # Check that all new-build tx lines have cost data for at least one
    # vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_new_cost",
        severity="Mid",
        errors=validate_idxs(
            actual_idxs=tx_lines_w_cost,
            req_idxs=tx_lines,
            idx_label="transmission_line",
            msg=msg,
        ),
    )