Example #1
    def test_validate_column_monotonicity(self):
        """
        :return:
        """

        cols = ["project", "period", "min_mw", "avg_mw", "max_mw"]
        test_cases = {
            # Make sure a case with only basic inputs doesn't throw errors
            1: {"df": pd.DataFrame(
                    columns=cols,
                    data=[["gas_ct", 2020, 10, 15, 20],
                          ["gas_ct", 2030, 10, 15, 20],
                          ["coal", 2030, 20, 20, 20]]),
                "cols": ["min_mw", "avg_mw", "max_mw"],
                "idx_col": "project",
                "result": []
                },
            # Make sure erroneous inputs are properly caught
            2: {"df": pd.DataFrame(
                columns=cols,
                data=[["gas_ct", 2020, 10, 15, 20],
                      ["gas_ct", 2030, 21, 15, 20],
                      ["coal", 2030, 20, 20, 20]]),
                "cols": ["min_mw", "avg_mw", "max_mw"],
                "idx_col": "project",
                "result": ["project(s) ['gas_ct']: Values cannot decrease "
                           "between ['min_mw', 'avg_mw', 'max_mw']. "]
                },
            # None values are ignored
            3: {"df": pd.DataFrame(
                columns=cols,
                data=[["gas_ct", 2020, 10, 15, 20],
                      ["gas_ct", 2030, None, 15, 20],
                      ["coal", 2030, 20, 20, 20]]),
                "cols": ["min_mw", "max_mw"],
                "idx_col": "project",
                "result": []
                },
            # idx_col can be list of strings
            4: {"df": pd.DataFrame(
                columns=cols,
                data=[["gas_ct", 2020, 10, 15, 20],
                      ["gas_ct", 2030, 21, 15, 20],
                      ["coal", 2030, 20, 20, 20]]),
                "cols": ["min_mw", "avg_mw", "max_mw"],
                "idx_col": ["project", "period"],
                "result": ["['project', 'period'](s) [['gas_ct' 2030]]: "
                           "Values cannot decrease between "
                           "['min_mw', 'avg_mw', 'max_mw']. "]
                }
        }
        for test_case in test_cases.keys():
            expected_list = test_cases[test_case]["result"]
            actual_list = module_to_test.validate_column_monotonicity(
                df=test_cases[test_case]["df"],
                cols=test_cases[test_case]["cols"],
                idx_col=test_cases[test_case]["idx_col"],
            )
            self.assertListEqual(expected_list, actual_list)
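For reference, below is a minimal sketch of the function under test, written only to be consistent with the test cases above; the actual validate_column_monotonicity() in GridPath may differ in how it builds its error messages.

import pandas as pd


def validate_column_monotonicity(df, cols, idx_col):
    """
    Sketch: flag any row in which the values in `cols` decrease from left to
    right (None/NaN values are skipped) and report the offending rows by
    `idx_col`. Returns a list of error strings, empty if all rows pass.
    """
    results = []
    # Coerce to numeric so that None becomes NaN and drops out of comparisons
    values = df[cols].apply(pd.to_numeric, errors="coerce")
    # A row is invalid if any left-to-right difference between columns is negative
    invalid = values.diff(axis=1).lt(0).any(axis=1)
    if invalid.any():
        bad_idxs = df.loc[invalid, idx_col].values
        results.append(
            "{}(s) {}: Values cannot decrease between {}. ".format(
                idx_col, bad_idxs, cols
            )
        )
    return results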
Example #2
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem,
                                    stage, conn):
    """
    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :return:
    """

    params = get_inputs_from_database(scenario_id, subscenarios, subproblem,
                                      stage, conn)

    df = cursor_to_df(params)

    # Check data types
    expected_dtypes = get_expected_dtypes(conn, [
        "inputs_project_availability", "inputs_project_availability_endogenous"
    ])
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_endogenous",
        severity="High",
        errors=dtype_errors)

    # Check for missing inputs
    msg = ""
    value_cols = [
        "unavailable_hours_per_period", "unavailable_hours_per_event_min",
        "available_hours_between_events_min"
    ]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_endogenous",
        severity="Low",
        errors=validate_missing_inputs(df, value_cols, "project", msg))

    cols = ["unavailable_hours_per_event_min", "unavailable_hours_per_period"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_endogenous",
        severity="High",
        errors=validate_column_monotonicity(df=df,
                                            cols=cols,
                                            idx_col=["project"]))
Example #3
def validate_hydro_opchars(scenario_id, subscenarios, subproblem, stage, conn, op_type):
    """

    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :param op_type:
    :return:
    """
    hydro_chars = get_hydro_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn, op_type
    )

    # Convert input data into pandas DataFrame
    df = cursor_to_df(hydro_chars)
    value_cols = ["min_power_fraction", "average_power_fraction", "max_power_fraction"]

    # Check for missing inputs
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_hydro_operational_chars",
        severity="High",
        errors=validate_missing_inputs(df, value_cols, ["project", "horizon"]),
    )

    # Check that values are percent fractions, i.e. between 0 and 1
    hydro_opchar_fraction_error = write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_hydro_operational_chars",
        severity="Low",
        errors=validate_values(df, value_cols, min=0, max=1),
    )

    # Check min <= avg <= max
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_hydro_operational_chars",
        severity="Mid",
        errors=validate_column_monotonicity(
            df=df, cols=value_cols, idx_col=["project", "horizon"]
        ),
    )

    return hydro_opchar_fraction_error
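The validate_values() helper called above is not shown in these examples; a plausible minimal sketch follows. The idx_col default and the message wording are assumptions (only the df/col_list/min/max arguments appear at the call sites), so the actual GridPath helper may differ.

import pandas as pd


def validate_values(df, col_list, idx_col="project", min=float("-inf"),
                    max=float("inf")):
    # Sketch only: `idx_col` default and message format are assumptions.
    # The `min`/`max` keyword names shadow builtins to match the call sites.
    results = []
    for col in col_list:
        # Coerce to numeric so missing values (NaN) are skipped by the comparisons
        vals = pd.to_numeric(df[col], errors="coerce")
        out_of_range = (vals < min) | (vals > max)
        if out_of_range.any():
            bad_idxs = df.loc[out_of_range, idx_col].unique()
            results.append(
                "{}(s) {}: Expected {} <= '{}' <= {}. ".format(
                    idx_col, list(bad_idxs), min, col, max
                )
            )
    return results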
Example #4
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    new_stor_costs = get_model_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    projects = get_projects(
        conn, scenario_id, subscenarios, "capacity_type", "stor_new_lin"
    )

    # Convert input data into pandas DataFrame
    cost_df = cursor_to_df(new_stor_costs)
    df_cols = cost_df.columns

    # Get the list of projects with cost inputs
    cost_projects = cost_df["project"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_project_new_cost", "inputs_project_new_potential"]
    )

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(cost_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    numeric_columns = [c for c in cost_df.columns if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=validate_values(cost_df, valid_numeric_columns, min=0),
    )

    # Check that all new build storage projects have cost data for >=1 vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="Mid",
        errors=validate_idxs(
            actual_idxs=cost_projects, req_idxs=projects, idx_label="project", msg=msg
        ),
    )

    cols = ["min_cumulative_new_build_mw", "max_cumulative_new_build_mw"]
    # Check that maximum new build doesn't decrease
    if cols[1] in df_cols:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="Mid",
            errors=validate_row_monotonicity(
                df=cost_df, col=cols[1], rank_col="vintage"
            ),
        )

    # Check that min build <= max build - MW
    if set(cols).issubset(set(df_cols)):
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="High",
            errors=validate_column_monotonicity(
                df=cost_df, cols=cols, idx_col=["project", "vintage"]
            ),
        )

    cols = ["min_cumulative_new_build_mwh", "max_cumulative_new_build_mwh"]
    # Check that maximum new build doesn't decrease - MWh
    if cols[1] in df_cols:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="Mid",
            errors=validate_row_monotonicity(
                df=cost_df, col=cols[1], rank_col="vintage"
            ),
        )

    # Check that min build <= max build - MWh
    if set(cols).issubset(set(df_cols)):
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_new_potential",
            severity="High",
            errors=validate_column_monotonicity(
                df=cost_df, cols=cols, idx_col=["project", "vintage"]
            ),
        )
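validate_row_monotonicity() is also not shown here; a rough sketch of what the calls above imply is given below. Grouping by a "project" column and the error wording are assumptions, so the actual GridPath helper may differ.

import pandas as pd


def validate_row_monotonicity(df, col, rank_col, idx_col="project"):
    # Sketch only: the grouping column default and message format are assumptions.
    results = []
    # Drop rows with missing values and order them by the ranking column (e.g. vintage)
    ranked = df.dropna(subset=[col]).sort_values(by=[rank_col])
    vals = pd.to_numeric(ranked[col], errors="coerce")
    # A group violates monotonicity if `col` ever drops as `rank_col` increases
    decreasing = vals.groupby(ranked[idx_col]).apply(
        lambda s: s.diff().lt(0).any()
    )
    bad_idxs = decreasing[decreasing].index.values
    if len(bad_idxs):
        results.append(
            "{}(s) {}: '{}' cannot decrease with increasing '{}'. ".format(
                idx_col, bad_idxs, col, rank_col
            )
        )
    return results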
Example #5
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem,
                                    stage, conn):
    """
    Get inputs from database and validate the inputs
    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """

    tx_capacities = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)

    tx_lines = get_tx_lines(conn, scenario_id, subscenarios, "capacity_type",
                            "tx_spec")

    # Convert input data into pandas DataFrame and extract data
    df = cursor_to_df(tx_capacities)
    spec_tx_lines = df["transmission_line"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_transmission_specified_capacity"])

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=dtype_errors)

    # Ensure tx_line capacity is specified in at least 1 period
    msg = "Expected specified capacity for at least one period."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_idxs(actual_idxs=spec_tx_lines,
                             req_idxs=tx_lines,
                             idx_label="transmission_line",
                             msg=msg))

    # Check for missing values (vs. missing row entries above)
    cols = ["min_mw", "max_mw"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_missing_inputs(df, cols))

    # Check that min <= max
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_column_monotonicity(
            df=df,
            cols=cols,
            idx_col=["transmission_line", "period"]))
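Finally, validate_idxs(), used in Examples #4 and #5, can be sketched as a simple set difference; the exact message wording is an assumption and the actual GridPath helper may differ.

def validate_idxs(actual_idxs, req_idxs, idx_label="project", msg=""):
    # Sketch only: the error wording is an assumption.
    results = []
    # Report every required index that has no corresponding input row
    missing = sorted(set(req_idxs) - set(actual_idxs))
    if missing:
        results.append(
            "Missing inputs for {}(s): {}. {}".format(idx_label, missing, msg)
        )
    return results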