def test_discrete_ecdfer():
    """Check that ``discrete_ecdfer`` agrees with ``ecdfer`` on the same data.

    Both learners are fitted on the same frame and applied to the same input
    frame; the resulting ECDF columns must match to 5 decimal places. The
    ascending case is run with ``round_method=round`` and the descending case
    with ``round_method=float``.
    """
    fit_df = pd.DataFrame({
        "prediction": [0.1, 0.1, 0.3, 0.5, 0.5, 0.6, 0.6, 0.7, 0.8, 0.9],
    })

    # Contains values that are absent from fit_df (0.2, 0.4, 0.65) and one
    # value above its maximum (1.0), in addition to the fitted values.
    input_df = pd.DataFrame({
        "prediction": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.65, 0.7, 0.8, 0.9, 1.0],
    })

    prediction_column = "prediction"
    ecdf_column = "prediction_ecdf"
    max_range = 1000

    for ascending, round_method in ((True, round), (False, float)):
        # Reference values come from the plain ecdfer.
        ecdfer_fn, _, _ = ecdfer(fit_df, ascending, prediction_column, ecdf_column, max_range)
        ecdfer_df = ecdfer_fn(input_df)

        discrete_ecdfer_fn, _, _ = discrete_ecdfer(
            fit_df, ascending, prediction_column, ecdf_column, max_range,
            round_method=round_method)
        discrete_ecdfer_df = discrete_ecdfer_fn(input_df)

        # numpy's signature is (actual, desired): the function under test goes
        # first so that a failure report labels the two arrays correctly.
        assert_almost_equal(
            discrete_ecdfer_df[ecdf_column].values,
            ecdfer_df[ecdf_column].values,
            decimal=5,
            err_msg="ascending={}, round_method={}".format(ascending, round_method.__name__))


def test_ecdfer():
    """Check the ECDF column produced by ``ecdfer`` against fixed values.

    The learner is fitted on ``fit_df`` with ``max_range=1000`` and applied to
    ``input_df``, once with ``ascending=True`` and once with
    ``ascending=False``. The output column must match the hard-coded expected
    values to 5 decimal places.
    """
    fit_df = pd.DataFrame({
        "prediction": [0.1, 0.1, 0.3, 0.5, 0.5, 0.6, 0.6, 0.7, 0.8, 0.9],
    })

    # Contains values that are absent from fit_df (0.2, 0.4, 0.65) and one
    # value above its maximum (1.0), in addition to the fitted values.
    input_df = pd.DataFrame({
        "prediction": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.65, 0.7, 0.8, 0.9, 1.0],
    })

    prediction_column = "prediction"
    ecdf_column = "prediction_ecdf"
    max_range = 1000

    # (ascending flag, expected ECDF value for each row of input_df)
    cases = (
        (True, [200.0, 200.0, 300.0, 300.0, 500.0, 700.0, 700.0, 800.0, 900.0, 1000.0, 1000.0]),
        (False, [800.0, 800.0, 700.0, 700.0, 500.0, 300.0, 300.0, 200.0, 100.0, 0.0, 0.0]),
    )

    for ascending, expected_values in cases:
        expected_df = pd.DataFrame({"prediction_ecdf": expected_values})

        # Only the prediction function is needed; the other two returned
        # items are not inspected by this test.
        pred_fn, _, _ = ecdfer(fit_df, ascending, prediction_column, ecdf_column, max_range)
        actual_df = pred_fn(input_df)

        # numpy's signature is (actual, desired): passing them in that order
        # makes a failure report label the two arrays correctly.
        assert_almost_equal(
            actual_df[ecdf_column].values,
            expected_df[ecdf_column].values,
            decimal=5,
            err_msg="ascending={}".format(ascending))