def test_fast_predict(): # test that the faster prediction change doesn't # affect out-of-sample predictions: # https://github.com/scikit-learn/scikit-learn/pull/6206 rng = np.random.RandomState(123) n_samples = 10**3 # X values over the -10,10 range X_train = 20.0 * rng.rand(n_samples) - 10 y_train = np.less(rng.rand(n_samples), expit(X_train)).astype('int64').astype('float64') weights = rng.rand(n_samples) # we also want to test that everything still works when some weights are 0 weights[rng.rand(n_samples) < 0.1] = 0 slow_model = IsotonicRegression(y_min=0, y_max=1, out_of_bounds="clip") fast_model = IsotonicRegression(y_min=0, y_max=1, out_of_bounds="clip") # Build interpolation function with ALL input data, not just the # non-redundant subset. The following 2 lines are taken from the # .fit() method, without removing unnecessary points X_train_fit, y_train_fit = slow_model._build_y(X_train, y_train, sample_weight=weights, trim_duplicates=False) slow_model._build_f(X_train_fit, y_train_fit) # fit with just the necessary data fast_model.fit(X_train, y_train, sample_weight=weights) X_test = 20.0 * rng.rand(n_samples) - 10 y_pred_slow = slow_model.predict(X_test) y_pred_fast = fast_model.predict(X_test) assert_array_equal(y_pred_slow, y_pred_fast)
def test_isotonic_regression_oob_clip(): # Set y and x y = np.array([3, 7, 5, 9, 8, 7, 10]) x = np.arange(len(y)) # Create model and fit ir = IsotonicRegression(increasing='auto', out_of_bounds="clip") ir.fit(x, y) # Predict from training and test x and check that min/max match. y1 = ir.predict([min(x) - 10, max(x) + 10]) y2 = ir.predict(x) assert max(y1) == max(y2) assert min(y1) == min(y2)
def test_isotonic_regression(): y = np.array([3, 7, 5, 9, 8, 7, 10]) y_ = np.array([3, 6, 6, 8, 8, 8, 10]) assert_array_equal(y_, isotonic_regression(y)) y = np.array([10, 0, 2]) y_ = np.array([4, 4, 4]) assert_array_equal(y_, isotonic_regression(y)) x = np.arange(len(y)) ir = IsotonicRegression(y_min=0., y_max=1.) ir.fit(x, y) assert_array_equal(ir.fit(x, y).transform(x), ir.fit_transform(x, y)) assert_array_equal(ir.transform(x), ir.predict(x)) # check that it is immune to permutation perm = np.random.permutation(len(y)) ir = IsotonicRegression(y_min=0., y_max=1.) assert_array_equal(ir.fit_transform(x[perm], y[perm]), ir.fit_transform(x, y)[perm]) assert_array_equal(ir.transform(x[perm]), ir.transform(x)[perm]) # check we don't crash when all x are equal: ir = IsotonicRegression() assert_array_equal(ir.fit_transform(np.ones(len(x)), y), np.mean(y))
def test_isotonic_mismatched_dtype(y_dtype): # regression test for #15004 # check that data are converted when X and y dtype differ reg = IsotonicRegression() y = np.array([2, 1, 4, 3, 5], dtype=y_dtype) X = np.arange(len(y), dtype=np.float32) reg.fit(X, y) assert reg.predict(X).dtype == X.dtype
def test_isotonic_duplicate_min_entry(): x = [0, 0, 1] y = [0, 0, 1] ir = IsotonicRegression(increasing=True, out_of_bounds="clip") ir.fit(x, y) all_predictions_finite = np.all(np.isfinite(ir.predict(x))) assert all_predictions_finite
def test_isotonic_regression_pickle(): y = np.array([3, 7, 5, 9, 8, 7, 10]) x = np.arange(len(y)) # Create model and fit ir = IsotonicRegression(increasing='auto', out_of_bounds="clip") ir.fit(x, y) ir_ser = pickle.dumps(ir, pickle.HIGHEST_PROTOCOL) ir2 = pickle.loads(ir_ser) np.testing.assert_array_equal(ir.predict(x), ir2.predict(x))
def test_isotonic_regression_oob_nan(): # Set y and x y = np.array([3, 7, 5, 9, 8, 7, 10]) x = np.arange(len(y)) # Create model and fit ir = IsotonicRegression(increasing='auto', out_of_bounds="nan") ir.fit(x, y) # Predict from training and test x and check that we have two NaNs. y1 = ir.predict([min(x) - 10, max(x) + 10]) assert sum(np.isnan(y1)) == 2
def test_isotonic_dtype(): y = [2, 1, 4, 3, 5] weights = np.array([.9, .9, .9, .9, .9], dtype=np.float64) reg = IsotonicRegression() for dtype in (np.int32, np.int64, np.float32, np.float64): for sample_weight in (None, weights.astype(np.float32), weights): y_np = np.array(y, dtype=dtype) expected_dtype = \ check_array(y_np, dtype=[np.float64, np.float32], ensure_2d=False).dtype res = isotonic_regression(y_np, sample_weight=sample_weight) assert res.dtype == expected_dtype X = np.arange(len(y)).astype(dtype) reg.fit(X, y_np, sample_weight=sample_weight) res = reg.predict(X) assert res.dtype == expected_dtype