def check_dtype_object(name, Estimator): # check that estimators treat dtype object as numeric if possible rng = np.random.RandomState(0) X = rng.rand(40, 10).astype(object) y = (X[:, 0] * 4).astype(np.int) y = multioutput_estimator_convert_y_2d(name, y) with warnings.catch_warnings(): estimator = Estimator() set_fast_parameters(estimator) estimator.fit(X, y) if hasattr(estimator, "predict"): estimator.predict(X) if hasattr(estimator, "transform"): estimator.transform(X) try: estimator.fit(X, y.astype(object)) except Exception as e: if "Unknown label type" not in str(e): raise X[0, 0] = {'foo': 'bar'} assert_raise_message(TypeError, "string or a number", estimator.fit, X, y)
def test_assert_raise_message(): def _raise_ValueError(message): raise ValueError(message) assert_raise_message(ValueError, "test", _raise_ValueError, "test") assert_raises(AssertionError, assert_raise_message, ValueError, "something else", _raise_ValueError, "test") assert_raises(ValueError, assert_raise_message, TypeError, "something else", _raise_ValueError, "test")
def check_estimators_empty_data_messages(name, Estimator): e = Estimator() set_fast_parameters(e) set_random_state(e, 1) X_zero_samples = np.empty(0).reshape(0, 3) # The precise message can change depending on whether X or y is # validated first. Let us test the type of exception only: assert_raises(ValueError, e.fit, X_zero_samples, []) X_zero_features = np.empty(0).reshape(3, 0) # the following y should be accepted by both classifiers and regressors # and ignored by unsupervised models y = multioutput_estimator_convert_y_2d(name, np.array([1, 0, 1])) msg = "0 feature(s) (shape=(3, 0)) while a minimum of 1 is required." assert_raise_message(ValueError, msg, e.fit, X_zero_features, y)
def test_check_array_min_samples_and_features_messages(): # empty list is considered 2D by default: msg = "0 feature(s) (shape=(1, 0)) while a minimum of 1 is required." assert_raise_message(ValueError, msg, check_array, []) # If considered a 1D collection when ensure_2d=False, then the minimum # number of samples will break: msg = "0 sample(s) (shape=(0,)) while a minimum of 1 is required." assert_raise_message(ValueError, msg, check_array, [], ensure_2d=False) # Invalid edge case when checking the default minimum sample of a scalar msg = "Singleton array array(42) cannot be considered a valid collection." assert_raise_message(TypeError, msg, check_array, 42, ensure_2d=False) # But this works if the input data is forced to look like a 2 array with # one sample and one feature: X_checked = check_array(42, ensure_2d=True) assert_array_equal(np.array([[42]]), X_checked) # Simulate a model that would need at least 2 samples to be well defined X = np.ones((1, 10)) y = np.ones(1) msg = "1 sample(s) (shape=(1, 10)) while a minimum of 2 is required." assert_raise_message(ValueError, msg, check_X_y, X, y, ensure_min_samples=2) # The same message is raised if the data has 2 dimensions even if this is # not mandatory assert_raise_message(ValueError, msg, check_X_y, X, y, ensure_min_samples=2, ensure_2d=False) # Simulate a model that would require at least 3 features (e.g. SelectKBest # with k=3) X = np.ones((10, 2)) y = np.ones(2) msg = "2 feature(s) (shape=(10, 2)) while a minimum of 3 is required." assert_raise_message(ValueError, msg, check_X_y, X, y, ensure_min_features=3) # Only the feature check is enabled whenever the number of dimensions is 2 # even if allow_nd is enabled: assert_raise_message(ValueError, msg, check_X_y, X, y, ensure_min_features=3, allow_nd=True) # Simulate a case where a pipeline stage as trimmed all the features of a # 2D dataset. X = np.empty(0).reshape(10, 0) y = np.ones(10) msg = "0 feature(s) (shape=(10, 0)) while a minimum of 1 is required." assert_raise_message(ValueError, msg, check_X_y, X, y) # nd-data is not checked for any minimum number of features by default: X = np.ones((10, 0, 28, 28)) y = np.ones(10) X_checked, y_checked = check_X_y(X, y, allow_nd=True) assert_array_equal(X, X_checked) assert_array_equal(y, y_checked)