def test_random_descent(): # Test that both random and cyclic selection give the same results. # Ensure that the test models fully converge and check a wide # range of conditions. # This uses the coordinate descent algo using the gram trick. X, y, _, _ = build_dataset(n_samples=50, n_features=20) clf_cyclic = ElasticNet(selection='cyclic', tol=1e-8) clf_cyclic.fit(X, y) clf_random = ElasticNet(selection='random', tol=1e-8, random_state=42) clf_random.fit(X, y) assert_array_almost_equal(clf_cyclic.coef_, clf_random.coef_) assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_) # This uses the descent algo without the gram trick clf_cyclic = ElasticNet(selection='cyclic', tol=1e-8) clf_cyclic.fit(X.T, y[:20]) clf_random = ElasticNet(selection='random', tol=1e-8, random_state=42) clf_random.fit(X.T, y[:20]) assert_array_almost_equal(clf_cyclic.coef_, clf_random.coef_) assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_) # Sparse Case clf_cyclic = ElasticNet(selection='cyclic', tol=1e-8) clf_cyclic.fit(sparse.csr_matrix(X), y) clf_random = ElasticNet(selection='random', tol=1e-8, random_state=42) clf_random.fit(sparse.csr_matrix(X), y) assert_array_almost_equal(clf_cyclic.coef_, clf_random.coef_) assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_) # Multioutput case. new_y = np.hstack((y[:, np.newaxis], y[:, np.newaxis])) clf_cyclic = MultiTaskElasticNet(selection='cyclic', tol=1e-8) clf_cyclic.fit(X, new_y) clf_random = MultiTaskElasticNet(selection='random', tol=1e-8, random_state=42) clf_random.fit(X, new_y) assert_array_almost_equal(clf_cyclic.coef_, clf_random.coef_) assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_) # Raise error when selection is not in cyclic or random. clf_random = ElasticNet(selection='invalid') assert_raises(ValueError, clf_random.fit, X, y)
def test_correct_RandomProjection_dimensions_embedding(): for RandomProjection in all_RandomProjection: rp = RandomProjection(n_components='auto', random_state=0, eps=0.5).fit(data) # the number of components is adjusted from the shape of the training # set assert rp.n_components == 'auto' assert rp.n_components_ == 110 if RandomProjection in all_SparseRandomProjection: assert rp.density == 'auto' assert_almost_equal(rp.density_, 0.03, 2) assert rp.components_.shape == (110, n_features) projected_1 = rp.transform(data) assert projected_1.shape == (n_samples, 110) # once the RP is 'fitted' the projection is always the same projected_2 = rp.transform(data) assert_array_equal(projected_1, projected_2) # fit transform with same random seed will lead to the same results rp2 = RandomProjection(random_state=0, eps=0.5) projected_3 = rp2.fit_transform(data) assert_array_equal(projected_1, projected_3) # Try to transform with an input X of size different from fitted. assert_raises(ValueError, rp.transform, data[:, 1:5]) # it is also possible to fix the number of components and the density # level if RandomProjection in all_SparseRandomProjection: rp = RandomProjection(n_components=100, density=0.001, random_state=0) projected = rp.fit_transform(data) assert projected.shape == (n_samples, 100) assert rp.components_.shape == (100, n_features) assert rp.components_.nnz < 115 # close to 1% density assert 85 < rp.components_.nnz # close to 1% density
def test_inplace_swap_column(): X = np.array([[0, 3, 0], [2, 4, 0], [0, 0, 0], [9, 8, 7], [4, 0, 5]], dtype=np.float64) X_csr = sp.csr_matrix(X) X_csc = sp.csc_matrix(X) swap = linalg.get_blas_funcs(('swap', ), (X, )) swap = swap[0] X[:, 0], X[:, -1] = swap(X[:, 0], X[:, -1]) inplace_swap_column(X_csr, 0, -1) inplace_swap_column(X_csc, 0, -1) assert_array_equal(X_csr.toarray(), X_csc.toarray()) assert_array_equal(X, X_csc.toarray()) assert_array_equal(X, X_csr.toarray()) X[:, 0], X[:, 1] = swap(X[:, 0], X[:, 1]) inplace_swap_column(X_csr, 0, 1) inplace_swap_column(X_csc, 0, 1) assert_array_equal(X_csr.toarray(), X_csc.toarray()) assert_array_equal(X, X_csc.toarray()) assert_array_equal(X, X_csr.toarray()) assert_raises(TypeError, inplace_swap_column, X_csr.tolil()) X = np.array([[0, 3, 0], [2, 4, 0], [0, 0, 0], [9, 8, 7], [4, 0, 5]], dtype=np.float32) X_csr = sp.csr_matrix(X) X_csc = sp.csc_matrix(X) swap = linalg.get_blas_funcs(('swap', ), (X, )) swap = swap[0] X[:, 0], X[:, -1] = swap(X[:, 0], X[:, -1]) inplace_swap_column(X_csr, 0, -1) inplace_swap_column(X_csc, 0, -1) assert_array_equal(X_csr.toarray(), X_csc.toarray()) assert_array_equal(X, X_csc.toarray()) assert_array_equal(X, X_csr.toarray()) X[:, 0], X[:, 1] = swap(X[:, 0], X[:, 1]) inplace_swap_column(X_csr, 0, 1) inplace_swap_column(X_csc, 0, 1) assert_array_equal(X_csr.toarray(), X_csc.toarray()) assert_array_equal(X, X_csc.toarray()) assert_array_equal(X, X_csr.toarray()) assert_raises(TypeError, inplace_swap_column, X_csr.tolil())
def test_ransac_max_trials(): base_estimator = LinearRegression() ransac_estimator = RANSACRegressor(base_estimator, min_samples=2, residual_threshold=5, max_trials=0, random_state=0) assert_raises(ValueError, ransac_estimator.fit, X, y) # there is a 1e-9 chance it will take these many trials. No good reason # 1e-2 isn't enough, can still happen # 2 is the what ransac defines as min_samples = X.shape[1] + 1 max_trials = _dynamic_max_trials( len(X) - len(outliers), X.shape[0], 2, 1 - 1e-9) ransac_estimator = RANSACRegressor(base_estimator, min_samples=2) for i in range(50): ransac_estimator.set_params(min_samples=2, random_state=i) ransac_estimator.fit(X, y) assert ransac_estimator.n_trials_ < max_trials + 1
def test_config_context(): assert get_config() == {'assume_finite': False, 'working_memory': 1024, 'print_changed_only': True} # Not using as a context manager affects nothing config_context(assume_finite=True) assert get_config()['assume_finite'] is False with config_context(assume_finite=True): assert get_config() == {'assume_finite': True, 'working_memory': 1024, 'print_changed_only': True} assert get_config()['assume_finite'] is False with config_context(assume_finite=True): with config_context(assume_finite=None): assert get_config()['assume_finite'] is True assert get_config()['assume_finite'] is True with config_context(assume_finite=False): assert get_config()['assume_finite'] is False with config_context(assume_finite=None): assert get_config()['assume_finite'] is False # global setting will not be retained outside of context that # did not modify this setting set_config(assume_finite=True) assert get_config()['assume_finite'] is True assert get_config()['assume_finite'] is False assert get_config()['assume_finite'] is True assert get_config() == {'assume_finite': False, 'working_memory': 1024, 'print_changed_only': True} # No positional arguments assert_raises(TypeError, config_context, True) # No unknown arguments assert_raises(TypeError, config_context(do_something_else=True).__enter__)
def test_bagging_classifier_with_missing_inputs(): # Check that SerialBaggingClassifier can accept X with missing/infinite data X = np.array([[1, 3, 5], [2, None, 6], [2, np.nan, 6], [2, np.inf, 6], [2, np.NINF, 6]]) y = np.array([3, 6, 6, 6, 6]) classifier = DecisionTreeClassifier() pipeline = make_pipeline(FunctionTransformer(replace), classifier) pipeline.fit(X, y).predict(X) bagging_classifier = SerialBaggingClassifier(pipeline) bagging_classifier.fit(X, y) y_hat = bagging_classifier.predict(X) assert y.shape == y_hat.shape bagging_classifier.predict_log_proba(X) bagging_classifier.predict_proba(X) # Verify that exceptions can be raised by wrapper classifier classifier = DecisionTreeClassifier() pipeline = make_pipeline(classifier) assert_raises(ValueError, pipeline.fit, X, y) bagging_classifier = SerialBaggingClassifier(pipeline) assert_raises(ValueError, bagging_classifier.fit, X, y)
def test_transformation_dimensions(): X = np.arange(12).reshape(4, 3) y = [1, 1, 2, 2] # Fail if transformation input dimension does not match inputs dimensions transformation = np.array([[1, 2], [3, 4]]) assert_raises(ValueError, NeighborhoodComponentsAnalysis(init=transformation).fit, X, y) # Fail if transformation output dimension is larger than # transformation input dimension transformation = np.array([[1, 2], [3, 4], [5, 6]]) # len(transformation) > len(transformation[0]) assert_raises(ValueError, NeighborhoodComponentsAnalysis(init=transformation).fit, X, y) # Pass otherwise transformation = np.arange(9).reshape(3, 3) NeighborhoodComponentsAnalysis(init=transformation).fit(X, y)
def test_discretenb_input_check_partial_fit(cls): # check shape consistency assert_raises(ValueError, cls().partial_fit, X2, y2[:-1], classes=np.unique(y2)) # classes is required for first call to partial fit assert_raises(ValueError, cls().partial_fit, X2, y2) # check consistency of consecutive classes values clf = cls() clf.partial_fit(X2, y2, classes=np.unique(y2)) assert_raises(ValueError, clf.partial_fit, X2, y2, classes=np.arange(42)) # check consistency of input shape for partial_fit assert_raises(ValueError, clf.partial_fit, X2[:, :-1], y2) # check consistency of input shape for predict assert_raises(ValueError, clf.predict, X2[:, :-1])
def test_features_zero_var(): # Test that features with 0 variance throw error X = np.empty((10, 2)) X[:, 0] = -0.13725701 X[:, 1] = -0.9853293 y = np.zeros((10)) y[0] = 1 clf = NearestCentroid(shrink_threshold=0.1) with assert_raises(ValueError): clf.fit(X, y)
def test_column_or_1d(): EXAMPLES = [ ("binary", ["spam", "egg", "spam"]), ("binary", [0, 1, 0, 1]), ("continuous", np.arange(10) / 20.), ("multiclass", [1, 2, 3]), ("multiclass", [0, 1, 2, 2, 0]), ("multiclass", [[1], [2], [3]]), ("multilabel-indicator", [[0, 1, 0], [0, 0, 1]]), ("multiclass-multioutput", [[1, 2, 3]]), ("multiclass-multioutput", [[1, 1], [2, 2], [3, 1]]), ("multiclass-multioutput", [[5, 1], [4, 2], [3, 1]]), ("multiclass-multioutput", [[1, 2, 3]]), ("continuous-multioutput", np.arange(30).reshape((-1, 3))), ] for y_type, y in EXAMPLES: if y_type in ["binary", 'multiclass', "continuous"]: assert_array_equal(column_or_1d(y), np.ravel(y)) else: assert_raises(ValueError, column_or_1d, y)
def test_lasso_lars_ic(): # Test the LassoLarsIC object by checking that # - some good features are selected. # - alpha_bic > alpha_aic # - n_nonzero_bic < n_nonzero_aic lars_bic = linear_model.LassoLarsIC('bic') lars_aic = linear_model.LassoLarsIC('aic') rng = np.random.RandomState(42) X = diabetes.data X = np.c_[X, rng.randn(X.shape[0], 5)] # add 5 bad features lars_bic.fit(X, y) lars_aic.fit(X, y) nonzero_bic = np.where(lars_bic.coef_)[0] nonzero_aic = np.where(lars_aic.coef_)[0] assert lars_bic.alpha_ > lars_aic.alpha_ assert len(nonzero_bic) < len(nonzero_aic) assert np.max(nonzero_bic) < diabetes.data.shape[1] # test error on unknown IC lars_broken = linear_model.LassoLarsIC('<unknown>') assert_raises(ValueError, lars_broken.fit, X, y)
def test_clone_buggy(): # Check that clone raises an error on buggy estimators. buggy = Buggy() buggy.a = 2 assert_raises(RuntimeError, clone, buggy) no_estimator = NoEstimator() assert_raises(TypeError, clone, no_estimator) varg_est = VargEstimator() assert_raises(RuntimeError, clone, varg_est) est = ModifyInitParams() assert_raises(RuntimeError, clone, est)
def test_plot_partial_dependence_multiclass(pyplot): # Test partial dependence plot function on multi-class input. clf = GradientBoostingClassifier(n_estimators=10, random_state=1) clf.fit(iris.data, iris.target) grid_resolution = 25 fig, axs = plot_partial_dependence(clf, iris.data, [0, 1], label=0, grid_resolution=grid_resolution) assert len(axs) == 2 assert all(ax.has_data for ax in axs) # now with symbol labels target = iris.target_names[iris.target] clf = GradientBoostingClassifier(n_estimators=10, random_state=1) clf.fit(iris.data, target) grid_resolution = 25 fig, axs = plot_partial_dependence(clf, iris.data, [0, 1], label='setosa', grid_resolution=grid_resolution) assert len(axs) == 2 assert all(ax.has_data for ax in axs) # label not in gbrt.classes_ assert_raises(ValueError, plot_partial_dependence, clf, iris.data, [0, 1], label='foobar', grid_resolution=grid_resolution) # label not provided assert_raises(ValueError, plot_partial_dependence, clf, iris.data, [0, 1], grid_resolution=grid_resolution)
def test_ransac_dynamic_max_trials(): # Numbers hand-calculated and confirmed on page 119 (Table 4.3) in # Hartley, R.~I. and Zisserman, A., 2004, # Multiple View Geometry in Computer Vision, Second Edition, # Cambridge University Press, ISBN: 0521540518 # e = 0%, min_samples = X assert _dynamic_max_trials(100, 100, 2, 0.99) == 1 # e = 5%, min_samples = 2 assert _dynamic_max_trials(95, 100, 2, 0.99) == 2 # e = 10%, min_samples = 2 assert _dynamic_max_trials(90, 100, 2, 0.99) == 3 # e = 30%, min_samples = 2 assert _dynamic_max_trials(70, 100, 2, 0.99) == 7 # e = 50%, min_samples = 2 assert _dynamic_max_trials(50, 100, 2, 0.99) == 17 # e = 5%, min_samples = 8 assert _dynamic_max_trials(95, 100, 8, 0.99) == 5 # e = 10%, min_samples = 8 assert _dynamic_max_trials(90, 100, 8, 0.99) == 9 # e = 30%, min_samples = 8 assert _dynamic_max_trials(70, 100, 8, 0.99) == 78 # e = 50%, min_samples = 8 assert _dynamic_max_trials(50, 100, 8, 0.99) == 1177 # e = 0%, min_samples = 10 assert _dynamic_max_trials(1, 100, 10, 0) == 0 assert _dynamic_max_trials(1, 100, 10, 1) == float('inf') base_estimator = LinearRegression() ransac_estimator = RANSACRegressor(base_estimator, min_samples=2, stop_probability=-0.1) assert_raises(ValueError, ransac_estimator.fit, X, y) ransac_estimator = RANSACRegressor(base_estimator, min_samples=2, stop_probability=1.1) assert_raises(ValueError, ransac_estimator.fit, X, y)
def test_ransac_min_n_samples(): base_estimator = LinearRegression() ransac_estimator1 = RANSACRegressor(base_estimator, min_samples=2, residual_threshold=5, random_state=0) ransac_estimator2 = RANSACRegressor(base_estimator, min_samples=2. / X.shape[0], residual_threshold=5, random_state=0) ransac_estimator3 = RANSACRegressor(base_estimator, min_samples=-1, residual_threshold=5, random_state=0) ransac_estimator4 = RANSACRegressor(base_estimator, min_samples=5.2, residual_threshold=5, random_state=0) ransac_estimator5 = RANSACRegressor(base_estimator, min_samples=2.0, residual_threshold=5, random_state=0) ransac_estimator6 = RANSACRegressor(base_estimator, residual_threshold=5, random_state=0) ransac_estimator7 = RANSACRegressor(base_estimator, min_samples=X.shape[0] + 1, residual_threshold=5, random_state=0) ransac_estimator1.fit(X, y) ransac_estimator2.fit(X, y) ransac_estimator5.fit(X, y) ransac_estimator6.fit(X, y) assert_array_almost_equal(ransac_estimator1.predict(X), ransac_estimator2.predict(X)) assert_array_almost_equal(ransac_estimator1.predict(X), ransac_estimator5.predict(X)) assert_array_almost_equal(ransac_estimator1.predict(X), ransac_estimator6.predict(X)) assert_raises(ValueError, ransac_estimator3.fit, X, y) assert_raises(ValueError, ransac_estimator4.fit, X, y) assert_raises(ValueError, ransac_estimator7.fit, X, y)
def test_multi_output_exceptions(): # NotFittedError when fit is not done but score, predict and # and predict_proba are called moc = MultiOutputClassifier(LinearSVC(random_state=0)) assert_raises(NotFittedError, moc.predict, y) assert_raises(NotFittedError, moc.predict_proba, y) assert_raises(NotFittedError, moc.score, X, y) # ValueError when number of outputs is different # for fit and score y_new = np.column_stack((y1, y2)) moc.fit(X, y) assert_raises(ValueError, moc.score, X, y_new) # ValueError when y is continuous assert_raise_message(ValueError, "Unknown label type", moc.fit, X, X[:, 1])
def check_min_samples_split(name): X, y = hastie_X, hastie_y ForestEstimator = FOREST_ESTIMATORS[name] # test boundary value assert_raises(ValueError, ForestEstimator(min_samples_split=-1).fit, X, y) assert_raises(ValueError, ForestEstimator(min_samples_split=0).fit, X, y) assert_raises(ValueError, ForestEstimator(min_samples_split=1.1).fit, X, y) est = ForestEstimator(min_samples_split=10, n_estimators=1, random_state=0) est.fit(X, y) node_idx = est.estimators_[0].tree_.children_left != -1 node_samples = est.estimators_[0].tree_.n_node_samples[node_idx] assert np.min(node_samples) > len(X) * 0.5 - 1, ( "Failed with {0}".format(name)) est = ForestEstimator(min_samples_split=0.5, n_estimators=1, random_state=0) est.fit(X, y) node_idx = est.estimators_[0].tree_.children_left != -1 node_samples = est.estimators_[0].tree_.n_node_samples[node_idx] assert np.min(node_samples) > len(X) * 0.5 - 1, ( "Failed with {0}".format(name))
def test_unique_labels(): # Empty iterable assert_raises(ValueError, unique_labels) # Multiclass problem assert_array_equal(unique_labels(range(10)), np.arange(10)) assert_array_equal(unique_labels(np.arange(10)), np.arange(10)) assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4])) # Multilabel indicator assert_array_equal(unique_labels(np.array([[0, 0, 1], [1, 0, 1], [0, 0, 0]])), np.arange(3)) assert_array_equal(unique_labels(np.array([[0, 0, 1], [0, 0, 0]])), np.arange(3)) # Several arrays passed assert_array_equal(unique_labels([4, 0, 2], range(5)), np.arange(5)) assert_array_equal(unique_labels((0, 1, 2), (0,), (2, 1)), np.arange(3)) # Border line case with binary indicator matrix assert_raises(ValueError, unique_labels, [4, 0, 2], np.ones((5, 5))) assert_raises(ValueError, unique_labels, np.ones((5, 4)), np.ones((5, 5))) assert_array_equal(unique_labels(np.ones((4, 5)), np.ones((5, 5))), np.arange(5))
def test_sparse_random_projection_transformer_invalid_density(): for RandomProjection in all_SparseRandomProjection: assert_raises(ValueError, RandomProjection(density=1.1).fit, data) assert_raises(ValueError, RandomProjection(density=0).fit, data) assert_raises(ValueError, RandomProjection(density=-0.1).fit, data)
def test_bagging_regressor_with_missing_inputs(): # Check that SerialBaggingRegressor can accept X with missing/infinite data X = np.array([[1, 3, 5], [2, None, 6], [2, np.nan, 6], [2, np.inf, 6], [2, np.NINF, 6]]) y_values = [ np.array([2, 3, 3, 3, 3]), np.array([[2, 1, 9], [3, 6, 8], [3, 6, 8], [3, 6, 8], [3, 6, 8]]), ] for y in y_values: regressor = DecisionTreeRegressor() pipeline = make_pipeline(FunctionTransformer(replace), regressor) pipeline.fit(X, y).predict(X) bagging_regressor = SerialBaggingRegressor(pipeline) y_hat = bagging_regressor.fit(X, y).predict(X) assert y.shape == y_hat.shape # Verify that exceptions can be raised by wrapper regressor regressor = DecisionTreeRegressor() pipeline = make_pipeline(regressor) assert_raises(ValueError, pipeline.fit, X, y) bagging_regressor = SerialBaggingRegressor(pipeline) assert_raises(ValueError, bagging_regressor.fit, X, y)
def test_skewed_chi2_sampler(): # test that RBFSampler approximates kernel on random data # compute exact kernel c = 0.03 # set on negative component but greater than c to ensure that the kernel # approximation is valid on the group (-c; +\infty) endowed with the skewed # multiplication. Y[0, 0] = -c / 2. # abbreviations for easier formula X_c = (X + c)[:, np.newaxis, :] Y_c = (Y + c)[np.newaxis, :, :] # we do it in log-space in the hope that it's more stable # this array is n_samples_x x n_samples_y big x n_features log_kernel = ((np.log(X_c) / 2.) + (np.log(Y_c) / 2.) + np.log(2.) - np.log(X_c + Y_c)) # reduce to n_samples_x x n_samples_y by summing over features in log-space kernel = np.exp(log_kernel.sum(axis=2)) # approximate kernel mapping transform = SkewedChi2Sampler(skewedness=c, n_components=1000, random_state=42) X_trans = transform.fit_transform(X) Y_trans = transform.transform(Y) kernel_approx = np.dot(X_trans, Y_trans.T) assert_array_almost_equal(kernel, kernel_approx, 1) assert np.isfinite(kernel).all(), \ 'NaNs found in the Gram matrix' assert np.isfinite(kernel_approx).all(), \ 'NaNs found in the approximate Gram matrix' # test error is raised on when inputs contains values smaller than -c Y_neg = Y.copy() Y_neg[0, 0] = -c * 2. assert_raises(ValueError, transform.transform, Y_neg)
def test_min_max_axis_errors(): X = np.array([[0, 3, 0], [2, -1, 0], [0, 0, 0], [9, 8, 7], [4, 0, 5]], dtype=np.float64) X_csr = sp.csr_matrix(X) X_csc = sp.csc_matrix(X) assert_raises(TypeError, min_max_axis, X_csr.tolil(), axis=0) assert_raises(ValueError, min_max_axis, X_csr, axis=2) assert_raises(ValueError, min_max_axis, X_csc, axis=-3)
def test_callback(capsys): X = iris_data y = iris_target nca = NeighborhoodComponentsAnalysis(callback='my_cb') assert_raises(ValueError, nca.fit, X, y) max_iter = 10 def my_cb(transformation, n_iter): assert transformation.shape == (iris_data.shape[1]**2, ) rem_iter = max_iter - n_iter print('{} iterations remaining...'.format(rem_iter)) # assert that my_cb is called nca = NeighborhoodComponentsAnalysis(max_iter=max_iter, callback=my_cb, verbose=1) nca.fit(iris_data, iris_target) out, _ = capsys.readouterr() # check output assert ('{} iterations remaining...'.format(max_iter - 1) in out)
def test_params_validation(): # Test that invalid parameters raise value error X = np.arange(12).reshape(4, 3) y = [1, 1, 2, 2] NCA = NeighborhoodComponentsAnalysis rng = np.random.RandomState(42) # TypeError assert_raises(TypeError, NCA(max_iter='21').fit, X, y) assert_raises(TypeError, NCA(verbose='true').fit, X, y) assert_raises(TypeError, NCA(tol='1').fit, X, y) assert_raises(TypeError, NCA(n_components='invalid').fit, X, y) assert_raises(TypeError, NCA(warm_start=1).fit, X, y) # ValueError assert_raise_message( ValueError, "`init` must be 'auto', 'pca', 'lda', 'identity', " "'random' or a numpy array of shape " "(n_components, n_features).", NCA(init=1).fit, X, y) assert_raise_message(ValueError, '`max_iter`= -1, must be >= 1.', NCA(max_iter=-1).fit, X, y) init = rng.rand(5, 3) assert_raise_message( ValueError, 'The output dimensionality ({}) of the given linear ' 'transformation `init` cannot be greater than its ' 'input dimensionality ({}).'.format(init.shape[0], init.shape[1]), NCA(init=init).fit, X, y) n_components = 10 assert_raise_message( ValueError, 'The preferred dimensionality of the ' 'projected space `n_components` ({}) cannot ' 'be greater than the given data ' 'dimensionality ({})!'.format(n_components, X.shape[1]), NCA(n_components=n_components).fit, X, y)
def test_calibration_curve(): """Check calibration_curve function""" y_true = np.array([0, 0, 0, 1, 1, 1]) y_pred = np.array([0., 0.1, 0.2, 0.8, 0.9, 1.]) prob_true, prob_pred = calibration_curve(y_true, y_pred, n_bins=2) prob_true_unnormalized, prob_pred_unnormalized = \ calibration_curve(y_true, y_pred * 2, n_bins=2, normalize=True) assert len(prob_true) == len(prob_pred) assert len(prob_true) == 2 assert_almost_equal(prob_true, [0, 1]) assert_almost_equal(prob_pred, [0.1, 0.9]) assert_almost_equal(prob_true, prob_true_unnormalized) assert_almost_equal(prob_pred, prob_pred_unnormalized) # probabilities outside [0, 1] should not be accepted when normalize # is set to False assert_raises(ValueError, calibration_curve, [1.1], [-0.1], normalize=False) # test that quantiles work as expected y_true2 = np.array([0, 0, 0, 0, 1, 1]) y_pred2 = np.array([0., 0.1, 0.2, 0.5, 0.9, 1.]) prob_true_quantile, prob_pred_quantile = calibration_curve( y_true2, y_pred2, n_bins=2, strategy='quantile') assert len(prob_true_quantile) == len(prob_pred_quantile) assert len(prob_true_quantile) == 2 assert_almost_equal(prob_true_quantile, [0, 2 / 3]) assert_almost_equal(prob_pred_quantile, [0.1, 0.8]) # Check that error is raised when invalid strategy is selected assert_raises(ValueError, calibration_curve, y_true2, y_pred2, strategy='percentile')
def test_set_pipeline_steps(): transf1 = Transf() transf2 = Transf() pipeline = Pipeline([('mock', transf1)]) assert pipeline.named_steps['mock'] is transf1 # Directly setting attr pipeline.steps = [('mock2', transf2)] assert 'mock' not in pipeline.named_steps assert pipeline.named_steps['mock2'] is transf2 assert [('mock2', transf2)] == pipeline.steps # Using set_params pipeline.set_params(steps=[('mock', transf1)]) assert [('mock', transf1)] == pipeline.steps # Using set_params to replace single step pipeline.set_params(mock=transf2) assert [('mock', transf2)] == pipeline.steps # With invalid data pipeline.set_params(steps=[('junk', ())]) assert_raises(TypeError, pipeline.fit, [[1]], [1]) assert_raises(TypeError, pipeline.fit_transform, [[1]], [1])
def test_ridge_individual_penalties(): # Tests the ridge object using individual penalties rng = np.random.RandomState(42) n_samples, n_features, n_targets = 20, 10, 5 X = rng.randn(n_samples, n_features) y = rng.randn(n_samples, n_targets) penalties = np.arange(n_targets) coef_cholesky = np.array([ Ridge(alpha=alpha, solver="cholesky").fit(X, target).coef_ for alpha, target in zip(penalties, y.T)]) coefs_indiv_pen = [ Ridge(alpha=penalties, solver=solver, tol=1e-8).fit(X, y).coef_ for solver in ['svd', 'sparse_cg', 'lsqr', 'cholesky', 'sag', 'saga']] for coef_indiv_pen in coefs_indiv_pen: assert_array_almost_equal(coef_cholesky, coef_indiv_pen) # Test error is raised when number of targets and penalties do not match. ridge = Ridge(alpha=penalties[:-1]) assert_raises(ValueError, ridge.fit, X, y)
def test_qda(): # QDA classification. # This checks that QDA implements fit and predict and returns # correct values for a simple toy dataset. clf = QuadraticDiscriminantAnalysis() y_pred = clf.fit(X6, y6).predict(X6) assert_array_equal(y_pred, y6) # Assure that it works with 1D data y_pred1 = clf.fit(X7, y6).predict(X7) assert_array_equal(y_pred1, y6) # Test probas estimates y_proba_pred1 = clf.predict_proba(X7) assert_array_equal((y_proba_pred1[:, 1] > 0.5) + 1, y6) y_log_proba_pred1 = clf.predict_log_proba(X7) assert_array_almost_equal(np.exp(y_log_proba_pred1), y_proba_pred1, 8) y_pred3 = clf.fit(X6, y7).predict(X6) # QDA shouldn't be able to separate those assert np.any(y_pred3 != y7) # Classes should have at least 2 elements assert_raises(ValueError, clf.fit, X6, y4)
def check_edge_case_of_sample_int(sample_without_replacement): # n_population < n_sample assert_raises(ValueError, sample_without_replacement, 0, 1) assert_raises(ValueError, sample_without_replacement, 1, 2) # n_population == n_samples assert sample_without_replacement(0, 0).shape == (0, ) assert sample_without_replacement(1, 1).shape == (1, ) # n_population >= n_samples assert sample_without_replacement(5, 0).shape == (0, ) assert sample_without_replacement(5, 1).shape == (1, ) # n_population < 0 or n_samples < 0 assert_raises(ValueError, sample_without_replacement, -1, 5) assert_raises(ValueError, sample_without_replacement, 5, -1)
def test_ovr_exceptions(): ovr = OneVsRestClassifier(LinearSVC(random_state=0)) assert_raises(ValueError, ovr.predict, []) # Fail on multioutput data assert_raises(ValueError, OneVsRestClassifier(MultinomialNB()).fit, np.array([[1, 0], [0, 1]]), np.array([[1, 2], [3, 1]])) assert_raises(ValueError, OneVsRestClassifier(MultinomialNB()).fit, np.array([[1, 0], [0, 1]]), np.array([[1.5, 2.4], [3.1, 0.8]]))