def test_embed_dim(estimator, build_dataset):
  # Checks that the dimension of the output space is as expected
  input_data, labels, _, X = build_dataset()
  model = clone(estimator)
  set_random_state(model)
  model.fit(*remove_y_quadruplets(estimator, input_data, labels))
  assert model.transform(X).shape == X.shape

  # assert that ValueError is thrown if input shape is 1D
  context = make_context(estimator)
  err_msg = ("2D array of formed points expected{}. Found 1D array "
             "instead:\ninput={}. Reshape your data and/or use a "
             "preprocessor.\n".format(context, X[0]))
  with pytest.raises(ValueError) as raised_error:
    model.transform(X[0, :])
  assert str(raised_error.value) == err_msg

  # we test that the shape is also OK when doing dimensionality reduction
  if hasattr(model, 'n_components'):
    model.set_params(n_components=2)
    model.fit(*remove_y_quadruplets(estimator, input_data, labels))
    assert model.transform(X).shape == (X.shape[0], 2)
    # assert that ValueError is thrown if input shape is 1D
    with pytest.raises(ValueError) as raised_error:
      model.transform(X[0, :])
    assert str(raised_error.value) == err_msg

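# `remove_y_quadruplets` (imported from the shared test helpers) appears in
# every test below. A minimal sketch of what such a helper presumably does,
# assuming quadruplet learners take no `y` in `fit` (as noted in
# test_pipeline_consistency); the real helper may differ:
def _remove_y_quadruplets_sketch(estimator, X, y):
  # quadruplet learners are fit without labels, so drop `y` for them
  if isinstance(estimator, _QuadrupletsClassifierMixin):
    return (X,)
  return (X, y)
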
def test_transformer_is_2D(estimator, build_dataset):
  """Tests that the transformer of metric learners is 2D"""
  input_data, labels, _, X = build_dataset()
  model = clone(estimator)
  set_random_state(model)
  # test that it works for X.shape[1] features
  model.fit(*remove_y_quadruplets(estimator, input_data, labels))
  assert model.transformer_.shape == (X.shape[1], X.shape[1])

  # test that it works for 1 feature
  trunc_data = input_data[..., :1]
  # we drop duplicates that might have been formed, i.e. of the form
  # aabc or abcc or aabb for quadruplets, and aa for pairs.
  if isinstance(estimator, _QuadrupletsClassifierMixin):
    for slice_idx in [slice(0, 2), slice(2, 4)]:
      pairs = trunc_data[:, slice_idx, :]
      diffs = pairs[:, 1, :] - pairs[:, 0, :]
      to_keep = np.where(np.abs(diffs.ravel()) > 1e-9)
      trunc_data = trunc_data[to_keep]
      labels = labels[to_keep]
  elif isinstance(estimator, _PairsClassifierMixin):
    diffs = trunc_data[:, 1, :] - trunc_data[:, 0, :]
    to_keep = np.where(np.abs(diffs.ravel()) > 1e-9)
    trunc_data = trunc_data[to_keep]
    labels = labels[to_keep]
  model.fit(*remove_y_quadruplets(estimator, trunc_data, labels))
  assert model.transformer_.shape == (1, 1)  # the transformer must be 2D

def test_cross_validation_is_finite(estimator, build_dataset):
  """Tests that validation on metric-learn estimators returns something
  finite
  """
  input_data, labels, preprocessor, _ = build_dataset()
  estimator = clone(estimator)
  estimator.set_params(preprocessor=preprocessor)
  set_random_state(estimator)
  assert np.isfinite(cross_val_score(estimator,
                                     *remove_y_quadruplets(estimator,
                                                           input_data,
                                                           labels))).all()
  assert np.isfinite(cross_val_predict(estimator,
                                       *remove_y_quadruplets(estimator,
                                                             input_data,
                                                             labels))).all()

def test_pipeline_consistency(estimator, build_dataset, with_preprocessor):
  # Adapted from scikit-learn
  # check that make_pipeline(est) gives same score as est
  # we do this test on all except quadruplets (since they don't have a y
  # in fit):
  if estimator.__class__.__name__ not in [e.__class__.__name__
                                          for (e, _) in quadruplets_learners]:
    input_data, y, preprocessor, _ = build_dataset(with_preprocessor)

    def make_random_state(estimator, in_pipeline):
      rs = {}
      name_estimator = estimator.__class__.__name__
      if name_estimator[-11:] == '_Supervised':
        name_param = 'random_state'
        if in_pipeline:
          name_param = name_estimator.lower() + '__' + name_param
        rs[name_param] = check_random_state(0)
      return rs

    estimator = clone(estimator)
    estimator.set_params(preprocessor=preprocessor)
    pipeline = make_pipeline(estimator)
    estimator.fit(*remove_y_quadruplets(estimator, input_data, y),
                  **make_random_state(estimator, False))
    pipeline.fit(*remove_y_quadruplets(estimator, input_data, y),
                 **make_random_state(estimator, True))

    if hasattr(estimator, 'score'):
      result = estimator.score(*remove_y_quadruplets(estimator,
                                                     input_data, y))
      result_pipe = pipeline.score(*remove_y_quadruplets(estimator,
                                                         input_data, y))
      assert_allclose_dense_sparse(result, result_pipe)

    if hasattr(estimator, 'predict'):
      result = estimator.predict(input_data)
      result_pipe = pipeline.predict(input_data)
      assert_allclose_dense_sparse(result, result_pipe)

    if issubclass(estimator.__class__, TransformerMixin):
      if hasattr(estimator, 'transform'):
        result = estimator.transform(input_data)
        result_pipe = pipeline.transform(input_data)
        assert_allclose_dense_sparse(result, result_pipe)

def test_embed_finite(estimator, build_dataset):
  # Checks that embed returns vectors with finite values
  input_data, labels, _, X = build_dataset()
  model = clone(estimator)
  set_random_state(model)
  model.fit(*remove_y_quadruplets(estimator, input_data, labels))
  assert np.isfinite(model.transform(X)).all()

def test_score_pairs_finite(estimator, build_dataset):
  # tests that the score is finite
  input_data, labels, _, X = build_dataset()
  model = clone(estimator)
  set_random_state(model)
  model.fit(*remove_y_quadruplets(estimator, input_data, labels))
  pairs = np.array(list(product(X, X)))
  assert np.isfinite(model.score_pairs(pairs)).all()

def test_get_metric_compatible_with_scikit_learn(estimator, build_dataset):
  """Check that the metric returned by get_metric is compatible with
  scikit-learn's algorithms using a custom metric, DBSCAN for instance"""
  input_data, labels, _, X = build_dataset()
  model = clone(estimator)
  set_random_state(model)
  model.fit(*remove_y_quadruplets(estimator, input_data, labels))
  clustering = DBSCAN(metric=model.get_metric())
  clustering.fit(X)

def test_estimators_fit_returns_self(estimator, build_dataset,
                                     with_preprocessor):
  """Check if self is returned when calling fit"""
  # Adapted from scikit-learn
  input_data, labels, preprocessor, _ = build_dataset(with_preprocessor)
  estimator = clone(estimator)
  estimator.set_params(preprocessor=preprocessor)
  assert estimator.fit(*remove_y_quadruplets(estimator,
                                             input_data,
                                             labels)) is estimator

def test_cross_validation_manual_vs_scikit(estimator, build_dataset,
                                           with_preprocessor):
  """Tests that if we make a manual cross-validation, the result will be the
  same as scikit-learn's cross-validation (some code for generating the
  folds is taken from scikit-learn).
  """
  if any(hasattr(estimator, method) for method in ["predict", "score"]):
    input_data, labels, preprocessor, _ = build_dataset(with_preprocessor)
    estimator = clone(estimator)
    estimator.set_params(preprocessor=preprocessor)
    set_random_state(estimator)
    n_splits = 3
    kfold = KFold(shuffle=False, n_splits=n_splits)
    n_samples = input_data.shape[0]
    fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=int)
    fold_sizes[:n_samples % n_splits] += 1
    current = 0
    scores, predictions = [], np.zeros(input_data.shape[0])
    for fold_size in fold_sizes:
      start, stop = current, current + fold_size
      current = stop
      test_slice = slice(start, stop)
      train_mask = np.ones(input_data.shape[0], bool)
      train_mask[test_slice] = False
      y_train, y_test = labels[train_mask], labels[test_slice]
      estimator.fit(*remove_y_quadruplets(estimator,
                                          input_data[train_mask],
                                          y_train))
      if hasattr(estimator, "score"):
        scores.append(estimator.score(*remove_y_quadruplets(
            estimator, input_data[test_slice], y_test)))
      if hasattr(estimator, "predict"):
        predictions[test_slice] = estimator.predict(input_data[test_slice])
    if hasattr(estimator, "score"):
      assert all(scores == cross_val_score(
          estimator, *remove_y_quadruplets(estimator, input_data, labels),
          cv=kfold))
    if hasattr(estimator, "predict"):
      assert all(predictions == cross_val_predict(
          estimator, *remove_y_quadruplets(estimator, input_data, labels),
          cv=kfold))

def test_embed_toy_example(estimator, build_dataset):
  # Checks that embed works on a toy example
  input_data, labels, _, X = build_dataset()
  n_samples = 20
  X = X[:n_samples]
  model = clone(estimator)
  set_random_state(model)
  model.fit(*remove_y_quadruplets(estimator, input_data, labels))
  embedded_points = X.dot(model.transformer_.T)
  assert_array_almost_equal(model.transform(X), embedded_points)

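# The toy example above uses the linear map L stored in `transformer_`. For
# Mahalanobis learners the learned matrix factors as M = L^T L, so the
# following standalone identity should hold (a sketch for illustration,
# assuming `get_mahalanobis_matrix` returns M, as in the tests below):
def _check_mahalanobis_factorization_sketch(model):
  L = model.transformer_
  assert_array_almost_equal(model.get_mahalanobis_matrix(), L.T.dot(L))
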
def test_embed_is_linear(estimator, build_dataset):
  # Checks that the embedding is linear
  input_data, labels, _, X = build_dataset()
  model = clone(estimator)
  set_random_state(model)
  model.fit(*remove_y_quadruplets(estimator, input_data, labels))
  assert_array_almost_equal(model.transform(X[:10] + X[10:20]),
                            model.transform(X[:10]) +
                            model.transform(X[10:20]))
  assert_array_almost_equal(model.transform(5 * X[:10]),
                            5 * model.transform(X[:10]))

def test_array_like_inputs(estimator, build_dataset, with_preprocessor):
  """Test that metric-learners can have as input (of all functions that are
  applied on data) any array-like object."""
  input_data, labels, preprocessor, X = build_dataset(with_preprocessor)

  # we subsample the data for the test to be more efficient
  input_data, _, labels, _ = train_test_split(input_data, labels,
                                              train_size=20)
  X = X[:10]

  estimator = clone(estimator)
  estimator.set_params(preprocessor=preprocessor)
  set_random_state(estimator)
  input_variants, label_variants = generate_array_like(input_data, labels)
  for input_variant in input_variants:
    for label_variant in label_variants:
      estimator.fit(*remove_y_quadruplets(estimator,
                                          input_variant, label_variant))
    if hasattr(estimator, "predict"):
      estimator.predict(input_variant)
    if hasattr(estimator, "predict_proba"):
      # no estimator has predict_proba yet, but if someone contributes an
      # algorithm that does, it will be checked automatically
      estimator.predict_proba(input_variant)
    if hasattr(estimator, "decision_function"):
      estimator.decision_function(input_variant)
    if hasattr(estimator, "score"):
      for label_variant in label_variants:
        estimator.score(*remove_y_quadruplets(estimator,
                                              input_variant, label_variant))

  X_variants, _ = generate_array_like(X)
  for X_variant in X_variants:
    estimator.transform(X_variant)

  pairs = np.array([[X[0], X[1]], [X[0], X[2]]])
  pairs_variants, _ = generate_array_like(pairs)
  for pairs_variant in pairs_variants:
    estimator.score_pairs(pairs_variant)

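# `generate_array_like` also comes from the shared test helpers; a sketch of
# the kind of variants it could produce (an assumption about its
# implementation -- the real helper likely generates more variants, e.g.
# tuples or pandas objects):
def _generate_array_like_sketch(data, labels=None):
  data = np.asarray(data)
  data_variants = [data, data.tolist()]
  label_variants = [labels, list(labels)] if labels is not None else [None]
  return data_variants, label_variants
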
def test_score_pairs_toy_example(estimator, build_dataset):
  # Checks that score_pairs works on a toy example
  input_data, labels, _, X = build_dataset()
  n_samples = 20
  X = X[:n_samples]
  model = clone(estimator)
  set_random_state(model)
  model.fit(*remove_y_quadruplets(estimator, input_data, labels))
  pairs = np.stack([X[:10], X[10:20]], axis=1)
  embedded_pairs = pairs.dot(model.transformer_.T)
  distances = np.sqrt(np.sum((embedded_pairs[:, 1] -
                              embedded_pairs[:, 0])**2,
                             axis=-1))
  assert_array_almost_equal(model.score_pairs(pairs), distances)

def test_metric_raises_deprecation_warning(estimator, build_dataset):
  """assert that a deprecation warning is raised if someone wants to call
  the `metric` function"""
  # TODO: remove this method in version 0.6.0
  input_data, labels, _, X = build_dataset()
  model = clone(estimator)
  set_random_state(model)
  model.fit(*remove_y_quadruplets(estimator, input_data, labels))

  with pytest.warns(DeprecationWarning) as raised_warning:
    model.metric()
  assert (str(raised_warning[0].message) ==
          ("`metric` is deprecated since version 0.5.0 and will be removed "
           "in 0.6.0. Use `get_mahalanobis_matrix` instead."))

def test_simple_estimator(estimator, build_dataset, with_preprocessor):
  """Tests that fit, predict and scoring work."""
  if any(hasattr(estimator, method) for method in ["predict", "score"]):
    input_data, labels, preprocessor, _ = build_dataset(with_preprocessor)
    (tuples_train, tuples_test,
     y_train, y_test) = train_test_split(input_data, labels,
                                         random_state=RNG)
    estimator = clone(estimator)
    estimator.set_params(preprocessor=preprocessor)
    set_random_state(estimator)
    estimator.fit(*remove_y_quadruplets(estimator, tuples_train, y_train))
    check_score(estimator, tuples_test, y_test)
    check_predict(estimator, tuples_test)

def test_get_squared_metric(estimator, build_dataset):
  """Test that the squared metric returned is indeed the square of the
  metric"""
  input_data, labels, _, X = build_dataset()
  model = clone(estimator)
  set_random_state(model)
  model.fit(*remove_y_quadruplets(estimator, input_data, labels))
  metric = model.get_metric()

  n_features = X.shape[1]
  for seed in range(10):
    rng = np.random.RandomState(seed)
    a, b = (rng.randn(n_features) for _ in range(2))
    assert_allclose(metric(a, b, squared=True),
                    metric(a, b, squared=False)**2,
                    rtol=1e-15)

def test_get_metric_equivalent_to_explicit_mahalanobis(estimator,
                                                       build_dataset):
  """Tests that using the get_metric method of mahalanobis metric learners is
  equivalent to explicitly calling scipy's mahalanobis metric
  """
  rng = np.random.RandomState(42)
  input_data, labels, _, X = build_dataset()
  model = clone(estimator)
  set_random_state(model)
  model.fit(*remove_y_quadruplets(estimator, input_data, labels))
  metric = model.get_metric()
  n_features = X.shape[1]
  a, b = (rng.randn(n_features), rng.randn(n_features))
  expected_dist = mahalanobis(a[None], b[None],
                              VI=model.get_mahalanobis_matrix())
  assert_allclose(metric(a, b), expected_dist, rtol=1e-13)

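# For reference, the distance checked above is
# d(a, b) = sqrt((a - b)^T M (a - b)) with M the learned Mahalanobis matrix;
# a minimal numpy equivalent of the scipy call (a sketch for illustration,
# not used by the tests):
def _mahalanobis_sketch(a, b, M):
  diff = a - b
  return np.sqrt(diff.dot(M).dot(diff))
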
def test_score_pairs_dim(estimator, build_dataset):
  # scoring of 3D arrays should return 1D array (several tuples),
  # and scoring of 2D arrays (one tuple) should return an error (like
  # scikit-learn's error when scoring 1D arrays)
  input_data, labels, _, X = build_dataset()
  model = clone(estimator)
  set_random_state(model)
  model.fit(*remove_y_quadruplets(estimator, input_data, labels))
  tuples = np.array(list(product(X, X)))
  assert model.score_pairs(tuples).shape == (tuples.shape[0],)
  context = make_context(estimator)
  msg = ("3D array of formed tuples expected{}. Found 2D array "
         "instead:\ninput={}. Reshape your data and/or use a preprocessor.\n"
         .format(context, tuples[1]))
  with pytest.raises(ValueError) as raised_error:
    model.score_pairs(tuples[1])
  assert str(raised_error.value) == msg

def test_dont_overwrite_parameters(estimator, build_dataset,
                                   with_preprocessor):
  # Adapted from scikit-learn
  # check that fit method only changes or sets private attributes
  input_data, labels, preprocessor, _ = build_dataset(with_preprocessor)
  estimator = clone(estimator)
  estimator.set_params(preprocessor=preprocessor)
  if hasattr(estimator, "n_components"):
    estimator.n_components = 1
  dict_before_fit = estimator.__dict__.copy()

  estimator.fit(*remove_y_quadruplets(estimator, input_data, labels))
  dict_after_fit = estimator.__dict__

  public_keys_after_fit = [key for key in dict_after_fit.keys()
                           if is_public_parameter(key)]

  attrs_added_by_fit = [key for key in public_keys_after_fit
                        if key not in dict_before_fit.keys()]

  # check that fit doesn't add any public attribute
  assert not attrs_added_by_fit, (
      "Estimator adds public attribute(s) during the fit method. "
      "Estimators are only allowed to add private attributes "
      "either starting with _ or ending with _, but %s was added"
      % ', '.join(attrs_added_by_fit))

  # check that fit doesn't change any public attribute
  attrs_changed_by_fit = [key for key in public_keys_after_fit
                          if (dict_before_fit[key]
                              is not dict_after_fit[key])]

  assert not attrs_changed_by_fit, (
      "Estimator changes public attribute(s) during the fit method. "
      "Estimators are only allowed to change attributes starting "
      "or ending with _, but %s changed"
      % ', '.join(attrs_changed_by_fit))

def test_score_pairs_pairwise(estimator, build_dataset):
  # Computing pairwise scores should return a Euclidean distance matrix.
  input_data, labels, _, X = build_dataset()
  n_samples = 20
  X = X[:n_samples]
  model = clone(estimator)
  set_random_state(model)
  model.fit(*remove_y_quadruplets(estimator, input_data, labels))

  pairwise = model.score_pairs(np.array(list(product(X, X)))).reshape(
      n_samples, n_samples)

  check_is_distance_matrix(pairwise)

  # a necessary condition for Euclidean distance matrices (see
  # https://en.wikipedia.org/wiki/Euclidean_distance_matrix)
  assert np.linalg.matrix_rank(pairwise**2) <= min(X.shape) + 2

  # assert that this distance is coherent with pdist on embeddings
  assert_array_almost_equal(squareform(pairwise), pdist(model.transform(X)))

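# `check_is_distance_matrix` is defined in the shared test helpers; a sketch
# of the properties such a check would need to verify (an assumption about
# its implementation, for illustration):
def _check_is_distance_matrix_sketch(D):
  assert (D >= 0).all()  # non-negativity
  assert_array_almost_equal(D, D.T)  # symmetry
  assert np.allclose(np.diag(D), 0)  # zero self-distance
  # triangle inequality: D[i, k] <= D[i, j] + D[j, k] for all i, j, k
  assert (D[:, None, :] <= D[:, :, None] + D[None, :, :] + 1e-10).all()
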
def test_get_metric_is_pseudo_metric(estimator, build_dataset):
  """Tests that the get_metric method of mahalanobis metric learners returns
  a pseudo-metric (metric but without one side of the equivalence of
  the identity of indiscernibles property)
  """
  input_data, labels, _, X = build_dataset()
  model = clone(estimator)
  set_random_state(model)
  model.fit(*remove_y_quadruplets(estimator, input_data, labels))
  metric = model.get_metric()

  n_features = X.shape[1]
  for seed in range(10):
    rng = np.random.RandomState(seed)
    a, b, c = (rng.randn(n_features) for _ in range(3))
    assert metric(a, b) >= 0  # positivity
    assert metric(a, b) == metric(b, a)  # symmetry
    # one side of identity of indiscernibles: x == y => d(x, y) == 0. The
    # other side of the equivalence is not always true for Mahalanobis
    # distances.
    assert metric(a, a) == 0
    # triangle inequality
    assert (metric(a, c) < metric(a, b) + metric(b, c) or
            np.isclose(metric(a, c), metric(a, b) + metric(b, c),
                       rtol=1e-20))

def test_dict_unchanged(estimator, build_dataset, with_preprocessor):
  # Adapted from scikit-learn
  (input_data, labels, preprocessor,
   to_transform) = build_dataset(with_preprocessor)
  estimator = clone(estimator)
  estimator.set_params(preprocessor=preprocessor)
  if hasattr(estimator, "n_components"):
    estimator.n_components = 1
  estimator.fit(*remove_y_quadruplets(estimator, input_data, labels))

  def check_dict():
    assert estimator.__dict__ == dict_before, (
        "Estimator changes __dict__ during %s" % method)

  for method in ["predict", "decision_function", "predict_proba"]:
    if hasattr(estimator, method):
      dict_before = estimator.__dict__.copy()
      getattr(estimator, method)(input_data)
      check_dict()
  if hasattr(estimator, "transform"):
    dict_before = estimator.__dict__.copy()
    # we transform only a dataset of points
    estimator.transform(to_transform)
    check_dict()

def check_score(estimator, tuples, y):
  if hasattr(estimator, "score"):
    score = estimator.score(*remove_y_quadruplets(estimator, tuples, y))
    assert np.isfinite(score)

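# `check_predict` (used in test_simple_estimator above) is defined alongside
# check_score in the original module; a sketch of what it presumably checks,
# mirroring check_score (an assumption):
def _check_predict_sketch(estimator, tuples):
  if hasattr(estimator, "predict"):
    y_predicted = estimator.predict(tuples)
    assert len(y_predicted) == len(tuples)
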