def test_sample_weight(): """Check sample_weight param works""" # Check constant sample_weight has no effect sample_weight = np.ones(boston.target.shape[0]) est1 = SymbolicRegressor(generations=2, random_state=0) est1.fit(boston.data, boston.target) est2 = SymbolicRegressor(generations=2, random_state=0) est2.fit(boston.data, boston.target, sample_weight=sample_weight) # And again with a scaled sample_weight est3 = SymbolicRegressor(generations=2, random_state=0) est3.fit(boston.data, boston.target, sample_weight=sample_weight * 1.1) assert_almost_equal(est1._program.fitness_, est2._program.fitness_) assert_almost_equal(est1._program.fitness_, est3._program.fitness_) # And again for the transformer sample_weight = np.ones(boston.target.shape[0]) est1 = SymbolicTransformer(generations=2, random_state=0) est1 = est1.fit_transform(boston.data, boston.target) est2 = SymbolicTransformer(generations=2, random_state=0) est2 = est2.fit_transform(boston.data, boston.target, sample_weight=sample_weight) # And again with a scaled sample_weight est3 = SymbolicTransformer(generations=2, random_state=0) est3 = est3.fit_transform(boston.data, boston.target, sample_weight=sample_weight * 1.1) assert_array_almost_equal(est1, est2) assert_array_almost_equal(est1, est3)
def check_transformer_pickle(name, Transformer): X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]], random_state=0, n_features=2, cluster_std=0.1) n_samples, n_features = X.shape X = StandardScaler().fit_transform(X) X -= X.min() # catch deprecation warnings with warnings.catch_warnings(record=True): transformer = Transformer() if not hasattr(transformer, 'transform'): return set_random_state(transformer) set_fast_parameters(transformer) # fit if name in CROSS_DECOMPOSITION: random_state = np.random.RandomState(seed=12345) y_ = np.vstack([y, 2 * y + random_state.randint(2, size=len(y))]) y_ = y_.T else: y_ = y transformer.fit(X, y_) X_pred = transformer.fit(X, y_).transform(X) pickled_transformer = pickle.dumps(transformer) unpickled_transformer = pickle.loads(pickled_transformer) pickled_X_pred = unpickled_transformer.transform(X) assert_array_almost_equal(pickled_X_pred, X_pred)
def check_regressors_int(name, Regressor): X, _ = _boston_subset() X = X[:50] rnd = np.random.RandomState(0) y = rnd.randint(3, size=X.shape[0]) y = multioutput_estimator_convert_y_2d(name, y) rnd = np.random.RandomState(0) # catch deprecation warnings with warnings.catch_warnings(record=True): # separate estimators to control random seeds regressor_1 = Regressor() regressor_2 = Regressor() set_fast_parameters(regressor_1) set_fast_parameters(regressor_2) set_random_state(regressor_1) set_random_state(regressor_2) if name in CROSS_DECOMPOSITION: y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))]) y_ = y_.T else: y_ = y # fit regressor_1.fit(X, y_) pred1 = regressor_1.predict(X) regressor_2.fit(X, y_.astype(np.float)) pred2 = regressor_2.predict(X) assert_array_almost_equal(pred1, pred2, 2, name)
def test_all_metrics(): """Check all supported metrics work""" params = {'function_set': ['add2', 'sub2', 'mul2', 'div2'], 'arities': {2: ['add2', 'sub2', 'mul2', 'div2']}, 'init_depth': (2, 6), 'init_method': 'half and half', 'n_features': 10, 'const_range': (-1.0, 1.0), 'metric': 'mean absolute error', 'p_point_replace': 0.05, 'parsimony_coefficient': 0.1} random_state = check_random_state(415) # Test for a small program test_gp = ['mul2', 'div2', 8, 1, 'sub2', 9, .5] gp = _Program(random_state=random_state, program=test_gp, **params) X = np.reshape(random_state.uniform(size=50), (5, 10)) y = random_state.uniform(size=5) sample_weight = np.ones(5) expected = [1.48719809776, 1.82389179833, 1.76013763179, 0.98663772258, -0.2928200724, -0.5] result = [] for m in ['mean absolute error', 'mse', 'rmse', 'rmsle', 'pearson', 'spearman']: gp.metric = m gp.raw_fitness_ = gp.raw_fitness(X, y, sample_weight) result.append(gp.fitness()) assert_array_almost_equal(result, expected) # And check a fake one gp.metric = 'the larch' assert_raises(ValueError, gp.raw_fitness, X, y, sample_weight)
def check_regressors_pickle(name, Regressor): X, y = _boston_subset() y = StandardScaler().fit_transform(y) # X is already scaled y = multioutput_estimator_convert_y_2d(name, y) rnd = np.random.RandomState(0) # catch deprecation warnings with warnings.catch_warnings(record=True): regressor = Regressor() set_fast_parameters(regressor) if not hasattr(regressor, 'alphas') and hasattr(regressor, 'alpha'): # linear regressors need to set alpha, but not generalized CV ones regressor.alpha = 0.01 if name in CROSS_DECOMPOSITION: y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))]) y_ = y_.T else: y_ = y regressor.fit(X, y_) y_pred = regressor.predict(X) # store old predictions pickled_regressor = pickle.dumps(regressor) unpickled_regressor = pickle.loads(pickled_regressor) pickled_y_pred = unpickled_regressor.predict(X) assert_array_almost_equal(pickled_y_pred, y_pred)
def check_class_weight_auto_linear_classifier(name, Classifier): """Test class weights with non-contiguous class labels.""" X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0], [1.0, 1.0], [1.0, 0.0]]) y = [1, 1, 1, -1, -1] with warnings.catch_warnings(record=True): classifier = Classifier() if hasattr(classifier, "n_iter"): # This is a very small dataset, default n_iter are likely to prevent # convergence classifier.set_params(n_iter=1000) set_random_state(classifier) # Let the model compute the class frequencies classifier.set_params(class_weight='auto') coef_auto = classifier.fit(X, y).coef_.copy() # Count each label occurrence to reweight manually mean_weight = (1. / 3 + 1. / 2) / 2 class_weight = { 1: 1. / 3 / mean_weight, -1: 1. / 2 / mean_weight, } classifier.set_params(class_weight=class_weight) coef_manual = classifier.fit(X, y).coef_.copy() assert_array_almost_equal(coef_auto, coef_manual)
def test_sample_weight(): """Check sample_weight param works""" # Check constant sample_weight has no effect sample_weight = np.ones(boston.target.shape[0]) est1 = SymbolicRegressor(generations=2, random_state=0) est1.fit(boston.data, boston.target) est2 = SymbolicRegressor(generations=2, random_state=0) est2.fit(boston.data, boston.target, sample_weight=sample_weight) # And again with a scaled sample_weight est3 = SymbolicRegressor(generations=2, random_state=0) est3.fit(boston.data, boston.target, sample_weight=sample_weight * 1.1) assert_almost_equal(est1._program.fitness_, est2._program.fitness_) assert_almost_equal(est1._program.fitness_, est3._program.fitness_) # And again for the transformer sample_weight = np.ones(boston.target.shape[0]) est1 = SymbolicTransformer(generations=2, random_state=0) est1 = est1.fit_transform(boston.data, boston.target) est2 = SymbolicTransformer(generations=2, random_state=0) est2 = est2.fit_transform(boston.data, boston.target, sample_weight=sample_weight) assert_array_almost_equal(est1, est2)
def check_pipeline_consistency(name, Estimator): if name in ('CCA', 'LocallyLinearEmbedding', 'KernelPCA') and _is_32bit(): # Those transformers yield non-deterministic output when executed on # a 32bit Python. The same transformers are stable on 64bit Python. # FIXME: try to isolate a minimalistic reproduction case only depending # scipy and/or maybe generate a test dataset that does not # cause such unstable behaviors. msg = name + ' is non deterministic on 32bit Python' raise SkipTest(msg) # check that make_pipeline(est) gives same score as est X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]], random_state=0, n_features=2, cluster_std=0.1) X -= X.min() y = multioutput_estimator_convert_y_2d(name, y) estimator = Estimator() set_fast_parameters(estimator) set_random_state(estimator) pipeline = make_pipeline(estimator) estimator.fit(X, y) pipeline.fit(X, y) funcs = ["score", "fit_transform"] for func_name in funcs: func = getattr(estimator, func_name, None) if func is not None: func_pipeline = getattr(pipeline, func_name) result = func(X, y) result_pipe = func_pipeline(X, y) assert_array_almost_equal(result, result_pipe)
def test_execute(): """Check executing the program works""" params = { 'function_set': [add2, sub2, mul2, div2], 'arities': { 2: [add2, sub2, mul2, div2] }, 'init_depth': (2, 6), 'init_method': 'half and half', 'n_features': 10, 'const_range': (-1.0, 1.0), 'metric': 'mean absolute error', 'p_point_replace': 0.05, 'parsimony_coefficient': 0.1 } random_state = check_random_state(415) # Test for a small program test_gp = [mul2, div2, 8, 1, sub2, 9, .5] X = np.reshape(random_state.uniform(size=50), (5, 10)) gp = _Program(random_state=random_state, program=test_gp, **params) result = gp.execute(X) expected = [-0.19656208, 0.78197782, -1.70123845, -0.60175969, -0.01082618] assert_array_almost_equal(result, expected)
def test_compute_class_weight_auto_unordered(): # Test compute_class_weight when classes are unordered classes = np.array([1, 0, 3]) y = np.asarray([1, 0, 0, 3, 3, 3]) cw = compute_class_weight("auto", classes, y) assert_almost_equal(cw.sum(), classes.shape) assert_equal(len(cw), len(classes)) assert_array_almost_equal(cw, np.array([1.636, 0.818, 0.545]), decimal=3)
def test_parallel_train(): """Check predictions are the same for different n_jobs""" # Check the regressor ests = [ SymbolicRegressor(population_size=100, generations=4, n_jobs=n_jobs, random_state=0).fit(boston.data[:100, :], boston.target[:100]) for n_jobs in [1, 2, 3, 8, 16] ] preds = [e.predict(boston.data[500:, :]) for e in ests] for pred1, pred2 in zip(preds, preds[1:]): assert_array_almost_equal(pred1, pred2) lengths = np.array([[gp.length_ for gp in e._programs[-1]] for e in ests]) for len1, len2 in zip(lengths, lengths[1:]): assert_array_almost_equal(len1, len2) # Check the transformer ests = [ SymbolicTransformer(population_size=100, hall_of_fame=50, generations=4, n_jobs=n_jobs, random_state=0).fit(boston.data[:100, :], boston.target[:100]) for n_jobs in [1, 2, 3, 8, 16] ] preds = [e.transform(boston.data[500:, :]) for e in ests] for pred1, pred2 in zip(preds, preds[1:]): assert_array_almost_equal(pred1, pred2) lengths = np.array([[gp.length_ for gp in e._programs[-1]] for e in ests]) for len1, len2 in zip(lengths, lengths[1:]): assert_array_almost_equal(len1, len2)
def test_compute_class_weight_auto_negative(): # Test compute_class_weight when labels are negative # Test with balanced class labels. classes = np.array([-2, -1, 0]) y = np.asarray([-1, -1, 0, 0, -2, -2]) cw = compute_class_weight("auto", classes, y) assert_almost_equal(cw.sum(), classes.shape) assert_equal(len(cw), len(classes)) assert_array_almost_equal(cw, np.array([1., 1., 1.])) # Test with unbalanced class labels. y = np.asarray([-1, 0, 0, -2, -2, -2]) cw = compute_class_weight("auto", classes, y) assert_almost_equal(cw.sum(), classes.shape) assert_equal(len(cw), len(classes)) assert_array_almost_equal(cw, np.array([0.545, 1.636, 0.818]), decimal=3)
def check_classifiers_pickle(name, Classifier): X, y = make_blobs(random_state=0) X, y = shuffle(X, y, random_state=7) X -= X.min() # catch deprecation warnings with warnings.catch_warnings(record=True): classifier = Classifier() set_fast_parameters(classifier) # raises error on malformed input for fit assert_raises(ValueError, classifier.fit, X, y[:-1]) # fit classifier.fit(X, y) y_pred = classifier.predict(X) pickled_classifier = pickle.dumps(classifier) unpickled_classifier = pickle.loads(pickled_classifier) pickled_y_pred = unpickled_classifier.predict(X) assert_array_almost_equal(pickled_y_pred, y_pred)
def test_execute(): """Check executing the program works""" params = {'function_set': ['add2', 'sub2', 'mul2', 'div2'], 'arities': {2: ['add2', 'sub2', 'mul2', 'div2']}, 'init_depth': (2, 6), 'init_method': 'half and half', 'n_features': 10, 'const_range': (-1.0, 1.0), 'metric': 'mean absolute error', 'p_point_replace': 0.05, 'parsimony_coefficient': 0.1} random_state = check_random_state(415) # Test for a small program test_gp = ['mul2', 'div2', 8, 1, 'sub2', 9, .5] X = np.reshape(random_state.uniform(size=50), (5, 10)) gp = _Program(random_state=random_state, program=test_gp, **params) result = gp.execute(X) expected = [-0.19656208, 0.78197782, -1.70123845, -0.60175969, -0.01082618] assert_array_almost_equal(result, expected)
def test_all_metrics(): """Check all supported metrics work""" params = { 'function_set': [add2, sub2, mul2, div2], 'arities': { 2: [add2, sub2, mul2, div2] }, 'init_depth': (2, 6), 'init_method': 'half and half', 'n_features': 10, 'const_range': (-1.0, 1.0), 'metric': 'mean absolute error', 'p_point_replace': 0.05, 'parsimony_coefficient': 0.1 } random_state = check_random_state(415) # Test for a small program test_gp = [mul2, div2, 8, 1, sub2, 9, .5] gp = _Program(random_state=random_state, program=test_gp, **params) X = np.reshape(random_state.uniform(size=50), (5, 10)) y = random_state.uniform(size=5) sample_weight = np.ones(5) expected = [ 1.48719809776, 1.82389179833, 1.76013763179, 0.98663772258, -0.2928200724, -0.5 ] result = [] for m in [ 'mean absolute error', 'mse', 'rmse', 'rmsle', 'pearson', 'spearman' ]: gp.metric = m gp.raw_fitness_ = gp.raw_fitness(X, y, sample_weight) result.append(gp.fitness()) assert_array_almost_equal(result, expected) # And check a fake one gp.metric = 'the larch' assert_raises(ValueError, gp.raw_fitness, X, y, sample_weight)
def check_estimators_data_not_an_array(name, Estimator, X, y): if name in CROSS_DECOMPOSITION: raise SkipTest # catch deprecation warnings with warnings.catch_warnings(record=True): # separate estimators to control random seeds estimator_1 = Estimator() estimator_2 = Estimator() set_fast_parameters(estimator_1) set_fast_parameters(estimator_2) set_random_state(estimator_1) set_random_state(estimator_2) y_ = NotAnArray(np.asarray(y)) X_ = NotAnArray(np.asarray(X)) # fit estimator_1.fit(X_, y_) pred1 = estimator_1.predict(X_) estimator_2.fit(X, y) pred2 = estimator_2.predict(X) assert_array_almost_equal(pred1, pred2, 2, name)
def test_pickle(): """Check pickability""" # Check the regressor est = SymbolicRegressor(generations=2, random_state=0) est.fit(boston.data[:100, :], boston.target[:100]) score = est.score(boston.data[500:, :], boston.target[500:]) pickle_object = pickle.dumps(est) est2 = pickle.loads(pickle_object) assert_equal(type(est2), est.__class__) score2 = est2.score(boston.data[500:, :], boston.target[500:]) assert_equal(score, score2) # Check the transformer est = SymbolicTransformer(generations=2, random_state=0) est.fit(boston.data[:100, :], boston.target[:100]) X_new = est.transform(boston.data[500:, :]) pickle_object = pickle.dumps(est) est2 = pickle.loads(pickle_object) assert_equal(type(est2), est.__class__) X_new2 = est2.transform(boston.data[500:, :]) assert_array_almost_equal(X_new, X_new2)
def test_pickle(): """Check pickability""" # Check the regressor est = SymbolicRegressor(generations=2, random_state=0) est.fit(boston.data[:100, :], boston.target[:100]) score = est.score(boston.data[500:, :], boston.target[500:]) pickle_object = pickle.dumps(est) est2 = pickle.loads(pickle_object) assert_equal(type(est2), est.__class__) score2 = est2.score(boston.data[500:, :], boston.target[500:]) assert_equal(score, score2) # Check the transformer est = SymbolicTransformer(generations=2, random_state=0) est.fit(boston.data[:100, :], boston.target[:100]) X_new = est.transform(boston.data[500:, :]) pickle_object = pickle.dumps(est) est2 = pickle.loads(pickle_object) assert_equal(type(est2), est.__class__) X_new2 = est2.transform(boston.data[500:, :]) assert_array_almost_equal(X_new, X_new2)
def test_parallel_train(): """Check predictions are the same for different n_jobs""" # Check the regressor ests = [ SymbolicRegressor(population_size=100, generations=4, n_jobs=n_jobs, random_state=0).fit(boston.data[:100, :], boston.target[:100]) for n_jobs in [1, 2, 3, 8, 16] ] preds = [e.predict(boston.data[500:, :]) for e in ests] for pred1, pred2 in zip(preds, preds[1:]): assert_array_almost_equal(pred1, pred2) lengths = np.array([[gp.length_ for gp in e._programs[-1]] for e in ests]) for len1, len2 in zip(lengths, lengths[1:]): assert_array_almost_equal(len1, len2) # Check the transformer ests = [ SymbolicTransformer(population_size=100, hall_of_fame=50, generations=4, n_jobs=n_jobs, random_state=0).fit(boston.data[:100, :], boston.target[:100]) for n_jobs in [1, 2, 3, 8, 16] ] preds = [e.transform(boston.data[500:, :]) for e in ests] for pred1, pred2 in zip(preds, preds[1:]): assert_array_almost_equal(pred1, pred2) lengths = np.array([[gp.length_ for gp in e._programs[-1]] for e in ests]) for len1, len2 in zip(lengths, lengths[1:]): assert_array_almost_equal(len1, len2)
def check_classifiers_train(name, Classifier): X_m, y_m = make_blobs(random_state=0) X_m, y_m = shuffle(X_m, y_m, random_state=7) X_m = StandardScaler().fit_transform(X_m) # generate binary problem from multi-class one y_b = y_m[y_m != 2] X_b = X_m[y_m != 2] for (X, y) in [(X_m, y_m), (X_b, y_b)]: # catch deprecation warnings classes = np.unique(y) n_classes = len(classes) n_samples, n_features = X.shape with warnings.catch_warnings(record=True): classifier = Classifier() if name in ['BernoulliNB', 'MultinomialNB']: X -= X.min() set_fast_parameters(classifier) set_random_state(classifier) # raises error on malformed input for fit assert_raises(ValueError, classifier.fit, X, y[:-1]) # fit classifier.fit(X, y) # with lists classifier.fit(X.tolist(), y.tolist()) assert_true(hasattr(classifier, "classes_")) y_pred = classifier.predict(X) assert_equal(y_pred.shape, (n_samples,)) # training set performance if name not in ['BernoulliNB', 'MultinomialNB']: assert_greater(accuracy_score(y, y_pred), 0.83) # raises error on malformed input for predict assert_raises(ValueError, classifier.predict, X.T) if hasattr(classifier, "decision_function"): try: # decision_function agrees with predict decision = classifier.decision_function(X) if n_classes is 2: assert_equal(decision.shape, (n_samples,)) dec_pred = (decision.ravel() > 0).astype(np.int) assert_array_equal(dec_pred, y_pred) if n_classes is 3: assert_equal(decision.shape, (n_samples, n_classes)) assert_array_equal(np.argmax(decision, axis=1), y_pred) # raises error on malformed input assert_raises(ValueError, classifier.decision_function, X.T) # raises error on malformed input for decision_function assert_raises(ValueError, classifier.decision_function, X.T) except NotImplementedError: pass if hasattr(classifier, "predict_proba"): # predict_proba agrees with predict y_prob = classifier.predict_proba(X) assert_equal(y_prob.shape, (n_samples, n_classes)) assert_array_equal(np.argmax(y_prob, axis=1), y_pred) # check that probas for all classes sum to one assert_array_almost_equal(np.sum(y_prob, axis=1), np.ones(n_samples)) # raises error on malformed input assert_raises(ValueError, classifier.predict_proba, X.T) # raises error on malformed input for predict_proba assert_raises(ValueError, classifier.predict_proba, X.T)
def _check_transformer(name, Transformer, X, y): if name in ('CCA', 'LocallyLinearEmbedding', 'KernelPCA') and _is_32bit(): # Those transformers yield non-deterministic output when executed on # a 32bit Python. The same transformers are stable on 64bit Python. # FIXME: try to isolate a minimalistic reproduction case only depending # on numpy & scipy and/or maybe generate a test dataset that does not # cause such unstable behaviors. msg = name + ' is non deterministic on 32bit Python' raise SkipTest(msg) n_samples, n_features = np.asarray(X).shape # catch deprecation warnings with warnings.catch_warnings(record=True): transformer = Transformer() set_random_state(transformer) set_fast_parameters(transformer) # fit if name in CROSS_DECOMPOSITION: y_ = np.c_[y, y] y_[::2, 1] *= 2 else: y_ = y transformer.fit(X, y_) X_pred = transformer.fit_transform(X, y=y_) if isinstance(X_pred, tuple): for x_pred in X_pred: assert_equal(x_pred.shape[0], n_samples) else: assert_equal(X_pred.shape[0], n_samples) if hasattr(transformer, 'transform'): if name in CROSS_DECOMPOSITION: X_pred2 = transformer.transform(X, y_) X_pred3 = transformer.fit_transform(X, y=y_) else: X_pred2 = transformer.transform(X) X_pred3 = transformer.fit_transform(X, y=y_) if isinstance(X_pred, tuple) and isinstance(X_pred2, tuple): for x_pred, x_pred2, x_pred3 in zip(X_pred, X_pred2, X_pred3): assert_array_almost_equal( x_pred, x_pred2, 2, "fit_transform and transform outcomes not consistent in %s" % Transformer) assert_array_almost_equal( x_pred, x_pred3, 2, "consecutive fit_transform outcomes not consistent in %s" % Transformer) else: assert_array_almost_equal( X_pred, X_pred2, 2, "fit_transform and transform outcomes not consistent in %s" % Transformer) assert_array_almost_equal( X_pred, X_pred3, 2, "consecutive fit_transform outcomes not consistent in %s" % Transformer) # raises error on malformed input for transform if hasattr(X, 'T'): # If it's not an array, it does not have a 'T' property assert_raises(ValueError, transformer.transform, X.T)
def test_compute_sample_weight_with_subsample(): # Test compute_sample_weight with subsamples specified. # Test with balanced classes and all samples present y = np.asarray([1, 1, 1, 2, 2, 2]) sample_weight = compute_sample_weight("auto", y, range(6)) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) # Test with column vector of balanced classes and all samples present y = np.asarray([[1], [1], [1], [2], [2], [2]]) sample_weight = compute_sample_weight("auto", y, range(6)) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) # Test with a subsample y = np.asarray([1, 1, 1, 2, 2, 2]) sample_weight = compute_sample_weight("auto", y, range(4)) assert_array_almost_equal(sample_weight, [.5, .5, .5, 1.5, 1.5, 1.5]) # Test with a bootstrap subsample y = np.asarray([1, 1, 1, 2, 2, 2]) sample_weight = compute_sample_weight("auto", y, [0, 1, 1, 2, 2, 3]) expected = np.asarray([1/3., 1/3., 1/3., 5/3., 5/3., 5/3.]) assert_array_almost_equal(sample_weight, expected) # Test with a bootstrap subsample for multi-output y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]]) sample_weight = compute_sample_weight("auto", y, [0, 1, 1, 2, 2, 3]) assert_array_almost_equal(sample_weight, expected ** 2) # Test with a missing class y = np.asarray([1, 1, 1, 2, 2, 2, 3]) sample_weight = compute_sample_weight("auto", y, range(6)) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.]) # Test with a missing class for multi-output y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1], [2, 2]]) sample_weight = compute_sample_weight("auto", y, range(6)) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.])
def test_compute_sample_weight(): # Test (and demo) compute_sample_weight. # Test with balanced classes y = np.asarray([1, 1, 1, 2, 2, 2]) sample_weight = compute_sample_weight("auto", y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) # Test with user-defined weights sample_weight = compute_sample_weight({1: 2, 2: 1}, y) assert_array_almost_equal(sample_weight, [2., 2., 2., 1., 1., 1.]) # Test with column vector of balanced classes y = np.asarray([[1], [1], [1], [2], [2], [2]]) sample_weight = compute_sample_weight("auto", y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) # Test with unbalanced classes y = np.asarray([1, 1, 1, 2, 2, 2, 3]) sample_weight = compute_sample_weight("auto", y) expected = np.asarray([.6, .6, .6, .6, .6, .6, 1.8]) assert_array_almost_equal(sample_weight, expected) # Test with `None` weights sample_weight = compute_sample_weight(None, y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 1.]) # Test with multi-output of balanced classes y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]]) sample_weight = compute_sample_weight("auto", y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) # Test with multi-output with user-defined weights y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]]) sample_weight = compute_sample_weight([{1: 2, 2: 1}, {0: 1, 1: 2}], y) assert_array_almost_equal(sample_weight, [2., 2., 2., 2., 2., 2.]) # Test with multi-output of unbalanced classes y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1], [3, -1]]) sample_weight = compute_sample_weight("auto", y) assert_array_almost_equal(sample_weight, expected ** 2)