def test_default_configuration_iterative_fit(self):
    """Check that each iterative_fit call grows the random forest by one tree.

    A pipeline restricted to the random forest classifier with no feature
    preprocessing is built, its transformer steps are fitted on the iris
    training split, and ``iterative_fit`` is then invoked ten times.  After
    the k-th invocation the estimator wrapped by the last pipeline step must
    report ``n_estimators == k``.
    """
    pipeline = SimpleClassificationPipeline(
        include={'classifier': ['random_forest'],
                 'feature_preprocessor': ['no_preprocessing']})
    # Only the training split is needed; the test split is discarded.
    train_x, train_y, _, _ = get_dataset(dataset='iris')
    pipeline.fit_transformer(train_x, train_y)

    for expected_trees in range(1, 11):
        pipeline.iterative_fit(train_x, train_y)
        # The last step holds the classifier choice wrapping the estimator.
        forest = pipeline.steps[-1][-1].choice.estimator
        self.assertEqual(forest.n_estimators, expected_trees)
def test_default_configuration_iterative_fit(self):
    """Check that each iterative_fit call adds one estimator to the forest.

    Builds a pipeline limited to the random forest classifier without
    preprocessing, fits the transformer part on the iris training data and
    then calls ``iterative_fit`` ten times, asserting after every call that
    ``n_estimators`` of the final step's estimator equals the number of
    calls made so far.
    """
    classifier = SimpleClassificationPipeline(
        include={'classifier': ['random_forest'],
                 'preprocessor': ['no_preprocessing']})
    # The test split is not used by this test.
    X_train, Y_train, _, _ = get_dataset(dataset='iris')
    # The return value is not needed here: iterative_fit below is fed the
    # raw training data, so the result is intentionally not bound.
    classifier.fit_transformer(X_train, Y_train)
    for i in range(1, 11):
        classifier.iterative_fit(X_train, Y_train)
        self.assertEqual(
            classifier.steps[-1][-1].choice.estimator.n_estimators, i)
def test_weighting_effect(self):
    """Compare pinned F1 scores with balancing 'none' versus 'weighting'.

    A synthetic two-class data set with class weights 0.8/0.2 is generated
    once.  For every listed classifier (without preprocessing) and every
    listed preprocessor (combined with the 'sgd' classifier), the default
    configuration is evaluated with ``balancing:strategy`` set to 'none'
    and to 'weighting'.  Each combination is fitted twice -- once through
    ``fit`` and once through ``fit_transformer`` + ``fit_estimator`` -- and
    both runs must reproduce the same pinned F1 score to three places.
    """
    data = sklearn.datasets.make_classification(
        n_samples=200, n_features=10, n_redundant=2, n_informative=2,
        n_repeated=2, n_clusters_per_class=2, weights=[0.8, 0.2],
        random_state=1)

    def split_data():
        # First 100 samples are used for training, the rest for testing.
        # NOTE(review): copy.copy is shallow, so the slices still refer to
        # the arrays held by `data`; if the intent is to shield `data` from
        # in-place changes by a pipeline, a deep copy would be needed --
        # confirm before changing, since the pinned scores depend on it.
        data_ = copy.copy(data)
        return data_[0][:100], data_[1][:100], data_[0][100:], data_[1][100:]

    def assert_f1(y_true, y_pred, expected, msg):
        # f1_score takes (y_true, y_pred); binary F1 is symmetric in the
        # two arguments, so the pinned values are independent of the order.
        self.assertAlmostEqual(
            sklearn.metrics.f1_score(y_true, y_pred), expected,
            places=3, msg=msg)

    # The class objects in the tuples are not used inside the loops; they
    # are kept in the tables so the tables stay unchanged.
    for name, _clf, acc_no_weighting, acc_weighting in \
            [('adaboost', AdaboostClassifier, 0.810, 0.735),
             ('decision_tree', DecisionTree, 0.780, 0.643),
             ('extra_trees', ExtraTreesClassifier, 0.75, 0.800),
             ('gradient_boosting', GradientBoostingClassifier,
              0.789, 0.762),
             ('random_forest', RandomForest, 0.75, 0.821),
             ('libsvm_svc', LibSVM_SVC, 0.769, 0.72),
             ('liblinear_svc', LibLinear_SVC, 0.762, 0.735),
             ('sgd', SGD, 0.704, 0.667)
             ]:
        for strategy, acc in [('none', acc_no_weighting),
                              ('weighting', acc_weighting)]:
            # Fit
            X_train, Y_train, X_test, Y_test = split_data()

            include = {'classifier': [name],
                       'preprocessor': ['no_preprocessing']}
            # A throw-away pipeline is only used to obtain the default
            # configuration of the restricted search space.
            classifier = SimpleClassificationPipeline(
                random_state=1, include=include)
            cs = classifier.get_hyperparameter_search_space()
            default = cs.get_default_configuration()
            default._values['balancing:strategy'] = strategy
            classifier = SimpleClassificationPipeline(
                default, random_state=1, include=include)
            predictor = classifier.fit(X_train, Y_train)
            predictions = predictor.predict(X_test)
            assert_f1(Y_test, predictions, acc, (name, strategy))

            # fit_transformer and fit_estimator
            X_train, Y_train, X_test, Y_test = split_data()
            classifier = SimpleClassificationPipeline(
                default, random_state=1, include=include)
            classifier.set_hyperparameters(configuration=default)
            Xt, fit_params = classifier.fit_transformer(X_train, Y_train)
            classifier.fit_estimator(Xt, Y_train, **fit_params)
            predictions = classifier.predict(X_test)
            assert_f1(Y_test, predictions, acc,
                      (name, strategy, 'fit_transformer/fit_estimator'))

    for name, _pre, acc_no_weighting, acc_weighting in \
            [('extra_trees_preproc_for_classification',
              ExtraTreesPreprocessorClassification, 0.691, 0.692),
             ('liblinear_svc_preprocessor', LibLinear_Preprocessor,
              0.692, 0.590)]:
        for strategy, acc in [('none', acc_no_weighting),
                              ('weighting', acc_weighting)]:
            X_train, Y_train, X_test, Y_test = split_data()

            include = {'classifier': ['sgd'], 'preprocessor': [name]}
            classifier = SimpleClassificationPipeline(
                random_state=1, include=include)
            cs = classifier.get_hyperparameter_search_space()
            default = cs.get_default_configuration()
            default._values['balancing:strategy'] = strategy
            # Unlike the classifier loop, the existing pipeline is
            # reconfigured here instead of constructing a new one.
            classifier.set_hyperparameters(default)
            predictor = classifier.fit(X_train, Y_train)
            predictions = predictor.predict(X_test)
            assert_f1(Y_test, predictions, acc, (name, strategy))

            # fit_transformer and fit_estimator
            X_train, Y_train, X_test, Y_test = split_data()
            # Set the strategy again on the shared configuration object
            # before it is handed to the second pipeline.
            default._values['balancing:strategy'] = strategy
            classifier = SimpleClassificationPipeline(
                default, random_state=1, include=include)
            Xt, fit_params = classifier.fit_transformer(X_train, Y_train)
            classifier.fit_estimator(Xt, Y_train, **fit_params)
            predictions = classifier.predict(X_test)
            assert_f1(Y_test, predictions, acc,
                      (name, strategy, 'fit_transformer/fit_estimator'))
def test_weighting_effect(self):
    """Compare pinned F1 scores with balancing 'none' versus 'weighting'.

    A synthetic two-class data set with class weights 0.8/0.2 is generated
    once.  For every listed classifier (without preprocessing) and every
    listed preprocessor (combined with the 'sgd' classifier), the default
    configuration is evaluated with ``balancing:strategy`` set to 'none'
    and to 'weighting'.  Each combination is fitted twice -- once through
    ``fit`` and once through ``fit_transformer`` + ``fit_estimator`` -- and
    both runs must reproduce the same pinned F1 score.  Classifier entries
    carry their own comparison precision (``places``); preprocessor entries
    are always compared to three places.
    """
    data = sklearn.datasets.make_classification(
        n_samples=200, n_features=10, n_redundant=2, n_informative=2,
        n_repeated=2, n_clusters_per_class=2, weights=[0.8, 0.2],
        random_state=1)

    def split_data():
        # First 100 samples are used for training, the rest for testing.
        # NOTE(review): copy.copy is shallow, so the slices still refer to
        # the arrays held by `data`; if the intent is to shield `data` from
        # in-place changes by a pipeline, a deep copy would be needed --
        # confirm before changing, since the pinned scores depend on it.
        data_ = copy.copy(data)
        return data_[0][:100], data_[1][:100], data_[0][100:], data_[1][100:]

    def assert_f1(y_true, y_pred, expected, places, msg):
        # f1_score takes (y_true, y_pred); binary F1 is symmetric in the
        # two arguments, so the pinned values are independent of the order.
        self.assertAlmostEqual(
            sklearn.metrics.f1_score(y_true, y_pred), expected,
            places=places, msg=msg)

    # The class objects in the tuples are not used inside the loops; they
    # are kept in the tables so the tables stay unchanged.
    for name, _clf, acc_no_weighting, acc_weighting, places in \
            [('adaboost', AdaboostClassifier, 0.810, 0.735, 3),
             ('decision_tree', DecisionTree, 0.780, 0.643, 3),
             ('extra_trees', ExtraTreesClassifier, 0.780, 0.8, 3),
             ('gradient_boosting', GradientBoostingClassifier,
              0.737, 0.684, 3),
             ('random_forest', RandomForest, 0.780, 0.789, 3),
             ('libsvm_svc', LibSVM_SVC, 0.769, 0.72, 3),
             ('liblinear_svc', LibLinear_SVC, 0.762, 0.735, 3),
             ('passive_aggressive', PassiveAggressive, 0.642, 0.449, 3),
             ('sgd', SGD, 0.818, 0.575, 2)
             ]:
        for strategy, acc in [
            ('none', acc_no_weighting),
            ('weighting', acc_weighting)
        ]:
            # Fit
            X_train, Y_train, X_test, Y_test = split_data()

            include = {'classifier': [name],
                       'preprocessor': ['no_preprocessing']}
            # A throw-away pipeline is only used to obtain the default
            # configuration of the restricted search space.
            classifier = SimpleClassificationPipeline(
                random_state=1, include=include)
            cs = classifier.get_hyperparameter_search_space()
            default = cs.get_default_configuration()
            default._values['balancing:strategy'] = strategy
            classifier = SimpleClassificationPipeline(
                default, random_state=1, include=include)
            predictor = classifier.fit(X_train, Y_train)
            predictions = predictor.predict(X_test)
            assert_f1(Y_test, predictions, acc, places, (name, strategy))

            # fit_transformer and fit_estimator
            X_train, Y_train, X_test, Y_test = split_data()
            classifier = SimpleClassificationPipeline(
                default, random_state=1, include=include)
            classifier.set_hyperparameters(configuration=default)
            Xt, fit_params = classifier.fit_transformer(X_train, Y_train)
            classifier.fit_estimator(Xt, Y_train, **fit_params)
            predictions = classifier.predict(X_test)
            assert_f1(Y_test, predictions, acc, places,
                      (name, strategy, 'fit_transformer/fit_estimator'))

    for name, _pre, acc_no_weighting, acc_weighting in \
            [('extra_trees_preproc_for_classification',
              ExtraTreesPreprocessorClassification, 0.810, 0.563),
             ('liblinear_svc_preprocessor', LibLinear_Preprocessor,
              0.837, 0.567)]:
        for strategy, acc in [('none', acc_no_weighting),
                              ('weighting', acc_weighting)]:
            X_train, Y_train, X_test, Y_test = split_data()

            include = {'classifier': ['sgd'], 'preprocessor': [name]}
            classifier = SimpleClassificationPipeline(
                random_state=1, include=include)
            cs = classifier.get_hyperparameter_search_space()
            default = cs.get_default_configuration()
            default._values['balancing:strategy'] = strategy
            # Unlike the classifier loop, the existing pipeline is
            # reconfigured here instead of constructing a new one.
            classifier.set_hyperparameters(default)
            predictor = classifier.fit(X_train, Y_train)
            predictions = predictor.predict(X_test)
            assert_f1(Y_test, predictions, acc, 3, (name, strategy))

            # fit_transformer and fit_estimator
            X_train, Y_train, X_test, Y_test = split_data()
            # Set the strategy again on the shared configuration object
            # before it is handed to the second pipeline.
            default._values['balancing:strategy'] = strategy
            classifier = SimpleClassificationPipeline(
                default, random_state=1, include=include)
            Xt, fit_params = classifier.fit_transformer(X_train, Y_train)
            classifier.fit_estimator(Xt, Y_train, **fit_params)
            predictions = classifier.predict(X_test)
            assert_f1(Y_test, predictions, acc, 3,
                      (name, strategy, 'fit_transformer/fit_estimator'))