def test_invalids(self):
    """split_optimize_classify raises ValueError on invalid inputs.

    Covers three failure modes: no overlapping samples between table and
    metadata, too few samples to stratify the train/test split, and a
    regressor passed to a classification problem.
    """
    # The helper also returns parameter dicts we do not use here; bind
    # them to `_` so the throwaway names cannot shadow e.g. pandas' `pd`.
    estimator, _, _ = _set_parameters_and_estimator(
        'RandomForestClassifier', self.table_chard_fp, self.md_chard_fp,
        'Region', n_estimators=10, n_jobs=1, cv=1,
        random_state=123, parameter_tuning=False, classification=True)
    # NOTE(review): classification=True looks odd when building a
    # regressor — presumably harmless since parameter_tuning=False, but
    # confirm against _set_parameters_and_estimator's contract.
    regressor, _, _ = _set_parameters_and_estimator(
        'RandomForestRegressor', self.table_chard_fp, self.md_chard_fp,
        'Region', n_estimators=10, n_jobs=1, cv=1,
        random_state=123, parameter_tuning=False, classification=True)
    # zero samples (if mapping file and table have no common samples)
    with self.assertRaisesRegex(ValueError, "metadata"):
        estimator, cm, accuracy, importances = split_optimize_classify(
            self.table_ecam_fp, self.md_chard_fp, 'Region', estimator,
            self.temp_dir.name, test_size=0.5, cv=1, random_state=123,
            n_jobs=1, optimize_feature_selection=False,
            parameter_tuning=False, param_dist=None,
            calc_feature_importance=False)
    # too few samples to stratify
    with self.assertRaisesRegex(ValueError, "metadata"):
        estimator, cm, accuracy, importances = split_optimize_classify(
            self.table_chard_fp, self.md_chard_fp, 'Region', estimator,
            self.temp_dir.name, test_size=0.9, cv=1, random_state=123,
            n_jobs=1, optimize_feature_selection=False,
            parameter_tuning=False, param_dist=None,
            calc_feature_importance=False)
    # regressor chosen for classification problem
    with self.assertRaisesRegex(ValueError, "convert"):
        estimator, cm, accuracy, importances = split_optimize_classify(
            self.table_chard_fp, self.md_chard_fp, 'Region', regressor,
            self.temp_dir.name, test_size=0.5, cv=1, random_state=123,
            n_jobs=1, optimize_feature_selection=False,
            parameter_tuning=False, param_dist=None,
            calc_feature_importance=False)
def test_regressors(self):
    """Each supported regressor reproduces its seeded accuracy score."""
    regressors = [
        'RandomForestRegressor', 'ExtraTreesRegressor',
        'GradientBoostingRegressor', 'AdaBoostRegressor', 'Lasso',
        'Ridge', 'ElasticNet', 'KNeighborsRegressor', 'LinearSVR',
        'SVR',
    ]
    for regressor in regressors:
        # Give every estimator its own output directory.
        outdir = join(self.temp_dir.name, regressor)
        mkdir(outdir)
        model, _, _ = _set_parameters_and_estimator(
            regressor, self.table_ecam_fp, self.md_ecam_fp, 'month',
            n_estimators=10, n_jobs=1, cv=1, random_state=123,
            parameter_tuning=False, classification=False)
        model, cm, accuracy, importances = split_optimize_classify(
            self.table_ecam_fp, self.md_ecam_fp, 'month', model,
            outdir, test_size=0.5, cv=1, random_state=123, n_jobs=1,
            optimize_feature_selection=False, parameter_tuning=False,
            param_dist=None, classification=False,
            calc_feature_importance=False, scoring=mean_squared_error)
        # Fixed random_state makes the score reproducible; compare
        # against the recorded seeded result for this regressor.
        expected = seeded_results[regressor]
        self.assertAlmostEqual(
            accuracy, expected, places=4,
            msg='Accuracy of %s regressor was %f, but expected %f'
                % (regressor, accuracy, expected))
def test_classifiers(self):
    """Each supported classifier reproduces its seeded accuracy score."""
    for classifier in ['RandomForestClassifier', 'ExtraTreesClassifier',
                       'GradientBoostingClassifier', 'AdaBoostClassifier',
                       'LinearSVC', 'SVC', 'KNeighborsClassifier']:
        # Give every estimator its own output directory.
        tmpd = join(self.temp_dir.name, classifier)
        mkdir(tmpd)
        # Unused parameter dicts bound to `_` so they cannot shadow
        # the conventional pandas alias `pd`.
        estimator, _, _ = _set_parameters_and_estimator(
            classifier, self.table_chard_fp, self.md_chard_fp, 'Region',
            n_estimators=10, n_jobs=1, cv=1, random_state=123,
            parameter_tuning=False, classification=True)
        estimator, cm, accuracy, importances = split_optimize_classify(
            self.table_chard_fp, self.md_chard_fp, 'Region', estimator,
            tmpd, test_size=0.5, cv=1, random_state=123, n_jobs=1,
            optimize_feature_selection=False, parameter_tuning=False,
            param_dist=None, calc_feature_importance=False)
        # Bug fix: the original asserted twice — first with the default
        # places=7 and no message, then with places=4 and a message. The
        # stricter duplicate fired first, so the informative assertion
        # was unreachable and the tolerance disagreed with
        # test_regressors. Keep only the intended places=4 check.
        self.assertAlmostEqual(
            accuracy, seeded_results[classifier], places=4,
            msg='Accuracy of %s classifier was %f, but expected %f' % (
                classifier, accuracy, seeded_results[classifier]))