def test_invalids(self):
     """Check that invalid inputs make ``split_optimize_classify`` raise.

     Every scenario below expects a ``ValueError`` whose message matches
     the given pattern. The calls are not assigned to anything because
     they must not return.
     """
     # Both estimators are built from the same table/metadata
     # (``table_chard_fp`` / ``md_chard_fp``) with identical settings; only
     # the estimator name differs.
     setup_kwargs = dict(
         n_estimators=10, n_jobs=1, cv=1, random_state=123,
         parameter_tuning=False, classification=True)
     estimator, _, _ = _set_parameters_and_estimator(
         'RandomForestClassifier', self.table_chard_fp, self.md_chard_fp,
         'Region', **setup_kwargs)
     # NOTE(review): the regressor is also set up with classification=True;
     # presumably deliberate for the mismatch scenario below -- confirm.
     regressor, _, _ = _set_parameters_and_estimator(
         'RandomForestRegressor', self.table_chard_fp, self.md_chard_fp,
         'Region', **setup_kwargs)

     # Keyword arguments shared by every split_optimize_classify call here.
     fit_kwargs = dict(
         cv=1, random_state=123, n_jobs=1,
         optimize_feature_selection=False, parameter_tuning=False,
         param_dist=None, calc_feature_importance=False)

     # zero samples (if mapping file and table have no common samples)
     with self.assertRaisesRegex(ValueError, "metadata"):
         split_optimize_classify(
             self.table_ecam_fp, self.md_chard_fp, 'Region', estimator,
             self.temp_dir.name, test_size=0.5, **fit_kwargs)

     # too few samples to stratify
     with self.assertRaisesRegex(ValueError, "metadata"):
         split_optimize_classify(
             self.table_chard_fp, self.md_chard_fp, 'Region', estimator,
             self.temp_dir.name, test_size=0.9, **fit_kwargs)

     # regressor chosen for classification problem
     with self.assertRaisesRegex(ValueError, "convert"):
         split_optimize_classify(
             self.table_chard_fp, self.md_chard_fp, 'Region', regressor,
             self.temp_dir.name, test_size=0.5, **fit_kwargs)
# Example no. 2
# 0
 def test_regressors(self):
     """Run each listed regressor and compare its score to the seeded value.

     For every regressor name, an estimator is built and passed to
     ``split_optimize_classify`` with a fixed ``random_state``. The third
     element of the returned tuple is then checked against
     ``seeded_results`` to four decimal places.
     """
     regressor_names = (
         'RandomForestRegressor', 'ExtraTreesRegressor',
         'GradientBoostingRegressor', 'AdaBoostRegressor', 'Lasso',
         'Ridge', 'ElasticNet', 'KNeighborsRegressor', 'LinearSVR',
         'SVR',
     )
     for name in regressor_names:
         # Each regressor writes into its own sub-directory of the
         # temporary directory.
         out_dir = join(self.temp_dir.name, name)
         mkdir(out_dir)

         model, _, _ = _set_parameters_and_estimator(
             name, self.table_ecam_fp, self.md_ecam_fp, 'month',
             n_estimators=10, n_jobs=1, cv=1, random_state=123,
             parameter_tuning=False, classification=False)

         fit_results = split_optimize_classify(
             self.table_ecam_fp, self.md_ecam_fp, 'month', model, out_dir,
             test_size=0.5, cv=1, random_state=123, n_jobs=1,
             optimize_feature_selection=False, parameter_tuning=False,
             param_dist=None, classification=False,
             calc_feature_importance=False, scoring=mean_squared_error)
         # Only the score (third element) is checked by this test.
         _, _, accuracy, _ = fit_results

         expected = seeded_results[name]
         failure_msg = (
             'Accuracy of %s regressor was %f, but expected %f' %
             (name, accuracy, expected))
         self.assertAlmostEqual(
             accuracy, expected, places=4, msg=failure_msg)
 def test_classifiers(self):
     """Run each listed classifier and compare its score to the seeded value.

     For every classifier name, an estimator is built and passed to
     ``split_optimize_classify`` with a fixed ``random_state``. The
     returned accuracy is then checked against ``seeded_results`` to four
     decimal places, the same tolerance ``test_regressors`` uses.
     """
     for classifier in ['RandomForestClassifier', 'ExtraTreesClassifier',
                        'GradientBoostingClassifier', 'AdaBoostClassifier',
                        'LinearSVC', 'SVC', 'KNeighborsClassifier']:
         # Each classifier writes into its own sub-directory of the
         # temporary directory.
         tmpd = join(self.temp_dir.name, classifier)
         mkdir(tmpd)
         estimator, _, _ = _set_parameters_and_estimator(
             classifier, self.table_chard_fp, self.md_chard_fp, 'Region',
             n_estimators=10, n_jobs=1, cv=1,
             random_state=123, parameter_tuning=False, classification=True)
         # Only the accuracy (third element) is checked by this test.
         _, _, accuracy, _ = split_optimize_classify(
             self.table_chard_fp, self.md_chard_fp, 'Region', estimator,
             tmpd, test_size=0.5, cv=1, random_state=123,
             n_jobs=1, optimize_feature_selection=False,
             parameter_tuning=False, param_dist=None,
             calc_feature_importance=False)
         expected = seeded_results[classifier]
         # Single assertion at 4 places: an extra default-tolerance
         # (7 places) check would override this tolerance and hide the
         # descriptive failure message.
         self.assertAlmostEqual(
             accuracy, expected, places=4,
             msg='Accuracy of %s classifier was %f, but expected %f' % (
                 classifier, accuracy, expected))