    def test_less_than_three_columns_raises_error_with_correct_message(self):
        try:
            calculate_random_forest_mtry_hyperparameter(2, 'classification')
            # Fail the test if the above call doesn't throw an error
            self.fail()
        except HealthcareAIError as e:
            self.assertEqual(e.message, 'You need more than two columns to tune hyperparameters.')

    def test_non_integer_columns_raises_error(self):
        try:
            calculate_random_forest_mtry_hyperparameter('regression_metrics', 'classification')
            # Fail the test if the above call doesn't throw an error
            self.fail()
        except HealthcareAIError as e:
            self.assertEqual(e.message, 'The number_of_columns must be an integer')

    def test_bad_model_type_raises_error_with_correct_message(self):
        try:
            calculate_random_forest_mtry_hyperparameter(3, 'regression_metrics')
            # Fail the test if the above call doesn't throw an error
            self.fail()
        except HealthcareAIError as e:
            self.assertEqual(e.message, 'Please specify model type of \'regression\' or \'classification\'')
    def test_negative_columns_raises_error_with_correct_message(self):
        try:
            calculate_random_forest_mtry_hyperparameter(-10, 'classification')
            # Fail the test if the above call doesn't throw an error
            self.fail()
        except HealthcareAIError as e:
            self.assertEqual(e.message, 'You need more than two columns to tune hyperparameters.')
    def random_forest_regressor(self,
                                trees=200,
                                scoring_metric='neg_mean_squared_error',
                                hyperparameter_grid=None,
                                randomized_search=True,
                                number_iteration_samples=5):
        """
        A light wrapper for Sklearn's random forest regressor that performs 
        randomized search over an overridable default hyperparameter grid.
        
        Args:
            trees (int): number of trees to use if not performing a randomized 

            grid search scoring_metric (str): Any sklearn scoring metric 
            appropriate for regression

            hyperparameter_grid (dict): hyperparameters by name
            randomized_search (bool): True for randomized search (default)

            number_iteration_samples (int): Number of models to train during the 
            randomized search for exploring the hyperparameter space. More may 
            lead to a better model, but will take longer.

        Returns:
            TrainedSupervisedModel: 
        """
        self.validate_regression('Random Forest Regressor')
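        # When no grid is supplied, build a small default: a three-value
        # max_features (mtry) range sized to the number of feature columns,
        # a few n_estimators options, and a cap of 5 randomized-search draws.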
        if hyperparameter_grid is None:
            max_features = hcai_helpers.calculate_random_forest_mtry_hyperparameter(
                len(self.X_test.columns), self.model_type)
            hyperparameter_grid = {
                'n_estimators': [10, 50, 200],
                'max_features': max_features
            }
            number_iteration_samples = 5

        algorithm = get_algorithm(
            RandomForestRegressor,
            scoring_metric,
            hyperparameter_grid,
            randomized_search,
            number_iteration_samples=number_iteration_samples,
            n_estimators=trees)

        trained_supervised_model = self._create_trained_supervised_model(
            algorithm)

        return trained_supervised_model
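
A usage sketch (not from the source): assuming `trainer` is an instance of the trainer class that defines random_forest_regressor, already initialized so that self.X_test and self.model_type are populated, a call spelling out the defaults might look like this.

# Hypothetical call; `trainer` is assumed to be set up elsewhere with data.
trained_rf = trainer.random_forest_regressor(
    trees=200,                                 # tree count when no randomized search is run
    scoring_metric='neg_mean_squared_error',   # any sklearn regression scoring metric
    randomized_search=True,                    # sample the default hyperparameter grid
    number_iteration_samples=5)                # candidate models to train during the search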
    def test_one_hundred_columns_regression(self):
        result = calculate_random_forest_mtry_hyperparameter(100, 'regression')
        self.assertEqual(result, [32, 33, 34])

    def test_one_hundred_columns_classification(self):
        result = calculate_random_forest_mtry_hyperparameter(100, 'classification')
        self.assertEqual(result, [9, 10, 11])

    def test_ten_columns_regression(self):
        result = calculate_random_forest_mtry_hyperparameter(10, 'regression')
        self.assertEqual(result, [2, 3, 4])

    def test_three_columns_regression(self):
        result = calculate_random_forest_mtry_hyperparameter(3, 'regression')
        self.assertEqual(result, [1, 2, 3])
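
For reference, here is a minimal sketch of a calculate_random_forest_mtry_hyperparameter that is consistent with the tests above; it is an assumption, not the library's actual code. The grid is centered on the usual random forest mtry defaults (sqrt(p) for classification, p/3 for regression) and shifted so no value drops below 1. The HealthcareAIError stand-in exists only to keep the sketch self-contained; as the tests imply, it exposes the raised text on .message.

import math


class HealthcareAIError(Exception):
    """Stand-in for healthcareai's error type so the sketch runs on its own."""

    def __init__(self, message):
        super().__init__(message)
        self.message = message


def calculate_random_forest_mtry_hyperparameter(number_of_columns, model_type):
    """Return a three-value max_features (mtry) grid around a sensible default."""
    # Check the type first so a non-integer never reaches the numeric comparison.
    if not isinstance(number_of_columns, int):
        raise HealthcareAIError('The number_of_columns must be an integer')
    if number_of_columns < 3:
        raise HealthcareAIError('You need more than two columns to tune hyperparameters.')

    if model_type == 'classification':
        default_mtry = math.floor(math.sqrt(number_of_columns))
    elif model_type == 'regression':
        default_mtry = math.floor(number_of_columns / 3)
    else:
        raise HealthcareAIError('Please specify model type of \'regression\' or \'classification\'')

    # One value below and one above the default, never going under 1,
    # e.g. 100 classification columns -> [9, 10, 11].
    start = max(1, default_mtry - 1)
    return [start, start + 1, start + 2]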