def lasso_regression(
        self,
        scoring_metric='neg_mean_squared_error',
        hyperparameter_grid=None,
        randomized_search=True,
        number_iteration_samples=2):
        """
        Train a lasso regression model by searching a hyperparameter grid.

        Calls ``validate_regression`` first, then wraps sklearn's ``Lasso`` with
        ``get_algorithm`` and passes the result to
        ``_create_trained_supervised_model``.

        Args:
            scoring_metric (str): Any sklearn scoring metric appropriate for regression.
            hyperparameter_grid (dict): Hyperparameters by name. When None, the default grid
                ``{"fit_intercept": [True, False]}`` is used.
            randomized_search (bool): True for randomized search (default).
            number_iteration_samples (int): Number of models to train during the randomized search for
                exploring the hyperparameter space. More may lead to a better model, but will take longer.
                Note: when ``hyperparameter_grid`` is None this value is reset to 2, whatever was passed in.

        Returns:
            TrainedSupervisedModel: The result of ``_create_trained_supervised_model``.
        """
        self.validate_regression('Lasso Regression')

        if hyperparameter_grid is None:
            # The default grid comes with its own sample count, which replaces the caller's
            # value (presumably because this grid only offers two combinations).
            hyperparameter_grid, number_iteration_samples = {"fit_intercept": [True, False]}, 2

        configured_algorithm = get_algorithm(
            Lasso,
            scoring_metric,
            hyperparameter_grid,
            randomized_search,
            number_iteration_samples=number_iteration_samples)

        return self._create_trained_supervised_model(configured_algorithm)
예제 #2
0
    def logistic_regression(
            self,
            scoring_metric='roc_auc',
            hyperparameter_grid=None,
            randomized_search=True,
            number_iteration_samples=10):
        """
        Train a logistic regression classifier by searching a hyperparameter grid.

        Calls ``validate_classification`` first, then wraps sklearn's
        ``LogisticRegression`` with ``get_algorithm`` and passes the result to
        ``_create_trained_supervised_model``.

        Args:
            scoring_metric (str): Any sklearn scoring metric appropriate for classification.
            hyperparameter_grid (dict): Hyperparameters by name. When None, a default grid over ``C``
                and ``class_weight`` is used.
            randomized_search (bool): True for randomized search (default).
            number_iteration_samples (int): Number of models to train during the randomized search for
                exploring the hyperparameter space. More may lead to a better model, but will take longer.
                Note: when ``hyperparameter_grid`` is None this value is reset to 10, whatever was passed in.

        Returns:
            TrainedSupervisedModel: The result of ``_create_trained_supervised_model``.
        """
        self.validate_classification('Logistic Regression')

        if hyperparameter_grid is None:
            # Falling back to the default grid also resets the sample count to 10,
            # which equals the 5 x 2 combinations this grid contains.
            hyperparameter_grid = {
                'C': [0.01, 0.1, 1, 10, 100],
                'class_weight': [None, 'balanced'],
            }
            number_iteration_samples = 10

        configured_algorithm = get_algorithm(
            LogisticRegression,
            scoring_metric,
            hyperparameter_grid,
            randomized_search,
            number_iteration_samples=number_iteration_samples)

        return self._create_trained_supervised_model(configured_algorithm)
예제 #3
0
    def knn(
            self,
            scoring_metric='roc_auc',
            hyperparameter_grid=None,
            randomized_search=True,
            number_iteration_samples=10):
        """
        Train a k-nearest-neighbours classifier by searching a hyperparameter grid.

        Calls ``validate_classification`` first, then wraps sklearn's
        ``KNeighborsClassifier`` with ``get_algorithm`` and passes the result to
        ``_create_trained_supervised_model``.

        Args:
            scoring_metric (str): Any sklearn scoring metric appropriate for classification.
            hyperparameter_grid (dict): Hyperparameters by name. When None, a default grid is built
                (``n_neighbors`` from 5 through 25, ``weights`` uniform/distance) and printed.
            randomized_search (bool): True for randomized search (default).
            number_iteration_samples (int): Number of models to train during the randomized search for
                exploring the hyperparameter space. More may lead to a better model, but will take longer.
                Note: when ``hyperparameter_grid`` is None this value is reset to 10, whatever was passed in.

        Returns:
            TrainedSupervisedModel: The result of ``_create_trained_supervised_model``.
        """
        self.validate_classification('KNN')

        if hyperparameter_grid is None:
            # The default grid brings its own sample count, replacing the caller's value.
            number_iteration_samples = 10
            hyperparameter_grid = {
                'n_neighbors': list(range(5, 26)),
                'weights': ['uniform', 'distance'],
            }
            # The grid is only echoed when the defaults were applied.
            print('KNN Grid: {}'.format(hyperparameter_grid))

        configured_algorithm = get_algorithm(
            KNeighborsClassifier,
            scoring_metric,
            hyperparameter_grid,
            randomized_search,
            number_iteration_samples=number_iteration_samples)

        return self._create_trained_supervised_model(configured_algorithm)
    def lasso_regression(
            self,
            scoring_metric='neg_mean_squared_error',
            hyperparameter_grid=None,
            randomized_search=True,
            number_iteration_samples=2):
        """
        Fit a lasso regression through ``get_algorithm`` with an overridable grid.

        The sequence is: ``validate_regression``, optional fallback to the default
        grid, ``get_algorithm`` around sklearn's ``Lasso``, and finally
        ``_create_trained_supervised_model``.

        Args:
            scoring_metric (str): Any sklearn scoring metric appropriate for regression.
            hyperparameter_grid (dict): Hyperparameters by name. If omitted (None), the grid
                ``{"fit_intercept": [True, False]}`` is substituted.
            randomized_search (bool): True for randomized search (default).
            number_iteration_samples (int): Number of models to train during the randomized search
                for exploring the hyperparameter space. More may lead to a better model, but will
                take longer. Forced to 2 whenever the default grid is substituted.

        Returns:
            TrainedSupervisedModel: The result of ``_create_trained_supervised_model``.
        """
        self.validate_regression('Lasso Regression')

        using_default_grid = hyperparameter_grid is None
        if using_default_grid:
            # Both values are replaced together: the caller's sample count is not
            # honoured when the default grid is in play.
            hyperparameter_grid = {"fit_intercept": [True, False]}
            number_iteration_samples = 2

        lasso_search = get_algorithm(
            Lasso,
            scoring_metric,
            hyperparameter_grid,
            randomized_search,
            number_iteration_samples=number_iteration_samples)

        return self._create_trained_supervised_model(lasso_search)
예제 #5
0
    def random_forest_regressor(
            self,
            trees=200,
            scoring_metric='neg_mean_squared_error',
            hyperparameter_grid=None,
            randomized_search=True,
            number_iteration_samples=5):
        """
        Train a random forest regressor by searching a hyperparameter grid.

        Calls ``validate_regression`` first, then wraps sklearn's
        ``RandomForestRegressor`` with ``get_algorithm`` and passes the result to
        ``_create_trained_supervised_model``.

        Args:
            trees (int): Number of trees to use if not performing a randomized grid search.
                Forwarded to ``get_algorithm`` as ``n_estimators``.
            scoring_metric (str): Any sklearn scoring metric appropriate for regression.
            hyperparameter_grid (dict): Hyperparameters by name. When None, a default grid is built
                from ``n_estimators`` in [10, 50, 200] and a ``max_features`` value computed by
                ``hcai_helpers.calculate_random_forest_mtry_hyperparameter``.
            randomized_search (bool): True for randomized search (default).
            number_iteration_samples (int): Number of models to train during the randomized search for
                exploring the hyperparameter space. More may lead to a better model, but will take longer.
                Note: when ``hyperparameter_grid`` is None this value is reset to 5, whatever was passed in.

        Returns:
            TrainedSupervisedModel: The result of ``_create_trained_supervised_model``.
        """
        self.validate_regression('Random Forest Regressor')

        if hyperparameter_grid is None:
            # max_features depends on how many columns the test frame has and on the model type.
            column_count = len(self.X_test.columns)
            mtry = hcai_helpers.calculate_random_forest_mtry_hyperparameter(
                column_count,
                self.model_type)
            hyperparameter_grid = {'n_estimators': [10, 50, 200], 'max_features': mtry}
            # The default grid brings its own sample count, replacing the caller's value.
            number_iteration_samples = 5

        configured_algorithm = get_algorithm(
            RandomForestRegressor,
            scoring_metric,
            hyperparameter_grid,
            randomized_search,
            number_iteration_samples=number_iteration_samples,
            n_estimators=trees)

        return self._create_trained_supervised_model(configured_algorithm)
    def random_forest_regressor(
            self,
            trees=200,
            scoring_metric='neg_mean_squared_error',
            hyperparameter_grid=None,
            randomized_search=True,
            number_iteration_samples=5):
        """
        Fit a random forest regressor through ``get_algorithm`` with an overridable grid.

        The sequence is: ``validate_regression``, optional fallback to the default
        grid, ``get_algorithm`` around sklearn's ``RandomForestRegressor``, and
        finally ``_create_trained_supervised_model``.

        Args:
            trees (int): Number of trees to use if not performing a randomized grid search.
                Always handed to ``get_algorithm`` as the ``n_estimators`` keyword.
            scoring_metric (str): Any sklearn scoring metric appropriate for regression.
            hyperparameter_grid (dict): Hyperparameters by name. If omitted (None), a grid with
                ``n_estimators`` of [10, 50, 200] and a helper-computed ``max_features`` is substituted.
            randomized_search (bool): True for randomized search (default).
            number_iteration_samples (int): Number of models to train during the randomized search
                for exploring the hyperparameter space. More may lead to a better model, but will
                take longer. Forced to 5 whenever the default grid is substituted.

        Returns:
            TrainedSupervisedModel: The result of ``_create_trained_supervised_model``.
        """
        self.validate_regression('Random Forest Regressor')

        using_default_grid = hyperparameter_grid is None
        if using_default_grid:
            # The helper receives the test frame's column count and the model type.
            feature_total = len(self.X_test.columns)
            hyperparameter_grid = {
                'n_estimators': [10, 50, 200],
                'max_features': hcai_helpers.calculate_random_forest_mtry_hyperparameter(
                    feature_total, self.model_type),
            }
            # The caller's sample count is not honoured when the default grid is in play.
            number_iteration_samples = 5

        forest_search = get_algorithm(
            RandomForestRegressor,
            scoring_metric,
            hyperparameter_grid,
            randomized_search,
            number_iteration_samples=number_iteration_samples,
            n_estimators=trees)

        return self._create_trained_supervised_model(forest_search)
    def knn(
            self,
            scoring_metric='roc_auc',
            hyperparameter_grid=None,
            randomized_search=True,
            number_iteration_samples=10):
        """
        Fit a KNN classifier through ``get_algorithm`` with an overridable grid.

        The sequence is: ``validate_classification``, optional fallback to the
        default grid (which is printed), ``get_algorithm`` around sklearn's
        ``KNeighborsClassifier``, and finally ``_create_trained_supervised_model``.

        Args:
            scoring_metric (str): Any sklearn scoring metric appropriate for classification.
            hyperparameter_grid (dict): Hyperparameters by name. If omitted (None), a grid with
                ``n_neighbors`` from 5 through 25 and ``weights`` of uniform/distance is substituted.
            randomized_search (bool): True for randomized search (default).
            number_iteration_samples (int): Number of models to train during the randomized search
                for exploring the hyperparameter space. More may lead to a better model, but will
                take longer. Forced to 10 whenever the default grid is substituted.

        Returns:
            TrainedSupervisedModel: The result of ``_create_trained_supervised_model``.
        """
        self.validate_classification('KNN')

        using_default_grid = hyperparameter_grid is None
        if using_default_grid:
            neighbor_counts = list(range(5, 26))
            weighting_schemes = ['uniform', 'distance']
            hyperparameter_grid = {'n_neighbors': neighbor_counts, 'weights': weighting_schemes}
            # The caller's sample count is not honoured when the default grid is in play.
            number_iteration_samples = 10
            # A caller-supplied grid is never printed; only the default one is.
            print('KNN Grid: {}'.format(hyperparameter_grid))

        knn_search = get_algorithm(
            KNeighborsClassifier,
            scoring_metric,
            hyperparameter_grid,
            randomized_search,
            number_iteration_samples=number_iteration_samples)

        return self._create_trained_supervised_model(knn_search)
    def logistic_regression(
            self,
            scoring_metric='roc_auc',
            hyperparameter_grid=None,
            randomized_search=True,
            number_iteration_samples=10):
        """
        Fit a logistic regression through ``get_algorithm`` with an overridable grid.

        The sequence is: ``validate_classification``, optional fallback to the
        default grid, ``get_algorithm`` around sklearn's ``LogisticRegression``,
        and finally ``_create_trained_supervised_model``.

        Args:
            scoring_metric (str): Any sklearn scoring metric appropriate for classification.
            hyperparameter_grid (dict): Hyperparameters by name. If omitted (None), a grid over
                ``C`` (0.01 to 100) and ``class_weight`` (None or 'balanced') is substituted.
            randomized_search (bool): True for randomized search (default).
            number_iteration_samples (int): Number of models to train during the randomized search
                for exploring the hyperparameter space. More may lead to a better model, but will
                take longer. Forced to 10 whenever the default grid is substituted.

        Returns:
            TrainedSupervisedModel: The result of ``_create_trained_supervised_model``.
        """
        self.validate_classification('Logistic Regression')

        using_default_grid = hyperparameter_grid is None
        if using_default_grid:
            regularization_strengths = [0.01, 0.1, 1, 10, 100]
            class_weightings = [None, 'balanced']
            hyperparameter_grid = {'C': regularization_strengths, 'class_weight': class_weightings}
            # The caller's sample count is not honoured when the default grid is in play.
            number_iteration_samples = 10

        logistic_search = get_algorithm(
            LogisticRegression,
            scoring_metric,
            hyperparameter_grid,
            randomized_search,
            number_iteration_samples=number_iteration_samples)

        return self._create_trained_supervised_model(logistic_search)