def objective03(automator, space):
    '''
    Objective function for Random Forest Regressor.
    '''
    algo = 'RandomForestRegressor'
    X = automator.x_train
    Y = automator.y_train

    #Define the subset of dictionary keys that should get passed to the machine learning
    #algorithm.
    keys = get_keys(algo)
    subspace = {k: space[k] for k in set(space).intersection(keys)}

    #Extract the remaining keys that are pertinent to data preprocessing.
    model = RandomForestRegressor(**subspace)
    scaler = space.get('scaler')
    num_features = space.get('k_best')

    #Assemble a data pipeline with the extracted data preprocessing keys.
    #f_regression (rather than SelectKBest's default f_classif) is the
    #appropriate scoring function for continuous regression targets.
    pipeline = Pipeline([
        ('scaler', scaler),
        ('select_best', SelectKBest(score_func=f_regression, k=num_features)),
        ('classifier', model),
    ])

    #Perform one pass of 10-fold cross validation and return the mean score.
    kfold = RepeatedKFold(n_splits=10, n_repeats=1)
    scores = -cross_val_score(pipeline, X, Y, cv=kfold,
                              scoring=automator.score_metric,
                              verbose=False).mean()
    return scores, algo
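#The leading minus sign on cross_val_score above deserves a note: scikit-learn
#expresses error metrics as negated scores (e.g. 'neg_mean_squared_error') so
#that "greater is better" holds uniformly, and flipping the sign turns the
#result back into a positive loss for a minimizer. A minimal, self-contained
#sketch of that convention; the dataset and model below are illustrative and
#not part of the automator.
def _negation_convention_demo():
    from sklearn.datasets import make_regression
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.model_selection import cross_val_score

    X, y = make_regression(n_samples=200, n_features=10, random_state=0)
    model = RandomForestRegressor(n_estimators=20, random_state=0)
    neg_mse = cross_val_score(model, X, y, cv=5,
                              scoring='neg_mean_squared_error').mean()
    return -neg_mse  #positive mean squared error, suitable as a loss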
def objective04(automator, space):
    '''
    Objective function for Support Vector Machines.

    Note that this method uses a BaggingRegressor as a wrapper for SVR.
    Support Vector Machine run time scales as O(N^3); bagging breaks the
    dataset up into smaller samples so that the runtime stays manageable.
    '''
    algo = 'SVR'
    X = automator.x_train
    Y = automator.y_train

    #Define the subset of dictionary keys that should get passed to the machine learning
    #algorithm.
    keys = get_keys(algo)
    subspace = {k: space[k] for k in set(space).intersection(keys)}

    #Build a model with the parameters from our Hyperopt search space.
    n_estimators = space.get('n_estimators')
    model = BaggingRegressor(
        SVR(**subspace),
        max_samples=automator.num_samples // n_estimators,
        n_estimators=n_estimators,
    )
    scaler = space.get('scaler')
    num_features = space.get('k_best')

    #Assemble a data pipeline with the extracted data preprocessing keys.
    pipeline = Pipeline([
        ('scaler', scaler),
        ('select_best', SelectKBest(score_func=f_regression, k=num_features)),
        ('classifier', model),
    ])

    #Perform cross validation and return the mean score.
    kfold = RepeatedKFold(n_splits=automator.num_cv_folds,
                          n_repeats=automator.repeats)
    scores = -cross_val_score(pipeline, X, Y, cv=kfold,
                              scoring=automator.score_metric,
                              verbose=False, n_jobs=-1).mean()
    return scores, algo
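#The runtime trick described in the docstring above can be shown in
#isolation. A minimal sketch with illustrative data and hyperparameters:
#each SVR in the ensemble fits on at most max_samples rows, so each kernel
#matrix is roughly 1/n_estimators^2 the size of the one a single SVR would
#build over the full dataset.
def _bagged_svr_demo():
    from sklearn.datasets import make_regression
    from sklearn.ensemble import BaggingRegressor
    from sklearn.svm import SVR

    X, y = make_regression(n_samples=10000, n_features=20, random_state=0)
    n_estimators = 10
    model = BaggingRegressor(
        SVR(C=1.0, kernel='rbf'),
        max_samples=len(X) // n_estimators,  #1000 rows per estimator
        n_estimators=n_estimators,
        n_jobs=-1,
    )
    return model.fit(X, y)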
def get_model(algo, space_dict):
    '''
    Build an unfitted model of the requested type, passing through only the
    hyperparameters that the algorithm accepts.
    '''
    keys = get_keys(algo)
    space = {k: space_dict[k] for k in set(space_dict).intersection(keys)}
    model_lib = {
        'xgboost_classifier': XGBClassifier,
        'xgboost_regressor': XGBRegressor,
        'SGDClassifier': SGDClassifier,
        'SGDRegressor': SGDRegressor,
        'RandomForestClassifier': RandomForestClassifier,
        'RandomForestRegressor': RandomForestRegressor,
        'SVC': SVC,
        'SVR': SVR,
        'LogisticRegression': LogisticRegression,
        'KNeighborClassifier': KNeighborsClassifier,
        'KNeighborRegressor': KNeighborsRegressor,
        'GaussianNB': GaussianNB,
    }
    return model_lib[algo](**space)
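#Hypothetical usage of get_model. The space dict mixes model hyperparameters
#with preprocessing keys ('scaler', 'k_best'); get_keys(algo) is assumed to
#return only the parameter names SVR accepts, so the preprocessing keys are
#filtered out before construction. The values below are illustrative.
def _get_model_example():
    from sklearn.preprocessing import StandardScaler

    space = {
        'C': 10.0,
        'kernel': 'rbf',
        'gamma': 'scale',
        'scaler': StandardScaler(),  #preprocessing key, dropped by get_model
        'k_best': 5,                 #preprocessing key, dropped by get_model
    }
    return get_model('SVR', space)  #-> SVR(C=10.0, gamma='scale', kernel='rbf')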
def objective01(automator, space):
    '''
    Objective function for XGBoost Classifier.
    '''
    algo = 'xgboost_classifier'
    X = automator.x_train
    Y = automator.y_train

    #Define the subset of dictionary keys that should get passed to the machine learning
    #algorithm.
    keys = get_keys(algo)
    subspace = {k: space[k] for k in set(space).intersection(keys)}

    #Extract the remaining keys that are pertinent to data preprocessing.
    model = XGBClassifier(n_jobs=-1, **subspace)
    scaler = space.get('scaler')
    num_features = space.get('k_best')

    #Assemble a data pipeline with the extracted data preprocessing keys.
    pipeline = Pipeline([
        ('scaler', scaler),
        ('select_best', SelectKBest(k=num_features)),
        ('classifier', model),
    ])

    #Perform cross validation and return the mean score. If the sampled space
    #is invalid, report it and fall back to the best score found so far.
    kfold = RepeatedKFold(n_splits=automator.num_cv_folds,
                          n_repeats=automator.repeats)
    try:
        scores = -cross_val_score(pipeline, X, Y, cv=kfold,
                                  scoring=automator.score_metric,
                                  verbose=False).mean()
    except ValueError:
        print('An error occurred with the following space: ')
        print(space)
        return automator.best, algo
    return scores, algo
def objective05(automator, space):
    '''
    Objective function for K-Nearest Neighbors Regressor.
    '''
    algo = 'KNeighborRegressor'
    X = automator.x_train
    Y = automator.y_train

    #Define the subset of dictionary keys that should get passed to the machine learning
    #algorithm.
    keys = get_keys(algo)
    subspace = {k: space[k] for k in set(space).intersection(keys)}

    #Build a model with the parameters from our Hyperopt search space.
    model = KNeighborsRegressor(n_jobs=-1, **subspace)
    scaler = space.get('scaler')
    num_features = space.get('k_best')

    #Assemble a data pipeline with the extracted data preprocessing keys.
    pipeline = Pipeline([
        ('scaler', scaler),
        ('select_best', SelectKBest(score_func=f_regression, k=num_features)),
        ('classifier', model),
    ])

    #Perform cross validation and return the mean score.
    kfold = RepeatedKFold(n_splits=automator.num_cv_folds,
                          n_repeats=automator.repeats)
    scores = -cross_val_score(pipeline, X, Y, cv=kfold,
                              scoring=automator.score_metric,
                              verbose=False).mean()
    return scores, algo
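#A sketch of how an objective like objective05 could be wired into Hyperopt's
#fmin, since the comments above reference a Hyperopt search space. The space
#definition, the loss wrapper, and max_evals are illustrative assumptions,
#not the automator's actual wiring; fmin minimizes the returned loss.
def _run_knn_search(automator, max_evals=50):
    from hyperopt import STATUS_OK, fmin, hp, tpe
    from sklearn.preprocessing import MinMaxScaler, StandardScaler

    knn_space = {
        'n_neighbors': hp.choice('n_neighbors', list(range(1, 31))),
        'weights': hp.choice('weights', ['uniform', 'distance']),
        'scaler': hp.choice('scaler', [StandardScaler(), MinMaxScaler()]),
        'k_best': hp.choice('k_best', list(range(1, 11))),
    }

    def loss_fn(space):
        score, _algo = objective05(automator, space)
        return {'loss': score, 'status': STATUS_OK}

    return fmin(fn=loss_fn, space=knn_space, algo=tpe.suggest,
                max_evals=max_evals)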
def test_get_keys(self):
    for key in ALGORITHM_KEYS:
        self.assertIsNotNone(get_keys(key))
        print(get_keys(key))