def _build_autosklearn_regressor():
    """Create the auto-sklearn regressor served under the ``"sklearn"`` key.

    No time budget is passed, so auto-sklearn's own defaults apply.
    """
    # Passed as the memory limit; presumably megabytes per auto-sklearn's
    # convention -- TODO confirm against the installed version.
    mem_limit = 8000
    # NOTE(review): newer auto-sklearn releases appear to have renamed
    # ``ml_memory_limit`` to ``memory_limit`` -- confirm before upgrading.
    return regression.AutoSklearnRegressor(
        n_jobs=4,
        ensemble_nbest=5,
        ml_memory_limit=mem_limit,
    )


# Maps each supported key to a zero-argument factory. The factories are
# lambdas so that (a) every call hands out a fresh estimator and (b) the
# estimator classes are only looked up when their key is actually requested.
_AUTOML_FACTORIES = {
    "SGDRegressor": lambda: sklearn.linear_model.SGDRegressor(),
    "PassiveAggressiveRegressor":
        lambda: sklearn.linear_model.PassiveAggressiveRegressor(),
    "KNeighborsRegressor": lambda: KNeighborsRegressor(),
    "LinearRegression": lambda: sklearn.linear_model.LinearRegression(),
    "RandomForestRegressor": lambda: RandomForestRegressor(),
    "DecisionTreeRegressor": lambda: DecisionTreeRegressor(),
    "ExtraTreeRegressor": lambda: ExtraTreeRegressor(),
    "MLPRegressor": lambda: MLPRegressor(verbose=True),
    "SVR": lambda: SVR(),
    "tpot": lambda: TPOTRegressor(
        generations=1,
        population_size=1,
        verbosity=2,
        scoring="neg_mean_absolute_error",
        n_jobs=4,
    ),
    "sklearn": _build_autosklearn_regressor,
}


def get_automl_object(ml_algo):
    """Return a new, untrained regressor for the requested algorithm.

    :param ml_algo: case-sensitive algorithm key; one of ``"SGDRegressor"``,
        ``"PassiveAggressiveRegressor"``, ``"KNeighborsRegressor"``,
        ``"LinearRegression"``, ``"RandomForestRegressor"``,
        ``"DecisionTreeRegressor"``, ``"ExtraTreeRegressor"``,
        ``"MLPRegressor"``, ``"SVR"``, ``"tpot"`` or ``"sklearn"``
        (the latter selects auto-sklearn).
    :return: a freshly constructed estimator, or the empty string ``""``
        when ``ml_algo`` is not one of the keys above.
    """
    # Non-string input can never match a key; short-circuiting also keeps
    # unhashable values (e.g. lists) from raising inside the dict lookup.
    factory = _AUTOML_FACTORIES.get(ml_algo) if isinstance(ml_algo, str) else None
    if factory is None:
        return ""
    return factory()
def auto_sklearn(self, time=60, estimators=None, resampling_strategy='cv', folds=5):
    """Fit an auto-sklearn regressor on ``self.df_train`` / ``self.label``.

    The estimator is created only when ``self.model`` is still ``None``;
    an existing ``self.model`` is reused as-is.

    :param time: per-run time limit in seconds; the overall task budget is
        ``time + 10``.
    :param estimators: forwarded as ``include_estimators`` (``None`` leaves
        the choice to auto-sklearn).
    :param resampling_strategy: forwarded to auto-sklearn; when it equals
        ``'cv'`` the model is additionally refit after the search.
    :param folds: number of folds passed in ``resampling_strategy_arguments``
        (defaults to the previously hard-coded value of 5).
    :return: ``None``; the fitted estimator is stored on ``self.model``.
    """
    # NOTE(review): the original formatting was collapsed onto one line; this
    # assumes only the construction is guarded by the ``None`` check and that
    # fit/refit run on every call -- confirm against the original layout.
    if self.model is None:
        self.model = asc.AutoSklearnRegressor(
            time_left_for_this_task=time + 10,
            per_run_time_limit=time,
            initial_configurations_via_metalearning=0,
            include_estimators=estimators,
            resampling_strategy=resampling_strategy,
            resampling_strategy_arguments={'folds': folds},
        )

    # Copies are handed over, presumably so the estimator cannot modify the
    # training data kept on the instance.
    self.model.fit(self.df_train.copy(), self.label.copy())

    # Per the auto-sklearn docs, cross-validation does not keep the models
    # trained during the search, so a refit is needed before predicting.
    if resampling_strategy == 'cv':
        self.model.refit(self.df_train.copy(), self.label.copy())
# elif alg.name == 'TPOT_Regressor': from tpot import TPOTRegressor model = TPOTRegressor( generations=alg.generations, cv=alg.cv, scoring=alg.scoring, verbosity=alg.verbosity ) warn_not_gpu_support(alg) elif alg.name == 'AutoSklearn_Regressor': from autosklearn import regression if alg.sampling: model = regression.AutoSklearnRegressor( time_left_for_this_task=alg.task_time, per_run_time_limit=alg.run_time, resampling_strategy=alg.sampling_strategy, resampling_strategy_arguments={'folds': alg.folds} ) else: model = regression.AutoSklearnRegressor( time_left_for_this_task=alg.task_time, per_run_time_limit=alg.run_time ) warn_not_gpu_support(alg) elif alg.name == 'LinearRegression': if NVIDIA_RAPIDS_ENABLED: from cuml.linear_model import LinearRegression model = LinearRegression(**alg.input_variables.__dict__) else: from sklearn.linear_model import LinearRegression model = LinearRegression(**alg.input_variables.__dict__)
elif alg.name == 'TPOT_Regressor': from tpot import TPOTRegressor model = TPOTRegressor( generations=alg.generations, cv=alg.cv, scoring=alg.scoring, verbosity=alg.verbosity ) elif alg.name == 'AutoSklearn_Regressor': from autosklearn import regression print("alg.sampling",alg.sampling_strategy) if alg.sampling.lower()=='true': model = regression.AutoSklearnRegressor( time_left_for_this_task=alg.task_time, per_run_time_limit=alg.run_time, resampling_strategy= "".join(alg.sampling_strategy), resampling_strategy_arguments={'folds':int(alg.folds)} #feat_type = {Numerical,Numerical,Numerical,Numerical,Categorical} ) else: model = regression.AutoSklearnRegressor( time_left_for_this_task=alg.task_time, per_run_time_limit=alg.run_time ) elif alg.name == 'LinearRegression': from sklearn.linear_model import LinearRegression model = LinearRegression(**vars) elif alg.name == 'SupportVectorRegression': from sklearn.svm import SVR model = SVR(**vars) elif alg.name == 'BayesianRidgeRegression':