def get_trained_model(dataset, label, model_name, task, method_name='standard', max_evals=100, test_size=0.3, random_state=1):
    '''
    Train the model with the given data and HPO method.

    Parameters:
        dataset (dataframe): data to be used for training the model
        label (string): target column of the dataframe
        model_name (string): name of the model on which data is to be trained
        task (string): type of task
        method_name (string): name of the hyperparameter optimization method
            to be used while training
        max_evals (int): number of evaluations (used by 'bayesian_tpe')
        test_size (float): fraction of the data to be used for testing
        random_state (int): random state to be used for the train/test split

    Returns:
        model (model object): the trained model on which HPO is performed

    Raises:
        Exception: if method_name is not a recognized HPO method
    '''
    features = get_features(dataset, label)
    X, Y = dataset[features], dataset[label]
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=test_size, random_state=random_state)
    model = get_model(model_name)
    if method_name == 'standard':
        # No HPO: instantiate the model class and fit with default
        # hyperparameters.
        model = model()
        trained_model = model.fit(X_train, Y_train)
        print('Standard Model without HPO')
        print('Model Trained')
        return trained_model
    elif method_name == 'grid_search':
        trained_model = grid_search(model, X_train, Y_train)
    elif method_name == 'random_search':
        trained_model = random_search(model, X_train, Y_train)
    # elif method_name == 'bayesian_gp':
    #     trained_model = bayesian_gp(model, X_train, Y_train)
    elif method_name == 'bayesian_tpe':
        trained_model = bayesian_tpe(
            model, X_train, X_test, Y_train, Y_test, task,
            max_evals=max_evals)
    else:
        # Bug fix: the original raised the literal string
        # "'No hpo method named {}'.format(method_name)" because the
        # .format() call was quoted inside the message; interpolate the
        # method name for real here.
        raise Exception('No hpo method named {}'.format(method_name))
    print('Hyperparameters Optimized')
    print('Model Trained')
    return trained_model


#---------------------------------------------------------------------------------------------------------------------#
def add_models(self, base_layer_models):
    '''
    Append additional base-layer models to this ensemble.

    Parameters:
        self (object ref)
        base_layer_models (list): names of the models to be added to the
            base layer used in ensembling
    '''
    # Resolve each model name and append it to the existing base layer.
    for model_name in base_layer_models:
        self.models.append(get_model(model_name))
def __init__(self, base_layer_models=None, meta_model='RandomForestClassifier', n_splits=5, optimize=True, max_evals=100):
    '''
    Initializes the object values with the given arguments.

    Parameters:
        self (object ref)
        base_layer_models (list): names of the models to be used at the base
            layer in ensembling; defaults to a standard set of classifiers
        meta_model (string): name of the model to be used at the meta layer
            in ensembling, or None for no meta model
        n_splits (int): number of splits to be made for cross validation
        optimize (boolean): optimize the process
        max_evals (int): max number of evaluations to be done
    '''
    # Idiom fix: compare with None using `is`, not `==` (PEP 8).
    if base_layer_models is None:
        base_layer_models = [
            'LogisticRegression',
            'DecisionTreeClassifier',
            'RandomForestClassifier',
            'GradientBoostingClassifier',
            'ExtraTreesClassifier',
            'AdaBoostClassifier',
        ]
    self.models = [get_model(name) for name in base_layer_models]
    if meta_model is None:
        self.meta_model = None
    elif meta_model in ('AdaBoostClassifier', 'BaggingClassifier'):
        # These meta learners wrap another estimator; a default-configured
        # random forest is used as their base estimator.
        self.meta_model = get_model(meta_model)(
            base_estimator=get_model('RandomForestClassifier')())
    else:
        self.meta_model = get_model(meta_model)()
    self.n_splits = n_splits
    self.trained_models = []  # populated later, during training
    self.optimize = optimize
    self.max_evals = max_evals