def init(PROPERTIES_PATH, LOAD_FROM_DISK):
    # boost_params = {'n_estimators': 200,
    #                 'min_samples_split': 40,
    #                 'min_samples_leaf': 4,
    #                 'max_features': 'sqrt',
    #                 'max_depth': 20,
    #                 'learning_rate': 0.05}
    #
    # boost = GradientBoostingRegressor(**boost_params)
    boost = LGBMRegressor(learning_rate=0.05,
                          n_estimators=1127,
                          max_depth=-1,
                          min_child_weight=0,
                          num_leaves=68,
                          min_child_samples=5,
                          objective='regression',
                          subsample_for_bin=1000,
                          min_split_gain=0,
                          feature_fraction=0.5,
                          nthread=-1)
    train_data = load_all_data(get_connection(PROPERTIES_PATH), TABLE_LIST,
                               is_train=True, load_from_disk=LOAD_FROM_DISK)
    train_data = data_preprocessing(train_data)
    train_X, train_Y = train_data
    boost.fit(train_X, train_Y)
    np.save('col.npy', train_X.columns)
    print("training has been completed successfully!")
    print("--------------------------------------------")
    return boost
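
# A minimal inference sketch for init() above (not from the original source):
# it assumes the column order saved to 'col.npy' is reused to align test
# features before predicting. `test_X` is a hypothetical DataFrame produced by
# the same preprocessing as the training data.
def predict_with_saved_columns(boost, test_X):
    # allow_pickle is needed because the saved array holds column labels
    train_cols = np.load('col.npy', allow_pickle=True)
    # reindex to the exact column order the model was trained on
    test_X = test_X.reindex(columns=train_cols)
    return boost.predict(test_X)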
def score_of_nonlinearmodel(self, model=None):
    """
    Tree model.
    :param model:
    :return:
    """
    if model is None:  # `if not [model]` was always False: a non-empty list is truthy
        if (self.numNull != 0) | (self.numInf != 0):
            print('Features contain NaN or Inf!')
            print('NaN:{}, Inf:{}'.format(self.numNull, self.numInf))
        model = LGBMRegressor(n_estimators=100)
    model_name = str(model).split('(')[0]
    model.fit(self.train_X, self.train_y)
    if self.showFig:
        sns.barplot(abs(model.feature_importances_), self.continuous_feature_names)
        plt.title('{} importances of features'.format(model_name))
        plt.show()
    sc = [abs(x) for x in model.feature_importances_]
    sum_sc = sum(sc)
    featureScore = [round(s / sum_sc, 4) for s in sc]
    print(model_name + ' is finished')
    return featureScore
def lightBGM_model(X, Y):
    model = LGBMRegressor(num_leaves=36, n_estimators=100,
                          learning_rate=0.07, random_state=0)
    model.fit(X, Y, verbose=True)
    return model
def train_lightgbm(verbose=True):
    """Train a boosted tree with LightGBM."""
    if verbose:
        print("Training with LightGBM")
    df = pd.read_csv(STAGE1_LABELS)
    ids = df['id']
    x = np.array([
        np.mean(np.load(os.path.join(FEATURE_FOLDER, '%s.npy' % str(id))),
                axis=0).flatten()
        for id in df['id'].tolist()
    ])
    y = df['cancer'].values  # .as_matrix() was removed from pandas

    # params = {
    #     'task': 'train',
    #     'boosting_type': 'gbdt',
    #     'objective': 'regression',
    #     'metric': {'l2'},
    #     'num_leaves': 21,
    #     'learning_rate': 0.001,
    #     'nthread': 24,
    #     'subsample': 0.80,
    #     'colsample_bytree': 0.80,
    #     'seed': 42,
    #     'verbose': verbose,
    # }

    skf = StratifiedKFold(n_splits=5, random_state=2048, shuffle=True)
    clfs = []
    oof_preds = []
    for train_index, test_index in skf.split(x, y):
        trn_x, val_x = x[train_index, :], x[test_index, :]
        trn_y, val_y = y[train_index], y[test_index]
        val_ids = pd.DataFrame(ids.iloc[test_index].values, columns=['id'])
        clf = LGBMRegressor(max_depth=50,
                            num_leaves=21,
                            n_estimators=5000,
                            min_child_weight=1,
                            learning_rate=0.001,
                            nthread=24,
                            subsample=0.80,
                            colsample_bytree=0.80,
                            seed=42)
        clf.fit(trn_x, trn_y, eval_set=[(val_x, val_y)], verbose=verbose,
                eval_metric='l2', early_stopping_rounds=300)
        val_preds = pd.DataFrame(clf.predict(val_x), columns=["cancer"])
        oof_preds.append(pd.concat([val_ids, val_preds], axis=1))
        clfs.append(clf)
    return clfs, oof_preds
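
# A hedged sketch (not from the original source) of consuming
# train_lightgbm()'s return values: stack the per-fold out-of-fold frames and
# average the fold models on new data. `test_x` is a hypothetical feature
# matrix shaped like `x` above.
clfs, oof_preds = train_lightgbm(verbose=False)
oof = pd.concat(oof_preds, axis=0).reset_index(drop=True)  # one row per training id
test_pred = np.mean([clf.predict(test_x) for clf in clfs], axis=0)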
def get_useful_features_byLightBGM(X, Y):
    # Special parameter: importance threshold at or below which a feature is dropped
    importance_filter = 6
    model_3 = LGBMRegressor(num_leaves=36, n_estimators=100,
                            learning_rate=0.07, random_state=0)
    Y_log = np.log1p(Y)
    model_3.fit(X, Y_log, verbose=True)
    feature_score = model_3.feature_importances_
    importance_feature_map = list(zip(feature_score, X.columns))
    useless_feature = []
    for i in importance_feature_map:
        if i[0] <= importance_filter:
            useless_feature.append(i[1])
    feature = [c for c in X.columns]
    useful_feature = [aa for aa in feature if aa not in useless_feature]
    print('useful:', len(useful_feature))
    print('useless:', len(useless_feature))
    print('total:', len(feature))
    return useful_feature
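
# Usage sketch (assumption: X is a pandas DataFrame and Y the raw target, as
# in the function above): keep only the surviving columns before training.
useful_feature = get_useful_features_byLightBGM(X, Y)
X_selected = X[useful_feature]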
class LGBMRegressorPrim(primitive):
    def __init__(self, random_state=0):
        super(LGBMRegressorPrim, self).__init__(name='LGBMRegressor')
        self.hyperparams = []
        self.type = 'Regressor'
        self.description = "LightGBM is a gradient boosting framework that uses tree based learning algorithms."
        self.hyperparams_run = {'default': True}
        self.random_state = random_state
        self.model = LGBMRegressor()
        self.accept_type = 'c_r'

    def can_accept(self, data):
        return self.can_accept_c(data, 'Regression')

    def is_needed(self, data):
        # data = handle_data(data)
        return True

    def fit(self, data):
        data = handle_data(data)
        self.model.fit(data['X'], data['Y'])

    def produce(self, data):
        output = handle_data(data)
        output['predictions'] = self.model.predict(output['X'])
        output['X'] = pd.DataFrame(output['predictions'], columns=[self.name + "Pred"])
        final_output = {0: output}
        return final_output
def build_onetrain(train_data, test, pred=features, label='label',
                   seed=1099, est=6000, is_shuffle=True):
    train_x, train_y = train_data[features].values, train_data[label].values
    clf = LGBMRegressor(learning_rate=0.01,
                        boosting_type='gbdt',
                        objective='regression',
                        n_estimators=est,
                        num_leaves=156,
                        subsample=0.8,
                        n_jobs=-1,
                        max_depth=8,
                        reg_lambda=0,
                        colsample_bytree=0.8,
                        random_state=2019,  # 2019
                        metric=['mse'])
    clf.fit(train_x, train_y,
            eval_set=[(train_x, train_y)],
            eval_metric=['mse'],
            categorical_feature='auto',
            verbose=100)
    # train_pred = clf.predict(train_x, num_iteration=clf.best_iteration_)
    test_pred = clf.predict(test[pred], num_iteration=clf.best_iteration_)
    # print('mean_squared_error:', mean_squared_error(train_y, train_pred))
    test['label'] = test_pred
    return test[['loadingOrder', 'label']], clf
def get_ntree():
    rmse_t_total, rmse_v_total = [], []
    for ntree in range(10, 500, 10):
        lgb_base = LGBMRegressor(n_estimators=ntree,
                                 objective='regression',
                                 random_state=1234,
                                 n_jobs=2,
                                 colsample_bytree=0.8,
                                 reg_alpha=1,
                                 max_depth=10,
                                 subsample=0.8)
        print('current ntree = %s' % ntree)
        lgb_base.fit(X_t, y_t)
        y_t_pre = lgb_base.predict(X_t)
        y_v_pre = lgb_base.predict(X_v)
        rmse_t_each = np.sqrt(mean_squared_error(y_t, y_t_pre))
        rmse_v_each = np.sqrt(mean_squared_error(y_v, y_v_pre))
        rmse_t_total.append(rmse_t_each)
        rmse_v_total.append(rmse_v_each)
        with open('D:\\workspace python\\statContest\\save\\' + 'lgbbase2_rmse_0412.txt',
                  'a', encoding='utf-8') as myfile:
            print(rmse_t_each, ',', rmse_v_each, file=myfile)
    return rmse_t_total, rmse_v_total
def lightGBM_train_nocross(j, param, x_train, x_test, y_train, y_test):
    # note: the keyword is `objective`; the original `object=` was silently ignored
    gbm = LGBMRegressor(**param, num_leaves=31, learning_rate=0.01,
                        objective='regression')
    gbm.fit(x_train, y_train)
    y_pred = gbm.predict(x_test)
    y_pred = DataFrame(y_pred)
    rmse_lightGBM.append(np.sqrt(mean_squared_error(y_pred, y_test)))
    r2_lightGBM.append(r2_score(y_test, y_pred))
    return rmse_lightGBM, r2_lightGBM, gbm
def lgb(x_train, y_train, x_val, y_val):
    model = LGBMRegressor(n_estimators=1000,
                          max_depth=10,
                          subsample=0.8,
                          colsample_bytree=0.8,
                          learning_rate=0.01,
                          random_state=2020)
    model.fit(x_train, y_train)
    result = model.predict(x_val)
    score = mean_absolute_error(result, y_val)
    return score
def train_lightgbm(trn_x, val_x, trn_y, val_y):
    clf = LGBMRegressor(max_depth=50,
                        num_leaves=21,
                        n_estimators=5000,
                        min_child_weight=9,
                        learning_rate=0.01,
                        nthread=24,
                        subsample=0.80,
                        colsample_bytree=0.80,
                        seed=42)
    clf.fit(trn_x, trn_y, eval_set=[(val_x, val_y)], verbose=True,
            eval_metric='l2', early_stopping_rounds=300)
    return clf
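
# Usage sketch for the early-stopped model above (not from the original
# source). With early_stopping_rounds set, LightGBM records the best boosting
# round in clf.best_iteration_; passing it to predict() truncates the ensemble
# at that round. `test_x` is hypothetical.
clf = train_lightgbm(trn_x, val_x, trn_y, val_y)
preds = clf.predict(test_x, num_iteration=clf.best_iteration_)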
def setUp(self):
    X_train, y_train, X_test, y_test = titanic_fare()
    self.test_len = len(X_test)
    _, self.names = titanic_names()  # test-set names; a second identical call was redundant
    model = LGBMRegressor()
    model.fit(X_train, y_train)
    self.explainer = RegressionExplainer(model, X_test, y_test,
                                         r2_score, shap='tree',
                                         cats=['Sex', 'Deck', 'Embarked'],
                                         idxs=self.names, units="$")
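
# Hedged follow-up sketch: RegressionExplainer comes from the
# explainerdashboard package, where a fitted explainer can also be served as
# an interactive dashboard. Assuming that package is installed, and with
# `explainer` standing for the self.explainer built above:
from explainerdashboard import ExplainerDashboard
ExplainerDashboard(explainer).run()  # starts a local Dash server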
def predict_lgb(X, y, df2, params, ind):
    X_train, y_train = X, y
    # dataset for prediction: the next 28 rows starting at index `ind`
    output = df2[(df2.index >= ind) & (df2.index < (ind + 28))]
    X = output.iloc[:, 1:]  # this basically drops the "value" column
    lgb_model = LGBMRegressor(**params)
    lgb_reg = lgb_model.fit(X_train, y_train.value.ravel())
    preds = lgb_reg.predict(X)
    return preds
def evaluate(params, X, y):
    # Initialize an instance of the estimator
    est = LGBMRegressor(boosting='gbdt', n_jobs=-1, random_state=2018)
    # Set params
    est.set_params(**params)
    # Calc CV score
    scores = cross_val_score(estimator=est, X=X, y=y, scoring='r2', cv=4)
    score = np.mean(scores)
    return score
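
# A hedged usage sketch for evaluate(): sweep a few candidate configurations
# and keep the one with the best mean R^2. The candidate values below are
# illustrative guesses, not taken from the original code.
candidates = [{'num_leaves': 31, 'learning_rate': 0.05},
              {'num_leaves': 63, 'learning_rate': 0.02}]
best_params = max(candidates, key=lambda p: evaluate(p, X, y))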
def train_LGBM(self, train, t_target, valid, v_target, parm,
               use_custom_loss=False, reg_alpha=0, reg_lambda=0):
    # entity_features_columns = ['total_floor','building_material','city_town', 'building_type', 'building_use', 'parking_way', 'I_index_50', 'I_index_500', 'I_index_1000', 'I_index_5000', 'I_index_10000', 'II_index_50', 'II_index_500', 'II_index_1000', 'II_index_5000', 'II_index_10000', 'III_index_50', 'III_index_500', 'III_index_1000', 'III_index_5000', 'III_index_10000', 'IV_index_50', 'IV_index_500', 'IV_index_1000', 'IV_index_5000', 'IV_index_10000', 'V_index_50', 'V_index_500', 'V_index_1000', 'V_index_5000', 'V_index_10000', 'VI_index_50', 'VI_index_500', 'VI_index_1000', 'VI_index_5000', 'VI_index_10000', 'VII_index_50', 'VII_index_500', 'VII_index_1000', 'VII_index_5000', 'VII_index_10000', 'VIII_index_50', 'VIII_index_500', 'VIII_index_1000', 'VIII_index_5000', 'VIII_index_10000', 'IX_index_50', 'IX_index_500', 'IX_index_1000', 'IX_index_5000', 'IX_index_10000', 'X_index_50', 'X_index_500', 'X_index_1000', 'X_index_5000', 'X_index_10000', 'XI_index_50', 'XI_index_500', 'XI_index_1000', 'XI_index_5000', 'XI_index_10000', 'XII_index_50', 'XII_index_500', 'XII_index_1000', 'XII_index_5000', 'XII_index_10000', 'XIII_index_50', 'XIII_index_500', 'XIII_index_1000', 'XIII_index_5000', 'XIII_index_10000', 'XIV_index_50', 'XIV_index_500', 'XIV_index_1000', 'XIV_index_5000', 'XIV_index_10000','parking_price_isna','txn_floor_isna']
    # entity_features_columns = ['building_material', 'city', 'town', 'village', 'building_type', 'building_use', 'parking_way','parking_price_isna','txn_floor_isna']
    if use_custom_loss:
        self.loss = custom_loss
    learning_rate = parm['learning_rate']
    n_estimators = parm['n_estimators']
    max_depth = parm['max_depth']
    num_leaves = parm['num_leaves']
    feature_fraction = parm['feature_fraction']
    flag = True
    good_depth = 0
    good_leaves = 0
    good_fraction = 0
    for depth in max_depth:
        for leaves in num_leaves:
            for fraction in feature_fraction:
                rf = LGBMRegressor(learning_rate=learning_rate,
                                   objective='regression',
                                   n_estimators=n_estimators,
                                   max_depth=depth,
                                   num_leaves=leaves,
                                   reg_alpha=reg_alpha,
                                   reg_lambda=reg_lambda,
                                   feature_fraction=fraction,
                                   bagging_freq=1,
                                   metric='rmse')
                # Should we drop the features that don't correlate with the target?
                rf.fit(train, t_target,
                       eval_set=[(train, t_target), (valid, v_target)],
                       # early_stopping_rounds=100,
                       verbose=5000,
                       eval_metric=self.loss,
                       categorical_feature=self.entity_features_columns)
                print("Finished.")
                if flag:
                    self.model = rf
                    flag = False
                y_predict, y_true = self.predict(valid, v_target)
                point = self.score(y_true, y_predict)
                if point > self.max_point:
                    self.max_point = point
                    self.model = rf
                    good_depth = depth
                    good_leaves = leaves
                    good_fraction = fraction
    print(f"depth : {good_depth} leaves : {good_leaves} fraction : {good_fraction}")
    self.model.booster_.save_model(f'models/lightgbm{good_depth}_{good_leaves}_{good_fraction}.txt')
    return self
def __init__(self):
    # Parameters required when constructing the class
    self.model = LGBMRegressor(learning_rate=0.015,
                               objective="regression",
                               metric='mse',
                               num_leaves=12,
                               max_depth=9,
                               max_bin=130,
                               feature_fraction=0.9,
                               reg_lambda=50,
                               min_data=25,
                               min_child_weight=0.001,
                               num_boost_round=3000,
                               random_state=42)
def get_model(brand_string, train_brand, test_brand):
    brand1 = pd.read_csv(brand_string)
    brand1 = brand1.iloc[90:, :].reset_index(drop=True)
    X_brand1 = brand1.drop(['brand', 'cnt'], axis=1)
    y_train = brand1['cnt'].values
    X_train = pd.concat([X_brand1, train_brand], axis=1)
    X_test = test.drop(['cnt'], axis=1)  # `test` is a module-level DataFrame
    X_test = pd.concat([X_test, test_brand], axis=1)
    model = LGBMRegressor().fit(X_train, y_train)
    brand1_pre = model.predict(X_test)
    return brand1_pre
def tune_params():
    rmse_t_total, rmse_v_total = [], []
    for max_depth in range(6, 11):
        for subsample in [0.6, 0.7, 0.8]:
            for colsample_bytree in [0.6, 0.7, 0.8]:
                for reg_alpha in [0.1, 1, 10]:
                    lgb_base = LGBMRegressor(n_estimators=150,
                                             objective='regression',
                                             random_state=1234,
                                             n_jobs=3,
                                             colsample_bytree=colsample_bytree,
                                             reg_alpha=reg_alpha,
                                             max_depth=max_depth,
                                             subsample=subsample)
                    _params = {
                        'max_depth': max_depth,
                        'subsample': subsample,
                        'colsample_bytree': colsample_bytree,
                        'reg_alpha': reg_alpha,
                    }
                    lgb_base.fit(X_t, y_t)
                    y_t_pre = lgb_base.predict(X_t)
                    y_v_pre = lgb_base.predict(X_v)
                    rmse_t_each = np.sqrt(mean_squared_error(y_t, y_t_pre))
                    rmse_v_each = np.sqrt(mean_squared_error(y_v, y_v_pre))
                    rmse_t_total.append(rmse_t_each)
                    rmse_v_total.append(rmse_v_each)
                    print(_params)
                    with open('D:\\workspace python\\statContest\\save\\'
                              + 'lgbbase2_saveparams_rmse_0412.txt',
                              'a', encoding='utf-8') as myfile1:
                        print(_params['max_depth'], _params['subsample'],
                              _params['colsample_bytree'], _params['reg_alpha'],
                              file=myfile1)
                    print(rmse_t_each, rmse_v_each)
                    with open('D:\\workspace python\\statContest\\save\\'
                              + 'lgbbase2_tunparms_rmse_0412.txt',
                              'a', encoding='utf-8') as myfile:
                        print(rmse_t_each, ',', rmse_v_each, file=myfile)
    return rmse_t_total, rmse_v_total
def do():
    train_data = pd.read_csv('D:/testFiles/for_excute_folder/activity_blFreight_2017_5_train_input.csv')
    test_data = pd.read_csv('D:/testFiles/for_excute_folder/activity_blFreight_2017_5_test_input.csv')

    # Filter rows with TIME_USED <= 1000s
    train_data = train_data[train_data["TIME_USED"] <= 1000]
    test_data = test_data[test_data["TIME_USED"] <= 1000]

    # Convert seconds to minutes
    train_data['TIME_USED'] = train_data['TIME_USED'] / 60
    test_data['TIME_USED'] = test_data['TIME_USED'] / 60

    train_data['TIME_USERD_MEDIAN_S2'] = train_data['TIME_USERD_MEDIAN'] ** 2
    test_data['TIME_USERD_MEDIAN_S2'] = test_data['TIME_USERD_MEDIAN'] ** 2

    # bkgOffice_median_by_task_type
    train_data['TIME_USERD_MEDIAN_S3'] = train_data['TIME_USERD_MEDIAN'] * train_data['bkgOffice_median_by_task_type']
    test_data['TIME_USERD_MEDIAN_S3'] = test_data['TIME_USERD_MEDIAN'] * test_data['bkgOffice_median_by_task_type']
    print(train_data.head())

    y_train = train_data['TIME_USED'].values.tolist()
    X_train = train_data.drop(['TIME_USED'], axis=1).values.tolist()

    # Pick a model
    # regressor = SGDRegressor(l1_ratio=0.1)
    # regressor = Ridge()
    # regressor = SVR()
    # regressor = RandomForestRegressor(n_estimators=100)
    # regressor = AdaBoostRegressor()
    # regressor = GradientBoostingRegressor()
    # regressor = BaggingRegressor()
    # regressor = XGBRegressor(n_estimators=400)  # NOT WORK!
    regressor = LGBMRegressor(n_estimators=400, learning_rate=0.02,
                              seed=2017, colsample_bytree=1)
    rfecv = RFECV(estimator=regressor, step=1, cv=5, scoring='r2', n_jobs=-1)
    rfecv.fit(X_train, y_train)
    print("Optimal number of features : %d" % rfecv.n_features_)

    # Plot number of features vs. cross-validation scores
    plt.figure()
    plt.xlabel("Number of features selected")
    plt.ylabel("Cross validation score (R^2)")
    plt.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_)
    plt.show()
    print(rfecv.support_)
    print(rfecv.ranking_)
def __init__(self, data, continuous_feature_names, label, score, t='R', showFig=False):
    self.data = data
    self.continuous_feature_names = continuous_feature_names
    self.label = label
    self.score = score
    self.K = len(continuous_feature_names)
    self.T = t
    self.showFig = showFig
    # Candidate features
    self.train_X = data[continuous_feature_names]
    self.train_y = data[label]
    self.numNull = self.train_X.isnull().sum().sum()
    self.numInf = np.isinf(self.train_X.values).sum()
    # Candidate models
    self.linearRegressionModel = [LinearRegression(),
                                  Ridge(),
                                  Lasso(),
                                  LinearSVR()]
    self.linearClassModel = [LogisticRegression(),
                             LinearSVC(),
                             RidgeClassifier()]
    self.treeRegressionModel = [ExtraTreesRegressor(),
                                DecisionTreeRegressor(),
                                RandomForestRegressor(),  # RF is relatively slow
                                GradientBoostingRegressor(),
                                XGBRegressor(n_estimators=100, objective='reg:squarederror'),
                                LGBMRegressor(n_estimators=100)]
    self.treeClassModel = [ExtraTreesClassifier(),
                           DecisionTreeClassifier(),
                           RandomForestClassifier(),
                           GradientBoostingClassifier(),
                           XGBClassifier(n_estimators=100, objective="binary:logistic"),
                           LGBMClassifier(n_estimators=100)]
    self.nonlinearRegressionModel = self.treeRegressionModel + [SVR(),
                                                                MLPRegressor(solver='lbfgs', max_iter=100)]
    self.nonlinearClassModel = self.treeClassModel + [SVC(), MLPClassifier()]
def lightGBM_CV():
    print('Memory usage: ' + str(psutil.virtual_memory().percent) + '%')
    samples_df, data_df = make_train_set(train_step=True)
    labels = samples_df['label'].values
    samples_df = None
    values = data_df.values
    data_df = None
    param_test = {
        'max_depth': range(5, 15, 2),
    }
    estimator = LGBMRegressor(num_leaves=50,  # 50 was the optimum found by CV
                              max_depth=13,
                              learning_rate=0.1,
                              n_estimators=140,
                              objective='regression',
                              min_child_weight=1,
                              subsample=0.8,
                              colsample_bytree=0.8,
                              nthread=7)
    gsearch = GridSearchCV(estimator, param_grid=param_test,
                           scoring='roc_auc', cv=5)
    gsearch.fit(values, labels)
    print_best_score(gsearch, param_test)
def create_model(self):
    # TODO: if learning rates are identical throughout - create a regular Classifier
    self.model_params['n_estimators'] = self.best_n_iterations
    self.model_params["learning_rate"] = self.learning_rates[0]  # TODO change
    final_model = LGBMRegressor(**self.model_params)
    return final_model

# class LGBClassifierLR(ClassifierMixin):
#     def __init__(self, model_params=None, n_estimators=None, learning_rates=None):
#         self.model_params = model_params
#         self.n_estimators = n_estimators
#         self.learning_rates = learning_rates
#
#     def fit(self, X, y, sample_weight=None):
#         dtrain = lgb.Dataset(X, label=y)
#         model = lgb.train(self.model_params,
#                           dtrain,
#                           num_boost_round=self.n_estimators,
#                           learning_rates=self.learning_rates)
#         self.model = model
#
#     def predict(self, X):
#         return self.model.predict(X)
#
#     # TODO Fix
#     def predict_proba(self, X):
#         return self.model.predict(X)
#
#     def get_params(self):
#         return self.learning_rates
def tune(self, training_set, logger=None, saver=None):
    self.training_set = training_set
    objective = generate_objective(self.training_set, self.tuning_metric)
    best = space_eval(self.space,
                      fmin(fn=objective,
                           space=self.space,
                           trials=self.trials,
                           algo=tpe.suggest,
                           max_evals=self.max_evals))
    print(f'Search space: {self.space}')
    print(f'Best hyperparams: {best}')
    self.model = LGBMRegressor()
    self.model.set_params(**best)
    self.model.fit(training_set.X, training_set.y)
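
# generate_objective() is project code not shown here. A minimal sketch of
# what such a factory could look like for hyperopt's fmin, assuming
# `training_set` exposes X and y and `metric` is an sklearn scoring string:
from sklearn.model_selection import cross_val_score

def generate_objective(training_set, metric):
    def objective(params):
        model = LGBMRegressor(**params)
        score = cross_val_score(model, training_set.X, training_set.y,
                                scoring=metric, cv=3).mean()
        return -score  # fmin minimizes, so negate higher-is-better scores
    return objective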
def train_LightGBM(x_train, y_train):
    clf = LGBMRegressor(n_estimators=10000,
                        learning_rate=0.02,
                        boosting_type='gbdt',
                        objective='regression_l1',
                        max_depth=-1,
                        num_leaves=31,
                        min_child_samples=20,
                        feature_fraction=0.8,
                        bagging_freq=1,
                        bagging_fraction=0.8,
                        lambda_l2=2,
                        random_state=2020)
    clf.fit(x_train, y_train)
    return clf
def train_lgb_model(best_nodes, X_train_scaled, Y_train):
    rsg = LGBMRegressor(learning_rate=best_nodes["learning_rate"],
                        n_estimators=int(best_nodes["n_estimators"]),
                        max_depth=best_nodes["max_depth"],
                        # eval_metric=best_nodes["eval_metric"],
                        num_leaves=best_nodes["num_leaves"],
                        subsample=best_nodes["subsample"],
                        colsample_bytree=best_nodes["colsample_bytree"],
                        min_child_samples=best_nodes["min_child_samples"],
                        min_child_weight=best_nodes["min_child_weight"])
    rsg.fit(X_train_scaled, Y_train)
    Y_pred = rsg.predict(X_train_scaled)
    print("mse:", np.mean((Y_pred - Y_train) ** 2))
    print("rmse:", np.sqrt(np.mean((Y_pred - Y_train) ** 2)))
    return rsg
def lightBGM_model_with_test(X, Y):
    model = LGBMRegressor(num_leaves=36, n_estimators=100,
                          learning_rate=0.07, random_state=0)
    useful_feature = get_useful_features_byLightBGM(X, Y)
    X_U = X[useful_feature]
    x1, x2, y1, y2 = train_test_split(X_U, Y, test_size=0.2)
    y1_log = np.log1p(y1)
    model.fit(x1, y1_log, verbose=True)
    predict_log = model.predict(x2)
    predict = np.expm1(predict_log)  # invert the log1p target transform
    error = error_fun(predict, y2)[1]
    del x1, x2, y1, y2
    return error
def rf_cv(num_leaves, max_depth, subsample, min_child_samples):
    val = cross_val_score(
        LGBMRegressor(objective='regression_l1',
                      num_leaves=int(num_leaves),
                      max_depth=int(max_depth),
                      subsample=subsample,
                      min_child_samples=int(min_child_samples)),
        X=train_X, y=train_y_ln, verbose=0, cv=5,
        scoring=make_scorer(mean_absolute_error)
    ).mean()
    return 1 - val  # 1 - MAE, so that a maximizer can treat it as a reward
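
# A hedged sketch (not from the original source) of wiring rf_cv() into the
# bayes_opt package as a black-box maximization target; the bounds below are
# illustrative guesses, not taken from the original code.
from bayes_opt import BayesianOptimization

rf_bo = BayesianOptimization(
    rf_cv,
    {'num_leaves': (4, 128),
     'max_depth': (3, 16),
     'subsample': (0.5, 1.0),
     'min_child_samples': (5, 100)})
rf_bo.maximize(init_points=5, n_iter=25)
print(rf_bo.max)  # best target value and the params that produced it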
def select_by_nonlinearmodel(self, models=None):
    """
    Tree models.
    :param models:
    :return:
    """
    if not models:
        if (self.numNull != 0) | (self.numInf != 0):
            print('Features contain NaN or Inf!')
            print('NaN:{}, Inf:{}'.format(self.numNull, self.numInf))
            models = [XGBRegressor(n_estimators=100, objective='reg:squarederror'),
                      LGBMRegressor(n_estimators=100)]
        else:
            models = [DecisionTreeRegressor(),
                      # RF is relatively slow
                      RandomForestRegressor(),
                      GradientBoostingRegressor(),
                      MLPRegressor(solver='lbfgs', max_iter=100),
                      XGBRegressor(n_estimators=100, objective='reg:squarederror'),
                      LGBMRegressor(n_estimators=100)]
    # Fit once through SelectFromModel and select the features
    for model in models:
        model_name = str(model).split('(')[0]
        selector = SelectFromModel(model, max_features=self.K, threshold=-np.inf)
        selector.fit_transform(X=self.train_X, y=self.train_y)
        mask = selector.get_support(True)
        feature_names = np.array(self.continuous_feature_names)[mask]
        print("{} selected feature:{}".format(model_name, feature_names))
    if self.showFig:
        for model in models:
            model_name = str(model).split('(')[0]
            model.fit(self.train_X, self.train_y)
            self.dict_features_score(model.feature_importances_)
            # print(sorted(dict(zip(self.continuous_feature_names, model.feature_importances_)).items(), key=lambda x: x[1], reverse=True))
            sns.barplot(abs(model.feature_importances_), self.continuous_feature_names)
            plt.title('{} importances of features'.format(model_name))
            plt.show()
def get_estimator(estimator):
    if estimator == 'ridge':
        clf = Ridge()
    elif estimator == 'rfr':
        clf = RandomForestRegressor()
    elif estimator == 'lasso':
        clf = Lasso()
    elif estimator == 'lgbm':
        clf = LGBMRegressor()
    else:
        raise Exception("Unknown estimator name.")
    return clf
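
# Simple usage sketch for get_estimator(); `X_train` and `y_train` are
# hypothetical placeholders for your training data.
clf = get_estimator('lgbm')
clf.fit(X_train, y_train)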