def test_plot_tree(breast_cancer_split):
    X_train, _, y_train, _ = breast_cancer_split
    gbm = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, verbose=-1)
    gbm.fit(X_train, y_train)

    with pytest.raises(IndexError):
        lgb.plot_tree(gbm, tree_index=83)

    ax = lgb.plot_tree(gbm, tree_index=3, figsize=(15, 8), show_info=['split_gain'])
    assert isinstance(ax, matplotlib.axes.Axes)
    w, h = ax.axes.get_figure().get_size_inches()
    assert int(w) == 15
    assert int(h) == 8
def plot_model_information(bst, validation_metrics, my_own_metrics):
    print('Number of trees:', bst.num_trees())

    print('Plot model performance')
    ax = lgb.plot_metric(validation_metrics, metric='auc')
    plt.show()

    print('Plot feature importances...')
    ax = lgb.plot_importance(bst, max_num_features=15)
    plt.show()

    def plot_my_own_metrics(my_own_metrics):
        x = list(my_own_metrics.keys())
        y = list(my_own_metrics.values())
        plt.barh(x, y)
        for index, value in enumerate(y):
            plt.text(value, index, str(value))

    print('plot_my_own_metrics')
    plot_my_own_metrics(my_own_metrics)
    plt.show()

    tree_index = 0
    print('Plot ' + str(tree_index) + 'th tree...')
    # one tree uses a categorical feature to split
    ax = lgb.plot_tree(bst, tree_index=tree_index, figsize=(64, 36), show_info=['split_gain'])
    plt.show()
def get_model_tree_visual(model, model_name="default", tree_index=1, outputpath="./"):
    '''
    :param model: trained LightGBM model to visualize
    :param model_name: name used in the output file name
    :param tree_index: index of the tree to plot
    :param outputpath: directory the image is written to
    :return: True on success, False otherwise
    '''
    try:
        outputpath = outputpath + model_name + "_tree.png"
        ax = lgb.plot_tree(
            model,
            tree_index=tree_index,
            figsize=(20, 13),
        )
        plt.savefig(outputpath)
    except Exception:
        logger.error("create model tree failed.")
        return False
    else:
        logger.info("create model tree succeeded.")
        return True
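A minimal usage sketch for the helper above; the logger setup and the fitted model name `gbm` are illustrative assumptions, not part of the original snippet:

import logging
import matplotlib.pyplot as plt

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# `gbm` is assumed to be an already-fitted Booster or LGBMModel from earlier in the pipeline.
ok = get_model_tree_visual(gbm, model_name="demo", tree_index=0, outputpath="./")
print("tree image written:", ok)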
def show(self):
    print("Feature importances:", list(self.pst.feature_importance()))
    for i in range(0, 1):
        ax = lgb.plot_tree(self.pst, tree_index=i)
        plt.show()
    ax = lgb.plot_importance(self.pst, importance_type="gain")
    plt.show()
def test_plot_example():
    print('Loading data...')
    # load or create your dataset
    df_train = pd.read_csv(
        r'/Users/longguangbin/Work/Codes/MLlearn/src/reg_models/LightGBM/data/regression.train',
        header=None, sep='\t')
    df_test = pd.read_csv(
        r'/Users/longguangbin/Work/Codes/MLlearn/src/reg_models/LightGBM/data/regression.test',
        header=None, sep='\t')

    y_train = df_train[0]
    y_test = df_test[0]
    X_train = df_train.drop(0, axis=1)
    X_test = df_test.drop(0, axis=1)

    # create dataset for lightgbm
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train)

    # specify your configurations as a dict
    params = {'num_leaves': 5, 'metric': ('l1', 'l2'), 'verbose': 0}

    evals_result = {}  # to record eval results for plotting

    print('Starting training...')
    # train
    gbm = lgb.train(
        params,
        lgb_train,
        num_boost_round=100,
        valid_sets=[lgb_train, lgb_test],
        feature_name=['f' + str(i + 1) for i in range(X_train.shape[-1])],
        categorical_feature=[21],
        evals_result=evals_result,
        verbose_eval=10)

    print('Plotting metrics recorded during training...')
    ax = lgb.plot_metric(evals_result, metric='l1')
    plt.show()

    print('Plotting feature importances...')
    ax = lgb.plot_importance(gbm, max_num_features=10)
    plt.show()

    print('Plotting 84th tree...')
    # one tree uses a categorical feature to split
    ax = lgb.plot_tree(gbm, tree_index=83, figsize=(20, 8), show_info=['split_gain'])
    plt.show()

    print('Plotting 84th tree with graphviz...')
    graph = lgb.create_tree_digraph(gbm, tree_index=83, name='Tree84')
    graph.render(view=True)
def lgbm(model, featnames=None, num_trees=None, figsize=(25, 25), verbose=3):
    try:
        from lightgbm import plot_tree, plot_importance
    except ImportError:
        if verbose >= 1:
            raise ImportError('lightgbm must be installed. Try to: <pip install lightgbm>')
        return None

    # Check model
    _check_model(model, 'lgb')
    # Set env
    _set_graphviz_path()

    if (num_trees is None) and hasattr(model, 'best_iteration_'):
        num_trees = model.best_iteration_
        if verbose >= 3:
            print('[treeplot] >Best detected tree: %.0d' % (num_trees))
    elif num_trees is None:
        num_trees = 0

    ax1 = None
    try:
        fig, ax1 = plt.subplots(1, 1, figsize=figsize)
        plot_tree(model, tree_index=num_trees, dpi=200, ax=ax1)
    except Exception:
        if _get_platform() != "windows":
            print('[treeplot] >Install graphviz first: <sudo apt install python-pydot python-pydot-ng graphviz>')

    # Plot importance
    ax2 = None
    try:
        fig, ax2 = plt.subplots(1, 1, figsize=figsize)
        plot_importance(model, max_num_features=50, ax=ax2)
    except Exception:
        print('[treeplot] >Error: importance cannot be plotted. Booster.get_score() returned an empty result. '
              'This may be caused by having all trees as decision dumps.')

    return (ax1, ax2)
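A minimal usage sketch for the `lgbm` helper above (a fitted LightGBM model named `model` and a working graphviz install are assumed; neither is part of the original snippet):

# `model` is assumed to be a fitted lightgbm.LGBMClassifier or LGBMRegressor.
ax_tree, ax_importance = lgbm(model, figsize=(15, 15), verbose=3)
plt.show()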
def test_plot_tree(self):
    gbm = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, silent=True)
    gbm.fit(self.X_train, self.y_train, verbose=False)

    self.assertRaises(IndexError, lgb.plot_tree, gbm, tree_index=83)

    ax = lgb.plot_tree(gbm, tree_index=3, figsize=(15, 8), show_info=['split_gain'])
    self.assertIsInstance(ax, matplotlib.axes.Axes)
    w, h = ax.axes.get_figure().get_size_inches()
    self.assertEqual(int(w), 15)
    self.assertEqual(int(h), 8)
def lgb_plot(lgb_model, sav_path, is_decision_tree, class_names, x_columns):
    """
    :param lgb_model: the trained model
    :param sav_path: directory the images are saved to
    :param is_decision_tree: whether this is a decision tree: 1 for decision tree, 2 for random forest
    :param class_names: list of class names, in sorted order
    :param x_columns: feature names
    :return: nothing; check the images in the save directory
    """
    if not os.path.exists(sav_path):
        os.makedirs(sav_path)

    b = lgb_model.booster_.dump_model()
    tree_num = len(b['tree_info'])

    for i in range(tree_num):
        lgb.plot_tree(lgb_model, tree_index=i, figsize=(20, 8), show_info=['split_gain'])
        plt.savefig(sav_path + 'lgb_tree' + str(i) + '.png', dpi=1000)
        plt.savefig(sav_path + 'lgb_pdf_tree' + str(i) + '.pdf', dpi=1000)
def lgb_binary(X_train, y_train, X_test, y_test, params, num_rounds):
    # Convert dataset to lgb dataset
    d_train = lgb.Dataset(X_train, label=y_train)

    # Train model
    clf = lgb.train(params, d_train, num_boost_round=num_rounds)

    # Predict
    yhat = clf.predict(X_test)

    # Convert probabilities into binary labels
    y_hat = list(map(lambda x: 1 if x >= 0.5 else 0, yhat))

    # Get confusion matrix
    cm = confusion_matrix(y_test, y_hat)

    # Get accuracy score
    score = accuracy_score(y_test, y_hat)
    print(cm, '\n')
    print(score)

    # Plotting
    ax = lgb.plot_importance(clf, max_num_features=10)
    plt.show()

    ax = lgb.plot_tree(clf)
    plt.show()
def train_light_gbm(self, dts):
    # create dataset for lightgbm
    lgb_train = lgb.Dataset(dts.trainX, dts.trainY)
    lgb_test = lgb.Dataset(dts.testX, dts.testY, reference=lgb_train)

    # specify your configurations as a dict
    params = {
        'num_leaves': 5,
        'metric': ('l1', 'l2'),
        'verbose': 0
    }

    evals_result = {}  # to record eval results for plotting

    print('Starting training...')
    # train
    gbm = lgb.train(params,
                    lgb_train,
                    num_boost_round=100,
                    valid_sets=[lgb_train, lgb_test],
                    feature_name=['close', 'open', 'high', 'low', 'volume'],
                    categorical_feature=[21],
                    evals_result=evals_result,
                    verbose_eval=10)

    print('Plotting metrics recorded during training...')
    ax = lgb.plot_metric(evals_result, metric='l1')
    plt.show()

    print('Plotting feature importances...')
    ax = lgb.plot_importance(gbm, max_num_features=10)
    plt.show()

    print('Plotting 84th tree...')
    # one tree uses a categorical feature to split
    ax = lgb.plot_tree(gbm, tree_index=83, figsize=(20, 8), show_info=['split_gain'])
    plt.show()

    print('Plotting 84th tree with graphviz...')
    graph = lgb.create_tree_digraph(gbm, tree_index=83, name='Tree84')
    graph.render(view=True)
lightgbm.plot_importance(gbm)
plt.show()

lightgbm.plot_metric(evals_result, metric='l1', title='l1 Metric during training')
plt.show()

lightgbm.plot_metric(evals_result, metric='l2', title='l2 Metric during training')
plt.show()

lightgbm.plot_tree(gbm, tree_index=1, figsize=(50, 50), show_info=['split_gain'])
plt.show()

# From the graphs we can see that the l1 loss decreases roughly linearly,
# while l2 plateaus after about 110 iterations.
# After reaching its minimum, l2 starts increasing again after iteration #140.

# ### RMSE: Actual vs Predicted

# In[150]:

from sklearn.metrics import mean_squared_error

y_pred = gbm.predict(x_test)
print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5)
#idx_sort = np.argsort(f_imp)[::-1]
f_imp = rf.feature_importances_
pd.Series(f_imp, index=list(X)).sort_values().plot.bar()

pd.Series(prob_rf).unique()

fig = plt.figure(figsize=(12, 6))
ax1 = fig.add_subplot(1, 2, 1)
ax2 = fig.add_subplot(1, 2, 2)
(prob_rf[:df_train.shape[0]] * df_train.S12).plot.hist(ax=ax1)
df_test.S12.plot.hist(ax=ax2)

submit(pred_test, 'sub_lb_20_03_11_19.csv')

lgb.plot_tree(models[0], figsize=(14, 14))

target.apply(np.log1p).plot.hist()

ax = scale_predictions(prob_oof).plot()
scale_predictions(pred_test).plot(color='orange', ax=ax)

plot_tree(models[0])

#y.value_counts()
x.describe()

def model_by_col(train, test, target, col):
    train = train.reset_index(drop=True)
    target = target.reset_index(drop=True)
    unique_col = train[col].unique()
######## FEATURE IMPORTANCE ###########
# 29
# Column-wise importance. Default criterion: "split".
#   "split": result contains the number of times the feature is used in the model.
#   "gain":  result contains the total information gain of the splits which use the feature.
print('Plot feature importances...')
ax = lgb.plot_importance(bst_bayes, max_num_features=10)
ax.tick_params(labelsize=20)
plt.show()

# 29.1 Does not work without 'graphviz' installed
ax = lgb.plot_tree(bst_bayes, tree_index=9, figsize=(40, 20), show_info=['split_gain'])
plt.show()

#################### Bayesian-optimization-II Normal method ###################
# Ref: https://github.com/fmfn/BayesianOptimization

# 25. Create a lightgbm Dataset, a binary file.
#     Saving a Dataset into a LightGBM binary file also makes loading faster:
d_train = lgb.Dataset(X_train, label=y_train)  # transformed train data
d_test = lgb.Dataset(X_test, label=y_test)     # test data
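The two importance criteria described in the comments above are easiest to compare side by side; a minimal sketch reusing `bst_bayes` from the snippet (the subplot layout and titles are illustrative):

fig, (ax_split, ax_gain) = plt.subplots(1, 2, figsize=(16, 6))
lgb.plot_importance(bst_bayes, max_num_features=10, importance_type='split',
                    title='split (times used)', ax=ax_split)
lgb.plot_importance(bst_bayes, max_num_features=10, importance_type='gain',
                    title='gain (total split gain)', ax=ax_gain)
plt.show()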
def evaluate(preds, labels, prefix):
    acc = compute_acc(preds, labels)
    print(f"{prefix} Accuracy: {acc}")
    rmse = compute_rmse(preds, labels)
    print(f"{prefix} RMSE: {rmse}")


train_fname = './data/agaricus.txt.train'
test_fname = './data/agaricus.txt.test'

# read in data
dtrain = lgb.Dataset(train_fname, free_raw_data=True)
dtest = lgb.Dataset(test_fname, free_raw_data=True)

param = {
    'max_depth': 2,
    'learning_rate': 1,
    'objective': 'binary',
    'metric': ['binary_logloss', 'binary_error', 'rmse']
}
num_round = 2

bst = lgb.train(param, dtrain, num_round, valid_sets=[dtest])

num_trees = bst.num_trees()
print(f'Number of trees: {num_trees}')
for i in range(num_trees):
    lgb.plot_tree(bst, tree_index=i)
    plt.show()
def plot_tress(self):
    fig, ax = plt.subplots(1, 1, figsize=(20, 20))
    lgb.plot_tree(self.lgbm, ax=ax)
    plt.show()
    'verbose': 0,
    'random_state': 33,
}

print('Start training...')
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=70,
                valid_sets=[lgb_train, lgb_eval, lgb_test],
                evals_result=evals_result,
                early_stopping_rounds=10)

print('Start predicting...')
y_pred = gbm.predict(test_feature2, num_iteration=gbm.best_iteration)

lgb.plot_importance(gbm, max_num_features=10)
lgb.plot_tree(gbm, tree_index=3, figsize=(100, 40), show_info=['split_gain'])
lgb.plot_metric(evals_result, metric='binary_logloss')
lgb.plot_metric(evals_result, metric='auc')

# calculate Normalized Cross Entropy
NE = (-1) / len(y_pred) * sum(((1 + y_test_reset) / 2 * np.log(y_pred) +
                               (1 - y_test_reset) / 2 * np.log(1 - y_pred)))
print("Normalized Cross Entropy " + str(NE))

# from sklearn.linear_model import LogisticRegression
# lm = LogisticRegression(penalty='l2', C=0.05)  # logistic model construction
# lm.fit(x_train, y_train)                       # fitting the data
# y_pred_test = lm.predict_proba(x_validation)   # gives the probability for each label
# y_pred_label = lm.predict(x_validation)
valid_pred = bst.predict(val_X)
valid_score = metrics.roc_auc_score(val_y, valid_pred)
print(f"Validation AUC score: {valid_score:.4f}")

import matplotlib.pyplot as plt
from lightgbm import plot_importance
from lightgbm import plot_split_value_histogram

fig, ax = plt.subplots(figsize=(10, 8))
plot_importance(bst, ax=ax)

fig, ax = plt.subplots(figsize=(10, 8))
plot_split_value_histogram(bst, 'Forecast', ax=ax)
plt.show()

ax = lgb.plot_tree(bst, tree_index=3, figsize=(200, 200), show_info=['split_gain'])

"""
--------------------------------------------------------------------------
--------------------------------------------------------------------------
--------------------------------------------------------------------------
"""

# Fitting classifier to the Training set
# Create your classifier here
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(solver='sag', multi_class='multinomial',
                                random_state=0, max_iter=100)
classifier.fit(train_X, train_y)
lgb_train = lgb.Dataset(X_train, y_train)

# specify your configurations as a dict
params = {
    'boosting_type': 'rf',
    'objective': 'binary',
    'metric': 'binary_logloss',
    'num_leaves': 2000000,
    'max_depth': 1,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 0,
    'num_iterations': 10
}

print('Starting training...')
# gbm = lgb.train(params, lgb_train, num_boost_round=20,)
model = lgb.LGBMClassifier(**params)
model.fit(X_train, y_train)

print('Plotting a tree...')
# one tree uses a categorical feature to split
ax = lgb.plot_tree(model, tree_index=2, figsize=(15, 15), show_info=['split_gain'])
plt.show()
from sklearn.datasets import load_iris
from sklearn import tree

iris = load_iris()
clf = tree.DecisionTreeClassifier()
clf = clf.fit(iris.data, iris.target)
tree.plot_tree(clf)

import lightgbm as lgb
from sklearn.datasets import load_iris
%matplotlib inline

X, y = load_iris(return_X_y=True)
clf = lgb.LGBMClassifier()
clf.fit(X, y)
lgb.plot_tree(clf)
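One hedged note on the multiclass iris model above: LightGBM grows one tree per class per boosting round, so `tree_index` addresses a (round, class) pair rather than a round; a minimal sketch assuming the usual round-major ordering:

# tree_index = round_id * n_classes + class_id
# (verify against clf.booster_.dump_model()['tree_info'] if in doubt).
n_classes = 3
round_id, class_id = 0, 2
lgb.plot_tree(clf, tree_index=round_id * n_classes + class_id, figsize=(15, 10))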
    early_stopping_rounds=100,
    verbose_eval=20,
    evals_result=evals_result,
)

ax = lgb.plot_metric(evals_result, metric='l1')  # the metric must match one of the metrics set in params earlier
plt.show()

print('Plot feature importance ranking...')
ax = lgb.plot_importance(gbm, max_num_features=30)  # max_num_features caps how many top features are shown; set it as you like
plt.show()

print('Plot 3rd tree...')
# plot a single decision tree, here the third one
ax = lgb.plot_tree(gbm, tree_index=3, figsize=(20, 8), show_info=['split_gain'])
plt.show()

# print('Export the decision tree as a PDF image')  # requires the graphviz application plus the Python package
# graph = lgb.create_tree_digraph(gbm, tree_index=3, name='Tree3')
# graph.render(view=True)

y_valid_pred = gbm.predict(X_valid)
# y_valid_pred = np.expm1(y_valid_pred)
# y_valid = np.expm1(y_valid)
mae = mean_absolute_error(y_valid, y_valid_pred)
print('valid mae: ', mae)

y_pred = gbm.predict(X_test)
def plot_tree(self):
    '''Plots a tree to a new MPL figure'''
    lightgbm.plot_tree(self.ml_model, tree_index=0, show_info=["split_gain"])
early_stop_rounds = 10
params = {
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'metric': {'l2', 'auc'},
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 1
}

results = {}
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=boost_round,
                valid_sets=(lgb_eval, lgb_train),
                valid_names=('validate', 'train'),
                early_stopping_rounds=early_stop_rounds,
                evals_result=results)

y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)

lgb.plot_metric(results)
plt.show()

lgb.plot_importance(gbm, importance_type='split')
plt.show()

lgb.plot_tree(gbm, tree_index=0)
plt.show()
# load the data
print('Load data...')
X_train, X_test, y_train, y_test = train_test_split(x, l, test_size=0.2)

print('Start training...')
# create and train the model
lgbm = lgb.LGBMClassifier()
lgbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], eval_metric='l1', early_stopping_rounds=5)

lgb.create_tree_digraph(lgbm, tree_index=1)

import matplotlib.pyplot as plt
import matplotlib

fig2 = plt.figure(figsize=(20, 20))
ax = fig2.subplots()
lgb.plot_tree(lgbm.booster_, tree_index=1, ax=ax)
plt.show()

print('Start predicting...')
# predict on the test set
y_pred = lgbm.predict(X_test, num_iteration=lgbm.best_iteration_)

# feature importances
print('Feature importances:', list(lgbm.feature_importances_))

recall = recall_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# model evaluation
print('The #accuracy is:', np.mean(y_pred == y_test))
print('The #recall is:', recall)
def xgboost(model, featnames=None, num_trees=None, plottype='horizontal', figsize=(25, 25), verbose=3):
    """Plot tree based on an xgboost model.

    Parameters
    ----------
    model : model
        xgboost model.
    featnames : list, optional
        list of feature names. The default is None.
    num_trees : int, default None
        The best performing tree is chosen. Specify any other ordinal number for another target tree.
    plottype : str, optional
        Make 'horizontal' or 'vertical' plot. The default is 'horizontal'.
    figsize : tuple, default (25, 25)
        Figure size, (height, width).
    verbose : int, optional
        Print progress to screen. The default is 3.
        0: NONE, 1: ERROR, 2: WARNING, 3: INFO (default), 4: DEBUG, 5: TRACE

    Returns
    -------
    ax : Figure axis
        Figure axis of the input model.

    """
    try:
        from xgboost import plot_tree, plot_importance
    except ImportError:
        if verbose >= 1:
            raise ImportError('xgboost must be installed. Try to: <pip install xgboost>')

    _check_model(model, 'xgb')
    # Set env
    _set_graphviz_path()

    if plottype == 'horizontal':
        plottype = 'UD'
    if plottype == 'vertical':
        plottype = 'LR'

    if (num_trees is None) and hasattr(model, 'best_iteration'):
        num_trees = model.best_iteration
        if verbose >= 3:
            print('[treeplot] >Best detected tree: %.0d' % (num_trees))
    elif num_trees is None:
        num_trees = 0

    ax1 = None
    try:
        fig, ax1 = plt.subplots(1, 1, figsize=figsize)
        plot_tree(model, num_trees=num_trees, rankdir=plottype, ax=ax1)
    except Exception:
        if _get_platform() != "windows":
            print('[treeplot] >Install graphviz first: <sudo apt install python-pydot python-pydot-ng graphviz>')

    # Plot importance
    ax2 = None
    try:
        fig, ax2 = plt.subplots(1, 1, figsize=figsize)
        plot_importance(model, max_num_features=50, ax=ax2)
    except Exception:
        print('[treeplot] >Error: importance cannot be plotted. Booster.get_score() returned an empty result. '
              'This may be caused by having all trees as decision dumps.')

    return (ax1, ax2)
bst = lgb.train(core_params, lgb_train, num_round, valid_sets=[lgb_valid])
ypred = bst.predict(X_test, num_iteration=bst.best_iteration)
mapes[airline] = mean_absolute_percentage_error(y_test, ypred)

core_params = {
    'boosting_type': 'gbdt',    # GBM type: gradient boosted decision tree, rf (random forest), dart, goss.
    'objective': 'regression',  # the optimization objective: binary, regression, multiclass, xentropy.
    'learning_rate': 0.01,      # the gradient descent learning or shrinkage rate, controls the step size.
    'num_leaves': 5,            # the number of leaves in one tree.
    'nthread': 4,               # number of threads to use for LightGBM, best set to the number of actual cores.
    'metric': 'mape'            # an additional metric to calculate during validation: mean absolute percentage error.
}
num_round = 1000

def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 1

print(mean_absolute_percentage_error(y_test, ypred))

import graphviz

bst.save_model('model.txt')
lgb.plot_tree(bst, figsize=(20, 20))
}

evals_result = {}  # to record eval results for plotting

print('Start training...')
# train
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=100,
                valid_sets=[lgb_train, lgb_test],
                feature_name=['f' + str(i + 1) for i in range(28)],
                categorical_feature=[21],
                evals_result=evals_result,
                verbose_eval=10)

print('Plot metrics during training...')
ax = lgb.plot_metric(evals_result, metric='l1')
plt.show()

print('Plot feature importances...')
ax = lgb.plot_importance(gbm, max_num_features=10)
plt.show()

print('Plot 84th tree...')
# one tree uses a categorical feature to split
ax = lgb.plot_tree(gbm, tree_index=83, figsize=(20, 8), show_info=['split_gain'])
plt.show()

print('Plot 84th tree with graphviz...')
graph = lgb.create_tree_digraph(gbm, tree_index=83, name='Tree84')
graph.render(view=True)
X_train, y_train = load_svmlight_file(os.path.join(rank_train_dir, "rank.train.txt"))
X_test, y_test = load_svmlight_file(os.path.join(rank_train_dir, "rank.test.txt"))
qgsize_train = np.loadtxt(os.path.join(rank_train_dir, "rank.train.qgsize.txt"))
qgsize_test = np.loadtxt(os.path.join(rank_train_dir, "rank.test.qgsize.txt")).reshape(-1)

model = lightgbm.LGBMRanker(boosting_type='gbdt', num_leaves=4, max_depth=-1,
                            learning_rate=0.1, n_estimators=100, min_child_samples=5)

feature_names = ['Aux lang TTR', 'Overlap word-level', 'Overlap subword-level', 'Aux lang dataset size',
                 'TTR difference ratio', 'Dataset size ratio', 'Task lang dataset size', 'GENETIC',
                 'SYNTACTIC', 'FEATURAL', 'PHONOLOGICAL', 'INVENTORY', 'GEOGRAPHIC']

gbm = model.fit(X_train, y_train, group=qgsize_train,
                eval_set=[(X_test, y_test)], eval_group=[qgsize_test], eval_at=3,
                early_stopping_rounds=40, eval_metric="ndcg", verbose=False,
                feature_name=feature_names)

print(test_lang_set[0])
if test_lang_set[0] == 'glg':
    model.booster_.save_model('./model_glg_leaves4.txt')
    ax = lightgbm.plot_tree(model.booster_, tree_index=15, figsize=(100, 40),
                            precision=2, show_info=['split_gain'])
    ax = lightgbm.plot_importance(gbm, max_num_features=10, figsize=(100, 40))
    # plt.savefig("./glg_feature_importance.png")
    plt.savefig('./glg_tree15_leaves4.png')
    plt.show()

# ax = lightgbm.plot_importance(gbm, max_num_features=10)
# plt.show()

print("================================")
print("Features:", data[0, 5:])
print("Feature importance:", model.feature_importances_)
# print("Best test NDCG@1 during training =", model.best_score_['valid_0']['ndcg@1'])
# print("Best test NDCG@2 during training =", model.best_score_['valid_0']['ndcg@2'])
print("Best test NDCG@3 during training =", model.best_score_['valid_0']['ndcg@3'])
# print("Best test NDCG@10 during training =", model.best_score_['valid_0']['ndcg@10'])
# LightGBM model
split = int(len(X_train) * 0.8)
lgbm_train_set = lgbm.Dataset(X_train[:split], y_train[:split])
lgbm_valid_set = lgbm.Dataset(X_train[split:], y_train[split:])

lgbm_params = {
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'n_estimators': 20000,
    'metric': 'mse',
    'num_leaves': 30,
    'learning_rate': 0.002,
    'early_stopping_rounds': 200
}

model = lgbm.train(lgbm_params, lgbm_train_set, 2, verbose_eval=100,
                   valid_sets=[lgbm_train_set, lgbm_valid_set])
predict = model.predict(X_test, num_iteration=model.best_iteration)

# plot the decision tree
plt.figure(figsize=(100, 50))
lgbm.plot_tree(model, tree_index=1)
plt.savefig("lgbm_tree_demonstration.png")

# save the prediction
caseid = [i for i in range(143, 1001)]
midprice = np.array(predict) + np.array(m_test)
submit = pd.DataFrame({'caseid': caseid, 'midprice': midprice})
submit.to_csv('lgbm_final.csv', index=False)
    'num_leaves': 10,
    'verbose': 0
}

l_progress = dict()
l_model = lightgbm.train(l_params, l_train,
                         valid_sets=[l_train, l_test],
                         num_boost_round=2000,
                         evals_result=l_progress,
                         verbose_eval=10,
                         feature_name=features)

plt.rcParams['figure.figsize'] = [10, 7]
lightgbm.plot_metric(l_progress)
png = plt.gcf()
lightgbm.plot_tree(l_model, tree_index=1, figsize=(60, 60), show_info=['split_gain'])
png.savefig(folder + "tree.png", dpi=100)
plt.show()
lightgbm.plot_importance(l_model)

# building trees using XGBoost
import xgboost

g_train = xgboost.DMatrix(x_train, y_train)
g_test = xgboost.DMatrix(x_test, y_test)

g_params = {
    "objective": "binary:logistic",
    'colsample_bytree': 0.3,
    'learning_rate': 0.05,
    # 'tree_method': 'hist',
    lgb_train,
    num_boost_round=100,
    valid_sets=[lgb_train, lgb_test],
    feature_name=[f'f{i + 1}' for i in range(X_train.shape[-1])],
    categorical_feature=[21],
    evals_result=evals_result,
    verbose_eval=10)

print('Plotting metrics recorded during training...')
ax = lgb.plot_metric(evals_result, metric='l1')
plt.show()

print('Plotting feature importances...')
ax = lgb.plot_importance(gbm, max_num_features=10)
plt.show()

print('Plotting split value histogram...')
ax = lgb.plot_split_value_histogram(gbm, feature='f26', bins='auto')
plt.show()

print('Plotting 54th tree...')
# one tree uses a categorical feature to split
ax = lgb.plot_tree(gbm, tree_index=53, figsize=(15, 15), show_info=['split_gain'])
plt.show()

print('Plotting 54th tree with graphviz...')
graph = lgb.create_tree_digraph(gbm, tree_index=53, name='Tree54')
graph.render(view=True)
    gcv.fit(input_data, winner_data)
    print(gcv.best_params_)
    print(gcv.best_score_)
    print(gcv.cv_results_)

    with open('gcv', 'bw') as out_f:
        import pickle
        pickle.dump(gcv, out_f)

    lgb.plot_importance(gcv.best_estimator_, figsize=(20, 35),
                        importance_type='gain', max_num_features=100)
    plt.savefig("importance.svg")
    lgb.plot_tree(gcv.best_estimator_, figsize=(80, 80))
    plt.savefig("tree.svg")
else:
    train_data = lgb.Dataset('train.bin')
    validation_data = lgb.Dataset('val.bin', reference=train_data)

    # Load num_categories if necessary.
    with open('num_categories.jsonl') as in_f:
        num_categories = json.loads(in_f.read())

    param = {}
    param['max_bin_by_feature'] = num_categories
    param['num_leaves'] = 63
    param['objective'] = 'binary'