def showPicture(xgb, num_trees=0):
    """Render one tree of an XGBoost model as PNG, open it, and return the graph.

    Args:
        xgb: Trained model handed unchanged to ``xgboost.to_graphviz``.
        num_trees: Ordinal index of the tree to draw.

    Returns:
        The graphviz object created by ``xgboost.to_graphviz`` with its
        ``format`` set to ``'png'``.
    """
    print('in %s' % sys._getframe().f_code.co_name)
    # Build the graph once; the previous extra call for tree 0 discarded its
    # result and only cost time.
    # The returned object can be displayed directly in Jupyter.
    img = xgboost.to_graphviz(xgb, num_trees=num_trees)
    img.format = 'png'
    img.view('image//xgb')
    return img
def show_tree_model(model, model_type='tree'):
    """Render a tree-based model with the ``dot`` executable and display it.

    Args:
        model: The fitted model. For ``'tree'`` it is passed to
            ``sklearn.tree.export_graphviz``; for ``'randomforest'`` each item
            of ``model.estimators_`` is exported; for ``'xgboost'`` it is
            passed to ``xgboost.to_graphviz``.
        model_type: One of ``'tree'``, ``'randomforest'`` or ``'xgboost'``.

    Raises:
        AssertionError: If ``model_type`` is not one of the supported values.
    """
    assert model_type in ['tree', 'randomforest', 'xgboost']
    from sklearn import tree
    import tempfile
    from skimage import io

    def _render(write_dot):
        """Write a DOT file via ``write_dot(path)``, convert it to PNG, return the plot."""
        with tempfile.NamedTemporaryFile(suffix='.png') as png_file:
            # splitext only strips the '.png' suffix, so a dot elsewhere in the
            # temp path cannot truncate the name.
            dot_fname = os.path.splitext(png_file.name)[0] + '.dot'
            try:
                write_dot(dot_fname)
                os.system('dot -Tpng %s -o %s' % (dot_fname, png_file.name))
                return show_image(io.imread(png_file.name))
            finally:
                # Always remove the intermediate DOT file, also for every
                # estimator of a forest and when rendering fails.
                if os.path.exists(dot_fname):
                    os.remove(dot_fname)

    if model_type == 'tree':
        show(_render(lambda fname: tree.export_graphviz(model, out_file=fname)))
    elif model_type == 'randomforest':
        graph_plots = [
            _render(lambda fname, est=est: tree.export_graphviz(est, out_file=fname))
            for est in model.estimators_
        ]
        grid = gridplot(list(utils.chunks(graph_plots, size=3)))
        show(grid)
    else:
        # It must be xgboost: export the DOT source of the xgboost graph itself
        # (the sklearn exporter and the undefined ``tree_model`` name used
        # before could not work for this branch).
        import xgboost
        show(_render(lambda fname: xgboost.to_graphviz(model).save(fname)))
def _log_trees(self, model):
    """Upload the trees listed in ``self.log_tree`` to the run as image series.

    Nothing happens when ``self.log_tree`` is ``None``. When ``self.cv`` is
    truthy, every entry of ``model.cvfolds`` is handled separately (each fold
    trains its own model) and stored under ``fold_<i>/plots/trees``;
    otherwise ``model`` itself is drawn and stored under ``plots/trees``.
    """
    if self.log_tree is None:
        return

    def _tree_images(booster):
        # One square matplotlib figure per requested tree index.
        images = []
        side = self.tree_figsize
        for tree_idx in self.log_tree:
            graph = xgb.to_graphviz(booster, num_trees=tree_idx)
            _, axes = plt.subplots(1, 1, figsize=(side, side))
            png_buffer = BytesIO(graph.pipe(format="png"))
            axes.imshow(image.imread(png_buffer))
            axes.axis("off")
            images.append(neptune.types.File.as_image(axes.figure))
        return images

    if not self.cv:
        self.run["plots/trees"] = neptune.types.FileSeries(_tree_images(model))
        plt.close("all")
        return

    for fold_idx, fold in enumerate(model.cvfolds):
        series = neptune.types.FileSeries(_tree_images(fold.bst))
        self.run[f"fold_{fold_idx}/plots/trees"] = series
        plt.close("all")
def main(_):
    """Train a small XGBoost model on random data, dump it, predict and plot.

    Args:
        _: Unused positional argument.

    Side effects:
        Writes ``0001.model``, ``dump.raw.txt`` and ``results.txt`` and reads
        ``featmap.txt`` and ``model.bin`` from the working directory.
    """
    # Training data: 5 entities with 10 features each and a binary target.
    data = np.random.rand(5, 10)
    label = np.random.randint(2, size=5)
    dtrain = xgb.DMatrix(data, label=label)
    # Alternative: build the DMatrix from a scipy.sparse.csr_matrix.

    # Booster parameters
    param = {
        'max_depth': 2,
        'eta': 1,
        'silent': 1,
        'objective': 'binary:logistic'
    }
    param['nthread'] = 4
    param['eval_metric'] = 'auc'

    # A list of pairs allows a second 'eval_metric' entry. dict.items() is a
    # view on Python 3 and does not support ``+=``, so materialize it first.
    plst = list(param.items())
    plst += [('eval_metric', 'ams@0')]

    # Testing data: 7 entities with 10 features each.
    data = np.random.rand(7, 10)
    dtest = xgb.DMatrix(data)

    # Validation sets to watch during training.
    evallist = [(dtest, 'eval'), (dtrain, 'train')]

    # Training
    num_round = 10
    bst = xgb.train(plst, dtrain, num_round, evallist)

    bst.save_model('0001.model')  # save the model
    bst.dump_model('dump.raw.txt')  # dump model
    bst.dump_model('dump.raw.txt', 'featmap.txt')  # dump model with feature map

    bst = xgb.Booster({'nthread': 4})  # init model
    # NOTE(review): this loads 'model.bin', not the '0001.model' saved above —
    # confirm that file is expected to exist.
    bst.load_model('model.bin')

    # Testing
    ypred = bst.predict(dtest)
    # With early stopping enabled in training, predict with
    # ntree_limit=bst.best_ntree_limit instead.

    # Plotting
    xgb.plot_importance(bst)
    xgb.plot_tree(bst, num_trees=2)
    xgb.to_graphviz(bst, num_trees=2)

    # A text file only accepts str, so write one prediction per line.
    with open("results.txt", "w") as results_file:
        results_file.write('\n'.join(str(p) for p in ypred))
def model_plot():
    """Load the saved booster, show tree 2 with matplotlib and return its graph.

    Returns:
        The graphviz object for tree 2. It was previously built and thrown
        away; returning it lets a notebook display it when the call is the
        last expression of a cell.
    """
    import matplotlib.pyplot as plt

    bst = xgb.Booster({"nthread": 4})  # init model
    bst.load_model("../data/model/xgb.model")  # load the stored model

    # plot_tree draws the tree with the given ordinal number via matplotlib.
    xgb.plot_tree(bst, num_trees=2)
    plt.show()

    # to_graphviz converts the target tree to a graphviz instance.
    return xgb.to_graphviz(bst, num_trees=2)
def visualizationPart(model_xgb):
    """Visualize a fitted XGBoost model: importance plot, tree graph and charts.

    Shows a gain-based importance bar chart, opens tree 1 as a graphviz view,
    prints the weight-based importance scores, and renders a word cloud and a
    ring pie chart of those scores to HTML files.

    Args:
        model_xgb: Fitted model exposing ``get_booster()``.
    """
    # Feature importance bar chart.
    # Arguments: model, bar height, max number of (sorted) features shown,
    # x-axis label, no grid. Per the original notes, importance_type 'weight'
    # counts how often a feature appears in the trees, 'gain' is the average
    # gain of splits using the feature, 'cover' the share of samples covered.
    xgb.plot_importance(
        model_xgb,
        height=0.5,
        importance_type='gain',
        max_num_features=10,
        xlabel='Gain Split',
        grid=False,
    )
    plt.show()

    # Tree rule graph: yes_color / no_color are the edge colors of the two
    # branch outcomes.
    tree_graph = xgb.to_graphviz(
        model_xgb, num_trees=1, yes_color='#638e5e', no_color='#a40000')
    tree_graph.view()

    # Collect the scores, highest first.
    booster = model_xgb.get_booster()
    importance = booster.get_score(importance_type='weight', fmap='')
    tuples = sorted(importance.items(), key=lambda pair: pair[1], reverse=True)
    labels, values = zip(*tuples)
    for item in (importance, tuples, labels, values):
        print(item)

    # Word cloud. Available outlines per the original notes: 'circle',
    # 'cardioid', 'diamond', 'triangle-forward', 'triangle', 'pentagon'.
    mywordcloud = WordCloud()
    mywordcloud.add('', tuples, shape='pentagon')
    # Render to the given output path.
    mywordcloud.render('词云.html')

    # Ring pie chart.
    chart_size = opts.InitOpts(width="1600px", height="1000px")
    legend = opts.LegendOpts(pos_left="left", orient="vertical")  # legend on the left, vertical
    tooltip = opts.TooltipOpts(
        trigger="item", formatter="{a} <br/>{b}: {c} ({d}%)")
    circular_pie_chart = Pie(init_opts=chart_size)
    circular_pie_chart = circular_pie_chart.add(
        series_name="特征重要性",
        data_pair=[list(pair) for pair in zip(labels, values)],
        radius=["15%", "50%"],  # inner and outer ring radius
        center=["30%", "40%"],  # chart position: left and top offset
        label_opts=opts.LabelOpts(is_show=True),  # show the labels
    )
    circular_pie_chart = circular_pie_chart.set_global_opts(legend_opts=legend)
    circular_pie_chart = circular_pie_chart.set_series_opts(tooltip_opts=tooltip)
    circular_pie_chart.render('环形饼图.html')
def main():
    """Fit a multiclass and then a binary XGBoost classifier on ``X``/``y``.

    The multiclass model is fitted and used to predict on the full data. The
    binary model is fitted on a train split, converted to a graphviz
    instance, and used to predict on the test split. No result is kept.
    """
    # Multiclass classification on the full data set.
    multiclass_clf = xgb.XGBClassifier(objective="multi:softprob",
                                       random_state=42)
    multiclass_clf.fit(X, y)
    multiclass_clf.predict(X)

    # Binary model evaluated with AUC on a held-out split.
    seed = 20002
    binary_clf = xgb.XGBClassifier(objective="binary:logistic",
                                   random_state=seed,
                                   eval_metric="auc")
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=seed)
    binary_clf.fit(X_train, y_train)

    # Convert the target tree to a graphviz instance.
    xgb.to_graphviz(binary_clf)
    binary_clf.predict(X_test)
def printXGBoostTree(fn, xgBoostTree, numTrees=2, yesColor='#0000FF', noColor='#FF0000'):
    """
    Creates a pdf for the given XGBoost Tree from xgboost package

    Writes ``fn + ".dot"`` and converts it to ``fn + ".pdf"`` with the
    ``dot`` executable. Prints a message and returns without doing anything
    when xgboost is not installed.

    :param fn: filename without extension
    :param xgBoostTree: XGBoost model object passed to ``xgb.to_graphviz``
    :param numTrees (Optional, Default = 2) : value forwarded as ``num_trees``
        to ``xgb.to_graphviz``, i.e. the ordinal number of the tree to draw
    :param yesColor (Optional, Default = '#0000FF') : edge color forwarded as ``yes_color``
    :param noColor (Optional, Default = '#FF0000'): edge color forwarded as ``no_color``
    :raises subprocess.CalledProcessError: if ``dot`` exits with a non-zero status
    """
    from subprocess import check_call

    if not checkModuleExists("xgboost"):
        print("Requires xgboost library. Cannot print xgboost tree")
        return
    import xgboost as xgb

    dot_path = fn + ".dot"
    graph = xgb.to_graphviz(xgBoostTree, num_trees=numTrees,
                            yes_color=yesColor, no_color=noColor)
    # save() writes the DOT source itself; the extra open() that used to wrap
    # this call only created a second, unused handle on the same file.
    graph.save(dot_path)
    check_call(["dot", "-Tpdf", dot_path, "-o", fn + ".pdf"])
def to_graphviz(self, num_trees=0, rankdir='UT', yes_color='#0000FF', no_color='#FF0000', **kwargs):
    """Convert specified tree to graphviz instance.

    IPython can automatically plot the returned graphviz instance.
    Otherwise, you should call the .render() method of the returned
    graphviz instance.

    Parameters
    ----------
    num_trees : int, default 0
        Specify the ordinal number of target tree
    rankdir : str, default "UT"
        Passed to graphviz via graph_attr
    yes_color : str, default '#0000FF'
        Edge color when the node condition is met.
    no_color : str, default '#FF0000'
        Edge color when the node condition is not met.
    kwargs :
        Other keywords passed to graphviz graph_attr

    Returns
    -------
    graph : graphviz instance returned by ``xgboost.to_graphviz``

    Raises
    ------
    ValueError
        If the wrapped estimator is not an ``xgboost.XGBModel``.
    """
    import xgboost as xgb

    estimator = self._df.estimator
    if not isinstance(estimator, xgb.XGBModel):
        raise ValueError('estimator must be XGBRegressor or XGBClassifier')

    # Newer xgboost releases expose the fitted Booster through get_booster();
    # fall back to the older booster() accessor when it is not available.
    if hasattr(estimator, 'get_booster'):
        booster = estimator.get_booster()
    else:
        booster = estimator.booster()

    return xgb.to_graphviz(booster,
                           num_trees=num_trees,
                           rankdir=rankdir,
                           yes_color=yes_color,
                           no_color=no_color,
                           **kwargs)
def test_sklearn_plotting():
    """Check the plotting helpers accept a fitted sklearn-style classifier."""
    tm._skip_if_no_sklearn()
    from sklearn.datasets import load_iris

    dataset = load_iris()
    classifier = xgb.XGBClassifier()
    classifier.fit(dataset.data, dataset.target)

    # Select the non-interactive backend before anything is drawn.
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib.axes import Axes
    from graphviz import Digraph

    importance_ax = xgb.plot_importance(classifier)
    assert isinstance(importance_ax, Axes)
    observed = (importance_ax.get_title(),
                importance_ax.get_xlabel(),
                importance_ax.get_ylabel())
    assert observed[0] == 'Feature importance'
    assert observed[1] == 'F score'
    assert observed[2] == 'Features'
    assert len(importance_ax.patches) == 4

    graph = xgb.to_graphviz(classifier, num_trees=0)
    assert isinstance(graph, Digraph)

    tree_ax = xgb.plot_tree(classifier, num_trees=0)
    assert isinstance(tree_ax, Axes)
def main():
    """Impute the pickled data set, train the model, then show tree 2 and importances."""
    raw_data = LoadFile(
        p=r'F:\ProximityDetection\Stacking\dataset_PNY\PNY_fft_cl_1.pickle')

    # Replace missing values by the column mean.
    imputer = Imputer(missing_values='NaN',
                      strategy='mean',
                      axis=0,
                      verbose=0,
                      copy=True)
    dataset_sim = imputer.fit_transform(raw_data)

    hyper_params = dict(max_depth=2,
                        learning_rate=1e-2,
                        n_estimators=300,
                        objective='binary:logistic',
                        nthread=4,
                        gamma=0.1,
                        min_child_weight=1,
                        subsample=1,
                        reg_lambda=2,
                        scale_pos_weight=1.)
    XGBoost = multi_XGBoost(**hyper_params)
    training_main(model=XGBoost, dataset_sim=dataset_sim)

    # Render tree 2 as PNG and open it.
    digraph = xgb.to_graphviz(XGBoost, num_trees=2)
    digraph.format = 'png'
    digraph.view('./ProximityDetection_xgb')

    xgb.plot_importance(XGBoost)
    plt.show()
def save_model(self):
    """Pickle ``self.model`` and write its best-iteration tree as text.

    The pickle goes to ``<path>/agent_<b_id>.pkl`` and the string form of the
    graphviz object for tree ``self.model.best_iteration`` goes to
    ``<path>/agent_<get_id()>.txt``.
    """
    pickle_target = '{0}/agent_{1}.pkl'.format(path, self.b_id)
    with open(pickle_target, 'wb') as pickle_file:
        pickle.dump(self.model, pickle_file)

    graph = to_graphviz(self.model, num_trees=self.model.best_iteration)
    graph_text = str(graph)

    text_target = '{0}/agent_{1}.txt'.format(path, self.get_id())
    with open(text_target, 'w') as text_file:
        text_file.write(graph_text)
def save_best_model(phase_number, phase_name, phase_params, phase_predictor, x_train, y_train):
    """Retrain a phase model with its best round count and save it to disk.

    Args:
        phase_number: Key into ``best_model_iterations``; also used in the
            output file name.
        phase_name: Name used in the log messages.
        phase_params: Parameters passed to ``xgb.train``.
        phase_predictor: Second element of the single ``evals`` pair.
        x_train: Training data; must provide ``num_col()``.
        y_train: First element of the single ``evals`` pair.
    """
    phase_label = f"{phase_number}: {phase_name}"
    print(f"Save best model for phase: {phase_label}")
    print(f"Column number: {x_train.num_col()}")

    rounds = best_model_iterations[phase_number]
    print(f"Best number of rounds: {rounds} for phase: {phase_label}")

    watch_list = [(y_train, phase_predictor)]
    best_model = xgb.train(phase_params,
                           x_train,
                           num_boost_round=rounds,
                           evals=watch_list)

    xgb.plot_importance(best_model)
    # Convert the target tree to a graphviz instance.
    xgb.to_graphviz(best_model, num_trees=best_model.best_iteration)

    print(f"Saving model to phase_{phase_number}_model.model")
    target = f"models-saved/xgboost/phase_{phase_number}_model.model"
    best_model.save_model(target)
def _log_trees(booster, tree_list, img_name, npt, **kwargs):
    """Render the selected trees to PNG files and log each one via ``npt``.

    Args:
        booster: Model passed to ``xgb.to_graphviz``.
        tree_list: Iterable of tree indices to render.
        img_name: Log name passed to ``npt.log_image``.
        npt: Object providing ``log_image``.
        **kwargs: Extra keyword arguments forwarded to ``xgb.to_graphviz``.
    """
    # The images live in a temporary directory below the working directory,
    # which is removed again when the block exits.
    with tempfile.TemporaryDirectory(dir='.') as work_dir:
        for tree_idx in tree_list:
            stem = 'tree_{}'.format(tree_idx)
            graph = xgb.to_graphviz(booster=booster, num_trees=tree_idx, **kwargs)  # pylint: disable=E1101
            graph.render(filename=stem,
                         directory=work_dir,
                         view=False,
                         format='png')
            png_path = os.path.join(work_dir, '{}.png'.format(stem))
            npt.log_image(img_name, png_path, image_name=stem)
def evaluate(self, dtrain_full, dtest_full, output_file):
    """Train on the full training set and write test predictions as CSV.

    Args:
        dtrain_full: Training data passed to ``xgb.train``.
        dtest_full: Test data passed to ``Booster.predict``.
        output_file: Path of the CSV to write (header ``id,target``).

    Side effects:
        Saves the booster to ``0001.model`` and creates importance and tree
        plots for it.
    """
    df_test = pd.read_csv(tf.gfile.Open(test_file_name),
                          names=[] + CSV_COLUMNS[0:1] + CSV_COLUMNS[2:],
                          skipinitialspace=True,
                          engine="python",
                          skiprows=1)
    bst = xgb.train(self.param, dtrain_full, self.num_round)
    prediction = bst.predict(dtest_full)
    bst.save_model('0001.model')

    xgb.to_graphviz(bst, num_trees=2)
    xgb.plot_importance(bst)
    xgb.plot_tree(bst, num_trees=2)

    # The context manager closes the file even when a row lookup fails.
    with open(output_file, 'w') as f:
        f.write("id,target\n")
        for i, p in enumerate(prediction):
            f.write("{},{}\n".format(df_test['id'][i], p))
def get_details_xgboost_model(model_path):
    """Return rendered visualizations for the trees of a stored XGBoost model.

    Reference:
    https://xgboost.readthedocs.io/en/latest/python/python_api.html#xgboost.to_graphviz,
    https://xgboost.readthedocs.io/en/latest/python/python_api.html#xgboost.train

    Every tree below ``model.best_ntree_limit`` is converted, but only the
    first five results are returned for now.
    """
    model = ModelVisualizer.load_pkl_model(model_path)

    def _visualize(tree_idx):
        graph = xgboost.to_graphviz(model,
                                    num_trees=tree_idx,
                                    yes_color='#000000',
                                    no_color='#000000')
        return ModelVisualizer.visualize_dot_model(graph.source)

    booster_dots = [_visualize(idx) for idx in range(model.best_ntree_limit)]
    return {"boosters": booster_dots[:5]}  ### TEMP [ :5]
def test_plotting(self):
    """Check plot_importance, to_graphviz and plot_tree on a freshly trained booster."""
    red = (1.0, 0, 0, 1.0)
    blue = (0, 0, 1.0, 1.0)

    def check_axes(axes, title, xlabel, ylabel):
        # Shared expectations for every importance plot in this test.
        assert isinstance(axes, Axes)
        assert axes.get_title() == title
        assert axes.get_xlabel() == xlabel
        assert axes.get_ylabel() == ylabel
        assert len(axes.patches) == 4

    train_params = {
        'max_depth': 2,
        'eta': 1,
        'objective': 'binary:logistic'
    }
    booster = xgb.train(train_params, xgb.DMatrix(dpath), num_boost_round=2)

    # Default labels.
    ax = xgb.plot_importance(booster)
    check_axes(ax, 'Feature importance', 'F score', 'Features')

    # Custom labels with a single color.
    ax = xgb.plot_importance(booster,
                             color='r',
                             title='t',
                             xlabel='x',
                             ylabel='y')
    check_axes(ax, 't', 'x', 'y')
    for patch in ax.patches:
        assert patch.get_facecolor() == red

    # No labels and one color per bar.
    ax = xgb.plot_importance(booster,
                             color=['r', 'r', 'b', 'b'],
                             title=None,
                             xlabel=None,
                             ylabel=None)
    check_axes(ax, '', '', '')
    for position, expected in enumerate((red, red, blue, blue)):
        assert ax.patches[position].get_facecolor() == expected

    graph = xgb.to_graphviz(booster, num_trees=0)
    assert isinstance(graph, Source)

    ax = xgb.plot_tree(booster, num_trees=0)
    assert isinstance(ax, Axes)
def test_plotting(self):
    """Check the plotting helpers on a booster loaded from ``xgb.model``."""
    bst2 = xgb.Booster(model_file='xgb.model')

    # Plotting needs a non-interactive backend.
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib.axes import Axes
    from graphviz import Digraph

    red, blue = (1.0, 0, 0, 1.0), (0, 0, 1.0, 1.0)

    # Each case: extra plot_importance keywords, expected (title, xlabel,
    # ylabel) and the expected face color of every bar (None = not checked).
    cases = [
        ({}, ('Feature importance', 'F score', 'Features'), None),
        (dict(color='r', title='t', xlabel='x', ylabel='y'),
         ('t', 'x', 'y'), [red] * 4),
        (dict(color=['r', 'r', 'b', 'b'], title=None, xlabel=None, ylabel=None),
         ('', '', ''), [red, red, blue, blue]),
    ]
    for plot_kwargs, (title, xlabel, ylabel), bar_colors in cases:
        ax = xgb.plot_importance(bst2, **plot_kwargs)
        assert isinstance(ax, Axes)
        assert ax.get_title() == title
        assert ax.get_xlabel() == xlabel
        assert ax.get_ylabel() == ylabel
        assert len(ax.patches) == 4
        if bar_colors is not None:
            for patch, expected in zip(ax.patches, bar_colors):
                assert patch.get_facecolor() == expected

    g = xgb.to_graphviz(bst2, num_trees=0)
    assert isinstance(g, Digraph)

    ax = xgb.plot_tree(bst2, num_trees=0)
    assert isinstance(ax, Axes)
def main_3(cols=['乳酸脱氢酶', '淋巴细胞(%)', '超敏C反应蛋白']):
    """Train a single-tree XGBoost classifier on the given features and plot it.

    Prints the confusion matrix and classification report for the training,
    validation and test data, then renders the single tree to
    ``single-tree.dot``.

    Args:
        cols: Feature column names. The label column ``'Type2'`` is added
            internally; the list passed in is not modified.
    """
    data_df_unna, data_pre_df = data_preprocess()

    # Work on a copy: appending to the argument would grow the shared default
    # list by one 'Type2' per call (and alter a caller's own list).
    cols = list(cols) + ['Type2']

    Tets_Y = data_pre_df.reset_index()[['PATIENT_ID', '出院方式']].copy()
    Tets_Y = Tets_Y.rename(columns={'PATIENT_ID': 'ID', '出院方式': 'Y'})
    Tets_Y['Y'] = (Tets_Y['Y'].map({'治愈': 0, '好转': 0, '死亡': 1}))
    y_true = Tets_Y['Y'].values

    x_col = cols[:-1]
    y_col = cols[-1]
    x_np = data_df_unna[x_col].values
    y_np = data_df_unna[y_col].values
    x_test = data_pre_df[x_col].values

    X_train, X_val, y_train, y_val = train_test_split(x_np,
                                                      y_np,
                                                      test_size=0.3,
                                                      random_state=6)
    model = xgb.XGBClassifier(
        max_depth=3,
        n_estimators=1,
    )
    model.fit(X_train, y_train)

    # Confusion matrix on the training set
    pred_train = model.predict(X_train)
    show_confusion_matrix(y_train, pred_train)
    print(classification_report(y_train, pred_train))

    # Confusion matrix on the validation set
    pred_val = model.predict(X_val)
    show_confusion_matrix(y_val, pred_val)
    print(classification_report(y_val, pred_val))

    # Confusion matrix on the test set
    pred_test = model.predict(x_test)
    show_confusion_matrix(y_true, pred_test)
    print(classification_report(y_true, pred_test))

    # Single-tree visualization
    ceate_feature_map(x_col)
    graph = xgb.to_graphviz(model,
                            fmap='xgb.fmap',
                            num_trees=0,
                            **{'size': str(10)})
    graph.render(filename='single-tree.dot')
def pickle_xgb(self, pickle_path, tree_path):
    """Pickle the classifier bundle, render tree 1 and show feature importances.

    Args:
        pickle_path: Path prefix for the pickle file.
        tree_path: Path prefix for the rendered tree (``.gv`` is appended).
    """
    model_dict = {
        'clf': self.clf,
        'params': self.params,
        'vectorizer': self.vectorizer,
        'test_pred': (self.y_test, self.pred),
        'self_prob_pred': self.pred_proba,
        'confusion_matrix': self.cm
    }
    labels = ['n_estimators', 'max_depth', 'learning_rate']
    val = [self.params.get(v) for v in labels]
    file_name = "xgb_ntrees_{0}_depth_{1}_eta_{2}".format(*val)

    pickle_path += file_name
    # NOTE(review): file_name ends up in the pickle path twice (appended above
    # and again below). Kept as is so existing readers still find the file —
    # confirm whether this is intended.
    with open(pickle_path + file_name + ".p", "wb") as pickle_file:
        # The context manager closes the handle; it used to be left open.
        pickle.dump(model_dict, pickle_file)

    # Write Trees
    self.clf.get_booster(
    ).feature_names = self.vectorizer.get_feature_names()
    tree_path += file_name
    xgb.to_graphviz(self.clf, num_trees=1).render(tree_path + ".gv")

    xgb.plot_importance(self.clf, max_num_features=20)
    plt.show()
    return
def main():
    """Render every tree of a saved booster to ``<out_dir>/<index>.png``.

    Command line: ``out_dir model_file [feature_file]``. When a feature file
    is given, the second whitespace-separated field of each line is used as
    feature name and the third as feature type.
    """
    out_dir = sys.argv[1]
    model_file = sys.argv[2]

    bst = xgb.Booster(model_file=model_file)
    num_trees = len(bst.get_dump())

    if len(sys.argv) > 3:
        with open(sys.argv[3]) as feature_file:
            features = [line.split() for line in feature_file]
        bst.feature_names = [fields[1] for fields in features]
        bst.feature_types = [fields[2] for fields in features]

    for i in range(num_trees):
        sys.stdout.write('\rtree id: %d' % i)
        # Pass the index by keyword: in newer xgboost releases the second
        # positional parameter of to_graphviz is ``fmap``, not ``num_trees``.
        graph = xgb.to_graphviz(bst, num_trees=i)
        graph.format = 'png'
        graph.render(str(i), directory=out_dir, view=False, cleanup=True)
def model_train(data):
    """Export feature scores of the stored booster to CSV and render tree 1.

    Reads the comma-separated feature names from ``fmap.csv``, writes them as
    an xgboost feature map to ``fmap.txt``, loads ``xgboost_3.model`` and
    writes weight, gain and f-score per feature to
    ``./result/feature_result_3.csv``.

    Args:
        data: Not used by this function.
    """
    with open('fmap.csv', encoding='utf-8') as f:
        fmap = f.read().strip().split(',')

    # Feature map format: "<index>\t<name>\tq" per line.
    with open('fmap.txt', 'w', encoding='utf-8') as w:
        for index, feature in enumerate(fmap):
            w.write(str(index) + '\t' + feature + '\tq\n')

    bst = xgb.Booster()
    bst.load_model('xgboost_3.model')
    score_weight = bst.get_score('fmap.txt', 'weight')
    score_gain = bst.get_score('fmap.txt', 'gain')
    score_importance = bst.get_fscore('fmap.txt')

    with open('./result/feature_result_3.csv',
              'w',
              encoding='utf-8',
              newline='') as w:
        writer = csv.writer(w)
        writer.writerow(
            ['f_name', 'score_weight', 'score_gain', 'score_importance'])
        # The split-value histogram that used to be computed per feature here
        # was never used, so it is no longer requested.
        for f_name, importance in score_importance.items():
            writer.writerow([
                f_name, score_weight[f_name], score_gain[f_name], importance
            ])

    xgb.to_graphviz(bst, num_trees=1, rankdir='RL').render()
def fit(self, dataset: Dataset):
    """Fit the classifier on ``dataset`` and parse every boosted tree.

    After fitting, each tree is converted with ``xgb.to_graphviz`` and its
    string form is parsed by ``XGBoostTree.parse``; the results are collected
    in ``self.trees``.
    """
    self.set_dataset(dataset)
    self.trees = []
    self.clf.fit(self.dataset.X, self.dataset.y)

    n_estimators = self.clf.n_estimators
    n_classes = self.dataset.num_classes()
    # More than two classes: the tree count is estimators times classes.
    if n_classes > 2:
        n_trees = n_estimators * n_classes
    else:
        n_trees = n_estimators

    self.trees.extend(
        XGBoostTree.parse(str(xgb.to_graphviz(self.clf, num_trees=idx)),
                          self.dataset)
        for idx in range(n_trees))
def run_categorical(self, tree_method: str) -> None:
    """Train a regressor on categorical data and check its dump and plots.

    Args:
        tree_method: Value passed as ``tree_method`` to ``XGBRegressor``.
    """
    X, y = tm.make_categorical(1000, 31, 19, onehot=False)
    regressor = xgb.XGBRegressor(enable_categorical=True,
                                 n_estimators=10,
                                 tree_method=tree_method)
    regressor.fit(X, y)

    # Every root is either a leaf or splits on a list of categories.
    for dumped in regressor.get_booster().get_dump(dump_format="json"):
        j_tree = json.loads(dumped)
        is_leaf = "leaf" in j_tree.keys()
        assert is_leaf or isinstance(j_tree["split_condition"], list)

    # NOTE(review): the index is derived from the key count of the last
    # parsed tree, not from the number of trees — confirm this is intended.
    tree_index = len(j_tree) - 1
    graph = xgb.to_graphviz(regressor, num_trees=tree_index)
    assert isinstance(graph, Source)
    ax = xgb.plot_tree(regressor, num_trees=tree_index)
    assert isinstance(ax, Axes)
def train_xgb(data_x, data_y):
    """Train an XGBoost regressor on a 70/15/15 ordered split and report scores.

    The data is split without shuffling into train, validation and test
    parts. The model is trained with early stopping on the validation set,
    tree 0 is plotted and rendered, and RMSE / R^2 are printed for the
    training and test parts.

    Args:
        data_x: Feature matrix.
        data_y: Target values.
    """
    # xgboost does not need normalized inputs.
    train_x, rest_x, train_y, rest_y = train_test_split(
        data_x, data_y, test_size=0.3, shuffle=False, stratify=None)
    val_x, test_x, val_y, test_y = train_test_split(
        rest_x, rest_y, test_size=0.5, shuffle=False, stratify=None)
    print('X_train : {} X_test : {}'.format(train_x.shape, test_x.shape))
    print('y_train : {} y_test : {}'.format(train_y.shape, test_y.shape))

    # Training
    dtrain = xgb.DMatrix(train_x, label=train_y)
    dval = xgb.DMatrix(val_x, label=val_y)
    dtest = xgb.DMatrix(test_x, label=test_y)
    param = {
        'booster': 'gbtree',
        'silent': 1,
        'nthread': -1,
        'objective': 'reg:linear',
        'eval_metric': 'rmse',
        'eta': 0.01,
        'gamma': 0.1,
        'max_depth': 5,
        'min_child_weight': 1,
        'subsample': 0.8,
        'colsample_bytree': 0.8,
        'lambda': 0.1,
        'alpha': 0,
        'tree_method': 'auto',
        'predictor': 'cpu_predictor',
    }
    bst = xgb.train(param,
                    dtrain,
                    num_boost_round=500,
                    evals=[(dval, 'eval'), (dtrain, 'train')],
                    early_stopping_rounds=10,
                    verbose_eval=20)

    # Predictions (the de-normalization step below is disabled).
    train_y_pred = bst.predict(dtrain)
    val_y_pred = bst.predict(dval)
    test_y_pred = bst.predict(dtest)
    '''
    train_y_pred = np.expm1(train_y_pred)
    train_y = np.expm1(train_y)
    val_y_pred = np.expm1(val_y_pred)
    val_y = np.expm1(val_y)
    test_y_pred = np.expm1(test_y_pred)
    test_y = np.expm1(test_y)
    test_y_pred = np.concatenate((val_y_pred, test_y_pred), axis=0)
    test_y = np.concatenate((val_y, test_y), axis=0)
    include = pd.DataFrame(data={'train_y': train_y, 'train_y_pred': train_y_pred})
    include.to_excel('E:/Pycharm/Mywork/aqi/log/result/train.xlsx')
    exclude = pd.DataFrame(data={'test_y': test_y, 'test_y_pred': test_y_pred})
    exclude.to_excel('E:/Pycharm/Mywork/aqi/log/result/test.xlsx')
    '''

    importance(bst, 24)
    xgb.plot_tree(bst,
                  fmap='Gradient boosting tree',
                  num_trees=0,
                  rankdir='UT',
                  ax=None)
    inst = xgb.to_graphviz(bst,
                           fmap='',
                           num_trees=0,
                           rankdir='UT',
                           yes_color='#0000FF',
                           no_color='#FF0000')
    inst.render()

    # Report training scores first, then test scores.
    for title, actual, predicted in (('Train', train_y, train_y_pred),
                                     ('Test', test_y, test_y_pred)):
        rmse = math.sqrt(mse(actual, predicted))
        r2 = r2_score(actual, predicted)
        print(title + ' RMSE = %f, R^2 = %f' % (rmse, r2))

    drawing(test_y, test_y_pred)
def trees_graphviz_plot(model_file, featmap_file):
    """Return an HTML snippet embedding the first tree of a saved booster as PNG.

    Args:
        model_file: Path of the saved XGBoost model.
        featmap_file: Path of the feature map passed as ``fmap``.

    Returns:
        A centered ``<div>`` containing an ``<img>`` with the PNG inlined as a
        base64 data URI.
    """
    model = xgb.Booster(model_file=model_file)
    dot = xgb.to_graphviz(model, fmap=featmap_file)

    # Render in memory. The previous version rendered into a "dot" directory
    # below the working directory and then removed that directory with
    # rmtree, which would also delete an unrelated existing "dot" directory
    # and left files behind when rendering failed.
    png_bytes = dot.pipe(format="png")

    encoded = base64.b64encode(png_bytes).decode(encoding="utf-8")
    html = '<div style="text-align: center;"><img src="data:image/png;base64,{}"></div>'.format(
        encoded)
    return html
def plotio(inputs, outputs, xs):
    """Train boosters with 1..8 rounds, plot each as text, save the last tree.

    For every round count the predictions for ``xs`` are logged and drawn on
    a character grid sized from the terminal, with the training points marked
    by a blue ``*``. The graph of the last booster is written to
    ``plotio.pdf``.

    Args:
        inputs: Training rows; the first value of each row is plotted.
        outputs: Training targets; the first value of each row is plotted.
        xs: Values to predict for, one single-feature row each.
    """
    term = shutil.get_terminal_size((111, 11))
    wh = (term.columns - 1, min(3, term.lines - 3))
    param = {'max_depth': 2, 'eta': 1, 'objective': 'reg:squarederror'}

    # The training matrix is the same for every round count; build it once.
    dtrain = xgb.DMatrix(inputs, label=outputs)
    input_heads = [i[0] for i in inputs]
    output_heads = [o[0] for o in outputs]

    for rounds in range(1, 9):
        best = xgb.train(param, dtrain, num_boost_round=rounds)
        ys = [best.predict(xgb.DMatrix([[x]]))[0] for x in xs]
        log(rounds, xs, ys)

        a = [[' ' for _ in range(wh[0])] for _ in range(wh[1])]
        # Named ``mapping`` so the builtin ``map`` is not shadowed.
        mapping = plot(a, xs, ys)
        for *_, ax, ay in mapping.zip(input_heads, output_heads):
            a[ay][ax] = '\033[34m*\033[0m'
        print('\n'.join(''.join(y) for y in a))

    gv = xgb.to_graphviz(best)
    # pipe() is called without a format, so the graph's own format is used;
    # the file name assumes that is PDF.
    with open('plotio.pdf', 'wb') as pdf_file:
        pdf_file.write(gv.pipe())
def extract_tree_threshold(trees):
    """
    Take BST TREE and return a dict =
    {feature label : sorted array of split positions}

    The split positions are read from the graphviz body of every tree: each
    double-quoted fragment containing both 'f' and '<' is split at '<' into
    the feature label and the threshold.
    """
    collected = {}
    tree_count = len(trees.get_dump())
    for tree_idx in range(tree_count):
        graph = xgb.to_graphviz(trees, num_trees=tree_idx)
        for body_line in graph.body:
            for fragment in body_line.split('"'):
                if not ('f' in fragment and '<' in fragment):
                    continue
                parts = fragment.split("<")
                threshold = float(parts[1])
                collected.setdefault(parts[0], []).append(threshold)

    return {
        feature: np.sort(np.array(values))
        for feature, values in collected.items()
    }
import graphviz
import xgboost as xgb
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import plot_confusion_matrix
import matplotlib.pyplot as plt

# Script: fit an XGBoost classifier on the wine data set, build a graphviz
# object for its first tree and plot the confusion matrix on the test split.
print(os.getcwd())

data = pd.read_csv('../Data/WineDataset/wine.data.csv')
# The last column is the class label, everything before it is a feature.
X = data.values[:, :-1]
y = data.values[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

xgb_clf = xgb.XGBClassifier()
xgb_clf.fit(X_train, y_train)

dot_data_xg = xgb.to_graphviz(xgb_clf)
# to_graphviz already returns a graphviz object; graphviz.Source expects DOT
# text, so hand it the source string instead of the object itself.
graph = graphviz.Source(dot_data_xg.source)
# graph.render("xgb_tree_wine")

y_hat = xgb_clf.predict(X_test)
plot_confusion_matrix(xgb_clf,
                      X_test,
                      y_test,
                      display_labels=['class 0', 'class 1', 'class 2'],
                      values_format='.0f')
X_test_leaves = xgb_clf.apply(X_test)
# plt.show(block=True)
x = 0
import xgboost as xgb

# Script: fit a booster that predicts the second output value ("velocity")
# from the inputs, log a sample of predictions and save the first tree.
_, inputs, outputs = load_inputs()
velocity = [o[1] for o in outputs]

param = {'max_depth': 3}
dtrain = xgb.DMatrix(inputs, label=velocity)
best = xgb.train(param,
                 dtrain,
                 evals=[(dtrain, 'train')],
                 num_boost_round=314)

# ``features`` instead of ``input`` so the builtin is not shadowed.
for count, (features, expected) in enumerate(zip(inputs, outputs)):
    prediction = best.predict(xgb.DMatrix([features]))[0]
    log(f"prediction {prediction} expected {expected[1]}")
    if 32 < count:
        break

gv = xgb.to_graphviz(best)
# pipe() is called without a format, so the graph's own format is used; the
# context manager closes the file handle that used to be left open.
with open('velocity.pdf', 'wb') as pdf_file:
    pdf_file.write(gv.pipe())

if '--tf' in sys.argv:  # Inference with TF
    from tensorflow import keras
    from tensorflow.keras import layers

    tf_inputs = keras.Input(shape=(5, ), name="state-and-action")
    x = layers.Dense(314, activation="relu", name="dense_1")(tf_inputs)
    x = layers.Dense(314, activation="relu", name="dense_2")(x)
    tf_outputs = layers.Dense(4, activation="softmax",
                              name="state-prediction")(x)
    # 3 .. 0.18
    # 31 .. 0.18
    # 314 .. 0.18