def showPicture(xgb, num_trees=0):
    """Render one tree of an XGBoost model as a PNG and open it in a viewer.

    :param xgb: model object handed straight to ``xgboost.to_graphviz``.
    :param num_trees: ordinal index of the tree to draw (default 0).
    :return: the graphviz object produced by ``xgboost.to_graphviz`` with its
        ``format`` set to ``'png'``. Per the original author's note, the
        returned object can be displayed directly in Jupyter.
    """
    print('in %s' % sys._getframe().f_code.co_name)
    # Build the graph once, for the requested tree only; the previous extra
    # call for tree 0 discarded its result and was pure overhead.
    img = xgboost.to_graphviz(xgb, num_trees=num_trees)
    img.format = 'png'
    img.view('image//xgb')
    return img
def _sklearn_tree_to_plot(tree_model):
    """Render one scikit-learn decision tree with Graphviz ``dot`` and wrap the image via ``show_image``."""
    import subprocess
    import tempfile
    from skimage import io
    from sklearn import tree

    with tempfile.NamedTemporaryFile(suffix='.png') as fout:
        # splitext strips only the '.png' suffix, so a temp directory whose
        # path contains dots no longer yields a truncated file name.
        dot_fname = os.path.splitext(fout.name)[0] + '.dot'
        try:
            tree.export_graphviz(tree_model, out_file=dot_fname)
            # Argument list instead of a formatted shell string: paths with
            # spaces or shell metacharacters are passed through untouched.
            subprocess.call(['dot', '-Tpng', dot_fname, '-o', fout.name])
            return show_image(io.imread(fout.name))
        finally:
            # Always clean up the intermediate .dot file, one per tree.
            if os.path.exists(dot_fname):
                os.remove(dot_fname)


def show_tree_model(model, model_type='tree'):
    """Display a tree-based model as rendered Graphviz image(s).

    :param model: the fitted model. For ``'tree'`` it is exported with
        ``sklearn.tree.export_graphviz``; for ``'randomforest'`` every entry of
        ``model.estimators_`` is exported and shown in a grid, three per row;
        for ``'xgboost'`` it is passed to ``xgboost.to_graphviz``.
    :param model_type: one of ``'tree'``, ``'randomforest'``, ``'xgboost'``.
    :raises AssertionError: if ``model_type`` is not one of the supported values.
    """
    assert model_type in ['tree', 'randomforest', 'xgboost']
    if model_type == 'tree':
        show(_sklearn_tree_to_plot(model))
    elif model_type == 'randomforest':
        graph_plots = [_sklearn_tree_to_plot(tree_model)
                       for tree_model in model.estimators_]
        grid = gridplot(list(utils.chunks(graph_plots, size=3)))
        show(grid)
    else:
        # It must be xgboost: render the graph returned by to_graphviz rather
        # than feeding an (undefined) sklearn tree to export_graphviz.
        import tempfile
        import xgboost
        from skimage import io

        graph = xgboost.to_graphviz(model)
        with tempfile.NamedTemporaryFile(suffix='.png') as fout:
            fout.write(graph.pipe(format='png'))
            fout.flush()  # make the bytes visible to imread, which reopens by name
            show(show_image(io.imread(fout.name)))
 def _log_trees(self, model):
     """Upload rendered tree images for every index listed in ``self.log_tree``.

     Does nothing when ``self.log_tree`` is ``None``. When ``self.cv`` is truthy,
     each entry of ``model.cvfolds`` gets its own series under
     ``fold_<i>/plots/trees`` (a different model is trained on each fold);
     otherwise a single series is stored under ``plots/trees``.
     """
     if self.log_tree is None:
         return

     def tree_series(booster):
         # Render each requested tree to PNG bytes, draw it on a square
         # axis-free figure and wrap the figures in one neptune FileSeries.
         figures = []
         for tree_index in self.log_tree:
             graph = xgb.to_graphviz(booster, num_trees=tree_index)
             _, axes = plt.subplots(1, 1, figsize=(self.tree_figsize, self.tree_figsize))
             png_buffer = BytesIO()
             png_buffer.write(graph.pipe(format="png"))
             png_buffer.seek(0)
             axes.imshow(image.imread(png_buffer))
             axes.axis("off")
             figures.append(neptune.types.File.as_image(axes.figure))
         return neptune.types.FileSeries(figures)

     if not self.cv:
         self.run["plots/trees"] = tree_series(model)
         plt.close("all")
         return

     for fold_index, fold in enumerate(model.cvfolds):
         self.run[f"fold_{fold_index}/plots/trees"] = tree_series(fold.bst)
         plt.close("all")
Exemple #4
0
def main(_):
    """Train a tiny XGBoost binary classifier on random data and exercise the basic API.

    Builds ``DMatrix`` objects, trains with a watch list, saves and dumps the
    model, loads a model file, predicts, plots, and writes the predictions to
    ``results.txt`` (one value per line).

    :param _: ignored by the function body.
    """
    # Training data: 5 entities with 10 random features each, binary target.
    data = np.random.rand(5, 10)
    label = np.random.randint(2, size=5)
    dtrain = xgb.DMatrix(data, label=label)

    # Booster parameters.
    param = {
        'max_depth': 2,
        'eta': 1,
        'silent': 1,
        'objective': 'binary:logistic',
        'nthread': 4,
        'eval_metric': 'auc',
    }

    # A parameter list (rather than a dict) allows a second eval_metric entry.
    # dict.items() is a view on Python 3 and has no +=, so materialize it first.
    plst = list(param.items())
    plst.append(('eval_metric', 'ams@0'))

    # Testing data: 7 entities with 10 random features each.
    # NOTE(review): dtest carries no labels yet is part of the watch list --
    # confirm the evaluation metrics behave as intended on it.
    data = np.random.rand(7, 10)
    dtest = xgb.DMatrix(data)

    # Validation sets to watch during training.
    evallist = [(dtest, 'eval'), (dtrain, 'train')]

    # Training.
    num_round = 10
    bst = xgb.train(plst, dtrain, num_round, evallist)
    bst.save_model('0001.model')  # save the model
    bst.dump_model('dump.raw.txt')  # dump model
    bst.dump_model('dump.raw.txt',
                   'featmap.txt')  # dump model with feature map
    # NOTE(review): this loads 'model.bin', not the '0001.model' saved above --
    # confirm that file is expected to exist.
    bst = xgb.Booster({'nthread': 4})  # init model
    bst.load_model('model.bin')  # load data

    # Testing.
    ypred = bst.predict(dtest)

    # Plotting.
    xgb.plot_importance(bst)
    xgb.plot_tree(bst, num_trees=2)
    xgb.to_graphviz(bst, num_trees=2)

    # A text file only accepts str, so format each prediction explicitly; the
    # context manager closes the file even if a write fails.
    with open("results.txt", "w") as results:
        results.writelines("%s\n" % p for p in ypred)
def model_plot():
    """Load the saved booster and draw the tree with ordinal number 2.

    The tree is first plotted through matplotlib (``plot_tree`` followed by
    ``plt.show()``), then converted with ``to_graphviz``; the graphviz object
    is not stored or returned.
    """
    from matplotlib import pyplot as plt

    tree_index = 2
    booster = xgb.Booster({"nthread": 4})
    booster.load_model("../data/model/xgb.model")

    # matplotlib rendering of the selected tree
    xgb.plot_tree(booster, num_trees=tree_index)
    plt.show()

    # graphviz conversion of the same tree (useful when run under IPython,
    # which renders graphviz instances automatically)
    xgb.to_graphviz(booster, num_trees=tree_index)
def visualizationPart(model_xgb):
    """Produce the importance plot, a tree graph, a word cloud and a ring pie chart for a model.

    :param model_xgb: fitted model; it must offer ``get_booster()`` and be
        accepted by ``xgb.plot_importance`` / ``xgb.to_graphviz``.
    """
    # Feature-importance bar chart ranked by gain, limited to the top 10
    # features, without a grid.
    xgb.plot_importance(
        model_xgb,
        height=0.5,
        importance_type='gain',
        max_num_features=10,
        xlabel='Gain Split',
        grid=False,
    )
    plt.show()

    # Rule graph of tree 1 with custom colours for the yes / no edges.
    rule_graph = xgb.to_graphviz(
        model_xgb,
        num_trees=1,
        yes_color='#638e5e',
        no_color='#a40000',
    )
    rule_graph.view()

    # Split-count ("weight") importance, sorted from most to least used.
    booster = model_xgb.get_booster()
    importance = booster.get_score(importance_type='weight', fmap='')
    tuples = sorted(importance.items(), key=lambda pair: pair[1], reverse=True)
    labels, values = zip(*tuples)
    for item in (importance, tuples, labels, values):
        print(item)

    # Word cloud of the ranked features, rendered to an HTML file.
    mywordcloud = WordCloud()
    mywordcloud.add('', tuples, shape='pentagon')
    mywordcloud.render('词云.html')

    # Ring-shaped pie chart of the same data.
    pie = Pie(init_opts=opts.InitOpts(width="1600px", height="1000px"))
    pie = pie.add(
        series_name="特征重要性",
        data_pair=[[label, value] for label, value in zip(labels, values)],
        radius=["15%", "50%"],  # inner and outer radius of the ring
        center=["30%", "40%"],  # position: left and top offsets
        label_opts=opts.LabelOpts(is_show=True),
    )
    # Legend on the left, laid out vertically.
    pie = pie.set_global_opts(
        legend_opts=opts.LegendOpts(pos_left="left", orient="vertical"))
    circular_pie_chart = pie.set_series_opts(
        tooltip_opts=opts.TooltipOpts(
            trigger="item", formatter="{a} <br/>{b}: {c} ({d}%)"))
    circular_pie_chart.render('环形饼图.html')
def main():
    """Fit two XGBoost classifiers on the module-level ``X`` / ``y`` data.

    A ``multi:softprob`` model is fitted and used to predict on the full data;
    a ``binary:logistic`` model is then fitted on a train split, converted
    with ``to_graphviz`` and used to predict on the test split. No result is
    returned or stored.
    """
    # Multiclass classification on the full data set.
    multiclass = xgb.XGBClassifier(objective="multi:softprob", random_state=42)
    multiclass.fit(X, y)
    multiclass.predict(X)

    # Binary model on a train/test split sharing one seed.
    seed = 20002
    binary = xgb.XGBClassifier(objective="binary:logistic",
                               random_state=seed,
                               eval_metric="auc")
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed)
    binary.fit(X_train, y_train)

    # Convert the target tree to a graphviz instance, then predict.
    xgb.to_graphviz(binary)
    binary.predict(X_test)
Exemple #8
0
def printXGBoostTree(fn,
                     xgBoostTree,
                     numTrees=2,
                     yesColor='#0000FF',
                     noColor='#FF0000'):
    """
    Creates a pdf for one tree of the given XGBoost model.

    Writes ``<fn>.dot`` and then runs Graphviz ``dot`` to produce ``<fn>.pdf``.
    Prints a message and returns without doing anything when the xgboost
    package is not available.

    :param fn: output path without extension
    :param xgBoostTree: XGBoost model object passed to ``xgb.to_graphviz``
    :param numTrees (Optional, Default = 2) : value forwarded as ``num_trees`` to ``xgb.to_graphviz``
    :param yesColor (Optional, Default = '#0000FF') : value forwarded as ``yes_color``
    :param noColor (Optional, Default = '#FF0000'): value forwarded as ``no_color``
    :raises subprocess.CalledProcessError: if the ``dot`` command fails
    """
    from subprocess import check_call
    if checkModuleExists("xgboost"):
        import xgboost as xgb
    else:
        print("Requires xgboost library. Cannot print xgboost tree")
        return

    dot_path = fn + ".dot"
    val = xgb.to_graphviz(xgBoostTree,
                          num_trees=numTrees,
                          yes_color=yesColor,
                          no_color=noColor)

    # save() writes the file itself; pre-opening it for writing only left an
    # empty .dot file behind whenever to_graphviz raised.
    val.save(dot_path)
    check_call(["dot", "-Tpdf", dot_path, "-o", fn + ".pdf"])
Exemple #9
0
    def to_graphviz(self, num_trees=0, rankdir='UT',
                    yes_color='#0000FF', no_color='#FF0000', **kwargs):

        """Convert the specified tree to a graphviz instance.

        IPython can automatically plot the returned graphviz instance.
        Otherwise, you should call the ``.render()`` method of the returned
        graphviz instance.

        Parameters
        ----------
        num_trees : int, default 0
            Specify the ordinal number of target tree
        rankdir : str, default "UT"
            Passed to graphviz via graph_attr
        yes_color : str, default '#0000FF'
            Edge color when the node condition is met.
        no_color : str, default '#FF0000'
            Edge color when the node condition is not met.
        kwargs :
            Other keywords passed to graphviz graph_attr

        Returns
        -------
        graph : graphviz instance
            Whatever ``xgboost.to_graphviz`` returns for the selected tree.

        Raises
        ------
        ValueError
            If the wrapped estimator is not an ``xgboost.XGBModel``.
        """

        import xgboost as xgb

        estimator = self._df.estimator
        if not isinstance(estimator, xgb.XGBModel):
            raise ValueError('estimator must be XGBRegressor or XGBClassifier')

        # Newer xgboost releases expose get_booster(); fall back to the older
        # booster() accessor so both API generations keep working.
        get_booster = getattr(estimator, 'get_booster', None)
        if get_booster is None:
            get_booster = estimator.booster
        return xgb.to_graphviz(get_booster(),
                               num_trees=num_trees, rankdir=rankdir,
                               yes_color=yes_color, no_color=no_color, **kwargs)
def test_sklearn_plotting():
    """Check the three plotting helpers on an XGBClassifier fitted to the iris data."""
    tm._skip_if_no_sklearn()
    from sklearn.datasets import load_iris

    dataset = load_iris()
    clf = xgb.XGBClassifier()
    clf.fit(dataset.data, dataset.target)

    import matplotlib
    matplotlib.use('Agg')

    from matplotlib.axes import Axes
    from graphviz import Digraph

    # Importance bar chart: default texts and one bar per iris feature.
    importance_axes = xgb.plot_importance(clf)
    assert isinstance(importance_axes, Axes)
    expected_texts = (
        (importance_axes.get_title, 'Feature importance'),
        (importance_axes.get_xlabel, 'F score'),
        (importance_axes.get_ylabel, 'Features'),
    )
    for getter, expected in expected_texts:
        assert getter() == expected
    assert len(importance_axes.patches) == 4

    # Graphviz conversion of the first tree.
    assert isinstance(xgb.to_graphviz(clf, num_trees=0), Digraph)

    # Matplotlib rendering of the first tree.
    assert isinstance(xgb.plot_tree(clf, num_trees=0), Axes)
def test_sklearn_plotting():
    """Fit an XGBClassifier on iris and verify the return types of the plotting API."""
    tm._skip_if_no_sklearn()
    from sklearn.datasets import load_iris

    iris_data = load_iris()
    model = xgb.XGBClassifier()
    model.fit(iris_data.data, iris_data.target)

    import matplotlib
    matplotlib.use('Agg')

    from matplotlib.axes import Axes
    from graphviz import Digraph

    first_tree = 0

    bars = xgb.plot_importance(model)
    assert isinstance(bars, Axes)
    assert bars.get_title() == 'Feature importance'
    assert bars.get_xlabel() == 'F score'
    assert bars.get_ylabel() == 'Features'
    # one bar per feature of the iris data set
    assert len(bars.patches) == 4

    digraph = xgb.to_graphviz(model, num_trees=first_tree)
    assert isinstance(digraph, Digraph)

    tree_axes = xgb.plot_tree(model, num_trees=first_tree)
    assert isinstance(tree_axes, Axes)
def main():
    """Impute the pickled data set, train the multi_XGBoost model and visualise it.

    After training, the tree with ordinal number 2 is opened as a PNG and the
    feature-importance plot is shown.
    """
    raw = LoadFile(
        p=r'F:\ProximityDetection\Stacking\dataset_PNY\PNY_fft_cl_1.pickle')

    # Replace missing values by the column mean.
    imputer_options = dict(missing_values='NaN',
                           strategy='mean',
                           axis=0,
                           verbose=0,
                           copy=True)
    dataset_sim = Imputer(**imputer_options).fit_transform(raw)

    model_options = dict(max_depth=2,
                         learning_rate=1e-2,
                         n_estimators=300,
                         objective='binary:logistic',
                         nthread=4,
                         gamma=0.1,
                         min_child_weight=1,
                         subsample=1,
                         reg_lambda=2,
                         scale_pos_weight=1.)
    model = multi_XGBoost(**model_options)
    training_main(model=model, dataset_sim=dataset_sim)

    # Tree graph as PNG, opened in the default viewer.
    tree_graph = xgb.to_graphviz(model, num_trees=2)
    tree_graph.format = 'png'
    tree_graph.view('./ProximityDetection_xgb')

    xgb.plot_importance(model)
    plt.show()
Exemple #13
0
    def save_model(self):
        """Pickle ``self.model`` and write the graphviz text of one of its trees.

        The pickle goes to ``<path>/agent_<b_id>.pkl``; the graph text of the
        tree numbered ``self.model.best_iteration`` goes to
        ``<path>/agent_<get_id()>.txt``.
        """
        pickle_name = path + '/agent_{0}.pkl'.format(self.b_id)
        with open(pickle_name, 'wb') as pickle_file:
            pickle.dump(self.model, pickle_file)

        best_tree = to_graphviz(self.model, num_trees=self.model.best_iteration)
        text_name = path + '/agent_{0}.txt'.format(self.get_id())
        with open(text_name, 'w') as text_file:
            text_file.write(str(best_tree))
def save_best_model(phase_number, phase_name, phase_params, phase_predictor,
                    x_train, y_train):
    """Retrain a phase's model with its best round count, plot it and save it.

    The number of boosting rounds is read from the module-level
    ``best_model_iterations`` mapping, keyed by ``phase_number``. The model is
    written to ``models-saved/xgboost/phase_<phase_number>_model.model``.
    """
    print(f"Save best model for phase: {phase_number}: {phase_name}")
    print(f"Column number: {x_train.num_col()}")
    print(
        f"Best number of rounds: {best_model_iterations[phase_number]} for phase: {phase_number}: {phase_name}"
    )

    rounds = best_model_iterations[phase_number]
    watch_list = [(y_train, phase_predictor)]
    best_model = xgb.train(phase_params,
                           x_train,
                           num_boost_round=rounds,
                           evals=watch_list)

    xgb.plot_importance(best_model)

    # Convert the tree at the best iteration to a graphviz instance.
    xgb.to_graphviz(best_model, num_trees=best_model.best_iteration)

    print(f"Saving model to phase_{phase_number}_model.model")
    target = f"models-saved/xgboost/phase_{phase_number}_model.model"
    best_model.save_model(target)
Exemple #15
0
def _log_trees(booster, tree_list, img_name, npt, **kwargs):
    """Render each tree in ``tree_list`` to a PNG and log it through ``npt.log_image``.

    The PNG files are written into a temporary directory created under the
    current working directory; it is removed when the function finishes.
    Extra keyword arguments are forwarded to ``xgb.to_graphviz``.
    """
    with tempfile.TemporaryDirectory(dir='.') as workdir:
        for tree_index in tree_list:
            stem = f'tree_{tree_index}'
            graph = xgb.to_graphviz(booster=booster, num_trees=tree_index, **kwargs)  # pylint: disable=E1101
            graph.render(filename=stem, directory=workdir, view=False, format='png')
            png_path = os.path.join(workdir, f'{stem}.png')
            npt.log_image(img_name, png_path, image_name=stem)
Exemple #16
0
    def evaluate(self, dtrain_full, dtest_full, output_file):
        """Train on ``dtrain_full``, predict ``dtest_full`` and write an ``id,target`` CSV.

        The ids come from the ``id`` column of the test file named by the
        module-level ``test_file_name``, read with every ``CSV_COLUMNS`` entry
        except the second one as column names. The trained booster is also
        saved to ``0001.model`` and its plots are produced.

        :param dtrain_full: training data passed to ``xgb.train``.
        :param dtest_full: data passed to ``bst.predict``.
        :param output_file: path of the CSV file to write.
        """
        df_test = pd.read_csv(tf.gfile.Open(test_file_name),
                              names=CSV_COLUMNS[0:1] + CSV_COLUMNS[2:],
                              skipinitialspace=True,
                              engine="python",
                              skiprows=1)

        bst = xgb.train(self.param, dtrain_full, self.num_round)
        prediction = bst.predict(dtest_full)
        bst.save_model('0001.model')
        xgb.to_graphviz(bst, num_trees=2)
        xgb.plot_importance(bst)
        xgb.plot_tree(bst, num_trees=2)

        ids = df_test['id']
        # The context manager closes the file even when a lookup or write
        # fails part-way through the predictions.
        with open(output_file, 'w') as f:
            f.write("id,target\n")
            for i, p in enumerate(prediction):
                f.write("{},{}\n".format(ids[i], p))
 def get_details_xgboost_model(model_path):
     """Return visualisations of the first five trees of a pickled XGBoost model.

     Every tree up to ``model.best_ntree_limit`` is converted with
     ``xgboost.to_graphviz`` (black yes/no edges) and its DOT source is handed
     to ``ModelVisualizer.visualize_dot_model``; only the first five results
     are returned, under the ``"boosters"`` key (marked as temporary in the
     original code).

     Reference: https://xgboost.readthedocs.io/en/latest/python/python_api.html#xgboost.to_graphviz, https://xgboost.readthedocs.io/en/latest/python/python_api.html#xgboost.train
     """
     model = ModelVisualizer.load_pkl_model(model_path)
     edge_color = '#000000'
     booster_dots = [
         ModelVisualizer.visualize_dot_model(
             xgboost.to_graphviz(model,
                                 num_trees=tree_index,
                                 yes_color=edge_color,
                                 no_color=edge_color).source)
         for tree_index in range(model.best_ntree_limit)
     ]
     return {"boosters": booster_dots[:5]}  ### TEMP [ :5]
Exemple #18
0
    def test_plotting(self):
        """Train a two-round booster from ``dpath`` and verify the plotting helpers."""
        red = (1.0, 0, 0, 1.0)
        blue = (0, 0, 1.0, 1.0)

        def check_bar_chart(axes, title, xlabel, ylabel):
            # Shared checks: an Axes with the expected texts and four bars.
            assert isinstance(axes, Axes)
            assert axes.get_title() == title
            assert axes.get_xlabel() == xlabel
            assert axes.get_ylabel() == ylabel
            assert len(axes.patches) == 4

        m = xgb.DMatrix(dpath)
        train_params = {
            'max_depth': 2,
            'eta': 1,
            'objective': 'binary:logistic'
        }
        booster = xgb.train(train_params, m, num_boost_round=2)

        # Defaults.
        check_bar_chart(xgb.plot_importance(booster),
                        'Feature importance', 'F score', 'Features')

        # Single colour and custom texts.
        custom = xgb.plot_importance(booster,
                                     color='r',
                                     title='t',
                                     xlabel='x',
                                     ylabel='y')
        check_bar_chart(custom, 't', 'x', 'y')
        for bar in custom.patches:
            assert bar.get_facecolor() == red

        # Per-bar colours and suppressed texts.
        plain = xgb.plot_importance(booster,
                                    color=['r', 'r', 'b', 'b'],
                                    title=None,
                                    xlabel=None,
                                    ylabel=None)
        check_bar_chart(plain, '', '', '')
        for bar, expected in zip(plain.patches, (red, red, blue, blue)):
            assert bar.get_facecolor() == expected

        assert isinstance(xgb.to_graphviz(booster, num_trees=0), Source)
        assert isinstance(xgb.plot_tree(booster, num_trees=0), Axes)
Exemple #19
0
    def test_plotting(self):
        """Load ``xgb.model`` and verify plot_importance, to_graphviz and plot_tree."""
        booster = xgb.Booster(model_file='xgb.model')

        import matplotlib
        matplotlib.use('Agg')

        from matplotlib.axes import Axes
        from graphviz import Digraph

        red, blue = (1.0, 0, 0, 1.0), (0, 0, 1.0, 1.0)

        def texts(axes):
            # Title and both axis labels, read in a fixed order.
            return [axes.get_title(), axes.get_xlabel(), axes.get_ylabel()]

        # Default appearance.
        default_axes = xgb.plot_importance(booster)
        assert isinstance(default_axes, Axes)
        for actual, wanted in zip(texts(default_axes),
                                  ['Feature importance', 'F score', 'Features']):
            assert actual == wanted
        assert len(default_axes.patches) == 4

        # One colour for all bars plus custom texts.
        red_axes = xgb.plot_importance(booster,
                                       color='r',
                                       title='t',
                                       xlabel='x',
                                       ylabel='y')
        assert isinstance(red_axes, Axes)
        for actual, wanted in zip(texts(red_axes), ['t', 'x', 'y']):
            assert actual == wanted
        assert len(red_axes.patches) == 4
        for patch in red_axes.patches:
            assert patch.get_facecolor() == red

        # Individual bar colours, no texts.
        mixed_axes = xgb.plot_importance(booster,
                                         color=['r', 'r', 'b', 'b'],
                                         title=None,
                                         xlabel=None,
                                         ylabel=None)
        assert isinstance(mixed_axes, Axes)
        for actual in texts(mixed_axes):
            assert actual == ''
        assert len(mixed_axes.patches) == 4
        for position, wanted in enumerate([red, red, blue, blue]):
            assert mixed_axes.patches[position].get_facecolor() == wanted

        graph = xgb.to_graphviz(booster, num_trees=0)
        assert isinstance(graph, Digraph)

        tree_axes = xgb.plot_tree(booster, num_trees=0)
        assert isinstance(tree_axes, Axes)
Exemple #20
0
def main_3(cols=None):
    """Train a single-tree XGBoost classifier on the given features and report on it.

    Prints confusion matrices and classification reports for the training,
    validation and test sets, then renders the single tree to
    ``single-tree.dot``.

    :param cols: feature column names; defaults to
        ``['乳酸脱氢酶', '淋巴细胞(%)', '超敏C反应蛋白']``. The list is not modified.
    """
    if cols is None:
        cols = ['乳酸脱氢酶', '淋巴细胞(%)', '超敏C反应蛋白']
    # Build a new list instead of appending in place: the old in-place append
    # grew the shared default (and any caller-owned list) on every call.
    cols = list(cols) + ['Type2']

    data_df_unna, data_pre_df = data_preprocess()

    # Ground truth of the test set: 1 for death, 0 for cured / improved.
    Tets_Y = data_pre_df.reset_index()[['PATIENT_ID', '出院方式']].copy()
    Tets_Y = Tets_Y.rename(columns={'PATIENT_ID': 'ID', '出院方式': 'Y'})
    Tets_Y['Y'] = (Tets_Y['Y'].map({'治愈': 0, '好转': 0, '死亡': 1}))
    y_true = Tets_Y['Y'].values

    x_col = cols[:-1]
    y_col = cols[-1]
    x_np = data_df_unna[x_col].values
    y_np = data_df_unna[y_col].values

    x_test = data_pre_df[x_col].values

    X_train, X_val, y_train, y_val = train_test_split(x_np,
                                                      y_np,
                                                      test_size=0.3,
                                                      random_state=6)
    model = xgb.XGBClassifier(
        max_depth=3,
        n_estimators=1,
    )
    model.fit(X_train, y_train)

    # Confusion matrix on the training set.
    pred_train = model.predict(X_train)
    show_confusion_matrix(y_train, pred_train)
    print(classification_report(y_train, pred_train))

    # Confusion matrix on the validation set.
    pred_val = model.predict(X_val)
    show_confusion_matrix(y_val, pred_val)
    print(classification_report(y_val, pred_val))

    # Confusion matrix on the test set.
    pred_test = model.predict(x_test)
    show_confusion_matrix(y_true, pred_test)
    print(classification_report(y_true, pred_test))

    # Visualise the single tree using the generated feature map.
    ceate_feature_map(x_col)
    graph = xgb.to_graphviz(model,
                            fmap='xgb.fmap',
                            num_trees=0,
                            **{'size': str(10)})
    graph.render(filename='single-tree.dot')
    def pickle_xgb(self, pickle_path, tree_path):
        """Pickle the fitted classifier bundle and write its tree and importance plots.

        The file name is derived from the ``n_estimators``, ``max_depth`` and
        ``learning_rate`` entries of ``self.params``.

        :param pickle_path: prefix for the pickle file.
        :param tree_path: prefix for the rendered ``.gv`` tree.
        """
        model_dict = {
            'clf': self.clf,
            'params': self.params,
            'vectorizer': self.vectorizer,
            'test_pred': (self.y_test, self.pred),
            'self_prob_pred': self.pred_proba,
            'confusion_matrix': self.cm
        }
        labels = ['n_estimators', 'max_depth', 'learning_rate']
        val = [self.params.get(v) for v in labels]

        file_name = "xgb_ntrees_{0}_depth_{1}_eta_{2}".format(*val)
        # NOTE(review): file_name ends up twice in the pickle path (once via
        # += and once in the open call), unlike tree_path below. The path is
        # kept unchanged because readers may depend on it -- confirm.
        pickle_path += file_name
        # Context manager so the pickle file is flushed and closed right away.
        with open(pickle_path + file_name + ".p", "wb") as pickle_file:
            pickle.dump(model_dict, pickle_file)

        # Write Trees
        vectorizer = self.vectorizer
        if hasattr(vectorizer, 'get_feature_names'):
            feature_names = vectorizer.get_feature_names()
        else:
            # Newer scikit-learn releases only offer get_feature_names_out().
            feature_names = list(vectorizer.get_feature_names_out())
        self.clf.get_booster().feature_names = feature_names
        tree_path += file_name
        xgb.to_graphviz(self.clf, num_trees=1).render(tree_path + ".gv")
        xgb.plot_importance(self.clf, max_num_features=20)
        plt.show()
def main():
    """Render every tree of a saved XGBoost model to ``<out_dir>/<index>.png``.

    Command line: ``out_dir model_file [feature_file]``. Each line of the
    optional feature file is split on whitespace; its second field becomes the
    feature name and its third field the feature type.
    """
    out_dir = sys.argv[1]
    model_file = sys.argv[2]
    bst = xgb.Booster(model_file=model_file)
    num_trees = len(bst.get_dump())
    if len(sys.argv) > 3:
        with open(sys.argv[3]) as f:
            features = [line.split() for line in f]
        bst.feature_names = [fields[1] for fields in features]
        bst.feature_types = [fields[2] for fields in features]
    for i in range(num_trees):
        sys.stdout.write('\rtree id: %d' % i)
        # Pass the index by keyword: in current xgboost the second parameter
        # of to_graphviz is the feature map, not the tree number.
        graph = xgb.to_graphviz(bst, num_trees=i)
        graph.format = 'png'
        graph.render(str(i), directory=out_dir, view=False, cleanup=True)
def model_train(data):
    """Export feature scores of the saved ``xgboost_3.model`` and render one of its trees.

    Converts the comma-separated feature names in ``fmap.csv`` into the
    ``fmap.txt`` feature map (``index<TAB>name<TAB>q`` per line), writes the
    weight / gain / f-score of every scored feature to
    ``./result/feature_result_3.csv`` and renders tree 1 with graphviz.

    :param data: unused by the function body.
    """
    with open('fmap.csv', encoding='utf-8') as f:
        fmap = f.read().strip().split(',')

    # The with-blocks close their files; no explicit close() is needed.
    with open('fmap.txt', 'w', encoding='utf-8') as w:
        for index, feature in enumerate(fmap):
            w.write(str(index) + '\t' + feature + '\tq\n')

    bst = xgb.Booster()
    bst.load_model('xgboost_3.model')
    score_weight = bst.get_score('fmap.txt', 'weight')
    score_gain = bst.get_score('fmap.txt', 'gain')
    score_importance = bst.get_fscore('fmap.txt')

    with open('./result/feature_result_3.csv',
              'w',
              encoding='utf-8',
              newline='') as w:
        writer = csv.writer(w)
        writer.writerow(
            ['f_name', 'score_weight', 'score_gain', 'score_importance'])
        # The split-value histogram previously computed per feature was never
        # used, so that call has been dropped.
        for f_name in score_importance:
            writer.writerow([
                f_name, score_weight[f_name], score_gain[f_name],
                score_importance[f_name]
            ])

    xgb.to_graphviz(bst, num_trees=1, rankdir='RL').render()
Exemple #24
0
    def fit(self, dataset: Dataset):
        """Fit ``self.clf`` on the dataset and parse every boosted tree into ``self.trees``.

        The number of trees is ``n_estimators`` times the number of classes
        when there are more than two classes, and ``n_estimators`` otherwise.
        """
        self.set_dataset(dataset)

        self.trees = []
        self.clf.fit(self.dataset.X, self.dataset.y)

        class_count = self.dataset.num_classes()
        rounds = self.clf.n_estimators
        if class_count > 2:
            total_trees = rounds * class_count
        else:
            total_trees = rounds

        for index in range(total_trees):
            graph_text = str(xgb.to_graphviz(self.clf, num_trees=index))
            self.trees.append(XGBoostTree.parse(graph_text, self.dataset))
Exemple #25
0
    def run_categorical(self, tree_method: str) -> None:
        """Fit a regressor on categorical data and check its dump and plots.

        Every dumped tree must either be a leaf or have a list-valued
        ``split_condition`` at its root; the last tree is then converted with
        ``to_graphviz`` and drawn with ``plot_tree``.

        :param tree_method: value forwarded as ``tree_method`` to ``XGBRegressor``.
        """
        X, y = tm.make_categorical(1000, 31, 19, onehot=False)
        reg = xgb.XGBRegressor(enable_categorical=True,
                               n_estimators=10,
                               tree_method=tree_method)
        reg.fit(X, y)
        trees = reg.get_booster().get_dump(dump_format="json")
        for tree in trees:
            j_tree = json.loads(tree)
            assert "leaf" in j_tree.keys() or isinstance(
                j_tree["split_condition"], list)

        # Index of the last tree. The previous len(j_tree) - 1 counted the
        # keys of the last tree's JSON dict, which only happened to be a
        # valid tree index.
        last_tree = len(trees) - 1
        graph = xgb.to_graphviz(reg, num_trees=last_tree)
        assert isinstance(graph, Source)
        ax = xgb.plot_tree(reg, num_trees=last_tree)
        assert isinstance(ax, Axes)
Exemple #26
0
def train_xgb(data_x, data_y):
    """Train an XGBoost regressor on an ordered 70/15/15 split and report its fit.

    The data is split without shuffling into train, validation and test parts.
    After training with early stopping the function plots the importance and
    the first tree, prints RMSE and R^2 for the train and test parts, and
    draws the test predictions.
    """
    # xgboost does not need the inputs to be normalised.
    split_options = dict(shuffle=False, stratify=None)
    train_x, temp_x, train_y, temp_y = train_test_split(data_x, data_y, test_size=0.3, **split_options)
    val_x, test_x, val_y, test_y = train_test_split(temp_x, temp_y, test_size=0.5, **split_options)
    print("X_train : {}  X_test : {}".format(train_x.shape, test_x.shape))
    print("y_train : {}  y_test : {}".format(train_y.shape, test_y.shape))

    # Training
    dtrain = xgb.DMatrix(train_x, label=train_y)
    dval = xgb.DMatrix(val_x, label=val_y)
    dtest = xgb.DMatrix(test_x, label=test_y)
    param = {
        'booster': 'gbtree',
        'silent': 1,
        'nthread': -1,
        'objective': 'reg:linear',
        'eval_metric': 'rmse',
        'eta': 0.01,
        'gamma': 0.1,
        'max_depth': 5,
        'min_child_weight': 1,
        'subsample': 0.8,
        'colsample_bytree': 0.8,
        'lambda': 0.1,
        'alpha': 0,
        'tree_method': 'auto',
        'predictor': 'cpu_predictor',
    }
    bst = xgb.train(param,
                    dtrain,
                    num_boost_round=500,
                    evals=[(dval, 'eval'), (dtrain, 'train')],
                    early_stopping_rounds=10,
                    verbose_eval=20)

    # Predictions for all three parts. The original kept a disabled
    # expm1 back-transformation / Excel export here as an inert string.
    train_y_pred = bst.predict(dtrain)
    val_y_pred = bst.predict(dval)
    test_y_pred = bst.predict(dtest)

    importance(bst, 24)
    xgb.plot_tree(bst, fmap='Gradient boosting tree', num_trees=0, rankdir='UT', ax=None)
    inst = xgb.to_graphviz(bst, fmap='', num_trees=0, rankdir='UT', yes_color='#0000FF', no_color='#FF0000')
    inst.render()

    def report(template, truth, predicted):
        # Print RMSE and R^2 of one data part using the given message template.
        rmse = math.sqrt(mse(truth, predicted))
        r2 = r2_score(truth, predicted)
        print(template % (rmse, r2))

    report('Train RMSE = %f, R^2 = %f', train_y, train_y_pred)
    report('Test RMSE = %f, R^2 = %f', test_y, test_y_pred)
    drawing(test_y, test_y_pred)
Exemple #27
0
def trees_graphviz_plot(model_file, featmap_file):
    """Return an HTML snippet embedding a saved model's tree graph as a base64 PNG.

    :param model_file: path of the XGBoost model file to load.
    :param featmap_file: feature map passed as ``fmap`` to ``xgb.to_graphviz``.
    :return: a centred ``<div>`` containing an ``<img>`` with a ``data:`` URI.
    """
    model = xgb.Booster(model_file=model_file)
    dot = xgb.to_graphviz(model, fmap=featmap_file)

    # Render in memory. The previous version rendered into a hard-coded "dot"
    # directory and then removed it, which would delete any existing
    # directory of that name and left files behind when rendering failed.
    png_bytes = dot.pipe(format="png")

    encoded = base64.b64encode(png_bytes).decode(encoding="utf-8")
    html = '<div style="text-align: center;"><img src="data:image/png;base64,{}"></div>'.format(
        encoded)

    return html
Exemple #28
0
    def test_plotting(self):
        """Exercise plot_importance, to_graphviz and plot_tree on the booster stored in ``xgb.model``."""
        bst2 = xgb.Booster(model_file='xgb.model')

        import matplotlib
        matplotlib.use('Agg')

        from matplotlib.axes import Axes
        from graphviz import Digraph

        RED = (1.0, 0, 0, 1.0)
        BLUE = (0, 0, 1.0, 1.0)

        # (plot_importance keyword arguments, expected title/xlabel/ylabel,
        #  expected face colours or None when colours are not checked)
        cases = [
            ({}, ('Feature importance', 'F score', 'Features'), None),
            (dict(color='r', title='t', xlabel='x', ylabel='y'),
             ('t', 'x', 'y'), [RED] * 4),
            (dict(color=['r', 'r', 'b', 'b'], title=None, xlabel=None, ylabel=None),
             ('', '', ''), [RED, RED, BLUE, BLUE]),
        ]
        for options, (title, xlabel, ylabel), colours in cases:
            ax = xgb.plot_importance(bst2, **options)
            assert isinstance(ax, Axes)
            assert ax.get_title() == title
            assert ax.get_xlabel() == xlabel
            assert ax.get_ylabel() == ylabel
            assert len(ax.patches) == 4
            if colours is not None:
                for patch, colour in zip(ax.patches, colours):
                    assert patch.get_facecolor() == colour

        g = xgb.to_graphviz(bst2, num_trees=0)
        assert isinstance(g, Digraph)
        ax = xgb.plot_tree(bst2, num_trees=0)
        assert isinstance(ax, Axes)
Exemple #29
0
def plotio(inputs, outputs, xs):
    """Plot regressor predictions in the terminal for 1 to 8 boosting rounds.

    For every round count a model is trained on ``inputs`` / ``outputs``, its
    predictions for ``xs`` are logged and drawn on a character canvas, and the
    training points are overlaid as blue ``*`` marks. The model of the last
    round count is finally written to ``plotio.pdf``.

    :param inputs: training rows; the first element of each row is plotted.
    :param outputs: training targets; the first element of each is plotted.
    :param xs: x positions to predict for.
    """
    wh = shutil.get_terminal_size((111, 11))
    wh = (wh.columns - 1, min(3, wh.lines - 3))
    param = {'max_depth': 2, 'eta': 1, 'objective': 'reg:squarederror'}
    # The training matrix does not change between rounds; build it once.
    dtrain = xgb.DMatrix(inputs, label=outputs)
    for rounds in range(1, 9):
        best = xgb.train(param, dtrain, num_boost_round=rounds)
        ys = [best.predict(xgb.DMatrix([[x]]))[0] for x in xs]
        log(rounds, xs, ys)
        a = [[' ' for x in range(wh[0])] for u in range(wh[1])]
        # Named "plotted" rather than "map" so the builtin is not shadowed.
        plotted = plot(a, xs, ys)
        for *_, ax, ay in plotted.zip([i[0] for i in inputs],
                                      [o[0] for o in outputs]):
            a[ay][ax] = '\033[34m*\033[0m'
        print('\n'.join(''.join(y) for y in a))

    gv = xgb.to_graphviz(best)
    # Context manager so the PDF is flushed and closed deterministically.
    with open('plotio.pdf', 'wb') as pdf:
        pdf.write(gv.pipe())
Exemple #30
0
def extract_tree_threshold(trees):
    """Collect the split thresholds of a booster, grouped by the text left of ``<``.

    Every tree is converted with ``xgb.to_graphviz``; each double-quoted
    fragment of the graph body that contains both ``f`` and ``<`` is split on
    ``<``, giving a key (first part) and a threshold (second part, as float).

    Returns a dict mapping each key to a sorted numpy array of its thresholds.
    """
    collected = {}
    tree_count = len(trees.get_dump())
    for tree_index in range(tree_count):
        graph = xgb.to_graphviz(trees, num_trees=tree_index)
        for body_line in graph.body:
            for fragment in body_line.split('"'):
                if not ('f' in fragment and '<' in fragment):
                    continue
                parts = fragment.split("<")
                threshold = float(parts[1])
                collected.setdefault(parts[0], []).append(threshold)
    return {key: np.sort(np.array(values)) for key, values in collected.items()}
Exemple #31
0
def printXGBoostTree(fn, xgBoostTree, numTrees=2, yesColor='#0000FF', noColor='#FF0000'):
    """
    Creates a pdf for one tree of the given XGBoost model.

    Writes ``<fn>.dot`` and then runs Graphviz ``dot`` to produce ``<fn>.pdf``.
    Prints a message and returns without doing anything when the xgboost
    package is not available.

    :param fn: output path without extension
    :param xgBoostTree: XGBoost model object passed to ``xgb.to_graphviz``
    :param numTrees (Optional, Default = 2) : value forwarded as ``num_trees`` to ``xgb.to_graphviz``
    :param yesColor (Optional, Default = '#0000FF') : value forwarded as ``yes_color``
    :param noColor (Optional, Default = '#FF0000'): value forwarded as ``no_color``
    :raises subprocess.CalledProcessError: if the ``dot`` command fails
    """
    from subprocess import check_call
    if checkModuleExists("xgboost"):
        import xgboost as xgb
    else:
        print("Requires xgboost library. Cannot print xgboost tree")
        return

    dot_path = fn + ".dot"
    val = xgb.to_graphviz(xgBoostTree, num_trees=numTrees, yes_color=yesColor, no_color=noColor)

    # save() writes the file itself; pre-opening it for writing only left an
    # empty .dot file behind whenever to_graphviz raised.
    val.save(dot_path)
    check_call(["dot", "-Tpdf", dot_path, "-o", fn + ".pdf"])
Exemple #32
0
import graphviz
import xgboost as xgb
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import plot_confusion_matrix
import matplotlib.pyplot as plt

# Script: fit an XGBClassifier on the wine data set, build a graphviz object
# for its first tree and plot a confusion matrix for the held-out split.

# Show the working directory; the CSV path below is relative to it.
print(os.getcwd())
data = pd.read_csv('../Data/WineDataset/wine.data.csv')
# Every column except the last is a feature; the last column is the target.
X = data.values[:, :-1]
y = data.values[:, -1]
# Hold out 20% of the rows for testing (no fixed random_state is passed).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
xgb_clf = xgb.XGBClassifier()
xgb_clf.fit(X_train, y_train)
dot_data_xg = xgb.to_graphviz(xgb_clf)
# NOTE(review): to_graphviz already returns a graphviz object, so wrapping it
# in graphviz.Source again looks redundant -- confirm before rendering.
graph = graphviz.Source(dot_data_xg)
# graph.render("xgb_tree_wine")
y_hat = xgb_clf.predict(X_test)
# Confusion matrix on the test split with counts shown without decimals.
plot_confusion_matrix(xgb_clf,
                      X_test,
                      y_test,
                      display_labels=['class 0', 'class 1', 'class 2'],
                      values_format='.0f')
# Result of the classifier's apply() on the test rows.
X_test_leaves = xgb_clf.apply(X_test)
# plt.show(block=True)
# Placeholder assignment; not used by anything visible in this script.
x = 0
Exemple #33
0
    import xgboost as xgb
    _, inputs, outputs = load_inputs()
    velocity = [o[1] for o in outputs]
    param = {'max_depth': 3}
    dtrain = xgb.DMatrix(inputs, label=velocity)
    best = xgb.train(param,
                     dtrain,
                     evals=[(dtrain, 'train')],
                     num_boost_round=314)
    for count, (input, expected) in enumerate(zip(inputs, outputs)):
        prediction = best.predict(xgb.DMatrix([input]))[0]
        log(f"prediction {prediction} expected {expected[1]}")
        if 32 < count:
            break

    gv = xgb.to_graphviz(best)
    open('velocity.pdf', 'wb').write(gv.pipe())

if '--tf' in sys.argv:  # Inference with TF
    from tensorflow import keras
    from tensorflow.keras import layers

    tf_inputs = keras.Input(shape=(5, ), name="state-and-action")
    x = layers.Dense(314, activation="relu", name="dense_1")(tf_inputs)
    x = layers.Dense(314, activation="relu", name="dense_2")(x)
    tf_outputs = layers.Dense(4, activation="softmax",
                              name="state-prediction")(x)
    #   3 .. 0.18
    #  31 .. 0.18
    # 314 .. 0.18