Example #1
0
def _get_feature_path_importance_sklearn_plot(features,
                                              feature_path_importance, figsize,
                                              colors, fontsize, fontname,
                                              grid):
    """
    Draw a bar chart with one bar per feature and return its axes.

    :param features: Labels used for the x-axis ticks, one per bar.
    :param feature_path_importance: Bar heights, in the same order as features.
    :param figsize: Figure size handed to plt.subplots().
    :param colors: Color overrides passed through adjust_colors(); the result is read
                   for the 'hist_bar', 'rect_edge' and 'axis_label' entries.
    :param fontsize: Font size of both axis labels.
    :param fontname: Font family of both axis labels.
    :param grid: Whether grid lines are shown.
    :return: The matplotlib axes the bars were drawn on.
    """
    colors = adjust_colors(colors)
    _, ax = plt.subplots(figsize=figsize)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_linewidth(.3)
    ax.spines['bottom'].set_linewidth(.3)

    positions = range(len(features))
    ax.set_xticks(positions)
    ax.set_xticklabels(features)

    barcontainers = ax.bar(positions,
                           feature_path_importance,
                           color=colors["hist_bar"],
                           lw=.3,
                           align='center',
                           width=1)
    for rect in barcontainers.patches:
        rect.set_linewidth(.5)
        rect.set_edgecolor(colors['rect_edge'])

    label_style = dict(fontsize=fontsize, fontname=fontname, color=colors['axis_label'])
    ax.set_xlabel("features", **label_style)
    ax.set_ylabel("feature importance", **label_style)

    # Pass the flag positionally: matplotlib renamed this keyword from `b` to
    # `visible`, so `b=grid` fails on newer releases while the positional form
    # works on old and new versions alike.
    ax.grid(grid)

    return ax
Example #2
0
def draw_piechart(counts, size, colors, filename, label=None, fontname="Arial", graph_colors=None):
    """
    Draw class counts as a pie chart and save the figure to filename.

    :param counts: Per-class sample counts; one wedge is drawn per entry.
    :param size: Side length of the (square) figure; also scales the pie radius.
    :param colors: Wedge colors, parallel to counts.
    :param filename: Destination passed to savefig().
    :param label: Optional text drawn centered below the pie.
    :param fontname: Font family of the label.
    :param graph_colors: Color overrides passed through adjust_colors(); the result is
                         read for the 'pie' (wedge edge) and 'text' (label) entries.
    """
    graph_colors = adjust_colors(graph_colors)

    # When exactly one class is present, keep only that class so the pie is drawn from
    # a single count/color pair. The index lookup lives inside the branch because it
    # is only needed here and would raise IndexError on all-zero counts.
    if np.count_nonzero(counts) == 1:
        i = np.nonzero(counts)[0][0]
        counts = [counts[i]]
        colors = [colors[i]]

    tweak = size * .01
    fig, ax = plt.subplots(1, 1, figsize=(size, size))
    ax.axis('equal')
    ax.set_xlim(0, size-10*tweak)
    ax.set_ylim(0, size-10*tweak)
    # frame=True is needed for the pie to fit properly; the center offset and limits
    # were hand-tuned to get a tight box around the pie chart.
    wedges, _ = ax.pie(counts, center=(size/2-6*tweak,size/2-6*tweak), radius=size/2, colors=colors, shadow=False, frame=True)
    for w in wedges:
        w.set_linewidth(.5)
        w.set_edgecolor(graph_colors['pie'])

    ax.axis('off')
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)

    if label is not None:
        ax.text(size/2-6*tweak, -10*tweak, label,
                horizontalalignment='center',
                verticalalignment='top',
                fontsize=9, color=graph_colors['text'], fontname=fontname)

    # Save and close this specific figure instead of relying on pyplot's notion of
    # the "current" figure.
    fig.savefig(filename, bbox_inches='tight', pad_inches=0)
    plt.close(fig)
Example #3
0
def rtreeviz_bivar_heatmap(ax, X_train, y_train, max_depth, feature_names,
                           fontsize=14, ticks_fontsize=12, fontname="Arial",
                           show={'title'},
                           n_colors_in_map=100,
                           colors=None
                           ) -> None:
    """
    Show tessellated 2D feature space for a bivariate regression tree.

    A DecisionTreeRegressor of depth max_depth is fit to all of X_train. Each region of
    the shadow tree's tessellation is drawn on ax as a rectangle shaded by the region's
    prediction, and the training points are overlaid using columns 0 and 1 of X_train
    as coordinates, shaded by their target value. X_train is therefore presumably
    expected to hold exactly the two features being visualized.

    :param ax: Matplotlib axes to draw on.
    :param X_train: Feature matrix (DataFrame or array).
    :param y_train: Target values (Series or array).
    :param max_depth: Depth limit of the fitted regression tree.
    :param feature_names: Feature names; entries 0 and 1 label the x and y axes.
    :param fontsize: Font size for axis labels and title.
    :param ticks_fontsize: Font size for tick labels.
    :param fontname: Font family for the axis labels.
    :param show: Collection of elements to display; only 'title' is consulted here.
    :param n_colors_in_map: Number of shades in the color gradient.
    :param colors: Color overrides passed through adjust_colors().
    :return: None; everything is drawn on ax.
    """
    if isinstance(X_train, pd.DataFrame):
        X_train = X_train.values
    if isinstance(y_train, pd.Series):
        y_train = y_train.values

    colors = adjust_colors(colors)

    rt = tree.DecisionTreeRegressor(max_depth=max_depth)
    rt.fit(X_train, y_train)

    y_min, y_max = np.min(y_train), np.max(y_train)
    y_range = y_max - y_min
    palette = [rgb2hex(c.rgb, force_long=True)
               for c in Color(colors['color_map_min']).range_to(Color(colors['color_map_max']),
                                                                n_colors_in_map)]

    def shade(value):
        """Map a target value onto the gradient between y_min and y_max."""
        if y_range == 0:
            # Constant target: there is no gradient to interpolate over, and dividing
            # by the zero range would produce NaN, so use the first shade.
            return palette[0]
        return palette[int(((value - y_min) / y_range) * (n_colors_in_map - 1))]

    shadow_tree = ShadowDecTree(rt, X_train, y_train, feature_names=feature_names)

    for node, bbox in shadow_tree.tesselation():
        # bbox holds (x0, y0, x1, y1); convert to origin + width/height.
        origin = (bbox[0], bbox[1])
        width = bbox[2] - bbox[0]
        height = bbox[3] - bbox[1]
        # The rotation angle is left at its default of 0 rather than passed
        # positionally, since newer matplotlib accepts it by keyword only.
        rect = patches.Rectangle(origin, width, height, linewidth=.3, alpha=.5,
                                 edgecolor=colors['edge'], facecolor=shade(node.prediction()))
        ax.add_patch(rect)

    point_colors = [shade(target) for target in y_train]
    ax.scatter(X_train[:, 0], X_train[:, 1], marker='o', alpha=.95, c=point_colors,
               edgecolor=colors['scatter_edge'], lw=.3)

    ax.set_xlabel(f"{feature_names[0]}", fontsize=fontsize, fontname=fontname, color=colors['axis_label'])
    ax.set_ylabel(f"{feature_names[1]}", fontsize=fontsize, fontname=fontname, color=colors['axis_label'])

    ax.tick_params(axis='both', which='major', width=.3, labelcolor=colors['tick_label'], labelsize=ticks_fontsize)

    if 'title' in show:
        accur = rt.score(X_train, y_train)
        title = f"Regression tree depth {max_depth}, training $R^2$={accur:.3f}"
        # Title the axes we were given, not whatever pyplot considers current.
        ax.set_title(title, fontsize=fontsize, color=colors['title'])

    return None
Example #4
0
def regr_leaf_viz(node : ShadowDecTreeNode,
                  y : (pd.Series,np.ndarray),
                  target_name,
                  filename:str=None,
                  y_range=None,
                  precision=1,
                  label_fontsize: int = 9,
                  ticks_fontsize: int = 8,
                  fontname:str="Arial",
                  colors=None):
    """
    Draw a small strip plot of the target values that fall into a regression leaf.

    The leaf's targets are scattered with random horizontal jitter, a dashed line marks
    their mean, and a caption below the axes shows the rounded mean and sample count.

    :param node: Leaf whose samples() select the entries of y to plot.
    :param y: Target values for the whole training set.
    :param target_name: Name shown in the caption before the mean.
    :param filename: If given, the figure is saved there and then closed.
    :param y_range: Limits applied to the y axis.
    :param precision: Rounding precision passed to myround() for the caption.
    :param label_fontsize: Font size of the caption.
    :param ticks_fontsize: Font size of the y tick labels.
    :param fontname: Font family of the caption.
    :param colors: Color overrides passed through adjust_colors().
    """
    colors = adjust_colors(colors)

    leaf_y = y[node.samples()]

    fig, ax = plt.subplots(1, 1, figsize=(.75, .8))
    ax.tick_params(colors=colors['tick_label'])

    leaf_mean = np.mean(leaf_y)

    ax.set_ylim(y_range)
    for side in ('top', 'right', 'bottom'):
        ax.spines[side].set_visible(False)
    ax.spines['left'].set_linewidth(.3)
    ax.set_xticks([])

    # Caption sits just below the axes, offset by half the x tick padding.
    caption = f"{target_name}={myround(leaf_mean,precision)}\nn={len(leaf_y)}"
    pad = plt.rcParams['xtick.major.pad']
    ax.annotate(caption,
                xy=(.5, 0), xytext=(.5, -.5*pad), ha='center', va='top',
                xycoords='axes fraction', textcoords='offset points',
                fontsize=label_fontsize, fontname=fontname, color=colors['axis_label'])

    ax.tick_params(axis='y', which='major', width=.3, labelcolor=colors['tick_label'], labelsize=ticks_fontsize)

    # Horizontal positions are random jitter around the middle of the unit-wide x axis.
    jitter = np.random.normal(.5, .08, size=len(leaf_y))
    ax.set_xlim(0, 1)

    ax.scatter(jitter, leaf_y, s=5, c=colors['scatter_marker'], alpha=.25, lw=.3)
    ax.plot([0, len(node.samples())], [leaf_mean, leaf_mean], '--',
            color=colors['split_line'], linewidth=1)

    if filename is None:
        return
    plt.savefig(filename, bbox_inches='tight', pad_inches=0)
    plt.close()
Example #5
0
def draw_legend(shadow_tree, target_name, filename, colors=None):
    """
    Render a standalone legend mapping each class to its color.

    :param shadow_tree: Supplies nclasses(), unique_target_values and class_names.
    :param target_name: Used as the legend title.
    :param filename: If not None, the figure is saved there and then closed.
    :param colors: Color overrides passed through adjust_colors().
    """
    colors = adjust_colors(colors)
    class_values = shadow_tree.unique_target_values
    class_names = shadow_tree.class_names
    palette = colors['classes'][shadow_tree.nclasses()]
    color_of = {value: palette[idx] for idx, value in enumerate(class_values)}

    # One colored swatch per class; these only serve as legend handles.
    swatches = [
        patches.Rectangle((0, 0), 20, 10, linewidth=.4, edgecolor=colors['rect_edge'],
                          facecolor=color_of[value], label=class_names[value])
        for value in class_values
    ]

    fig, ax = plt.subplots(1, 1, figsize=(1,1))
    leg = ax.legend(handles=swatches,
                    frameon=True,
                    shadow=False,
                    fancybox=True,
                    loc='center',
                    title=target_name,
                    handletextpad=.35,
                    borderpad=.8,
                    edgecolor=colors['legend_edge'])

    leg.get_frame().set_linewidth(.5)

    legend_title = leg.get_title()
    legend_title.set_color(colors['legend_title'])
    legend_title.set_fontsize(10)
    legend_title.set_fontweight('bold')

    for entry in leg.get_texts():
        entry.set_color(colors['text'])
        entry.set_fontsize(10)

    # Hide the axes themselves so only the legend box remains visible.
    ax.set_xlim(0, 20)
    ax.set_ylim(0, 10)
    ax.axis('off')
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)

    if filename is None:
        return
    plt.savefig(filename, bbox_inches='tight', pad_inches=0)
    plt.close()
Example #6
0
def class_leaf_viz(node : ShadowDecTreeNode,
                   colors : List[str],
                   filename: str,
                   graph_colors=None):
    """
    Draw a classifier leaf as a pie chart of its class counts and save it to filename.

    The pie grows linearly with the number of samples in the leaf, starting at a
    minimum size and capped at a maximum, so that small leaves are visually
    distinguishable while large ones do not dominate.

    :param node: Leaf providing nsamples() and class_counts().
    :param colors: Wedge colors, parallel to the class counts.
    :param filename: Destination file for the pie chart.
    :param graph_colors: Color overrides passed through adjust_colors().
    """
    graph_colors = adjust_colors(graph_colors)

    smallest, largest, per_sample = .15, 1.3, 0.02
    nsamples = node.nsamples()
    size = min(nsamples * per_sample + smallest, largest)

    draw_piechart(node.class_counts(), size=size, colors=colors, filename=filename,
                  label=f"n={nsamples}", graph_colors=graph_colors)
Example #7
0
def clfviz_univar(model,
                  x: np.ndarray,
                  y: np.ndarray,
                  ntiles=100,
                  binary_threshold=0.5,
                  show=[
                      'instances', 'boundaries', 'probabilities',
                      'misclassified', 'legend'
                  ],
                  feature_name=None,
                  target_name=None,
                  class_names=None,
                  markers=None,
                  fontsize=9,
                  fontname="Arial",
                  dot_w=25,
                  yshift=.09,
                  sigma=.09,
                  colors: dict = None,
                  ax=None) -> None:
    """
    Visualize a classifier's behavior over a single feature.

    See comment and parameter descriptions for clfviz() above.

    The range of x is sampled at ntiles evenly spaced points and the model's
    probabilities are evaluated there. Depending on the entries of show:

    - 'probabilities': a strip of colored tiles along the bottom of the plot.
    - 'boundaries': dashed vertical lines where the predicted class changes.
    - 'instances': the x values as jittered markers, one horizontal band per class.
    - 'misclassified': with 'instances', wrongly predicted points get a warning-colored edge.
    - 'legend': a class legend.

    :raises ValueError: if x has more than one column.
    """
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(5, 1.2))

    if isinstance(x, pd.Series):
        x = x.values
    if isinstance(y, pd.Series):
        y = y.values

    if (len(x.shape) == 2 and x.shape[1] != 1) or len(x.shape) > 2:
        raise ValueError(f"Expecting 1D data not {x.shape}")

    colors = adjust_colors(colors)

    mu = 0.08
    class_values = np.unique(y)
    nclasses = len(class_values)
    class_colors = np.array(colors['classes'][nclasses])
    color_map = {v: class_colors[i] for i, v in enumerate(class_values)}

    def predicted_classes(proba):
        """Turn class probabilities into class predictions."""
        if nclasses == 2:  # is k=2 binary?
            return np.where(proba[:, 1] >= binary_threshold, 1, 0)
        return np.argmax(proba, axis=1)  # TODO: assumes classes are 0..k-1

    x_min, x_max = np.min(x), np.max(x)
    x1r = x_max - x_min
    grid_points, w = np.linspace(x_min,
                                 x_max,
                                 num=ntiles,
                                 endpoint=True,
                                 retstep=True)
    grid_proba = _predict_proba(model, grid_points)
    grid_pred = predicted_classes(grid_proba)
    # Captured before anything is drawn, so it reflects the axes' incoming y limit.
    ymax = ax.get_ylim()[1]

    # compute the stripes on the bottom showing probabilities
    if 'probabilities' in show:
        # Use the caller's (already adjusted) colors so custom palettes apply to the
        # probability tiles as well, matching what clfviz_bivar does.
        color_map, grid_pred_colors, grid_proba_colors = \
            _get_grid_colors(grid_proba, grid_pred, class_values, colors=colors)

        pred_box_height = .08 * ymax
        boxes = [
            patches.Rectangle((gx, 0),
                              w,
                              pred_box_height,
                              edgecolor='none',
                              facecolor=grid_proba_colors[i],
                              alpha=colors['tile_alpha'])
            for i, gx in enumerate(grid_points)
        ]
        ax.add_collection(PatchCollection(boxes, match_original=True))
        # drop box around the gradation
        rect = patches.Rectangle((grid_points[0], 0),
                                 x1r + w,
                                 pred_box_height,
                                 linewidth=.3,
                                 edgecolor=colors['rect_edge'],
                                 facecolor='none')
        ax.add_patch(rect)

    if 'boundaries' in show:
        # A nonzero difference between neighboring grid predictions marks a class
        # transition; the leading 0 keeps the array aligned with grid_points.
        dx = np.hstack([0, np.abs(np.diff(grid_pred))])
        dx_edge_idx = np.where(dx)
        for lx in grid_points[dx_edge_idx]:
            ax.plot([lx, lx], [*ax.get_ylim()],
                    '--',
                    lw=.3,
                    c=colors['split_line'],
                    alpha=1.0)

    if 'instances' in show:
        # user should pass in short and wide fig
        x_pred = predicted_classes(_predict_proba(model, x))
        class_x = [x[y == cl] for cl in class_values]
        class_x_pred = [x_pred[y == cl] for cl in class_values]

        if markers is None:
            markers = ['o'] * len(class_x)
        for i, x_ in enumerate(class_x):
            # Each class gets its own horizontal band, shifted up by yshift per class.
            band_y = mu + i * yshift
            if 'misclassified' in show:
                # Show correctly classified markers
                good_x = x_[class_x_pred[i] == class_values[i]]
                noise = np.random.normal(mu, sigma, size=len(good_x))
                ax.scatter(good_x, [band_y] * len(good_x) + noise,
                           s=dot_w,
                           c=color_map[i],
                           marker=markers[i],
                           alpha=colors['scatter_marker_alpha'],
                           edgecolors=colors['scatter_edge'],
                           lw=.5)
                # Show misclassified markers (can't have alpha per marker so do in 2 calls)
                bad_x = x_[class_x_pred[i] != class_values[i]]
                noise = np.random.normal(mu, sigma, size=len(bad_x))
                ax.scatter(bad_x, [band_y] * len(bad_x) + noise,
                           s=dot_w,
                           c=color_map[i],
                           marker=markers[i],
                           alpha=1.0,
                           edgecolors=colors['warning'],
                           lw=.5)
            else:
                noise = np.random.normal(mu, sigma, size=len(x_))
                ax.scatter(x_, [band_y] * len(x_) + noise,
                           s=dot_w,
                           c=color_map[i],
                           marker=markers[i],
                           alpha=colors['scatter_marker_alpha'],
                           edgecolors=colors['scatter_edge'],
                           lw=.5)

    ax.spines['top'].set_visible(False)
    ax.spines['left'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['bottom'].set_linewidth(0.1)
    ax.set_yticks([])
    ax.tick_params(axis='both',
                   which='major',
                   width=.3,
                   labelcolor=colors['tick_label'],
                   labelsize=fontsize)
    for tick in ax.get_xticklabels():
        tick.set_fontname(fontname)
    for tick in ax.get_yticklabels():
        tick.set_fontname(fontname)
    ax.set_ylim(0, mu + nclasses * yshift + 6 * sigma)

    if feature_name is not None:
        ax.set_xlabel(f"{feature_name}",
                      fontsize=fontsize,
                      fontname=fontname,
                      color=colors['axis_label'])

    if 'legend' in show:
        class_names = utils._normalize_class_names(class_names, nclasses)
        add_classifier_legend(ax,
                              class_names,
                              class_values,
                              color_map,
                              target_name,
                              colors,
                              fontsize=fontsize,
                              fontname=fontname)
Example #8
0
def clfviz_bivar(model,
                 X: np.ndarray,
                 y: np.ndarray,
                 ntiles=50,
                 tile_fraction=.9,
                 binary_threshold=0.5,
                 show=[
                     'instances', 'boundaries', 'probabilities',
                     'misclassified', 'legend'
                 ],
                 feature_names=None,
                 target_name=None,
                 class_names=None,
                 markers=None,
                 boundary_marker='o',
                 boundary_markersize=.8,
                 fontsize=9,
                 fontname="Arial",
                 dot_w=25,
                 colors: dict = None,
                 ax=None) -> None:
    """
    Visualize a classifier's behavior over two features.

    See comment and parameter descriptions for clfviz() above.

    The feature plane is tiled and each tile colored by the model's probabilities (when
    'probabilities' is in show) or by its predicted class. Optionally draws decision
    boundaries, the training instances (flagging misclassified ones) and a legend.

    :raises ValueError: if X is not a 2-column matrix.
    """
    if isinstance(X, pd.DataFrame):
        X = X.values
    if isinstance(y, pd.Series):
        y = y.values

    ndim = len(X.shape)
    if ndim == 1 or ndim > 2 or (ndim == 2 and X.shape[1] != 2):
        raise ValueError(f"Expecting 2D data not {X.shape}")

    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=(5, 3.5))

    # Build a grid over the range of both features; get probabilities and predictions
    tiling = _compute_tiling(model, X, y, binary_threshold, ntiles, tile_fraction)
    grid_points, grid_proba, grid_pred_as_matrix, w, x_, class_X, class_values = tiling

    x_proba = _predict_proba(model, X)
    if len(np.unique(y)) != 2:
        X_pred = np.argmax(x_proba, axis=1)  # TODO: assumes classes are 0..k-1
    else:  # k=2 binary: apply the probability threshold to class 1
        X_pred = np.where(x_proba[:, 1] >= binary_threshold, 1, 0)
    class_X_pred = [X_pred[y == cl] for cl in class_values]

    if markers is None:
        markers = ['o'] * len(class_X)

    colors = adjust_colors(colors)

    class_values = np.unique(y)  # returns sorted

    # Map classes to colors for both the probability and the prediction tiles
    color_map, grid_pred_colors, grid_proba_colors = \
        _get_grid_colors(grid_proba, grid_pred_as_matrix, class_values, colors)

    # Draw probabilities or class prediction grid
    if 'probabilities' in show:
        facecolors = grid_proba_colors
    else:
        facecolors = grid_pred_colors
    _draw_tiles(ax, grid_points, facecolors, colors['tile_alpha'], x_, w)

    # Get grid with class predictions with coordinates (x,y)
    # e.g., y_pred[0,0] is lower left pixel and y_pred[5,5] is top-right pixel
    # for npoints=5
    grid_pred_as_matrix = grid_pred_as_matrix.reshape(ntiles, ntiles)

    if 'boundaries' in show:
        _draw_boundary_edges(ax, grid_points, grid_pred_as_matrix,
                             boundary_marker, boundary_markersize, colors, w,
                             x_)

    # Draw the X instances circles
    if 'instances' in show:
        flag_errors = 'misclassified' in show
        for i, pts in enumerate(class_X):
            # Settings shared by every scatter call for this class
            common = dict(s=dot_w, c=color_map[i], marker=markers[i], lw=.5)
            if not flag_errors:
                ax.scatter(pts[:, 0],
                           pts[:, 1],
                           alpha=colors['scatter_marker_alpha'],
                           edgecolors=colors['scatter_edge'],
                           **common)
                continue

            # Show correctly classified markers
            good_x = pts[class_X_pred[i] == class_values[i], :]
            ax.scatter(good_x[:, 0],
                       good_x[:, 1],
                       alpha=colors['scatter_marker_alpha'],
                       edgecolors=colors['scatter_edge'],
                       **common)
            # Show misclassified markers (can't have alpha per marker so do in 2 calls)
            bad_x = pts[class_X_pred[i] != class_values[i], :]
            ax.scatter(bad_x[:, 0],
                       bad_x[:, 1],
                       alpha=1.0,
                       edgecolors=colors['warning'],
                       **common)

    if feature_names is not None:
        label_style = dict(fontsize=fontsize,
                           fontname=fontname,
                           color=colors['axis_label'])
        ax.set_xlabel(f"{feature_names[0]}", **label_style)
        ax.set_ylabel(f"{feature_names[1]}", **label_style)

    if 'legend' in show:
        class_names = utils._normalize_class_names(class_names,
                                                   nclasses=len(class_values))
        add_classifier_legend(ax,
                              class_names,
                              class_values,
                              color_map,
                              target_name,
                              colors,
                              fontsize=fontsize,
                              fontname=fontname)

    ax.tick_params(axis='both',
                   which='major',
                   width=.3,
                   labelcolor=colors['tick_label'],
                   labelsize=fontsize)
    for tick in ax.get_xticklabels():
        tick.set_fontname(fontname)
    for tick in ax.get_yticklabels():
        tick.set_fontname(fontname)

    # Drop the top and right spines entirely; thin out the remaining two
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    for side in ('left', 'bottom'):
        ax.spines[side].set_linewidth(.5)
Example #9
0
def regr_split_viz(node: ShadowDecTreeNode,
                   X_train: np.ndarray,
                   y_train: np.ndarray,
                   target_name: str,
                   filename: str = None,
                   y_range=None,
                   ticks_fontsize: int = 8,
                   label_fontsize: int = 9,
                   fontname: str = "Arial",
                   precision=1,
                   X : np.array = None,
                   highlight_node : bool = False,
                   colors: dict=None):
    """
    Draw the scatter plot for a regression split node.

    Shows the node's samples (split feature vs. target), a dashed vertical line at the
    split value, dashed horizontal lines at the mean target on each side of the split,
    and a wedge under the x axis pointing at the split value.

    :param node: Internal node providing feature(), feature_name(), split(), samples()
                 and split_samples().
    :param X_train: Training feature matrix; the node's feature column is plotted.
    :param y_train: Training targets.
    :param target_name: y-axis label, drawn only for the root node.
    :param filename: If given, the figure is saved there and then closed.
    :param y_range: (min, max) for the y axis and the vertical split line. When None,
                    the range of the full y_train is used.
    :param ticks_fontsize: Font size of tick labels.
    :param label_fontsize: Font size of axis labels.
    :param fontname: Font family of axis labels.
    :param precision: Not used by this function.
    :param X: Single observation; when highlight_node is set, its value for the node's
              feature gets a second wedge.
    :param highlight_node: Whether to draw the wedge for X.
    :param colors: Color overrides passed through adjust_colors().
    """
    colors = adjust_colors(colors)

    if y_range is None:
        # The vertical split line needs concrete endpoints; without this fallback the
        # default y_range=None fails when unpacked below.
        y_range = (np.min(y_train), np.max(y_train))

    figsize = (2.5, 1.1)
    fig, ax = plt.subplots(1, 1, figsize=figsize)
    ax.tick_params(colors=colors['tick_label'])

    feature_name = node.feature_name()

    ax.set_xlabel(f"{feature_name}", fontsize=label_fontsize, fontname=fontname, color=colors['axis_label'])

    ax.set_ylim(y_range)
    if node==node.shadow_tree.root:
        ax.set_ylabel(target_name, fontsize=label_fontsize, fontname=fontname, color=colors['axis_label'])

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_linewidth(.3)
    ax.spines['bottom'].set_linewidth(.3)
    ax.tick_params(axis='both', which='major', width=.3, labelcolor=colors['tick_label'], labelsize=ticks_fontsize)

    feature_idx = node.feature()
    split = node.split()
    samples = node.samples()

    # The x axis spans the feature's range over ALL training data, while only the
    # samples that reach this node are plotted.
    feature_column = X_train[:, feature_idx]
    xmin, xmax = np.min(feature_column), np.max(feature_column)
    xr = xmax - xmin
    ax.set_xlim(xmin, xmax)

    X_feature, y_train = feature_column[samples], y_train[samples]

    xticks = [xmin, xmax]
    if xmin + .10*xr < split < xmax - .1*xr: # don't show split if too close to axis ends
        xticks.append(split)
    ax.set_xticks(xticks)

    ax.scatter(X_feature, y_train, s=5, c=colors['scatter_marker'], alpha=.4, lw=.3)

    left, right = node.split_samples()
    left_mean = np.mean(y_train[left])
    right_mean = np.mean(y_train[right])
    line_style = dict(color=colors['split_line'], linewidth=1)
    ax.plot([xmin, split], [left_mean, left_mean], '--', **line_style)
    ax.plot([split, split], [*y_range], '--', **line_style)
    ax.plot([split, xmax], [right_mean, right_mean], '--', **line_style)

    def wedge(ax, x, color):
        """Draw a small triangle just below the x axis with its tip at x."""
        ymin, ymax = ax.get_ylim()
        th = (ymax - ymin) * .1
        tw = xr * .018
        tria = np.array([[x, ymin], [x - tw, ymin-th], [x + tw, ymin-th]])
        t = patches.Polygon(tria, facecolor=color)
        # The triangle lies outside the axes area, so it must not be clipped.
        t.set_clip_on(False)
        ax.add_patch(t)

    wedge(ax, split, color=colors['wedge'])

    if highlight_node:
        wedge(ax, X[feature_idx], color=colors['highlight'])

    if filename is not None:
        plt.savefig(filename, bbox_inches='tight', pad_inches=0)
        plt.close()
Example #10
0
def rtreeviz_univar(ax,
                    x_train: (pd.Series, np.ndarray),  # 1 vector of X data
                    y_train: (pd.Series, np.ndarray),
                    max_depth = 10,
                    feature_name: str = None,
                    target_name: str = None,
                    min_samples_leaf = 1,
                    fontsize: int = 14,
                    show={'title','splits'},
                    split_linewidth=.5,
                    mean_linewidth = 2,
                    markersize=None,
                    colors=None):
    """
    Fit a regression tree to a single feature and draw how it partitions that feature.

    The training points are scattered on ax. With 'splits' in show, each split value
    gets a dashed vertical line and each interval between consecutive splits gets a
    horizontal line at the mean target of the points inside it. With 'title' in show,
    the title reports tree depth, samples per leaf and training R^2.

    :param ax: Matplotlib axes to draw on.
    :param x_train: One vector of feature values (Series or 1-D array).
    :param y_train: Target values (Series or 1-D array).
    :param max_depth: Depth limit of the fitted tree.
    :param feature_name: x-axis label; also passed to the shadow tree.
    :param target_name: y-axis label.
    :param min_samples_leaf: Minimum samples per leaf of the fitted tree.
    :param fontsize: Font size for labels, ticks and title.
    :param show: Collection of elements to display: 'title' and/or 'splits'.
    :param split_linewidth: Line width of the vertical split lines.
    :param mean_linewidth: Line width of the horizontal mean lines.
    :param markersize: Marker size passed to scatter().
    :param colors: Color overrides passed through adjust_colors().
    """
    if isinstance(x_train, pd.Series):
        x_train = x_train.values
    if isinstance(y_train, pd.Series):
        y_train = y_train.values

    colors = adjust_colors(colors)

    y_range = (np.min(y_train), np.max(y_train))  # same y axis for all
    overall_feature_range = (np.min(x_train), np.max(x_train))

    # scikit-learn expects a 2-D feature matrix, so use a single-column view.
    X_col = x_train.reshape(-1, 1)
    t = tree.DecisionTreeRegressor(max_depth=max_depth, min_samples_leaf=min_samples_leaf)
    t.fit(X_col, y_train)

    shadow_tree = ShadowDecTree(t, X_col, y_train, feature_names=[feature_name])
    splits = sorted(node.split() for node in shadow_tree.internal)
    bins = [overall_feature_range[0]] + splits + [overall_feature_range[1]]

    # Mean target of the points falling into each interval between consecutive bin edges.
    means = [np.mean(y_train[(x_train >= left) & (x_train <= right)])
             for left, right in zip(bins[:-1], bins[1:])]

    ax.scatter(x_train, y_train, marker='o', alpha=.4, c=colors['scatter_marker'], s=markersize,
               edgecolor=colors['scatter_edge'], lw=.3)

    if 'splits' in show:
        for split in splits:
            ax.plot([split, split], [*y_range], '--', color=colors['split_line'], linewidth=split_linewidth)

        for left, right, m in zip(bins[:-1], bins[1:], means):
            ax.plot([left, right], [m, m], '-', color=colors['mean_line'], linewidth=mean_linewidth)

    ax.tick_params(axis='both', which='major', width=.3, labelcolor=colors['tick_label'], labelsize=fontsize)

    # Title and labels go on the axes we were handed; the pyplot-level helpers would
    # target whatever axes happen to be "current", which need not be ax.
    if 'title' in show:
        title = f"Regression tree depth {max_depth}, samples per leaf {min_samples_leaf},\nTraining $R^2$={t.score(X_col,y_train):.3f}"
        ax.set_title(title, fontsize=fontsize, color=colors['title'])

    ax.set_xlabel(feature_name, fontsize=fontsize, color=colors['axis_label'])
    ax.set_ylabel(target_name, fontsize=fontsize, color=colors['axis_label'])
Example #11
0
def dtreeviz(tree_model: (tree.DecisionTreeRegressor, tree.DecisionTreeClassifier),
             X_train: (pd.DataFrame, np.ndarray),
             y_train: (pd.Series, np.ndarray),
             feature_names: List[str],
             target_name: str,
             class_names: (Mapping[Number, str], List[str]) = None, # required if classifier
             precision: int = 2,
             orientation: ('TD', 'LR') = "TD",
             show_root_edge_labels: bool = True,
             show_node_labels: bool = False,
             fancy: bool = True,
             histtype: ('bar', 'barstacked', 'strip') = 'barstacked',
             highlight_path: List[int] = [],
             X: np.ndarray = None,
             max_X_features_LR: int = 10,
             max_X_features_TD: int = 20,
             label_fontsize: int=12,
             ticks_fontsize: int=8,
             fontname: str="Arial",
             colors: dict=None
             ) \
    -> DTreeViz:
    """
    Given a decision tree regressor or classifier, create and return a tree visualization
    using the graphviz (DOT) language.

    Per-node image file names of the form ``node<id>_<pid>.svg``, ``leaf<id>_<pid>.svg``
    and ``legend_<pid>.svg`` under ``tempfile.gettempdir()`` are handed to the helper
    viz functions and referenced from the generated DOT text.

    :param tree_model: A DecisionTreeRegressor or DecisionTreeClassifier that has been
                       fit to X_train, y_train.
    :param X_train: A data frame or 2-D matrix of feature vectors used to train the model.
    :param y_train: A pandas Series or 1-D vector with target values or classes.
    :param feature_names: A list of the feature names.
    :param target_name: The name of the target variable.
    :param class_names: [For classifiers] A dictionary or list of strings mapping class
                        value to class name.
    :param precision: When displaying floating-point numbers, how many digits to display
                      after the decimal point. Default is 2.
    :param orientation:  Is the tree top down, "TD", or left to right, "LR"?
    :param show_root_edge_labels: Include < and >= on the edges emanating from the root?
    :param show_node_labels: Add "Node id" to top of each node in graph for educational purposes
    :param fancy: If True, each split node embeds a generated image of the split;
                  if False, split nodes are plain text of the form "feature@split".
                  Leaves always embed an image.
    :param histtype: [For classifiers] Histogram type, one of 'bar', 'barstacked' or
                     'strip'. We find that 'barstacked' looks great up to about
                     four classes.
    :param highlight_path: A list of node IDs to highlight, default is [].
                           Useful for emphasizing node(s) in tree for discussion.
                           If X argument given then this is ignored.
    :type highlight_path: List[int]
    :param X: Instance to run down the tree; derived path to highlight from this vector.
              Show feature vector with labels underneath leaf reached. highlight_path
              is ignored if X is not None.
    :type X: np.ndarray
    :param max_X_features_LR: If len(X) exceeds this limit for LR layout,
                              display only those features
                              used to guide X vector down tree. Helps when len(X) is large.
                              Default is 10.
    :param max_X_features_TD: If len(X) exceeds this limit for TD layout,
                              display only those features
                              used to guide X vector down tree. Helps when len(X) is large.
                              Default is 20.
    :param label_fontsize: Size of the label font
    :param ticks_fontsize: Size of the tick font
    :param fontname: Font which is used for labels and text
    :param colors: Color settings; passed through adjust_colors() before use
                   (presumably overrides merged with defaults -- confirm against adjust_colors).

    :return: A DTreeViz object built from the graphviz DOT text that describes the
             decision tree.
    :raises ValueError: If y_train has object dtype and cannot be converted to float,
                        or (after that conversion) is not one-dimensional.
    """
    def node_name(node : ShadowDecTreeNode) -> str:
        return f"node{node.id}"

    def split_node(node, name, nname, split):
        # `node` is passed explicitly rather than read from the enclosing loop variable.
        if fancy:
            labelgraph = node_label(node) if show_node_labels else ''
            html = f"""<table border="0">
            {labelgraph}
            <tr>
                    <td><img src="{tmp}/node{node.id}_{os.getpid()}.svg"/></td>
            </tr>
            </table>"""
        else:
            html = f"""<font face="Helvetica" color="#444443" point-size="12">{name}@{split}</font>"""
        if node.id in highlight_path:
            gr_node = f'{nname} [margin="0" shape=box penwidth=".5" color="{colors["highlight"]}" style="dashed" label=<{html}>]'
        else:
            gr_node = f'{nname} [margin="0" shape=none label=<{html}>]'
        return gr_node


    def regr_leaf_node(node):
        # always generate fancy regr leaves for now but shrink a bit for nonfancy.
        labelgraph = node_label(node) if show_node_labels else ''
        html = f"""<table border="0">
        {labelgraph}
        <tr>
                <td><img src="{tmp}/leaf{node.id}_{os.getpid()}.svg"/></td>
        </tr>
        </table>"""
        if node.id in highlight_path:
            return f'leaf{node.id} [margin="0" shape=box penwidth=".5" color="{colors["highlight"]}" style="dashed" label=<{html}>]'
        else:
            return f'leaf{node.id} [margin="0" shape=box penwidth="0" color="{colors["text"]}" label=<{html}>]'


    def class_leaf_node(node):
        labelgraph = node_label(node) if show_node_labels else ''
        html = f"""<table border="0" CELLBORDER="0">
        {labelgraph}
        <tr>
                <td><img src="{tmp}/leaf{node.id}_{os.getpid()}.svg"/></td>
        </tr>
        </table>"""
        if node.id in highlight_path:
            return f'leaf{node.id} [margin="0" shape=box penwidth=".5" color="{colors["highlight"]}" style="dashed" label=<{html}>]'
        else:
            return f'leaf{node.id} [margin="0" shape=box penwidth="0" color="{colors["text"]}" label=<{html}>]'

    def node_label(node):
        return f'<tr><td CELLPADDING="0" CELLSPACING="0"><font face="Helvetica" color="{colors["node_label"]}" point-size="14"><i>Node {node.id}</i></font></td></tr>'

    def class_legend_html():
        return f"""
        <table border="0" cellspacing="0" cellpadding="0">
            <tr>
                <td border="0" cellspacing="0" cellpadding="0"><img src="{tmp}/legend_{os.getpid()}.svg"/></td>
            </tr>
        </table>
        """

    def class_legend_gr():
        # Only classifiers get a legend cluster.
        if not shadow_tree.isclassifier():
            return ""
        return f"""
            subgraph cluster_legend {{
                style=invis;
                legend [penwidth="0" margin="0" shape=box margin="0.03" width=.1, height=.1 label=<
                {class_legend_html()}
                >]
            }}
            """

    def instance_html(path, instance_fontsize: int = 11):
        headers = []
        features_used = [node.feature() for node in path[:-1]] # don't include leaf
        display_X = X
        display_feature_names = feature_names
        highlight_feature_indexes = features_used
        if (orientation == 'TD' and len(X) > max_X_features_TD) or\
           (orientation == 'LR' and len(X) > max_X_features_LR):
            # squash all features down to just those used
            display_X = [X[i] for i in features_used] + ['...']
            display_feature_names = [node.feature_name() for node in path[:-1]] + ['...']
            # every displayed column except the trailing '...' is a used feature
            highlight_feature_indexes = range(0,len(features_used))

        for i,name in enumerate(display_feature_names):
            if i in highlight_feature_indexes:
                color = colors['highlight']
            else:
                color = colors['text']
            headers.append(f'<td cellpadding="1" align="right" bgcolor="white">'
                           f'<font face="Helvetica" color="{color}" point-size="{instance_fontsize}">'
                           f'{name}'
                           '</font>'
                           '</td>')

        values = []
        for i,v in enumerate(display_X):
            if i in highlight_feature_indexes:
                color = colors['highlight']
            else:
                color = colors['text']
            if isinstance(v, (int, str)):
                disp_v = v
            else:
                disp_v = myround(v, precision)
            values.append(f'<td cellpadding="1" align="right" bgcolor="white">'
                          f'<font face="Helvetica" color="{color}" point-size="{instance_fontsize}">{disp_v}</font>'
                          '</td>')

        return f"""
        <table border="0" cellspacing="0" cellpadding="0">
        <tr>
            {''.join(headers)}
        </tr>
        <tr>
            {''.join(values)}
        </tr>
        </table>
        """

    def instance_gr():
        # Nothing to draw unless a test instance was supplied.
        if X is None:
            return ""
        pred, path = shadow_tree.predict(X)
        leaf = f"leaf{path[-1].id}"
        if shadow_tree.isclassifier():
            edge_label = f" &#160;Prediction<br/> {path[-1].prediction_name()}"
        else:
            edge_label = f" &#160;Prediction<br/> {myround(path[-1].prediction(), precision)}"
        return f"""
            subgraph cluster_instance {{
                style=invis;
                X_y [penwidth="0.3" margin="0" shape=box margin="0.03" width=.1, height=.1 label=<
                {instance_html(path)}
                >]
            }}
            {leaf} -> X_y [dir=back; penwidth="1.2" color="{colors['highlight']}" label=<<font face="Helvetica" color="{colors['leaf_label']}" point-size="{11}">{edge_label}</font>>]
            """

    colors = adjust_colors(colors)

    if orientation=="TD":
        ranksep = ".2"
        nodesep = "0.1"
    else:
        if fancy:
            ranksep = ".22"
            nodesep = "0.1"
        else:
            ranksep = ".05"
            nodesep = "0.09"

    tmp = tempfile.gettempdir()

    shadow_tree = ShadowDecTree(tree_model, X_train, y_train,
                                feature_names=feature_names, class_names=class_names)

    if X is not None:
        # An explicit instance overrides any caller-supplied highlight_path.
        # This rebinds the local name; the default list object is never mutated.
        pred, path = shadow_tree.predict(X)
        highlight_path = [n.id for n in path]

    n_classes = shadow_tree.nclasses()
    color_values = colors['classes'][n_classes]

    # Fix the mapping from target value to color for entire tree
    if shadow_tree.isclassifier():
        class_values = shadow_tree.unique_target_values
        color_map = {v: color_values[i] for i, v in enumerate(class_values)}
        draw_legend(shadow_tree, target_name, f"{tmp}/legend_{os.getpid()}.svg", colors=colors)

    if isinstance(X_train, pd.DataFrame):
        X_train = X_train.values
    if isinstance(y_train, pd.Series):
        y_train = y_train.values
    if y_train.dtype == np.dtype(object):
        try:
            y_train = y_train.astype('float')
        except ValueError as e:
            raise ValueError('y_train needs to consist only of numerical values. {}'.format(e)) from e
        if len(y_train.shape) != 1:
            raise ValueError('y_train must a one-dimensional list or Pandas Series, got: {}'.format(y_train.shape))

    y_range = (min(y_train) * 1.03, max(y_train) * 1.03)  # same y axis for all

    # Find max height (count) for any bar in any node
    if shadow_tree.isclassifier():
        nbins = get_num_bins(histtype, n_classes)
        node_heights = shadow_tree.get_split_node_heights(X_train, y_train, nbins=nbins)

    internal = []
    for node in shadow_tree.internal:
        if fancy:
            if shadow_tree.isclassifier():
                class_split_viz(node, X_train, y_train,
                                filename=f"{tmp}/node{node.id}_{os.getpid()}.svg",
                                precision=precision,
                                colors={**color_map, **colors},
                                histtype=histtype,
                                node_heights=node_heights,
                                X=X,
                                ticks_fontsize=ticks_fontsize,
                                label_fontsize=label_fontsize,
                                fontname=fontname,
                                highlight_node=node.id in highlight_path)
            else:
                regr_split_viz(node, X_train, y_train,
                               filename=f"{tmp}/node{node.id}_{os.getpid()}.svg",
                               target_name=target_name,
                               y_range=y_range,
                               precision=precision,
                               X=X,
                               ticks_fontsize=ticks_fontsize,
                               label_fontsize=label_fontsize,
                               fontname=fontname,
                               highlight_node=node.id in highlight_path,
                               colors=colors)

        nname = node_name(node)
        gr_node = split_node(node, node.feature_name(), nname,
                             split=myround(node.split(), precision))
        internal.append(gr_node)

    leaves = []
    for node in shadow_tree.leaves:
        if shadow_tree.isclassifier():
            class_leaf_viz(node, colors=color_values,
                           filename=f"{tmp}/leaf{node.id}_{os.getpid()}.svg",
                           graph_colors=colors)
            leaves.append( class_leaf_node(node) )
        else:
            # for now, always gen leaf
            regr_leaf_viz(node,
                          y_train,
                          target_name=target_name,
                          filename=f"{tmp}/leaf{node.id}_{os.getpid()}.svg",
                          y_range=y_range,
                          precision=precision,
                          ticks_fontsize=ticks_fontsize,
                          label_fontsize=label_fontsize,
                          fontname=fontname,
                          colors=colors)
            leaves.append( regr_leaf_node(node) )

    # Edge labels on non-root nodes are currently always disabled.
    show_edge_labels = False
    all_llabel = '&lt;' if show_edge_labels else ''
    all_rlabel = '&ge;' if show_edge_labels else ''
    root_llabel = '&lt;' if show_root_edge_labels else ''
    root_rlabel = '&ge;' if show_root_edge_labels else ''

    edges = []
    # non leaf edges with < and >=
    for node in shadow_tree.internal:
        nname = node_name(node)
        if node.left.isleaf():
            left_node_name ='leaf%d' % node.left.id
        else:
            left_node_name = node_name(node.left)
        if node.right.isleaf():
            right_node_name ='leaf%d' % node.right.id
        else:
            right_node_name = node_name(node.right)

        if node==shadow_tree.root:
            llabel = root_llabel
            rlabel = root_rlabel
        else:
            llabel = all_llabel
            rlabel = all_rlabel

        lcolor = rcolor = colors['arrow']
        lpw = rpw = "0.3"
        if node.left.id in highlight_path:
            lcolor = colors['highlight']
            lpw = "1.2"
        if node.right.id in highlight_path:
            # The right edge gets its own color; previously this assigned lcolor,
            # which recolored the left edge and left the right edge unhighlighted.
            rcolor = colors['highlight']
            rpw = "1.2"
        edges.append( f'{nname} -> {left_node_name} [penwidth={lpw} color="{lcolor}" label=<{llabel}>]' )
        edges.append( f'{nname} -> {right_node_name} [penwidth={rpw} color="{rcolor}" label=<{rlabel}>]' )
        # Invisible edge keeps the two children on the same rank, left before right.
        edges.append(f"""
        {{
            rank=same;
            {left_node_name} -> {right_node_name} [style=invis]
        }}
        """)

    newline = "\n\t"
    dot = f"""
digraph G {{
    splines=line;
    nodesep={nodesep};
    ranksep={ranksep};
    rankdir={orientation};
    margin=0.0;
    node [margin="0.03" penwidth="0.5" width=.1, height=.1];
    edge [arrowsize=.4 penwidth="0.3"]

    {newline.join(internal)}
    {newline.join(edges)}
    {newline.join(leaves)}

    {class_legend_gr()}
    {instance_gr()}
}}
    """

    return DTreeViz(dot)
Example #12
0
def ctreeviz_bivar(ax, X_train, y_train, feature_names, class_names,
                   target_name,
                   max_depth=None,
                   min_samples_leaf=None,
                   fontsize=14,
                   fontname="Arial",
                   show={'title','legend','splits'},
                   colors=None):
    """
    Show tesselated 2D feature space for a bivariate classification tree.

    A DecisionTreeClassifier is fit to X_train/y_train here; the training points
    are plotted using columns 0 and 1 of X_train, and the axes are labelled with
    feature_names[0] and feature_names[1].
    NOTE(review): presumably X_train should contain exactly the two features to
    plot -- the tree is fit on every column passed in; confirm with callers.

    :param ax: Matplotlib axes to draw on.
    :param X_train: A data frame or 2-D matrix of feature vectors.
    :param y_train: A pandas Series or 1-D vector of class values.
    :param feature_names: Names of the features; the first two label the axes.
    :param class_names: Class names, forwarded to ShadowDecTree and the legend.
    :param target_name: Name of the target variable, forwarded to the legend.
    :param max_depth: Max depth of the fitted tree.
    :param min_samples_leaf: Min samples per leaf; defaults to 1 when only
                             max_depth is given.
    :param fontsize: Font size for axis labels and title.
    :param fontname: Font used for axis labels.
    :param show: Which of 'title', 'legend', 'splits' to draw. Only read, never
                 modified.
    :param colors: Color settings; passed through adjust_colors() before use.
    :return: None
    :raises ValueError: If neither max_depth nor min_samples_leaf is set.
    """
    if isinstance(X_train,pd.DataFrame):
        X_train = X_train.values
    if isinstance(y_train, pd.Series):
        y_train = y_train.values

    if max_depth is None and min_samples_leaf is None:
        raise ValueError("Either max_depth or min_samples_leaf must be set")
    if max_depth is not None and min_samples_leaf is None:
        min_samples_leaf = 1

    colors = adjust_colors(colors)

    ct = tree.DecisionTreeClassifier(max_depth=max_depth, min_samples_leaf=min_samples_leaf)
    ct.fit(X_train, y_train)

    shadow_tree = ShadowDecTree(ct, X_train, y_train,
                                feature_names=feature_names, class_names=class_names)

    tesselation = shadow_tree.tesselation()

    n_classes = shadow_tree.nclasses()
    class_values = shadow_tree.unique_target_values

    # Map each class value (not its position) to a color.
    color_values = colors['classes'][n_classes]
    color_map = {v: color_values[i] for i, v in enumerate(class_values)}

    if 'splits' in show:
        # One translucent rectangle per tesselation region, filled with the
        # color of that region's predicted class.
        for node,bbox in tesselation:
            x = bbox[0]
            y = bbox[1]
            w = bbox[2]-bbox[0]
            h = bbox[3]-bbox[1]
            rect = patches.Rectangle((x, y), w, h, 0, linewidth=.3, alpha=.4,
                                     edgecolor=colors['rect_edge'], facecolor=color_map[node.prediction()])
            ax.add_patch(rect)

    dot_w = 25
    for cl in class_values:
        points = X_train[y_train == cl]
        # color_map is keyed by class value; indexing it by enumeration position
        # only worked when the class values happened to be 0..n-1.
        ax.scatter(points[:,0], points[:,1], alpha=1, marker='o', s=dot_w, c=color_map[cl],
                   edgecolors=colors['scatter_edge'], lw=.3)

    ax.set_xlabel(f"{feature_names[0]}", fontsize=fontsize, fontname=fontname, color=colors['axis_label'])
    ax.set_ylabel(f"{feature_names[1]}", fontsize=fontsize, fontname=fontname, color=colors['axis_label'])
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['bottom'].set_linewidth(.3)

    if 'legend' in show:
        add_classifier_legend(ax, class_names, class_values, color_map, target_name, colors)

    if 'title' in show:
        accur = ct.score(X_train, y_train)
        title = f"Classifier tree depth {max_depth}, training accuracy={accur*100:.2f}%"
        # Title the axes we were given, not whatever axes pyplot considers current.
        ax.set_title(title, fontsize=fontsize, color=colors['title'])

    return None
Example #13
0
def ctreeviz_univar(ax, x_train, y_train, feature_name, class_names,
                    target_name,
                    max_depth=None,
                    min_samples_leaf=None,
                    fontsize=14, fontname="Arial", nbins=25, gtype='strip',
                    show={'title','legend','splits'},
                    colors=None):
    """
    Show the 1D feature space of a univariate classification tree.

    A DecisionTreeClassifier is fit to the single feature x_train here. The
    training data is drawn either as a stacked histogram or as a jittered strip
    plot, and a colored box along the bottom of each region between consecutive
    split points shows the most common class in that region.

    :param ax: Matplotlib axes to draw on.
    :param x_train: A pandas Series or 1-D vector with the feature values.
    :param y_train: A pandas Series or 1-D vector of class values.
    :param feature_name: Name of the feature, used as the x-axis label.
    :param class_names: Class names, forwarded to ShadowDecTree and the legend.
    :param target_name: Name of the target variable, forwarded to the legend.
    :param max_depth: Max depth of the fitted tree.
    :param min_samples_leaf: Min samples per leaf; defaults to 1 when only
                             max_depth is given.
    :param fontsize: Font size for labels, ticks and title.
    :param fontname: Font used for the x-axis label.
    :param nbins: Number of histogram bin edges for gtype 'barstacked'.
    :param gtype: 'barstacked' or 'strip'; any other value draws no data points.
    :param show: Which of 'title', 'legend', 'splits' to draw. Only read, never
                 modified.
    :param colors: Color settings; passed through adjust_colors() before use.
    :raises ValueError: If neither max_depth nor min_samples_leaf is set.
    """
    if isinstance(x_train, pd.Series):
        x_train = x_train.values
    if isinstance(y_train, pd.Series):
        y_train = y_train.values

    if max_depth is None and min_samples_leaf is None:
        raise ValueError("Either max_depth or min_samples_leaf must be set")
    if max_depth is not None and min_samples_leaf is None:
        min_samples_leaf = 1

    colors = adjust_colors(colors)

    ct = tree.DecisionTreeClassifier(max_depth=max_depth, min_samples_leaf=min_samples_leaf)
    ct.fit(x_train.reshape(-1, 1), y_train)

    shadow_tree = ShadowDecTree(ct, x_train.reshape(-1, 1), y_train,
                                feature_names=[feature_name], class_names=class_names)

    n_classes = shadow_tree.nclasses()
    overall_feature_range = (np.min(x_train), np.max(x_train))
    class_values = shadow_tree.unique_target_values

    # Map each class value (not its position) to a color.
    color_values = colors['classes'][n_classes]
    color_map = {v: color_values[i] for i, v in enumerate(class_values)}
    X_colors = [color_map[cl] for cl in class_values]

    ax.set_xlabel(f"{feature_name}", fontsize=fontsize, fontname=fontname,
                  color=colors['axis_label'])
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.yaxis.set_visible(False)
    ax.spines['left'].set_visible(False)
    ax.spines['bottom'].set_linewidth(.3)

    # Feature values grouped by class, in class_values order.
    X_hist = [x_train[y_train == cl] for cl in class_values]

    if gtype == 'barstacked':
        bins = np.linspace(start=overall_feature_range[0], stop=overall_feature_range[1], num=nbins, endpoint=True)
        hist, bins, barcontainers = ax.hist(X_hist,
                                            color=X_colors,
                                            align='mid',
                                            histtype='barstacked',
                                            bins=bins,
                                            label=class_names)

        for patch in barcontainers:
            for rect in patch.patches:
                rect.set_linewidth(.5)
                rect.set_edgecolor(colors['edge'])
        ax.set_xlim(*overall_feature_range)
        ax.set_xticks(overall_feature_range)
        ax.set_yticks([0, max([max(h) for h in hist])])
    elif gtype == 'strip':
        # user should pass in short and wide fig
        sigma = .013
        mu = .08
        class_step = .08
        dot_w = 20
        ax.set_ylim(0, mu + n_classes*class_step)
        for i, (cl, bucket) in enumerate(zip(class_values, X_hist)):
            # Each class gets its own horizontal band; random jitter spreads the dots.
            y_noise = np.random.normal(mu+i*class_step, sigma, size=len(bucket))
            # color_map is keyed by class value; indexing it by position only
            # worked when the class values happened to be 0..n-1.
            ax.scatter(bucket, y_noise, alpha=.7, marker='o', s=dot_w, c=color_map[cl],
                       edgecolors=colors['scatter_edge'], lw=.3)

    ax.tick_params(axis='both', which='major', width=.3, labelcolor=colors['tick_label'],
                   labelsize=fontsize)

    splits = sorted(node.split() for node in shadow_tree.internal)
    bins = [ax.get_xlim()[0]] + splits + [ax.get_xlim()[1]]

    pred_box_height = .07 * ax.get_ylim()[1]
    for left, right in zip(bins[:-1], bins[1:]):
        # Most common class among training samples that fall in [left, right].
        inrange = y_train[(x_train >= left) & (x_train <= right)]
        values, counts = np.unique(inrange, return_counts=True)
        pred = values[np.argmax(counts)]
        rect = patches.Rectangle((left, 0), (right - left), pred_box_height, linewidth=.3,
                                 edgecolor=colors['edge'], facecolor=color_map[pred])
        ax.add_patch(rect)

    if 'legend' in show:
        add_classifier_legend(ax, class_names, class_values, color_map, target_name, colors)

    if 'title' in show:
        accur = ct.score(x_train.reshape(-1, 1), y_train)
        title = f"Classifier tree depth {max_depth}, training accuracy={accur*100:.2f}%"
        # Title the axes we were given, not whatever axes pyplot considers current.
        ax.set_title(title, fontsize=fontsize, color=colors['title'])

    if 'splits' in show:
        for split in splits:
            # Draw on the passed-in axes rather than pyplot's current axes.
            ax.plot([split, split], [*ax.get_ylim()], '--', color=colors['split_line'], linewidth=1)
Example #14
0
def rtreeviz_bivar_3D(ax, X_train, y_train, max_depth, feature_names, target_name,
                      fontsize=14, ticks_fontsize=10, fontname="Arial",
                      azim=0, elev=0, dist=7,
                      show={'title'},
                      colors=None,
                      n_colors_in_map = 100
                      ) -> None:
    """
    Show 3D feature space for a bivariate regression tree.

    A DecisionTreeRegressor is fit to X_train/y_train here. Each tesselation
    region of the tree is drawn as a flat surface at the height of its
    prediction, and the training points are scattered using columns 0 and 1 of
    X_train against y_train. Surfaces and points are colored by target value
    from a gradient running from colors['color_map_min'] to colors['color_map_max'].
    NOTE(review): presumably X_train should contain exactly the two features to
    plot -- the tree is fit on every column passed in; confirm with callers.

    :param ax: Matplotlib 3D axes to draw on.
    :param X_train: A data frame or 2-D matrix of feature vectors.
    :param y_train: A pandas Series or 1-D vector of target values.
    :param max_depth: Max depth of the fitted tree.
    :param feature_names: Names of the features; the first two label x and y.
    :param target_name: Name of the target variable; labels the z axis.
    :param fontsize: Font size for axis labels and title.
    :param ticks_fontsize: Font size for tick labels.
    :param fontname: Font used for axis labels.
    :param azim: Azimuth passed to ax.view_init.
    :param elev: Elevation passed to ax.view_init.
    :param dist: Value assigned to ax.dist.
    :param show: If it contains 'title', a title with the training R^2 is drawn.
                 Only read, never modified.
    :param colors: Color settings; passed through adjust_colors() before use.
    :param n_colors_in_map: Number of steps in the color gradient.
    :return: None
    """
    if isinstance(X_train, pd.DataFrame):
        X_train = X_train.values
    if isinstance(y_train, pd.Series):
        y_train = y_train.values

    colors = adjust_colors(colors)

    ax.view_init(elev=elev, azim=azim)
    ax.dist = dist

    y_lim = np.min(y_train), np.max(y_train)
    y_range = y_lim[1] - y_lim[0]

    def color_index(value):
        # Position of `value` within the gradient; a constant target has zero
        # range, so everything maps to the first color instead of dividing by 0.
        if y_range == 0:
            return 0
        return int(((value - y_lim[0]) / y_range) * (n_colors_in_map - 1))

    # The gradient itself (n_colors_in_map entries). It is kept separate from the
    # per-sample colors below: plane() must index the gradient, and previously the
    # gradient was overwritten by the per-sample list before plane() ran.
    color_spectrum = [rgb2hex(c.rgb, force_long=True)
                      for c in Color(colors['color_map_min']).range_to(Color(colors['color_map_max']),
                                                                       n_colors_in_map)]
    # One color per training sample, for the scatter plot.
    point_colors = [color_spectrum[color_index(y)] for y in y_train]

    def plane(node, bbox):
        # Flat surface over the region's bounding box at the predicted height.
        x = np.linspace(bbox[0], bbox[2], 2)
        y = np.linspace(bbox[1], bbox[3], 2)
        xx, yy = np.meshgrid(x, y)
        z = np.full(xx.shape, node.prediction())
        ax.plot_surface(xx, yy, z, alpha=.85, shade=False,
                        color=color_spectrum[color_index(node.prediction())],
                        edgecolor=colors['edge'], lw=.3)

    rt = tree.DecisionTreeRegressor(max_depth=max_depth)
    rt.fit(X_train, y_train)

    shadow_tree = ShadowDecTree(rt, X_train, y_train, feature_names=feature_names)
    tesselation = shadow_tree.tesselation()

    for node, bbox in tesselation:
        plane(node, bbox)

    x, y, z = X_train[:, 0], X_train[:, 1], y_train
    ax.scatter(x, y, z, marker='o', alpha=.7, edgecolor=colors['scatter_edge'], lw=.3, c=point_colors)

    ax.set_xlabel(f"{feature_names[0]}", fontsize=fontsize, fontname=fontname, color=colors['axis_label'])
    ax.set_ylabel(f"{feature_names[1]}", fontsize=fontsize, fontname=fontname, color=colors['axis_label'])
    ax.set_zlabel(f"{target_name}", fontsize=fontsize, fontname=fontname, color=colors['axis_label'])

    ax.tick_params(axis='both', which='major', width=.3, labelcolor=colors['tick_label'], labelsize=ticks_fontsize)

    if 'title' in show:
        accur = rt.score(X_train, y_train)
        title = f"Regression tree depth {max_depth}, training $R^2$={accur:.3f}"
        # Title the axes we were given, not whatever axes pyplot considers current.
        ax.set_title(title, fontsize=fontsize, color=colors['title'])

    return None