Beispiel #1
0
def plot_tree(tree_classifier,data_X,data_y,max_depth=None,proportion=False, rotate=False):
    """Render a fitted decision tree classifier as PNG bytes.

    :param tree_classifier: fitted tree passed to ``export_graphviz``.
    :param data_X: object whose ``columns.values`` supply the feature names.
    :param data_y: object whose ``unique()`` values supply the class names.
    :param max_depth: forwarded to ``export_graphviz``.
    :param proportion: forwarded to ``export_graphviz``.
    :param rotate: forwarded to ``export_graphviz``.
    :return: the bytes produced by ``Source.pipe(format='png')``.
    """
    # export_graphviz expects class names in ascending class order, while
    # ``unique()`` yields order of first appearance; sort before stringifying
    # so every label lines up with the class it describes.
    class_names = [str(label) for label in sorted(data_y.unique())]
    feature_names = [str(column) for column in data_X.columns.values]

    dot_source = export_graphviz(tree_classifier,
                                 out_file=None,
                                 feature_names=feature_names,
                                 class_names=class_names,
                                 rounded=True,
                                 proportion=proportion,
                                 precision=2,
                                 filled=True,
                                 max_depth=max_depth,
                                 rotate=rotate)

    # The original had a second, unreachable ``return display(SVG(...))``
    # after this one; the PNG return is the behaviour callers actually get.
    return Source(dot_source).pipe(format='png')
Beispiel #2
0
def train_and_visualize(x, y):
    """Fit a default decision tree on a train/test split, print scores, draw it.

    The data is split with ``train_test_split(random_state=1)``; a
    ``DecisionTreeClassifier`` is fitted on the training part; accuracy and
    macro-averaged F1 on the held-out part are printed; the fitted tree is
    then displayed inline as SVG.

    The labels used in the drawing are hard-coded: six feature names and the
    class names "1" through "14".
    """
    from sklearn import tree
    from sklearn.metrics import accuracy_score, f1_score
    from sklearn.model_selection import train_test_split

    X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=1)

    classifier = tree.DecisionTreeClassifier()
    classifier.fit(X_train, y_train)
    predicted = classifier.predict(X_test)

    print("Accuracy: ", accuracy_score(y_test, predicted))
    print("F1 score: ", f1_score(y_test, predicted, average='macro'))

    from graphviz import Source
    from IPython.display import SVG, display

    column_labels = [
        'mean_gps', 'stdv_gps', 'mean_dp', 'stdv_dp',
        'mean_voxel', 'stdv_voxel'
    ]
    class_labels = [str(c) for c in range(1, 15)]

    # Build the DOT description first, then hand it to Graphviz for rendering.
    dot_source = tree.export_graphviz(classifier,
                                      out_file=None,
                                      feature_names=column_labels,
                                      class_names=class_labels,
                                      filled=True,
                                      special_characters=True)
    display(SVG(Source(dot_source).pipe(format='svg')))
def plot_tree(train_features: pd.DataFrame,
              train_target: pd.DataFrame,
              feature_names: list,
              split: str,
              depth: int,
              min_split: float,
              min_leaf: float = 0.2):
    """
    Fit a regression tree with the given settings and display it as SVG.

    Code adapted from https://towardsdatascience.com/interactive-visualization-of-decision-trees-with-jupyter-widgets-ca15dd312084
    :param train_features: training inputs passed to ``fit``.
    :param train_target: training targets passed to ``fit``.
    :param feature_names: labels handed to ``export_graphviz``.
    :param split: value for the regressor's ``splitter``.
    :param depth: value for the regressor's ``max_depth``.
    :param min_split: value for the regressor's ``min_samples_split``.
    :param min_leaf: value for the regressor's ``min_samples_leaf``.
    :return: the fitted ``DecisionTreeRegressor``.
    """
    hyperparameters = {
        'random_state': 0,
        'splitter': split,
        'max_depth': depth,
        'min_samples_split': min_split,
        'min_samples_leaf': min_leaf,
    }
    regressor = DecisionTreeRegressor(**hyperparameters)
    regressor.fit(train_features, train_target)

    dot_source = tree.export_graphviz(regressor,
                                      out_file=None,
                                      feature_names=feature_names,
                                      filled=True)
    rendered = Source(dot_source).pipe(format='svg')
    display(SVG(rendered))
    return regressor
def plot_tree_rf(crit=["gini", "entropy"],
                 bootstrap=["True", "False"],
                 depth=IntSlider(min=1,max=30,value=3, continuous_update=False),
                 forests=IntSlider(min=1,max=200,value=100,continuous_update=False),
                 min_split=IntSlider(min=2,max=5,value=2, continuous_update=False),
                 min_leaf=IntSlider(min=1,max=5,value=1, continuous_update=False)):
    """Fit a random forest on the module-level train split and draw its first tree.

    The list/slider defaults look like ipywidgets ``interact`` abbreviations,
    so each call presumably receives one selected value per parameter --
    verify against the caller.

    :param crit: split criterion for the forest.
    :param bootstrap: ``True``/``False`` or the strings ``"True"``/``"False"``.
    :param depth: ``max_depth`` of each tree.
    :param forests: number of trees (``n_estimators``).
    :param min_split: ``min_samples_split``.
    :param min_leaf: ``min_samples_leaf``.
    :return: the fitted ``RandomForestClassifier``.
    """
    # The option list yields strings, and any non-empty string (including
    # "False") is truthy, so map the text onto a real boolean explicitly.
    if isinstance(bootstrap, bool):
        use_bootstrap = bootstrap
    else:
        use_bootstrap = str(bootstrap) == "True"

    estimator = RandomForestClassifier(random_state=1,
                                       criterion=crit,
                                       bootstrap=use_bootstrap,
                                       n_estimators=forests,
                                       max_depth=depth,
                                       min_samples_split=min_split,
                                       min_samples_leaf=min_leaf,
                                       n_jobs=-1,
                                       verbose=False).fit(X_train, y_train)

    print('Random Forest Training Accuracy: {:.3f}'.format(accuracy_score(y_train, estimator.predict(X_train))))
    print('Random Forest Test Accuracy: {:.3f}'.format(accuracy_score(y_test, estimator.predict(X_test))))

    # Only the first tree of the ensemble is drawn.
    num_tree = estimator.estimators_[0]
    print('\nVisualizing Decision Tree:', 0)

    graph = Source(tree.export_graphviz(num_tree,
                                        out_file=None,
                                        feature_names=X_train.columns,
                                        class_names=['0', '1'],
                                        filled=True))

    display(Image(data=graph.pipe(format='png')))

    return estimator
def decision_plot(new_X_train2, new_y_train2, feature_names, test, model,
                  classify):
    """Fit a depth-1 entropy tree, compare it with ``model`` and draw it.

    A ``DecisionTreeClassifier(max_depth=1, criterion='entropy')`` is fitted
    on the given data. When ``classify`` is ``'rf'`` or ``'xg'`` the tree's
    and ``model``'s predictions for ``test`` are printed side by side. The
    tree's accuracy on its own training data is printed, the tree is shown
    as SVG, and each item yielded by iterating ``decision_path(test)`` is
    printed.

    :return: the fitted depth-1 tree.
    """
    dt = DecisionTreeClassifier(random_state=0,
                                criterion='entropy',
                                max_depth=1)
    dt.fit(new_X_train2, new_y_train2)

    # Pick the wording for the compared model; any other value prints nothing.
    comparison_text = None
    if classify == 'rf':
        comparison_text = " and Random Forests predicted:"
    elif classify == 'xg':
        comparison_text = " and XGboost predicted:"

    if comparison_text is not None:
        print("Decision Tree Predicts for Instance:" + str(dt.predict(test)) +
              comparison_text + str(model.predict(test)))

    training_predictions = dt.predict(new_X_train2)
    print("Let's see fidelity",
          accuracy_score(new_y_train2, training_predictions))

    dot_source = export_graphviz(dt,
                                 out_file=None,
                                 feature_names=feature_names,
                                 class_names=dt.classes_,
                                 filled=True)
    display(SVG(Source(dot_source).pipe(format='svg')))

    print("Lets find out the path for this specific instance!")
    for path_entry in dt.decision_path(test):
        print(path_entry)
    return dt
def plot_decision_tree(clf, feature_names, class_names):
    """Display ``clf`` as a rotated SVG with each node's split rule in bold.

    The DOT text comes from ``export_graphviz`` (class names sorted, impurity
    hidden). Any line matching ``label=<...<br/>samples = ...`` then gets the
    text between ``label=<`` and ``<br/>samples`` wrapped in ``<b>`` tags
    before rendering.
    """
    export_options = dict(out_file=None,
                          feature_names=feature_names,
                          class_names=sorted(class_names),
                          filled=True,
                          rounded=True,
                          label='all',
                          impurity=False,
                          proportion=False,
                          leaves_parallel=False,
                          rotate=True,
                          special_characters=True)
    raw_dot = tree.export_graphviz(clf, **export_options)

    # Embolden the rule part of every node label, one line at a time.
    rule_pattern = re.compile(r'(.*label=<)(.*)(<br/>samples = .*)')
    bold_dot = '\n'.join(
        rule_pattern.sub(r'\1<b>\2</b>\3', dot_line)
        for dot_line in raw_dot.split('\n'))

    # Render inline.
    display(SVG(Source(bold_dot).pipe(format='svg')))
def plot_tree(crit=["gini", "entropy"],
              split=["best", "random"],
              depth=IntSlider(min=1,max=30,value=2, continuous_update=False),
              min_split=IntSlider(min=2,max=5,value=2, continuous_update=False),
              min_leaf=IntSlider(min=1,max=5,value=1, continuous_update=False)):
    """Fit a decision tree on the module-level train split, report and draw it.

    Training and test accuracy are printed and the fitted tree is displayed
    as a PNG image.

    :return: the fitted ``DecisionTreeClassifier``.
    """
    hyperparameters = dict(random_state=0,
                           criterion=crit,
                           splitter=split,
                           max_depth=depth,
                           min_samples_split=min_split,
                           min_samples_leaf=min_leaf)
    estimator = DecisionTreeClassifier(**hyperparameters)
    estimator.fit(X_train, y_train)

    train_accuracy = accuracy_score(y_train, estimator.predict(X_train))
    print('Decision Tree Training Accuracy: {:.3f}'.format(train_accuracy))
    test_accuracy = accuracy_score(y_test, estimator.predict(X_test))
    print('Decision Tree Test Accuracy: {:.3f}'.format(test_accuracy))

    dot_source = tree.export_graphviz(estimator,
                                      out_file=None,
                                      feature_names=X_train.columns,
                                      class_names=['0', '1'],
                                      filled=True)
    png_bytes = Source(dot_source).pipe(format='png')
    display(Image(data=png_bytes))

    return estimator
Beispiel #8
0
def viz_render_to_svg(template_path, *args, engine='dot', **kwargs):
    """Render a DOT template to SVG.

    The template at ``template_path`` is rendered to a string with
    ``loader.render_to_string`` (extra positional/keyword arguments are
    forwarded), then piped through Graphviz using ``engine``.

    :return: whatever ``Source.pipe()`` yields for ``format='svg'``.
    """
    # Lazy %-style arguments: the earlier f-string call also passed
    # ``dot_source`` as a formatting argument with no placeholder, which
    # makes the logging module report a formatting error.
    logger.debug('load dot template at %s', template_path)
    dot_source = loader.render_to_string(template_path, *args, **kwargs)
    logger.debug('result dot_source=%s', dot_source)
    src = Source(dot_source, format='svg', engine=engine)
    return src.pipe()
Beispiel #9
0
def print_model_results(model_results, parameter_name, y_label = "accuracy", tree_mode_label = 'best_model'):
    """Print the best/worst parameter summary and draw the selected tree.

    :param model_results: mapping with ``best_parameter``, ``best_metric``,
        ``worst_parameter``, ``worst_metric`` and the tree stored under
        ``tree_mode_label``.
    :param parameter_name: name shown in front of the parameter values.
    :param y_label: metric name shown in the summary lines.
    :param tree_mode_label: key of the tree in ``model_results`` to draw.
    """
    best_metric_text = ", " + y_label + " = {:.2f}%".format(model_results['best_metric'])
    print("BEST PERFORMANCE TREE,", parameter_name, "=", model_results['best_parameter'], best_metric_text)

    worst_metric_text = ", " + y_label + " = {:.2f}%".format(model_results['worst_metric'])
    print("WORST PERFORMANCE TREE,", parameter_name, "=", model_results['worst_parameter'], worst_metric_text)

    dot_source = tree.export_graphviz(model_results[tree_mode_label],
                                      out_file=None,
                                      class_names=['Negative', 'Positive'],
                                      filled=True,
                                      rounded=True,
                                      special_characters=True)
    svg_markup = Source(dot_source).pipe(format='svg')
    display(SVG(svg_markup))
Beispiel #10
0
 def plot_tree_helper(crit, split, depth, min_samples_split):
     """Fit a classifier on ``data`` with the chosen settings and show it as SVG.

     Only ``crit``, ``split`` and ``min_samples_split`` are taken from the
     arguments. Every other hyperparameter, including ``max_depth``, is read
     from names defined outside this function, and ``depth`` is accepted but
     never used in the body.

     :return: the graphviz ``Source`` object for the fitted tree.
     """
     hyperparameters = dict(
         random_state=0,
         criterion=crit,
         splitter=split,
         min_samples_split=min_samples_split,
         class_weight=class_weight,
         max_depth=max_depth,
         max_features=max_features,
         max_leaf_nodes=max_leaf_nodes,
         min_impurity_decrease=min_impurity_decrease,
         min_impurity_split=min_impurity_split,
         min_samples_leaf=min_samples_leaf,
         min_weight_fraction_leaf=min_weight_fraction_leaf,
         presort=presort,
     )
     estimator = DecisionTreeClassifier(**hyperparameters)
     estimator.fit(data.data, data.target)

     class_labels = [str(name) for name in np.unique(data.target)]
     dot_source = tree.export_graphviz(estimator,
                                       out_file=None,
                                       feature_names=data.feature_names,
                                       class_names=class_labels,
                                       filled=True)
     graph = Source(dot_source)
     display(SVG(graph.pipe(format='svg')))
     return graph
Beispiel #11
0
 def analyzeEndpoint():
     """Analyze submitted source code and return the findings as JSON.

     On a valid form submission the source is passed to ``analyze`` together
     with the requested profile. The response contains the tainted sinks, an
     SVG rendering of the CFG forest (empty string when there is none), the
     stats, the resolved profile and the analysis time in seconds.

     :return: a JSON string on success, or a ``(json, 400)`` tuple when the
         form is invalid or ``analyze`` reports an error.
     """
     form = MyForm()
     if not form.validate_on_submit():
         # Without this guard the final return referenced names that were
         # only assigned inside the validated branch and raised NameError.
         return json.dumps({"error": "invalid form submission"}), 400

     profile = request.form.get('profile')
     start = time.time()
     CFGForest, taintedSink, stats, err = analyze(
         form.source_code.data, profile)
     timetaken = time.time() - start
     if err:
         return json.dumps({"error": err.message}), 400

     # Replace the profile name with its definition when it is a known one.
     profileJson = profiles.getProfiles()
     if profile in profileJson:
         profile = profileJson[profile]

     sinks = [{
         'startLine': sink.startLine,
         'endLine': sink.endLine,
         'state': sink.state,
     } for sink in taintedSink]

     graph = ""
     if CFGForest:
         graphviz = Source(CFGForest.generateGraphViz(True))
         graphviz.format = 'svg'
         graph = graphviz.pipe().decode('utf-8')

     return json.dumps({
         'sinks': sinks,
         'graph': graph,
         'stats': stats,
         'profile': profile,
         'timetaken': timetaken
     })
def plot_tree_rf(crit=['gini','entropy'],
                 bootstrap=['True','False'],
                 depth=IntSlider(min=1,max=30,value=2,  continuous_update=False),
                 forests=IntSlider(min=1,max=200,value=100, continuous_update=False),
                 min_split=IntSlider(min=2,max=5,value=2, continuous_update=False),
                 min_leaf=IntSlider(min=1,max=5,value=1, continuous_update=False)):
    """Fit a random forest on the module-level train split and draw its first tree.

    The list/slider defaults look like ipywidgets ``interact`` abbreviations,
    so each call presumably receives one selected value per parameter --
    verify against the caller.

    Fixes relative to the earlier version: the estimator is a
    ``RandomForestClassifier`` (the body passes ``n_estimators``/``bootstrap``
    and reads ``estimators_``), ``min_leaf`` feeds ``min_samples_leaf``
    instead of repeating ``min_samples_split``, the model is fitted once, and
    the second accuracy line is labelled as the test score.

    :param crit: split criterion for the forest.
    :param bootstrap: ``True``/``False`` or the strings ``'True'``/``'False'``.
    :param depth: ``max_depth`` of each tree.
    :param forests: number of trees (``n_estimators``).
    :param min_split: ``min_samples_split``.
    :param min_leaf: ``min_samples_leaf``.
    """
    # Any non-empty string (including 'False') is truthy, so translate the
    # selected text into a real boolean.
    if isinstance(bootstrap, bool):
        use_bootstrap = bootstrap
    else:
        use_bootstrap = str(bootstrap) == 'True'

    estimator = RandomForestClassifier(random_state=0,
                                       criterion=crit,
                                       bootstrap=use_bootstrap,
                                       n_estimators=forests,
                                       max_depth=depth,
                                       min_samples_split=min_split,
                                       min_samples_leaf=min_leaf,
                                       n_jobs=-1,
                                       verbose=False).fit(X_train, y_train)

    print('Decision tree tranning accuracy: {:.3f}'.format(accuracy_score(y_train,estimator.predict(X_train))))
    print('Decision tree test accuracy: {:.3f}'.format(accuracy_score(y_test,estimator.predict(X_test))))

    # Only the first tree of the ensemble is drawn.
    num_tree = estimator.estimators_[0]
    print('\nVisualizing tree:',0)

    graph = Source(tree.export_graphviz(num_tree,
                                        out_file=None,
                                        feature_names=X_train.columns,
                                        class_names=['stayed','quit'],
                                        filled=True))

    display(Image(data=graph.pipe(format='png')))
Beispiel #13
0
    def plot_regression_tree(crit, split, depth, min_split, min_leaf):
        """Fit a regression tree on ``X_train``/``y_train`` and show it as SVG.

        :param crit: regressor ``criterion``.
        :param split: regressor ``splitter``.
        :param depth: regressor ``max_depth``.
        :param min_split: regressor ``min_samples_split``.
        :param min_leaf: regressor ``min_samples_leaf``.
        :return: the fitted ``DecisionTreeRegressor``.
        """
        hyperparameters = {
            "random_state": 0,
            "criterion": crit,
            "splitter": split,
            "max_depth": depth,
            "min_samples_split": min_split,
            "min_samples_leaf": min_leaf,
        }
        estimator = DecisionTreeRegressor(**hyperparameters)
        estimator.fit(X_train, y_train)

        # Impurity is left out of the node labels.
        dot_source = export_graphviz(
            estimator,
            out_file=None,
            feature_names=list(X_train.columns),
            filled=True,
            rounded=True,
            impurity=False,
        )
        svg_markup = Source(dot_source).pipe(format="svg")
        display(SVG(svg_markup))

        return estimator
Beispiel #14
0
    def do_execute(
        self, code, silent, store_history=True, user_expressions=None, allow_stdin=False
    ):
        """Render ``code`` as a Graphviz PNG and send the result to the client.

        On success (and unless ``silent``) a ``display_data`` message carrying
        the URL-quoted, base64-encoded PNG and its pixel size is sent. When
        Graphviz fails with ``CalledProcessError`` its stderr is sent as a
        ``stream`` message instead. The returned reply always has status
        ``"ok"``.
        """
        src = Source(code)
        png_src = None
        error = None
        try:
            png_src = src.pipe(format="png")
        except subprocess.CalledProcessError as render_failure:
            failed = True
            error = render_failure.stderr
        else:
            failed = False

        # send response to web client
        if not silent and failed:
            self.send_response(
                self.iopub_socket,
                "stream",
                {"name": "stdout", "text": error.decode()},
            )
        elif not silent:
            encoded_png = urllib.parse.quote(base64.b64encode(png_src))
            width, height = imgsize.get_png_size(png_src)
            self.send_response(
                self.iopub_socket,
                "display_data",
                {
                    "metadata": {"image/png": {"width": width, "height": height}},
                    "data": {"image/png": encoded_png},
                },
            )

        reply = {
            "status": "ok",
            # The base class increments the execution count
            "execution_count": self.execution_count,
            "payload": [],
            "user_expressions": {},
        }
        return reply
def plot_tree(
    crit=['gini', 'entropy'],
    split=['best', 'random'],
    depth=IntSlider(min=1, max=30, value=2, continuous_update=False),
    min_split=IntSlider(min=2, max=5, value=2, continuous_update=False),
    # number of samples to split an internal node
    min_leaf=IntSlider(min=1, max=5, value=1, continuous_update=False)
):  # min number of samples required at a leaf node
    """Fit a decision tree on the module-level train split, report and draw it.

    The list/slider defaults look like ipywidgets ``interact`` abbreviations
    -- verify against the caller. Two option sets were corrected: the
    splitter choices are ``'best'``/``'random'`` (``'split'`` is not a valid
    splitter), and the ``min_split`` slider starts at 2 because an integer
    ``min_samples_split`` below 2 is rejected by scikit-learn.

    :return: the fitted ``DecisionTreeClassifier``.
    """
    estimator = DecisionTreeClassifier(random_state=0,
                                       criterion=crit,
                                       splitter=split,
                                       max_depth=depth,
                                       min_samples_split=min_split,
                                       min_samples_leaf=min_leaf)
    estimator.fit(x_train, y_train)

    print('decision tree training accuracy: {:.3f}'.format(
        accuracy_score(y_train, estimator.predict(x_train))))
    print('decision tree test accuracy: {:.3f}'.format(
        accuracy_score(y_test, estimator.predict(x_test))))

    graph = Source(
        tree.export_graphviz(estimator,
                             out_file=None,
                             feature_names=x_train.columns,
                             class_names=['stayed', 'quit'],
                             filled=True))
    display(Image(data=graph.pipe(format='png')))
    return estimator
Beispiel #16
0
 def to_png(self, output_path: str):
     """Write a PNG rendering of ``self.clf`` to ``output_path``.

     Feature names for the drawing are taken from ``self.contexts``.
     """
     dot_source = tree.export_graphviz(self.clf,
                                       out_file=None,
                                       feature_names=self.contexts)
     rendered = Source(dot_source).pipe(format='png')
     with open(output_path, 'wb') as png_file:
         png_file.write(rendered)
Beispiel #17
0
 def show_graph(self):
     """Load the pickled model and write its tree to ``dtree_pipe.png``.

     The pickle at ``Models/decision_tree.pkl`` is expected to hold a mapping
     whose ``'clf'`` entry is the tree passed to ``export_graphviz``.
     """
     from graphviz import Source

     with open("Models/decision_tree.pkl", 'rb') as model_file:
         stored_model = pickle.load(model_file)

     dot_source = tree.export_graphviz(stored_model['clf'], out_file=None)
     rendered = Source(dot_source).pipe(format='png')
     with open('dtree_pipe.png', 'wb') as png_file:
         png_file.write(rendered)
Beispiel #18
0
def display_tree(tree_model, cols):
    """
    Display a decision tree inline as SVG.

    tree_model: a decision tree model (e.g. an estimator from random forest model)
    cols: feature names passed to ``export_graphviz``.
    """
    from graphviz import Source
    from IPython.display import SVG, display
    from sklearn import tree
    graph = Source(tree.export_graphviz(tree_model, out_file=None, feature_names= cols))
    # A bare ``SVG(...)`` expression inside a function is built and thrown
    # away; it has to be handed to ``display`` to actually appear.
    display(SVG(graph.pipe(format='svg')))
Beispiel #19
0
    def drawNumpy1DArray(
        self,
        array,
        showIndex=False,
        layout="row",
    ):
        """Draw a 1-D array as a Graphviz HTML table.

        Every value gets a bordered square cell sized from the longest
        ``str(value)``. With ``showIndex`` the positions are shown in
        borderless cells: in a second row for ``layout="row"``, or to the
        left of each value for ``layout="column"``.

        :param array: object exposing ``shape[0]`` and integer indexing.
        :param showIndex: whether to add index cells.
        :param layout: ``"row"`` or ``"column"``.
        :return: ``None`` after rendering and displaying when
            ``self.animation`` is falsy; otherwise the un-rendered PNG
            ``Source`` object.
        :raises ValueError: if ``layout`` is not ``"row"`` or ``"column"``
            (previously this surfaced as an ``UnboundLocalError``).
        """
        if layout not in ("row", "column"):
            raise ValueError(
                f'layout must be "row" or "column", got {layout!r}')

        count = array.shape[0]
        values = [str(array[i]) for i in range(count)]
        maxLen = max((len(value) for value in values), default=0)
        size = 20 + 7 * maxLen

        def cell(border, content):
            # One fixed-size square table cell.
            return (f'<TD border="{border}" fixedsize="true" '
                    f'width="{size}" height="{size}">{content}</TD>')

        if layout == "row":
            rows = ['<TR>' + ''.join(cell(1, value) for value in values) +
                    '</TR>']
            if showIndex:
                rows.append('<TR>' +
                            ''.join(cell(0, i) for i in range(count)) +
                            '</TR>')
        elif showIndex:
            rows = ['<TR>' + cell(0, i) + cell(1, value) + '</TR>'
                    for i, value in enumerate(values)]
        else:
            rows = ['<TR>' + cell(1, value) + '</TR>' for value in values]
        strArray = ''.join(rows)

        dot_source = (
            'graph "Array" { node [fontsize=15, shape=plaintext]; a0 [label=< <TABLE border="0" cellspacing="0" cellpadding="3">'
            + strArray + '</TABLE> >] }')

        if self.animation:
            # The caller gets the un-rendered Source object back.
            return Source(dot_source, format='png')

        src = Source(dot_source)
        src.render('lista.gv', view=True)
        display(SVG(src.pipe(format='svg')))
        return None
 def visualize(treeModel, featuresList, targetValues):
     """Return the tree rendered as a base64 PNG ``data:`` URI string.

     :param treeModel: tree passed to ``export_graphviz``.
     :param featuresList: feature names for the drawing.
     :param targetValues: class names for the drawing.
     """
     export_options = dict(out_file=None,
                           feature_names=featuresList,
                           class_names=targetValues,
                           filled=True,
                           rounded=True,
                           special_characters=True)
     png_image = Source(
         tree.export_graphviz(treeModel, **export_options)).pipe(format='png')
     encoded_image = base64.b64encode(png_image).decode('utf-8')
     return 'data:image/png;base64,{}'.format(encoded_image)
Beispiel #21
0
def export_graph_tree(decision_tree, class_names, file_name):
    """Write the top three levels of ``decision_tree`` to ``<file_name>.png``.

    Feature names come from the ``features`` name defined outside this
    function.
    """
    dot_source = export_graphviz(decision_tree,
                                 out_file=None,
                                 feature_names=features,
                                 class_names=class_names,
                                 filled=True,
                                 max_depth=3)
    rendered = Source(dot_source).pipe(format='png')
    target_path = file_name + '.png'
    with open(target_path, 'wb') as png_file:
        png_file.write(rendered)
Beispiel #22
0
def generate(engine, outformat, graphviz_code):
    """Render ``graphviz_code`` and send the result back as a file response.

    :param engine: Graphviz layout engine handed to ``Source``.
    :param outformat: output format; also used (with a leading dot) to look
        up the MIME type in ``mimetypes.types_map``.
    :param graphviz_code: DOT source text.
    """
    source = Source(graphviz_code, engine=engine, format=outformat)

    request_summary = "/".join(
        ["request is ", engine, outformat, graphviz_code])
    app.logger.info(request_summary)

    rendered_stream = io.BytesIO(source.pipe())

    app.logger.debug(rendered_stream.getvalue())
    app.logger.debug(outformat)

    mime_type = mimetypes.types_map['.'+outformat]
    return send_file(rendered_stream, mimetype=mime_type)
Beispiel #23
0
def treeGraph(classificator, features) -> None:
    """Save the classifier's tree as ``classificationGraphModel.png``.

    Prints ``len(features)`` first and a completion message last. Feature
    names are taken from ``features.columns``.
    """
    print(len(features))
    dot_source = export_graphviz(classificator,
                                 out_file=None,
                                 filled=True,
                                 rounded=True,
                                 special_characters=True,
                                 feature_names=features.columns,
                                 class_names=['0', '1'])
    png_bytes = Source(dot_source).pipe(format='png')
    with open('classificationGraphModel.png', 'wb') as png_file:
        png_file.write(png_bytes)
    # The Image object is constructed but neither displayed nor returned.
    Image(png_bytes)
    print("Complete png_create")
Beispiel #24
0
def main(args=None):
    """Command-line entry point: emit an image through ``iterm2_img_format``.

    The image source is chosen by flag: ``--graphviz`` renders DOT text to
    PNG, ``--clipboard`` uses the clipboard image, ``--web`` downloads
    ``infile`` as a URL, and otherwise ``infile`` is passed on unchanged.

    :param args: argument list for ``ArgumentParser.parse_args`` (``None``
        means ``sys.argv``).
    """
    parser = ArgumentParser()
    parser.add_argument('--graphviz', action='store_true')
    parser.add_argument('--clipboard', action='store_true')
    parser.add_argument('--web', action='store_true')
    parser.add_argument('--width', type=str)
    parser.add_argument('--height', type=str)
    parser.add_argument('--preserve', type=str)
    parser.add_argument(
        'infile',
        nargs='?',
        type=str,
        help='a graphviz file to be validated or pretty-printed',
        default=sys.stdin)
    parser.add_argument('outfile',
                        nargs='?',
                        type=FileType('w'),
                        help='write the output of infile to outfile',
                        default=sys.stdout)
    parsed_args = parser.parse_args(args)

    graphviz = parsed_args.graphviz
    clipboard = parsed_args.clipboard
    web = parsed_args.web
    width = parsed_args.width
    height = parsed_args.height
    preserve = parsed_args.preserve

    # ``infile`` is a path/URL string when given on the command line and the
    # ``sys.stdin`` stream when omitted; ``outfile`` is always file-like.
    infile = parsed_args.infile
    outfile = parsed_args.outfile

    output = partial(iterm2_img_format,
                     preserve=preserve,
                     width=width,
                     height=height)

    with outfile:
        if graphviz:
            from graphviz import Source
            # A path string has no ``read`` and is not a context manager, so
            # open it explicitly; only the stdin default can be read directly.
            if isinstance(infile, str):
                with open(infile) as dot_file:
                    dot_source = dot_file.read()
            else:
                dot_source = infile.read()
            gv = Source(dot_source)
            outfile.write(output(gv.pipe('png')))
        elif clipboard:
            outfile.write(output(get_clipboard_image()))
        elif web:
            resp = requests.get(infile)
            outfile.write(output(BytesIO(resp.content)))
        else:
            outfile.write(output(infile))
Beispiel #25
0
 def plot_tree_helper(crit, split, depth):
     """Fit a classifier on ``data`` with the chosen settings and show it as SVG.

     :param crit: classifier ``criterion``.
     :param split: classifier ``splitter``.
     :param depth: classifier ``max_depth``.
     :return: the graphviz ``Source`` object for the fitted tree.
     """
     estimator = DecisionTreeClassifier(random_state=0,
                                        criterion=crit,
                                        splitter=split,
                                        max_depth=depth)
     estimator.fit(data.data, data.target)

     class_labels = [str(name) for name in np.unique(data.target)]
     dot_source = tree.export_graphviz(estimator,
                                       out_file=None,
                                       feature_names=data.feature_names,
                                       class_names=class_labels,
                                       filled=True)
     graph = Source(dot_source)
     display(SVG(graph.pipe(format='svg')))
     return graph
Beispiel #26
0
def plot_tree(crit=['gini','entropy'],
              bootstrap=['True','False'],
              depth = IntSlider(min=1,max=30,value=2,continuous_update=False),
              forests = IntSlider(min=1,max=200,value=100,continuous_update=False),
              min_split=IntSlider(min=2,max=5,value=2,continuous_update=False),
              min_leaf=IntSlider(min=1,max=5,value=1,continuous_update=False)):
    """Fit a random forest on the module-level train split and draw its first tree.

    The list/slider defaults look like ipywidgets ``interact`` abbreviations,
    so each call presumably receives one selected value per parameter --
    verify against the caller. Training and test accuracy are printed before
    the tree is displayed as PNG.

    :param crit: split criterion for the forest.
    :param bootstrap: ``True``/``False`` or the strings ``'True'``/``'False'``.
    :param depth: ``max_depth`` of each tree.
    :param forests: number of trees (``n_estimators``); it was previously
        accepted but never passed to the estimator.
    :param min_split: ``min_samples_split``.
    :param min_leaf: ``min_samples_leaf``.
    :return: the fitted ``RandomForestClassifier``.
    """
    # Any non-empty string (including 'False') is truthy, so translate the
    # selected text into a real boolean.
    if isinstance(bootstrap, bool):
        use_bootstrap = bootstrap
    else:
        use_bootstrap = str(bootstrap) == 'True'

    estimator = RandomForestClassifier(random_state=1,
                                       criterion=crit,
                                       bootstrap=use_bootstrap,
                                       n_estimators=forests,
                                       max_depth=depth,
                                       min_samples_split=min_split,
                                       min_samples_leaf=min_leaf,
                                       n_jobs=-1,
                                       verbose=False)
    estimator.fit(X_train,y_train)
    print(accuracy_score(y_train,estimator.predict(X_train)))
    print(accuracy_score(y_test,estimator.predict(X_test)))

    # Only the first tree of the ensemble is drawn.
    num_tree = estimator.estimators_[0]
    graph = Source(tree.export_graphviz(num_tree,
                                        out_file=None,
                                        feature_names=X_train.columns,
                                        class_names=['0','1'],
                                        filled=True))
    display(Image(data=graph.pipe(format='png')))
    return estimator
Beispiel #27
0
def plot_decision_tree(clf):
    '''
    Function for the classification task - writes the top three levels of
    the trained decision tree to ``dtree.png``.

    The feature labels are the entries at positions 0, 4, 5, 8, 10 and 12 of
    a hard-coded list of column names.
    '''
    all_columns = np.array(['issuercountry', 'txvariantcode', 'issuer_id', 'amount', 'currencycode',
                            'shoppercountry', 'interaction', 'verification', 'cvcresponse', 'creationdate_stamp',
                            'accountcode', 'mail_id', 'ip_id', 'card_id'])
    selected_positions = [0, 4, 5, 8, 10, 12]
    selected_columns = all_columns[selected_positions]

    dot_source = export_graphviz(clf,
                                 out_file=None,
                                 max_depth=3,
                                 feature_names=selected_columns,
                                 class_names=['benign', 'fraudulent'],
                                 filled=True,
                                 rounded=True,
                                 special_characters=True,
                                 proportion=False,
                                 precision=2)
    rendered = Source(dot_source).pipe(format='png')
    with open('dtree.png', 'wb') as png_file:
        png_file.write(rendered)
Beispiel #28
0
    def draw_tree(self, tree, root):
        """Draw the tree reachable from ``getattr(tree, root)`` with neato.

        Resets the null/node counters, copies the tree, runs
        ``compute_position`` on the copy, encodes its nodes and edges into a
        DOT graph, renders it to ``lista.gv`` (opening a viewer) and
        displays the SVG inline.
        """
        self.counterNull = 0
        self.counterNodes = 0

        tree_copy = self.copy_tree(getattr(tree, root))
        # The three returned values are not used afterwards; the call itself
        # is kept.
        _x, _y, _z = self.compute_position(tree_copy)

        node_declarations = self.encode_nodes(tree_copy)
        edge_declarations = self.encode_edges(tree_copy)

        dot_source = ('graph "Arbol" { rankdir=TB; ' + node_declarations +
                      ' node[shape=circle] ' + edge_declarations + ' }')
        src = Source(dot_source)
        src.engine = "neato"
        src.render('lista.gv', view=True)
        svg_markup = src.pipe(format='svg')
        display(SVG(svg_markup))
Beispiel #29
0
    def draw_linked_list(self, nList):
        """Draw a linked list as a left-to-right Graphviz digraph.

        The list is walked from ``getattr(nList, self.fieldHeader)`` by
        following ``self.fieldLink`` until ``None``; each node's text is
        ``str(getattr(node, self.fieldData))``. ``self.pointers`` is iterated
        as ``position -> label`` pairs and each label is drawn pointing at
        the node at that position (or at NULL when the position equals the
        list length). The graph is rendered to ``lista.gv`` (opening a
        viewer) and displayed inline as SVG.

        Raises ``SegmentationFault`` when more than one pointer is registered
        and the largest pointer position is beyond the list length.
        """
        listStr = ''

        # Optional header label in front of the chain.
        if self.strHeader != "":
            listStr = f'HEAD [shape=plaintext label="{self.strHeader}"];\n'
        listStr += 'NULL [shape=square label=""];\n'

        # Declare one plaintext node per registered pointer label.
        for position, label in self.pointers.items():
            listStr += f'_label_pos{position} [shape=plaintext label="{label}"];\n'

        listStr += 'node[shape=circle];\n'
        if self.strHeader != "":
            listStr += 'HEAD -> '

        # Walk the list, appending "<node> -> " for every element.
        p = getattr(nList, self.fieldHeader)
        position = 0
        pointedNodes = []
        while p is not None:
            nodeData = str(getattr(p, self.fieldData))
            p = getattr(p, self.fieldLink)

            if position in self.pointers:
                # Pointed-at nodes get a positional id so the label can be
                # attached to them and kept on the same rank.
                listStr += f'_nodo_pos{position} -> '
                pointedNodes.append(
                    f'_nodo_pos{position} [shape=circle label="{nodeData}"];\n'
                    f'_label_pos{position} -> _nodo_pos{position};\n'
                    f'{{ rank="same"; _label_pos{position}; _nodo_pos{position} }};'
                )
            else:
                # Other nodes use their data text as the DOT identifier.
                # NOTE(review): equal data values would presumably share one
                # Graphviz node -- confirm whether that is acceptable.
                listStr += f'{nodeData} -> '
            position += 1

        listStr += 'NULL;\n'

        listStr += '\n'.join(pointedNodes)
        if position in self.pointers:  # Last position is NULL
            listStr += f'''
        _label_pos{position} -> NULL;
        {{ rank="same"; _label_pos{position}; NULL }} '''

        # NOTE(review): with exactly one registered pointer this check is
        # skipped, so a single out-of-range pointer does not raise -- confirm
        # whether ``> 1`` was meant to be a non-empty check.
        if len(self.pointers) > 1 and max(self.pointers) > position:
            raise SegmentationFault(
                f'Tried to draw a pointer to node {max(self.pointers)}, but list length is {position}.'
            )

        src = Source('digraph "Lista" { rankdir=LR; ' + listStr + ' }')
        src.render('lista.gv', view=True)
        display(SVG(src.pipe(format='svg')))
Beispiel #30
0
def part4():
    """Train an entropy-based decision tree and save it as ``dtree_pipe_4.png``.

    Data is loaded with ``load_data('train.csv', 'test.csv')``; the tree uses
    ``min_samples_split=27`` and ``random_state=2`` and is drawn with the
    class names ``'-'`` and ``'+'``.
    """
    print('part 4 -----------------------------------------')
    x_train, y_train, x_test, y_test, features_names = load_data(
        'train.csv', 'test.csv')
    # ``criterion`` is passed by keyword: current scikit-learn releases make
    # the estimator's constructor parameters keyword-only.
    classifier = tree.DecisionTreeClassifier(criterion="entropy",
                                             min_samples_split=27,
                                             random_state=2)
    classifier.fit(x_train, y_train)
    graph = Source(
        tree.export_graphviz(classifier,
                             out_file=None,
                             feature_names=features_names,
                             class_names=['-', '+']))
    png_bytes = graph.pipe(format='png')
    with open('dtree_pipe_4.png', 'wb') as f:
        f.write(png_bytes)
Beispiel #31
0
 def visualize_dtree(self, estimator, file_id):
     """Show ``estimator`` inline and export it to ``./files/<file_id>.dot``/``.pdf``.

     Feature names come from ``self.x_train.columns`` and class names from
     the sorted unique values of ``self.y_train``. Any failure is reported
     with a printed message instead of being raised.

     :param estimator: fitted tree passed to ``export_graphviz``.
     :param file_id: base name for the exported files.
     """
     dot_file = './files/{}.dot'.format(file_id)
     pdf_file = './files/{}.pdf'.format(file_id)
     try:
         feature_names = list(self.x_train.columns)
         # export_graphviz expects class names in ascending class order;
         # ``unique()`` alone yields order of first appearance.
         class_names = [str(label) for label in sorted(self.y_train.unique())]

         graph = Source(export_graphviz(estimator, out_file=None,
                                        feature_names=feature_names,
                                        class_names=class_names, filled=True))
         display(SVG(graph.pipe(format='svg')))
         with open(dot_file, "w") as f:
             export_graphviz(estimator, out_file=f,
                             feature_names=feature_names,
                             class_names=class_names,
                             filled=True, rounded=True)
         # Argument list instead of a shell string, so ``file_id`` can never
         # be interpreted by a shell.
         check_output(["dot", "-Tpdf", dot_file, "-o", pdf_file])
     except Exception as err:
         # Deliberate best-effort: visualization problems must not abort the
         # caller.
         print('Cannot visualize decision trees')
         print(err)
Beispiel #32
0
def main():
    """Convert the graph in the Heinz output file to a PDF.

    Everything before the first line starting with ``graph G {`` is
    discarded, the remainder is rendered by Graphviz and the PDF bytes are
    written to ``args.output``. The process exits with status 0 on success,
    or with an error message when no graph header is found.
    """
    from itertools import dropwhile

    args = get_args()
    # Skip the leading lines while streaming the file.
    with open(args.heinz) as r:
        graph_dot = list(
            dropwhile(lambda line: not line.startswith('graph G {'), r))

    if not graph_dot:
        # Previously this case ended in an IndexError on an empty list.
        sys.exit(f"No line starting with 'graph G {{' found in {args.heinz}")

    src = Source(''.join(graph_dot))
    data_pdf = src.pipe('pdf')
    # Redirect the output (very important)
    with open(args.output, 'wb') as w:
        w.write(data_pdf)
    print('The visualization is saved as PDF!')
    sys.exit(0)
# Tail of the feature-importance plot cell: label the y axis and set a title.
plt.ylabel('Feature')
t = plt.title('Feature Importances for Decision Tree')


# ## Visualize the Decision Tree

# In[21]:

from graphviz import Source
from sklearn import tree
from IPython.display import Image

# Export the top three levels of the fitted tree ``wqp_dt`` to DOT and render
# it to PNG bytes.
graph = Source(tree.export_graphviz(wqp_dt, out_file=None, class_names=wqp_label_names,
                                    filled=True, rounded=True, special_characters=False,
                                    feature_names=wqp_feature_names, max_depth=3))
png_data = graph.pipe(format='png')
# Keep a copy of the rendering on disk.
with open('dtree_structure.png','wb') as f:
    f.write(png_data)

# Last expression of the cell, so the notebook shows the image inline.
Image(png_data)


# ## Train, Predict & Evaluate Model using Random Forests

# In[22]:

from sklearn.ensemble import RandomForestClassifier
# train the model (default hyperparameters)
wqp_rf = RandomForestClassifier()
wqp_rf.fit(wqp_train_SX, wqp_train_y)
# predict and evaluate performance
 def _text_to_graphiz(self, text):
     """Render DOT ``text`` with Graphviz and return the SVG as a UTF-8 string."""
     svg_bytes = Source(text, format='svg').pipe()
     return svg_bytes.decode('utf-8')