def plot_tree(tree_classifier, data_X, data_y, max_depth=None, proportion=False, rotate=False):
    """Render a fitted decision tree with Graphviz and return it as PNG bytes.

    :param tree_classifier: fitted tree handed to ``export_graphviz``.
    :param data_X: object whose ``columns.values`` provide the feature names.
    :param data_y: object whose ``unique()`` provides the class labels.
    :param max_depth: forwarded to ``export_graphviz``.
    :param proportion: forwarded to ``export_graphviz``.
    :param rotate: forwarded to ``export_graphviz``.
    :return: the PNG image produced by ``Source.pipe(format='png')``.
    """
    feature_names = [str(column) for column in data_X.columns.values]
    # export_graphviz expects class names in ascending order of the labels.
    # If data_y is a pandas Series, unique() yields them in order of
    # appearance instead, which can attach the wrong name to a class.
    class_names = [str(label) for label in sorted(data_y.unique())]
    dtree_graph = Source(
        export_graphviz(
            tree_classifier,
            out_file=None,
            feature_names=feature_names,
            class_names=class_names,
            rounded=True,
            proportion=proportion,
            precision=2,
            filled=True,
            max_depth=max_depth,
            rotate=rotate,
        )
    )
    # The original had a second, unreachable `return display(SVG(...))` after
    # this statement; it was dead code and has been removed.
    return dtree_graph.pipe(format='png')
def train_and_visualize(x, y):
    """Fit a decision tree on a train/test split, print its scores and draw it.

    The data is split with ``train_test_split(x, y, random_state=1)``; accuracy
    and macro-averaged F1 on the test part are printed, then the fitted tree
    is rendered inline as SVG. Feature and class names are hard-coded below.
    """
    from sklearn.model_selection import train_test_split
    features_train, features_test, labels_train, labels_test = train_test_split(
        x, y, random_state=1)

    from sklearn import tree
    model = tree.DecisionTreeClassifier()
    model.fit(features_train, labels_train)
    predicted = model.predict(features_test)

    from sklearn.metrics import accuracy_score, f1_score
    accuracy = accuracy_score(labels_test, predicted)
    print("Accuracy: ", accuracy)
    macro_f1 = f1_score(labels_test, predicted, average='macro')
    print("F1 score: ", macro_f1)

    from IPython.display import SVG
    from graphviz import Source
    from IPython.display import display

    # Build the DOT description of the fitted tree.
    column_labels = [
        'mean_gps', 'stdv_gps', 'mean_dp', 'stdv_dp', 'mean_voxel',
        'stdv_voxel'
    ]
    class_labels = [str(c) for c in range(1, 15)]
    dot_text = tree.export_graphviz(model,
                                    out_file=None,
                                    feature_names=column_labels,
                                    class_names=class_labels,
                                    filled=True,
                                    special_characters=True)
    svg_markup = Source(dot_text).pipe(format='svg')
    display(SVG(svg_markup))
def plot_tree(train_features: pd.DataFrame, train_target: pd.DataFrame,
              feature_names: list, split: str, depth: int, min_split: float,
              min_leaf: float = 0.2):
    """
    Interactive plotting for regression trees.

    Code adapted from
    https://towardsdatascience.com/interactive-visualization-of-decision-trees-with-jupyter-widgets-ca15dd312084

    :param train_features: features the regressor is fitted on
    :param train_target: target values the regressor is fitted on
    :param feature_names: names passed to ``export_graphviz`` as ``feature_names``
    :param split: value for the regressor's ``splitter`` argument
    :param depth: value for the regressor's ``max_depth`` argument
    :param min_split: value for the regressor's ``min_samples_split`` argument
    :param min_leaf: value for the regressor's ``min_samples_leaf`` argument
    :return: the fitted ``DecisionTreeRegressor``
    """
    regressor = DecisionTreeRegressor(random_state=0,
                                      splitter=split,
                                      max_depth=depth,
                                      min_samples_split=min_split,
                                      min_samples_leaf=min_leaf)
    regressor.fit(train_features, train_target)

    # Export to DOT, render to SVG and show it inline.
    dot_text = tree.export_graphviz(regressor,
                                    out_file=None,
                                    feature_names=feature_names,
                                    filled=True)
    svg_markup = Source(dot_text).pipe(format='svg')
    display(SVG(svg_markup))
    return regressor
def plot_tree_rf(crit=["gini", "entropy"],
                 bootstrap=["True", "False"],
                 depth=IntSlider(min=1, max=30, value=3, continuous_update=False),
                 forests=IntSlider(min=1, max=200, value=100, continuous_update=False),
                 min_split=IntSlider(min=2, max=5, value=2, continuous_update=False),
                 min_leaf=IntSlider(min=1, max=5, value=1, continuous_update=False)):
    """Fit a random forest, print its accuracy and draw its first tree.

    The defaults are widget descriptions (presumably consumed by
    ``ipywidgets.interact`` -- confirm against the caller); each call is
    expected to receive one concrete value per parameter. Training and test
    data are read from the module-level ``X_train``, ``y_train``, ``X_test``
    and ``y_test``.

    :param crit: split criterion for the forest.
    :param bootstrap: ``"True"``/``"False"`` (as offered by the default
        options) or a real boolean.
    :param depth: maximum tree depth.
    :param forests: number of trees (``n_estimators``).
    :param min_split: ``min_samples_split`` for each tree.
    :param min_leaf: ``min_samples_leaf`` for each tree.
    :return: the fitted ``RandomForestClassifier``.
    """
    # The option list offers strings; the string "False" is truthy, so it must
    # be converted explicitly before being handed to the estimator.
    if isinstance(bootstrap, str):
        use_bootstrap = bootstrap == "True"
    else:
        use_bootstrap = bool(bootstrap)

    estimator = RandomForestClassifier(random_state=1,
                                       criterion=crit,
                                       bootstrap=use_bootstrap,
                                       n_estimators=forests,
                                       max_depth=depth,
                                       min_samples_split=min_split,
                                       min_samples_leaf=min_leaf,
                                       n_jobs=-1,
                                       verbose=False).fit(X_train, y_train)

    print('Random Forest Training Accuracy: {:.3f}'.format(
        accuracy_score(y_train, estimator.predict(X_train))))
    print('Random Forest Test Accuracy: {:.3f}'.format(
        accuracy_score(y_test, estimator.predict(X_test))))

    # Only the first tree of the ensemble is visualised.
    num_tree = estimator.estimators_[0]
    print('\nVisualizing Decision Tree:', 0)
    graph = Source(tree.export_graphviz(num_tree,
                                        out_file=None,
                                        feature_names=X_train.columns,
                                        class_names=['0', '1'],
                                        filled=True))
    display(Image(data=graph.pipe(format='png')))
    return estimator
def decision_plot(new_X_train2, new_y_train2, feature_names, test, model, classify):
    """Fit a depth-1 surrogate decision tree and compare it with ``model``.

    A ``DecisionTreeClassifier`` (entropy, ``max_depth=1``) is fitted on the
    given data, its prediction for ``test`` is printed next to the prediction
    of ``model``, its accuracy on the training data is printed as "fidelity",
    the tree is drawn as SVG, and the decision path for ``test`` is printed.

    :param new_X_train2: training features for the surrogate tree.
    :param new_y_train2: training labels for the surrogate tree.
    :param feature_names: feature names used in the drawing.
    :param test: instance(s) passed to both ``predict`` calls.
    :param model: the model being compared; must offer ``predict``.
    :param classify: ``'rf'`` or ``'xg'`` selects the comparison message;
        any other value prints no comparison.
    :return: the fitted surrogate tree.
    """
    dt = DecisionTreeClassifier(random_state=0, criterion='entropy', max_depth=1)
    dt.fit(new_X_train2, new_y_train2)

    model_labels = {
        'rf': " and Random Forests predicted:",
        'xg': " and XGboost predicted:",
    }
    if classify in model_labels:
        print("Decision Tree Predicts for Instance:" + str(dt.predict(test))
              + model_labels[classify] + str(model.predict(test)))

    fidelityPreds = dt.predict(new_X_train2)
    print("Let's see fidelity", accuracy_score(new_y_train2, fidelityPreds))

    # export_graphviz concatenates class names into node labels, so they have
    # to be strings; classes_ holds the raw (possibly numeric) labels.
    class_names = [str(label) for label in dt.classes_]
    graph = Source(
        export_graphviz(dt,
                        out_file=None,
                        feature_names=feature_names,
                        class_names=class_names,
                        filled=True))
    display(SVG(graph.pipe(format='svg')))

    print("Lets find out the path for this specific instance!")
    for i in dt.decision_path(test):
        print(i)
    return dt
def plot_decision_tree(clf, feature_names, class_names):
    """Draw ``clf`` inline as SVG with each node's first label part in bold.

    ``class_names`` is sorted before being passed to ``export_graphviz``.
    """
    # Produce the DOT text for the tree.
    dot_text = tree.export_graphviz(clf,
                                    out_file=None,
                                    feature_names=feature_names,
                                    class_names=sorted(class_names),
                                    filled=True,
                                    rounded=True,
                                    label='all',
                                    impurity=False,
                                    proportion=False,
                                    leaves_parallel=False,
                                    rotate=True,
                                    special_characters=True)

    # Wrap whatever sits between `label=<` and `<br/>samples = ` in <b>...</b>.
    rule_pattern = re.compile(r'(.*label=<)(.*)(<br/>samples = .*)')
    emphasised = '\n'.join(
        rule_pattern.sub(r'\1<b>\2</b>\3', row)
        for row in dot_text.split('\n'))

    # Render inline.
    svg_markup = Source(emphasised).pipe(format='svg')
    display(SVG(svg_markup))
def plot_tree(crit=["gini", "entropy"],
              split=["best", "random"],
              depth=IntSlider(min=1,max=30,value=2, continuous_update=False),
              min_split=IntSlider(min=2,max=5,value=2, continuous_update=False),
              min_leaf=IntSlider(min=1,max=5,value=1, continuous_update=False)):
    """Fit a decision tree, print train/test accuracy and show it as PNG.

    Data comes from the module-level ``X_train``, ``y_train``, ``X_test`` and
    ``y_test``. The defaults are widget descriptions (presumably for
    ``ipywidgets.interact`` -- confirm against the caller).

    Returns the fitted ``DecisionTreeClassifier``.
    """
    classifier = DecisionTreeClassifier(
        random_state=0,
        criterion=crit,
        splitter=split,
        max_depth=depth,
        min_samples_split=min_split,
        min_samples_leaf=min_leaf,
    )
    classifier.fit(X_train, y_train)

    train_accuracy = accuracy_score(y_train, classifier.predict(X_train))
    print('Decision Tree Training Accuracy: {:.3f}'.format(train_accuracy))
    test_accuracy = accuracy_score(y_test, classifier.predict(X_test))
    print('Decision Tree Test Accuracy: {:.3f}'.format(test_accuracy))

    dot_text = tree.export_graphviz(
        classifier,
        out_file=None,
        feature_names=X_train.columns,
        class_names=['0', '1'],
        filled=True,
    )
    png_image = Source(dot_text).pipe(format='png')
    display(Image(data=png_image))
    return classifier
def viz_render_to_svg(template_path, *args, engine='dot', **kwargs):
    """Render a DOT template to SVG.

    :param template_path: template handed to ``loader.render_to_string``.
    :param args: extra positional arguments for ``render_to_string``.
    :param engine: Graphviz layout engine passed to ``Source``.
    :param kwargs: extra keyword arguments for ``render_to_string``.
    :return: the output of ``Source.pipe()`` for format ``svg``.
    """
    logger.debug('load dot template at %s', template_path)
    dot_source = loader.render_to_string(template_path, *args, **kwargs)
    # The original passed dot_source as a logging argument to an already
    # formatted f-string; with no placeholder in the message, logging fails to
    # format the record. Use a lazy %-placeholder instead.
    logger.debug('result dot_source=%s', dot_source)
    src = Source(dot_source, format='svg', engine=engine)
    return src.pipe()
def print_model_results(model_results, parameter_name, y_label = "accuracy", tree_mode_label = 'best_model'):
    """Print the best and worst result and draw one of the stored trees.

    ``model_results`` is indexed with ``'best_parameter'``, ``'best_metric'``,
    ``'worst_parameter'``, ``'worst_metric'`` and ``tree_mode_label``; the
    entry under ``tree_mode_label`` is exported and shown inline as SVG.
    """
    summary_rows = (
        ("BEST PERFORMANCE TREE,", 'best_parameter', 'best_metric'),
        ("WORST PERFORMANCE TREE,", 'worst_parameter', 'worst_metric'),
    )
    for heading, parameter_key, metric_key in summary_rows:
        parameter_value = model_results[parameter_key]
        score_text = ", " + y_label + " = {:.2f}%".format(model_results[metric_key])
        print(heading, parameter_name, "=", parameter_value, score_text)

    dot_text = tree.export_graphviz(
        model_results[tree_mode_label],
        out_file=None,
        class_names=['Negative', 'Positive'],
        filled=True,
        rounded=True,
        special_characters=True,
    )
    svg_markup = Source(dot_text).pipe(format='svg')
    display(SVG(svg_markup))
def plot_tree_helper(crit, split, depth, min_samples_split):
    """Fit a decision tree on ``data`` and display it as SVG.

    ``data`` and the remaining hyper-parameters (``class_weight``,
    ``max_features``, ``max_leaf_nodes``, ``min_impurity_decrease``,
    ``min_impurity_split``, ``min_samples_leaf``,
    ``min_weight_fraction_leaf``, ``presort``) are read from the surrounding
    scope, which is not visible from this block.

    :param crit: split criterion.
    :param split: splitter strategy.
    :param depth: maximum depth of the tree.
    :param min_samples_split: minimum samples required to split a node.
    :return: the ``graphviz.Source`` that was displayed.
    """
    # NOTE(review): min_impurity_split and presort appear to have been removed
    # from recent scikit-learn releases -- confirm the pinned version.
    estimator = DecisionTreeClassifier(
        random_state=0,
        criterion=crit,
        splitter=split,
        min_samples_split=min_samples_split,
        class_weight=class_weight,
        # The original passed the outer-scope `max_depth` here, which left the
        # `depth` argument of this helper without any effect.
        max_depth=depth,
        max_features=max_features,
        max_leaf_nodes=max_leaf_nodes,
        min_impurity_decrease=min_impurity_decrease,
        min_impurity_split=min_impurity_split,
        min_samples_leaf=min_samples_leaf,
        min_weight_fraction_leaf=min_weight_fraction_leaf,
        presort=presort)
    estimator.fit(data.data, data.target)

    graph = Source(
        tree.export_graphviz(
            estimator,
            out_file=None,
            feature_names=data.feature_names,
            class_names=[str(name) for name in np.unique(data.target)],
            filled=True))
    display(SVG(graph.pipe(format='svg')))
    return graph
def analyzeEndpoint():
    """Analyse the submitted source code and return the result as JSON.

    Returns a JSON string with the keys ``sinks``, ``graph``, ``stats``,
    ``profile`` and ``timetaken``. Returns ``(json, 400)`` with an ``error``
    key when the form is invalid or when ``analyze`` reports an error.
    """
    form = MyForm()
    # Guard clause: the original fell through and returned None for an invalid
    # form, which is not a valid view response.
    if not form.validate_on_submit():
        return json.dumps({"error": "Invalid form submission"}), 400

    profile = request.form.get('profile')
    start = time.time()
    CFGForest, taintedSink, stats, err = analyze(form.source_code.data, profile)
    end = time.time()
    timetaken = (end - start)
    if err:
        return json.dumps({"error": err.message}), 400

    # Replace the profile name by its stored definition when one exists;
    # otherwise the submitted value is echoed back unchanged.
    profileJson = profiles.getProfiles()
    if profile in profileJson:
        profile = profileJson[profile]

    sinks = [
        {
            'startLine': sink.startLine,
            'endLine': sink.endLine,
            'state': sink.state,
        }
        for sink in taintedSink
    ]

    graph = ""
    if CFGForest:
        graphviz = Source(CFGForest.generateGraphViz(True))
        graphviz.format = 'svg'
        graph = graphviz.pipe().decode('utf-8')

    return json.dumps({
        'sinks': sinks,
        'graph': graph,
        'stats': stats,
        'profile': profile,
        'timetaken': timetaken
    })
def plot_tree_rf(crit=['gini', 'entropy'],
                 bootstrap=['True', 'False'],
                 depth=IntSlider(min=1, max=30, value=2, continuous_update=False),
                 forests=IntSlider(min=1, max=200, value=100, continuous_update=False),
                 min_split=IntSlider(min=2, max=5, value=2, continuous_update=False),
                 min_leaf=IntSlider(min=1, max=5, value=1, continuous_update=False)):
    """Fit a random forest, print its accuracy and draw its first tree as PNG.

    Data comes from the module-level ``X_train``, ``y_train``, ``X_test`` and
    ``y_test``. The defaults are widget descriptions (presumably for
    ``ipywidgets.interact`` -- confirm against the caller).

    Fixes relative to the previous version, which could not be compiled:
    the keyword ``min_samples_split`` was given twice (the second one is
    ``min_samples_leaf``), the estimator name was misspelled and, given the
    ``bootstrap``/``n_estimators``/``estimators_`` usage, has to be a random
    forest, the model was fitted twice, the test score was labelled as a
    training score, and ``continous_update`` was misspelled on one slider.

    :param crit: split criterion.
    :param bootstrap: ``'True'``/``'False'`` (as offered by the default
        options) or a real boolean.
    :param depth: maximum tree depth.
    :param forests: number of trees (``n_estimators``).
    :param min_split: ``min_samples_split`` for each tree.
    :param min_leaf: ``min_samples_leaf`` for each tree.
    """
    # Imported locally because the name was not referenced by this block
    # before the fix.
    from sklearn.ensemble import RandomForestClassifier

    # The string 'False' is truthy; convert the option explicitly.
    if isinstance(bootstrap, str):
        use_bootstrap = bootstrap == 'True'
    else:
        use_bootstrap = bool(bootstrap)

    estimator = RandomForestClassifier(random_state=0,
                                       criterion=crit,
                                       bootstrap=use_bootstrap,
                                       n_estimators=forests,
                                       max_depth=depth,
                                       min_samples_split=min_split,
                                       min_samples_leaf=min_leaf,
                                       n_jobs=-1,
                                       verbose=False).fit(X_train, y_train)

    print('Random forest training accuracy: {:.3f}'.format(
        accuracy_score(y_train, estimator.predict(X_train))))
    print('Random forest test accuracy: {:.3f}'.format(
        accuracy_score(y_test, estimator.predict(X_test))))

    # Only the first tree of the ensemble is visualised.
    num_tree = estimator.estimators_[0]
    print('\nVisualizing tree:', 0)
    graph = Source(tree.export_graphviz(num_tree,
                                        out_file=None,
                                        feature_names=X_train.columns,
                                        class_names=['stayed', 'quit'],
                                        filled=True))
    display(Image(data=graph.pipe(format='png')))
def plot_regression_tree(crit, split, depth, min_split, min_leaf):
    """Fit a regression tree on the module-level training data and show it.

    The arguments map to the regressor's ``criterion``, ``splitter``,
    ``max_depth``, ``min_samples_split`` and ``min_samples_leaf``.
    Returns the fitted ``DecisionTreeRegressor``.
    """
    hyper_parameters = dict(
        random_state=0,
        criterion=crit,
        splitter=split,
        max_depth=depth,
        min_samples_split=min_split,
        min_samples_leaf=min_leaf,
    )
    regressor = DecisionTreeRegressor(**hyper_parameters)
    regressor.fit(X_train, y_train)

    dot_text = export_graphviz(
        regressor,
        out_file=None,
        feature_names=list(X_train.columns),
        filled=True,
        rounded=True,
        impurity=False,
    )
    svg_markup = Source(dot_text).pipe(format="svg")
    display(SVG(svg_markup))
    return regressor
def do_execute(
    self, code, silent, store_history=True, user_expressions=None, allow_stdin=False
):
    """Render ``code`` as a Graphviz graph and publish the result.

    On success a ``display_data`` message with the PNG and its size is sent;
    when the render raises ``subprocess.CalledProcessError`` its decoded
    stderr is sent as a ``stream`` message on ``stdout``. Nothing is sent
    when ``silent`` is true. The reply always reports status ``"ok"``.
    """
    src = Source(code)
    failed = False
    try:
        png_src = src.pipe(format="png")
    except subprocess.CalledProcessError as render_failure:
        failed = True
        error = render_failure.stderr

    # Publish to the web client unless the caller asked for silence.
    if not silent:
        if failed:
            message_type = "stream"
            stream_content = {"name": "stdout", "text": error.decode()}
        else:
            message_type = "display_data"
            data = urllib.parse.quote(base64.b64encode(png_src))
            width, height = imgsize.get_png_size(png_src)
            stream_content = {
                "metadata": {"image/png": {"width": width, "height": height}},
                "data": {"image/png": data},
            }
        self.send_response(self.iopub_socket, message_type, stream_content)

    reply = {
        "status": "ok",
        # The base class increments the execution count
        "execution_count": self.execution_count,
        "payload": [],
        "user_expressions": {},
    }
    return reply
def plot_tree(
        crit=['gini', 'entropy'],
        # 'split' is not a splitter scikit-learn accepts; the documented
        # choices are 'best' and 'random'.
        split=['best', 'random'],
        depth=IntSlider(min=1, max=30, value=2, continuous_update=False),
        # Number of samples required to split an internal node. As an integer
        # it must be at least 2, so the slider no longer starts at 1.
        min_split=IntSlider(min=2, max=5, value=2, continuous_update=False),
        # Minimum number of samples required at a leaf node.
        min_leaf=IntSlider(min=1, max=5, value=1, continuous_update=False)
):
    """Fit a decision tree, print train/test accuracy and show it as PNG.

    Data comes from the module-level ``x_train``, ``y_train``, ``x_test`` and
    ``y_test``. The defaults are widget descriptions (presumably for
    ``ipywidgets.interact`` -- confirm against the caller).

    :return: the fitted ``DecisionTreeClassifier``.
    """
    estimator = DecisionTreeClassifier(random_state=0,
                                       criterion=crit,
                                       splitter=split,
                                       max_depth=depth,
                                       min_samples_split=min_split,
                                       min_samples_leaf=min_leaf)
    estimator.fit(x_train, y_train)

    print('decision tree training accuracy: {:.3f}'.format(
        accuracy_score(y_train, estimator.predict(x_train))))
    print('decision tree test accuracy: {:.3f}'.format(
        accuracy_score(y_test, estimator.predict(x_test))))

    graph = Source(
        tree.export_graphviz(estimator,
                             out_file=None,
                             feature_names=x_train.columns,
                             class_names=['stayed', 'quit'],
                             filled=True))
    display(Image(data=graph.pipe(format='png')))
    return estimator
def to_png(self, output_path: str):
    """Export ``self.clf`` as a PNG image written to ``output_path``.

    ``self.contexts`` is used as the feature names of the exported tree.
    """
    dot_text = tree.export_graphviz(self.clf,
                                    out_file=None,
                                    feature_names=self.contexts)
    image = Source(dot_text).pipe(format='png')
    with open(output_path, 'wb') as png_file:
        png_file.write(image)
def show_graph(self):
    """Load the pickled tree and write its drawing to ``dtree_pipe.png``.

    The pickle at ``Models/decision_tree.pkl`` must hold a mapping with the
    fitted tree under the key ``'clf'``.
    """
    # NOTE: pickle.load executes code embedded in the file; only use it on
    # model files from a trusted origin.
    with open("Models/decision_tree.pkl", 'rb') as model_file:
        stored = pickle.load(model_file)

    from graphviz import Source
    dot_text = tree.export_graphviz(stored['clf'], out_file=None)
    image = Source(dot_text).pipe(format='png')
    with open('dtree_pipe.png', 'wb') as png_file:
        png_file.write(image)
def display_tree(tree_model, cols):
    """
    Render a decision tree as SVG.

    tree_model: a decision tree model (e.g. an estimator from random forest model)
    cols: feature names used to label the split nodes

    Returns the ``IPython.display.SVG`` object. The original built this object
    and discarded it, so nothing was ever shown; returning it lets a notebook
    render it as the cell result, or the caller pass it to ``display``.
    """
    from graphviz import Source
    from IPython.display import SVG
    from sklearn import tree

    graph = Source(tree.export_graphviz(tree_model, out_file=None, feature_names=cols))
    return SVG(graph.pipe(format='svg'))
def drawNumpy1DArray(
    self,
    array,
    showIndex=False,
    layout="row",
):
    """Draw ``array`` as a Graphviz HTML table.

    :param array: indexable object with a ``shape`` attribute; the elements
        ``array[0] .. array[shape[0] - 1]`` are drawn.
    :param showIndex: also draw each element's index (a second row in
        ``"row"`` layout, a leading column in ``"column"`` layout).
    :param layout: ``"row"`` or ``"column"``.
    :return: ``None`` after rendering ``lista.gv`` and displaying the SVG when
        ``self.animation`` is false; otherwise the PNG-format ``Source``.
    :raises ValueError: for any other ``layout`` (previously this surfaced as
        an unbound-variable error further down).
    """
    if layout not in ("row", "column"):
        raise ValueError(
            f'layout must be "row" or "column", got {layout!r}')

    values = [str(array[i]) for i in range(array.shape[0])]
    # Square cells sized after the longest printed value.
    size = 20 + 7 * max((len(value) for value in values), default=0)

    def cell(border, text):
        # One fixed-size table cell; indices are drawn without a border.
        return (f'<TD border="{border}" fixedsize="true" width="{size}" '
                f'height="{size}">{text}</TD>')

    if layout == "row":
        rows = ["<TR>" + "".join(cell(1, value) for value in values) + "</TR>"]
        if showIndex:
            rows.append(
                "<TR>"
                + "".join(cell(0, index) for index in range(len(values)))
                + "</TR>")
    elif showIndex:
        rows = [
            "<TR>" + cell(0, index) + cell(1, value) + "</TR>"
            for index, value in enumerate(values)
        ]
    else:
        rows = ["<TR>" + cell(1, value) + "</TR>" for value in values]
    strArray = "".join(rows)

    dot_source = (
        'graph "Array" { node [fontsize=15, shape=plaintext]; a0 [label=< <TABLE border="0" cellspacing="0" cellpadding="3">'
        + strArray + '</TABLE> >] }')

    if not self.animation:
        src = Source(dot_source)
        src.render('lista.gv', view=True)
        display(SVG(src.pipe(format='svg')))
        return None
    return Source(dot_source, format='png')
def visualize(treeModel, featuresList, targetValues):
    """Return a ``data:image/png;base64,...`` URI showing ``treeModel``.

    ``featuresList`` and ``targetValues`` are passed to ``export_graphviz`` as
    the feature and class names.
    """
    export_options = dict(
        out_file=None,
        feature_names=featuresList,
        class_names=targetValues,
        filled=True,
        rounded=True,
        special_characters=True,
    )
    png_image = Source(tree.export_graphviz(treeModel, **export_options)).pipe(format='png')
    encoded = base64.b64encode(png_image).decode('utf-8')
    return 'data:image/png;base64,{}'.format(encoded)
def export_graph_tree(decision_tree, class_names, file_name):
    """Write the top three levels of ``decision_tree`` to ``<file_name>.png``.

    Feature names are taken from the module-level ``features``.
    """
    dot_text = export_graphviz(decision_tree,
                               out_file=None,
                               feature_names=features,
                               class_names=class_names,
                               filled=True,
                               max_depth=3)
    image = Source(dot_text).pipe(format='png')
    with open(file_name + '.png', 'wb') as png_file:
        png_file.write(image)
def generate(engine, outformat, graphviz_code):
    """Render ``graphviz_code`` with ``engine`` and send it as ``outformat``.

    The response MIME type is looked up in ``mimetypes.types_map`` under
    ``'.' + outformat``.
    """
    src = Source(graphviz_code, engine=engine, format=outformat)
    request_summary = "/".join(["request is ", engine, outformat, graphviz_code])
    app.logger.info(request_summary)

    rendered = io.BytesIO(src.pipe())
    app.logger.debug(rendered.getvalue())
    app.logger.debug(outformat)

    mime_type = mimetypes.types_map['.'+outformat]
    return send_file(rendered, mimetype=mime_type)
def treeGraph(classificator, features) -> None:
    """Export ``classificator`` to ``classificationGraphModel.png``.

    Prints ``len(features)`` first; ``features.columns`` supplies the feature
    names and the classes are labelled ``'0'`` and ``'1'``.
    """
    print(len(features))
    dot_text = export_graphviz(classificator,
                               out_file=None,
                               filled=True,
                               rounded=True,
                               special_characters=True,
                               feature_names=features.columns,
                               class_names=['0', '1'])
    image = Source(dot_text).pipe(format='png')
    with open('classificationGraphModel.png', 'wb') as png_file:
        png_file.write(image)
    # NOTE(review): this object is neither displayed nor returned -- confirm
    # whether it was meant to be passed to display().
    Image(image)
    print("Complete png_create")
def main(args=None):
    """Command-line entry point: write an image through ``iterm2_img_format``.

    The image source depends on the selected flag: ``--graphviz`` renders the
    DOT text read from ``infile`` to PNG, ``--clipboard`` uses
    ``get_clipboard_image()``, ``--web`` downloads ``infile`` as a URL, and
    without a flag ``infile`` itself is handed to the formatter.

    :param args: argument list for ``parse_args``; ``None`` means
        ``sys.argv``.
    """
    parser = ArgumentParser()
    parser.add_argument('--graphviz', action='store_true')
    parser.add_argument('--clipboard', action='store_true')
    parser.add_argument('--web', action='store_true')
    parser.add_argument('--width', type=str)
    parser.add_argument('--height', type=str)
    parser.add_argument('--preserve', type=str)
    parser.add_argument(
        'infile',
        nargs='?',
        type=str,
        help='a graphviz file to be validated or pretty-printed',
        default=sys.stdin)
    parser.add_argument('outfile',
                        nargs='?',
                        type=FileType('w'),
                        help='write the output of infile to outfile',
                        default=sys.stdout)
    parsed_args = parser.parse_args(args)

    graphviz = parsed_args.graphviz
    clipboard = parsed_args.clipboard
    web = parsed_args.web
    width = parsed_args.width
    height = parsed_args.height
    preserve = parsed_args.preserve

    # `infile` is a path/URL string when given on the command line and the
    # sys.stdin file object otherwise.
    infile = parsed_args.infile
    outfile = parsed_args.outfile

    output = partial(iterm2_img_format,
                     preserve=preserve,
                     width=width,
                     height=height)
    with outfile:
        if graphviz:
            from graphviz import Source
            # A str has neither read() nor context-manager support, so a path
            # given on the command line has to be opened first.
            if isinstance(infile, str):
                with open(infile) as dot_file:
                    dot_source = dot_file.read()
            else:
                with infile:
                    dot_source = infile.read()
            gv = Source(dot_source)
            outfile.write(output(gv.pipe('png')))
        elif clipboard:
            outfile.write(output(get_clipboard_image()))
        elif web:
            resp = requests.get(infile)
            outfile.write(output(BytesIO(resp.content)))
        else:
            outfile.write(output(infile))
def plot_tree_helper(crit, split, depth):
    """Fit a decision tree on ``data`` with the given settings and show it.

    ``data`` comes from the surrounding scope and must offer ``data``,
    ``target`` and ``feature_names``. Returns the displayed
    ``graphviz.Source``.
    """
    classifier = DecisionTreeClassifier(random_state=0,
                                        criterion=crit,
                                        splitter=split,
                                        max_depth=depth)
    classifier.fit(data.data, data.target)

    class_labels = [str(name) for name in np.unique(data.target)]
    dot_text = tree.export_graphviz(classifier,
                                    out_file=None,
                                    feature_names=data.feature_names,
                                    class_names=class_labels,
                                    filled=True)
    graph = Source(dot_text)
    svg_markup = graph.pipe(format='svg')
    display(SVG(svg_markup))
    return graph
def plot_tree(crit=['gini', 'entropy'],
              bootstrap=['True', 'False'],
              depth=IntSlider(min=1, max=30, value=2, continuous_update=False),
              forests=IntSlider(min=1, max=200, value=100, continuous_update=False),
              min_split=IntSlider(min=2, max=5, value=2, continuous_update=False),
              min_leaf=IntSlider(min=1, max=5, value=1, continuous_update=False)):
    """Fit a random forest, print its accuracy and draw its first tree as PNG.

    Data comes from the module-level ``X_train``, ``y_train``, ``X_test`` and
    ``y_test``. The defaults are widget descriptions (presumably for
    ``ipywidgets.interact`` -- confirm against the caller). The slider keyword
    was previously misspelled ``continous_update`` and ``forests`` was
    accepted but never passed to the estimator.

    :param crit: split criterion.
    :param bootstrap: ``'True'``/``'False'`` (as offered by the default
        options) or a real boolean.
    :param depth: maximum tree depth.
    :param forests: number of trees (``n_estimators``).
    :param min_split: ``min_samples_split`` for each tree.
    :param min_leaf: ``min_samples_leaf`` for each tree.
    :return: the fitted ``RandomForestClassifier``.
    """
    # The string 'False' is truthy; convert the option explicitly.
    if isinstance(bootstrap, str):
        use_bootstrap = bootstrap == 'True'
    else:
        use_bootstrap = bool(bootstrap)

    estimator = RandomForestClassifier(random_state=1,
                                       criterion=crit,
                                       bootstrap=use_bootstrap,
                                       n_estimators=forests,
                                       max_depth=depth,
                                       min_samples_split=min_split,
                                       min_samples_leaf=min_leaf,
                                       n_jobs=-1,
                                       verbose=False)
    estimator.fit(X_train, y_train)

    # Training accuracy first, then test accuracy.
    print(accuracy_score(y_train, estimator.predict(X_train)))
    print(accuracy_score(y_test, estimator.predict(X_test)))

    # Only the first tree of the ensemble is visualised.
    num_tree = estimator.estimators_[0]
    graph = Source(tree.export_graphviz(num_tree,
                                        out_file=None,
                                        feature_names=X_train.columns,
                                        class_names=['0', '1'],
                                        filled=True))
    display(Image(data=graph.pipe(format='png')))
    return estimator
def plot_decision_tree(clf):
    '''
    Function for the classification task - writes the structure of the
    trained decision tree (limited to depth 3) to ``dtree.png``
    '''
    all_columns = np.array([
        'issuercountry', 'txvariantcode', 'issuer_id', 'amount',
        'currencycode', 'shoppercountry', 'interaction', 'verification',
        'cvcresponse', 'creationdate_stamp', 'accountcode', 'mail_id',
        'ip_id', 'card_id'
    ])
    # Positions of the columns used as feature names in the drawing.
    selected_positions = [0, 4, 5, 8, 10, 12]

    dot_text = export_graphviz(clf,
                               out_file=None,
                               max_depth=3,
                               feature_names=all_columns[selected_positions],
                               class_names=['benign', 'fraudulent'],
                               filled=True,
                               rounded=True,
                               special_characters=True,
                               proportion=False,
                               precision=2)
    image = Source(dot_text).pipe(format='png')
    with open('dtree.png', 'wb') as png_file:
        png_file.write(image)
def draw_tree(self, tree, root):
    """Draw the tree found at ``getattr(tree, root)`` with the neato engine.

    Resets the two counters, works on a copy made by ``copy_tree``, renders
    the graph to ``lista.gv`` (opening a viewer) and displays it as SVG.
    """
    self.counterNull = 0
    self.counterNodes = 0

    working_copy = self.copy_tree(getattr(tree, root))
    # The returned triple is not used here; the call is kept as in the
    # original (presumably for its effect on the copy -- TODO confirm).
    _x, _y, _z = self.compute_position(working_copy)
    node_declarations = self.encode_nodes(working_copy)
    edge_declarations = self.encode_edges(working_copy)

    dot_text = ('graph "Arbol" { rankdir=TB; ' + node_declarations +
                ' node[shape=circle] ' + edge_declarations + ' }')
    drawing = Source(dot_text)
    drawing.engine = "neato"
    drawing.render('lista.gv', view=True)
    display(SVG(drawing.pipe(format='svg')))
def draw_linked_list(self, nList):
    """Draw the linked list ``nList`` left to right, ending in a NULL box.

    The first node is ``getattr(nList, self.fieldHeader)``; each node's value
    and successor are read through ``self.fieldData`` and ``self.fieldLink``.
    ``self.pointers`` maps list positions to labels that are drawn pointing
    at the node in that position (the position equal to the list length
    points at NULL). A ``HEAD`` label is added when ``self.strHeader`` is not
    empty. The graph is rendered to ``lista.gv`` and displayed as SVG.

    :raises SegmentationFault: when a pointer refers to a position beyond the
        end of the list.
    """
    has_header = self.strHeader != ""
    parts = []
    if has_header:
        parts.append(f'HEAD [shape=plaintext label="{self.strHeader}"];\n')
    parts.append('NULL [shape=square label=""];\n')
    for position, label in self.pointers.items():
        parts.append(f'_label_pos{position} [shape=plaintext label="{label}"];\n')
    parts.append('node[shape=circle];\n')
    if has_header:
        parts.append('HEAD -> ')

    p = getattr(nList, self.fieldHeader)
    position = 0
    pointedNodes = []
    while p is not None:
        nodeData = str(getattr(p, self.fieldData))
        p = getattr(p, self.fieldLink)
        if position in self.pointers:
            # Pointed-at nodes get a synthetic id so the label can be ranked
            # next to them; their value is carried by the node's label.
            parts.append(f'_nodo_pos{position} -> ')
            pointedNodes.append(
                f'_nodo_pos{position} [shape=circle label="{nodeData}"];\n'
                f'_label_pos{position} -> _nodo_pos{position};\n'
                f'{{ rank="same"; _label_pos{position}; _nodo_pos{position} }};'
            )
        else:
            parts.append(f'{nodeData} -> ')
        position += 1
    parts.append('NULL;\n')
    parts.append('\n'.join(pointedNodes))

    if position in self.pointers:
        # Last position is NULL
        parts.append(
            f'\n_label_pos{position} -> NULL;\n'
            f'{{ rank="same"; _label_pos{position}; NULL }}\n')

    # The original guard was `len(self.pointers) > 1`, which let a single
    # out-of-range pointer through undetected; any non-empty mapping must be
    # checked.
    if self.pointers and max(self.pointers) > position:
        raise SegmentationFault(
            f'Tried to draw a pointer to node {max(self.pointers)}, but list length is {position}.'
        )

    listStr = ''.join(parts)
    src = Source('digraph "Lista" { rankdir=LR; ' + listStr + ' }')
    src.render('lista.gv', view=True)
    display(SVG(src.pipe(format='svg')))
def part4():
    """Train an entropy decision tree and save its drawing.

    Loads ``train.csv``/``test.csv`` through ``load_data``, fits a
    ``DecisionTreeClassifier`` (``min_samples_split=27``, ``random_state=2``)
    on the training part and writes the tree to ``dtree_pipe_4.png``.
    """
    print('part 4 -----------------------------------------')
    x_train, y_train, _x_test, _y_test, features_names = load_data(
        'train.csv', 'test.csv')

    # `criterion` is keyword-only in current scikit-learn releases; passing
    # "entropy" positionally fails there.
    classifier = tree.DecisionTreeClassifier(criterion="entropy",
                                             min_samples_split=27,
                                             random_state=2)
    classifier.fit(x_train, y_train)

    graph = Source(
        tree.export_graphviz(classifier,
                             out_file=None,
                             feature_names=features_names,
                             class_names=['-', '+']))
    png_bytes = graph.pipe(format='png')
    with open('dtree_pipe_4.png', 'wb') as f:
        f.write(png_bytes)
def visualize_dtree(self, estimator, file_id):
    """Show ``estimator`` inline and export it as DOT and PDF files.

    The tree is displayed as SVG, written to ``./files/<file_id>.dot`` (with
    rounded nodes) and converted to ``./files/<file_id>.pdf`` with the ``dot``
    executable. This is best-effort: any failure is printed, not raised.

    :param estimator: fitted tree passed to ``export_graphviz``.
    :param file_id: base name of the generated files.
    """
    dot_file = './files/{}.dot'.format(file_id)
    pdf_file = './files/{}.pdf'.format(file_id)
    try:
        feature_names = list(self.x_train.columns)
        class_names = list(map(str, list(self.y_train.unique())))

        graph = Source(export_graphviz(estimator,
                                       out_file=None,
                                       feature_names=feature_names,
                                       class_names=class_names,
                                       filled=True))
        display(SVG(graph.pipe(format='svg')))

        with open(dot_file, "w") as f:
            export_graphviz(estimator,
                            out_file=f,
                            feature_names=feature_names,
                            class_names=class_names,
                            filled=True,
                            rounded=True)
        # An argument list without a shell: file_id ends up in both paths, so
        # a shell command string would let it inject arbitrary commands.
        check_output(["dot", "-Tpdf", dot_file, "-o", pdf_file])
    except Exception as err:
        print('Cannot visualize decision trees')
        print(err)
def main():
    """Main function.

    Reads the file named by ``args.heinz``, drops everything before the first
    line that starts with ``graph G {``, renders the rest to PDF and writes
    it to ``args.output``. Exits with status 0 on success and with an error
    message when the input holds no such line.
    """
    args = get_args()

    # Read the whole output file
    with open(args.heinz) as r:
        graph_dot = r.readlines()

    # Locate the start of the graph; everything before it is redundant.
    # (The original popped lines one at a time and raised IndexError when the
    # marker was missing.)
    start = next(
        (index for index, line in enumerate(graph_dot)
         if line.startswith('graph G {')),
        None)
    if start is None:
        sys.exit("No line starting with 'graph G {' found in "
                 + str(args.heinz))

    src = Source(''.join(graph_dot[start:]))
    data_pdf = src.pipe('pdf')

    # Redirect the output (very important)
    with open(args.output, 'wb') as w:
        w.write(data_pdf)
    print('The visualization is saved as PDF!')
    sys.exit(0)
# Label the y axis and title the current matplotlib figure.
plt.ylabel('Feature')
t = plt.title('Feature Importances for Decision Tree')


# ## Visualize the Decision Tree

# In[21]:


from graphviz import Source
from sklearn import tree
from IPython.display import Image

# Export the fitted tree `wqp_dt` to DOT, keeping only the top three levels,
# with class and feature names taken from the variables defined earlier.
graph = Source(tree.export_graphviz(wqp_dt, out_file=None, class_names=wqp_label_names,
                                    filled=True, rounded=True, special_characters=False,
                                    feature_names=wqp_feature_names, max_depth=3))
# Render to PNG bytes and keep a copy on disk.
png_data = graph.pipe(format='png')
with open('dtree_structure.png','wb') as f:
    f.write(png_data)

# Last expression of the cell: shows the image in the notebook.
Image(png_data)


# ## Train, Predict & Evaluate Model using Random Forests

# In[22]:


from sklearn.ensemble import RandomForestClassifier
# train the model (default hyper-parameters) on wqp_train_SX / wqp_train_y
wqp_rf = RandomForestClassifier()
wqp_rf.fit(wqp_train_SX, wqp_train_y)
# predict and evaluate performance
def _text_to_graphiz(self, text):
    """Render the DOT ``text`` with Graphviz and return the SVG as a string."""
    svg_bytes = Source(text, format='svg').pipe()
    return svg_bytes.decode('utf-8')