def plotDTree(dTree, treeName):
    '''
    Visualize decision tree

    Walks the tree breadth-first, writes one DOT node statement per tree
    node and one undirected edge per parent/child pair, then hands the DOT
    text to graphviz as a PNG source and calls ``view()`` on it.

    Parameters
    ----------
    dTree : dict
        Root node. Every node is read for ``'type'`` and ``'gini'``; nodes
        whose type equals ``LEAF`` are also read for ``'fcVal'``, all other
        nodes for ``'comparator'``, ``'splitFeature'`` and ``'splitValue'``.
        Children are looked up under the optional keys ``'left'`` and
        ``'right'``.
    treeName : str
        Base name of the output; the graph file is ``./trees/<treeName>``.

    Returns
    -------
    None
    '''
    # Local import so the block does not depend on a file-level import.
    from collections import deque

    dot_data = StringIO()
    dot_data.write("graph dtree {\n")

    # FIFO queue of (node, DOT id) pairs; deque gives O(1) pops from the head.
    pending = deque([(dTree, 0)])
    latestIdx = 0  # highest DOT id handed out so far
    while pending:
        node, nodeIdx = pending.popleft()

        if node['type'] == LEAF:
            nodeLabel = "gini: %.3f\nforecast: %d" % (node['gini'], node['fcVal'])
        else:
            # Any comparator other than np.less_equal is displayed as "==".
            opName = "<=" if node['comparator'] == np.less_equal else "=="
            nodeLabel = "X[%d]%s%.2f\ngini: %.3f" % (
                node['splitFeature'],
                opName,
                node['splitValue'],
                node['gini'],
            )
        dot_data.write("%d [label=\"%s\" shape=box]" % (nodeIdx, nodeLabel))

        for key in ('left', 'right'):
            branch = node.get(key, None)
            if branch:
                # Ids are assigned in discovery order, so they match BFS order.
                latestIdx += 1
                pending.append((branch, latestIdx))
                dot_data.write("%d -- %d\n" % (nodeIdx, latestIdx))

    dot_data.write("}")

    filename = "./trees/%s" % treeName
    graph = graphviz.Source(dot_data.getvalue(), filename=filename, format="png")
    graph.view()
# Share of test predictions that match the known outcomes.
test_prediction_rate = get_correct_ratio(test_results, test_survived)
print('{} : {}'.format("test_prediction_rate", test_prediction_rate))

# Export the fitted tree as DOT text into an in-memory text buffer.
dot_data = StringIO()
export_graphviz(
    titanic_tree,
    out_file=dot_data,
    filled=True,
    rounded=True,
    special_characters=True,
    feature_names=titanic_data_features.columns.values,
    class_names=["Died", "Survived"],
)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_png('tree.png')
png_str = graph.create_png(prog='dot')

# treat the dot output string as an image file
# The rendered PNG is binary data, so it must go into a bytes buffer: writing
# it to a text StringIO raises TypeError on Python 3.
from io import BytesIO
sio = BytesIO(png_str)
img = mpimg.imread(sio)

# plot the image
imgplot = plt.imshow(img, aspect='equal')
plt.show(block=False)
Y, test_size=0.3, random_state=1)  # tail of a call that starts before this excerpt

# Decision tree: entropy criterion, depth capped at 4, fitted on the training split.
c5 = DecisionTreeClassifier(criterion='entropy', max_depth=4)
c5 = c5.fit(X_train, Y_train)
Y_pred = c5.predict(X_test)
print("Accuracy:", metrics.accuracy_score(Y_test, Y_pred))

# Export the tree as DOT and render it to a PNG.
# NOTE(review): out_file is the *string* 'dot_data', not the StringIO created
# just above, so the DOT text goes through a file of that name in the working
# directory and is then copied into the buffer. Passing the buffer directly
# may be possible — confirm before changing, since the file is left on disk.
dot_data = StringIO()
export_graphviz(c5, out_file='dot_data', filled=True, rounded=True, special_characters=False, feature_names=columnList3)
with open("dot_data") as content_file:
    dot_data.write(content_file.read())
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_png('titanicTree.png')

# The bare string below is an expression statement used as a section heading.
""" Naive Bayes Classification """
# NOTE(review): alpha=0 asks for no additive smoothing — confirm this is
# intended; presumably scikit-learn warns about or adjusts a zero alpha.
nbC = BernoulliNB(alpha=0)
# Fitted and evaluated on the same X, Y, so the outputs below are
# training-set figures rather than held-out ones.
nbC = nbC.fit(X, Y)
print(nbC.predict(X))
print(nbC.predict_proba(X))
print(nbC.score(X, Y))
#!/usr/bin/env python
'''Read and write a string as a file-like object.'''
try:
    from sklearn.externals.six import StringIO
except ImportError:
    # Newer scikit-learn releases no longer ship sklearn.externals.six;
    # fall back to the standard-library text buffer.
    from io import StringIO

# create a sample
mysample = StringIO()
try:
    mysample.write('My first testing line.')
    # Printing the object itself shows only its repr (type and memory
    # address), not the text that was written to it.
    print(mysample)

    # retrieve contents using getvalue()
    content = mysample.getvalue()
    print(content)
finally:
    # close my sample, even if one of the steps above failed
    mysample.close()