def draw_graph(self, x, y):
    """Fit an ID3 tree and a CART tree on ``(x, y)`` and write each as a PDF.

    Files written in the current working directory:

    * ``SVC_Tree.dot`` / ``SVC_Tree.pdf``       -- ID3 tree
    * ``SVC_Tree_v1.dot`` / ``SVC_Tree_v1.pdf`` -- CART tree, default style
    * ``SVC_Tree_v2`` / ``SVC_Tree_v2.pdf``     -- CART tree, labelled and
      coloured using ``self.feature_names`` and ``self.target``

    The ID3 tree is also printed as text.

    Args:
        x: Training samples passed unchanged to both estimators.
        y: Training labels passed unchanged to both estimators.

    Returns:
        Always ``True``.
    """
    # --- ID3 tree: text dump, then DOT -> PDF -------------------------------
    id3_clf = Id3Estimator()
    id3_clf.fit(x, y, check_input=True)
    print(export_text(id3_clf.tree_, self.feature_names))

    export_graphviz(id3_clf.tree_, 'SVC_Tree.dot', self.feature_names)
    pydot.graph_from_dot_file('SVC_Tree.dot')[0].write_pdf("SVC_Tree.pdf")

    # --- CART tree -----------------------------------------------------------
    cart_clf = DecisionTreeClassifier().fit(x, y)

    # Version 1: default styling.
    tree.export_graphviz(cart_clf, out_file='SVC_Tree_v1.dot')
    pydot.graph_from_dot_file('SVC_Tree_v1.dot')[0].write_pdf(
        "SVC_Tree_v1.pdf")

    # Version 2: named features/classes, filled and rounded nodes.
    # The DOT text goes to the file; the return value is not used because it
    # only carries the DOT source when out_file is None.
    tree.export_graphviz(cart_clf,
                         out_file="SVC_Tree_v2",
                         feature_names=self.feature_names,
                         class_names=self.target,
                         filled=True,
                         rounded=True,
                         special_characters=True)
    pydot.graph_from_dot_file('SVC_Tree_v2')[0].write_pdf("SVC_Tree_v2.pdf")

    return True
def Tree():
    """Train an ID3 tree from ``przypadki.txt`` / ``wyniki.txt`` and export it.

    Each line of ``przypadki.txt`` is split on single spaces into attribute
    fields; the 7th field is reduced to its first character. The labels are
    the individual digits of the first line of ``wyniki.txt``.

    Side effects: prints the attribute matrix, a header-plus-rows table and
    the text form of the tree, and writes the tree to ``out.dot``.

    Returns:
        The fitted ``Id3Estimator``.
    """
    names = ["tarcza", "czy lata", "wiek", "zbroja", "hp", "level", "potwor"]

    # Read the cases once; the context manager closes the file, and the
    # default text mode replaces the 'rU' mode that newer Pythons reject.
    with open('przypadki.txt', encoding='utf-8') as cases_file:
        case_lines = cases_file.readlines()

    # NOTE(review): the last line of the file is not used, exactly as in the
    # original ``range(1, count)`` loop -- confirm that this is intended.
    x = []
    for raw_line in case_lines[:-1]:
        fields = raw_line.split(" ")
        # Keep only the first character of the 7th field (this also drops a
        # trailing newline when the line has exactly seven fields).
        fields[6] = str(fields[6][0])
        x.append(fields)
    X = np.asarray(x)
    print(X)

    with open('wyniki.txt', encoding='utf-8') as results_file:
        labels_line = results_file.readline()
    # The last two characters are discarded; presumably a line terminator --
    # TODO confirm against the data file.
    yd = [int(digit) for digit in labels_line[:-2]]
    y = np.array(yd)

    # Build the printable table from a *copy* of the header so that ``names``
    # (used below as the feature names) is not extended with "wynik".
    d = [names + ["wynik"]]
    d.extend(row + [label] for row, label in zip(x, yd))
    print(d)

    clf = Id3Estimator()
    clf.fit(X, y, check_input=True)

    export_graphviz(clf.tree_, "out.dot", names)
    print(export_text(clf.tree_, names))
    return clf
def cpuUsageDecisionTree(self):
    """Fit an ID3 tree on the CSV data and write it to ``out.dot``.

    The samples and labels come from ``self.get_data_from_csv()``; the four
    names below are passed to the exporter as feature names.
    """
    samples, labels = self.get_data_from_csv()

    columns = [
        "vm_id_map",
        "timestamp_new",
        "cpu_usage_percent",
        "admin_historic_decision_cpu",
    ]

    model = Id3Estimator()
    model.fit(samples, labels, check_input=True)
    export_graphviz(model.tree_, "out.dot", columns)
def id3():
    """Fit an ID3 tree on ``Task4_Data.csv`` and write it to ``tree.dot``.

    Columns 3..5 (zero-based) are used as features and column 6 as the
    label. The feature names are the CSV header entries of columns 3..5.
    """
    # Parse the file once and take both header and values from the same frame.
    frame = pd.read_csv('Task4_Data.csv')
    headers = frame.columns.values[3:6]

    table = frame.values
    y = table[:, 6]
    X = table[:, 3:6]

    clf = Id3Estimator()
    clf.fit(X, y, check_input=True)
    export_graphviz(clf.tree_, 'tree.dot', headers)
def BuildTree():
    """Train an ID3 tree on ``recommend.csv`` and export it to ``lol.dot``.

    The CSV is read without a header row, ``;``-separated; the last column
    (``wybor``) is the label and the remaining columns are the features.
    The tree is fitted on a random 80% training split.

    Returns:
        The fitted ``Id3Estimator``.
    """
    column_names = [
        "danie", "na ciepło", "z mięsem", "na słodko", "kwaśne",
        "alkoholowe", "czekoladowe", "wybor"
    ]
    dataset = ps.read_csv("recommend.csv",
                          header=None,
                          names=column_names,
                          sep=";")
    X = dataset.drop('wybor', axis=1)
    Y = dataset['wybor']

    # Only the training part of the split is used here.
    X_train, _, Y_train, _ = train_test_split(X, Y, test_size=0.20)

    clf = Id3Estimator()
    clf.fit(X_train, Y_train)

    # Pass the feature columns only; the label name is not a feature.
    export_graphviz(clf.tree_, "lol.dot", list(X.columns))
    return clf
def BuildTree():
    """Train the "bill" ID3 tree, then sample images until one is class 1.

    Part 1 reads ``bill.csv`` (no header row, ``;``-separated), fits an
    ``Id3Estimator`` on a random 80% training split and writes the tree to
    ``test.dot``.

    Part 2 loads the Keras model ``third_try.h5`` and repeatedly picks a
    random image from the test directory until ``model.predict`` yields
    exactly ``1`` in position ``[0][0]``.

    Returns:
        ``[prediction, clf]``. The loop only ends on a positive result, so
        ``prediction`` is always ``1`` when the function returns.
    """
    # Column names of the CSV; the last one is the label.
    feature_names = [
        "pair", "empty_plate", "talking", "mood", "asked", "hurry", "bill"
    ]
    # Feature columns only (everything except the label).
    Yfeature_names = feature_names[:-1]

    # Load the dataset.
    dataset = ps.read_csv("bill.csv",
                          header=None,
                          names=feature_names,
                          sep=";")
    X = dataset.drop('bill', axis=1)
    Y = dataset['bill']

    # Create the decision tree and fit it on the training split only.
    clf = Id3Estimator()
    X_train, _, Y_train, _ = train_test_split(X, Y, test_size=0.20)
    clf.fit(X_train, Y_train)
    # Export with the feature names, not the list that includes the label.
    export_graphviz(clf.tree_, "test.dot", Yfeature_names)

    model = load_model('third_try.h5')
    # One directory for both listing and loading, so the two cannot diverge
    # when the working directory is not the project root.
    test_dir = "C://Users/Kinia/Desktop/sztuczna2/SI-master/test"
    while True:
        path = random.choice(os.listdir(test_dir))
        print(path)
        img_pred = image.load_img(os.path.join(test_dir, path),
                                  target_size=(100, 100))
        img_pred = image.img_to_array(img_pred)
        img_pred = np.expand_dims(img_pred, axis=0)
        rslt = model.predict(img_pred)
        print(rslt)
        # NOTE(review): exact comparison with 1 on a model output; this never
        # terminates if no image produces exactly 1 -- confirm intent.
        if rslt[0][0] == 1:
            prediction = 1
            break

    print(prediction)
    return [prediction, clf]
def generate_tree(self, max_depth):
    """Fit an ID3 tree on ``self.dataframe`` and export it as DOT and PNG.

    Column 0 of ``self.dataframe`` is the target; all remaining columns are
    the attributes. The fitted estimator is stored in ``self.estimator``.
    Output goes to ``./output/<run_id>/tree.dot`` and ``tree.png``.

    Args:
        max_depth: Passed as the first positional argument to
            ``Id3Estimator``.
    """
    # Local import so this method does not depend on a file-level import.
    import subprocess

    self.__print(
        "\n-------------------------------------- Modelling ------------------------------------------\n"
    )
    self.__create_output_dir()
    self.__print("\n-----DECISION TREE GENERATION-----\n")

    dot_path = './output/' + self.run_id + '/tree.dot'
    png_path = "./output/" + self.run_id + "/tree.png"
    self.__print("Output file names: " + dot_path + " " + png_path)

    # The estimator.
    self.estimator = Id3Estimator(max_depth)

    # Target: first column. Attributes: every other column.
    target = self.dataframe.iloc[:, 0]
    attributes = self.dataframe.iloc[:, 1:]
    feature_names = list(attributes.columns.values)

    # Build the tree and export it as .dot.
    self.estimator = self.estimator.fit(attributes, target)
    export_graphviz(self.estimator.tree_, dot_path, feature_names)

    # Render the PNG. An argument list (no shell) keeps a run_id containing
    # spaces or shell metacharacters from breaking or altering the command.
    # -Tpng can be changed to -Tsvg, -Tjpg, -Tgif etc. (see the dot man page).
    try:
        subprocess.run(["dot", "-Tpng", dot_path, "-o", png_path])
    except OSError as err:
        # Rendering stays best-effort, as it was with os.system.
        self.__print("Could not run Graphviz 'dot': " + str(err))
# Basketball example: fit an ID3 tree on six games, render it to PNG and
# delete the intermediate DOT file.
import os

basketball_feature_names = ["HomeOrAway", "InTop25", "Media"]

# One row per game, columns in the order of basketball_feature_names.
X = np.array([
    ['Home', 'Out', '1-NBC'],
    ['Home', 'In', '1-NBC'],
    ['Away', 'Out', '2-ESPN'],
    ['Away', 'Out', '3-FOX'],
    ['Home', 'Out', '1-NBC'],
    ['Away', 'Out', '4-ABC'],
])

# Outcome of each game, aligned with the rows of X.
Y = np.array([
    'Win',
    'Lose',
    'Win',
    'Win',
    'Win',
    'Win',
])

clf = Id3Estimator()
clf.fit(X, Y)
export_graphviz(clf.tree_,
                'hw3/ID3/basketball.dot',
                feature_names=basketball_feature_names)
subprocess.run(
    ['dot', '-Tpng', 'hw3/ID3/basketball.dot', '-o', 'hw3/ID3/basketball.png'])
# Remove the intermediate file in-process; spawning `rm` only works where
# that program exists.
os.remove('hw3/ID3/basketball.dot')
X, Y, test_size=test_proportion) X_trainf10, X_test, Y_trainf10, Y_test = train_test_split( X, Y, test_size=test_proportion) estimator1 = Id3Estimator() estimator2 = Id3Estimator() estimator3 = Id3Estimator() estimator4 = Id3Estimator() estimator5 = Id3Estimator() # estimator6 = Id3Estimator() estimator7 = Id3Estimator() estimator8 = Id3Estimator() estimator9 = Id3Estimator() estimator10 = Id3Estimator() estimator1.fit(X_trainf1, Y_trainf1) estimator2.fit(X_trainf2, Y_trainf2) estimator3.fit(X_trainf3, Y_trainf3) estimator4.fit(X_trainf4, Y_trainf4) estimator5.fit(X_trainf5, Y_trainf5) # estimator6.fit(X_trainf6, Y_trainf6) estimator7.fit(X_trainf7, Y_trainf7) estimator8.fit(X_trainf8, Y_trainf8) estimator9.fit(X_trainf9, Y_trainf9) estimator10.fit(X_trainf10, Y_trainf10) dot_data = export_graphviz(estimator1.tree_, 'random_forest_tree1.dot') estimator1.predict(X_test)
]].values X[0:5] y = df['CETIFIED'].values y[0:5] #Split data X_trainset, X_testset, y_trainset, y_testset = train_test_split(X, y, test_size=0.3, random_state=3) #Entrenar mdelo irisTree = Id3Estimator() irisTree #predercir irisTree.fit(X_trainset, y_trainset) predTree = irisTree.predict(X_testset) print(predTree[0:5]) print(y_testset[0:5]) #Evaluate print("DecisionTrees's Accuracy: ", metrics.accuracy_score(y_testset, predTree)) #Visualizing tree featureNames = df.columns[0:4] export_graphviz(irisTree.tree_, 'iris.dot', featureNames) #En una cmd ejecutar dot -Tpdf iris.dot -o iris.pdf
:align: center """ from id3 import Id3Estimator, export_graphviz import numpy as np feature_names = ["age", "gender", "sector", "degree"] X = np.array([[45, "male", "private", "m"], [50, "female", "private", "m"], [61, "other", "public", "b"], [40, "male", "private", "none"], [34, "female", "private", "none"], [33, "male", "public", "none"], [43, "other", "private", "m"], [35, "male", "private", "m"], [34, "female", "private", "m"], [35, "male", "public", "m"], [34, "other", "public", "m"], [34, "other", "public", "b"], [34, "female", "public", "b"], [34, "male", "public", "b"], [34, "female", "private", "b"], [34, "male", "private", "b"], [34, "other", "private", "b"]]) y = np.array([ "(30k,38k)", "(30k,38k)", "(30k,38k)", "(13k,15k)", "(13k,15k)", "(13k,15k)", "(23k,30k)", "(23k,30k)", "(23k,30k)", "(15k,23k)", "(15k,23k)", "(15k,23k)", "(15k,23k)", "(15k,23k)", "(23k,30k)", "(23k,30k)", "(23k,30k)" ]) clf = Id3Estimator() clf.fit(X, y, check_input=True) export_graphviz(clf.tree_, "out.dot", feature_names)
feature_list_file = "features_65.txt" feature_list_fh = open(feature_list_file, "r") features = feature_list_fh.read() feature_list_fh.close() feature_list = features.split() feature_list = unique(feature_list) if (model == 0): # ID3 Decision tree estimator = Id3Estimator() max_estimator = Id3Estimator() # To store max model m_scores = np.zeros(num_folds) estimator = estimator.fit(data, np.ravel(target)) tree = export_graphviz(estimator.tree_, 'tree.dot', np.asarray(feature_list).ravel()) #tree = export_graph('tree.dot', 'tree_mine.txt') k_fold = StratifiedKFold( num_folds, shuffle=False) # K-fold data split function from sklearn max_acc_model = 0 for k, (train, test) in enumerate(k_fold.split( data, np.ravel(target))): # Compute accuracy over k folds estimator.fit(data[train], np.ravel(target)[train]) prediction = estimator.predict(data[test]) #tree = export_graphviz(estimator.tree_, 'tree'+str(k)+'.dot', np.asarray(feature_list).ravel()) yTest = np.ravel(target)[test] # target labels - test set m_scores[k] = 1 - ( np.count_nonzero(prediction.astype(int) ^ yTest.astype(int)) / test.size) # Checking for errors and computing score if m_scores[k] > m_scores[max_acc_model]:
]) # Leave-one-out######################################################### n = np.random.randint(0, x.shape[0]) ## select randomly a data test x_test = np.array([x[n]]) x_train = np.delete(x, n, 0) y_test = np.array([y[n]]) y_train = np.delete(y, n, 0) ######################################################################## id3 = Id3Estimator() #################################################### id3.fit(x_train, y_train) ############################################### # Testing y_predict = id3.predict(x_test) ######################################### # Precision print("Precision") print("Input: ", x_test, "| Expected: ", y_test, "| Result: ", y_predict) if np.array_equal(y_test, y_predict): print("100%") else: print("0%") export_graphviz(id3.tree_, 'tree_p1.dot', features) #with open("tree_p1.dot") as f: # dot_graph = f.read() #g = graphviz.Source(dot_graph) #g.render() #g.view()
'cap-shape', 'cap-surface', 'cap-color', 'bruises%3F', 'odor', 'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color', 'stalk-shape', 'stalk-surface-above-ring', 'stalk-surface-below-ring', 'stalk-color-above-ring', 'stalk-color-below-ring', 'veil-type', 'veil-color', 'ring-type', 'spore-print-color', 'population', 'habitat' ]].values X[0:5] #Split data X_trainset, X_testset, y_trainset, y_testset = train_test_split(X, y, test_size=0.5, random_state=3) #Entrenar mdelo mushroomTree = Id3Estimator() mushroomTree #predercir mushroomTree.fit(X_trainset, y_trainset) predTree = mushroomTree.predict(X_testset) print(predTree[0:21]) print(y_testset[0:21]) #Evaluate print("Accuracy del árbol de decisión: ", metrics.accuracy_score(y_testset, predTree)) #Visualizing tree featureNames = df.columns[0:21] export_graphviz(mushroomTree.tree_, 'mushroom.dot', featureNames)
import numpy as np

# Names handed to the exporter. The first four match the columns of
# `inputda`; the fifth is the name of the label.
feature_names = ['Outlook', 'Temperature', 'Humidity', 'Wind', 'PlayTennis']

# Observations, one row each: outlook, temperature, humidity, wind.
inputda = np.array([
    ['Sunny', 'Hot', 'High', 'Weak'],
    ['Sunny', 'Hot', 'High', 'Strong'],
    ['Overcast', 'Hot', 'High', 'Weak'],
    ['Rain', 'Mild', 'High', 'Weak'],
    ['Rain', 'Cool', 'Normal', 'Weak'],
    ['Rain', 'Cool', 'Normal', 'Strong'],
    ['Overcast', 'Cool', 'Normal', 'Strong'],
    ['Sunny', 'Mild', 'High', 'Weak'],
    ['Sunny', 'Cool', 'Normal', 'Weak'],
    ['Rain', 'Mild', 'Normal', 'Weak'],
    ['Sunny', 'Mild', 'Normal', 'Strong'],
    ['Overcast', 'Mild', 'High', 'Strong'],
    ['Overcast', 'Hot', 'Normal', 'Weak'],
    ['Rain', 'Mild', 'High', 'Strong'],
])

# Labels as a flat 1-D array, one entry per row of `inputda`.
inputsa = np.array([
    'No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes',
    'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No',
])

# Fit the tree, write it as DOT, then render the PNG through the shell.
estimator = Id3Estimator()
estimator.fit(inputda, inputsa, check_input=False)
export_graphviz(estimator.tree_, 'tree.dot', feature_names)
system("dot -Tpng tree.dot > ID3_Rafa_Play_Tennis.png")
y = np.array([ "Höch", "Höch", "Höch", "Niedrig", "Niedrig", "Niedrig", "M-Höch", "M-Höch", "M-Höch", "M-Niedrig", "M-Niedrig", "M-Niedrig", "M-Niedrig", "M-Niedrig", "M-Höch", "M-Höch", "M-Höch" ]) clf = Id3Estimator() clf.fit(X, y, check_input=True) print(export_text(clf.tree_, feature_names)) # export tree.dot as pdf file to write Decision Tree as a graph dot_data = StringIO() #tree.export_graphviz(clf, out_file = dot_data) export_graphviz(clf.tree_, 'Ausfallrisiko.dot', feature_names) graph = pydot.graph_from_dot_file('Ausfallrisiko.dot') graph[0].write_pdf("Ausfallrisiko.pdf") # Accuracy X = np.array([[900, 1, int(True), int(True)], [1000, 0, int(True), int(True)], [1220, 2, int(False), int(False)], [800, 1, int(True), 2], [680, 0, int(True), 2], [660, 1, int(False), 2], [860, 2, int(True), int(False)], [700, 1, int(True), int(True)], [680, 0, int(True), int(True)], [700, 1,
https://pypi.python.org/pypi/decision-tree-id3/0.1.2 """ #from sklearn import tree from id3 import Id3Estimator from id3 import export_graphviz import numpy as np import graphviz # | 0 | 1 | 2 #Outlook | Sunny | Overcast | Rain #Temperature| Hot | Mild | Cool #Humidity | High | Normal | - #Wind | Weak | Strong | - x_labels = ["Outlook", "Temperature", "Humidity", "Wind"] X = np.array([[0, 0, 0, 0], [0, 0, 0, 1], [1, 0, 0, 0], [2, 1, 0, 0], [2, 2, 1, 0], [2, 2, 1, 1], [1, 2, 1, 1], [0, 1, 0, 0], [0, 2, 1, 0], [2, 1, 1, 0], [0, 1, 1, 1], [1, 1, 0, 1], [1, 0, 1, 0], [2, 1, 0, 1]]) Y = np.array([0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0]) #clf = tree.DecisionTreeClassifier() clf = Id3Estimator(min_samples_split=3) clf.fit(X, Y) dot_data = export_graphviz(clf.tree_, "decisiontree.dot", x_labels) #predictions = clf.predict(X) #for i in range(len(X)): # print X[i],Y[i],"->",predictions[i]
"exang", "oldpeak", "slope", "ca", "thal", "num" ] with open('AI/Decision_tree/cleveland.csv') as csvfile: reader = csv.DictReader(csvfile, fieldnames=feaure_names) list_all_data = [dat for dat in reader if "?" not in dat.values()] temp_all_data = copy.deepcopy(list_all_data[:-1]) list_data = np.array([ np.array([v for k, v in data.items()]) for data in temp_all_data if data.pop('num', None) ]) list_target = np.array( [np.array([data['num']]) for data in list_all_data[:-1]]) estimator.fit(list_data, list_target) export_graphviz(estimator.tree_, 'DecTree.dot', feaure_names) test_temp_data = copy.deepcopy(list_all_data[-5:-4]) test_data = np.array([ np.array([v for k, v in data.items()]) for data in test_temp_data if data.pop('num', None) ]) predict_data = estimator.predict(test_data) actual_outcome = [d['num'] for d in list_all_data[-5:-4]] print('\n\nTesting input set:\n') for k, v in list_all_data[-5:-4][0].items(): if (k != 'num'): print(f'{k}={v}') print('\n') print(f'Actual outcome: {actual_outcome[0]}')
yPredict = model.predict(dataOnlyTest) #print(accuracy_score(yTest,yPredict)) tree.export_graphviz(model, out_file='footballCARTtree.dot', feature_names=fbFeatures) call(["dot", "footballCARTtree.dot", "-Tpng", "-o", "footballCARTtree.png"]) # #ID3 # estimator = Id3Estimator() estimator.fit(fbX, fbY) export_graphviz(estimator.tree_, 'footballID3tree.dot', fbFeatures) call(["dot", "footballID3tree.dot", "-Tpng", "-o", "footballID3tree.png"]) # #C4.5 # """ Cannot find a reliable source that implemented C4.5. """ # #Task 4 #Question 2 # #
from id3 import Id3Estimator, export_graphviz
import numpy as np
import graphviz

# Attribute names, in the column order of `x`.
feature_names = ["color", "forma", "tamanio"]

# Six training objects: colour, shape, size.
x = np.array([["rojo", "cuadrado", "grande"], ["azul", "cuadrado", "grande"],
              ["rojo", "redondo", "pequenio"],
              ["verde", "cuadrado", "pequenio"],
              ["rojo", "redondo", "grande"], ["verde", "cuadrado", "grande"]])

# Class of each object ("+" or "-"), aligned with the rows of `x`.
y = np.array(["+", "+", "-", "-", "+", "-"])

# Fit the tree and export it in DOT format.
id3 = Id3Estimator()
id3.fit(x, y)
export_graphviz(id3.tree_, 'objetos.dot', feature_names)

# Read the DOT text back and hand it to graphviz for rendering and viewing.
with open("objetos.dot") as f:
    dot_graph = f.read()
g = graphviz.Source(dot_graph)
g.render()
g.view()
y_train = data.target[size:] # In[150]: id3 = Id3Estimator() id3.fit(x_train, y_train) y_pred = id3.predict(x_test) # In[151]: print('Tamaño de test: ', size) print("Precisión:", metrics.accuracy_score(y_test, y_pred)) print(precision_recall_fscore_support(y_test, y_pred)) # In[152]: export_graphviz(id3.tree_, 'arbol.dot', data.feature_names) # In[153]: with open("arbol.dot") as f: dot_graph = f.read() # In[154]: g = graphviz.Source(dot_graph) g.render() g.view() # In[ ]:
# Merge the two sample sets and the two label sets.
# NOTE(review): X is stacked as [X, bX] but Y as [bY, Y]; if bX/bY belong
# together the rows and labels end up in different orders -- confirm.
X = np.vstack([X, bX])
Y = np.concatenate([bY, Y])

clf = Id3Estimator(min_samples_split=4,
                   prune=False,
                   is_repeating=False,
                   gain_ratio=False)
clf.fit(X, Y, check_input=True)

# Attribute columns of the test cases (header row skipped), then predict.
J = np.genfromtxt(TESTCASE_DATASET,
                  delimiter=",",
                  skip_header=1,
                  dtype=str,
                  usecols=(tuple(range(1, trainning_data_col_upper))))
K = clf.predict(J)

# Re-read the whole test table (header included) and split it around the
# leaf column so the predictions can be put in its place.
predict_table = np.genfromtxt(TESTCASE_DATASET, delimiter=",", dtype=str)
left, right = np.hsplit(predict_table, [leaf_col])
right = np.hsplit(right, [1])[1]  # without the predicted column
# Prepend a header cell to the predictions and make them a single column.
K = np.hstack([("Predicted" + attributes[leaf_col]), K]).reshape(-1, 1)
I = np.hstack((left, K, right))

print("Writing result (.csv) in {}".format(RESULT_DATASET))
np.savetxt(RESULT_DATASET, I, delimiter=",", fmt="%s")

# The DOT file is written next to this source file.
output = path.realpath(path.join(path.realpath(__file__), '..', 'out.dot'))
print("Writing dt in %s" % (output))
export_graphviz(clf.tree_, output,
                attributes[trainning_data_col_lower:trainning_data_col_upper])
from id3 import Id3Estimator, export_graphviz
import numpy as np
import graphviz

# Attribute names, in the column order of `x`.
feature_names = ["No. de ejemplares", "Nivel de ventas", "Precio"]

# Fifteen training rows: number of copies, sales level, price band.
x = np.array([["<=4", "Buenas", "<=150"], [">4", "Buenas", ">150"],
              [">4", "Buenas", "<=150"], ["<=4", "Buenas", ">150"],
              [">4", "Buenas", ">150"], [">4", "Bajas", ">150"],
              ["<=4", "Bajas", ">150"], ["<=4", "Bajas", ">150"],
              [">4", "Bajas", "<=150"], ["<=4", "Bajas", "<=150"],
              ["<=4", "Promedio", "<=150"], [">4", "Promedio", "<=150"],
              ["<=4", "Promedio", ">150"], [">4", "Promedio", ">150"],
              [">4", "Promedio", "<=150"]])

# Class of each row ("si" / "no"), aligned with the rows of `x`.
y = np.array([
    "si", "si", "si", "si", "si", "si", "no", "si", "si", "no", "no", "no",
    "si", "si", "no"
])

# Fit the tree and export it in DOT format.
id3 = Id3Estimator()
id3.fit(x, y)
export_graphviz(id3.tree_, 'librerias.dot', feature_names)

# Read the DOT text back and hand it to graphviz for rendering and viewing.
with open("librerias.dot") as f:
    dot_graph = f.read()
g = graphviz.Source(dot_graph)
g.render()
g.view()
Created on Wed Sep 05 01:56:33 2018 @author: Student """ import sklearn as sk import pandas as pd from id3 import Id3Estimator from id3 import export_graphviz data = pd.read_csv('car.data.txt', names=[ 'buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class' ]) data['class'], class_names = pd.factorize(data['class']) data['buying'], _ = pd.factorize(data['buying']) data['maint'], _ = pd.factorize(data['maint']) data['doors'], _ = pd.factorize(data['doors']) data['persons'], _ = pd.factorize(data['persons']) data['lug_boot'], _ = pd.factorize(data['lug_boot']) data['safety'], _ = pd.factorize(data['safety']) X = data.iloc[:, :-1] y = data.iloc[:, -1] X_train, X_test, y_train, y_test = sk.cross_validation.train_test_split( X, y, test_size=0.3, random_state=0) estimator = Id3Estimator() estimator.fit(X_train, y_train) export_graphviz(estimator.tree_, 'tree.dot', data['class']) import graphviz #dot -T p
# One feature name per line of the already-open file `f`, whitespace stripped.
feature_names = [line.strip() for line in f.readlines()]

# Samples are "~"-delimited; labels are 32-bit integers.
X = np.array(genfromtxt(x_file, dtype=None, delimiter="~").tolist())
y = genfromtxt(y_file, dtype='i4')
# With a single feature the samples are reshaped into one column.
if len(feature_names) == 1:
    X = X.reshape(-1, 1)

clf = Id3Estimator()
clf.fit(X, y, check_input=True)

# Elapsed time since `start` (set before this fragment).
end = datetime.now()
delta = end - start

# Exporting stays best-effort, but only ordinary errors are reported and
# swallowed; KeyboardInterrupt / SystemExit now propagate.
try:
    export_graphviz(clf.tree_, dot_file, feature_names)
except Exception:
    print("Unexpected error:", sys.exc_info()[0])

result = convert_dot_to_predicate(dot_file, graph_dir)
path, filename = os.path.split(graph_dir)
result.insert(0, filename)
# NOTE(review): `.seconds` is the whole-second component only (no days, no
# fraction); use total_seconds() if long runs must be reported accurately.
result.append(delta.seconds)
pprint(result)

# newline='' is what the csv module expects of its file objects; without it
# extra blank rows appear on platforms that translate "\n".
with open(path + "/result.csv", 'a', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(result)

#
# dot -Tpng out.dot -o out.png
# Encode the `windy` column numerically: weak -> 0, strong -> 1.
df['windy'] = df.windy.map({'weak': 0, 'strong': 1})
print('\n+++ CSV Data Change +++')
print(df.head())
print('\n+++ Data shape +++')
print(df.shape)

# Work on the raw values from here on.
data = df.values
print('\n+++ Data values +++')
print(data)

# Every column except the last is an attribute ...
data_train = data[:, :-1]
print('\n+++ Data train +++')
print(data_train)
# ... and the last column, flattened to 1-D, is the label.
data_label = data[:, -1:].flatten()
print('\n+++ Data label +++')
print(data_label)

# Fit the tree, print it as text and export it in DOT format.
clf = Id3Estimator()
clf.fit(data_train, data_label, check_input=True)
feature_names = ["outlook", "temperature", "humidity", "windy"]
exported_text = export_text(clf.tree_, feature_names)
print(exported_text)
export_graphviz(clf.tree_, 'out.dot', feature_names)
# Attribute matrix: the ten score/bonus/placement columns of `dados`.
dadosX = dados[[
    'pontuacao_final', 'sobrevivencia', 'bonus_ultima_sobrevivencia',
    'dano_disparo', 'bonus_disparo_morte', 'colisao_dano',
    'bonus_colisao_morte', '1lugar', '2lugar', '3lugar'
]].values
# Label column.
dadosY = dados['classificacao']

# 70/30 split without shuffling: the last 30% of the rows are the test set.
treinoX, testeX, treinoY, testeY = train_test_split(dadosX,
                                                    dadosY,
                                                    test_size=0.3,
                                                    shuffle=False)

# Depth-limited ID3 tree, exported in DOT format with the attribute names.
modeloArvodeID3 = Id3Estimator(max_depth=3)
modeloArvodeID3.fit(treinoX, treinoY)
export_graphviz(modeloArvodeID3.tree_, 'arvoreExecutada.dot', [
    'pontuacao_final', 'sobrevivencia', 'bonus_ultima_sobrevivencia',
    'dano_disparo', 'bonus_disparo_morte', 'colisao_dano',
    'bonus_colisao_morte', '1lugar', '2lugar', '3lugar'
])

classificacoes = modeloArvodeID3.predict(testeX)

# The metric functions take (y_true, y_pred). With the arguments reversed,
# macro precision and macro recall trade places, so the true labels go first.
print('Resultados Árvore de Decisão ID3 (Iterative Dichotomiser 3):')
print('Acurácia: %.4f' % accuracy_score(testeY, classificacoes))
print('Precisão: %.4f' %
      precision_score(testeY, classificacoes, average='macro'))
print('Sensibilidade: %.4f' %
      recall_score(testeY, classificacoes, average='macro'))
# Wrap the raw records in a DataFrame.
gender_df = pd.DataFrame(gender_data)

# Label-encode the string-valued columns named below.
transformed = MultiColumnLabelEncoder(
    columns=['link_color', 'sidebar_color', 'gender']).fit_transform(gender_df)

# First five columns, by position, as the attribute array.
# `.iloc` replaces `.ix`, which newer pandas no longer provides; with
# non-integer column labels `.ix[:, :5]` made this same positional slice.
data = np.array(transformed.iloc[:, :5])

# Target: the encoded gender column.
target = np.array(transformed['gender'])

# Fit on the full data set and export the tree in DOT format.
estimator = Id3Estimator()
estimator.fit(data, target)
export_graphviz(estimator.tree_, 'gender_tree.dot', names)

# Last row index of each of the first four hand-made folds.
fold_one = 4708
fold_two = 9417
fold_three = 14126
fold_four = 18835

# Training sets: all rows except one held-out range each.
dataOne = data[4709:18836]
dataTwo = np.append(data[0:4709], data[9417:18836], axis=0)
dataThree = np.append(data[0:9417], data[14127:18836], axis=0)
dataFour = data[0:14127]

# Matching training targets, sliced with the same ranges.
targetOne = target[4709:18836]
targetTwo = np.append(target[0:4709], target[9417:18836], axis=0)
targetThree = np.append(target[0:9417], target[14127:18836], axis=0)
targetFour = target[0:14127]
bunch = fetch_kddcup99(subset="SA") data = bunch.data data = np.delete(data, np.s_[1:4], axis=1) target = bunch.target X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=.2, random_state=17) estimator = Id3Estimator() print("->Fitting ID3 classifier") estimator.fit(X_train, y_train) print("->Writing dot file") export_graphviz(estimator.tree_, 'tree.dot') print("->Calculating predictions") pred = estimator.predict(X_test) well_detected = 0 for index, val in enumerate(pred): if val == y_test[index]: well_detected += 1 percentage = well_detected / len(pred) * 100 print("predictions: ", well_detected, "/", len(pred), " = ", percentage, "%") print("error rate: ", (100 - percentage), "%") """ col_names = ['duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 'land', 'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised', 'root_shell', 'su_attempted', 'num_root', 'num_file_creations', 'num_shells', 'num_access_files', 'num_outbound_cmds', 'is_host_login', 'is_guest_login', 'count', 'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate', 'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count', 'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate', 'dst_host_serror_rate', 'dst_host_srv_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate', 'label']
['Overcast', 'Hot', 'Normal', 'False'], ['Rainy', 'Mild', 'High', 'True'], ]) Y = np.array([ 'No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No', ]) clf = Id3Estimator() clf.fit(X, Y) export_graphviz(clf.tree_, "hw3/ID3/tennis.dot", feature_names=tennis_feature_names) subprocess.run( ['dot', '-Tpng', 'hw3/ID3/tennis.dot', '-o', 'hw3/ID3/tennis.png']) subprocess.run(['rm', 'hw3/ID3/tennis.dot'])