예제 #1
0
def CreaMatrice():
    """Build an ID3 tree from a whitespace-separated text file picked by the user.

    The first line of the chosen file supplies the column names.  On every
    following non-blank line the last token is taken as the target value and
    the remaining tokens as the feature values.  The text rendering of the
    fitted tree is printed and also written to ``matrice.txt``.

    Returns:
        None.  When the file dialog is cancelled nothing is read or written.
    """
    filename = askopenfilename(title="Ouvrir votre document",
                               filetypes=[('txt files', '.txt'),
                                          ('all files', '.*')])
    if not filename:
        # Dialog cancelled: there is no file to load.
        return

    with open(filename, "r") as fichier:
        # Header line: column names, passed as-is to export_text below.
        feature_names = fichier.readline().split()
        # Blank lines would yield empty rows and break the target split.
        lignes = [ligne.split() for ligne in fichier if ligne.strip()]

    # The last token of each row is the target; the rest are the features.
    attributCible = [ligne[-1] for ligne in lignes]
    fichierX = [ligne[:-1] for ligne in lignes]

    X = np.array(fichierX)
    y = np.array(attributCible)

    clf = Id3Estimator()
    clf.fit(X, y, check_input=False)

    # Render once and reuse the text for both the console and the file.
    texte = export_text(clf.tree_, feature_names)
    print(texte)
    with open("matrice.txt", "w") as save:
        save.write(texte)
 def draw_graph(self, x, y):
     """Fit decision trees on ``x``/``y`` and export them as DOT and PDF files.

     An ``Id3Estimator`` is fitted first; its text form is printed and its
     graph is written to ``SVC_Tree.dot`` / ``SVC_Tree.pdf``.  A
     ``DecisionTreeClassifier`` is then fitted on the same data and exported
     twice: without labels (``SVC_Tree_v1.dot`` / ``SVC_Tree_v1.pdf``) and
     with feature names, class names and styling (``SVC_Tree_v2`` /
     ``SVC_Tree_v2.pdf``).

     Args:
         x: Training samples, passed unchanged to both estimators.
         y: Training labels, passed unchanged to both estimators.

     Returns:
         True once every export call has returned.
     """
     # ID3 tree: print the text form, then export DOT and convert it to PDF.
     clf = Id3Estimator()
     clf.fit(x, y, check_input=True)
     print(export_text(clf.tree_, self.feature_names))

     export_graphviz(clf.tree_, 'SVC_Tree.dot', self.feature_names)
     graph = pydot.graph_from_dot_file('SVC_Tree.dot')
     graph[0].write_pdf("SVC_Tree.pdf")

     clf = DecisionTreeClassifier()
     clf = clf.fit(x, y)

     # Version 1: plain export.  The return value is not kept because the DOT
     # text goes to the file named by out_file.
     tree.export_graphviz(clf, out_file='SVC_Tree_v1.dot')
     graph = pydot.graph_from_dot_file('SVC_Tree_v1.dot')
     graph[0].write_pdf("SVC_Tree_v1.pdf")

     # Version 2: labelled and styled export, converted to PDF from the file
     # that export_graphviz has just written.
     tree.export_graphviz(clf, out_file="SVC_Tree_v2", feature_names=self.feature_names, class_names=self.target, filled=True, rounded=True, special_characters=True)
     graph = pydot.graph_from_dot_file('SVC_Tree_v2')
     graph[0].write_pdf("SVC_Tree_v2.pdf")
     return True
예제 #3
0
def Tree():
    """Train an ID3 tree from ``przypadki.txt`` / ``wyniki.txt`` and export it.

    Each case line of ``przypadki.txt`` is split on single spaces and its
    seventh field is cut down to its first character.  The labels are the
    characters of the first line of ``wyniki.txt`` (minus its last two),
    each converted to ``int``.  The feature matrix and a combined table
    (header row plus one row per labelled case) are printed, the tree is
    written to ``out.dot`` and its text form is printed.

    Returns:
        The fitted ``Id3Estimator``.
    """
    names = ["tarcza", "czy lata", "wiek", "zbroja", "hp", "level", "potwor"]

    # Plain 'r' is used instead of 'rU': the 'U' flag was removed in
    # Python 3.11, and the context manager closes the handle.
    with open('przypadki.txt', 'r') as cases_file:
        count = len(cases_file.readlines())

    x = []
    # NOTE(review): linecache is 1-indexed, so range(1, count) leaves out the
    # last line of the file -- confirm against the data that this is intended.
    for i in range(1, count):
        line = linecache.getline('przypadki.txt', i).split(" ")
        # Keep only the first character of the 7th field.
        line[6] = str(line[6][0])
        x.append(line)
    X = np.asarray(x)
    print(X)

    # NOTE(review): [:-2] drops the final two characters of the line --
    # presumably the newline plus one label; verify against wyniki.txt.
    yd = [int(c) for c in linecache.getline('wyniki.txt', 1)[:-2]]
    y = np.array(yd)

    # Build the header from a copy so that `names` keeps exactly the feature
    # columns when it is handed to the exporters below.
    d = [names + ["wynik"]]
    for i, label in enumerate(yd):
        d.append(x[i] + [label])
    print(d)

    clf = Id3Estimator()
    clf.fit(X, y, check_input=True)

    export_graphviz(clf.tree_, "out.dot", names)
    print(export_text(clf.tree_, names))
    return clf
예제 #4
0
def main():
    """Fit an Id3Estimator on 24 hand-written games and predict nine more.

    Prints the text form of the fitted tree under "Training:" and the
    predictions for the held-out games under "Testing:".
    """
    columns = ["Opponent", "Home/Away", "AP Top 25", "Media"]

    # One entry per game: (feature values in `columns` order, outcome).
    games = [
        (['Texas', 'Home', 'Out', '1-NBC'], "Win"),
        (['Virginia', 'Away', 'Out', '4-ABC'], "Win"),
        (['GeorgiaTech', 'Home', 'In', '1-NBC'], "Win"),
        (['UMass', 'Home', 'Out', '1-NBC'], "Win"),
        (['Clemson', 'Away', 'In', '4-ABC'], "Lose"),
        (['Navy', 'Home', 'Out', '1-NBC'], "Win"),
        (['USC', 'Home', 'In', '1-NBC'], "Win"),
        (['Temple', 'Away', 'Out', '4-ABC'], "Win"),
        (['PITT', 'Away', 'Out', '4-ABC'], "Win"),
        (['WakeForest', 'Home', 'Out', '1-NBC'], "Win"),
        (['BostonCollege', 'Away', 'Out', '1-NBC'], "Win"),
        (['Stanford', 'Away', 'In', '3-FOX'], "Lose"),
        (['Texas', 'Away', 'Out', '4-ABC'], "Lose"),
        (['Nevada', 'Home', 'Out', '1-NBC'], "Win"),
        (['MichiganState', 'Home', 'Out', '1-NBC'], "Lose"),
        (['Duke', 'Home', 'Out', '1-NBC'], "Lose"),
        (['Syracuse', 'Home', 'Out', '2-ESPN'], "Win"),
        (['NorthCarolinaState', 'Away', 'Out', '4-ABC'], "Lose"),
        (['Stanford', 'Home', 'In', '1-NBC'], "Lose"),
        (['MiamiFlorida', 'Home', 'Out', '1-NBC'], "Win"),
        (['Navy', 'Home', 'Out', '5-CBS'], "Lose"),
        (['Army', 'Home', 'Out', '1-NBC'], "Win"),
        (['VirginiaTech', 'Home', 'In', '1-NBC'], "Lose"),
        (['USC', 'Away', 'In', '4-ABC'], "Lose"),
    ]
    samples = np.array([features for features, _ in games])
    outcomes = np.array([outcome for _, outcome in games])

    estimator = Id3Estimator()
    estimator.fit(samples, outcomes, check_input=True)
    print("Training:")
    print(export_text(estimator.tree_, columns))

    # Games to predict.  Rows that were disabled in the original example:
    #   ["Georgia", "Home", "In", "1-NBC"]
    #   ["MiamiOhio", "Home", "Out", "1-NBC"]
    #   ["NorthCarolina", "Away", "Out", "4-ABC"]
    upcoming = [
        ["Temple", "Home", "Out", "1-NBC"],
        ["BostonCollege", "Away", "Out", "2-ESPN"],
        ["MichiganState", "Away", "Out", "3-FOX"],
        ["USC", "Home", "In", "1-NBC"],
        ["NorthCarolinaState", "Home", "Out", "1-NBC"],
        ["WakeForest", "Home", "Out", "1-NBC"],
        ["MiamiFlorida", "Away", "In", "4-ABC"],
        ["Navy", "Home", "Out", "1-NBC"],
        ["Stanford", "Away", "In", "4-ABC"],
    ]
    print("\n\nTesting:")
    predictions = estimator.predict(upcoming)
    print(predictions)
예제 #5
0
def main():
    """Fit an Id3Estimator on six hand-written games and predict six more.

    Prints the text form of the fitted tree, then the predictions for the
    test rows under "Testing:".
    """
    columns = ["home/away", "top25", "media"]

    # Training samples, one row per game, in `columns` order.
    samples = np.array([
        ['home', 'out', '1-nbc'],
        ['home', 'in', '1-nbc'],
        ['away', 'out', '2-espn'],
        ['away', 'out', '3-fox'],
        ['home', 'out', '1-nbc'],
        ['away', 'out', '4-abc'],
    ])
    # Outcome of each training game, aligned with `samples`.
    outcomes = np.array(["win", "lose", "win", "win", "win", "win"])

    estimator = Id3Estimator()
    estimator.fit(samples, outcomes, check_input=True)

    rendered = export_text(estimator.tree_, columns)
    print(rendered)

    unseen = [
        ["home", "in", "1-nbc"],
        ["home", "out", "1-nbc"],
        ["home", "out", "1-nbc"],
        ["home", "in", "4-abc"],
        ["home", "out", "1-nbc"],
        ["home", "in", "4-abc"],
    ]
    print("\n\nTesting:")
    print(estimator.predict(unseen))
예제 #6
0
def main():
    """Fit an Id3Estimator on a 14-row weather table and predict one sample.

    Prints the text form of the fitted tree under "Training:" and the
    prediction for a single hand-written sample afterwards.
    """
    columns = ["outlook", "temperature", "humidity", "windy"]

    # Training samples, one row per observation, in `columns` order.
    observations = np.array([
        ['sunny', 'hot', 'high', 'false'],
        ['sunny', 'hot', 'high', 'true'],
        ['overcast', 'hot', 'high', 'false'],
        ['rainy', 'mild', 'high', 'false'],
        ['rainy', 'cool', 'normal', 'false'],
        ['rainy', 'cool', 'normal', 'true'],
        ['overcast', 'cool', 'normal', 'true'],
        ['sunny', 'mild', 'high', 'false'],
        ['sunny', 'cool', 'normal', 'false'],
        ['rainy', 'mild', 'normal', 'false'],
        ['sunny', 'mild', 'normal', 'true'],
        ['overcast', 'mild', 'high', 'true'],
        ['overcast', 'hot', 'normal', 'false'],
        ['rainy', 'mild', 'high', 'true'],
    ])

    # Label of each observation, aligned row-by-row with `observations`.
    labels = np.array([
        "No", "No", "Yes", "Yes", "Yes", "No", "Yes",
        "No", "Yes", "Yes", "Yes", "Yes", "Yes", "No",
    ])

    estimator = Id3Estimator()
    estimator.fit(observations, labels, check_input=True)
    print("Training:")
    print(export_text(estimator.tree_, columns))

    print("Testing: rainy, hot, high, false")
    sample = [["rainy", "hot", "high", "false"]]
    # NOTE: the original author reported a DeprecationWarning on this call
    # and chose to ignore it.
    print(estimator.predict(sample))
예제 #7
0
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Author: Andrew Floyd
Date: 10/18/2018
Course: CS3001: Intro to Data Science
File Description: File for basketball ID3 tree
"""

from id3 import Id3Estimator, export_text
import numpy as np

# Column labels used when rendering the tree, in the order of each sample.
feature_names = ["home/away", "top25", "media"]

# Training samples: one row per game.
X = np.array([
    ["home", "out", "1-NBC"],
    ["home", "in", "1-NBC"],
    ["away", "out", "2-ESPN"],
    ["away", "out", "3-FOX"],
    ["home", "out", "1-NBC"],
    ["away", "out", "4-ABC"],
])

# Outcome of each game, aligned row-by-row with X.
y = np.array([
    "(win)",
    "(lose)",
    "(win)",
    "(win)",
    "(win)",
    "(win)",
])

# Fit the ID3 estimator and print the text form of the resulting tree.
clf = Id3Estimator()
clf.fit(X, y, check_input=True)

print(export_text(clf.tree_, feature_names))
예제 #8
0
def _report(title, value):
    """Print a section title followed by the value it introduces."""
    print(title)
    print(value)


# Recode the 'windy' column from its text labels to 0/1.
df['windy'] = df['windy'].map({'weak': 0, 'strong': 1})

_report('\n+++ CSV Data Change +++', df.head())
_report('\n+++ Data shape +++', df.shape)

data = df.values
_report('\n+++ Data values +++', data)

# Every column except the last is used as a feature.
data_train = data[:, :-1]
_report('\n+++ Data train +++', data_train)

# The last column holds the labels; flatten it to one dimension.
data_label = data[:, -1:].flatten()
_report('\n+++ Data label +++', data_label)

clf = Id3Estimator()
clf.fit(data_train, data_label, check_input=True)

feature_names = ["outlook", "temperature", "humidity", "windy"]

# Render the fitted tree as text, then export it to a DOT file.
exported_text = export_text(clf.tree_, feature_names)
print(exported_text)

export_graphviz(clf.tree_, 'out.dot', feature_names)
예제 #9
0
from id3 import Id3Estimator, export_text
import numpy as np
import preprocess, csv
# Training data: the feature rows (X) and the matching target values (Y)
# are filled from aggregated_data.csv below.
X = []
Y = []
# Labels for the six CSV columns selected as features (indices 1, 3..7).
feature_names = [
    'CodedMonth', 'District', 'XCoord', 'YCoord', 'Day', 'Time'
]
with open('aggregated_data.csv', mode='r') as infile:
    print('opened el file correcto')
    csvfile = csv.reader(infile, delimiter=',')
    # `rows` counts every row read, header included.  It stays a module-level
    # name in case code further down the script reads it.
    rows = 0
    for row in csvfile:
        # Row 0 is the header.  Blank lines parse to an empty list and carry
        # no data, so they are skipped rather than indexed.
        if rows > 0 and row:
            X.append([row[1], row[3], row[4], row[5], row[6], row[7]])
            Y.append(row[2])
        rows += 1
X = np.array(X)
Y = np.array(Y)
clf = Id3Estimator()
clf.fit(X, Y, check_input=True)
text = export_text(clf.tree_, feature_names)
# The context manager closes the file; no explicit close() is needed.
with open('tree.txt', mode='w+') as out:
    out.write(text)
testX = []
testY = []
with open('testing.csv', mode='r') as test: