def main():
    """Train a pyC45 model on a six-game sample and store it in ``Q1c45.xml``.

    The observation table handed to ``pyC45.train`` starts with a row of
    column labels, followed by one row per game; the outcomes list holds
    one label per game.
    """
    column_labels = ["home/away", "top25", "media"]
    games = [
        ['home', 'out', '1-nbc'],
        ['home', 'in', '1-nbc'],
        ['away', 'out', '2-espn'],
        ['away', 'out', '3-fox'],
        ['home', 'out', '1-nbc'],
        ['away', 'out', '4-abc'],
    ]
    outcomes = ["win", "lose", "win", "win", "win", "win"]

    observations = [column_labels]
    observations.extend(games)
    pyC45.train(observations, outcomes, "Q1c45.xml")
def c45(X_train, y_train, X_test, y_test):
    """Train a pyC45 decision tree and classify the test observations.

    The tree is trained on ``X_train``/``y_train`` and written to
    ``DecisionTree.xml``; that file is then used to classify ``X_test``.

    Args:
        X_train: Training observations, forwarded to ``pyC45.train``.
        y_train: Training labels, forwarded to ``pyC45.train``.
        X_test: Observations to classify, forwarded to ``pyC45.predict``.
        y_test: Expected labels. Must provide ``items()`` yielding
            ``(key, value)`` pairs (for example a pandas ``Series`` or a
            ``dict``).

    Returns:
        A ``(answer, prediction)`` tuple. ``answer`` is the list of
        ``str(value)`` for every value in ``y_test``; ``prediction`` is
        whatever ``pyC45.predict`` returns.
    """
    pyC45.train(X_train, y_train, "DecisionTree.xml")

    # items() instead of iteritems(): pandas removed Series.iteritems() in
    # 2.0, while items() is available on both pandas objects and dicts.
    answer = [str(label) for _, label in y_test.items()]

    prediction = pyC45.predict("DecisionTree.xml", X_test)
    return answer, prediction
def trainandpredict(self, trainX, trainY, testX, testY):
    """Train a pyC45 decision tree, classify ``testX`` and report the timing.

    The tree is trained on ``trainX``/``trainY`` and written to
    ``DecisionTree.xml``; that file is then used to classify ``testX``.
    The elapsed time in milliseconds is printed to stdout.

    Args:
        trainX: Training observations, forwarded to ``pyC45.train``.
        trainY: Training labels, forwarded to ``pyC45.train``.
        testX: Observations to classify, forwarded to ``pyC45.predict``.
        testY: Expected labels. Must provide ``items()`` yielding
            ``(key, value)`` pairs (for example a pandas ``Series`` or a
            ``dict``).

    Returns:
        A ``(answer, prediction)`` tuple. ``answer`` is the list of
        ``str(value)`` for every value in ``testY``; ``prediction`` is
        whatever ``pyC45.predict`` returns.
    """
    pyC45.train(trainX, trainY, "DecisionTree.xml")

    # The clock starts only after training has finished, so the reported
    # figure covers collecting the expected labels and running prediction.
    start_time = datetime.now()

    # items() instead of iteritems(): pandas removed Series.iteritems() in
    # 2.0, while items() is available on both pandas objects and dicts.
    answer = [str(label) for _, label in testY.items()]

    prediction = pyC45.predict("DecisionTree.xml", testX)

    dt = datetime.now() - start_time
    ms = (dt.days * 24 * 60 * 60 + dt.seconds) * 1000 + dt.microseconds / 1000.0
    print('Time taken by this algo in millisec:' + str(ms))
    return answer, prediction
def main():
    """Train a pyC45 model on 24 past games and store it in ``T5c45.xml``.

    The observation table handed to ``pyC45.train`` starts with a row of
    column labels, followed by one row per game; the results list holds
    one label per game. A list of further games named ``testing`` is built
    afterwards but is not used within this function.
    """
    column_labels = ["Opponent", "Home/Away", "AP Top 25", "Media"]
    played_games = [
        ['Texas', 'Home', 'Out', '1-NBC'],
        ['Virginia', 'Away', 'Out', '4-ABC'],
        ['GeorgiaTech', 'Home', 'In', '1-NBC'],
        ['UMass', 'Home', 'Out', '1-NBC'],
        ['Clemson', 'Away', 'In', '4-ABC'],
        ['Navy', 'Home', 'Out', '1-NBC'],
        ['USC', 'Home', 'In', '1-NBC'],
        ['Temple', 'Away', 'Out', '4-ABC'],
        ['PITT', 'Away', 'Out', '4-ABC'],
        ['WakeForest', 'Home', 'Out', '1-NBC'],
        ['BostonCollege', 'Away', 'Out', '1-NBC'],
        ['Stanford', 'Away', 'In', '3-FOX'],
        ['Texas', 'Away', 'Out', '4-ABC'],
        ['Nevada', 'Home', 'Out', '1-NBC'],
        ['MichiganState', 'Home', 'Out', '1-NBC'],
        ['Duke', 'Home', 'Out', '1-NBC'],
        ['Syracuse', 'Home', 'Out', '2-ESPN'],
        ['NorthCarolinaState', 'Away', 'Out', '4-ABC'],
        ['Stanford', 'Home', 'In', '1-NBC'],
        ['MiamiFlorida', 'Home', 'Out', '1-NBC'],
        ['Navy', 'Home', 'Out', '5-CBS'],
        ['Army', 'Home', 'Out', '1-NBC'],
        ['VirginiaTech', 'Home', 'In', '1-NBC'],
        ['USC', 'Away', 'In', '4-ABC'],
    ]
    results = [
        "Win", "Win", "Win", "Win", "Lose", "Win",
        "Win", "Win", "Win", "Win", "Win", "Lose",
        "Lose", "Win", "Lose", "Lose", "Win", "Lose",
        "Lose", "Win", "Lose", "Win", "Lose", "Lose",
    ]

    observations = [column_labels]
    observations.extend(played_games)
    pyC45.train(observations, results, "T5c45.xml")

    # Games set aside for prediction; the commented-out rows are disabled
    # entries carried over unchanged.
    testing = [
        ["Temple", "Home", "Out", "1-NBC"],
        # ["Georgia", "Home", "In", "1-NBC"],
        ["BostonCollege", "Away", "Out", "2-ESPN"],
        ["MichiganState", "Away", "Out", "3-FOX"],
        # ["MiamiOhio", "Home", "Out", "1-NBC"],
        # ["NorthCarolina", "Away", "Out", "4-ABC"],
        ["USC", "Home", "In", "1-NBC"],
        ["NorthCarolinaState", "Home", "Out", "1-NBC"],
        ["WakeForest", "Home", "Out", "1-NBC"],
        ["MiamiFlorida", "Away", "In", "4-ABC"],
        ["Navy", "Home", "Out", "1-NBC"],
        ["Stanford", "Away", "In", "4-ABC"],
    ]
def main():
    """Train a pyC45 model on the 14-row weather table, saved as ``Q2c45.xml``.

    The observation table handed to ``pyC45.train`` starts with a row of
    column labels, followed by one row per day; the decisions list holds
    one ``"Yes"``/``"No"`` label per day.
    """
    column_labels = ["outlook", "temperature", "humidity", "windy"]
    days = [
        ['sunny', 'hot', 'high', 'false'],
        ['sunny', 'hot', 'high', 'true'],
        ['overcast', 'hot', 'high', 'false'],
        ['rainy', 'mild', 'high', 'false'],
        ['rainy', 'cool', 'normal', 'false'],
        ['rainy', 'cool', 'normal', 'true'],
        ['overcast', 'cool', 'normal', 'true'],
        ['sunny', 'mild', 'high', 'false'],
        ['sunny', 'cool', 'normal', 'false'],
        ['rainy', 'mild', 'normal', 'false'],
        ['sunny', 'mild', 'normal', 'true'],
        ['overcast', 'mild', 'high', 'true'],
        ['overcast', 'hot', 'normal', 'false'],
        ['rainy', 'mild', 'high', 'true'],
    ]
    decisions = [
        "No", "No", "Yes", "Yes", "Yes", "No", "Yes",
        "No", "Yes", "Yes", "Yes", "Yes", "Yes", "No",
    ]

    observations = [column_labels]
    observations.extend(days)
    pyC45.train(observations, decisions, "Q2c45.xml")
import csv

import pyC45

if __name__ == "__main__":
    # Parse the CSV once inside a context manager so the file handle is
    # always closed. open() replaces the Python-2-only file() builtin.
    with open('./data/training_set.csv') as handle:
        rows = list(csv.reader(handle))

    # Train a C45 decision tree and save the tree as an XML file.
    # The last column of every row is the category; the rest are attributes.
    training_obs = [row[:-1] for row in rows]
    training_cat = [row[-1] for row in rows]
    pyC45.train(training_obs, training_cat, "DecisionTree.xml")

    # Test the C45 decision tree against the same data set. The lists are
    # rebuilt from the parsed rows so they are independent of the ones
    # handed to pyC45.train.
    testing_obs = [row[:-1] for row in rows]
    answer = [row[-1] for row in rows]
    # Drop the first row's label before comparing; presumably that row is
    # the CSV header -- TODO confirm against the data file.
    answer.pop(0)
    prediction = pyC45.predict("DecisionTree.xml", testing_obs)

    # Count the positions where the expected label and the prediction differ.
    err = sum(
        1 for expected, predicted in zip(answer, prediction)
        if expected != predicted
    )
    error_rate = round(float(err) / len(prediction) * 100, 2)
    # A single pre-formatted string prints identically on Python 2 and 3.
    print("error rate= {0} %".format(error_rate))
import pyC45 import csv if __name__ == "__main__": #train a C45 decision tree and save the tree as an XML open reader = csv.reader( open( 'C:/Users/Luiz Felipe/Documents/TCC/assistenteX/plugin/data/training_set.csv' )) training_obs = [] training_cat = [] for line in reader: training_obs.append(line[:-1]) training_cat.append(line[-1]) pyC45.train( training_obs, training_cat, "C:/Users/Luiz Felipe/Documents/TCC/assistenteX/plugin/dataDecisionTree.xml" ) #test the C45 decision tree reader = csv.reader( open( 'C:/Users/Luiz Felipe/Documents/TCC/assistenteX/plugin/data/training_set.csv' )) answer = [] testing_obs = [] for line in reader: testing_obs.append(line[:-1]) answer.append(line[-1]) answer.pop(0) prediction = pyC45.predict(
import csv

import pyC45

if __name__ == "__main__":
    # Read the data set a single time; the with-block guarantees the handle
    # is closed, and open() works where the Python-2-only file() does not.
    with open('./data/training_set.csv') as csv_file:
        records = list(csv.reader(csv_file))

    # Train a C45 decision tree and save the tree as an XML file.
    # Each record's last field is its category; the other fields are the
    # observed attributes.
    training_obs = [record[:-1] for record in records]
    training_cat = [record[-1] for record in records]
    pyC45.train(training_obs, training_cat, "DecisionTree.xml")

    # Test the C45 decision tree on the same records. Fresh lists are built
    # so nothing is shared with the lists passed to pyC45.train.
    testing_obs = [record[:-1] for record in records]
    answer = [record[-1] for record in records]
    # The first record's label is discarded before scoring; presumably it
    # is the CSV header -- TODO confirm against the data file.
    answer.pop(0)
    prediction = pyC45.predict("DecisionTree.xml", testing_obs)

    # Tally every position where the prediction disagrees with the answer.
    err = sum(
        1 for expected, predicted in zip(answer, prediction)
        if expected != predicted
    )
    error_rate = round(float(err) / len(prediction) * 100, 2)
    # One pre-built string gives the same output on Python 2 and Python 3.
    print("error rate= {0} %".format(error_rate))