Ejemplo n.º 1
0
    def c45(X_train, y_train, X_test, y_test):
        """Train a C4.5 decision tree and predict the test observations.

        The relative path ``"DecisionTree.xml"`` is handed to both
        ``pyC45.train`` and ``pyC45.predict``.

        Args:
            X_train: Training observations, forwarded unchanged to
                ``pyC45.train``.
            y_train: Training categories, forwarded unchanged to
                ``pyC45.train``.
            X_test: Test observations, forwarded unchanged to
                ``pyC45.predict``.
            y_test: Expected labels; must expose ``items()`` yielding
                ``(index, value)`` pairs (presumably a pandas Series --
                confirm against the caller).

        Returns:
            Tuple ``(answer, prediction)``: the expected labels converted
            with ``str`` and whatever ``pyC45.predict`` returns.
        """
        pyC45.train(X_train, y_train, "DecisionTree.xml")

        # ``items()`` is used instead of ``iteritems()`` because pandas 2.0
        # removed the latter; ``items()`` yields the same (index, value) pairs.
        answer = [str(label) for _, label in y_test.items()]
        prediction = pyC45.predict("DecisionTree.xml", X_test)
        return answer, prediction
Ejemplo n.º 2
0
    def trainandpredict(self, trainX, trainY, testX, testY):
        """Train a C4.5 tree, predict the test set and print the elapsed time.

        The relative path ``"DecisionTree.xml"`` is handed to both
        ``pyC45.train`` and ``pyC45.predict``.

        Args:
            trainX: Training observations, forwarded to ``pyC45.train``.
            trainY: Training categories, forwarded to ``pyC45.train``.
            testX: Test observations, forwarded to ``pyC45.predict``.
            testY: Expected labels; must expose ``items()`` yielding
                ``(index, value)`` pairs (presumably a pandas Series --
                confirm against the caller).

        Returns:
            Tuple ``(answer, prediction)``: the expected labels converted
            with ``str`` and whatever ``pyC45.predict`` returns.
        """
        pyC45.train(trainX, trainY, "DecisionTree.xml")

        # The clock starts only after training has finished, so the printed
        # figure covers label collection and prediction, not training.
        start_time = datetime.now()

        # ``items()`` is used instead of ``iteritems()`` because pandas 2.0
        # removed the latter; ``items()`` yields the same (index, value) pairs.
        answer = [str(label) for _, label in testY.items()]
        prediction = pyC45.predict("DecisionTree.xml", testX)

        dt = datetime.now() - start_time
        ms = (dt.days * 24 * 60 * 60 + dt.seconds) * 1000 + dt.microseconds / 1000.0
        print ('Time taken by this algo in millisec:' + str(ms))

        return answer, prediction
Ejemplo n.º 3
0
import pyC45, csv
def _load_observations(csv_path):
    """Read ``csv_path`` and split every row into features and category.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        Tuple ``(observations, categories)``: for each non-empty row, all
        columns but the last, and the last column, in file order.
    """
    observations = []
    categories = []
    # The context manager closes the handle even if parsing fails.
    with open(csv_path) as csv_file:
        for row in csv.reader(csv_file):
            if not row:
                # csv.reader yields [] for a blank line; row[-1] would raise.
                continue
            observations.append(row[:-1])
            categories.append(row[-1])
    return observations, categories


if __name__ == "__main__":
    csv_path = './data/training_set.csv'

    #train a C45 decision tree and save the tree as an XML file
    training_obs, training_cat = _load_observations(csv_path)
    pyC45.train(training_obs, training_cat, "DecisionTree.xml")

    #test the C45 decision tree
    # The file is read a second time so prediction gets fresh lists that
    # training cannot have touched.
    testing_obs, answer = _load_observations(csv_path)
    # Drop the first label (presumably the header row; assumes pyC45.predict
    # returns no prediction for that row -- confirm against pyC45).
    answer.pop(0)

    prediction = pyC45.predict("DecisionTree.xml", testing_obs)
    err = sum(
        1 for expected, predicted in zip(answer, prediction)
        if expected != predicted
    )
    error_rate = round(float(err) / len(prediction) * 100, 2)
    # A single pre-formatted string prints identically on Python 2 and 3.
    print("error rate= %s %%" % error_rate)
Ejemplo n.º 4
0
    # Split every row of the ``reader`` created above this fragment into its
    # feature columns and its trailing category column.
    training_obs = []
    training_cat = []
    for line in reader:
        training_obs.append(line[:-1])
        training_cat.append(line[-1])

    # The same tree file is used for training and for prediction.
    # NOTE(review): "plugin/dataDecisionTree.xml" may be missing a "/" after
    # "data" (the CSV lives in "plugin/data/") -- confirm the intended path.
    tree_path = "C:/Users/Luiz Felipe/Documents/TCC/assistenteX/plugin/dataDecisionTree.xml"
    pyC45.train(training_obs, training_cat, tree_path)

    #test the C45 decision tree
    answer = []
    testing_obs = []
    # The context manager closes the CSV handle once all rows are consumed.
    with open(
            'C:/Users/Luiz Felipe/Documents/TCC/assistenteX/plugin/data/training_set.csv'
    ) as csv_file:
        reader = csv.reader(csv_file)
        for line in reader:
            testing_obs.append(line[:-1])
            answer.append(line[-1])
    # Drop the first label (presumably the header row; assumes pyC45.predict
    # returns no prediction for that row -- confirm against pyC45).
    answer.pop(0)

    prediction = pyC45.predict(tree_path, testing_obs)
    # Count the positions where the expected label and the prediction differ.
    err = sum(
        1 for expected, predicted in zip(answer, prediction)
        if expected != predicted
    )
    print("error rate=", round(float(err) / len(prediction) * 100, 2), "%")
Ejemplo n.º 5
0
import pyC45,csv
def _load_observations(csv_path):
    """Read ``csv_path`` and split every row into features and category.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        Tuple ``(observations, categories)``: for each non-empty row, all
        columns but the last, and the last column, in file order.
    """
    observations = []
    categories = []
    # The context manager closes the handle even if parsing fails.
    with open(csv_path) as csv_file:
        for row in csv.reader(csv_file):
            if not row:
                # csv.reader yields [] for a blank line; row[-1] would raise.
                continue
            observations.append(row[:-1])
            categories.append(row[-1])
    return observations, categories


if __name__ == "__main__":
    csv_path = './data/training_set.csv'

    #train a C45 decision tree and save the tree as an XML file
    training_obs, training_cat = _load_observations(csv_path)
    pyC45.train(training_obs, training_cat, "DecisionTree.xml")

    #test the C45 decision tree
    # The file is read a second time so prediction gets fresh lists that
    # training cannot have touched.
    testing_obs, answer = _load_observations(csv_path)
    # Drop the first label (presumably the header row; assumes pyC45.predict
    # returns no prediction for that row -- confirm against pyC45).
    answer.pop(0)

    prediction = pyC45.predict("DecisionTree.xml", testing_obs)
    err = sum(
        1 for expected, predicted in zip(answer, prediction)
        if expected != predicted
    )
    error_rate = round(float(err) / len(prediction) * 100, 2)
    # A single pre-formatted string prints identically on Python 2 and 3.
    print("error rate= %s %%" % error_rate)