Esempio n. 1
0
File: run.py Progetto: whatot/id3-py
def run_app(fTrainIn, fTestIn):
    """
    Build and evaluate decision trees for a training file and a test file.

    Both arguments are file-like objects exposing ``readlines()``. The first
    line of each file names the attributes; every following line is one
    tab-separated record. The last entry of the attribute list returned by
    ``prepare_attributes`` is used as the target attribute.

    Returns a 5-tuple:
        (tree built from the training records,
         classification of the training records with that tree,
         classification of the test records with a tree built from the
         test records themselves,
         target values found in the training records,
         target values found in the test records)
    """
    # ---- test file -------------------------------------------------------
    test_lines = [raw.strip() for raw in fTestIn.readlines()]
    # The test header is split on single spaces only.
    test_header = test_lines[0].split(" ")
    # Drop the header line so that only the records remain.
    test_rows = test_lines[1:]

    attrList, _ = prepare_attributes(test_header)
    targetAttribute = attrList[-1]

    # One dict per record, mapping attribute name -> stripped field value.
    testData = [
        dict(zip(attrList, [cell.strip() for cell in row.split("\t")]))
        for row in test_rows
    ]

    # ---- training file ---------------------------------------------------
    train_lines = [raw.strip() for raw in fTrainIn.readlines()]
    # The training header may use tabs or spaces between attribute names.
    train_header = train_lines[0].replace("\t", " ").split(" ")
    # Drop the header line so that only the records remain.
    train_rows = train_lines[1:]

    attrListTrain, _ = prepare_attributes(train_header)
    targetAttrTrain = attrListTrain[-1]

    trainData = [
        dict(zip(attrListTrain, [cell.strip() for cell in row.split("\t")]))
        for row in train_rows
    ]

    # ---- build the trees and classify ------------------------------------
    trainingTree = dtree.create_decision_tree(
        trainData, attrListTrain, targetAttrTrain, dtree.gain)
    trainingClassify = dtree.classify(trainingTree, trainData)

    # The test records are classified with a tree built from the test
    # records themselves, not with the training tree.
    testTree = dtree.create_decision_tree(
        testData, attrList, targetAttribute, dtree.gain)
    testClassify = dtree.classify(testTree, testData)

    # Target values as given in the input files, for comparison by callers.
    givenTestClassify = [record[targetAttribute] for record in testData]
    givenTrainClassify = [record[targetAttrTrain] for record in trainData]

    return (trainingTree, trainingClassify, testClassify, givenTrainClassify,
            givenTestClassify)
Esempio n. 2
0
 def test_classify(self):
     """Build a tree from the lenses data file and classify one sample.

     Reads ``dtree/lenses.txt`` (located through ``self.get_file``) as
     tab-separated rows, builds a tree with ``dtree.create_tree`` and checks
     that the sample vector is classified as ``'soft'``.
     """
     fpath = self.get_file('dtree/lenses.txt')
     with open(fpath) as fp:
         # Each line is one tab-separated record.
         dataset = [line.strip().split('\t') for line in fp]
     labels = ['age', 'prescript', 'astigmatic', 'tearRate']
     tree = dtree.create_tree(dataset, labels)
     logging.info(tree)
     test_vec = ['pre', 'myope', 'no', 'normal']
     result = dtree.classify(tree, labels, test_vec)
     logging.info('result: ' + result)
     # assertEquals is a deprecated alias that was removed in Python 3.12.
     self.assertEqual(result, 'soft')
    def classify(self, trashlist, game):
        """Build one record per trash item and classify all of them.

        For every entry of ``trashlist`` a dict with the keys 'Distance',
        'Points' and 'Equipment' is created: the distance reported by
        ``game.distance`` and the player's equipment are passed through the
        fuzzify helpers, and the points are the string form of the entry's
        element at index 2.

        Returns a list of the items produced by
        ``dtree.classify(self.tree, records)``.
        """
        records = []
        for trash in trashlist:
            # The larger map dimension is the scale handed to fuzzifyDistance.
            longest_side = max(game.tilemap.mapheight, game.tilemap.mapwidth)
            records.append({
                'Distance': fuzzifyDistance(longest_side, game.distance(trash)),
                'Points': str(trash[2]),
                'Equipment': fuzzifyEquipment(game.player.capacity,
                                              game.player.sumEquipment()),
            })

        # Classify the prepared records and materialise the result as a list.
        return list(dtree.classify(self.tree, records))
 def filter_unclassifiable(listInst):
     """Keep only the instances that a tree built from them labels correctly.

     A tree is built from ``listInst`` with ``dtree.build_tree``; an instance
     is kept when ``dtree.classify`` returns its own ``fLabel``.
     """
     tree = dtree.build_tree(listInst)
     kept = []
     for candidate in listInst:
         if dtree.classify(tree, candidate) == candidate.fLabel:
             kept.append(candidate)
     return kept
 def test_classify_unknown(self):
     """Classifying this instance must yield the tree's default label.

     The instance values come from ``randlist(cValue + 1, cValue + 5, 4)``;
     presumably that range lies outside the values the random tree was built
     with -- confirm against ``randlist`` / ``build_random_tree``.
     """
     value_count = 3
     tree = build_random_tree(4, value_count)
     raw_values = randlist(value_count + 1, value_count + 5, 4)
     predicted = dtree.classify(tree, dtree.Instance(raw_values))
     self.assertEqual(predicted, tree.fDefaultLabel)
 def test_classify(self):
     """Instances generated from a random tree are classified as labelled.

     Five instances are produced by ``build_random_instance_from_dt`` and
     each must be classified with its own ``fLabel``.
     """
     dt = build_random_tree(4, 3)
     # range() instead of xrange(): identical here and also valid on Python 3.
     for _ in range(5):
         # The second returned value (the path) is not needed by this test.
         inst, _path = build_random_instance_from_dt(dt)
         fLabel = dtree.classify(dt, inst)
         self.assertEqual(inst.fLabel, fLabel)
Esempio n. 7
0
    # Load the target attribute and then the tree from fd_dt, in that order.
    # NOTE(review): pickle.load can execute arbitrary code when given
    # untrusted data -- confirm fd_dt always refers to a trusted file.
    target_attr = pickle.load(fd_dt)
    tree = pickle.load(fd_dt)
    fd_dt.close()

    # Open the file holding the records to predict; stop if it cannot be
    # opened.
    try:
        fd_pd = open(args.filename_predict, "r")
    except IOError:
        sys.stderr.write("Error: file '%s' not found.\n" 
                         % args.filename_predict)
        # NOTE(review): this exits with status 0 on an error path -- confirm
        # that a zero exit code is intended here.
        sys.exit(0)

    # 2nd argument is a null drop list
    # Only pdata is used below; the other three returned values are unused
    # in the visible code. targ_attr_idx is defined outside this excerpt.
    pdata, attributes, ignore_attr_orig, ignore_targ_attr = \
      prepare_data(fd_pd, [], targ_attr_idx, args.verbose_flag, learn=False)
    fd_pd.close()

    if args.verbose_flag == True:
        sys.stderr.write("%d records to predict have been read and prepared.\n"
                         % len(pdata))

    # Classify the prepared records with the unpickled tree.
    results = classify(tree, pdata)

    if args.results_flag == True:
        print("Prediction results in simple list form:\n%s\n"
                         % results)

    # Write a file with predictions by reading given unclassified data
    # file, append prediction to each record and write out new file.
    if args.write_flag == True:
        predicted_file(args.filename_predict, results, target_attr)
Esempio n. 8
0
#print 'Target-Attr : ',target_attr
print 'Number of Entries in Training Set after Cleanup', len(data)
print 'Number of Entries in Test Set after Cleanup', len(test)

d = input('No. of Training Instances to be taken: ')
s1 = input('Start from Index: ')

tree = dtree.create_decision_tree(c.values, data[s1:s1+d], attributes, target_attr, id3.gain, None)

print '-----------------Decision Tree Created------------------'
h = preprocess.Helper()

h.print_tree(tree, "")
t = input('No. of Test Instances to be taken: ')
s2 = input('Start from Index: ')
classification = dtree.classify(tree, test[s2:s2+t])
#print classification
#print test[s2:s2+t]
correct = 0
i = 0
for item in classification:
    #print item
    if test[i][target_attr] == (item+'.'):
        correct +=1
    i +=1
total = i

print '----------------Classification Results------------------'
print "Correctly-Classified: ", correct
print "Total-Classifications: ", total
print "Accuracy: ", (float(correct)/total)*100
Esempio n. 9
0
 def filter_unclassifiable(listInst):
     """Return the instances whose label is reproduced by their own tree.

     The tree comes from ``dtree.build_tree(listInst)``; instances for which
     ``dtree.classify`` does not return ``inst.fLabel`` are dropped.
     """
     tree = dtree.build_tree(listInst)
     matching = []
     for candidate in listInst:
         if dtree.classify(tree, candidate) == candidate.fLabel:
             matching.append(candidate)
     return matching
Esempio n. 10
0
 def test_classify_unknown(self):
     """The tree's default label is expected for this generated instance.

     Values are drawn with ``randlist(cValue + 1, cValue + 5, 4)``; presumably
     these are values the random tree has no branch for -- confirm against
     the helpers.
     """
     value_count = 3
     tree = build_random_tree(4, value_count)
     raw_values = randlist(value_count + 1, value_count + 5, 4)
     predicted = dtree.classify(tree, dtree.Instance(raw_values))
     self.assertEqual(predicted, tree.fDefaultLabel)
Esempio n. 11
0
 def test_classify(self):
     """Each instance generated from a random tree keeps its label.

     Five instances are built with ``build_random_instance_from_dt`` and
     ``dtree.classify`` must return each instance's own ``fLabel``.
     """
     dt = build_random_tree(4, 3)
     # range() instead of xrange(): identical here and also valid on Python 3.
     for _ in range(5):
         # The second returned value (the path) is not needed by this test.
         inst, _path = build_random_instance_from_dt(dt)
         fLabel = dtree.classify(dt, inst)
         self.assertEqual(inst.fLabel, fLabel)