def run_app(fTrainIn, fTestIn):
    """
    Build and evaluate decision trees from a training file and a test file.

    Both arguments are open file-like objects. The first line of each file
    holds the attribute names; every following line is one tab-separated
    example. The last entry of the attribute list returned by
    ``prepare_attributes`` is used as the target attribute.

    Returns a 5-tuple: (training tree, classification of the training data,
    classification of the test data, target values given in the training
    file, target values given in the test file).
    """

    def to_examples(rows, names):
        # Pair every stripped, tab-separated field with its attribute name.
        examples = []
        for row in rows:
            fields = [field.strip() for field in row.split("\t")]
            examples.append(dict(zip(names, fields)))
        return examples

    # ---- test file ----
    test_lines = [raw.strip() for raw in fTestIn.readlines()]
    # NOTE(review): the test header is split on spaces only, while the
    # training header below first turns tabs into spaces - confirm that
    # this difference is intended.
    test_header = test_lines[0].split(" ")
    # Drop the header line so that only example rows remain.
    del test_lines[0]
    attrList, attrDict = prepare_attributes(test_header)
    targetAttribute = attrList[-1]
    testData = to_examples(test_lines, attrList)

    # ---- training file ----
    train_lines = [raw.strip() for raw in fTrainIn.readlines()]
    train_header = train_lines[0].replace("\t", " ").split(" ")
    # Once the attribute names are known the header line is removed.
    del train_lines[0]
    attrListTrain, attrDictTrain = prepare_attributes(train_header)
    targetAttrTrain = attrListTrain[-1]
    trainData = to_examples(train_lines, attrListTrain)

    # Tree built from the training data, applied to the training data.
    trainingTree = dtree.create_decision_tree(
        trainData, attrListTrain, targetAttrTrain, dtree.gain)
    trainingClassify = dtree.classify(trainingTree, trainData)

    # NOTE(review): the test data is classified by a tree built from the
    # test data itself, not by trainingTree - confirm this is intended.
    testTree = dtree.create_decision_tree(
        testData, attrList, targetAttribute, dtree.gain)
    testClassify = dtree.classify(testTree, testData)

    # Also return the target values that were given in both files.
    givenTestClassify = [example[targetAttribute] for example in testData]
    givenTrainClassify = [example[targetAttrTrain] for example in trainData]

    return (trainingTree, trainingClassify, testClassify,
            givenTrainClassify, givenTestClassify)
def test_classify(self):
    """Build a tree from the lenses data set and classify one sample.

    Reads the tab-separated file ``dtree/lenses.txt`` (located through
    ``self.get_file``), builds a tree with ``dtree.create_tree`` and expects
    the sample ``['pre', 'myope', 'no', 'normal']`` to be classified as
    ``'soft'``.
    """
    fpath = self.get_file('dtree/lenses.txt')
    with open(fpath) as fp:
        # Iterate the file object directly; no need to materialise readlines().
        dataset = [line.strip().split('\t') for line in fp]
    labels = ['age', 'prescript', 'astigmatic', 'tearRate']
    tree = dtree.create_tree(dataset, labels)
    logging.info(tree)
    test_vec = ['pre', 'myope', 'no', 'normal']
    result = dtree.classify(tree, labels, test_vec)
    logging.info('result: ' + result)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(result, 'soft')
def classify(self, trashlist, game):
    """Build one test-data record per trash item and classify all of them.

    For each entry of ``trashlist`` a record with the keys 'Distance',
    'Points' and 'Equipment' is created; the distance and equipment values
    are passed through the fuzzify helpers. All records are then handed to
    ``dtree.classify`` together with ``self.tree``.

    Returns the classification results as a list.
    """
    records = []
    for trash in trashlist:
        # The longer side of the tile map is the first argument of
        # fuzzifyDistance.
        longest_side = max(game.tilemap.mapheight, game.tilemap.mapwidth)
        records.append({
            'Distance': fuzzifyDistance(longest_side, game.distance(trash)),
            'Points': str(trash[2]),
            'Equipment': fuzzifyEquipment(game.player.capacity,
                                          game.player.sumEquipment()),
        })
    # Classify the prepared test data and return the results as a list.
    return list(dtree.classify(self.tree, records))
def filter_unclassifiable(listInst):
    """Return the instances whose own label is reproduced by the tree.

    A decision tree is built from ``listInst``; every instance that this
    tree classifies with a label different from its ``fLabel`` is dropped.
    """
    tree = dtree.build_tree(listInst)
    kept = []
    for candidate in listInst:
        predicted = dtree.classify(tree, candidate)
        if predicted == candidate.fLabel:
            kept.append(candidate)
    return kept
def test_classify_unknown(self):
    """The tree's default label is expected for an instance built from
    ``randlist(cValue + 1, cValue + 5, 4)``.

    Presumably these are attribute values the random tree never saw -
    TODO confirm against ``randlist`` and ``build_random_tree``.
    """
    value_count = 3
    tree = build_random_tree(4, value_count)
    unseen_values = randlist(value_count + 1, value_count + 5, 4)
    instance = dtree.Instance(unseen_values)
    predicted = dtree.classify(tree, instance)
    self.assertEqual(predicted, tree.fDefaultLabel)
def test_classify(self):
    """Instances generated from a random tree must get their own label back.

    Builds one random tree, then five times generates an instance from it
    and checks that ``dtree.classify`` returns the instance's ``fLabel``.
    """
    dt = build_random_tree(4, 3)
    # range() instead of xrange(): xrange does not exist on Python 3, and for
    # five iterations range() behaves the same on Python 2.
    for _ in range(5):
        # The second element returned by the generator is not needed here.
        inst, _path = build_random_instance_from_dt(dt)
        fLabel = dtree.classify(dt, inst)
        self.assertEqual(inst.fLabel, fLabel)
# Restore the target attribute and the decision tree, in that order, from the
# already-open file object fd_dt.
# NOTE(review): pickle.load can execute arbitrary code when fed untrusted
# data - confirm fd_dt only ever refers to a trusted file.
target_attr = pickle.load(fd_dt)
tree = pickle.load(fd_dt)
fd_dt.close()
# Open the file that holds the records to predict.
try:
    fd_pd = open(args.filename_predict, "r")
except IOError:
    # NOTE(review): every IOError is reported as "not found", and the process
    # exits with status 0 even though this is an error path.
    sys.stderr.write("Error: file '%s' not found.\n" % args.filename_predict)
    sys.exit(0)
# 2nd argument is a null drop list
pdata, attributes, ignore_attr_orig, ignore_targ_attr = \
    prepare_data(fd_pd, [], targ_attr_idx, args.verbose_flag, learn=False)
fd_pd.close()
if args.verbose_flag == True:
    sys.stderr.write("%d records to predict have been read and prepared.\n" % len(pdata))
# Classify the prepared records with the restored tree.
results = classify(tree, pdata)
if args.results_flag == True:
    print("Prediction results in simple list form:\n%s\n" % results)
# Write a file with predictions by reading given unclassified data
# file, append prediction to each record and write out new file.
if args.write_flag == True:
    predicted_file(args.filename_predict, results, target_attr)
#print 'Target-Attr : ',target_attr print 'Number of Entries in Training Set after Cleanup', len(data) print 'Number of Entries in Test Set after Cleanup', len(test) d = input('No. of Training Instances to be taken: ') s1 = input('Start from Index: ') tree = dtree.create_decision_tree(c.values, data[s1:s1+d], attributes, target_attr, id3.gain, None) print '-----------------Decision Tree Created------------------' h = preprocess.Helper() h.print_tree(tree, "") t = input('No. of Test Instances to be taken: ') s2 = input('Start from Index: ') classification = dtree.classify(tree, test[s2:s2+t]) #print classification #print test[s2:s2+t] correct = 0 i = 0 for item in classification: #print item if test[i][target_attr] == (item+'.'): correct +=1 i +=1 total = i print '----------------Classification Results------------------' print "Correctly-Classified: ", correct print "Total-Classifications: ", total print "Accuracy: ", (float(correct)/total)*100
def filter_unclassifiable(listInst):
    """Keep only the instances that a tree built from them labels correctly.

    Builds a decision tree from ``listInst`` and returns, in the original
    order, the instances for which ``dtree.classify`` yields their own
    ``fLabel``.
    """
    tree = dtree.build_tree(listInst)
    consistent = []
    for instance in listInst:
        label = dtree.classify(tree, instance)
        if label == instance.fLabel:
            consistent.append(instance)
    return consistent
def test_classify_unknown(self):
    """Expect the tree's default label for an instance made from
    ``randlist(cValue + 1, cValue + 5, 4)``.

    Presumably those values lie outside what the random tree was built
    with - TODO confirm against ``randlist`` and ``build_random_tree``.
    """
    n_values = 3
    random_tree = build_random_tree(4, n_values)
    outside_values = randlist(n_values + 1, n_values + 5, 4)
    unknown = dtree.Instance(outside_values)
    result = dtree.classify(random_tree, unknown)
    self.assertEqual(result, random_tree.fDefaultLabel)
def test_classify(self):
    """Every instance generated from a random tree must be classified with
    its own label.

    One random tree is built; five instances are generated from it and each
    is checked against the result of ``dtree.classify``.
    """
    dt = build_random_tree(4, 3)
    # xrange() is unavailable on Python 3; range(5) is equivalent here and
    # works on both Python 2 and 3.
    for _ in range(5):
        # Only the instance is used; the accompanying path is ignored.
        inst, _path = build_random_instance_from_dt(dt)
        fLabel = dtree.classify(dt, inst)
        self.assertEqual(inst.fLabel, fLabel)