def Pruning_the_Tree():
    """Demo: build a tree from ``treepredict.my_data`` and print it after pruning.

    The same tree object is passed to ``treepredict.prune`` twice, first with
    0.1 and then with 1.0, and is printed with ``treepredict.printtree``
    after each call.
    """
    print('>>Pruning the Tree')
    # Re-execute the treepredict module (Python 2 builtin ``reload``) before use.
    reload(treepredict)
    tree = treepredict.buildtree(treepredict.my_data)
    print('------------------')
    # The second pass operates on the tree object already handed to the first.
    for threshold in (0.1, 1.0):
        treepredict.prune(tree, threshold)
        treepredict.printtree(tree)
["d", "France", "yes", "23", "Basic"], ] train_flowers = data.read_filedata("..//data//train_data.txt", "ALL", ",", [0, 1, 2, 3]) test_flowers = data.read_filedata("..//data//test_data.txt", "ALL", ",", [0, 1, 2, 3]) tree = DecisionTree(train_flowers) treepredict.buildtree(tree) tree.printTree() right = 0 wrong = 0 for flower in test_flowers: result = treepredict.predic(tree, flower) if flower[-1] in result: if right == 49: pass right += 1 else: wrong += 1 print "正确预测:" + str(right) + "个" print "错误预测:" + str(wrong) + "个" print "-------------------------------------------------------------------" treepredict.prune(tree, 0.95) tree.printTree()
gender = doc2.getElementsByTagName('gender')[0].firstChild.data age = doc2.getElementsByTagName('age')[0].firstChild.data loc = doc2.getElementsByTagName('location')[0].firstChild.data[0:2] # 将州转换成地区 for r, s in stateregions.items(): if loc in s: region = r if region != None: result.append((gender, int(age), region, rating)) except: pass return result l1 = getrandomratings(500) print len(l1) pdata = getpeopledata(l1) print pdata[0] import treepredict hottree = treepredict.buildtree(pdata, scoref=treepredict.variance) treepredict.prune(hottree, 0.5) treepredict.drawtree(hottree, 'hottree.jpg') south = treepredict.mdclassify((None, None, 'south'), hottree) midat = treepredict.mdclassify((None, None, 'Mid Atlantic'), hottree) print south[10] / sum(south.values()) print midat[10] / sum(midat.values())
# --- Disabled experiment: try divideSet ---
# print treepredict.divideSet(data.my_data, 2, "yes")

# --- Disabled experiment: Gini impurity and entropy before and after a split ---
# print treepredict.giniImpurity(data.my_data)
# print treepredict.entropy(data.my_data)
# set1, set2 = treepredict.divideSet(data.my_data, 2, "yes")
# print treepredict.giniImpurity(set1)
# print treepredict.entropy(set1)

# Try buildTree: build a tree from data.my_data and draw it to treeview.jpg.
tree = treepredict.buildTree(data.my_data)
DrawTree.DrawTree(tree, 'treeview.jpg').drawTree()

# --- Disabled experiment: predict with classify ---
# tree = treepredict.buildTree(data.my_data)
# print treepredict.classify(['(direct)', 'USA', 'yes', 5], tree)

# Try the pruning function on a freshly built tree and draw the result.
tree = treepredict.buildTree(data.my_data)
treepredict.prune(tree, 1.0)
DrawTree.DrawTree(tree, 'treeview2.jpg').drawTree()

# Predict on another freshly built tree: classify for a complete observation,
# mdclassify for observations that contain None fields.
tree = treepredict.buildTree(data.my_data)
print(treepredict.classify(['(direct)', 'USA', 'yes', 5], tree))
for observation in (['google', None, 'yes', None],
                    ['google', 'France', None, None]):
    print(treepredict.mdclassify(observation, tree))
#print doc.toxml() gender = doc.getElementsByTagName('gender')[0].firstChild.data age = doc.getElementsByTagName('age')[0].firstChild.data loc = doc.getElementsByTagName('location')[0].firstChild.data region = None for r, s in stateregions.iteritems(): if loc[0:2] in s: region = r if region: result.append((gender, int(age), region, rating)) except: pass return result


if __name__ == '__main__':
    # Script entry point: fetch ratings (getrandomratings is called with 50),
    # turn them into people data, and print that data.
    # Open question from the original author: all results always seem to be
    # of the same gender -- unexplained.
    pdata = getpeopledata(getrandomratings(50))
    print(pdata)

    import drawtree
    import treepredict

    # Build the tree with variance as the scoring function, prune it with 0.5,
    # then draw it to hottree.png and report that the file was written.
    tree = treepredict.buildtree(pdata, treepredict.variance)
    treepredict.prune(tree, 0.5)
    drawtree.drawtree(tree, 'hottree.png')
    print('Wrote hottree.png')
url = "http://services.hotornot.com/rest/?app_key=%s" % api_key url += "&method=MeetMe.getProfile&emid=%s&get_keywords=true" % emid # 得到所有关于此人的详细信息 try: rating = int(float(rating) + 0.5) doc2 = xml.dom.minidom.parseString( urllib.request.urlopen(url).read()) gender = doc2.getElementsByTagName('gender')[0].firstChild.data age = doc2.getElementsByTagName('age')[0].firstChild.data loc = doc2.getElementsByTagName('location')[0].firstChild.data[0:2] # 将州转换为地区 for r, s in stateregions.items(): if loc in s: region = r if region != None: result.append((gender, int(age), region, rating)) except: pass return result if __name__ == '__main__': #只有在执行当前模块时才会运行此函数 l1 = getrandomratings(500) print(len(l1)) pdata = getpeopledata(l1) print(pdata) tree = treepredict.buildtree(pdata, scoref=treepredict.variance) #创建决策树 treepredict.prune(tree, 0.5) #剪支 treepredict.drawtree(tree, 'hot.jpg')