예제 #1
0
파일: run.py 프로젝트: wz125/courses
def Pruning_the_Tree():
  """Demo: build a tree from ``treepredict.my_data`` and print it after pruning.

  ``treepredict`` is reloaded first. The same tree object is then passed to
  ``treepredict.prune`` with 0.1 and afterwards with 1.0, and is printed with
  ``treepredict.printtree`` after each call.
  """
  print('>>Pruning the Tree')
  reload(treepredict)
  tree = treepredict.buildtree(treepredict.my_data)
  print('------------------')
  # Same tree object both times: the second pass runs on the result of the first.
  for value in (0.1, 1.0):
    treepredict.prune(tree, value)
    treepredict.printtree(tree)
예제 #2
0
    ["d", "France", "yes", "23", "Basic"],
]


# Load the training and test sets; both reads use the same arguments apart
# from the file path.
train_flowers, test_flowers = [
    data.read_filedata(path, "ALL", ",", [0, 1, 2, 3])
    for path in ("..//data//train_data.txt", "..//data//test_data.txt")
]

# Wrap the training data, build the tree on it and print it.
tree = DecisionTree(train_flowers)
treepredict.buildtree(tree)
tree.printTree()

# A test sample counts as right when its last field appears in the prediction.
right = wrong = 0
for flower in test_flowers:
    result = treepredict.predic(tree, flower)
    if flower[-1] not in result:
        wrong += 1
        continue
    right += 1

print("正确预测:" + str(right) + "个")
print("错误预测:" + str(wrong) + "个")

print("-------------------------------------------------------------------")

# Prune the same tree with 0.95 and print it again.
treepredict.prune(tree, 0.95)

tree.printTree()
예제 #3
0
            gender = doc2.getElementsByTagName('gender')[0].firstChild.data
            age = doc2.getElementsByTagName('age')[0].firstChild.data
            loc = doc2.getElementsByTagName('location')[0].firstChild.data[0:2]

            # 将州转换成地区
            for r, s in stateregions.items():
                if loc in s: region = r

            if region != None:
                result.append((gender, int(age), region, rating))
        except:
            pass
    return result


# Fetch ratings, report how many came back, then look up the people behind them.
l1 = getrandomratings(500)
print(len(l1))
pdata = getpeopledata(l1)
print(pdata[0])

import treepredict

# Build the tree with variance as the scoring function, prune it, and render it.
hottree = treepredict.buildtree(pdata, scoref=treepredict.variance)
treepredict.prune(hottree, 0.5)
treepredict.drawtree(hottree, 'hottree.jpg')

# Classify two observations where only the third field (region) is given.
south, midat = [
    treepredict.mdclassify((None, None, area), hottree)
    for area in ('south', 'Mid Atlantic')
]
# Print the entry under key 10 divided by the sum of all entries.
# NOTE(review): under Python 2, `/` truncates when both operands are ints --
# confirm mdclassify returns float values here.
for dist in (south, midat):
    print(dist[10] / sum(dist.values()))
예제 #4
0
# # Test divideSet
# print treepredict.divideSet(data.my_data, 2, "yes")

# # Check how Gini impurity and entropy change after a split
# print treepredict.giniImpurity(data.my_data)
# print treepredict.entropy(data.my_data)
# set1,set2=treepredict.divideSet(data.my_data, 2, "yes")
# print treepredict.giniImpurity(set1)
# print treepredict.entropy(set1)

# Test buildTree, then try the prune function; each pass builds a fresh tree
# and draws it to its own image (only the second pass prunes, with 1.0).
for outfile, prune_arg in (('treeview.jpg', None), ('treeview2.jpg', 1.0)):
    tree = treepredict.buildTree(data.my_data)
    if prune_arg is not None:
        treepredict.prune(tree, prune_arg)
    draw = DrawTree.DrawTree(tree, outfile)
    draw.drawTree()

# # Predict with the classify function
# tree=treepredict.buildTree(data.my_data)
# print treepredict.classify(['(direct)','USA','yes',5], tree)

# Predict with classify, then with mdclassify on observations containing None.
tree = treepredict.buildTree(data.my_data)
print(treepredict.classify(['(direct)', 'USA', 'yes', 5], tree))
for observation in (['google', None, 'yes', None],
                    ['google', 'France', None, None]):
    print(treepredict.mdclassify(observation, tree))
예제 #5
0
            #print doc.toxml()
            gender = doc.getElementsByTagName('gender')[0].firstChild.data
            age = doc.getElementsByTagName('age')[0].firstChild.data
            loc = doc.getElementsByTagName('location')[0].firstChild.data

            region = None
            for r, s in stateregions.iteritems():
                if loc[0:2] in s: region = r

            if region:
                result.append((gender, int(age), region, rating))
        except:
            pass
    return result


if __name__ == '__main__':
    # Original author's open question: all results always seem to share the
    # same gender -- cause unknown.
    pdata = getpeopledata(getrandomratings(50))
    print(pdata)

    import drawtree
    import treepredict

    # Build with variance as the scoring function, prune with 0.5, then draw.
    hot_tree = treepredict.buildtree(pdata, treepredict.variance)
    treepredict.prune(hot_tree, 0.5)
    drawtree.drawtree(hot_tree, 'hottree.png')
    print('Wrote hottree.png')
예제 #6
0
        url = "http://services.hotornot.com/rest/?app_key=%s" % api_key
        url += "&method=MeetMe.getProfile&emid=%s&get_keywords=true" % emid

        # 得到所有关于此人的详细信息
        try:
            rating = int(float(rating) + 0.5)
            doc2 = xml.dom.minidom.parseString(
                urllib.request.urlopen(url).read())
            gender = doc2.getElementsByTagName('gender')[0].firstChild.data
            age = doc2.getElementsByTagName('age')[0].firstChild.data
            loc = doc2.getElementsByTagName('location')[0].firstChild.data[0:2]

            # 将州转换为地区
            for r, s in stateregions.items():
                if loc in s: region = r

            if region != None:
                result.append((gender, int(age), region, rating))
        except:
            pass
    return result


if __name__ == '__main__':  # runs only when this module is executed directly
    ratings = getrandomratings(500)
    print(len(ratings))

    pdata = getpeopledata(ratings)
    print(pdata)

    # Build the decision tree with variance as the scoring function.
    hot_tree = treepredict.buildtree(pdata, scoref=treepredict.variance)
    # Prune with 0.5, then draw the result to an image file.
    treepredict.prune(hot_tree, 0.5)
    treepredict.drawtree(hot_tree, 'hot.jpg')