def reDraw(tolN, tolS):
    """Fit a tree on the sine data set and redraw the figure.

    The figure held on ``reDraw.f`` is cleared and given a new subplot. The
    training data is loaded from disk on every call; a model tree is built
    when the check box variable is set, otherwise a regression tree. The
    samples are drawn as a scatter plot and the predictions, sorted by the
    first column, as a line.

    Args:
        tolN: Second entry of the ``ops`` tuple passed to ``createTree``.
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
    """
    reDraw.f.clf()  # clear the previous drawing
    reDraw.a = reDraw.f.add_subplot(111)
    trainData = regTrees.loadDataSet(
        ".//machinelearninginaction//ch09//sine.txt")
    if chkBtnVar.get():
        # Check box ticked: build a model tree.
        modelTree = regTrees.createTree(trainData,
                                        leafType=regTrees.modelLeaf,
                                        errType=regTrees.modelErr,
                                        ops=(tolS, tolN))
        y_hat = regTrees.createForeCast(modelTree, trainData[:, 0],
                                        modelEval=regTrees.modelTreeEval)
    else:
        # Regression tree.
        modelTree = regTrees.createTree(trainData, ops=(tolS, tolN))
        y_hat = regTrees.createForeCast(modelTree, trainData[:, 0])
    # Pair every x with its prediction and sort by x so the line is drawn
    # left to right.
    data_hat = np.hstack((trainData[:, 0], y_hat))
    sort_hat = sorted(data_hat.tolist(), key=lambda x: x[0])
    sort_x = [x[0] for x in sort_hat]
    sort_y = [x[1] for x in sort_hat]
    reDraw.a.scatter(trainData[:, 0].T.tolist()[0],
                     trainData[:, 1].T.tolist()[0], s=50)
    reDraw.a.plot(sort_x, sort_y)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming reDraw.canvas is a FigureCanvasTkAgg); draw()
    # works on old and new releases.
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Refit the tree with the given tolerances and redraw the figure.

    Clears the figure on ``reDraw.f``, adds a fresh subplot, builds a model
    tree (check box set) or a regression tree on ``reDraw.rawDat``, then
    draws the raw samples as a scatter plot and the forecast for
    ``reDraw.testDat`` as a line.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 for model trees.
    """
    # Wipe the canvas with Figure.clf() and add a new Axes object.
    reDraw.f.clf()
    reDraw.a = reDraw.f.add_subplot(111)
    if chkBtnVar.get():
        # Model tree: tolN must be at least 2.
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        # Vector of predicted values.
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        # Regression tree.
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    # Real data as a scatter plot. tolist() keeps scatter() working when
    # rawDat is a numpy matrix, whose column slices stay 2-D (assumed;
    # harmless for plain ndarrays).
    reDraw.a.scatter(reDraw.rawDat[:, 0].tolist(),
                     reDraw.rawDat[:, 1].tolist(), s=5)
    # Predictions as a continuous line.
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Draw the data points and the fitted model line.

    Args:
        tolS: Minimum error value entered by the user.
        tolN: Minimum sample count entered by the user; raised to 2 when a
            model tree is built.

    Returns:
        None. The figure on ``reDraw.f`` is redrawn in place.
    """
    reDraw.f.clf()  # clear the previous image
    reDraw.a = reDraw.f.add_subplot(111)  # add a new plot
    if chkBtnVar.get():
        # Check box ticked: build a model tree (needs at least 2 samples).
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        # Check box not ticked: build a regression tree.
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    # Data points, flattened to 1-D arrays for scatter().
    reDraw.a.scatter(reDraw.rawDat[:, 0].flatten().A[0],
                     reDraw.rawDat[:, 1].flatten().A[0], s=5)
    # Fitted line.
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def test_compare(self):
    """Compare three predictors on the bike-speed data and print their scores.

    A regression tree, a model tree and a plain linear fit are trained on
    the training file; for each, the correlation coefficient between the
    predictions and the second column of the test file is printed.
    """
    train_mat = mat(regTrees.loadDataSet("bikeSpeedVsIq_train.txt"))
    test_mat = mat(regTrees.loadDataSet("bikeSpeedVsIq_test.txt"))

    # Regression tree.
    reg_tree = regTrees.createTree(train_mat, ops=(1, 20))
    predictions = compareRegression.createForeCast(reg_tree, test_mat[:, 0])
    score = compareRegression.corrcoef(
        predictions, test_mat[:, 1], rowvar=0)[0, 1]
    print("\n corrcoef == %s" % (score))

    # Model tree.
    model_tree = regTrees.createTree(train_mat,
                                     linearModelTrees.modelLeaf,
                                     linearModelTrees.modelErr,
                                     ops=(1, 20))
    predictions = compareRegression.createForeCast(
        model_tree, test_mat[:, 0], compareRegression.modelTreeEval)
    score = compareRegression.corrcoef(
        predictions, test_mat[:, 1], rowvar=0)[0, 1]
    print("\n corrcoef == %s" % (score))

    # Standard linear regression; the model-tree prediction buffer is
    # overwritten row by row.
    ws, _, _ = linearModelTrees.linearSolve(train_mat)
    print("\n ws == %s" % (ws))
    row_count = shape(test_mat)[0]
    for row in range(row_count):
        predictions[row] = test_mat[row, 0] * ws[1, 0] + ws[0, 0]
    score = compareRegression.corrcoef(
        predictions, test_mat[:, 1], rowvar=0)[0, 1]
    print("\n corrcoef == %s" % (score))
def reDraw(tolS, tolN):
    """Draw the raw data as a scatter plot and the fitted data as a curve.

    Parameters
    ----------
    tolS : allowed error reduction
    tolN : minimum number of samples for a split (raised to 2 when a model
        tree is built)

    Returns
    -------
    None
    """
    reDraw.f.clf()
    reDraw.a = reDraw.f.add_subplot(111)
    if chkBtnVar.get():
        # Check box set: model tree.
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        # Otherwise: regression tree.
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    reDraw.a.scatter(reDraw.rawDat[:, 0].tolist(),
                     reDraw.rawDat[:, 1].tolist(), s=5)
    reDraw.a.plot(reDraw.testDat, yHat, 'b', linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def test_create_reg_tree(self):
    """Build regression trees on ex0.txt and print them.

    The first tree uses the default options of ``createTree``; the second
    uses ``ops=(0, 1)`` to provoke over-fitting.
    """
    samples = mat(regTrees.loadDataSet("ex0.txt"))
    print("\n data_set == %s" % (samples))

    default_tree = regTrees.createTree(samples)
    print("\n tree == %s" % (default_tree))

    # Over-fitting check.
    overfit_tree = regTrees.createTree(samples, ops=(0, 1))
    print("\n tree == %s" % (overfit_tree))
def reDraw(tolS, tolN):
    """Refit the tree with the given tolerances and redraw the figure.

    Clears the figure on ``reDraw.f``, adds a fresh subplot, builds a model
    tree (check box set) or a regression tree on ``reDraw.rawDat``, then
    draws the raw samples as a scatter plot and the forecast for
    ``reDraw.testDat`` as a line.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 for model trees.
    """
    reDraw.f.clf()
    reDraw.a = reDraw.f.add_subplot(111)
    if chkBtnVar.get():
        if tolN < 2:
            tolN = 2
        # modelErr is passed as the error function and (tolS, tolN) as the
        # options tuple; it must not be called here.
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    # tolist() keeps scatter() working when rawDat is a numpy matrix, whose
    # column slices stay 2-D (assumed; harmless for plain ndarrays).
    reDraw.a.scatter(reDraw.rawDat[:, 0].tolist(),
                     reDraw.rawDat[:, 1].tolist(), s=5)
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Rebuild the tree for the current settings and repaint the plot.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 when the check
            box is selected.
    """
    reDraw.f.clf()  # clear the figure
    reDraw.a = reDraw.f.add_subplot(111)
    if chkBtnVar.get():
        # Check box selected: build the tree with the model leaf/error
        # functions.
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    # Scatter for the data set. tolist() keeps scatter() working when rawDat
    # is a numpy matrix, whose column slices stay 2-D (assumed; harmless for
    # plain ndarrays).
    reDraw.a.scatter(reDraw.rawDat[:, 0].tolist(),
                     reDraw.rawDat[:, 1].tolist(), s=5)
    # Line plot for yHat.
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Refit the tree and redraw true values and predictions.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 when the
            "model tree" check box is selected.
    """
    reDraw.f.clf()  # clear the previous image
    reDraw.a = reDraw.f.add_subplot(111)  # add a new plot again
    if chkBtnVar.get():
        # The model-tree check box is selected.
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    # True values. tolist() keeps scatter() working when rawDat is a numpy
    # matrix, whose column slices stay 2-D (assumed; harmless for ndarrays).
    reDraw.a.scatter(reDraw.rawDat[:, 0].tolist(),
                     reDraw.rawDat[:, 1].tolist(), s=5)
    # Predicted values.
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Refit the tree on ``reDraw.rawDat`` and repaint the figure.

    When the check box variable is set the tree is built with
    ``regTrees.modelLeaf`` / ``regTrees.modelErr`` and evaluated with
    ``regTrees.modelTreeEval``; otherwise ``createTree`` and
    ``createForeCast`` run with their defaults.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 in the
            check-box branch.
    """
    reDraw.f.clf()
    reDraw.a = reDraw.f.add_subplot(111)
    if chkBtnVar.get():
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    # tolist() keeps scatter() working when rawDat is a numpy matrix, whose
    # column slices stay 2-D (assumed; harmless for plain ndarrays).
    reDraw.a.scatter(reDraw.rawDat[:, 0].tolist(),
                     reDraw.rawDat[:, 1].tolist(), s=5)
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Refit the tree for the current settings and repaint the canvas.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 when the check
            box variable is set.
    """
    figure = reDraw.f
    figure.clf()
    reDraw.a = figure.add_subplot(111)

    if not chkBtnVar.get():
        # Defaults of createTree / createForeCast.
        tree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        forecast = regTrees.createForeCast(tree, reDraw.testDat)
    else:
        # Model leaf / error functions, with tolN floored at 2.
        min_samples = 2 if tolN < 2 else tolN
        tree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                   regTrees.modelErr, (tolS, min_samples))
        forecast = regTrees.createForeCast(tree, reDraw.testDat,
                                           regTrees.modelTreeEval)

    xs = array(reDraw.rawDat[:, 0])
    ys = array(reDraw.rawDat[:, 1])
    reDraw.a.scatter(xs, ys, s=5)
    reDraw.a.plot(reDraw.testDat, forecast, linewidth=2.0)
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Refit the tree with the supplied tolerances and redraw the figure.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 when the check
            box variable is set.
    """
    reDraw.f.clf()  # clear the figure
    reDraw.a = reDraw.f.add_subplot(111)
    if chkBtnVar.get():
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    # Scatter for the data set. tolist() keeps scatter() working when rawDat
    # is a numpy matrix, whose column slices stay 2-D (assumed; harmless for
    # plain ndarrays).
    reDraw.a.scatter(reDraw.rawDat[:, 0].tolist(),
                     reDraw.rawDat[:, 1].tolist(), s=5)
    # Line plot for yHat.
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Refit the tree on ``reDraw.rawData`` and redraw the figure.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 for model trees.
    """
    reDraw.f.clf()
    reDraw.a = reDraw.f.add_subplot(111)
    if chkBtnVar.get():
        # Check box selected: model tree; otherwise regression tree.
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawData, regTrees.modelLeaf,
                                     regTrees.modelError, (tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testData,
                                       regTrees.modelTreeEval)
    else:
        myTree = regTrees.createTree(reDraw.rawData, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testData)
    # tolist() keeps scatter() working when rawData is a numpy matrix, whose
    # column slices stay 2-D (assumed; harmless for plain ndarrays).
    reDraw.a.scatter(reDraw.rawData[:, 0].tolist(),
                     reDraw.rawData[:, 1].tolist(), s=5)
    reDraw.a.plot(reDraw.testData, yHat, linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Refit the tree and redraw samples and predictions as markers.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 when the check
            box variable is set.
    """
    reDraw.f.clf()  # clear the figure
    reDraw.a = reDraw.f.add_subplot(111)
    if chkBtnVar.get():
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    # Scatter for the data set. tolist() keeps scatter() working when rawDat
    # is a numpy matrix, whose column slices stay 2-D (assumed; harmless for
    # plain ndarrays).
    reDraw.a.scatter(reDraw.rawDat[:, 0].tolist(),
                     reDraw.rawDat[:, 1].tolist(), s=20, alpha=.7)
    # yHat drawn as red circle markers ('or').
    reDraw.a.plot(reDraw.testDat, yHat, 'or', markersize=5.0,
                  linewidth=2.0, alpha=.6)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Refit the tree with the given tolerances and redraw the figure.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 when the check
            box variable is set.
    """
    reDraw.f.clf()
    reDraw.a = reDraw.f.add_subplot(111)
    if chkBtnVar.get():
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        # Forecast with the fitted tree. This must be createForeCast, as in
        # the other branch; calling createTree here would try to build a
        # tree from a tree.
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    # Columns are transposed and converted to flat Python lists for scatter().
    reDraw.a.scatter(reDraw.rawDat[:, 0].T.A.tolist()[0],
                     reDraw.rawDat[:, 1].T.A.tolist()[0], s=5)
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0, color='red')
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Reload the sine data, refit the tree and redraw the figure.

    The data file is read on every call and stored on ``reDraw.rawDat``;
    ``reDraw.testDat`` becomes an evenly spaced grid (step 0.01) between the
    smallest and largest value of the first column.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 when the check
            box variable is set.
    """
    # Raw string: without it "\U" starts a unicode escape on Python 3 and the
    # file does not even compile. The path value itself is unchanged.
    reDraw.rawDat = mat(
        regTrees.loadDataSet(r"C:\Users\YAN\Desktop\Cart\sine.txt"))
    reDraw.testDat = arange(min(reDraw.rawDat[:, 0]),
                            max(reDraw.rawDat[:, 0]), 0.01)
    reDraw.f.clf()
    reDraw.a = reDraw.f.add_subplot(111)
    if chkBtnVar.get():
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    # rawDat is a numpy matrix, so its column slices stay 2-D; tolist() hands
    # scatter() plain lists instead.
    reDraw.a.scatter(reDraw.rawDat[:, 0].tolist(),
                     reDraw.rawDat[:, 1].tolist(), s=5)
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Refit the tree with the given tolerances and redraw the figure.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 when the check
            box variable is set.
    """
    # Clear the previous image.
    reDraw.f.clf()
    reDraw.a = reDraw.f.add_subplot(111)
    if chkBtnVar.get():
        if tolN < 2:
            tolN = 2
        # NOTE(review): the sibling attributes are camelCase (modelErr,
        # modelTreeEval); confirm that rt.modelleaf is the intended spelling.
        myTree = rt.createTree(reDraw.rawDat, rt.modelleaf, rt.modelErr,
                               (tolS, tolN))
        yHat = rt.createForeCast(myTree, reDraw.testDat, rt.modelTreeEval)
    else:
        myTree = rt.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = rt.createForeCast(myTree, reDraw.testDat)
    # tolist() keeps scatter() working when rawDat is a numpy matrix, whose
    # column slices stay 2-D (assumed; harmless for plain ndarrays).
    reDraw.a.scatter(reDraw.rawDat[:, 0].tolist(),
                     reDraw.rawDat[:, 1].tolist(), s=5)
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Refit the tree and redraw true values (scatter) and forecast (line).

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 for model trees.
    """
    # Start from an empty figure with a single fresh subplot.
    reDraw.f.clf()
    axes = reDraw.f.add_subplot(111)
    reDraw.a = axes

    # The check box decides between a model tree and a regression tree.
    want_model_tree = chkBtnVar.get()
    if want_model_tree:
        if tolN < 2:
            tolN = 2
        fitted = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        predicted = regTrees.createForeCast(fitted, reDraw.testDat,
                                            regTrees.modelTreeEval)
    else:
        fitted = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        predicted = regTrees.createForeCast(fitted, reDraw.testDat)

    # Scatter plot of the true values.
    first_col = array(reDraw.rawDat[:, 0])
    second_col = array(reDraw.rawDat[:, 1])
    reDraw.a.scatter(first_col, second_col, s=5)
    # Line plot of the predicted values.
    reDraw.a.plot(reDraw.testDat, predicted, linewidth=2.0)
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Refit the tree and redraw the sample scatter and the fitted curve.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 for model trees.
    """
    reDraw.f.clf()
    reDraw.a = reDraw.f.add_subplot(111)
    if chkBtnVar.get():
        # Model tree selected.
        if tolN < 2:
            tolN = 2
        # Build the tree according to the given rules.
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        # Predictions for the test set.
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        # Regression tree selected.
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    # Scatter plot of the samples. tolist() keeps scatter() working when
    # rawDat is a numpy matrix, whose column slices stay 2-D (assumed;
    # harmless for plain ndarrays).
    reDraw.a.scatter(reDraw.rawDat[:, 0].tolist(),
                     reDraw.rawDat[:, 1].tolist(), s=5)
    # Fitted curve over the test set.
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def test2():
    """Grow a regression tree on exp.txt, prune it and plot both trees.

    The tree is built with ``ops=(10, 4)``, pruned against ex2test.txt, and
    the pruned tree is printed. Both the tree passed to ``prune`` and the
    returned tree are then plotted over the training matrix.
    """
    samples = regTrees.loadDataSet('exp.txt')
    print(shape(samples))

    # Matrix form is needed for slicing such as m[:, -1] or m[5, :].
    sample_matrix = mat(samples)

    # Per the original notes, ops=(0, 1) splits on every point, which is
    # classic over-fitting.
    grown_tree = regTrees.createTree(sample_matrix, ops=(10, 4))

    # Held-out data used for pruning.
    holdout = mat(regTrees.loadDataSet('ex2test.txt'))
    pruned_tree = regTrees.prune(grown_tree, holdout)
    print(pruned_tree)

    regTrees.plot1withTree(sample_matrix, grown_tree)
    regTrees.plot1withTree(sample_matrix, pruned_tree)
def reDraw(tolS, tolN):
    """Refit the tree with the given tolerances and redraw the figure.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 for model trees.
    """
    # Clear the previous image so consecutive drawings do not overlap.
    reDraw.f.clf()
    reDraw.a = reDraw.f.add_subplot(111)
    if chkBtnVar.get():
        # Check box selected: model tree.
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        # Otherwise: regression tree.
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    reDraw.a.scatter(list(reDraw.rawDat[:, 0]), list(reDraw.rawDat[:, 1]),
                     s=5)
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Refit the tree with the given tolerances and redraw the figure.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 when the check
            box variable is set.
    """
    reDraw.f.clf()
    reDraw.a = reDraw.f.add_subplot(111)
    # Check which tree type is selected.
    if chkBtnVar.get():
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    # .A hands scatter() plain ndarrays instead of matrix slices.
    reDraw.a.scatter(reDraw.rawDat[:, 0].A, reDraw.rawDat[:, 1].A, s=5)
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Refit the tree with the given tolerances and redraw the figure.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 for model trees.
    """
    reDraw.f.clf()
    reDraw.a = reDraw.f.add_subplot(111)
    if chkBtnVar.get():
        # "Model Tree" selected: create and forecast with the model-tree
        # functions.
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        yHat = regTrees.createForecast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForecast(myTree, reDraw.testDat)
    # Scatter plot of the samples.
    reDraw.a.scatter(array(reDraw.rawDat[:, 0]), array(reDraw.rawDat[:, 1]),
                     s=5)
    # plot() draws the continuous curve.
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def test3():
    """Build a model tree on exp2.txt with ops=(1, 10), print and plot it."""
    samples = mat(regTrees.loadDataSet('exp2.txt'))
    print(shape(samples))

    options = (1, 10)
    model_tree = regTrees.createTree(samples, regTrees.modelLeaf,
                                     regTrees.modelErr, options)
    print(model_tree)

    regTrees.plot1withTree_Linear(samples, model_tree)
def reDraw(tolS, tolN):
    """Refit the tree for the current settings and repaint the canvas.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 when the check
            box has been selected.
    """
    reDraw.f.clf()
    reDraw.a = reDraw.f.add_subplot(111)

    # Branch on the check box state.
    if not chkBtnVar.get():
        tree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        forecast = regTrees.createForeCast(tree, reDraw.testDat)
    else:
        tree_options = (tolS, 2 if tolN < 2 else tolN)
        tree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                   regTrees.modelErr, tree_options)
        forecast = regTrees.createForeCast(tree, reDraw.testDat,
                                           regTrees.modelTreeEval)

    axes = reDraw.a
    axes.scatter(reDraw.rawDat[:, 0], reDraw.rawDat[:, 1], c="black", s=5)
    axes.plot(reDraw.testDat, forecast, linewidth=2.0)
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Refit the tree with the given tolerances and redraw the figure.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 for model trees.
    """
    reDraw.f.clf()  # clear the previous image
    reDraw.a = reDraw.f.add_subplot(111)  # add a new plot again
    if chkBtnVar.get():
        # The check box decides between a model tree and a regression tree.
        if tolN < 2:
            tolN = 2
        # Build the tree from the real data.
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        # Forecast over the test points.
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    # Real values are drawn with scatter(), which builds a discrete point
    # plot. tolist() keeps scatter() working when rawDat is a numpy matrix,
    # whose column slices stay 2-D (assumed; harmless for plain ndarrays).
    reDraw.a.scatter(reDraw.rawDat[:, 0].tolist(),
                     reDraw.rawDat[:, 1].tolist(), s=5)
    # Predictions are drawn with plot() as a continuous curve.
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def test_linear_model_tree(self):
    """Build a tree on exp2.txt with the linear-model leaf/error functions
    and print it."""
    samples = mat(regTrees.loadDataSet("exp2.txt"))
    model_tree = regTrees.createTree(
        samples,
        linearModelTrees.modelLeaf,
        linearModelTrees.modelErr,
        ops=(1, 10),
    )
    print("\n tree == %s" % (model_tree))
def SL_Model():
    """Compare a regression tree, a model tree and a linear fit on the
    bike-speed data.

    For each model the correlation coefficient between its predictions and
    the second column of the test file is printed.
    """
    # The test file is loaded before the training file.
    testMat = mat(re.loadDataSet('bikeSpeedVsIq_test.txt'))
    trainMat = mat(re.loadDataSet('bikeSpeedVsIq_train.txt'))

    # Regression tree.
    reg_tree = re.createTree(trainMat, ops=(1, 20))
    predictions = createForeCast(reg_tree, testMat[:, 0])
    print('回归树的相关性R^2:',
          corrcoef(predictions, testMat[:, 1], rowvar=0)[0, 1])

    # Model tree.
    model_tree = re.createTree(trainMat, tr.modelLeaf, tr.modelErr,
                               ops=(1, 20))
    predictions = createForeCast(model_tree, testMat[:, 0], modelTreeEval)
    print('模型树的相关性R^2:',
          corrcoef(predictions, testMat[:, 1], rowvar=0)[0, 1])

    # Standard linear regression; the model-tree prediction buffer is
    # overwritten row by row.
    ws, _, _ = tr.linearSolve(trainMat)
    row_count = shape(testMat)[0]
    for row in range(row_count):
        predictions[row] = testMat[row, 0] * ws[1, 0] + ws[0, 0]
    print('标准线性回归的相关性R^2:',
          corrcoef(predictions, testMat[:, 1], rowvar=0)[0, 1])
def reDraw(tolS, tolN):
    """Refit the tree with the given tolerances and redraw the figure.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 for model trees.
    """
    reDraw.f.clf()  # clear the canvas
    reDraw.a = reDraw.f.add_subplot(111)
    if chkBtnVar.get():
        # Model tree or regression tree, decided by the check box.
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    # Scatter plot for the real data.
    reDraw.a.scatter(reDraw.rawDat[:, 0].A, reDraw.rawDat[:, 1].A, s=5)
    # Line plot for the predicted data.
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Create a new figure and canvas, refit the tree and draw the result.

    Unlike a plain redraw, this variant replaces ``reDraw.f`` with a new
    5x4 inch, 100 dpi ``Figure`` and ``reDraw.canvas`` with a new
    ``FigureCanvasTkAgg`` attached to ``root`` on every call.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 when the check
            box variable is set.
    """
    # Start from a fresh, empty figure.
    reDraw.f = Figure(figsize=(5, 4), dpi=100)
    reDraw.f.clf()
    # Create the new plot.
    reDraw.a = reDraw.f.add_subplot(111)
    # Check whether the check box is selected.
    if chkBtnVar.get():
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    # tolist() keeps scatter() working when rawDat is a numpy matrix, whose
    # column slices stay 2-D (assumed; harmless for plain ndarrays).
    reDraw.a.scatter(reDraw.rawDat[:, 0].tolist(),
                     reDraw.rawDat[:, 1].tolist(), s=5)
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0)
    # NOTE(review): the new canvas widget is not placed with a geometry
    # manager in this function - confirm the caller does so.
    reDraw.canvas = FigureCanvasTkAgg(reDraw.f, master=root)
    # FigureCanvasTkAgg.show() was deprecated in matplotlib 2.2 and later
    # removed; draw() works on old and new releases.
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Refit the tree with the given tolerances and redraw the figure.

    Trees are built with ``regTrees``; forecasts come from
    ``compareRegression``.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 when the check
            box variable is set.
    """
    reDraw.f.clf()  # clear the figure
    reDraw.a = reDraw.f.add_subplot(111)
    if chkBtnVar.get():
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        yHat = compareRegression.createForeCast(
            myTree, reDraw.testDat, compareRegression.modelTreeEval)
    else:
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = compareRegression.createForeCast(myTree, reDraw.testDat)
    # Scatter for the data set (red square markers).
    reDraw.a.scatter(reDraw.rawDat[:, 0].A, reDraw.rawDat[:, 1].A, s=5,
                     c='red', marker='s')
    # Green line for yHat.
    reDraw.a.plot(reDraw.testDat, yHat.A, 'g', linewidth=1.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Refit the tree with the given tolerances and redraw the figure.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 when the check
            box variable is set.
    """
    reDraw.f.clf()
    reDraw.a = reDraw.f.add_subplot(111)
    # Check whether the check box is selected.
    if chkBtnVar.get():
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    # Columns are flattened to 1-D arrays before being handed to scatter().
    reDraw.a.scatter(reDraw.rawDat[:, 0].flatten().A[0],
                     reDraw.rawDat[:, 1].flatten().A[0], s=5)
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Refit the tree with the given tolerances and redraw the figure.

    Whether a regression tree or a model tree is drawn is decided inside
    this function from the check box state.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 for model trees.
    """
    reDraw.f.clf()  # clear the image
    reDraw.a = reDraw.f.add_subplot(111)
    if chkBtnVar.get():
        # "Model Tree" check box selected: leaves are linear models.
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        # Not selected: regression tree with constant leaves.
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    # Debug output. Written as a single parenthesised expression so it
    # prints the same text under Python 2 and Python 3 (the Python 2 print
    # statement inserts no space after an item ending in a newline).
    print("\n××××××××××××reDraw.rawDat的值是:\n"
          + str(type(numpy.mat(reDraw.rawDat[:, 0]))))
    # Scatter plot of the raw data (true values).
    reDraw.a.scatter(list(reDraw.rawDat[:, 0]), list(reDraw.rawDat[:, 1]),
                     c='r')
    # Line plot of the predictions yHat. With constant leaves the curve is
    # blocky; with linear leaves it follows the data more closely.
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def reDraw(tols, tolN):
    """Refit the tree with the given tolerances and redraw the figure.

    Args:
        tols: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 when the check
            box variable is set.
    """
    # Clear the image and start a new plot.
    reDraw.f.clf()
    reDraw.a = reDraw.f.add_subplot(111)
    # Check whether the check box is selected.
    if chkBtnVar.get():
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tols, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tols, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    # Draw the true values.
    reDraw.a.scatter(reDraw.rawDat[:, 0].A, reDraw.rawDat[:, 1].A, s=5)
    # Draw the predicted values.
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Refit the tree, print debug information and redraw the figure.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 when the check
            box variable is set.
    """
    reDraw.f.clf()  # clear the figure
    reDraw.a = reDraw.f.add_subplot(111)
    if chkBtnVar.get():
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        # Debug output; one parenthesised expression prints the same text
        # under Python 2 and Python 3.
        print('myTree=' + ' ' + str(myTree))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    print('srcdata=' + ' ' + str(reDraw.rawDat))
    # The data set is drawn with plot() and a '.' marker in place of
    # scatter().
    reDraw.a.plot(reDraw.rawDat[:, 0], reDraw.rawDat[:, 1], '.')
    # Line plot for yHat.
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Refit the tree with the given tolerances and redraw the figure.

    Args:
        tolS: First entry of the ``ops`` tuple passed to ``createTree``.
        tolN: Second entry of the ``ops`` tuple; raised to 2 when the check
            box variable is set.
    """
    # Clear the figure.
    reDraw.f.clf()
    reDraw.a = reDraw.f.add_subplot(111)
    # Check whether the check box is selected.
    if chkBtnVar.get():
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    # Scatter for the data set.
    reDraw.a.scatter(reDraw.rawDat[:, 0].A, reDraw.rawDat[:, 1].A, s=5)
    # Red line for yHat.
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0, c='red')
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
def reDraw(tolS, tolN):
    """Refit the tree with the given tolerances and redraw the figure.

    Original author's notes: with tolN=200 the tree is not split and a
    single line is fitted; tolN=50 gives 5 line segments, tolN=10 gives 8.
    Setting the tolerances as low as possible builds the largest tree and
    over-fits badly.

    Args:
        tolS: Allowed error reduction.
        tolN: Minimum number of samples for a split; raised to 2 for model
            trees.
    """
    reDraw.f.clf()  # clear the previous image
    reDraw.a = reDraw.f.add_subplot(111)
    if chkBtnVar.get():
        # Check box selected: build a model tree; tolN is at least 2.
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat,
                                       regTrees.modelTreeEval)
    else:
        # Otherwise build a regression tree; the two middle arguments keep
        # their defaults.
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        yHat = regTrees.createForeCast(myTree, reDraw.testDat)
    # .tolist() converts the matrix/array columns to lists; without it
    # scatter() raises an error.
    reDraw.a.scatter(reDraw.rawDat[:, 0].tolist(),
                     reDraw.rawDat[:, 1].tolist(), s=5)
    # Predictions are drawn with plot().
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0)
    # canvas.show() was deprecated in matplotlib 2.2 and later removed from
    # the Tk canvas (assuming a FigureCanvasTkAgg); draw() works everywhere.
    reDraw.canvas.draw()
import regTrees
from numpy import *

# --- binary split of a 4x4 identity matrix ---------------------------------
testMat = mat(eye(4))
print(testMat)
# Feature index 1, threshold 0.5.
mat0, mat1 = regTrees.binSplitDataSet(testMat, 1, 0.5)
print("mat0=\n", mat0)
print("mat1=\n", mat1)

# --- ex00.txt: tree with default options -----------------------------------
myDat = regTrees.loadDataSet('ex00.txt')
myMat = mat(myDat)
print("myMat size:", shape(myMat))
print(regTrees.createTree(myMat))

# --- ex0.txt: tree with default options ------------------------------------
print("ex0.txt")
myDat1 = regTrees.loadDataSet('ex0.txt')
myMat1 = mat(myDat1)
print(shape(myMat1))  # original note: 200 3
print(regTrees.createTree(myMat1))

# --- ex2.txt: default tree, then the largest tree followed by pruning ------
myDat2 = regTrees.loadDataSet('ex2.txt')
myMat2 = mat(myDat2)
print(regTrees.createTree(myMat2))

myTree = regTrees.createTree(myMat2, ops=(0, 1))
myDatTest = regTrees.loadDataSet('ex2test.txt')
myMat2Test = mat(myDatTest)
# The return value of prune() is discarded; myTree itself is printed.
regTrees.prune(myTree, myMat2Test)
print(myTree)
import regTrees
from numpy import *

# Tree for ex00.txt with default options.
myDat = regTrees.loadDataSet('ex00.txt')
myMat = mat(myDat)
tree = regTrees.createTree(myMat)
# A single parenthesised expression prints identically on Python 2 and 3.
print("%s" % tree)

# Tree for ex0.txt with default options.
myDat1 = regTrees.loadDataSet('ex0.txt')
myMat1 = mat(myDat1)
tree2 = regTrees.createTree(myMat1)
print("%s" % tree2)
b, alphas = svmMLiA.smoP(data, label, 0.6, 0.001, 40, ('rbf', 1.3)) datamat = mat(data) labelmat = mat(label) svind = nonzero(alphas.A>0)[0] print svind svs = datamat[svind] print svs ''' #ADABOOST ''' data, label = readfile5() #print data #print label weak, est = adaboost.adaBoostTrainDS(data, label, 9) print weak #print est ''' #TREE REGRES data = readfile6() #print data mymat = mat(data) print regTrees.createTree(mymat) #print regTrees.createTree(mymat, regTrees.modelLeaf, regTrees.modelErr, (1,10)) #ws,X,Y = regTrees.linearSolve(mymat) #print ws
# Compare a regression tree, a model tree and standard linear regression on
# the bike-speed data by the correlation of their predictions with the test
# targets.
trainMat = mat(regTrees.loadDataSet('bikeSpeedVsIq_train.txt'))
testMat = mat(regTrees.loadDataSet('bikeSpeedVsIq_test.txt'))

# ___REGRES TREE___
myTree = regTrees.createTree(trainMat, ops=(1, 20))
yHat = regTrees.createForeCast(myTree, testMat[:, 0])
# Each print is a single parenthesised expression, so the output is the same
# under Python 2 and Python 3.
print(corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1])

# ___MODEL TREE___
myTree = regTrees.createTree(trainMat, regTrees.modelLeaf,
                             regTrees.modelErr, (1, 20))
yHat = regTrees.createForeCast(myTree, testMat[:, 0],
                               regTrees.modelTreeEval)
print(corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1])

# ___STAND REGRES___
# The model-tree prediction buffer is overwritten row by row.
ws, X, Y = regTrees.linearSolve(trainMat)
print("ws=" + " " + str(ws))
for i in range(shape(testMat)[0]):
    yHat[i] = testMat[i, 0] * ws[1, 0] + ws[0, 0]
print(corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1])