def test8(self, msg=warn("test ORF Regression")):
    """Train an ORF regressor on a synthetic 2-D target and print diagnostics.

    The model is updated one sample at a time on 1000 standard-normal
    points, then evaluated on a fresh sample of the same size. Prints the
    prediction at (0, 0), tree-size and depth statistics, and the test RMSE.

    Args:
        msg: Not used in the body. Its default is the result of calling
            ``warn(...)`` once, when the function is defined.
    """
    def f(x):
        # Piecewise target: sin(x0) where x0 < x1, otherwise cos(x1 + pi/2).
        return math.sin(x[0]) if x[0] < x[1] else math.cos(x[1] + math.pi / 2)

    n = 1000
    X = np.random.randn(n, 2)
    # A real list (not a lazy map object) because y is indexed below.
    y = [f(row) for row in X]
    param = {
        'minSamples': 10,
        'minGain': 0,
        'xrng': dataRange(X),
        'maxDepth': 10
    }
    xtest = np.random.randn(n, 2)
    ytest = [f(row) for row in xtest]

    orf = ORF(param, numTrees=50)
    for i in range(n):
        orf.update(X[i, :], y[i])

    preds = orf.predicts(xtest)
    mse = mean([(p - t) * (p - t) for p, t in zip(preds, ytest)])

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("ORF Regression:")
    print("f(0,0):         " + str(orf.predict([0, 0])) + " +/- " + str(
        orf.predStat([0, 0], sd)))
    print("Mean size:      " + str(orf.meanTreeSize()))
    print("SD size:        " + str(orf.sdTreeSize()))
    print("Mean max depth: " + str(orf.meanMaxDepth()))
    print("SD max depth:   " + str(orf.sdMaxDepth()))
    print("RMSE:           " + str(math.sqrt(mse)))
    print("")
def test7(self, msg=warn("test ORF Classify")):
    """Train an ORF classifier on a unit-circle problem and print diagnostics.

    Points whose squared norm is below 1 are labelled 1, all others 0. The
    model is updated one sample at a time on 1000 standard-normal points and
    evaluated on a fresh sample of the same size. Prints the number of
    positive test labels, the confusion matrix, tree statistics and accuracy.

    Args:
        msg: Not used in the body. Its default is the result of calling
            ``warn(...)`` once, when the function is defined.
    """
    def f(x):
        # 1 inside the unit circle, 0 outside.
        return int(x[0] * x[0] + x[1] * x[1] < 1)

    n = 1000
    X = np.random.randn(n, 2)
    # A real list (not a lazy map object) because y is indexed below.
    y = [f(row) for row in X]
    param = {
        'minSamples': 10,
        'minGain': .01,
        'numClasses': 2,
        'xrng': dataRange(X)
    }

    orf = ORF(param, numTrees=50)
    for i in range(n):
        orf.update(X[i, :], y[i])

    xtest = np.random.randn(n, 2)
    ytest = [f(row) for row in xtest]
    preds = orf.predicts(xtest)
    conf = orf.confusion(xtest, ytest)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("")
    print(sum(ytest))
    orf.printConfusion(conf)

    acc = [p == t for p, t in zip(preds, ytest)]
    print("ORF Classify:")
    print("Mean max depth: " + str(orf.meanMaxDepth()))
    print("Mean Size:      " + str(orf.meanTreeSize()))
    print("SD Size:        " + str(orf.sdTreeSize()))
    print("Accuracy:       " + str(mean(acc)))
    print("")
def test5(self, msg=warn("test ORT Classify")):
    """Train a single ORT classifier on a unit-circle problem and report accuracy.

    Points whose squared norm is below 1 are labelled 1, all others 0. The
    tree is updated one sample at a time and then scored on the same
    training points, so the printed accuracy is a training accuracy.

    Args:
        msg: Not used in the body. Its default is the result of calling
            ``warn(...)`` once, when the function is defined.
    """
    def f(x):
        # 1 inside the unit circle, 0 outside.
        return int(x[0] * x[0] + x[1] * x[1] < 1)

    n = 1000
    X = np.random.randn(n, 2)
    y = [f(row) for row in X]
    param = {
        'minSamples': 10,
        'minGain': .01,
        'numClasses': 2,
        'xrng': dataRange(X),
        'maxDepth': 5
    }

    ort = ORT(param)
    # Explicit loop: map() used only for side effects is lazy on Python 3
    # and would silently skip every update.
    for i in range(n):
        ort.update(X[i, :], y[i])
    # ort.draw()

    preds = [ort.predict(X[i, :]) for i in range(n)]
    acc = [p == t for p, t in zip(preds, y)]

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("ORT Classify:")
    print("Accuracy: " + str(mean(acc)))
    print("max depth: " + str(ort.tree.maxDepth()))
    print("")
def test6(self, msg=warn("test ORT Regression")):
    """Train a single ORT regressor on a synthetic 2-D target and report RMSE.

    The tree is updated one sample at a time and then scored on the same
    training points, so the printed RMSE is a training error.

    Args:
        msg: Not used in the body. Its default is the result of calling
            ``warn(...)`` once, when the function is defined.
    """
    def f(x):
        # Piecewise target: sin(x0) where x0 < x1, otherwise cos(x1 + pi/2).
        return math.sin(x[0]) if x[0] < x[1] else math.cos(x[1] + math.pi / 2)

    n = 1000
    X = np.random.randn(n, 2)
    # A real list (not a lazy map object) because y is indexed below.
    y = [f(row) for row in X]
    param = {
        'minSamples': 10,
        'minGain': .01,
        'xrng': dataRange(X),
        'maxDepth': 5
    }

    ort = ORT(param)
    for i in range(n):
        ort.update(X[i, :], y[i])
    # ort.draw()

    preds = [ort.predict(X[i, :]) for i in range(n)]
    mse = mean([(p - t) * (p - t) for p, t in zip(preds, y)])

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("ORT Regression:")
    print("RMSE: " + str(math.sqrt(mse)))
    print("max depth: " + str(ort.tree.maxDepth()))
    print("")
import math

import numpy as np

from ORFpy import ORF, dataRange


def g(x):
    """Piecewise target: sin(x[0]) where x[0] < x[1], else cos(x[1] + pi/2)."""
    return math.sin(x[0]) if x[0] < x[1] else math.cos(x[1] + math.pi / 2)


n = 10
X = np.random.randn(n, 2)
# A real list (not a lazy map object) because y is indexed in the loop below.
y = [g(row) for row in X]
param = {'minSamples': 10, 'minGain': 0, 'xrng': dataRange(X), 'maxDepth': 10}
xtest = np.random.randn(n, 2)
ytest = [g(row) for row in xtest]

# Feed the training samples to the forest one at a time.
orf = ORF(param, numTrees=50)
for i in range(n):
    orf.update(X[i, :], y[i])

preds = orf.predicts(xtest)
sse = sum((p - t) * (p - t) for p, t in zip(preds, ytest))
rmse = math.sqrt(sse / float(len(preds)))
# Sample output "RMSE: 0.22"; the data is random, so the value varies per run.
print("RMSE: " + str(round(rmse, 2)))
# NOTE(review): this first statement is the tail of a method whose beginning
# lies outside this view; only its print syntax was normalised.
print("Online train finish!")


# NOTE(review): save/load/test take `self` and presumably belong to the
# OnlineRandomForest class whose header is outside this view; confirm the
# indentation when splicing this block back into the file.
def save(self):
    """Persist the model held in ``self.orf`` to ./save/rf.pkl.

    The ``save`` directory must be created beforehand, otherwise an error
    is raised.
    """
    joblib.dump(self.orf, "./save/rf.pkl")


def load(self):
    """Load the model from ./save/rf.pkl into ``self.orf``."""
    # joblib.load is pickle-based: only load files from a trusted source.
    self.orf = joblib.load("./save/rf.pkl")


def test(self):
    """Predict on ``self.testX`` and print the RMSE against ``self.testY``."""
    preds = self.orf.predicts(self.testX)
    # RMSE: root-mean-square error, also known as the standard error.
    sse = sum((p - t) * (p - t) for p, t in zip(preds, self.testY))
    rmse = math.sqrt(sse / float(len(preds)))
    print("RMSE: " + str(round(rmse, 2)) + "\n")


if __name__ == "__main__":
    # `with` closes the file even if a row fails to parse.
    tempData = []
    with open("data/data.csv", 'r') as fopen:
        for eachLine in fopen:
            # Build a real list first: np.array() over a lazy map object
            # does not produce a numeric row on Python 3.
            tempData.append(
                np.array([float(v) for v in eachLine.split(",")]))
    tempData = np.array(tempData)

    param = {
        'minSamples': 10,
        'minGain': 0,
        'xrng': dataRange(tempData[:, :722]),
        'maxDepth': 10
    }
    orf = OnlineRandomForest(param=param, numTrees=50, input=722)
    orf.offline_train()
    orf.online_train()
import numpy as np

from ORF import OnlineRandomForest
from ORFpy import dataRange

if __name__ == "__main__":
    # Read data/data.csv as comma-separated floats, one sample per line.
    # `with` closes the file even if a row fails to parse.
    tempData = []
    with open("data/data.csv", 'r') as fopen:
        for eachLine in fopen:
            # Build a real list first: np.array() over a lazy map object
            # does not produce a numeric row on Python 3.
            tempData.append(
                np.array([float(v) for v in eachLine.split(",")]))
    tempData = np.array(tempData)

    # The value range is taken over the first 722 columns, matching input=722.
    param = {
        'minSamples': 10,
        'minGain': 0,
        'xrng': dataRange(tempData[:, :722]),
        'maxDepth': 10
    }
    orf = OnlineRandomForest(param=param, numTrees=50, input=722)
    orf.online_train()