예제 #1
0
    def test8(self, msg=warn("test ORF Regression")):
        # Online-forest regression check on a synthetic 2-D target.
        #
        # Draws 1000 standard-normal training points and 1000 test points,
        # feeds the training points to an ORF one at a time, then prints the
        # prediction at the origin, tree size/depth statistics and the test
        # RMSE. Nothing is asserted or returned; the output is meant to be
        # read by a person.
        #
        # Note: the default for ``msg`` is evaluated once, when the method is
        # defined, so ``warn(...)`` runs at definition time, not per call.
        def target(pt):
            # Piecewise target: sin of the first coordinate when it is the
            # smaller of the two, otherwise cos of the second shifted by pi/2.
            if pt[0] < pt[1]:
                return math.sin(pt[0])
            return math.cos(pt[1] + math.pi / 2)

        numSamples = 1000
        X = np.random.randn(numSamples, 2)
        y = [target(row) for row in X]
        param = {
            'minSamples': 10,
            'minGain': 0,
            'xrng': dataRange(X),
            'maxDepth': 10
        }
        # The test set is drawn after the training set and before the forest
        # is built, so the order of random draws is kept stable.
        xtest = np.random.randn(numSamples, 2)
        ytest = [target(row) for row in xtest]
        orf = ORF(param, numTrees=50)

        # Present the training rows to the forest one at a time.
        for features, label in zip(X, y):
            orf.update(features, label)

        preds = orf.predicts(xtest)

        squaredErrors = [(p - t) * (p - t) for p, t in zip(preds, ytest)]
        mse = mean(squaredErrors)

        print("ORF Regression:")
        originPrediction = orf.predict([0, 0])
        originSpread = orf.predStat([0, 0], sd)
        print("f(0,0):          " + str(originPrediction) + " +/- " +
              str(originSpread))
        print("Mean size:       " + str(orf.meanTreeSize()))
        print("SD size:         " + str(orf.sdTreeSize()))
        print("Mean max depth:  " + str(orf.meanMaxDepth()))
        print("SD max depth:    " + str(orf.sdMaxDepth()))
        print("RMSE:            " + str(math.sqrt(mse)))
        print("")
예제 #2
0
    def test7(self, msg=warn("test ORF Classify")):
        # Online-forest classification check on a synthetic 2-D problem.
        #
        # The label is 1 when a point lies strictly inside the unit circle and
        # 0 otherwise. An ORF is trained one sample at a time on 1000
        # standard-normal points, then evaluated on a fresh 1000-point sample.
        # The confusion matrix, tree statistics and accuracy are printed;
        # nothing is asserted or returned.
        #
        # Note: the default for ``msg`` is evaluated once, when the method is
        # defined, so ``warn(...)`` runs at definition time, not per call.
        def insideUnitCircle(pt):
            return int(pt[0] * pt[0] + pt[1] * pt[1] < 1)

        numSamples = 1000
        X = np.random.randn(numSamples, 2)
        y = [insideUnitCircle(row) for row in X]
        param = {
            'minSamples': 10,
            'minGain': .01,
            'numClasses': 2,
            'xrng': dataRange(X)
        }
        orf = ORF(param, numTrees=50)
        # Present the training rows to the forest one at a time.
        for features, label in zip(X, y):
            orf.update(features, label)

        # The test data is drawn only after training, as in the original
        # ordering of random draws.
        xtest = np.random.randn(numSamples, 2)
        ytest = [insideUnitCircle(row) for row in xtest]
        preds = orf.predicts(xtest)
        conf = orf.confusion(xtest, ytest)
        print("")
        # Number of positive (inside-circle) test labels.
        print(sum(ytest))
        orf.printConfusion(conf)

        acc = [predicted == actual for predicted, actual in zip(preds, ytest)]
        print("ORF Classify:")
        print("Mean max depth: " + str(orf.meanMaxDepth()))
        print("Mean Size: " + str(orf.meanTreeSize()))
        print("SD Size: " + str(orf.sdTreeSize()))
        print("Accuracy: " + str(mean(acc)))
        print("")
예제 #3
0
    def test5(self, msg=warn("test ORT Classify")):
        # Single online-tree classification check on a synthetic 2-D problem.
        #
        # The label is 1 when a point lies strictly inside the unit circle and
        # 0 otherwise. One ORT is trained sample by sample on 1000
        # standard-normal points and then scored on those same points.
        # Accuracy and the tree's maximum depth are printed; nothing is
        # asserted or returned.
        #
        # Note: the default for ``msg`` is evaluated once, when the method is
        # defined, so ``warn(...)`` runs at definition time, not per call.
        def f(x):
            return int(x[0] * x[0] + x[1] * x[1] < 1)

        n = 1000
        X = np.random.randn(n, 2)
        y = [f(row) for row in X]
        param = {
            'minSamples': 10,
            'minGain': .01,
            'numClasses': 2,
            'xrng': dataRange(X),
            'maxDepth': 5
        }
        ort = ORT(param)
        # Train with an explicit loop. The previous map(lambda ...) call was
        # used only for its side effects: it built a throwaway list and would
        # perform no updates at all wherever map() is lazy.
        for i in range(n):
            ort.update(X[i, :], y[i])
        # ort.draw()
        # Predictions are made on the training points themselves.
        preds = [ort.predict(X[i, :]) for i in range(n)]
        acc = [p == t for p, t in zip(preds, y)]
        print("ORT Classify:")
        print("Accuracy: " + str(mean(acc)))
        print("max depth: " + str(ort.tree.maxDepth()))
        print("")
예제 #4
0
    def test6(self, msg=warn("test ORT Regression")):
        # Single online-tree regression check on a synthetic 2-D target.
        #
        # One ORT is trained sample by sample on 1000 standard-normal points
        # and then scored on those same points. The RMSE and the tree's
        # maximum depth are printed; nothing is asserted or returned.
        #
        # Note: the default for ``msg`` is evaluated once, when the method is
        # defined, so ``warn(...)`` runs at definition time, not per call.
        def target(pt):
            # Piecewise target: sin of the first coordinate when it is the
            # smaller of the two, otherwise cos of the second shifted by pi/2.
            if pt[0] < pt[1]:
                return math.sin(pt[0])
            return math.cos(pt[1] + math.pi / 2)

        numSamples = 1000
        X = np.random.randn(numSamples, 2)
        y = [target(row) for row in X]
        param = {
            'minSamples': 10,
            'minGain': .01,
            'xrng': dataRange(X),
            'maxDepth': 5
        }
        ort = ORT(param)
        # Present the training rows to the tree one at a time.
        for features, label in zip(X, y):
            ort.update(features, label)
        # ort.draw()
        # Predictions are made on the training points themselves.
        preds = [ort.predict(row) for row in X]
        squaredErrors = [(p - t) * (p - t) for p, t in zip(preds, y)]
        mse = mean(squaredErrors)
        print("ORT Regression:")
        print("RMSE: " + str(math.sqrt(mse)))
        print("max depth: " + str(ort.tree.maxDepth()))
        print("")
예제 #5
0
import numpy as np
import math
from ORFpy import ORF, dataRange


def g(x):
    """Return the piecewise regression target for a 2-D point.

    Args:
        x: An indexable with at least two numeric entries.

    Returns:
        ``sin(x[0])`` when ``x[0] < x[1]``, otherwise ``cos(x[1] + pi / 2)``.
    """
    first, second = x[0], x[1]
    if first < second:
        return math.sin(first)
    return math.cos(second + math.pi / 2)


# Demo: train an ORF regressor online on a tiny synthetic sample and report
# the RMSE on a second, independently drawn sample.
n = 10
X = np.random.randn(n, 2)
y = list(map(g, X))

param = {
    'minSamples': 10,
    'minGain': 0,
    'xrng': dataRange(X),
    'maxDepth': 10,
}
xtest = np.random.randn(n, 2)
ytest = list(map(g, xtest))
orf = ORF(param, numTrees=50)
# Feed the training points to the forest one at a time.
for i in range(n):
    orf.update(X[i], y[i])

preds = orf.predicts(xtest)
# Sum of squared prediction errors, then root of their mean.
sse = sum((p - t) * (p - t) for p, t in zip(preds, ytest))
rmse = math.sqrt(sse / float(len(preds)))
print("RMSE: " + str(round(rmse, 2)))
# Sample output recorded by the author (inputs are random, so it varies):
# RMSE: 0.22
예제 #6
0
        print "Online train finish!"

    def save(self):
        """Persist ``self.orf`` to ``./save/rf.pkl`` with joblib.

        The ``save`` directory must already exist; otherwise the dump fails
        with an error.
        """
        modelPath = "./save/rf.pkl"
        joblib.dump(self.orf, modelPath)

    def load(self):
        """Restore ``self.orf`` from ``./save/rf.pkl`` with joblib.

        NOTE(review): joblib.load unpickles the file, so it should only be
        pointed at model files from a trusted source.
        """
        modelPath = "./save/rf.pkl"
        self.orf = joblib.load(modelPath)

    def test(self):
        """Print the RMSE of ``self.orf`` predictions for ``self.testX``.

        Predictions are compared element-wise with ``self.testY``. The root
        mean squared error is rounded to two decimals and printed, followed
        by an extra blank line. Nothing is returned.
        """
        predicted = self.orf.predicts(self.testX)
        # RMSE: root-mean-square error, also known as the standard error.
        squaredErrors = [(p - t) * (p - t)
                         for p, t in zip(predicted, self.testY)]
        rmse = math.sqrt(sum(squaredErrors) / float(len(predicted)))
        print("RMSE: " + str(round(rmse, 2)) + "\n")

if __name__ == "__main__":
    # Number of leading CSV columns used for the feature range and passed to
    # the forest as ``input``. Presumably the feature count — confirm
    # against OnlineRandomForest.
    inputDim = 722

    # Parse every CSV line into a float vector. The with-block closes the
    # file even if a field fails to convert, which the manual open()/close()
    # pair did not guarantee.
    tempData = []
    with open("data/data.csv", 'r') as fopen:
        for eachLine in fopen:
            # Build the row eagerly as a list so np.array always receives a
            # real sequence rather than a map object.
            eachLineData = np.array(
                [float(field) for field in eachLine.split(",")])
            tempData.append(eachLineData)
    tempData = np.array(tempData)

    param = {
        'minSamples': 10,
        'minGain': 0,
        'xrng': dataRange(tempData[:, :inputDim]),
        'maxDepth': 10
    }
    orf = OnlineRandomForest(param=param, numTrees=50, input=inputDim)
    # Run the offline training pass first, then the online one.
    orf.offline_train()
    orf.online_train()
예제 #7
0
from ORF import OnlineRandomForest
from ORFpy import dataRange
import numpy as np

if __name__ == "__main__":
    # Number of leading CSV columns used for the feature range and passed to
    # the forest as ``input``. Presumably the feature count — confirm
    # against OnlineRandomForest.
    inputDim = 722

    # Parse every CSV line into a float vector. The with-block closes the
    # file even if a field fails to convert, which the manual open()/close()
    # pair did not guarantee.
    tempData = []
    with open("data/data.csv", 'r') as fopen:
        for eachLine in fopen:
            # Build the row eagerly as a list so np.array always receives a
            # real sequence rather than a map object.
            eachLineData = np.array(
                [float(field) for field in eachLine.split(",")])
            tempData.append(eachLineData)
    tempData = np.array(tempData)

    param = {
        'minSamples': 10,
        'minGain': 0,
        'xrng': dataRange(tempData[:, :inputDim]),
        'maxDepth': 10
    }
    orf = OnlineRandomForest(param=param, numTrees=50, input=inputDim)
    orf.online_train()