Beispiel #1
0
def _split_features_target(frame, predictor, ignores):
    """Split a loaded frame into a feature array and a target array.

    The column at positional index ``predictor`` is popped from ``frame``
    (mutating it) and becomes the target; what remains is passed through
    ``popIgnores`` and converted to the feature array.

    Returns a ``(features, targets)`` tuple, both produced by ``array``.
    """
    target_col = frame.columns[predictor]
    # Pop the target first: ``popIgnores`` must see the frame without it,
    # exactly as the original inline code did.
    targets = array(frame.pop(target_col))
    features = array(popIgnores(frame, ignores))
    return features, targets


def experiment(train_file, test_file, predictor, ntrees, mtry, sample, ignores,
               parser_header, cpu_cores):
    """Train a WiseRF model on one CSV, evaluate it on another, print timings.

    Parameters:
        train_file: path (or anything ``pandas.read_csv`` accepts) of the
            training CSV; its first row is read as the header.
        test_file: same, for the evaluation CSV.
        predictor: positional index of the target column in both files.
        ntrees: forwarded to ``WiseRF`` as ``n_estimators``.
        mtry: unused by this function; only "NA" is reported for it.
        sample: unused by this function; only "NA" is reported for it.
        ignores: forwarded to ``popIgnores`` for both frames.
        parser_header: unused by this function (``header=0`` is hard-coded).
        cpu_cores: forwarded to ``WiseRF`` as ``n_jobs``.

    Prints the prediction array followed by a small report containing the
    train time, the test time and the error rate. Returns ``None``.
    """
    # Load train data
    train_frame = pandas.read_csv(train_file, header=0)
    # The value returned here is handed back to preprocessFrame for the test
    # frame below. NOTE(review): presumably a feature/category mapping that
    # keeps both frames encoded alike - confirm against preprocessFrame.
    fmap = preprocessFrame(train_frame)
    train_x, train_y = _split_features_target(train_frame, predictor, ignores)

    # The training timer covers model construction as well as fitting.
    train_start = time()
    rf = WiseRF(n_estimators=ntrees, n_jobs=cpu_cores)
    rf.fit(train_x, train_y)
    train_seconds = time() - train_start

    # Validation
    test_frame = pandas.read_csv(test_file, header=0)
    # Return value deliberately ignored, as in the original.
    # NOTE(review): assumes preprocessFrame transforms the frame in place.
    preprocessFrame(test_frame, fmap)
    test_x, test_y = _split_features_target(test_frame, predictor, ignores)

    # The test timer covers both predict() and score().
    test_start = time()
    predictions = rf.predict(test_x)
    test_score = rf.score(test_x, test_y)
    test_seconds = time() - test_start

    # Printed only after the timer has stopped so that console I/O does not
    # inflate the reported test time. Single-argument parenthesised prints
    # behave identically under the Python 2 print statement and Python 3.
    print(predictions)
    # NOTE(review): the error rate assumes score() returns accuracy in [0, 1].
    print("""
Train time: {0}
 Test time: {1}
 Err. rate: {2} %
Trees,Sample,Mtry,TrainTime,TestTime,OOB,ClassErr
{3},NA,NA,{0},{1},NA,{4}""".format(train_seconds, test_seconds,
                                   100 * (1 - test_score), ntrees,
                                   1 - test_score))