# std = np.std([tree.feature_importances_ for tree in model.model.estimators_],
#              axis=0)
# indices = np.argsort(importances)[::-1]
# print("Feature ranking:")
# for f in range(0,len(model.modelColumns)):
#     print(model.modelColumns[indices[f]] + " -> " + str(importances[indices[f]]))

# Report how long the preceding stage took (`start` is set earlier in the
# script, before this section).
end = time.time()
print(end - start)

# Load the data the trained model will be applied to. data2 is filled in
# place by loadData (its "location" and "timestamp" columns are read below);
# columns2 is presumably filled the same way -- confirm against loadData.
dataFile2 = "/media/sf_lur/data/grid_hour.csv"
data2 = {}
columns2 = []
loadData(dataFile2, [], data2, columns2)

# Time the prediction step on its own.
start = time.time()

predictionData = applyRandomForest(data2, model, {'estimators': 59, 'leaf': 9})

end = time.time()
print(end - start)

# Write one "location,timestamp,prediction" row per prediction.
# The `with` block guarantees the file is flushed and closed even if a row
# fails to convert or write; the earlier explicit close() was skipped whenever
# an exception was raised inside the loop.
with open("/media/sf_lur/data/apply_grid.csv", 'w') as output:
    output.write("location,timestamp,prediction\n")
    for i, prediction in enumerate(predictionData):
        # location and timestamp are truncated to integers before printing;
        # the prediction is written with str() exactly as before.
        output.write(
            str(int(data2["location"][i]))
            + ","
            + str(int(data2["timestamp"][i]))
            + ","
            + str(prediction)
        )
        output.write("\n")
Example #2
0
# Column order used for the output file; stays empty until the first
# iteration of the loop below has written the header.
outputColumns = []

# One pass per entry of `values`: split the data on the "location" column,
# train a random forest on the training part and evaluate it on the test part.
# NOTE(review): presumably each v is a station/location id and the test part
# holds the rows for that location -- confirm against splitDataForXValidation.
for v in values:
    sName = stationNames[str(v)]
    print("location: " + str(v) + " -> " + sName)
    trainData, testData = splitDataForXValidation(v, "location", data)
    # Use every column of the training data except the target as a feature.
    trainColumns = []
    for c in trainData:
        if c != "target":
            trainColumns.append(c)

    model = trainRandomForest(trainData, trainColumns, "target", {
        'estimators': 59,
        'leaf': 9
    })
    # NOTE(review): the third argument is 0 here; its meaning is not visible
    # in this section -- confirm against applyRandomForest.
    predictionData = applyRandomForest(testData, model, 0)
    rae = raeEval(testData["target"], predictionData)
    # Print the lengths of rae[1], the predictions and the test targets
    # (looks like a sanity check that all three line up).
    print(str(len(rae[1])))
    print(str(len(predictionData)))
    print(str(len(testData["target"])))

    # Write the header once, on the first iteration only: the feature column
    # names (a copy of trainColumns) followed by "error_rae".
    if len(outputColumns) == 0:
        outputColumns = deepcopy(trainColumns)
        for c in outputColumns:
            output.write(c)
            output.write(",")
        output.write("error_rae\n")

    for i in range(0, len(testData["target"])):
        for c in outputColumns: