# Disabled feature-ranking diagnostics, kept for reference.
# NOTE(review): `importances` is computed outside this view - confirm before
# re-enabling.
# std = np.std([tree.feature_importances_ for tree in model.model.estimators_],
#              axis=0)
# indices = np.argsort(importances)[::-1]
# print("Feature ranking:")
# for f in range(0,len(model.modelColumns)):
#     print(model.modelColumns[indices[f]] + " -> " + str(importances[indices[f]]))

# Report the elapsed time of the preceding step; `start` is set earlier in
# the script (outside this view).
end = time.time()
print(end - start)

# load apply data
# `data2` starts empty and is indexed by column name below, so loadData is
# presumably filling it (and `columns2`) in place - confirm against loadData.
dataFile2 = "/media/sf_lur/data/grid_hour.csv"
data2 = {}
columns2 = []
loadData(dataFile2, [], data2, columns2)

# Apply the already-trained model to the grid data and report how long it took.
start = time.time()
predictionData = applyRandomForest(data2, model, {'estimators': 59, 'leaf': 9})
end = time.time()
print(end - start)

# Write one CSV row per prediction. The `with` block guarantees the file is
# closed even if a row conversion or a write raises part-way through.
with open("/media/sf_lur/data/apply_grid.csv", 'w') as output:
    output.write("location,timestamp,prediction\n")
    for i, prediction in enumerate(predictionData):
        # location and timestamp are truncated to integers before writing.
        row = ",".join([
            str(int(data2["location"][i])),
            str(int(data2["timestamp"][i])),
            str(prediction),
        ])
        output.write(row + "\n")
outputColumns = [] for v in values: sName = stationNames[str(v)] print("location: " + str(v) + " -> " + sName) trainData, testData = splitDataForXValidation(v, "location", data) trainColumns = [] for c in trainData: if c != "target": trainColumns.append(c) model = trainRandomForest(trainData, trainColumns, "target", { 'estimators': 59, 'leaf': 9 }) predictionData = applyRandomForest(testData, model, 0) rae = raeEval(testData["target"], predictionData) print(str(len(rae[1]))) print(str(len(predictionData))) print(str(len(testData["target"]))) # write header if len(outputColumns) == 0: outputColumns = deepcopy(trainColumns) for c in outputColumns: output.write(c) output.write(",") output.write("error_rae\n") for i in range(0, len(testData["target"])): for c in outputColumns: