# Cross-validation over the five station groups: on each pass the group whose
# index equals `group` is held out for testing and the other four are used
# for training.
for group in range(5):
    print("Test group " + str(group + 1))

    # Send each group's stations to the held-out list or the training list.
    trainStationList = []
    testStationList = []
    for i in range(5):
        bucket = testStationList if i == group else trainStationList
        bucket.extend(groups[i])

    # Station ids are normalised to float before being handed to the splitter
    # (presumably to match the dtype of the "location" column - TODO confirm).
    trainStations = {float(station) for station in trainStationList}
    testStations = {float(station) for station in testStationList}

    # ---- TW feature set: split, fit, evaluate ----
    trainX, testX, trainY, testY = splitDataForXValidation(
        trainStations, testStations, "location", data, features_TW, "target"
    )
    print("\tTW #train: " + str(len(trainY)) + ", #test:" + str(len(testY)))

    # Fixed random_state keeps the fit reproducible between runs.
    model = RandomForestRegressor(
        min_samples_leaf=9, n_estimators=59, n_jobs=-1, random_state=42
    )
    model.fit(trainX, trainY)
    prediction = model.predict(testX)
    # Element [1] of rmseEval's result is what gets reported as the RMSE.
    rmse = rmseEval(testY, prediction)[1]
    print("\trmse: " + str(rmse))

    # Record observations, per-sample absolute errors and the fold RMSE under
    # keys prefixed with the fold index.
    key_prefix = str(group)
    dataDict[key_prefix + "_obs"] = testY
    # Indexed access is kept on purpose so the lookup semantics of testY and
    # prediction are exactly those of the element-by-element loop.
    ae = [abs(testY[k] - prediction[k]) for k in range(len(testY))]
    dataDict[key_prefix + "_ae_tw"] = ae
    rmseDict[key_prefix + "_ae_tw"] = rmse

    # ---- TWA feature set: same stations, different feature columns ----
    trainX, testX, trainY, testY = splitDataForXValidation(
        trainStations, testStations, "location", data, features_TWA, "target"
    )
    print("\tTWA #train: " + str(len(trainY)) + ", #test:" + str(len(testY)))
# Body of one cross-validation fold. `group`, `trainStationList` and
# `testStationList` are expected to be set up by the enclosing code, which is
# not visible in this fragment.

# Send each group's stations to the held-out list or the training list.
for i in range(5):
    bucket = testStationList if i == group else trainStationList
    bucket.extend(groups[i])

trainStations = {float(station) for station in trainStationList}

# Reorder train stations: rebuild the list by walking all_stations and keeping
# only the entries whose float value is a training station. The list is
# logged before and after so the reordering can be inspected.
log(output_log, "\ttrainStationList:" + str(trainStationList))
trainStationList = [
    candidate for candidate in all_stations if float(candidate) in trainStations
]
log(output_log, "\ttrainStationList:" + str(trainStationList))

testStations = {float(station) for station in testStationList}

# ---- TW feature set: split, fit, evaluate ----
trainX, testX, trainY, testY = splitDataForXValidation(
    trainStations, testStations, "location", data, features_TW, "target"
)
log(
    output_log,
    "\tTW #train: " + str(len(trainY)) + ", #test:" + str(len(testY)),
)

# Fixed random_state keeps the fit reproducible between runs.
model = RandomForestRegressor(
    min_samples_leaf=9, n_estimators=59, n_jobs=-1, random_state=42
)
model.fit(trainX, trainY)
prediction_TW = model.predict(testX)
# Element [1] of rmseEval's result is what gets reported as the RMSE.
rmse = rmseEval(testY, prediction_TW)[1]
log(output_log, "\trmse: " + str(rmse))

# Accumulate this fold's observations and TW predictions across folds.
obs.extend(testY)
all_pred_TW.extend(prediction_TW)

# Second split over `columns`; this variant also returns the location of each
# train/test row alongside the feature and target data.
trainX, testX, trainY, testY, trainLocation, testLocation = (
    splitDataForXValidationWithLocation(
        trainStations, testStations, "location", data, columns, "target"
    )
)