def evalTrainStationTestStation(trainStation, testStation, features):
    """Fit a random forest on one station's data and score it on another's.

    The training portion comes from ``dataByStation[trainStation]`` and the
    test portion from ``dataByStation[testStation]``; both are produced by
    ``splitDataForXValidation`` using the "location" and "target" columns.
    The resulting RMSE is printed and returned.

    Args:
        trainStation: key of the station whose data the model is fitted on.
        testStation: key of the station whose data the model is scored on.
        features: feature names forwarded to ``splitDataForXValidation``.

    Returns:
        The element at index 1 of ``rmseEval(testY, prediction)``, which this
        function reports as the rmse.
    """
    # First split: only the training half is used (test station set is empty).
    fitInputs, _, fitTargets, _ = splitDataForXValidation(
        {trainStation},
        set(),
        "location",
        dataByStation[trainStation],
        features,
        "target",
    )

    # Second split: only the test half is used (train station set is empty).
    _, evalInputs, _, evalTargets = splitDataForXValidation(
        set(),
        {testStation},
        "location",
        dataByStation[testStation],
        features,
        "target",
    )

    # Fixed random_state keeps the fitted forest reproducible between runs.
    forest = RandomForestRegressor(
        max_depth=10,
        n_estimators=60,
        n_jobs=-1,
        random_state=42,
    )
    forest.fit(fitInputs, fitTargets)
    predicted = forest.predict(evalInputs)

    rmse = rmseEval(evalTargets, predicted)[1]

    message = "".join([
        "Training on station ",
        str(trainStation),
        ", applying on station ",
        str(testStation),
        ": rmse: ",
        str(rmse),
    ])
    print(message)
    return rmse
'bank_holiday', 'hour', 'month', 'day_of_week', 'building_count', 'length', 'natural_area' ] dataDict = {} rmseDict = {} for location in all_stations: print("stations " + str(location)) trainStations = set( float(station) for station in all_stations if station != location) testStations = set([float(location)]) trainX, testX, trainY, testY = splitDataForXValidation( trainStations, testStations, "location", data, features_TW, "target") print("\tTW #train: " + str(len(trainY)) + ", #test:" + str(len(testY))) model = RandomForestRegressor(min_samples_leaf=9, n_estimators=59, n_jobs=-1, random_state=42) model.fit(trainX, trainY) prediction = model.predict(testX) rmse = rmseEval(testY, prediction)[1] print("\trmse: " + str(rmse)) dataDict[str(location) + "_obs"] = testY ae = [] for i in range(0, len(testY)): ae.append(abs(testY[i] - prediction[i])) dataDict[str(location) + "_ae_tw"] = ae rmseDict[str(location) + "_ae_tw"] = rmse
# `columns` is passed to loadData empty and later used as the feature list,
# so loadData presumably fills it (and `data`) in place -- TODO confirm.
columns = []
loadData(DATA_FILE, ["timestamp"], data, columns)

# Five rounds of cross-validation over the station groups: in round
# `iteration`, group number `iteration` is the test set and the other four
# groups together form the training set.
for iteration in range(0, 5):
    print("iter_" + str(iteration))

    trainStations = []
    testStations = []
    for i in range(0, 5):
        if i != iteration:
            trainStations = trainStations + locations_grouped[i]
            continue
        testStations = testStations + locations_grouped[i]

    print("\ttrainStations: " + str(trainStations))
    print("\ttestStations: " + str(testStations))

    # The splitter receives the station ids as sets.
    trainStationSet = set(trainStations)
    testStationSet = set(testStations)

    trainX, testX, trainY, testY = splitDataForXValidation(
        trainStationSet,
        testStationSet,
        "location",
        data,
        columns,
        "target",
    )
    print(
        "\t#trainX: " + str(len(trainX))
        + ", #testX:" + str(len(testX))
    )
    print(
        "\t#trainY: " + str(len(trainY))
        + ", #testY:" + str(len(testY))
    )

    # Fixed random_state keeps the fitted forest reproducible between runs.
    model = RandomForestRegressor(
        max_depth=10,
        n_estimators=30,
        n_jobs=-1,
        random_state=42,
    )
    model.fit(trainX, trainY)
    prediction = model.predict(testX)

    rmse = rmseEval(testY, prediction)[1]
    print("\trmse: " + str(rmse))