def evalTrainStationTestStation(trainStation, testStation):
    """Fit a random forest on one station's rows and score it on another's.

    The rows for each station are looked up in the module-level
    ``dataByStation`` mapping and passed through ``splitDataForXValidation``
    together with the module-level ``columns`` and the "location" /
    "target" keys.  Element 1 of the value returned by ``rmseEval``
    (stored as ``rmse``; presumably the RMSE itself -- confirm against
    ``rmseEval``) is printed.  Nothing is returned.

    Args:
        trainStation: key into ``dataByStation`` for the fitting station.
        testStation: key into ``dataByStation`` for the scoring station.
    """
    # The station goes in the first (train) set, so only the train outputs
    # of this split are kept.
    fitX, _, fitY, _ = splitDataForXValidation(
        {trainStation}, set(), "location",
        dataByStation[trainStation], columns, "target")

    # The station goes in the second (test) set, so only the test outputs
    # of this split are kept.
    _, evalX, _, evalY = splitDataForXValidation(
        set(), {testStation}, "location",
        dataByStation[testStation], columns, "target")

    forest = RandomForestRegressor(
        n_estimators=60, max_depth=10, random_state=42, n_jobs=-1)
    forest.fit(fitX, fitY)
    predicted = forest.predict(evalX)

    rmse = rmseEval(evalY, predicted)[1]
    message = ("Training on station " + str(trainStation)
               + ", applying on station " + str(testStation)
               + ": rmse: " + str(rmse))
    print(message)
"timestamp", 'natural_area', 'building_count', 'leisure_area', 'landuse_area', 'lane_length', 'length', 'building_area' ], data, columns) for iteration in range(0, 5): print("iter_" + str(iteration)) trainStations = [] testStations = [] for i in range(0, 5): if i == iteration: testStations = testStations + locations_grouped[i] else: trainStations = trainStations + locations_grouped[i] print("\ttrainStations: " + str(trainStations)) print("\ttestStations: " + str(testStations)) trainStationSet = set(s for s in trainStations) testStationSet = set(s for s in testStations) trainX, testX, trainY, testY = splitDataForXValidation( trainStationSet, testStationSet, "location", data, columns, "target") print("\t#train: " + str(len(trainY)) + ", #test:" + str(len(testY))) model = RandomForestRegressor(min_samples_leaf=9, n_estimators=59, n_jobs=-1, random_state=42) model.fit(trainX, trainY) prediction = model.predict(testX) rmse = rmseEval(testY, prediction)[1] print("\trmse: " + str(rmse))
loc66Data = defaultdict(list)
loc51Data = defaultdict(list)

# Copy every row of `data` whose "location" equals 61.0, 66.0 or 51.0 into
# the matching per-station column store, one value per entry of `columns`.
# loc61Data is not created here -- presumably it is initialised just above
# this point in the same way as the other two; confirm.
stationBuckets = (
    (61.0, loc61Data),
    (66.0, loc66Data),
    (51.0, loc51Data),
)
for i in range(0, len(data["location"])):
    for stationId, stationData in stationBuckets:
        if data["location"][i] == stationId:
            for c in columns:
                stationData[c].append(data[c][i])

# Station 61 as the training set: the train outputs of this split are used.
trainX, testX, trainY, testY = splitDataForXValidation(
    {61.0}, set(), "location", loc61Data, columns, "target")
# Station 61 as the test set: the test outputs of this split are used.
trainX2, testX2, trainY2, testY2 = splitDataForXValidation(
    set(), {61.0}, "location", loc61Data, columns, "target")

model = RandomForestRegressor(
    n_estimators=60, max_depth=10, random_state=42, n_jobs=-1)
model.fit(trainX, trainY)
prediction = model.predict(testX2)

# Both splits above read loc61Data, so the model is scored on rows taken
# from the same station it was fitted on.
rmse = rmseEval(testY2, prediction)[1]
print("Training on station 61, applying on station 61: rmse: " + str(rmse))

# Rebuild the station-61 training split for the code that follows.
trainX, testX, trainY, testY = splitDataForXValidation(
    {61.0}, set(), "location", loc61Data, columns, "target")