def eval_one(step):
    """Score one feature mask and return ``(rmse, accuracy)``.

    ``step`` is indexed in parallel with the module-level ``all_features``:
    a truthy ``step[i]`` keeps ``all_features[i]`` as a classifier input.

    For every index into ``groups`` the train stations are cut into a lower
    (leading) and an upper (trailing) part. One ``create_model()`` model is
    fitted per part on ``tw_features``, and a ``create_classifier_model()``
    model is fitted on the selected features against the labels produced by
    ``generate_label``. ``generate_combined_prediction`` then merges the
    predicted labels with the two regressor predictions.

    Returns:
        A 2-tuple: element ``[1]`` of ``rmseEval(observations, combined
        predictions)`` and ``accuracy_score(test labels, predicted
        labels)``, both accumulated over all groups.
    """
    selected = [name for pos, name in enumerate(all_features) if step[pos]]

    def regress(train_subset, test_subset):
        # Fit a fresh create_model() instance on the given train stations
        # (tw_features only) and predict for the test stations.
        x_fit, x_eval, y_fit, _ = splitDataForXValidation(
            train_subset, test_subset, "location", data, tw_features,
            "target")
        regressor = create_model()
        regressor.fit(x_fit, y_fit)
        return regressor.predict(x_eval)

    observed, combined = [], []
    true_labels, guessed_labels = [], []

    for group_idx in range(len(groups)):
        train_ids, test_ids = generate_train_test_station_list(
            group_idx, groups)

        train_all = [float(station) for station in train_ids]
        test_all = [float(station) for station in test_ids]
        train_set = set(train_all)
        test_set = set(test_all)

        # "index < n / 2.0" selects the first ceil(n / 2) entries, so an
        # odd-sized list puts its middle station into the lower part.
        train_cut = (len(train_all) + 1) // 2
        train_lower = train_all[:train_cut]
        lower_set = set(train_lower)
        upper_set = set(train_all[train_cut:])
        test_lower = test_all[:(len(test_all) + 1) // 2]

        # Lower model first, then upper model, then the classifier.
        pred_lower = regress(lower_set, test_set)
        pred_upper = regress(upper_set, test_set)

        (x_fit, x_eval, _, y_eval,
         train_loc, test_loc) = splitDataForXValidationWithLocation(
            train_set, test_set, "location", data, selected, "target")
        labels_fit = generate_label(train_loc, train_lower)
        labels_eval = generate_label(test_loc, test_lower)

        classifier = create_classifier_model()
        classifier.fit(x_fit, labels_fit)
        labels_pred = classifier.predict(x_eval)

        combined.extend(
            generate_combined_prediction(labels_pred, pred_lower, pred_upper))
        observed.extend(y_eval)
        true_labels.extend(labels_eval)
        guessed_labels.extend(labels_pred)

    return rmseEval(observed, combined)[1], accuracy_score(
        true_labels, guessed_labels)
def eval_one(step):
    """Return the RMSE for one feature mask, memoised in ``cached_results``.

    ``step`` is indexed in parallel with the module-level ``all_features``:
    a truthy ``step[i]`` keeps ``all_features[i]`` as a classifier input.
    ``step`` is also used as the key into ``cached_results``, so it has to
    be usable as a key there (presumably a tuple -- confirm with callers).

    On a cache hit the stored value is returned immediately and nothing is
    written. On a miss, for every index into ``groups``:

    * the train stations are cut into a lower (leading) and an upper
      (trailing) part;
    * one ``create_model()`` model per part is fitted on ``tw_features``;
    * a ``create_classifier_model()`` model is fitted on the selected
      features against the labels produced by ``generate_label``;
    * ``generate_combined_prediction`` merges the predicted labels with the
      two regressor predictions.

    The result is stored in ``cached_results`` and appended to
    ``CACHE_FILE`` as one line ``"<rmse>;<comma-joined step>\\n"``.

    Returns:
        Element ``[1]`` of ``rmseEval(observations, combined predictions)``
        accumulated over all groups.
    """
    if step in cached_results:
        return cached_results[step]

    eval_features = [
        feature for i, feature in enumerate(all_features) if step[i]
    ]

    all_observations = []
    all_pred_combined = []

    for group in range(len(groups)):
        train_stations, test_stations = generate_train_test_station_list(
            group, groups)

        train_floats = [float(s) for s in train_stations]
        test_floats = [float(s) for s in test_stations]
        train_station_set = set(train_floats)
        test_station_set = set(test_floats)

        # Equivalent to the "i < len / 2.0" filter: the first ceil(n / 2)
        # stations are "lower", so an odd count gives lower the extra one.
        train_split = (len(train_floats) + 1) // 2
        train_lower = train_floats[:train_split]
        train_lower_set = set(train_lower)
        train_upper_set = set(train_floats[train_split:])
        test_lower = test_floats[:(len(test_floats) + 1) // 2]

        # tw_lower
        trainX, testX, trainY, testY = splitDataForXValidation(
            train_lower_set, test_station_set, "location", data,
            tw_features, "target")
        model = create_model()
        model.fit(trainX, trainY)
        prediction_lower = model.predict(testX)

        # tw_upper
        trainX, testX, trainY, testY = splitDataForXValidation(
            train_upper_set, test_station_set, "location", data,
            tw_features, "target")
        model = create_model()
        model.fit(trainX, trainY)
        prediction_upper = model.predict(testX)

        # Classifier on the selected features, trained on all train stations.
        (trainX, testX, trainY, testY,
         train_location, test_location) = splitDataForXValidationWithLocation(
            train_station_set, test_station_set, "location", data,
            eval_features, "target")
        train_label = generate_label(train_location, train_lower)
        model = create_classifier_model()
        model.fit(trainX, train_label)
        prediction_label = model.predict(testX)

        pred_combined = generate_combined_prediction(
            prediction_label, prediction_lower, prediction_upper)
        all_pred_combined.extend(pred_combined)
        all_observations.extend(testY)

    rmse = rmseEval(all_observations, all_pred_combined)[1]
    cached_results[step] = rmse

    # Save down the cached result. The context manager guarantees the file
    # handle is closed even if the write raises.
    step_str = ",".join(str(s) for s in step)
    with open(CACHE_FILE, "a") as cache_output:
        cache_output.write(str(rmse) + ";" + step_str + "\n")

    return rmse
all_pred_TWA = [] all_pred_combined = [] all_test_location = [] for group in range(0, len(groups)): log("group: " + str(group + 1)) train_stations, test_stations = generate_train_test_station_list( group, groups) log("\ttrain_stations: " + str(train_stations)) log("\ttest_stations: " + str(test_stations)) train_station_set = set([float(s) for s in train_stations]) test_station_set = set([float(s) for s in test_stations]) trainX, testX, trainY, testY, trainLocation, testLocation = splitDataForXValidationWithLocation( train_station_set, test_station_set, "location", data, all_features, "target") model = RandomForestRegressor(min_samples_leaf=29, n_estimators=64, n_jobs=-1, random_state=42) model.fit(trainX, trainY) prediction_TW = model.predict(testX) rmse = rmseEval(testY, prediction_TW)[1] log("\tALL rmse: " + str(rmse)) all_observations.extend(testY) all_pred_ALL.extend(prediction_TW) all_test_location.extend(testLocation) trainX, testX, trainY, testY, trainLocation, testLocation = splitDataForXValidationWithLocation( train_station_set, test_station_set, "location", data, tw_features,