예제 #1
0
def eval_one(step):
    """Score one feature mask using station-grouped cross-validation.

    ``step`` is indexed in parallel with the module-level ``all_features``:
    a truthy ``step[i]`` selects ``all_features[i]`` as a classifier input.

    For every entry of ``groups`` the function:

    1. obtains train/test station lists from
       ``generate_train_test_station_list``;
    2. fits two regressors from ``create_model()`` on ``tw_features``, one on
       the first half of the training stations and one on the second half
       (the first half gets the extra station when the count is odd);
    3. fits a classifier from ``create_classifier_model()`` on the selected
       features, using labels from ``generate_label``;
    4. merges the three predictions with ``generate_combined_prediction``.

    Returns:
        A ``(rmse, accuracy)`` tuple. ``rmse`` is element ``[1]`` of
        ``rmseEval`` over the pooled observations and combined predictions;
        ``accuracy`` is ``accuracy_score`` of the pooled test labels against
        the pooled predicted labels.
    """
    selected = [
        feature for idx, feature in enumerate(all_features) if step[idx]
    ]

    observed = []
    combined = []
    true_labels = []
    predicted_labels = []

    def fit_and_predict(station_subset, held_out):
        # Train a fresh regressor on one subset of the training stations and
        # predict the target for the held-out stations.
        train_x, test_x, train_y, _ = splitDataForXValidation(
            station_subset, held_out, "location", data, tw_features,
            "target")
        regressor = create_model()
        regressor.fit(train_x, train_y)
        return regressor.predict(test_x)

    for group in range(len(groups)):
        train_stations, test_stations = generate_train_test_station_list(
            group, groups)
        train_ids = {float(s) for s in train_stations}
        test_ids = {float(s) for s in test_stations}

        # Positions strictly below the midpoint form the "lower" half.
        train_mid = len(train_stations) / 2.0
        test_mid = len(test_stations) / 2.0
        lower_train = [
            float(s) for pos, s in enumerate(train_stations)
            if pos < train_mid
        ]
        upper_train_ids = {
            float(s) for pos, s in enumerate(train_stations)
            if pos >= train_mid
        }
        lower_test = [
            float(s) for pos, s in enumerate(test_stations)
            if pos < test_mid
        ]

        # Same order as before: lower-half regressor, then upper-half.
        pred_lower = fit_and_predict(set(lower_train), test_ids)
        pred_upper = fit_and_predict(upper_train_ids, test_ids)

        (train_x, test_x, _, test_y, train_loc,
         test_loc) = splitDataForXValidationWithLocation(
             train_ids, test_ids, "location", data, selected, "target")
        train_labels = generate_label(train_loc, lower_train)
        test_labels = generate_label(test_loc, lower_test)

        classifier = create_classifier_model()
        classifier.fit(train_x, train_labels)
        label_pred = classifier.predict(test_x)

        combined.extend(
            generate_combined_prediction(label_pred, pred_lower, pred_upper))
        observed.extend(test_y)
        true_labels.extend(test_labels)
        predicted_labels.extend(label_pred)

    rmse = rmseEval(observed, combined)[1]
    accuracy = accuracy_score(true_labels, predicted_labels)
    return rmse, accuracy
예제 #2
0
def eval_one(step):
    """Return the cross-validated RMSE for one feature mask, with caching.

    ``step`` is indexed in parallel with the module-level ``all_features``:
    a truthy ``step[i]`` selects ``all_features[i]`` as a classifier input.
    ``step`` is also used as a key of ``cached_results``, so it must be
    hashable.

    If ``step`` is already in ``cached_results`` the stored value is returned
    immediately. Otherwise, for every entry of ``groups``:

    1. train/test station lists come from
       ``generate_train_test_station_list``;
    2. two regressors from ``create_model()`` are fitted on ``tw_features``,
       one on the first half of the training stations and one on the second
       half (the first half gets the extra station when the count is odd);
    3. a classifier from ``create_classifier_model()`` is fitted on the
       selected features, using labels from ``generate_label``;
    4. the three predictions are merged by ``generate_combined_prediction``.

    The result is stored in ``cached_results`` and appended to ``CACHE_FILE``
    as one line of the form ``<rmse>;<step values joined by ','>``.

    Returns:
        Element ``[1]`` of ``rmseEval`` over the pooled observations and
        combined predictions.
    """
    if step in cached_results:
        return cached_results[step]

    eval_features = [
        feature for i, feature in enumerate(all_features) if step[i]
    ]

    all_observations = []
    all_pred_combined = []

    for group in range(len(groups)):

        train_stations, test_stations = generate_train_test_station_list(
            group, groups)
        train_station_set = {float(s) for s in train_stations}
        test_station_set = {float(s) for s in test_stations}

        # Positions strictly below the midpoint form the "lower" half.
        train_half = len(train_stations) / 2.0
        test_half = len(test_stations) / 2.0
        train_lower = [
            float(s) for i, s in enumerate(train_stations) if i < train_half
        ]
        train_lower_set = set(train_lower)
        train_upper_set = {
            float(s) for i, s in enumerate(train_stations) if i >= train_half
        }
        test_lower = [
            float(s) for i, s in enumerate(test_stations) if i < test_half
        ]

        # tw_lower: regressor trained on the lower half of training stations
        trainX, testX, trainY, testY = splitDataForXValidation(
            train_lower_set, test_station_set, "location", data, tw_features,
            "target")
        model = create_model()
        model.fit(trainX, trainY)
        prediction_lower = model.predict(testX)

        # tw_upper: regressor trained on the upper half of training stations
        trainX, testX, trainY, testY = splitDataForXValidation(
            train_upper_set, test_station_set, "location", data, tw_features,
            "target")
        model = create_model()
        model.fit(trainX, trainY)
        prediction_upper = model.predict(testX)

        # Classifier on the selected features; its predicted labels are
        # passed to generate_combined_prediction with both regressor outputs.
        (trainX, testX, trainY, testY, train_location,
         test_location) = splitDataForXValidationWithLocation(
             train_station_set, test_station_set, "location", data,
             eval_features, "target")
        train_label = generate_label(train_location, train_lower)
        # Computed as before even though this variant does not score the
        # classifier itself; kept so that helper call behavior is unchanged.
        test_label = generate_label(test_location, test_lower)

        model = create_classifier_model()
        model.fit(trainX, train_label)
        prediction_label = model.predict(testX)

        pred_combined = generate_combined_prediction(prediction_label,
                                                     prediction_lower,
                                                     prediction_upper)
        all_pred_combined.extend(pred_combined)
        all_observations.extend(testY)

    rmse = rmseEval(all_observations, all_pred_combined)[1]

    cached_results[step] = rmse

    # Persist the result. The context manager guarantees the handle is closed
    # even if the write raises.
    step_str = ",".join(str(s) for s in step)
    with open(CACHE_FILE, "a") as cache_output:
        cache_output.write(str(rmse) + ";" + step_str + "\n")

    return rmse
예제 #3
0
all_pred_TWA = []
all_pred_combined = []
all_test_location = []

for group in range(0, len(groups)):
    log("group: " + str(group + 1))

    train_stations, test_stations = generate_train_test_station_list(
        group, groups)
    log("\ttrain_stations: " + str(train_stations))
    log("\ttest_stations: " + str(test_stations))
    train_station_set = set([float(s) for s in train_stations])
    test_station_set = set([float(s) for s in test_stations])

    trainX, testX, trainY, testY, trainLocation, testLocation = splitDataForXValidationWithLocation(
        train_station_set, test_station_set, "location", data, all_features,
        "target")
    model = RandomForestRegressor(min_samples_leaf=29,
                                  n_estimators=64,
                                  n_jobs=-1,
                                  random_state=42)
    model.fit(trainX, trainY)
    prediction_TW = model.predict(testX)
    rmse = rmseEval(testY, prediction_TW)[1]
    log("\tALL rmse: " + str(rmse))
    all_observations.extend(testY)
    all_pred_ALL.extend(prediction_TW)
    all_test_location.extend(testLocation)

    trainX, testX, trainY, testY, trainLocation, testLocation = splitDataForXValidationWithLocation(
        train_station_set, test_station_set, "location", data, tw_features,