예제 #1
0
def _selectColumns(rows, mask, width):
    """Project every row onto the positions in ``range(width)`` flagged by ``mask``."""
    return [[row[i] for i in range(width) if mask[i]] for row in rows]


def evalColumns(columns):
    """Score one column mask for the model-selection (stacking) classifier.

    For every location in the module-level ``locations`` the function:

    1. fits one ``RandomForestRegressor`` per datagroup in ``topDatagroups``
       on the split returned by ``splitDataForXValidation`` and stores its
       predictions for the test part (``testPreds``);
    2. for every other location, fits the same regressors on the first
       training part returned by ``splitDataForXValidationSampled2`` and
       collects their predictions for the second training part
       (``trainPreds``);
    3. labels every row of the second training part with the index (into
       ``topTags``) of the regressor with the smallest absolute error;
    4. trains a ``RandomForestClassifier`` on the masked feature rows
       (all features plus the per-tag predictions) to predict that label;
    5. uses the classifier to choose, per test row, which regressor's
       prediction becomes the final prediction.

    Args:
        columns: indexable mask with one truthy/falsy entry per element of
            the module-level ``all_columns``; only flagged positions are
            shown to the classifier.

    Returns:
        The ``[1]`` element of ``rmseEval`` computed over the observations
        and final predictions accumulated across all locations.

    NOTE(review): the rows of ``trainPreds`` and of the all-features split
    are matched purely by position, so ``splitDataForXValidationSampled2``
    is assumed to return the same rows in the same order on every call
    with the same locations - confirm against its implementation.
    """
    overallY = []
    overallPred = []
    width = len(all_columns)

    for location in locations:
        location2s = [l for l in locations if l != location]

        print("Location: " + str(location) + ", location2: " + str(location2s))

        # Stage 1: per-datagroup predictions for the test part of `location`.
        testPreds = {}
        for datagroup in topDatagroups:
            tag, features = getTagAndFeatures(datagroup)
            trainX, groupTestX, trainY, _ = splitDataForXValidation(location, "location", data, features, "target")

            model = RandomForestRegressor(min_samples_leaf = 9, n_estimators = 59, n_jobs = -1, random_state=42)
            model.fit(trainX, trainY)
            testPreds[tag] = model.predict(groupTestX)

        # Stage 2: per-datagroup predictions for the second training part,
        # concatenated over the other locations in `location2s` order.
        trainPreds = defaultdict(list)
        for datagroup in topDatagroups:
            tag, features = getTagAndFeatures(datagroup)
            print("\ttag: " + str(tag) + ", features: " + str(features))
            for location2 in location2s:
                trainX1, trainX2, trainY1, trainY2, sampledTestX, sampledTestY = splitDataForXValidationSampled2(location, location2, "location", data, features, "target")
                model = RandomForestRegressor(min_samples_leaf = 9, n_estimators = 59, n_jobs = -1, random_state=42)
                model.fit(trainX1, trainY1)
                train1Prediction = model.predict(trainX1)
                train2Prediction = model.predict(trainX2)
                testPrediction = model.predict(sampledTestX)
                train1Rmse = str(rmseEval(trainY1, train1Prediction)[1])
                train2Rmse = str(rmseEval(trainY2, train2Prediction)[1])
                testRmse = str(rmseEval(sampledTestY, testPrediction)[1])
                print("\t\ttrain1 rmse: " + train1Rmse)
                print("\t\ttrain2 rmse: " + train2Rmse)
                print("\t\ttest rmse: " + testRmse)
                trainPreds[tag].extend(train2Prediction)

        # Stage 3: all-feature rows and targets of the second training part.
        # One split call per secondary location yields both (the original
        # issued the identical call twice, once for Y and once for X).
        combinedTrain2Y = []
        tX2 = []
        for location2 in location2s:
            _, trainX2, _, trainY2, _, _ = splitDataForXValidationSampled2(location, location2, "location", data, all_features, "target")
            combinedTrain2Y.extend(trainY2)
            tX2.extend(trainX2)

        # Label = index of the regressor with the smallest absolute error;
        # ties go to the lowest index, as min() keeps the first minimum.
        labelTrain2Y = []
        for i, observed in enumerate(combinedTrain2Y):
            errors = [abs(observed - trainPreds[tag][i]) for tag in topTags]
            labelTrain2Y.append(min(range(len(errors)), key=errors.__getitem__))

        # Test rows AND test targets come from the same split, so the final
        # predictions are scored against the observations they belong to
        # rather than against a `testY` left over from an earlier call.
        _, testX, _, testY = splitDataForXValidation(location, "location", data, all_features, "target")

        # Append each regressor's prediction as an extra feature column.
        # Index-based on purpose: a length mismatch raises IndexError
        # instead of being silently truncated.
        for tag in topTags:
            for i, p in enumerate(trainPreds[tag]):
                tX2[i].append(p)

        model = RandomForestClassifier(random_state=42, n_estimators=100, max_depth=15)
        model.fit(_selectColumns(tX2, columns, width), labelTrain2Y)

        for tag in topTags:
            for i, p in enumerate(testPreds[tag]):
                testX[i].append(p)

        pred = model.predict(_selectColumns(testX, columns, width))

        # Per test row, take the prediction of the regressor the classifier chose.
        finalPrediction = [testPreds[topTags[pred[i]]][i] for i in range(len(testY))]
        rmse = str(rmseEval(testY, finalPrediction)[1])
        print("\tRMSE: " + rmse)

        overallY.extend(testY)
        overallPred.extend(finalPrediction)

    # Same (observations, predictions) argument order as every other rmseEval call.
    return rmseEval(overallY, overallPred)[1]
예제 #2
0
            pred.append(prediction_twa[i])
    return pred


# Accumulators filled across the iterations of the location loop below.
# Only allObservations and allPredictionsTW are written to in this excerpt.
allObservations = []
allPredictions = []
allPredictionsTW = []
allPredictionsTWA = []

for location in locations:
    # Every location other than the current one.
    location2s = [l for l in locations if l != location]

    log("Location: " + str(location) + ", location2: " + str(location2s))

    # TW model: fit on the tw_features split for this location and log its RMSE.
    # NOTE(review): presumably `location` is the held-out test fold - confirm
    # against splitDataForXValidation.
    trainX, testX, trainY, testY = splitDataForXValidation(
        location, "location", data, tw_features, "target")
    allObservations.extend(testY)
    model = create_model()
    model.fit(trainX, trainY)
    predictionTW = model.predict(testX)
    rmse = rmseEval(testY, predictionTW)[1]
    log("\tTW:" + str(rmse))
    allPredictionsTW.extend(predictionTW)

    # TWA model: same procedure, but on the twa_features split.
    trainX, testX, trainY, testY = splitDataForXValidation(
        location, "location", data, twa_features, "target")
    model = create_model()
    model.fit(trainX, trainY)
    predictionTWA = model.predict(testX)
    rmse = rmseEval(testY, predictionTWA)[1]
예제 #3
0
    for datagroup in data_groups:
        dgtag, _ = getTagAndFeatures(datagroup)
        if dgtag == tag:
            top10datagroups.append(datagroup)
            break

for location in locations:
    # Only the FIRST location that differs from the current one is used here;
    # this raises IndexError when `locations` has no other element.
    location2 = [l for l in locations if l != location][0]

    print("Location: " + str(location) + ", location2: " + str(location2))

    # generating testPreds: one regressor per datagroup, keyed by the
    # datagroup's tag, predicting the test part of the split for `location`.
    testPreds = {}
    for datagroup in top10datagroups:
        tag, features = getTagAndFeatures(datagroup)
        trainX, testX, trainY, testY = splitDataForXValidation(
            location, "location", data, features, "target")

        # Fixed random_state keeps the forest reproducible between runs.
        model = RandomForestRegressor(min_samples_leaf=9,
                                      n_estimators=59,
                                      n_jobs=-1,
                                      random_state=42)
        model.fit(trainX, trainY)
        prediction = model.predict(testX)
        testPreds[tag] = prediction

    # Per-tag containers / placeholders filled by the loop below
    # (the remainder of that loop is outside this excerpt).
    trainPreds = {}
    t2Y = None
    tY = None

    for datagroup in top10datagroups:
        tag, features = getTagAndFeatures(datagroup)
예제 #4
0
# Location identifiers iterated by the evaluation loop that follows.
locations = [2.0, 3.0, 4.0, 6.0, 8.0]

# `data` and `columns` are handed to loadData together with DATA_FILE.
# NOTE(review): presumably loadData fills both in place (the loop below uses
# `columns` as the feature list) - confirm against loadData.
data = {}
columns = ['timestamp', 'location']
loadData(DATA_FILE, [], data, columns)

# Feature subset used for the second model trained in the loop below.
columnsTW = [
    'hour', 'day_of_week', 'month', 'bank_holiday', 'race_day',
    'winddirection', 'windspeed', 'temperature', 'rain', 'pressure'
]

for location in locations:
    print("location: " + str(location))
    # save down trainX, trainY, testX, testY
    trainX, testX, trainY, testY = splitDataForXValidation(
        location, "location", data, columns, "target")
    print("\t#train: " + str(len(trainY)) + ", #test:" + str(len(testY)))
    model = RandomForestRegressor(min_samples_leaf=9,
                                  n_estimators=59,
                                  n_jobs=-1,
                                  random_state=42)
    model.fit(trainX, trainY)
    testPrediction = model.predict(testX)
    testRmse = str(rmseEval(testY, testPrediction)[1])
    print("\tRFR+All rmse: " + str(testRmse))

    trainX, testX, trainY, testY = splitDataForXValidation(
        location, "location", data, columnsTW, "target")
    print("\t#train: " + str(len(trainY)) + ", #test:" + str(len(testY)))
    model = RandomForestRegressor(min_samples_leaf=9,
                                  n_estimators=59,
예제 #5
0
def eval_one(step):
    """Evaluate the TW/TWA combination classifier for one feature mask.

    ``step`` is indexed in parallel with the module-level ``all_features``;
    a truthy entry keeps that feature for the classifier. For every location
    the TW and TWA regressors are fitted on the ``splitDataForXValidation``
    split, a classifier is trained on labels produced by ``generate_label``
    from the ``splitDataForXValidationForCombination`` splits of the other
    locations, and its predictions drive ``generate_combined_prediction``.

    Returns:
        ``(rmse, accuracy)``: the ``[1]`` element of ``rmseEval`` over all
        observations and combined predictions, and ``calculate_accuracy``
        over the reference and predicted labels.
    """

    def fit_and_predict(train_x, train_y, test_x):
        # Fresh regressor per call, exactly one create_model() per fit.
        regressor = create_model()
        regressor.fit(train_x, train_y)
        return regressor.predict(test_x)

    eval_features = [
        feature for idx, feature in enumerate(all_features) if step[idx]
    ]

    observed_all = []
    combined_all = []
    tw_all = []
    twa_all = []
    reference_labels = []
    predicted_labels = []

    for location in locations:
        other_locations = [
            candidate for candidate in locations if candidate != location
        ]

        # TW regressor on the split for this location.
        tw_train_x, tw_test_x, tw_train_y, tw_test_y = splitDataForXValidation(
            location, "location", data, tw_features, "target")
        observed_all.extend(tw_test_y)
        predictionTW = fit_and_predict(tw_train_x, tw_train_y, tw_test_x)
        tw_all.extend(predictionTW)

        # TWA regressor on the split for this location.
        twa_train_x, twa_test_x, twa_train_y, _ = splitDataForXValidation(
            location, "location", data, twa_features, "target")
        predictionTWA = fit_and_predict(twa_train_x, twa_train_y, twa_test_x)
        twa_all.extend(predictionTWA)

        # Training set for the classifier, gathered from the other locations.
        classifier_X = []
        classifier_Y = []
        for neighbour in other_locations:
            sub_train_x, sub_test_x, sub_train_y, _ = splitDataForXValidationForCombination(
                neighbour, location, "location", data, tw_features, "target")
            tw_sub_prediction = fit_and_predict(sub_train_x, sub_train_y,
                                                sub_test_x)

            sub_train_x, sub_test_x, sub_train_y, _ = splitDataForXValidationForCombination(
                neighbour, location, "location", data, twa_features, "target")
            twa_sub_prediction = fit_and_predict(sub_train_x, sub_train_y,
                                                 sub_test_x)

            _, meta_rows, _, meta_targets = splitDataForXValidationForCombination(
                neighbour, location, "location", data, eval_features, "target")
            classifier_X.extend(meta_rows)
            classifier_Y.extend(
                generate_label(meta_targets, tw_sub_prediction,
                               twa_sub_prediction))

        classifier = create_classifier_model()
        classifier.fit(classifier_X, classifier_Y)

        _, final_test_x, _, final_test_y = splitDataForXValidation(
            location, "location", data, eval_features, "target")
        chosen = classifier.predict(final_test_x)

        reference_labels.extend(
            generate_label(final_test_y, predictionTW, predictionTWA))
        predicted_labels.extend(chosen)
        combined_all.extend(
            generate_combined_prediction(chosen, predictionTW, predictionTWA))

    rmse = rmseEval(observed_all, combined_all)[1]
    accuracy = calculate_accuracy(reference_labels, predicted_labels)
    return rmse, accuracy