Example #1
0
def evaluateRandomForest():
    """Evaluate a Random Forest regressor on every dataset in ``regDatasets``.

    For each dataset: load it, run k-fold cross-validation with the
    module-level ``kfold``, scale features per fold, train a random forest,
    and append the fold-averaged regression metrics to ``regResults``.

    Relies on module globals: regDatasets, regResults, kfold, scaler,
    metrics, importDataset.
    """
    # Hoisted out of the k-fold loop: the import is loop-invariant
    # (Python caches modules, but re-importing per fold is still noise).
    from randomForest import randomForest

    print("\nEvaluating Random Forest")
    regResults.append(["Results for Random Forest"])
    for data in regDatasets:
        # Import the dataset and separate X and y.
        data_to_test = "regression/" + data + '.csv'
        X_before, y_before = importDataset(data_to_test)

        count = 0
        avg_explained_variance_score = 0
        avg_max_error = 0
        avg_mae = 0
        avg_mse = 0
        avg_r2_score = 0

        for train, test in kfold.split(X_before):
            print("Test:", count+1, "for", data_to_test)
            X_train, X_test = X_before.iloc[train], X_before.iloc[test]
            y_train, y_true = y_before[train], y_before[test]

            # Feature scaling: fit on the training fold only, then apply the
            # same transform to the test fold (avoids train/test leakage).
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)

            # Train and predict for this fold.
            rfModel = randomForest(X_train, y_train, X_test, y_true, X_before)
            predictions = rfModel.getPredictions()

            # Accumulate per-fold metrics; averaged after the loop.
            avg_explained_variance_score += metrics.explained_variance_score(y_true, predictions)
            avg_max_error += metrics.max_error(y_true, predictions)
            avg_mae += metrics.mean_absolute_error(y_true, predictions)
            avg_mse += metrics.mean_squared_error(y_true, predictions)
            avg_r2_score += metrics.r2_score(y_true, predictions)

            count += 1

        # Average over the number of folds actually run.
        avg_explained_variance_score /= count
        avg_max_error /= count
        avg_mae /= count
        avg_mse /= count
        avg_r2_score /= count

        regResults.append(['', data_to_test, float(avg_explained_variance_score), float(avg_max_error),
                   float(avg_mae), float(avg_mse), float(avg_r2_score)])

    # NOTE(review): these summary prints reflect only the LAST dataset
    # processed; per-dataset results are preserved in regResults.
    print("Random Forest evaluation results")
    print("Average explained variance score:", avg_explained_variance_score)
    # Fix: avg_max_error was computed but never reported in the summary.
    print("Average max error:", avg_max_error)
    print("Average mean absolute error:", avg_mae)
    print("Average mean squared error:", avg_mse)
    print("Average r2 score:", avg_r2_score)
Example #2
0
def evaluateANN():
    """Evaluate a sequential neural-network regressor on every dataset in
    ``regDatasets``.

    For each dataset: load it, run k-fold cross-validation with the
    module-level ``kfold``, scale features per fold, train the ANN, and
    append the fold-averaged regression metrics to ``regResults``.

    Relies on module globals: regDatasets, regResults, kfold, scaler,
    metrics, importDataset.
    """
    # Hoisted out of the k-fold loop: the import is loop-invariant.
    from regressionAnalysis import sequentialNN

    regResults.append(["Results for ANN"])
    for data in regDatasets:
        # Import the dataset and separate X and y.
        data_to_test = "regression/" + data + '.csv'
        X_before, y_before = importDataset(data_to_test)

        count = 0
        avg_explained_variance_score = 0
        avg_max_error = 0
        avg_mae = 0
        avg_mse = 0
        avg_r2_score = 0

        for train, test in kfold.split(X_before):
            print("Test:", count+1, " for", data_to_test)
            X_train, X_test = X_before.iloc[train], X_before.iloc[test]
            y_train, y_true = y_before[train], y_before[test]

            # Feature scaling: fit on the training fold only, then apply the
            # same transform to the test fold (avoids train/test leakage).
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)

            # Train the ANN for this fold; it reports most metrics itself.
            regressor = sequentialNN(X_train, y_train, X_test, y_true)
            exp_variance_score, max_error, loss, mae, mse = regressor.getEvaluationMetrics()

            # Accumulate per-fold metrics; averaged after the loop.
            avg_explained_variance_score += exp_variance_score
            avg_max_error += max_error
            avg_mae += mae
            avg_mse += mse
            avg_r2_score += metrics.r2_score(y_true, regressor.getPredictions())

            count += 1

        # Average over the number of folds actually run.
        avg_explained_variance_score /= count
        avg_max_error /= count
        avg_mae /= count
        avg_mse /= count
        avg_r2_score /= count

        regResults.append(['', data_to_test, float(avg_explained_variance_score), float(avg_max_error),
                   float(avg_mae), float(avg_mse), float(avg_r2_score)])

    # NOTE(review): these summary prints reflect only the LAST dataset
    # processed; per-dataset results are preserved in regResults.
    print("ANN evaluation results")
    print("Average explained variance score:", avg_explained_variance_score)
    # Fix: avg_max_error was computed but never reported in the summary.
    print("Average max error:", avg_max_error)
    print("Average mean absolute error:", avg_mae)
    print("Average mean squared error:", avg_mse)
    print("Average r2 score:", avg_r2_score)
Example #3
0
def selectDataset():
    """Interactively pick a regression dataset, load it, and split/scale it.

    Prompts on stdin until a number 1-9 is entered (9 asks for a custom
    file path). Non-numeric input no longer crashes: it is treated like an
    out-of-range number and re-prompted.

    Returns:
        (X_before, y_before, X_train, X_test, y_train, y_test, choice)
        where ``choice`` is the dataset path that was loaded.

    Relies on module globals: importDataset, splitAndScale.
    """
    # Menu number -> dataset path (option 9 is handled separately below).
    dataset_paths = {
        1: "regression/regAll.csv",
        2: "regression/regBalanced.csv",
        3: "regression/regEncoded.csv",
        4: "regression/regEncodedBalanced.csv",
        5: "regression/regNo365.csv",
        6: "regression/regOnly365.csv",
        7: "regression/regSynthetic.csv",
        8: "regression/regSyntheticWith365.csv",
    }

    print("Select a file to use:")
    print("1 - Regression Original 1437 rows")
    print("2 - Regression Balanced (83% deleted)")
    print("3 - Regression Encoded variables all 1437 rows")
    print("4 - Regression Encoded variables balanced (83% deleted)")
    print("5 - Regression no 365 days")
    print("6 - Regression only 365 days")
    print("7 - Regression only synthetic 3211 rows")
    print("8 - Regression synthetic plus 365 days")
    print("9 - Another dataset")

    number = 0
    acceptedDataset = False
    while acceptedDataset is False:
        try:
            number = int(input("Select number to import dataset: "))
        except ValueError:
            # Fix: non-numeric input used to raise and crash; treat it as
            # invalid so the loop re-prompts.
            number = 0
        if 0 < number < 10:
            acceptedDataset = True
        else:
            print(
                "Invalid number, select a dataset by selecting its number (1 to 9)"
            )

    if number == 9:
        choice = input("input full path of dataset: ")
    else:
        choice = dataset_paths[number]

    print("dataset chosen:", choice)
    # Import the Dataset and separate X and y
    X_before, y_before = importDataset(choice)
    # Split the dataset
    X_train, X_test, y_train, y_test = splitAndScale(X_before, y_before)

    return X_before, y_before, X_train, X_test, y_train, y_test, choice
Example #4
0
def findBestMatch():
    """Train ANN / Random Forest / SVR on the synthetic dataset, then run
    all three models over every recipient dataset, appending one prediction
    row per donor to ``predict_results``.

    Relies on module globals: importDataset, splitAndScale, scaler, tf,
    joblib, pd, recipientDatasets, predict_results.
    """
    # Hoisted imports: project modules, loop-invariant.
    from regressionAnalysis import sequentialNN
    from randomForest import randomForest
    from svr import svr

    print("\nEvaluating different recipients")

    X_before, y_before = importDataset('regression/regSyntheticWith365.csv')
    X_train, X_test, y_train, y_true = splitAndScale(X_before, y_before)

    # Train models with the synthetic dataset; each trainer persists its
    # model to disk, which we then reload.
    sequentialNN(X_train, y_train, X_test, y_true)
    annModel = tf.keras.models.load_model('models/ann.h5')
    randomForest(X_train, y_train, X_test, y_true, X_before)
    rfModel = joblib.load('models/rf.sav')
    svr(X_train, y_train, X_test, y_true)
    # Fix: the loaded model used to be bound to the name ``svr``, shadowing
    # the imported training function; renamed to keep both usable.
    svrModel = joblib.load('models/svr.sav')

    MLmodels = [annModel, rfModel, svrModel]

    for data in recipientDatasets:
        predict_results.append([data])
        print("Predicting for", data)
        dataset = pd.read_csv('datasets/' + data + '.csv')
        to_predict = dataset.iloc[:, :-1].values
        count = 1
        for row in to_predict:
            # NOTE(review): this re-fits the scaler on each single row
            # (features treated as samples), which differs from the
            # train-time scaling — looks suspect, confirm before changing.
            transform = scaler.fit_transform(row.reshape(-1, 1))
            prediction = ['', 'donor' + str(count)]
            for model in MLmodels:
                new_pred = model.predict(transform.reshape(1, -1))
                # Keras Sequential returns a 2-D array; the sklearn models
                # return a 1-D array.
                if 'Sequential' in str(type(model)):
                    prediction.append(new_pred[0][0])
                else:
                    prediction.append(new_pred[0])
            predict_results.append(prediction)
            count += 1
    # Presumably the actual CSV write happens elsewhere (this function only
    # fills predict_results) — verify against the caller.
    print('Predictions saved to file RecipientsPredictions.csv')