Example #1
def validation(path,
               n,
               classifier="RF",
               m=3,
               k=10,
               N=5,
               hot=False,
               dataset="Data"):

    # parse input
    im = Importer(path)

    im.parse_data(hot=hot)

    # build tree
    #tree = DecisionTree(im)
    #tree.build_tree()
    #print(tree.tree_to_json())
    df = im.get_frame()

    if n == 0 or n == 1:
        print("No cross validation")
        return
    elif n == -1:
        # leave-one-out: one fold per row
        print("all but one")
        n = df.shape[0]

    kf = KFold(n_splits=n, shuffle=True)

    labels = im.get_values()

    # truncate any previous results file (close the handle immediately)
    open("results.txt", "w").close()

    conf_list = []
    for trainidx, testidx in kf.split(df):
        train = df.iloc[trainidx]
        test = df.iloc[testidx]

        if classifier == "C45":
            tree = DecisionTree.C45(train,
                                    im.get_fields().copy(), im.get_variable(),
                                    THRESH)
            conf = get_confusionC45(tree, test, im.get_variable(), labels)
            conf_list.append(conf)

        elif classifier == "KNN":
            # classify against the training fold, not the full frame
            conf = get_confusionKNN(train, test, im.get_variable(), labels, k)
            conf_list.append(conf)

        elif classifier == "RF":
            # grow the forest from the training fold, not the full frame
            trees = randomForest.randomForest(train, test, im.get_variable(), m,
                                              k, N)
            conf = get_confusionRF(trees, test, im.get_variable(), labels, m,
                                   k, N)
            conf_list.append(conf)

    print_output(conf_list, labels, n)
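A minimal usage sketch for validation(). Importer, DecisionTree, randomForest, and the get_confusion* helpers are project-local names visible in the snippet, so the call below is illustrative only and the CSV path is a placeholder:

# Hypothetical entry point: 5-fold cross-validation with the random forest.
if __name__ == "__main__":
    validation("data/example.csv", n=5, classifier="RF", m=3, k=10, N=5)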
Example #2
def evaluateRandomForest():
    print("\nEvaluating Random Forest")
    regResults.append(["Results for Random Forest"])
    for data in regDatasets:
        #Import the Dataset and separate X and y
        data_to_test = "regression/" + data + '.csv'
        X_before, y_before = importDataset(data_to_test)
        
        count = 0
        avg_explained_variance_score = 0
        avg_max_error = 0
        avg_mae = 0
        avg_mse = 0
        avg_r2_score = 0
       
        for train, test in kfold.split(X_before):
            print("Test:", count+1, "for", data_to_test)
            X_train, X_test = X_before.iloc[train], X_before.iloc[test]
            y_train, y_true = y_before[train], y_before[test]
            
            #feature scaling
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)
            
            # run algorithm
            from randomForest import randomForest
            rfModel = randomForest(X_train, y_train, X_test, y_true, X_before)
            predictions = rfModel.getPredictions()
            
            # get metrics
            avg_explained_variance_score += metrics.explained_variance_score(y_true, predictions)
            avg_max_error += metrics.max_error(y_true, predictions)
            avg_mae += metrics.mean_absolute_error(y_true, predictions)
            avg_mse += metrics.mean_squared_error(y_true, predictions)
            avg_r2_score += metrics.r2_score(y_true, predictions)
            
            count += 1
            
        avg_explained_variance_score /= count
        avg_max_error /= count
        avg_mae /= count
        avg_mse /= count
        avg_r2_score /= count
        
        regResults.append(['', data_to_test, float(avg_explained_variance_score), float(avg_max_error),
                   float(avg_mae), float(avg_mse), float(avg_r2_score)])

    # note: the summary below reports averages for the last dataset processed
    print("Random Forest evaluation results")
    print("Average explained variance score:", avg_explained_variance_score)
    print("Average max error:", avg_max_error)
    print("Average mean absolute error:", avg_mae)
    print("Average mean squared error:", avg_mse)
    print("Average r2 score:", avg_r2_score)
Example #3
def findBestMatch():
    print("\nEvaluating different recipients")
    
    X_before, y_before = importDataset('regression/regSyntheticWith365.csv')
    X_train, X_test, y_train, y_true = splitAndScale(X_before, y_before)
    
    # Train models with synthetic dataset
    from regressionAnalysis import sequentialNN
    sequentialNN(X_train, y_train, X_test, y_true)
    ann = tf.keras.models.load_model('models/ann.h5')
    from randomForest import randomForest
    randomForest(X_train, y_train, X_test, y_true, X_before)
    rf = joblib.load('models/rf.sav')
    from svr import svr
    svr(X_train, y_train, X_test, y_true)
    # bind to a new name so the imported svr function is not shadowed
    svrModel = joblib.load('models/svr.sav')

    MLmodels = [ann, rf, svrModel]
    
    for data in recipientDatasets:
        predict_results.append([data])
        print("Predicting for",data)
        dataset = pd.read_csv('datasets/' + data + '.csv')
        to_predict = dataset.iloc[:, :-1].values 
        count = 1
        for row in to_predict:
            # reuse the already-fitted scaler; re-fitting on a single row
            # would normalise across features instead of across samples
            transform = scaler.transform(row.reshape(1, -1))
            prediction = ['', 'donor' + str(count)]
            for model in MLmodels:
                new_pred = model.predict(transform)
                if 'Sequential' in str(type(model)):
                    prediction.append(new_pred[0][0])
                else:
                    prediction.append(new_pred[0])
            predict_results.append(prediction)
            count += 1
    print('Predictions saved to file RecipientsPredictions.csv')
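The function reports that predictions were saved, but the write itself falls outside the excerpt; a minimal sketch of how predict_results could be flushed to that file, assuming it is the list of rows built above:

import csv

# Hypothetical save step matching the printed message.
with open('RecipientsPredictions.csv', 'w', newline='') as f:
    csv.writer(f).writerows(predict_results)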
Example #4
def randomForest(X_train, y_train, X_test, y_test, X_before):
    # note: inside this function the name randomForest refers to the
    # imported class, not to this wrapper
    from randomForest import randomForest
    # RandomForestRegressor
    rfModel = randomForest(X_train, y_train, X_test, y_test, X_before)
    randomForest.plotRandomForest(y_test, rfModel.predictions)
    print("\nMean absolute error of predictions:", int(rfModel.getMAE()), "days")
    # Get top 15 instances
    print("\n-- Variable Importances --")
    importances = rfModel.getImportance()
    # Plot graph
    
#    randomForest.makeTree(rfModel)
    # Grid Search
#    rfModel.gridSearch()
    return rfModel.getMAE(), importances
Example #5
from evaluateClustering import evaluateClustering
from vizualizeData import vizualizeData

#loading pre-processed data
X_train, X_test, yClass_train, yClass_test, yReg_train, yReg_test = processData(
)

yClass_lr = logisticRegression(X_train, X_test, yClass_train)
lrAc = accuracy_score(yClass_test, yClass_lr)
X_lr = X_test

#reloading data pre-processed with different parameters
X_train, X_test, yClass_train, yClass_test, yReg_train, yReg_test = processData(
    normalization='mms')

yClass_rf = randomForest(X_train, X_test, yClass_train)
rfAc = accuracy_score(yClass_test, yClass_rf)
X_rf = X_test

yClass_knn = kNN(X_train, X_test, yClass_train)
knnAc = accuracy_score(yClass_test, yClass_knn)
X_knn = X_test

X_train, X_test, yClass_train, yClass_test, yReg_train, yReg_test = processData(
)

yReg_lr = linearRegression(X_train, X_test, yReg_train, 0)
lrSc = r2_score(yReg_test, yReg_lr)
X_linReg = X_test

yReg_nr = kNNRegression(X_train, X_test, yReg_train, 0)
Example #6
score_knn_df = pd.DataFrame(
    score_knn_df,
    columns=['precision0', 'precision1', 'recall0', 'recall1', 'accuracy'])

fig, ax = plt.subplots(figsize=(8, 5))
ax = sns.heatmap(score_knn_df)

# Random Forest Classifier
score_rf = {}
corr_numeric = train_data[list(numerical_var)].corr()
for n in [5, 10, 50, 100, 200]:
    for d in [None, 2, 5, 10]:
        for l in [1, 3, 5, 10]:
            regr = randomForest.randomForest(train_data,
                                             corr_numeric,
                                             min_corr=0.0005,
                                             estimators=n,
                                             depth=d,
                                             leaf=l)
            score_rf[n, d, l] = regr.classifier()

score_rf_df = pd.DataFrame(list(score_rf.values()),
                           index=list(score_rf.keys()),
                           columns=['precision', 'recall', 'accuracy'])
score_rf_df[['precision0',
             'precision1']] = score_rf_df['precision'].apply(pd.Series)
score_rf_df[['recall0', 'recall1']] = score_rf_df['recall'].apply(pd.Series)
score_rf_df = pd.DataFrame(
    score_rf_df,
    columns=['precision0', 'precision1', 'recall0', 'recall1', 'accuracy'])

fig = plt.figure(figsize=(8, 5))
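The excerpt ends just after the figure is created; by symmetry with the k-NN heatmap above, the next step was presumably a seaborn heatmap of the random-forest scores (a guess, not the original code):

# Presumed continuation, mirroring the k-NN heatmap.
sns.heatmap(score_rf_df)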
Example #7
def estimateParametersRealImage(trainingParameters, trainingReflectances,
                                shape, image, trainsegmentation,
                                testsegmentation, activateDA):

    sourceReflectancesDA = image[np.nonzero(trainsegmentation)[0], :]
    # choose m reflectances for training DA; sample row indices, since
    # random.sample does not accept a numpy array as the population
    m = trainingReflectances.shape[0]
    rowIndices = random.sample(range(sourceReflectancesDA.shape[0]), m)
    sourceReflectancesDA = np.matrix(sourceReflectancesDA[rowIndices, :])

    #%% 2. determine domain adaptation weights

    trainingWeights = np.ones(trainingReflectances.shape[0])

    if activateDA:
        trainingWeights = calculateWeights(trainingReflectances,
                                           sourceReflectancesDA)

    #%% 3. train forest

    rf = randomForest(trainingParameters, trainingReflectances,
                      trainingWeights)

    #%% 4. estimate the parameters for the image

    print "starting to estimate the tissue parameters"
    start = time.time()

    estimatedParameters = rf.predict(image)

    # set to zero if not in segmentation mask
    #estimatedParameters[np.where(0 == testsegmentation), :] = 0

    end = time.time()
    print "time necessary to estimate parameters for image [s]: " + str(
        (end - start))

    #%% save the parametric images TODO delete after everything works
    #    import Image
    #
    #    for i in np.arange(0,estimatedParameters.shape[1]):
    #        parameterImage_i = np.reshape(estimatedParameters[:,i], shape)
    #        im = Image.fromarray(parameterImage_i)
    #        im.save("data/output/" + "parameterImage_" + str(i) + ".tiff")

    #%% 6. evaluate data

    # for this, create monte carlo simulation for each
    # parameter estimate. The resulted reflectance estimate can then be compared to
    # the measured reflectance.

    from setup import systemPaths
    from setup import simulation

    import helper.monteCarloHelper as mch

    infileString, outfolderMC, outfolderRS, gpumcmlDirectory, gpumcmlExecutable = systemPaths.initPaths(
    )
    infile = open(infileString)

    BVFs, Vss, ds, SaO2s, rs, nrSamples, photons, wavelengths, FWHM, eHbO2, eHb, nrSimulations = simulation.noisy(
    )

    # the estimated parameters within the segmentation
    estimatedParametersOnlySegmented = estimatedParameters[
        np.nonzero(testsegmentation)[0], :]
    # the image reflectances from which these parameters where estimated
    inputReflectancesOnlySegmented = image[np.nonzero(testsegmentation)[0], :]

    # choose n random samples from the segmented data; again sample plain
    # integer indices rather than a numpy array
    n = 20
    nSamples = random.sample(range(estimatedParametersOnlySegmented.shape[0]),
                             n)
    estimatedParametersOnlySegmented = estimatedParametersOnlySegmented[
        nSamples]
    inputReflectancesOnlySegmented = inputReflectancesOnlySegmented[nSamples]

    # placeholder for the reflectance computed from MC with the estimated parameters
    reflectancesFromEstimatedParameters = np.zeros(
        (inputReflectancesOnlySegmented.shape[0],
         inputReflectancesOnlySegmented.shape[1] + 1))

    #wavelengths = np.delete(wavelengths, [2, 7])

    for i, (BVF, Vs, d) in enumerate(estimatedParametersOnlySegmented):

        print('starting simulation ' + str(i) + ' of ' +
              str(estimatedParametersOnlySegmented.shape[0]))

        for j, wavelength in enumerate(wavelengths):

            reflectanceValue = mch.runOneSimulation(
                wavelength,
                eHbO2,
                eHb,
                infile,
                outfolderMC,
                gpumcmlDirectory,
                gpumcmlExecutable,
                BVF,
                Vs,
                d,
                # np.mean(rs), SaO2,
                # submucosa_BVF=sm_BVF, submucosa_Vs=sm_Vs, submucosa_SaO2=SaO2,
                Fwhm=FWHM,
                nrPhotons=photons)

            #print((BVF, Vs, d, wavelength))
            reflectancesFromEstimatedParameters[i, j] = reflectanceValue

    # correct these reflectances by image quotient
    reflectancesFromEstimatedParameters = mch.normalizeImageQuotient(
        reflectancesFromEstimatedParameters)

    wavelengths = mch.removeIqWavelength(wavelengths)
    #%% plot data for nicer inspection

    from sklearn.metrics import r2_score

    r2Score = r2_score(reflectancesFromEstimatedParameters.T,
                       inputReflectancesOnlySegmented.T)

    print("r2Score for random forest estimatation of:", str(r2Score))

    #%% sort by wavelength:

    # the sort order does not depend on the plot index, so compute it once
    sortedIndices = sorted(range(len(wavelengths)),
                           key=lambda k: wavelengths[k])

    for plot_i in range(n):

        plt.figure()
        plt.plot(wavelengths[sortedIndices],
                 reflectancesFromEstimatedParameters[plot_i,
                                                     sortedIndices], 'g-o')
        plt.plot(wavelengths[sortedIndices],
                 inputReflectancesOnlySegmented[plot_i, sortedIndices], 'b-o')
        print(
            str(
                r2_score(reflectancesFromEstimatedParameters[plot_i, :],
                         inputReflectancesOnlySegmented[plot_i, :])))
        plt.legend(["estimated", "measurement"])
        plt.xlabel("wavelength [m]")
        plt.ylabel("normalized reflectance")
        plt.savefig("data/output/example_fit_" + str(plot_i) + '.png')

    return estimatedParameters, r2Score, reflectancesFromEstimatedParameters, inputReflectancesOnlySegmented
Example #9
## Menu
option = input(
    "###      Banknote Authentication     ###\n" +
    " Please enter the model to train; the system will return\n" +
    " a complete analysis of the result: \n" + " 1: k-nearest neighbours\n" +
    " 2: Random Forest\n" + " 3: Support Vector Machines\n" +
    " 4: Neural Network\n" + " 5: Gaussian discriminant functions\n" +
    " 6: Sequential Selection\n" + " 7: Fisher index\n" +
    " 8: Principal component analysis\n" +
    " 9: Correlation analysis\n-> ")

if (option == "1"):
    model = kv.kVecinos(X, y)
elif (option == "2"):
    model = rf.randomForest(X, y)
elif (option == "3"):
    model = svm.suportVectorMachine(X, y)
elif (option == "4"):
    model = rn.neuralNetwork(X, y)
elif (option == "5"):
    model = dg.fdg(X, y)
elif (option == "6"):
    seleccionSecuencial(X, y)
elif (option == "7"):
    fisher(X, y)
elif (option == "8"):
    pca(X, y, 3)
elif (option == "9"):
    corr(X, y)
else:
Example #10
from baggedDecisionTree import baggedDecisionTree
baggedDecisionTree(
    nEstimators = 500,
    X_train     = X_train,
    y_train     = y_train,
    X_test      = X_test,
    y_test      = y_test
    )

### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
from randomForest import randomForest
randomForest(
    nEstimators  = 500,
    maxLeafNodes = 16,
    irisData     = irisData,
    X_train      = X_train,
    y_train      = y_train,
    X_test       = X_test,
    y_test       = y_test
    )

### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
from adaBoost import adaBoost 
adaBoost(
    nEstimators  = 200,
    learningRate = 0.5,
    X_train      = X_train,
    y_train      = y_train,
    X_test       = X_test,
    y_test       = y_test
    )
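The three wrappers above take standard ensemble hyperparameters; their internals are not shown, but a rough scikit-learn equivalent of the same three calls would be (a sketch, assuming plain classifiers on the same splits):

from sklearn.ensemble import (AdaBoostClassifier, BaggingClassifier,
                              RandomForestClassifier)

# Rough scikit-learn counterparts of the wrapper calls above.
bag = BaggingClassifier(n_estimators=500).fit(X_train, y_train)
rf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16).fit(X_train, y_train)
ada = AdaBoostClassifier(n_estimators=200, learning_rate=0.5).fit(X_train, y_train)

for name, model in [("bagging", bag), ("random forest", rf), ("adaboost", ada)]:
    print(name, "test accuracy:", model.score(X_test, y_test))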
Example #11
    def changeDepth(self, depths):

        # Get data as array
        train = getLines(self.trainData, 100)

        cv = []
        train = np.array(train)
        np.random.shuffle(train)

        for i in range(10):
            cv.append(train[i * 200:(i + 1) * 200])

        zoltempdt = [0 for xtemp in range(10)]
        zoltemprf = [0 for xtemp in range(10)]
        zoltempbag = [0 for xtemp in range(10)]
        zoltempsvm = [0 for xtemp in range(10)]

        w = 1000
        it = 10

        avgzoldt = [0 for xtemp in range(len(depths))]
        avgzolrf = [0 for xtemp in range(len(depths))]
        avgzolbag = [0 for xtemp in range(len(depths))]
        avgzolsvm = [0 for xtemp in range(len(depths))]

        stddevzoldt = [0 for xtemp in range(len(depths))]
        stddevzolrf = [0 for xtemp in range(len(depths))]
        stddevzolbag = [0 for xtemp in range(len(depths))]
        stddevzolsvm = [0 for xtemp in range(len(depths))]

        stderrzoldt = [0 for xtemp in range(len(depths))]
        stderrzolrf = [0 for xtemp in range(len(depths))]
        stderrzolbag = [0 for xtemp in range(len(depths))]
        stderrzolsvm = [0 for xtemp in range(len(depths))]

        testnew = []
        trainnew = []

        for r in range(len(depths)):

            for i in range(it):
                trainnew = []
                testnew = cv[i]

                for j in range(it):
                    if j != i:
                        for k in range(200):
                            trainnew.append(cv[j][k])

                temptrain = trainnew
                trainDataset = getTrainData(temptrain, 0.25)

                rid_train, x_train, y_train = splitColumns(trainDataset)
                rid_test, x_test, y_test = splitColumns(testnew)

                # Pre-processing data
                x_train = preprocess(x_train)
                x_test = preprocess(x_test)

                # Creating dictionary from x_train
                words, wordList = getWordList(x_train)

                # Removing most frequent 100 words
                for _ in range(100):
                    words.pop(0)

                wordList = [x for x, _ in words]

                # Forming feature vector, calculating Conditional probabilities, applying bag
                trainfv, trainfv0, trainfv1 = featureVector(
                    wordList[:w], x_train, y_train)
                testfv, testfv0, testfv1 = featureVector(
                    wordList[:w], x_test, y_test)

                zoltempdt[i] = decisionTree(trainfv, testfv, depths[r])
                zoltemprf[i] = randomForest(trainfv, testfv, depths[r])
                zoltempbag[i] = bagging(trainfv, testfv, depths[r])
#                    zoltempsvm[i] = svm(trainfv,testfv)

            avgzoldt[r] = np.mean(zoltempdt)
            avgzolrf[r] = np.mean(zoltemprf)
            avgzolbag[r] = np.mean(zoltempbag)
            avgzolsvm[r] = np.mean(zoltempsvm)

            stddevzoldt[r] = np.std(zoltempdt)
            stddevzolrf[r] = np.std(zoltemprf)
            stddevzolbag[r] = np.std(zoltempbag)
            stddevzolsvm[r] = np.std(zoltempsvm)

            stderrzoldt[r] = stddevzoldt[r] / math.sqrt(it)
            stderrzolrf[r] = stddevzolrf[r] / math.sqrt(it)
            stderrzolbag[r] = stddevzolbag[r] / math.sqrt(it)
            stderrzolsvm[r] = stddevzolsvm[r] / math.sqrt(it)

        print(avgzoldt)
        print(avgzolbag)
        print(avgzolrf)
        #        print(avgzolsvm)

        print(stddevzoldt)
        print(stddevzolbag)
        print(stddevzolrf)
        #        print(stddevzolsvm)

        print(stderrzoldt)
        print(stderrzolbag)
        print(stderrzolrf)
        #        print(stderrzolsvm)

        f = open(self.file, "a+")
        f.write("\n AVERAGE ZERO ONE LOSS")
        f.write("\n 1. Decision Tree")
        f.write(str(avgzoldt))
        f.write("\n 2. Bagging")
        f.write(str(avgzolbag))
        f.write("\n 3. Random forest")
        f.write(str(avgzolrf))
        #            f.write("\n 4. SVM")
        #            f.write(str(avgzolsvm))

        f.write("\n STANDARD DEVIATION ZERO ONE LOSS")
        f.write("\n 1. Decision Tree")
        f.write(str(stddevzoldt))
        f.write("\n 2. Bagging")
        f.write(str(stddevzolbag))
        f.write("\n 3. Random forest")
        f.write(str(stddevzolrf))
        #            f.write("\n 4. SVM")
        #            f.write(str(stddevzolsvm))

        f.write("\n STANDARD ERROR ZERO ONE LOSS")
        f.write("\n 1. Decision Tree")
        f.write(str(stderrzoldt))
        f.write("\n 2. Bagging")
        f.write(str(stderrzolbag))
        f.write("\n 3. Random forest")
        f.write(str(stderrzolrf))
        #            f.write("\n 4. SVM")
        #            f.write(str(stderrzolsvm))
        f.close()
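The per-depth means and standard errors computed above lend themselves to an error-bar plot; a sketch of what that could look like at the end of changeDepth (matplotlib is an assumption, it is not imported in the excerpt):

        import matplotlib.pyplot as plt

        # Hypothetical visualization of the zero-one losses computed above.
        plt.errorbar(depths, avgzoldt, yerr=stderrzoldt, label="decision tree")
        plt.errorbar(depths, avgzolbag, yerr=stderrzolbag, label="bagging")
        plt.errorbar(depths, avgzolrf, yerr=stderrzolrf, label="random forest")
        plt.xlabel("maximum depth")
        plt.ylabel("zero-one loss")
        plt.legend()
        plt.savefig(self.file + "_depth_plot.png")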
Example #12
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

from randomForest import randomForest


def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy


data = datasets.load_breast_cancer()
X = data.data
y = data.target

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1234)

clf = randomForest(n_trees=3, max_depth=10)

clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
acc = accuracy(y_test, y_pred)

print("Accuracy:", acc)
Example #13
import extractKeyStroke as extract
import prepareInputTarget as prepare
import neuralnet as nn
import randomForest as rf

my_input, target = prepare.prepareInputTarget()

rf.randomForest(my_input)

#hiddenLayerSize = 100
#net = nn.neuralnet(my_input, target, hiddenLayerSize)

Example #14
import numpy as np

from setup import data
from randomForest import randomForest

#%% load data
# the folder with the reflectance spectra
dataFolder = 'data/output/'

trainingParameters, trainingReflectances, testParameters, testReflectances = \
    data.noisy(dataFolder)

trainingWeights = np.ones((trainingParameters.shape[0], ))

#%% train forest

rf = randomForest(trainingParameters, trainingReflectances, trainingWeights)

#%% test

#with open("iris.dot", 'w') as f:
#    f = tree.export_graphviz(rf, out_file=f)

# predict test reflectances and get absolute errors.

absErrors = np.abs(rf.predict(testReflectances) - testParameters)

r2Score = rf.score(testReflectances, testParameters)

#import matplotlib.pyplot as plt

print("absolute error distribution BVF, Volume fraction")