Ejemplo n.º 1
0
    #print(cross_validation(5, X_train.to_numpy(), Processor.ToNumpyCol(Y_train), model))

elif ds == "mam":
    path = "./datasets/mam/mam.data"
    header = ["BI-RADS", "age", "shape", "margin", "density", "result"]
    All = Processor.read(path, header)

    [X, Y] = Clean.mam(All)

    [X_train, X_test, Y_train, Y_test] = Processor.split(X, Y, train=0.8)

    model = NaiveBayes()

    print(
        cross_validation(5, X_train.to_numpy(), Processor.ToNumpyCol(Y_train),
                         model))

elif ds == "ttt":
    path = "./datasets/tictactoe/tic-tac-toe.data"
    header = ["tl", "tm", "tr", "ml", "mm", "mr", "bl", "bm", "br", "result"]

    All = Processor.read(path, header)

    [X, Y] = Clean.ttt(All)

    print(X.shape)

    [X_train, X_test, Y_train, Y_test] = Processor.split(X, Y, train=0.8)

    model = NaiveBayes()
    if type(model) == NaiveBayes:
# Convert X and Y once (to_numpy / Processor.ToNumpyCol) before slicing below.
X = X.to_numpy()
Y = Processor.ToNumpyCol(Y)

# Row counts to evaluate: 20, 70, 120, ... stopping before X.shape[0].
iters = np.arange(20, X.shape[0], 50)

accuracies = []

for iter_ in iters:
    # Use only the first iter_ rows of X and Y for this run.
    # An earlier variant ran LogisticRegression here instead, with
    # learning_rate=0.1, max_gradient=1e-3 and max_iters=iter_.
    rowsX, rowsY = X[:iter_, :], Y[:iter_, :]
    acc = cross_validation(5, rowsX, rowsY, NaiveBayes())
    accuracies.append(acc)

"""path = "../datasets/ionosphere/ionosphere.data"
header = ["{}{}".format("col", x) for x in range(33 + 1)]
header.append("signal")
All = Processor.read(path, header)
[X, Y] = Clean.Ionosphere(All)

path = "../datasets/adult/adult.data"

header = ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation',
          'relationship',
          'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'salary']

All = Processor.read(path, header)
Ejemplo n.º 3
0
from Project1.src.HPTuning import df_to_table
import pandas as pd

# Ionosphere data set: run cross_validation for LogisticRegression and for
# NaiveBayes, then print both rounded results in one list.
print("Analyzing the ionosphere data set")
path = "../datasets/ionosphere/ionosphere.data"
# Column names col0..col33 followed by "signal".
header = ["{}{}".format("col", x) for x in range(33 + 1)] + ["signal"]
All = Processor.read(path, header)
X, Y = Clean.Ionosphere(All)

# With LogisticRegression the call is unpacked into three values; only the
# first one is kept.
acc, _, _ = cross_validation(
    5,
    X.to_numpy(),
    Processor.ToNumpyCol(Y),
    LogisticRegression(),
    learning_rate=1.0,
    max_gradient=1e-2,
    max_iters=50000,
)
ionosphere_results = ['ionosphere', round(acc, 2)]

# With NaiveBayes the return value is used directly as a single number.
acc = cross_validation(5, X.to_numpy(), Processor.ToNumpyCol(Y), NaiveBayes())
ionosphere_results.append(round(acc, 2))
print(ionosphere_results)

# Find accuracies for adult data set
print("Analyzing the adult data set")
path = "../datasets/adult/adult.data"
header = [
    'age', 'workclass', 'fnlwgt', 'education', 'education-num',
    'marital-status', 'occupation', 'relationship', 'race', 'sex',
    'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
if dataset == 'ionosphere':
    # Run cross_validation once per entry of learning_rates on the training
    # split and hand the collected rows to df_to_table.
    # Column names col0..col33 followed by "signal".
    header = ["{}{}".format("col", x) for x in range(33 + 1)] + ["signal"]
    All = Processor.read(path, header)
    X, Y = Clean.Ionosphere(All)
    X_train, X_test, Y_train, Y_test = Processor.split(X, Y, train=0.8)

    results = []
    for rate in learning_rates:
        r = cross_validation(
            5,
            X_train.to_numpy(),
            Processor.ToNumpyCol(Y_train),
            LogisticRegression(),
            learning_rate=rate,
            max_gradient=1e-2,
            max_iters=50000,
            random=False,
        )
        # Put the rate in front so the row matches the four column names below.
        r.insert(0, rate)
        results.append(r)

    df = pd.DataFrame(
        results,
        columns=['learning rate', 'accuracy', 'last gradient', 'iterations'],
    )
    df_to_table(df, 'ionosphere_table')

elif dataset == 'adult':

    header = ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation',
              'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
              'salary']

    All = Processor.read(path, header)

    [X, Y] = Clean.adult(All)
Ejemplo n.º 5
0
    # Column names col0..col33 followed by "signal".
    header = ["{}{}".format("col", x) for x in range(33 + 1)] + ["signal"]
    All = Processor.read(path, header)
    X, Y = Clean.Ionosphere(All)

    # X_test / Y_test come out of the split but are not used below.
    X_train, X_test, Y_train, Y_test = Processor.split(X, Y, train=0.8)

    model = LogisticRegression()
    # Print whatever cross_validation returns for this model at learning_rate=0.2.
    print(
        cross_validation(
            5,
            X_train.to_numpy(),
            Processor.ToNumpyCol(Y_train),
            model,
            learning_rate=0.2,
        )
    )

elif ds == "mam":
    path = "./datasets/mam/mam.data"
    header = ["BI-RADS", "age", "shape", "margin", "density", "result"]
    All = Processor.read(path, header)

    [X, Y] = Clean.mam(All)

    [X_train, X_test, Y_train, Y_test] = Processor.split(X, Y, train=0.8)

    model = LogisticRegression()

    print(