Example #1
import numpy as np
import classifier as clss  # assumption: the project's local classifier module, aliased as clss

def cros_validate(number_lambd, X_train, Y_train, X_test, Y_test,
                  X_shuffle_train, Y_shuffle_train):
    lambda_vals = np.logspace(-4, -1, number_lambd)
    test_error = np.zeros(number_lambd)
    train_error = np.zeros(number_lambd)

    for j, lambd in enumerate(lambda_vals):
        beta, v = clss.svm(X_train, Y_train, lambd)
        test_error[j] = clss.svm_test(X_test, Y_test, beta, v)
        train_error[j] = clss.svm_test(X_train, Y_train, beta, v)

    shuffle_test_error = np.zeros(number_lambd)
    shuffle_train_error = np.zeros(number_lambd)

    for j, lambd in enumerate(lambda_vals):
        beta2, v2 = clss.svm(X_shuffle_train, Y_shuffle_train, lambd)
        shuffle_test_error[j] = clss.svm_test(X_test, Y_test, beta2, v2)
        shuffle_train_error[j] = clss.svm_test(X_shuffle_train,
                                               Y_shuffle_train, beta2, v2)
    return test_error, shuffle_test_error
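A possible way to drive it (a sketch; X_shuffle_train/Y_shuffle_train would be copies of the training data with permuted labels, and the plotting is an assumption, not part of the original):

import matplotlib.pyplot as plt

n = 20
test_err, shuffle_test_err = cros_validate(n, X_train, Y_train, X_test, Y_test,
                                           X_shuffle_train, Y_shuffle_train)
lambda_vals = np.logspace(-4, -1, n)  # same grid the function builds internally
plt.semilogx(lambda_vals, test_err, label="test error")
plt.semilogx(lambda_vals, shuffle_test_err, label="test error (shuffled labels)")
plt.xlabel("lambda")
plt.ylabel("error")
plt.legend()
plt.show()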
Example #3
import numpy as np
from sklearn.model_selection import train_test_split
# assumption: `svm` is a project wrapper whose result object exposes accurracy() (sic)


def test_svm(df):
    # Results table for the experiment
    table = {}
    x = df[['preg', 'plas', 'pres', 'skin', 'insu', 'mass', 'pedi', 'age']]
    y = df['class'].transform(lambda k: 1 if bool(k) else -1)
    for t in range(60, 91, 3):
        table[t] = []
        for i in range(1, 41):
            print(f"Executing: treino={t}% iteração={i} ...")
            xtrain, xtest, ytrain, ytest = train_test_split(
                x,
                y,
                test_size=((100 - t) / 100),
                random_state=None,
                stratify=y)
            results = svm(xtrain, ytrain, xtest, ytest)
            print(f"Acurácia: {results.accurracy()}")
            table[t].append(results.accurracy())
        table[t] = {
            'min': np.min(table[t]),
            'mean': np.mean(table[t]),
            'max': np.max(table[t])
        }
    return table
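The column names match the Pima Indians Diabetes dataset; a hedged driver (the CSV path is hypothetical):

import pandas as pd

df = pd.read_csv("pima-indians-diabetes.csv")  # hypothetical path; must contain the columns above
table = test_svm(df)
print(table[60])  # e.g. {'min': ..., 'mean': ..., 'max': ...}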
Example #5
import numpy as np
from classifier import svm, svm_2, svm_test
lambd = .01
X = np.random.rand(4, 3)
Y = np.array([0, 1, 2, 3])
#np.array([[1,1],[0,0],[2,3]])
#Y = np.array([1,0,2])
beta, v = svm(X, Y, lambd)

#Y2 = np.array([1,-1])
#beta2, v2 = svm_2(X,Y2,lambd)

test_error = svm_test(X, Y, beta, v)
print(test_error)
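classifier here is the project's own module; a rough stand-in with the same call shape, built on scikit-learn (an assumption for the multiclass case above, not the project's implementation):

import numpy as np
from sklearn.svm import LinearSVC

def svm(X, Y, lambd):
    # Stand-in: map the regularization weight lambd to C = 1/lambd.
    clf = LinearSVC(C=1.0 / lambd).fit(X, Y)
    return clf.coef_, clf.intercept_

def svm_test(X, Y, beta, v):
    # Misclassification rate of the linear scores X @ beta.T + v
    # (assumes >2 classes, so beta has one row per class).
    pred = np.argmax(X @ beta.T + v, axis=1)
    return np.mean(pred != Y)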
Example #6
    svm = config["svm_linear"]
elif svm_kernel == "rbf":
    svm = config["svm_rbf"]
else:
    print("Invalid kernel for svm")
    exit()

print(svm)
model = CNNModel(signet, svm["model_path"])
images_dictionary = {}

list_of_signatures_use_on_train = []
list_of_signatures_use_on_test = []

weights = {1: config["c-plus"], 0: svm["c-minus"]}
svc_linear = classifier.svm(gamma='auto', weights=weights, kernel="linear")
print(svc_linear)
svc_rbf = classifier.svm(gamma=2**(-11), weights=weights, kernel="rbf")
print(svc_rbf)
mlp = classifier.mlp(0.0001, (100, 500))
print(mlp)
knn = classifier.knn(3, "uniform")
print(knn)
tree = classifier.tree(weights, "log2", "gini", 0.0000001)
print(tree)

random_users = get_signature_folders(config["dataset_for_random_path"])
print("Loading list for random users to train")

train_config = config["train_config"]
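Every config key below is inferred from the accesses above; the concrete values are placeholders (a sketch of the assumed shape, not the project's real configuration):

config = {
    "svm_linear": {"model_path": "models/signet_linear.pkl", "c-minus": 1.0},
    "svm_rbf": {"model_path": "models/signet_rbf.pkl", "c-minus": 1.0},
    "c-plus": 1.0,
    "dataset_for_random_path": "datasets/gpds/",
    "train_config": {},
}
svm_kernel = "linear"  # or "rbf"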
Example #7
def DAE_trainer(learning_rate=1e-3,
                batch_size=100,
                num_epoch=10,
                hidden_layers=[7, 4, 2],
                input_dim=0,
                step=20,
                X_train=[],
                X_test=[],
                Y_train=[],
                Y_test=[],
                dt=[],
                noise_factor=0.25):
    model1 = DAE(learning_rate=learning_rate,
                 batch_size=batch_size,
                 hidden_layers=hidden_layers,
                 input_dim=input_dim,
                 noise_factor=noise_factor)
    for epoch in range(num_epoch):
        num_sample = len(X_train)
        for _ in range(num_sample // batch_size):
            X_mb, _ = dt.train.next_batch(batch_size)
            # Execute the forward and the backward pass and report computed losses
            recon_loss = model1.run_single_step(X_mb)

        if epoch % step == 0:
            chartcolumn.take_display_traning('Epoch ' + str(epoch) +
                                             ' Recon loss: ' + str(recon_loss))
            # model.writer.add_summary(summary, epoch )

            z_train = model1.transformer(X_train)
            s = time.time()
            z_test = model1.transformer(X_test)
            e = time.time()
            t_tr = (e - s) / float(len(X_test))

            auc_svm, t1 = classifier.svm(z_train, Y_train, z_test, Y_test)
            auc_dt, t2 = classifier.decisiontree(z_train, Y_train, z_test,
                                                 Y_test)
            auc_rf, t3 = classifier.rf(z_train, Y_train, z_test, Y_test)
            auc_nb, t4 = classifier.naive_baves(z_train, Y_train, z_test,
                                                Y_test)
            auc_kn, t5 = classifier.KNeighbors(z_train, Y_train, z_test,
                                               Y_test)
            auc_logistic, t6 = classifier.Logistic(z_train, Y_train, z_test,
                                                   Y_test)

            DAE_recon_loss_.append(recon_loss)

            DAE_auc_svm_.append(auc_svm)
            DAE_auc_dt_.append(auc_dt)
            DAE_auc_rf_.append(auc_rf)
            DAE_auc_nb_.append(auc_nb)
            DAE_auc_kn_.append(auc_kn)
            DAE_auc_logistic_.append(auc_logistic)

            DAE_t1_.append((t1 + t_tr))
            DAE_t2_.append((t2 + t_tr))
            DAE_t3_.append((t3 + t_tr))
            DAE_t4_.append((t4 + t_tr))
            DAE_t5_.append((t5 + t_tr))
            DAE_t6_.append((t6 + t_tr))

    print('Done DAE!')

    return model1
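DAE_trainer appends its results to module-level lists and pulls batches from dt.train.next_batch(); a hedged setup sketch of the names it assumes, given that X_train/X_test/Y_train/Y_test and the dataset object dt already exist:

import time  # used inside DAE_trainer to time the transform step

DAE_recon_loss_ = []
DAE_auc_svm_, DAE_auc_dt_, DAE_auc_rf_ = [], [], []
DAE_auc_nb_, DAE_auc_kn_, DAE_auc_logistic_ = [], [], []
DAE_t1_, DAE_t2_, DAE_t3_, DAE_t4_, DAE_t5_, DAE_t6_ = ([] for _ in range(6))

model = DAE_trainer(input_dim=X_train.shape[1], X_train=X_train, X_test=X_test,
                    Y_train=Y_train, Y_test=Y_test, dt=dt)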
Example #8
def VAE_trainer(learning_rate=1e-3,
                batch_size=100,
                num_epoch=10,
                hidden_layers=[7, 4, 2],
                input_dim=0,
                step=20,
                X_train=[],
                X_test=[],
                Y_train=[],
                Y_test=[],
                dt=[]):
    model = VAE(learning_rate=learning_rate,
                batch_size=batch_size,
                hidden_layers=hidden_layers,
                input_dim=input_dim)
    for epoch in range(num_epoch):

        num_sample = len(X_train)
        for _ in range(num_sample // batch_size):
            X_mb, _ = dt.train.next_batch(batch_size)
            # Execute the forward and the backward pass and report computed losses
            loss, recon_loss, latent_loss = model.run_single_step(X_mb)

        if epoch % step == 0:
            print(
                '[Epoch {}] Loss: {}, Recon loss: {}, Latent loss: {}'.format(
                    epoch, loss, recon_loss, latent_loss))
            chartcolumn.take_display_traning('Epoch ' + str(epoch) +
                                             ' Loss: ' + str(loss) +
                                             ' Recon loss: ' +
                                             str(recon_loss) +
                                             ' Latent loss: ' +
                                             str(latent_loss))

            z_train = model.transformer(X_train)
            s = time.time()
            z_test = model.transformer(X_test)
            e = time.time()
            t_tr = (e - s) / len(X_test)
            # np.savetxt(path +  "z_train_"+str(epoch)+".csv", z_train, delimiter=",", fmt='%f' )
            # np.savetxt(path +  "z_test_"+str(epoch)+".csv", z_train, delimiter=",", fmt='%f' )
            auc_svm, t1 = classifier.svm(z_train, Y_train, z_test, Y_test)
            auc_dt, t2 = classifier.decisiontree(z_train, Y_train, z_test,
                                                 Y_test)
            auc_rf, t3 = classifier.rf(z_train, Y_train, z_test, Y_test)
            auc_nb, t4 = classifier.naive_baves(z_train, Y_train, z_test,
                                                Y_test)

            VAE_loss_.append(loss)
            VAE_recon_loss_.append(recon_loss)
            VAE_latent_loss_.append(latent_loss)

            VAE_auc_svm_.append(auc_svm)
            VAE_auc_dt_.append(auc_dt)
            VAE_auc_rf_.append(auc_rf)
            VAE_auc_nb_.append(auc_nb)
            VAE_t1_.append((t1 + t_tr))
            VAE_t2_.append((t2 + t_tr))
            VAE_t3_.append((t3 + t_tr))
            VAE_t4_.append((t4 + t_tr))

    print('Done VAE!')

    return model
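Like DAE_trainer, this function assumes module-level result lists; a sketch of the expected initialization:

VAE_loss_, VAE_recon_loss_, VAE_latent_loss_ = [], [], []
VAE_auc_svm_, VAE_auc_dt_, VAE_auc_rf_, VAE_auc_nb_ = [], [], [], []
VAE_t1_, VAE_t2_, VAE_t3_, VAE_t4_ = [], [], [], []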
Example #9
def make_init_deep_learning(data_index, pathTrain, pathTest, pathColumn,
                            AUC_and_Structure):
    datasets = np.asarray([
        "unsw", "ctu13_8", "Ads", "Phishing", "IoT", "Spam", "Antivirus",
        "VirusShare", 'nslkdd'
    ])  #30 ? 57 513 482

    dataname = datasets[data_index]
    dt = shuttle.read_data_sets(dataname, pathTrain, pathTest, pathColumn)
    num_sample = dt.train.num_examples
    chartcolumn.take_display_traning("size of dataset: " + str(num_sample))
    input_dim = dt.train.features.shape[1]
    balance_rate = len(dt.train.features1) / float(len(dt.train.features0))

    label_dim = dt.train.labels.shape[1]
    chartcolumn.take_display_traning("dimension: " + str(input_dim))
    chartcolumn.take_display_traning("number of class: " + str(label_dim))

    data_save = np.asarray([data_index, input_dim, balance_rate, label_dim])
    data_save = np.reshape(data_save, (-1, 4))
    if os.path.isfile("Results/Infomation/" + dataname +
                      "/datainformation.csv"):  #
        auc = np.genfromtxt("Results/Infomation/" + dataname +
                            "/datainformation.csv",
                            delimiter=',')
        auc = np.reshape(auc, (-1, 4))
        data_save = np.concatenate((auc, data_save), axis=0)
        np.savetxt("Results/Infomation/" + dataname + "/datainformation.csv",
                   data_save,
                   delimiter=",",
                   fmt="%f")

    else:
        np.savetxt("Results/Infomation/" + dataname + "/datainformation.csv",
                   data_save,
                   delimiter=",",
                   fmt="%f")

    num_epoch = 2000
    step = 20
    # filter_sizes = np.asarray([[1, 2, 3], [3, 5, 7], [1, 2, 3], [7, 11, 15], [1, 2, 3], [3, 5, 7], [1, 2, 3]])
    #data_shapes = np.asarray([[12, 12], [14, 14], [8, 8], [40, 40], [9, 9], [25, 25], [8, 8]])
    hidden_layer = hidden_layers[data_index]
    block_size = batch_sizes[data_index]
    lr = 1e-4
    noise_factor = 0.0025  # tried: 0, 0.0001, 0.001, 0.01, 0.1, 1.0
    # filter_size = filter_sizes[data_index]  # [1,2,3]
    #data_shape = data_shapes[data_index]  # [12,12]
    conf = str(num_epoch) + "_" + str(block_size) + "_" + str(lr) + "_" + str(
        hidden_layer[0]) + "_" + str(hidden_layer[1]) + "_" + str(
            hidden_layer[2]) + "_noise: " + str(noise_factor)

    X_train = dt.train.features
    Y_train = dt.train.labels
    X_test = dt.test.features
    Y_test = dt.test.labels

    svm, t1 = classifier.svm(X_train, Y_train, X_test, Y_test)
    auc_dt, t2 = classifier.decisiontree(X_train, Y_train, X_test, Y_test)
    rf, t3 = classifier.rf(X_train, Y_train, X_test, Y_test)
    nb, t4 = classifier.naive_baves(X_train, Y_train, X_test, Y_test)
    kn, t5 = classifier.KNeighbors(X_train, Y_train, X_test, Y_test)
    logistic, t6 = classifier.Logistic(X_train, Y_train, X_test, Y_test)

    data_save = np.asarray([
        data_index, input_dim, balance_rate, svm, auc_dt, rf, nb, 1000 * t1,
        1000 * t2, 1000 * t3, 1000 * t4
    ])
    data_save = np.reshape(data_save, (-1, 11))
    AUC_and_Structure.append(svm)
    AUC_and_Structure.append(auc_dt)
    AUC_and_Structure.append(rf)
    AUC_and_Structure.append(nb)
    AUC_and_Structure.append(kn)
    AUC_and_Structure.append(logistic)
    if os.path.isfile("Results/RF_AUC_DIF/" + dataname + "/AUC_Input.csv"):  #
        auc = np.genfromtxt("Results/RF_AUC_DIF/" + dataname +
                            "/AUC_Input.csv",
                            delimiter=',')
        auc = np.reshape(auc, (-1, 11))
        data_save = np.concatenate((auc, data_save), axis=0)
        np.savetxt("Results/RF_AUC_DIF/" + dataname + "/AUC_Input.csv",
                   data_save,
                   delimiter=",",
                   fmt="%f")

    else:
        np.savetxt("Results/RF_AUC_DIF/" + dataname + "/AUC_Input.csv",
                   data_save,
                   delimiter=",",
                   fmt="%f")
    return data_index, input_dim, balance_rate, lr, block_size, num_epoch, hidden_layer, \
           step, X_train, X_test, Y_train, Y_test, dt, label_dim, noise_factor, conf, \
           dataname
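The function reads the module-level hidden_layers and batch_sizes tables and writes under Results/, whose subdirectories must already exist; a hedged setup sketch (values are placeholders):

import os

hidden_layers = [[7, 4, 2]] * 9  # hypothetical: one layer spec per dataset
batch_sizes = [100] * 9          # hypothetical: one batch size per dataset
for dataname in ("unsw", "ctu13_8"):  # repeat for the other dataset names
    os.makedirs("Results/Infomation/" + dataname, exist_ok=True)
    os.makedirs("Results/RF_AUC_DIF/" + dataname, exist_ok=True)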
Example #10
        count_s += 1
        correct_class.append(0)

if (not is_mcyt):
    dataset_folders = os.listdir(dataset_path)
    # list() is needed in Python 3: random.sample requires a sequence, not an iterator
    dataset_folders_filtered = list(filter(filter_dataset_folders, dataset_folders))
    dataset_folders_sample = random.sample(dataset_folders_filtered, 10)
    print("Adding Random to test set (Only for GPDS's dataset)")
    for p in dataset_folders_sample:
        f = os.listdir(dataset_path + p)
        # Load and pre-process the signature
        f_filtered = list(filter(filter_genuine, f))
        f_sample = random.sample(f_filtered, 1)[0]
        filename = os.path.join(dataset_path + p, f_sample)
        original = imread(filename, flatten=1)
        processed = preprocess_signature(original, canvas_size)

        # Use the CNN to extract features
        feature_vector = model.get_feature_vector(processed)
        data.append(feature_vector[0])
        correct_class.append(0)

data_test = np.array(data)

print("Correctly data test classes: ")
print(correct_class)

classifier.knn(data_train, data_test, expected, correct_class)
classifier.svm(data_train, data_test, expected, correct_class)
classifier.mlp(data_train, data_test, expected, correct_class)
classifier.tree(data_train, data_test, expected, correct_class)
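imread(filename, flatten=1) is the long-deprecated scipy.misc API; in current environments an equivalent call (assuming imageio is installed) would be:

import imageio.v2 as imageio

original = imageio.imread(filename, as_gray=True)  # grayscale load, like flatten=1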
Example #11
def main():
    p = Path("./result")
    if not p.exists():
        os.makedirs(p)

    parser = argparse.ArgumentParser(
        description='Bioinf project. The arguments can be passed in any order.'
    )

    classes = parser.add_mutually_exclusive_group()
    classes.add_argument('-cl2',
                         help='in order to classify two cancer types.',
                         action='store_true')
    classes.add_argument(
        '-cl3',
        help='in order to classify two cancer types AND sane.',
        action='store_true')

    classifier = parser.add_mutually_exclusive_group()
    classifier.add_argument('-svm',
                            help='train a Support Vector Machine classifier',
                            action='store_true')
    classifier.add_argument('-knn',
                            help='train a K Nearest Neighbors classifier',
                            action='store_true')
    classifier.add_argument('-rforest',
                            help='train a Random Forest classifier',
                            action='store_true')
    classifier.add_argument('-kmeans',
                            help='train a Kmeans clustering',
                            action='store_true')
    classifier.add_argument(
        '-hierarc',
        help='train an Agglomerative Hierarchical clustering',
        action='store_true')

    imbalance = parser.add_mutually_exclusive_group()
    imbalance.add_argument('-over',
                           help='imbalance: Random Oversampling',
                           action='store_true')
    imbalance.add_argument('-smote',
                           help='imbalance: SMOTE',
                           action='store_true')

    preprocess = parser.add_mutually_exclusive_group()
    preprocess.add_argument(
        '-ttest',
        help=
        'feature selection: t-test per chromosome and per CpG site - 2 classes',
        action='store_true')
    preprocess.add_argument(
        '-fisher',
        help='feature selection: fisher criterion - 3 classes',
        action='store_true')
    preprocess.add_argument('-anova',
                            help='feature selection: anova - 3 classes',
                            action='store_true')
    preprocess.add_argument(
        '-pca',
        help='dimensionality reduction: Principal Component Analysis',
        action='store_true')
    preprocess.add_argument(
        '-lda',
        help='dimensionality reduction: Linear Discriminant Analysis',
        action='store_true')
    preprocess.add_argument(
        '-sfs',
        help=
        'feature selection - wrapper: Step Forward Selection (nearly infeasible)',
        action='store_true')
    preprocess.add_argument(
        '-ga',
        help='feature selection - wrapper: Genetic Algorithm',
        action='store_true')

    parser.add_argument(
        '-d',
        '--download',
        nargs=2,
        help='download Adenoma and Adenocarcinoma and Squamous Cell Neoplasm '
        + 'data from Genomic Data Common. It needs 2 parameters: ' +
        'the first is the destination folder; ' +
        'the second is the number of files to be downloaded for each class',
        action='store')
    parser.add_argument(
        '-ds',
        '--downloadsane',
        nargs=2,
        help='download Sane data from Genomic Data Common. ' +
        'It needs 2 parameters: ' +
        'the first is the destination folder; ' +
        'the second is the number of files to be downloaded',
        action='store')
    parser.add_argument(
        '-s',
        '--store',
        help=
        'concatenate files belonging to the same cancer type and store them in a binary file',
        action='store')

    parser.add_argument(
        '--alpha',
        type=float,
        default=0.001,
        help='to set a different ALPHA: ttest parameter - default is 0.001',
        action='store')
    parser.add_argument(
        '--perc',
        type=float,
        default=0.95,
        help='to set the PERC of variance explained by the features kept by PCA',
        action='store')
    parser.add_argument(
        '-rs',
        '--r_state',
        type=int,
        default=8,
        help='to set a user defined Random State - default is 8',
        action='store')
    parser.add_argument('--only_chrms_t',
                        default=False,
                        help='select only chrms for ttest',
                        action='store_true')
    parser.add_argument(
        '--crossval',
        help=
        'to do cross-validation OR, for unsupervised methods, to plot the inertia curve',
        action='store_true')
    parser.add_argument('--plot_lc',
                        help='plot the learning curve',
                        action='store_true')
    parser.add_argument(
        '--remove_nan_cpgs',
        type=str2bool,
        default=True,
        help='If True, remove features containing at least one NaN value. ' +
        'If False, NaNs are replaced by the mean over the feature. ' +
        'The old file produced by feature reduction must be deleted when changing this option. '
        + 'Default is True.',
        action='store')

    args = parser.parse_args()

    if args.download:
        print("download ")
        dgdc.getDataEx(path=args.download[0], file_n=args.download[1])
    if args.downloadsane:
        print("download sane ")
        dgdc.getSaneDataEx(path=args.downloadsane[0],
                           file_n=args.downloadsane[1])
    if args.store:
        print("store")
        dgdc.storeDataIntoBinary(path=args.store)
        print("Data stored.")

    # validity checks
    if not args.cl2 and not args.cl3:
        print(
            "insert arg -cl2 for classifying 2 classes OR -cl3 for 3 classes")
        return

    # parameters and variables
    alpha = args.alpha  # alpha parameter for t-test
    perc = args.perc  # percentage of variance explained
    classes = 2 if args.cl2 else 3
    random_state = args.r_state
    no_nan = args.remove_nan_cpgs
    n_components = 100

    cl.setPlot_lc(args.plot_lc)

    cl.addToName("cl{}".format(classes))
    cl.addToName("rs{}".format(random_state))

    # load data
    print("Loading....")
    x, y, chrms_pos = pr.loadData(classes=classes)
    if no_nan:
        cl.addToName("no_nan")
        length = x.shape[1]
        x = pr.removeNanFeature(x)
        print("{} NaN features removed!".format(length - x.shape[1]))
    print("Loaded!")

    x_train, x_test, y_train, y_test = sk.model_selection.train_test_split(
        x, y, test_size=0.2, random_state=random_state)
    del x, y

    # preprocess
    if args.ttest:
        if classes != 2:
            print("wrong number of classes")
            return
        #print("Start ttest axis={}....".format(args.ttest))
        r, cpg_r = pr.compute_t_test(x_train,
                                     y_train,
                                     chrms_pos,
                                     alpha,
                                     random_state,
                                     axis=0,
                                     remove_nan=no_nan)
        print(r)
        cl.addToName("ttest{}".format(args.ttest))
        length = x_train.shape[1]
        x_train, x_test = pr.removeFeatures(x_train,
                                            x_test,
                                            cpg_r,
                                            chrms_pos,
                                            args.only_chrms_t,
                                            remove_nan=no_nan,
                                            y_train=y_train)
        print("Features removed: {}".format(length - x_train.shape[1]))
        print("End ttest!")

    if args.ga:
        print("genetic algorithm")
        cl.addToName("ga")
        # to work with fewer components
        # x_train = x_train[:, 1:100]
        result = g.GA_function(x_train, y_train, random_state, classes, 0.1)
        path = Path('./data/GA_{}_{}.npy'.format(random_state, classes))
        np.save(path, result)
        x_train = x_train[:, result]
        x_test = x_test[:, result]

    if args.pca:
        print("pca")
        cl.addToName("pca")
        x_train, x_test = pr.pca_function(x_train,
                                          x_test,
                                          y_train,
                                          y_test,
                                          classes,
                                          perc,
                                          random_state,
                                          name=cl.name,
                                          remove_nan=no_nan)

    if args.lda:
        #print("lda - {} components".format(args.lda))
        cl.addToName("lda")
        x_train, x_test = pr.lda_function(x_train, x_test, y_train, y_test,
                                          classes, args.lda, random_state,
                                          cl.name)

    if args.fisher:
        if classes != 2:
            print("wrong number of classes")
            return
        #cl.addToName("fisher{}".format(args.fisher))
        cl.addToName("fisher")
        print("fisher")
        x_train, x_test = pr.fisher_function(x_train,
                                             x_test,
                                             y_train,
                                             y_test,
                                             random_state,
                                             best=True,
                                             n=n_components,
                                             remove_nan=no_nan)
        # if best=True selects the n best features, if False the worst n features (for debugging)
    if args.sfs:
        if classes != 2:
            print("wrong number of classes")
            return
        print("Start sfs....")
        feat_col = pr.sfs(x_train, x_test, y_train, y_test, chrms_pos, alpha,
                          random_state)
        x_train = x_train[:, feat_col]
        x_test = x_test[:, feat_col]

    if args.anova:
        if classes != 3:
            print("wrong number of classes")
            return
        print("anova")
        cl.addToName("anova")
        x_train, x_test = pr.anova_function(x_train,
                                            x_test,
                                            y_train,
                                            y_test,
                                            alpha,
                                            random_state,
                                            remove_nan=no_nan)

    # imbalance
    if args.over:
        print("over ")
        x_train, y_train = pr.imbalance(x_train, y_train, "over", random_state)
        cl.addToName("over")

    if args.smote:
        print("smote ")
        x_train, y_train = pr.imbalance(x_train, y_train, "smote",
                                        random_state)
        cl.addToName("smote")

    cl.random_state(random_state)

    # classify
    if args.svm:
        print("svm ")
        cl.svm(x_train,
               x_test,
               y_train,
               y_test,
               classes=classes,
               crossval=args.crossval)

    if args.knn:
        print("knn ")
        cl.knn(x_train,
               x_test,
               y_train,
               y_test,
               classes=classes,
               crossval=args.crossval)

    if args.rforest:
        print("rforest")
        cl.random_forest(x_train,
                         x_test,
                         y_train,
                         y_test,
                         classes=classes,
                         crossval=args.crossval)

    if args.kmeans:
        print("kmeans")
        uc.kmeans(x_train,
                  x_test,
                  y_train,
                  y_test,
                  classes=classes,
                  random_state=random_state,
                  crossval=args.crossval)

    if args.hierarc:
        print("hierarchical clustering")
        uc.hierarchical(x_train,
                        x_test,
                        y_train,
                        y_test,
                        classes=classes,
                        random_state=random_state,
                        crossval=args.crossval)

    print("Log name: {}.log".format(cl.name))

    handlers = log.getLogger().handlers[:]
    for handler in handlers:
        handler.close()
        log.getLogger().removeHandler(handler)
    nf = p / cl.name
    if not nf.exists():
        os.makedirs(nf)
    npath = Path(nf / '{}.log'.format(cl.name))
    i = 1
    while npath.exists():
        npath = Path(nf / '{}_{}.log'.format(cl.name, i))
        i += 1
    os.rename('log.log', npath)
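Typical invocations, given the flags defined above (the script name main.py is an assumption):

python main.py -cl2 -svm -ttest --crossval     # 2 classes, t-test selection, cross-validated SVM
python main.py -cl3 -rforest -anova -smote     # 3 classes, ANOVA selection, SMOTE, random forest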
Example #12
                test_classification.append(1)
            for k in range(option[0] + option[1]):
                test_classification.append(0)
          #  metrics = classifier.knn(np.array(train_sets_processed[i]), test, classifications[i], test_classification, genuine_quantity, option[0], option[1])
          #  frr_metrics[0].append(metrics[0])
          #  far_skilled_metrics[0].append(metrics[1])
          #  far_random_metrics[0].append(metrics[2])
          #  eer_metrics[0].append(metrics[3])

          #  metrics = classifier.tree(np.array(train_sets_processed[i]), test, classifications[i], test_classification, genuine_quantity, option[0], option[1])
          #  frr_metrics[1].append(metrics[0])
          #  far_skilled_metrics[1].append(metrics[1])
          #  far_random_metrics[1].append(metrics[2])
          #  eer_metrics[1].append(metrics[3])

            metrics = classifier.svm(np.array(train_sets_processed[i]), test, classifications[i], test_classification, genuine_quantity, option[0], option[1], weights=svm_weights[i])
            frr_metrics[2].append(metrics[0])
            far_skilled_metrics[2].append(metrics[1])
            far_random_metrics[2].append(metrics[2])
            eer_metrics[2].append(metrics[3])

            frr_metrics_global.append(metrics[4])
            far_skilled_metrics_global.append(metrics[5])
            far_random_metrics_global.append(metrics[6])
            eer_metrics_global.append(metrics[7])

            frr_metrics_global_sd.append(metrics[4])
            far_skilled_metrics_global_sd.append(metrics[5])
            far_random_metrics_global_sd.append(metrics[6])
            eer_metrics_global_sd.append(metrics[7])
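classifier.svm is assumed here to return an 8-tuple (FRR, FAR skilled, FAR random, EER, then the same four computed globally), with row index 2 of the metric tables reserved for the SVM; a sketch of summarizing the collected values:

import numpy as np

print("mean FRR (svm):", np.mean(frr_metrics[2]))
print("mean FAR skilled (svm):", np.mean(far_skilled_metrics[2]))
print("mean FAR random (svm):", np.mean(far_random_metrics[2]))
print("mean EER (svm): %.4f +/- %.4f" % (np.mean(eer_metrics[2]), np.std(eer_metrics[2])))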