# Example no. 1
def main():
    """Load the image dataset, train an 11-model ensemble, and print accuracy.

    Side effects: reads the dataset from ../data_part1, writes ensemble
    checkpoints under ./ensemble_files, and prints shapes/accuracy to stdout.
    """
    # Silence TensorFlow's C++ logging (level 3 = errors only).
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    DATASET_DIRECTORY = '../data_part1'

    X, y, X_hidden = dataset_manip.load_dataset(DATASET_DIRECTORY)
    num_classes = len(set(y))

    print('X.shape = ' + str(X.shape))
    print('X_hidden.shape = ' + str(X_hidden.shape))

    # NOTE(review): num_classes is derived from y above, but the ensemble is
    # built with a hard-coded 10 — confirm the dataset really has 10 classes
    # and consider passing num_classes here.
    ens = Ensemble(input_shape=(77, 71, 1),
                   num_classes=10,
                   num_models=11,
                   batch_size=512,
                   path='./ensemble_files',
                   load=False)
    ens.train(X=X, y=y, epochs_per_model=300, split_rate=0.9)
    print(ens.measure_accuracy(X, y))
    # Removed: a bare `return` was followed by an unreachable single-Model
    # training path (split_dataset / Model / train_unsupervised) — dead code.
# Example no. 2
def worker(fold, n_users, n_items, dataset_dir):
    """Train and evaluate one ensemble on a single cross-validation fold.

    Reads the fold's train/test rating files from dataset_dir, binarizes
    them, trains an Ensemble, prints per-fold statistics and scores, and
    returns the score list. Relies on module-level globals:
    binarize_threshold, batch_size, kensemble, topN, split_method,
    eval_metrics, reg, n_factors.
    """
    fold_id = fold + 1
    dataset_tag = dataset_dir.split('/')[-2]

    # Training ratings: load the sparse matrix and binarize it.
    train_path = dataset_dir + 'ratings__' + str(fold_id) + '_tra.txt'
    train_raw = loadSparseR(n_users, n_items, train_path)
    trasR = lil_matrix(matBinarize(train_raw, binarize_threshold))

    print(dataset_tag + '@%d:' % fold_id, trasR.shape, trasR.nnz,
          '%.2f' % (trasR.nnz / float(trasR.shape[0])))

    # Test ratings: same loading + binarization.
    test_path = dataset_dir + 'ratings__' + str(fold_id) + '_tst.txt'
    test_raw = loadSparseR(n_users, n_items, test_path)
    tstsR = lil_matrix(matBinarize(test_raw, binarize_threshold))

    sampler = Sampler(trasR=trasR, batch_size=batch_size)

    en = Ensemble(n_users, n_items, kensemble, topN, split_method,
                  eval_metrics, reg, n_factors, batch_size)
    scores = en.train(fold_id, trasR, tstsR, sampler)

    # Report "metric1,metric2@topN=score1,score2" for this fold.
    metric_names = ','.join(['%s' % m for m in eval_metrics])
    score_text = ','.join(['%.6f' % s for s in scores])
    print(dataset_tag + '@%d:' % fold_id,
          metric_names + '@%d=' % (topN) + score_text)

    en.close()
    return scores
def main():
    """Benchmark the ensemble classifier on the credit-card datasets.

    For each dataset CSV, min-max scales features X1..X23, then runs 20
    train/test iterations (80/20 split) and hands the accuracy and
    rejection rates to graphics() for plotting.
    """
    # Dataset path
    dataset_name = ['credit_card_clients_balanced', 'credit_card_clients']

    for data_name in dataset_name:
        # os.path.join keeps the path portable — the original hard-coded
        # Windows '\\' separators, which break on POSIX systems.
        dataset_path = os.path.join(os.getcwd(), "dataset", data_name + ".csv")
        dataset = pd.read_csv(dataset_path, encoding='utf-8')

        # Feature columns X1..X23 and target column Y.
        feature_columns = ['X%d' % i for i in range(1, 24)]
        data_x = dataset[feature_columns]
        data_y = dataset['Y']

        # Preprocessing: scale every feature into [0, 1].
        min_max_scaler = preprocessing.MinMaxScaler()
        X_normalized = min_max_scaler.fit_transform(data_x)

        acc_rate = []
        reject_rate = []

        # Runs to test the model
        for i in range(20):
            print('---------------- Ensemble -----------------')
            print('--- MLP - SVM - KNN - GMM - Naive Bayes ---')
            print(i + 1, 'of 20 iterations')
            X_train, X_test, y_train, y_test = train_test_split(X_normalized,
                                                                data_y,
                                                                test_size=0.2)
            y_train = np.array(y_train)
            y_test = np.array(y_test)

            model = Ensemble()
            model.train(X_train, y_train, gridSearch=False)
            y_hat = model.predict(X_test)

            # evaluate() returns (error, reject); store accuracy = 1 - error.
            error, reject = model.evaluate(y_hat, y_test)
            acc_rate.append(1 - error)
            reject_rate.append(reject)

        graphics(acc_rate, reject_rate, data_name)