def main():
    """Load the dataset, train the CNN ensemble, and print its accuracy.

    NOTE(review): the original function had a bare ``return`` followed by a
    large block of unreachable dead code (a single-``Model`` training path
    using ``dataset_manip.split_dataset`` and ``train_unsupervised``). That
    tail could never execute and has been removed; restore it deliberately
    if the single-model path is ever needed again.
    """
    # Silence TensorFlow's C++ logging (3 = errors only).
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    DATASET_DIRECTORY = '../data_part1'
    X, y, X_hidden = dataset_manip.load_dataset(DATASET_DIRECTORY)
    # NOTE(review): computed from the labels but the ensemble below
    # hard-codes num_classes=10 and input_shape=(77, 71, 1) — confirm
    # these agree with the actual dataset.
    num_classes = len(set(y))
    print('X.shape = ' + str(X.shape))
    print('X_hidden.shape = ' + str(X_hidden.shape))

    ens = Ensemble(input_shape=(77, 71, 1),
                   num_classes=10,
                   num_models=11,
                   batch_size=512,
                   path='./ensemble_files',
                   load=False)
    ens.train(X=X, y=y, epochs_per_model=300, split_rate=0.9)
    print(ens.measure_accuracy(X, y))
    return
def worker(fold, n_users, n_items, dataset_dir):
    """Run one cross-validation fold end to end and return its scores.

    Loads the fold's training/test rating files, binarizes them, trains
    the ensemble, prints the evaluation metrics, and closes the model.
    """
    fold_no = fold + 1
    dataset_tag = dataset_dir.split('/')[-2]

    # Training ratings, binarized at the module-level threshold.
    tra_path = dataset_dir + 'ratings__' + str(fold_no) + '_tra.txt'
    trasR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, tra_path),
                    binarize_threshold))
    # Shape, non-zero count, and average ratings per user.
    print(dataset_tag + '@%d:' % (fold_no), trasR.shape, trasR.nnz,
          '%.2f' % (trasR.nnz / float(trasR.shape[0])))

    # Test ratings, binarized the same way.
    tst_path = dataset_dir + 'ratings__' + str(fold_no) + '_tst.txt'
    tstsR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, tst_path),
                    binarize_threshold))

    sampler = Sampler(trasR=trasR, batch_size=batch_size)
    en = Ensemble(n_users, n_items, kensemble, topN, split_method,
                  eval_metrics, reg, n_factors, batch_size)
    scores = en.train(fold_no, trasR, tstsR, sampler)

    metric_names = ','.join(['%s' % metric for metric in eval_metrics])
    metric_values = ','.join(['%.6f' % (value) for value in scores])
    print(dataset_tag + '@%d:' % (fold_no),
          metric_names + '@%d=' % (topN) + metric_values)

    en.close()
    return scores
def main():
    """Evaluate the rejection ensemble on both credit-card datasets.

    For each dataset: min-max normalize the features, then run 20
    independent train/test splits, collecting accuracy and rejection
    rates, and hand the results to ``graphics`` for plotting.
    """
    dataset_name = ['credit_card_clients_balanced', 'credit_card_clients']
    for data_name in dataset_name:
        # os.path.join keeps the path portable; the original hard-coded
        # "\\" separators, which only work on Windows.
        dataset_path = os.path.join(os.getcwd(), 'dataset',
                                    data_name + '.csv')
        dataset = pd.read_csv(dataset_path, encoding='utf-8')

        # Feature columns X1..X23 and label column Y.
        data_x = dataset[['X' + str(i) for i in range(1, 24)]]
        data_y = dataset['Y']

        # Scale every feature into [0, 1] before training.
        min_max_scaler = preprocessing.MinMaxScaler()
        X_normalized = min_max_scaler.fit_transform(data_x)

        acc_rate = []
        reject_rate = []

        # 20 runs to average out the randomness of the 80/20 split.
        for i in range(20):
            print('---------------- Ensemble -----------------')
            print('--- MLP - SVM - KNN - GMM - Naive Bayes ---')
            print(i + 1, 'of 20 iterations')
            X_train, X_test, y_train, y_test = train_test_split(
                X_normalized, data_y, test_size=0.2)
            y_train = np.array(y_train)
            y_test = np.array(y_test)

            model = Ensemble()
            model.train(X_train, y_train, gridSearch=False)
            y_hat = model.predict(X_test)
            error, reject = model.evaluate(y_hat, y_test)
            acc_rate.append(1 - error)
            reject_rate.append(reject)

        # Plot accuracy vs. rejection rate for this dataset.
        graphics(acc_rate, reject_rate, data_name)