def main(dataset_name):
    """Benchmark classic feature-selection methods with a LinearSVC and a Keras model.

    For each FS method in the module-level ``fs_methods``, runs repeated stratified
    k-fold CV over the dataset, caches the FS scores/rankings to ``temp/`` as JSON,
    evaluates several feature-subset sizes with (a) a liblinear SVC whose C/solver
    are grid-searched by 5-fold CV and (b) a Keras model, and writes all metrics to
    ``directory + 'LinearSVC_<method>.json'``.

    Relies on module-level globals: load_dataset, k_folds, k_fold_reps,
    random_state, fs_methods, mu, batch_size, reps, directory,
    normalization_func, train_SVC, train_Keras, balance_accuracy, liblinearutil, K.
    """
    dataset = load_dataset()
    raw_data = np.asarray(dataset['raw']['data'])
    raw_label = np.asarray(dataset['raw']['label'])
    num_classes = len(np.unique(raw_label))
    rskf = RepeatedStratifiedKFold(n_splits=k_folds,
                                   n_repeats=k_fold_reps,
                                   random_state=random_state)
    for fs_method in fs_methods:
        print('FS-Method : ', fs_method.__name__)
        cont_seed = 0
        # Per-(n_features) accumulators; each entry collects results across folds.
        nfeats = []
        accuracies = []
        svc_accuracies = []
        BAs = []
        svc_BAs = []
        fs_time = []
        mAPs = []
        svc_mAPs = []
        mus = []
        name = dataset_name + '_mu_' + str(mu)
        # NOTE(review): assumes binary 0/1 labels — sums give per-class counts.
        print(name, 'samples : ', raw_label.sum(), (1. - raw_label).sum())
        for j, (train_index, test_index) in enumerate(
                rskf.split(raw_data, raw_label)):
            print('k_fold', j, 'of', k_folds * k_fold_reps)
            train_data, train_labels = raw_data[train_index].copy(
            ), raw_label[train_index].copy()
            test_data, test_labels = raw_data[test_index].copy(
            ), raw_label[test_index].copy()
            train_labels = to_categorical(train_labels, num_classes=num_classes)
            test_labels = to_categorical(test_labels, num_classes=num_classes)
            # Drop features that are identically zero on the training split.
            valid_features = np.where(np.abs(train_data).sum(axis=0) > 0)[0]
            if len(valid_features) < train_data.shape[1]:
                print('Removing', train_data.shape[1] - len(valid_features),
                      'zero features')
                train_data = train_data[:, valid_features]
                test_data = test_data[:, valid_features]
            model_kwargs = {'mu': mu / len(train_data), 'degree': 3}
            print('mu :', model_kwargs['mu'], ', batch_size :', batch_size)
            svc_kwargs = {'C': 1.0, 'solver': 0.}
            print('Starting feature selection')
            # FS results are cached per (method, fold, seed) so reruns skip fitting.
            fs_dir = os.path.dirname(os.path.realpath(__file__)) + '/temp/'
            if not os.path.isdir(fs_dir):
                os.makedirs(fs_dir)
            fs_filename = fs_dir + fs_method.__name__ + '_iter_' + str(j) + \
                '_seed_' + str(random_state) + '.json'
            if os.path.exists(fs_filename):
                with open(fs_filename, 'r') as outfile:
                    fs_data = json.load(outfile)
                fs_class = fs_method(
                    n_features_to_select=200
                    if 'RFE' not in fs_method.__name__ else 10)
                fs_class.score = np.asarray(fs_data['score'])
                fs_class.ranking = np.asarray(fs_data['ranking'])
                # No timing available for cached results.
                # (np.NAN was removed in NumPy 2.0; np.nan is the canonical spelling.)
                fs_time.append(np.nan)
            else:
                start_time = time.process_time()
                fs_class = fs_method(
                    n_features_to_select=200
                    if 'RFE' not in fs_method.__name__ else 10)
                # Targets are mapped from one-hot to {-1, +1} for the selector.
                fs_class.fit(train_data, 2. * train_labels[:, -1] - 1.)
                fs_data = {
                    'score': fs_class.score.tolist(),
                    'ranking': fs_class.ranking.tolist()
                }
                fs_time.append(time.process_time() - start_time)
                with open(fs_filename, 'w') as outfile:
                    json.dump(fs_data, outfile)
                print('Finishing feature selection. Time : ', fs_time[-1], 's')
            for i, n_features in enumerate([10, 50, 100, 150, 200]):
                n_accuracies = []
                n_svc_accuracies = []
                n_BAs = []
                n_svc_BAs = []
                n_mAPs = []
                n_svc_mAPs = []
                n_train_accuracies = []
                print('n_features : ', n_features)
                fs_class.n_features_to_select = n_features
                svc_train_data = fs_class.transform(train_data)
                svc_test_data = fs_class.transform(test_data)
                norm = normalization_func()
                svc_train_data_norm = norm.fit_transform(svc_train_data)
                svc_test_data_norm = norm.transform(svc_test_data)
                # Grid-search liblinear solver type and C via built-in 5-fold CV.
                bestcv = -1
                bestc = None
                bestSolver = None
                for s in [0, 1, 2, 3]:
                    for my_c in [
                            0.001, 0.01, 0.1, 0.5, 1.0, 1.4, 1.5, 1.6, 2.0,
                            2.5, 5.0, 25.0, 50.0, 100.0
                    ]:
                        cmd = '-v 5 -s ' + str(s) + ' -c ' + str(my_c) + ' -q'
                        cv = liblinearutil.train(
                            (2 * train_labels[:, -1] - 1).tolist(),
                            svc_train_data_norm.tolist(), cmd)
                        if cv > bestcv:
                            bestcv = cv
                            bestc = my_c
                            bestSolver = s
                svc_kwargs['C'] = bestc
                svc_kwargs['solver'] = bestSolver
                print('Best -> C:', bestc, ', s:', bestSolver, ', acc:', bestcv)
                for r in range(reps):
                    # Deterministic per-repetition seeding.
                    np.random.seed(cont_seed)
                    K.tf.set_random_seed(cont_seed)
                    cont_seed += 1
                    # --- liblinear SVC evaluation ---
                    model = train_SVC(svc_train_data_norm, train_labels,
                                      svc_kwargs)
                    _, accuracy, test_pred = liblinearutil.predict(
                        (2 * test_labels[:, -1] - 1).tolist(),
                        svc_test_data_norm.tolist(), model, '-q')
                    test_pred = np.asarray(test_pred)
                    n_svc_accuracies.append(accuracy[0])
                    n_svc_BAs.append(balance_accuracy(test_labels, test_pred))
                    n_svc_mAPs.append(
                        average_precision_score(test_labels[:, -1], test_pred))
                    del model
                    # --- Keras model evaluation on the same feature subset ---
                    model = train_Keras(svc_train_data, train_labels,
                                        svc_test_data, test_labels,
                                        model_kwargs)
                    train_data_norm = model.normalization.transform(
                        svc_train_data)
                    test_data_norm = model.normalization.transform(
                        svc_test_data)
                    test_pred = model.predict(test_data_norm)
                    n_BAs.append(balance_accuracy(test_labels, test_pred))
                    n_mAPs.append(
                        average_precision_score(test_labels[:, -1], test_pred))
                    n_accuracies.append(
                        model.evaluate(test_data_norm, test_labels,
                                       verbose=0)[-1])
                    n_train_accuracies.append(
                        model.evaluate(train_data_norm, train_labels,
                                       verbose=0)[-1])
                    del model
                    K.clear_session()
                    print(
                        'n_features : ', n_features,
                        ', acc : ', n_accuracies[-1],
                        ', BA : ', n_BAs[-1],
                        ', mAP : ', n_mAPs[-1],
                        ', train_acc : ', n_train_accuracies[-1],
                        ', svc_acc : ', n_svc_accuracies[-1],
                        ', svc_BA : ', n_svc_BAs[-1],
                        ', svc_mAP : ', n_svc_mAPs[-1],
                    )
                # First fold creates the per-size lists; later folds extend them.
                if i >= len(accuracies):
                    accuracies.append(n_accuracies)
                    svc_accuracies.append(n_svc_accuracies)
                    BAs.append(n_BAs)
                    mAPs.append(n_mAPs)
                    svc_BAs.append(n_svc_BAs)
                    svc_mAPs.append(n_svc_mAPs)
                    nfeats.append(n_features)
                    mus.append(model_kwargs['mu'])
                else:
                    accuracies[i] += n_accuracies
                    svc_accuracies[i] += n_svc_accuracies
                    BAs[i] += n_BAs
                    mAPs[i] += n_mAPs
                    svc_BAs[i] += n_svc_BAs
                    svc_mAPs[i] += n_svc_mAPs
        # Trapezoidal accuracy-vs-n_features AUC (printed only, not saved here).
        mean_accuracies = np.array(accuracies).mean(axis=-1)
        print('NFEATS : ', nfeats)
        diff_accuracies = .5 * (mean_accuracies[1:] + mean_accuracies[:-1])
        np_nfeats = np.array(nfeats)
        diff = np_nfeats[1:] - np_nfeats[:-1]
        AUC = np.sum(diff * diff_accuracies) / np.sum(diff)
        print('AUC : ', AUC)
        output_filename = directory + 'LinearSVC_' + fs_method.__name__ + '.json'
        if not os.path.isdir(directory):
            os.makedirs(directory)
        info_data = {
            'reps': reps,
            'classification': {
                'mus': mus,
                'n_features': nfeats,
                'accuracy': accuracies,
                'mean_accuracy': np.array(accuracies).mean(axis=1).tolist(),
                'svc_accuracy': svc_accuracies,
                'mean_svc_accuracy':
                np.array(svc_accuracies).mean(axis=1).tolist(),
                'BA': BAs,
                'mean_BA': np.array(BAs).mean(axis=1).tolist(),
                'mAP': mAPs,
                'mean_mAP': np.array(mAPs).mean(axis=1).tolist(),
                'svc_BA': svc_BAs,
                'svc_mean_BA': np.array(svc_BAs).mean(axis=1).tolist(),
                'svc_mAP': svc_mAPs,
                'svc_mean_mAP': np.array(svc_mAPs).mean(axis=1).tolist(),
                'fs_time': fs_time
            }
        }
        for k, v in info_data['classification'].items():
            if 'mean' in k:
                print(k, v)
        with open(output_filename, 'w') as outfile:
            json.dump(info_data, outfile)
def main(dataset_name):
    """Benchmark E2EFS feature selection (kernel variant) against SVC/Keras baselines.

    For each class in the module-level ``e2efs_classes``: runs repeated stratified
    k-fold CV, trains the E2EFS Keras model to obtain a feature heatmap, selects the
    top-``n_features`` features, then evaluates a grid-searched liblinear SVC and a
    plain Keras model on that subset. Metrics are written to
    ``directory + 'LinearSVC_<kernel>_<class>.json'``.

    Relies on module-level globals: load_dataset, k_folds, k_fold_reps, kernel, mu,
    reps, directory, normalization_func, train_Keras, train_SVC, balance_accuracy,
    liblinearutil, K.
    """
    dataset = load_dataset()
    raw_data = np.asarray(dataset['raw']['data'])
    raw_label = np.asarray(dataset['raw']['label'])
    num_classes = len(np.unique(raw_label))
    rskf = RepeatedStratifiedKFold(n_splits=k_folds,
                                   n_repeats=k_fold_reps,
                                   random_state=42)
    for e2efs_class in e2efs_classes:
        print('E2EFS-Method : ', e2efs_class.__name__)
        cont_seed = 0
        # Accumulators: one entry per n_features value, extended across folds.
        nfeats = []
        accuracies = []
        model_accuracies = []
        svc_accuracies = []
        fs_time = []
        BAs = []
        svc_BAs = []
        model_BAs = []
        mAPs = []
        svc_mAPs = []
        model_mAPs = []
        mus = []
        name = dataset_name + '_' + kernel + '_mu_' + str(mu)
        print(name)
        for j, (train_index, test_index) in enumerate(
                rskf.split(raw_data, raw_label)):
            print('k_fold', j, 'of', k_folds * k_fold_reps)
            train_data, train_labels = raw_data[train_index], raw_label[
                train_index]
            test_data, test_labels = raw_data[test_index], raw_label[
                test_index]
            train_labels = to_categorical(train_labels, num_classes=num_classes)
            test_labels = to_categorical(test_labels, num_classes=num_classes)
            # Drop features that are identically zero on the training split.
            valid_features = np.where(np.abs(train_data).sum(axis=0) > 0)[0]
            if len(valid_features) < train_data.shape[1]:
                print('Removing', train_data.shape[1] - len(valid_features),
                      'zero features')
                train_data = train_data[:, valid_features]
                test_data = test_data[:, valid_features]
            model_kwargs = {
                'mu': mu / len(train_data),
                'kernel': kernel,
                'degree': 3
            }
            svc_kwargs = {'C': 1.0, 'solver': 0.}
            for i, n_features in enumerate([10, 50, 100, 150, 200]):
                n_accuracies = []
                n_svc_accuracies = []
                n_model_accuracies = []
                n_BAs = []
                n_svc_BAs = []
                n_model_BAs = []
                n_mAPs = []
                n_svc_mAPs = []
                n_model_mAPs = []
                n_train_accuracies = []
                n_time = []
                print('n_features : ', n_features)
                heatmaps = []
                # NOTE(review): `weight` is computed but unused below — confirm intent.
                weight = train_labels[:, -1].mean()
                for r in range(reps):
                    # Deterministic per-repetition seeding.
                    np.random.seed(cont_seed)
                    K.tf.set_random_seed(cont_seed)
                    cont_seed += 1
                    # Train the E2EFS model; it exposes a feature-importance heatmap.
                    model = train_Keras(
                        train_data,
                        train_labels,
                        test_data,
                        test_labels,
                        model_kwargs,
                        e2efs_class=e2efs_class,
                        n_features=n_features,
                    )
                    heatmaps.append(K.eval(model.heatmap))
                    n_time.append(model.fs_time)
                    test_data_norm = model.normalization.transform(test_data)
                    train_data_norm = model.normalization.transform(train_data)
                    test_pred = model.predict(test_data_norm)
                    n_model_accuracies.append(
                        model.evaluate(test_data_norm, test_labels,
                                       verbose=0)[-1])
                    n_model_BAs.append(balance_accuracy(
                        test_labels, test_pred))
                    n_model_mAPs.append(
                        average_precision_score(test_labels[:, -1], test_pred))
                    train_acc = model.evaluate(train_data_norm, train_labels,
                                               verbose=0)[-1]
                    print('n_features : ', n_features, ', accuracy : ',
                          n_model_accuracies[-1], ', BA : ', n_model_BAs[-1],
                          ', mAP : ', n_model_mAPs[-1], ', train_accuracy : ',
                          train_acc, ', time : ', n_time[-1], 's')
                    del model
                    K.clear_session()
                # Average heatmaps over repetitions; keep top-n features.
                heatmap = np.mean(heatmaps, axis=0)
                best_features = np.argsort(heatmap)[::-1][:n_features]
                svc_train_data = train_data[:, best_features]
                svc_test_data = test_data[:, best_features]
                norm = normalization_func()
                svc_train_data_norm = norm.fit_transform(svc_train_data)
                svc_test_data_norm = norm.transform(svc_test_data)
                # Grid-search liblinear solver type and C via built-in 5-fold CV.
                bestcv = -1
                bestc = None
                bestSolver = None
                for s in [0, 1, 2, 3]:
                    for my_c in [
                            0.001, 0.1, 0.5, 1.0, 1.4, 1.5, 1.6, 2.0, 2.5,
                            5.0, 100.0
                    ]:
                        cmd = '-v 5 -s ' + str(s) + ' -c ' + str(my_c) + ' -q'
                        cv = liblinearutil.train(
                            (2 * train_labels[:, -1] - 1).tolist(),
                            svc_train_data_norm.tolist(), cmd)
                        if cv > bestcv:
                            # print('Best -> C:', my_c, ', s:', s, ', acc:', cv)
                            bestcv = cv
                            bestc = my_c
                            bestSolver = s
                svc_kwargs['C'] = bestc
                svc_kwargs['solver'] = bestSolver
                print('Best -> C:', bestc, ', s:', bestSolver, ', acc:', bestcv)
                for r in range(reps):
                    np.random.seed(cont_seed)
                    K.tf.set_random_seed(cont_seed)
                    cont_seed += 1
                    # --- liblinear SVC evaluation on the selected subset ---
                    model = train_SVC(svc_train_data_norm, train_labels,
                                      svc_kwargs)
                    _, accuracy, test_pred = liblinearutil.predict(
                        (2 * test_labels[:, -1] - 1).tolist(),
                        svc_test_data_norm.tolist(), model, '-q')
                    test_pred = np.asarray(test_pred)
                    n_svc_accuracies.append(accuracy[0])
                    n_svc_BAs.append(balance_accuracy(test_labels, test_pred))
                    n_svc_mAPs.append(
                        average_precision_score(test_labels[:, -1], test_pred))
                    del model
                    # --- plain Keras model evaluation on the same subset ---
                    model = train_Keras(svc_train_data, train_labels,
                                        svc_test_data, test_labels,
                                        model_kwargs)
                    train_data_norm = model.normalization.transform(
                        svc_train_data)
                    test_data_norm = model.normalization.transform(
                        svc_test_data)
                    test_pred = model.predict(test_data_norm)
                    n_BAs.append(balance_accuracy(test_labels, test_pred))
                    n_mAPs.append(
                        average_precision_score(test_labels[:, -1], test_pred))
                    n_accuracies.append(
                        model.evaluate(test_data_norm, test_labels,
                                       verbose=0)[-1])
                    n_train_accuracies.append(
                        model.evaluate(train_data_norm, train_labels,
                                       verbose=0)[-1])
                    del model
                    K.clear_session()
                    print(
                        'n_features : ', n_features,
                        ', acc : ', n_accuracies[-1],
                        ', BA : ', n_BAs[-1],
                        ', mAP : ', n_mAPs[-1],
                        ', train_acc : ', n_train_accuracies[-1],
                        ', svc_acc : ', n_svc_accuracies[-1],
                        ', svc_BA : ', n_svc_BAs[-1],
                        ', svc_mAP : ', n_svc_mAPs[-1],
                    )
                # First fold creates the per-size lists; later folds extend them.
                if i >= len(accuracies):
                    accuracies.append(n_accuracies)
                    svc_accuracies.append(n_svc_accuracies)
                    model_accuracies.append(n_model_accuracies)
                    BAs.append(n_BAs)
                    mAPs.append(n_mAPs)
                    fs_time.append(n_time)
                    svc_BAs.append(n_svc_BAs)
                    svc_mAPs.append(n_svc_mAPs)
                    model_BAs.append(n_model_BAs)
                    model_mAPs.append(n_model_mAPs)
                    nfeats.append(n_features)
                    mus.append(model_kwargs['mu'])
                else:
                    accuracies[i] += n_accuracies
                    svc_accuracies[i] += n_svc_accuracies
                    model_accuracies[i] += n_model_accuracies
                    fs_time[i] += n_time
                    BAs[i] += n_BAs
                    mAPs[i] += n_mAPs
                    svc_BAs[i] += n_svc_BAs
                    svc_mAPs[i] += n_svc_mAPs
                    model_BAs[i] += n_model_BAs
                    model_mAPs[i] += n_model_mAPs
        # Persist raw and mean metrics for this E2EFS class.
        output_filename = directory + 'LinearSVC_' + kernel + '_' + \
            e2efs_class.__name__ + '.json'
        if not os.path.isdir(directory):
            os.makedirs(directory)
        info_data = {
            'kernel': kernel,
            'reps': reps,
            'classification': {
                'mus': mus,
                'n_features': nfeats,
                'accuracy': accuracies,
                'mean_accuracy': np.array(accuracies).mean(axis=1).tolist(),
                'svc_accuracy': svc_accuracies,
                'mean_svc_accuracy':
                np.array(svc_accuracies).mean(axis=1).tolist(),
                'model_accuracy': model_accuracies,
                'mean_model_accuracy':
                np.array(model_accuracies).mean(axis=1).tolist(),
                'BA': BAs,
                'mean_BA': np.array(BAs).mean(axis=1).tolist(),
                'mAP': mAPs,
                'mean_mAP': np.array(mAPs).mean(axis=1).tolist(),
                'svc_BA': svc_BAs,
                'svc_mean_BA': np.array(svc_BAs).mean(axis=1).tolist(),
                'svc_mAP': svc_mAPs,
                'svc_mean_mAP': np.array(svc_mAPs).mean(axis=1).tolist(),
                'model_BA': model_BAs,
                'model_mean_BA': np.array(model_BAs).mean(axis=1).tolist(),
                'model_mAP': model_mAPs,
                'model_mean_mAP': np.array(model_mAPs).mean(axis=1).tolist(),
                'fs_time': fs_time
            }
        }
        for k, v in info_data['classification'].items():
            if 'mean' in k:
                print(k, v)
        with open(output_filename, 'w') as outfile:
            json.dump(info_data, outfile)
def main(dataset_name):
    """Benchmark E2EFS feature selection with a three-layer NN classifier.

    For each ``(e2efs_class, e2efs_kwargs, T, extra_epochs)`` tuple in the
    module-level ``e2efs_classes``: runs repeated stratified k-fold CV, trains the
    E2EFS model to obtain a feature heatmap, selects the top-``n_features``
    features, then retrains a plain three-layer NN on that subset. Metrics are
    written to ``directory + 'three_layer_nn_<class>.json'``.

    NOTE(review): ``T`` and ``extra_epochs`` are unpacked but unused in this body —
    confirm whether they are consumed elsewhere.
    """
    dataset = load_dataset()
    raw_data = np.asarray(dataset['raw']['data'])
    raw_label = np.asarray(dataset['raw']['label'])
    num_classes = len(np.unique(raw_label))
    rskf = RepeatedStratifiedKFold(n_splits=k_folds,
                                   n_repeats=k_fold_reps,
                                   random_state=42)
    for e2efs_class, e2efs_kwargs, T, extra_epochs in e2efs_classes:
        print('E2EFS-Method : ', e2efs_class.__name__)
        # Accumulators: one entry per n_features value, extended across folds.
        nfeats = []
        accuracies = []
        model_accuracies = []
        BAs = []
        model_BAs = []
        mAPs = []
        model_mAPs = []
        name = dataset_name + '_three_layer_nn'
        print(name)
        for j, (train_index, test_index) in enumerate(
                rskf.split(raw_data, raw_label)):
            print('k_fold', j, 'of', k_folds * k_fold_reps)
            train_data, train_labels = raw_data[train_index], raw_label[
                train_index]
            test_data, test_labels = raw_data[test_index], raw_label[
                test_index]
            train_labels = to_categorical(train_labels, num_classes=num_classes)
            test_labels = to_categorical(test_labels, num_classes=num_classes)
            # Drop features that are identically zero on the training split.
            valid_features = np.where(np.abs(train_data).sum(axis=0) > 0)[0]
            if len(valid_features) < train_data.shape[1]:
                print('Removing', train_data.shape[1] - len(valid_features),
                      'zero features')
                train_data = train_data[:, valid_features]
                test_data = test_data[:, valid_features]
            model_kwargs = {'regularization': regularization}
            for i, n_features in enumerate([10, 50, 100, 150, 200]):
                n_accuracies = []
                n_model_accuracies = []
                n_BAs = []
                n_model_BAs = []
                n_mAPs = []
                n_model_mAPs = []
                n_train_accuracies = []
                print('n_features : ', n_features)
                heatmaps = []
                for r in range(reps):
                    # Train the E2EFS model; it exposes a feature-importance heatmap.
                    model = train_Keras(train_data,
                                        train_labels,
                                        test_data,
                                        test_labels,
                                        model_kwargs,
                                        e2efs_class=e2efs_class,
                                        n_features=n_features,
                                        e2efs_kwargs=e2efs_kwargs)
                    heatmaps.append(K.eval(model.heatmap))
                    train_data_norm = model.normalization.transform(train_data)
                    test_data_norm = model.normalization.transform(test_data)
                    test_pred = model.predict(test_data_norm)
                    n_model_accuracies.append(
                        model.evaluate(test_data_norm, test_labels,
                                       verbose=0)[-1])
                    n_model_BAs.append(balance_accuracy(
                        test_labels, test_pred))
                    # NOTE(review): full one-hot labels passed here, unlike the
                    # binary-column variant used in sibling scripts — confirm.
                    n_model_mAPs.append(
                        average_precision_score(test_labels, test_pred))
                    train_acc = model.evaluate(train_data_norm, train_labels,
                                               verbose=0)[-1]
                    print('n_features : ', n_features, ', accuracy : ',
                          n_model_accuracies[-1], ', BA : ', n_model_BAs[-1],
                          ', mAP : ', n_model_mAPs[-1], ', train_accuracy : ',
                          train_acc)
                    del model
                    K.clear_session()
                # Average heatmaps over repetitions; keep top-n features.
                heatmap = np.mean(heatmaps, axis=0)
                best_features = np.argsort(heatmap)[::-1][:n_features]
                svc_train_data = train_data[:, best_features]
                svc_test_data = test_data[:, best_features]
                for r in range(reps):
                    # Retrain a plain NN on the selected feature subset.
                    model = train_Keras(svc_train_data, train_labels,
                                        svc_test_data, test_labels,
                                        model_kwargs)
                    train_data_norm = model.normalization.transform(
                        svc_train_data)
                    test_data_norm = model.normalization.transform(
                        svc_test_data)
                    test_pred = model.predict(test_data_norm)
                    n_BAs.append(balance_accuracy(test_labels, test_pred))
                    n_mAPs.append(
                        average_precision_score(test_labels, test_pred))
                    n_accuracies.append(
                        model.evaluate(test_data_norm, test_labels,
                                       verbose=0)[-1])
                    n_train_accuracies.append(
                        model.evaluate(train_data_norm, train_labels,
                                       verbose=0)[-1])
                    del model
                    K.clear_session()
                    print(
                        'n_features : ', n_features,
                        ', acc : ', n_accuracies[-1],
                        ', BA : ', n_BAs[-1],
                        ', mAP : ', n_mAPs[-1],
                        ', train_acc : ', n_train_accuracies[-1],
                    )
                # First fold creates the per-size lists; later folds extend them.
                if i >= len(accuracies):
                    accuracies.append(n_accuracies)
                    model_accuracies.append(n_model_accuracies)
                    BAs.append(n_BAs)
                    mAPs.append(n_mAPs)
                    model_BAs.append(n_model_BAs)
                    model_mAPs.append(n_model_mAPs)
                    nfeats.append(n_features)
                else:
                    accuracies[i] += n_accuracies
                    model_accuracies[i] += n_model_accuracies
                    BAs[i] += n_BAs
                    mAPs[i] += n_mAPs
                    model_BAs[i] += n_model_BAs
                    model_mAPs[i] += n_model_mAPs
        # Persist raw and mean metrics for this E2EFS class.
        output_filename = directory + 'three_layer_nn_' + \
            e2efs_class.__name__ + '.json'
        if not os.path.isdir(directory):
            os.makedirs(directory)
        info_data = {
            'reps': reps,
            'classification': {
                'regularization': regularization,
                'n_features': nfeats,
                'accuracy': accuracies,
                'mean_accuracy': np.array(accuracies).mean(axis=1).tolist(),
                'model_accuracy': model_accuracies,
                'mean_model_accuracy':
                np.array(model_accuracies).mean(axis=1).tolist(),
                'BA': BAs,
                'mean_BA': np.array(BAs).mean(axis=1).tolist(),
                'mAP': mAPs,
                'mean_mAP': np.array(mAPs).mean(axis=1).tolist(),
                'model_BA': model_BAs,
                'model_mean_BA': np.array(model_BAs).mean(axis=1).tolist(),
                'model_mAP': model_mAPs,
                'model_mean_mAP': np.array(model_mAPs).mean(axis=1).tolist()
            }
        }
        for k, v in info_data['classification'].items():
            if 'mean' in k:
                print(k, v)
        with open(output_filename, 'w') as outfile:
            json.dump(info_data, outfile)
def main(dataset_name):
    """Benchmark FS methods that take a tunable hyper-parameter (``fs_range``).

    For each ``(fs_method, fs_range)`` pair in the module-level ``fs_methods``:
    runs repeated stratified k-fold CV; first picks the best ``fs_value`` from
    ``fs_range`` by liblinear 5-fold CV accuracy on a 10-feature subset, then
    refits the selector with 200 features at that value and evaluates several
    subset sizes with a grid-searched liblinear SVC and a Keras model. Metrics
    are written to ``directory + 'LinearSVC_<method>.json'``.

    Relies on module-level globals: load_dataset, k_folds, k_fold_reps, mu,
    batch_size, reps, directory, matlab_engine, normalization_func, train_SVC,
    train_Keras, balance_accuracy, liblinearutil, K.
    """
    dataset = load_dataset()
    raw_data = np.asarray(dataset['raw']['data'])
    raw_label = np.asarray(dataset['raw']['label'])
    num_classes = len(np.unique(raw_label))
    rskf = RepeatedStratifiedKFold(n_splits=k_folds,
                                   n_repeats=k_fold_reps,
                                   random_state=42)
    for fs_method, fs_range in fs_methods:
        print('FS-Method : ', fs_method.__name__)
        # Accumulators: one entry per n_features value, extended across folds.
        nfeats = []
        accuracies = []
        svc_accuracies = []
        BAs = []
        svc_BAs = []
        mAPs = []
        svc_mAPs = []
        mus = []
        name = dataset_name + '_mu_' + str(mu)
        print(name)
        for j, (train_index, test_index) in enumerate(
                rskf.split(raw_data, raw_label)):
            print('k_fold', j, 'of', k_folds * k_fold_reps)
            train_data, train_labels = raw_data[train_index].copy(
            ), raw_label[train_index].copy()
            test_data, test_labels = raw_data[test_index].copy(
            ), raw_label[test_index].copy()
            train_labels = to_categorical(train_labels, num_classes=num_classes)
            test_labels = to_categorical(test_labels, num_classes=num_classes)
            # Drop features that are identically zero on the training split.
            valid_features = np.where(np.abs(train_data).sum(axis=0) > 0)[0]
            if len(valid_features) < train_data.shape[1]:
                print('Removing', train_data.shape[1] - len(valid_features),
                      'zero features')
                train_data = train_data[:, valid_features]
                test_data = test_data[:, valid_features]
            model_kwargs = {
                # 'nclasses': num_classes,
                'mu': mu / len(train_data),
                'degree': 3
            }
            print('mu :', model_kwargs['mu'], ', batch_size :', batch_size)
            svc_kwargs = {'C': 1.0, 'solver': 0.}
            print('Starting feature selection')
            # Tune the FS method's hyper-parameter: pick the fs_value whose
            # 10-feature subset yields the best liblinear 5-fold CV accuracy.
            best_fs = 0
            best_value = None
            for fs_value in fs_range:
                fs_class = fs_method(10, fs_value, matlab_engine=matlab_engine)
                fs_class.fit(train_data, 2. * train_labels[:, -1] - 1.)
                svc_train_data = fs_class.transform(train_data)
                norm = normalization_func()
                svc_train_data_norm = norm.fit_transform(svc_train_data)
                for s in [0, 1, 2, 3]:
                    for my_c in [
                            0.001, 0.01, 0.1, 0.5, 1.0, 1.4, 1.5, 1.6, 2.0,
                            2.5, 5.0, 25.0, 50.0, 100.0
                    ]:
                        cmd = '-v 5 -s ' + str(s) + ' -c ' + str(my_c) + ' -q'
                        cv = liblinearutil.train(
                            (2 * train_labels[:, -1] - 1).tolist(),
                            svc_train_data_norm.tolist(), cmd)
                        if cv > best_fs:
                            best_fs = cv
                            best_value = fs_value
            print('best fs_value: ', best_value)
            # Refit with the winning hyper-parameter and the full 200 features.
            fs_class = fs_method(200, best_value, matlab_engine=matlab_engine)
            fs_class.fit(train_data, 2. * train_labels[:, -1] - 1.)
            print('Finishing feature selection')
            for i, n_features in enumerate([10, 50, 100, 150, 200]):
                n_accuracies = []
                n_svc_accuracies = []
                n_BAs = []
                n_svc_BAs = []
                n_mAPs = []
                n_svc_mAPs = []
                n_train_accuracies = []
                print('n_features : ', n_features)
                fs_class.n_features_to_select = n_features
                svc_train_data = fs_class.transform(train_data)
                svc_test_data = fs_class.transform(test_data)
                norm = normalization_func()
                svc_train_data_norm = norm.fit_transform(svc_train_data)
                svc_test_data_norm = norm.transform(svc_test_data)
                # Grid-search liblinear solver type and C via built-in 5-fold CV.
                bestcv = -1
                bestc = None
                bestSolver = None
                for s in [0, 1, 2, 3]:
                    for my_c in [
                            0.001, 0.01, 0.1, 0.5, 1.0, 1.4, 1.5, 1.6, 2.0,
                            2.5, 5.0, 25.0, 50.0, 100.0
                    ]:
                        cmd = '-v 5 -s ' + str(s) + ' -c ' + str(my_c) + ' -q'
                        cv = liblinearutil.train(
                            (2 * train_labels[:, -1] - 1).tolist(),
                            svc_train_data_norm.tolist(), cmd)
                        if cv > bestcv:
                            bestcv = cv
                            bestc = my_c
                            bestSolver = s
                svc_kwargs['C'] = bestc
                svc_kwargs['solver'] = bestSolver
                print('Best -> C:', bestc, ', s:', bestSolver, ', acc:', bestcv)
                for r in range(reps):
                    # --- liblinear SVC evaluation ---
                    model = train_SVC(svc_train_data_norm, train_labels,
                                      svc_kwargs)
                    _, accuracy, test_pred = liblinearutil.predict(
                        (2 * test_labels[:, -1] - 1).tolist(),
                        svc_test_data_norm.tolist(), model, '-q')
                    test_pred = np.asarray(test_pred)
                    n_svc_accuracies.append(accuracy[0])
                    n_svc_BAs.append(balance_accuracy(test_labels, test_pred))
                    n_svc_mAPs.append(
                        average_precision_score(test_labels[:, -1], test_pred))
                    del model
                    # --- Keras model evaluation on the same feature subset ---
                    model = train_Keras(svc_train_data, train_labels,
                                        svc_test_data, test_labels,
                                        model_kwargs)
                    train_data_norm = model.normalization.transform(
                        svc_train_data)
                    test_data_norm = model.normalization.transform(
                        svc_test_data)
                    test_pred = model.predict(test_data_norm)
                    n_BAs.append(balance_accuracy(test_labels, test_pred))
                    n_mAPs.append(
                        average_precision_score(test_labels[:, -1], test_pred))
                    n_accuracies.append(
                        model.evaluate(test_data_norm, test_labels,
                                       verbose=0)[-1])
                    n_train_accuracies.append(
                        model.evaluate(train_data_norm, train_labels,
                                       verbose=0)[-1])
                    del model
                    K.clear_session()
                    print(
                        'n_features : ', n_features,
                        ', acc : ', n_accuracies[-1],
                        ', BA : ', n_BAs[-1],
                        ', mAP : ', n_mAPs[-1],
                        ', train_acc : ', n_train_accuracies[-1],
                        ', svc_acc : ', n_svc_accuracies[-1],
                        ', svc_BA : ', n_svc_BAs[-1],
                        ', svc_mAP : ', n_svc_mAPs[-1],
                    )
                # First fold creates the per-size lists; later folds extend them.
                if i >= len(accuracies):
                    accuracies.append(n_accuracies)
                    svc_accuracies.append(n_svc_accuracies)
                    BAs.append(n_BAs)
                    mAPs.append(n_mAPs)
                    svc_BAs.append(n_svc_BAs)
                    svc_mAPs.append(n_svc_mAPs)
                    nfeats.append(n_features)
                    mus.append(model_kwargs['mu'])
                else:
                    accuracies[i] += n_accuracies
                    svc_accuracies[i] += n_svc_accuracies
                    BAs[i] += n_BAs
                    mAPs[i] += n_mAPs
                    svc_BAs[i] += n_svc_BAs
                    svc_mAPs[i] += n_svc_mAPs
        # Persist raw and mean metrics for this FS method.
        output_filename = directory + 'LinearSVC_' + fs_method.__name__ + '.json'
        if not os.path.isdir(directory):
            os.makedirs(directory)
        info_data = {
            'reps': reps,
            'classification': {
                'mus': mus,
                'n_features': nfeats,
                'accuracy': accuracies,
                'mean_accuracy': np.array(accuracies).mean(axis=1).tolist(),
                'svc_accuracy': svc_accuracies,
                'mean_svc_accuracy':
                np.array(svc_accuracies).mean(axis=1).tolist(),
                'BA': BAs,
                'mean_BA': np.array(BAs).mean(axis=1).tolist(),
                'mAP': mAPs,
                'mean_mAP': np.array(mAPs).mean(axis=1).tolist(),
                'svc_BA': svc_BAs,
                'svc_mean_BA': np.array(svc_BAs).mean(axis=1).tolist(),
                'svc_mAP': svc_mAPs,
                'svc_mean_mAP': np.array(svc_mAPs).mean(axis=1).tolist(),
            }
        }
        for k, v in info_data['classification'].items():
            if 'mean' in k:
                print(k, v)
        with open(output_filename, 'w') as outfile:
            json.dump(info_data, outfile)
def main(dataset_name):
    """Benchmark classic FS methods with a three-layer NN classifier.

    For each FS method in the module-level ``fs_methods``: runs repeated
    stratified k-fold CV, caches the FS scores/rankings to ``temp/`` as JSON,
    then trains/evaluates a three-layer NN on small feature subsets
    ([5, 10, 15, 20]). An accuracy-vs-n_features AUC is computed per method and
    saved, with all metrics, to ``directory + 'three_layer_nn_<method>.json'``.

    Relies on module-level globals: load_dataset, k_folds, k_fold_reps,
    random_state, fs_methods, regularization, reps, directory, train_Keras,
    balance_accuracy, K.
    """
    dataset = load_dataset()
    raw_data = np.asarray(dataset['raw']['data'])
    raw_label = np.asarray(dataset['raw']['label'])
    num_classes = len(np.unique(raw_label))
    rskf = RepeatedStratifiedKFold(n_splits=k_folds,
                                   n_repeats=k_fold_reps,
                                   random_state=random_state)
    for fs_method in fs_methods:
        print('FS-Method : ', fs_method.__name__)
        # Accumulators: one entry per n_features value, extended across folds.
        nfeats = []
        accuracies = []
        BAs = []
        mAPs = []
        name = dataset_name + '_three_layer_nn'
        print(name)
        for j, (train_index, test_index) in enumerate(
                rskf.split(raw_data, raw_label)):
            print('k_fold', j, 'of', k_folds * k_fold_reps)
            train_data, train_labels = raw_data[train_index].copy(
            ), raw_label[train_index].copy()
            test_data, test_labels = raw_data[test_index].copy(
            ), raw_label[test_index].copy()
            train_labels = to_categorical(train_labels, num_classes=num_classes)
            test_labels = to_categorical(test_labels, num_classes=num_classes)
            # Drop features that are identically zero on the training split.
            valid_features = np.where(np.abs(train_data).sum(axis=0) > 0)[0]
            if len(valid_features) < train_data.shape[1]:
                print('Removing', train_data.shape[1] - len(valid_features),
                      'zero features')
                train_data = train_data[:, valid_features]
                test_data = test_data[:, valid_features]
            model_kwargs = {
                'regularization': regularization,
            }
            print('Starting feature selection')
            # FS results are cached per (method, fold, seed) so reruns skip fitting.
            fs_dir = os.path.dirname(os.path.realpath(__file__)) + '/temp/'
            if not os.path.isdir(fs_dir):
                os.makedirs(fs_dir)
            fs_filename = fs_dir + fs_method.__name__ + '_iter_' + str(j) + \
                '_seed_' + str(random_state) + '.json'
            if os.path.exists(fs_filename):
                with open(fs_filename, 'r') as outfile:
                    fs_data = json.load(outfile)
                fs_class = fs_method(
                    n_features_to_select=200
                    if 'RFE' not in fs_method.__name__ else 10)
                fs_class.score = np.asarray(fs_data['score'])
                fs_class.ranking = np.asarray(fs_data['ranking'])
            else:
                fs_class = fs_method(
                    n_features_to_select=200
                    if 'RFE' not in fs_method.__name__ else 10)
                # Targets are mapped from one-hot to {-1, +1} for the selector.
                fs_class.fit(train_data, 2. * train_labels[:, -1] - 1.)
                fs_data = {
                    'score': fs_class.score.tolist(),
                    'ranking': fs_class.ranking.tolist()
                }
                with open(fs_filename, 'w') as outfile:
                    json.dump(fs_data, outfile)
            print('Finishing feature selection')
            for i, n_features in enumerate([5, 10, 15, 20]):
                n_accuracies = []
                n_BAs = []
                n_mAPs = []
                n_train_accuracies = []
                print('n_features : ', n_features)
                fs_class.n_features_to_select = n_features
                svc_train_data = fs_class.transform(train_data)
                svc_test_data = fs_class.transform(test_data)
                for r in range(reps):
                    # Train/evaluate the NN on the selected feature subset.
                    model = train_Keras(svc_train_data, train_labels,
                                        svc_test_data, test_labels,
                                        model_kwargs)
                    train_data_norm = model.normalization.transform(
                        svc_train_data)
                    test_data_norm = model.normalization.transform(
                        svc_test_data)
                    test_pred = model.predict(test_data_norm)
                    n_BAs.append(balance_accuracy(test_labels, test_pred))
                    # NOTE(review): full one-hot labels passed here, unlike the
                    # binary-column variant used in sibling scripts — confirm.
                    n_mAPs.append(
                        average_precision_score(test_labels, test_pred))
                    n_accuracies.append(
                        model.evaluate(test_data_norm, test_labels,
                                       verbose=0)[-1])
                    n_train_accuracies.append(
                        model.evaluate(train_data_norm, train_labels,
                                       verbose=0)[-1])
                    del model
                    K.clear_session()
                    print(
                        'n_features : ', n_features,
                        ', acc : ', n_accuracies[-1],
                        ', BA : ', n_BAs[-1],
                        ', mAP : ', n_mAPs[-1],
                        ', train_acc : ', n_train_accuracies[-1],
                    )
                # First fold creates the per-size lists; later folds extend them.
                if i >= len(accuracies):
                    accuracies.append(n_accuracies)
                    BAs.append(n_BAs)
                    mAPs.append(n_mAPs)
                    nfeats.append(n_features)
                else:
                    accuracies[i] += n_accuracies
                    BAs[i] += n_BAs
                    mAPs[i] += n_mAPs
        # Trapezoidal accuracy-vs-n_features AUC, normalized by the span.
        mean_accuracies = np.array(accuracies).mean(axis=-1)
        print('NFEATS : ', nfeats)
        diff_accuracies = .5 * (mean_accuracies[1:] + mean_accuracies[:-1])
        np_nfeats = np.array(nfeats)
        diff = np_nfeats[1:] - np_nfeats[:-1]
        AUC = np.sum(diff * diff_accuracies) / np.sum(diff)
        print('AUC : ', AUC)
        # Persist raw and mean metrics plus the AUC for this FS method.
        output_filename = directory + 'three_layer_nn_' + \
            fs_method.__name__ + '.json'
        if not os.path.isdir(directory):
            os.makedirs(directory)
        info_data = {
            'reps': reps,
            'classification': {
                'regularization': regularization,
                'n_features': nfeats,
                'accuracy': accuracies,
                'mean_accuracy': np.array(accuracies).mean(axis=1).tolist(),
                'BA': BAs,
                'mean_BA': np.array(BAs).mean(axis=1).tolist(),
                'mAP': mAPs,
                'mean_mAP': np.array(mAPs).mean(axis=1).tolist(),
                'auc': AUC
            }
        }
        for k, v in info_data['classification'].items():
            if 'mean' in k:
                print(k, v)
        with open(output_filename, 'w') as outfile:
            json.dump(info_data, outfile)
def main(dataset_name):
    """Naive baseline: train the three-layer NN on ALL features (no selection).

    Runs repeated stratified k-fold CV and evaluates the NN with seeded
    repetitions per fold; writes metrics to
    ``directory + 'three_layer_nn_naive.json'``.

    NOTE(review): ``nfeats`` and ``n_time`` are never filled, so 'n_features'
    and 'fs_time' are written as empty lists — presumably intentional for a
    no-FS baseline, but confirm.
    """
    dataset = load_dataset()
    raw_data = np.asarray(dataset['raw']['data'])
    raw_label = np.asarray(dataset['raw']['label'])
    num_classes = len(np.unique(raw_label))
    rskf = RepeatedStratifiedKFold(n_splits=k_folds,
                                   n_repeats=k_fold_reps,
                                   random_state=42)
    print('CAE-Method')
    cont_seed = 0
    nfeats = []
    accuracies = []
    fs_time = []
    BAs = []
    mAPs = []
    name = dataset_name + '_three_layer_nn'
    print(name)
    for j, (train_index, test_index) in enumerate(
            rskf.split(raw_data, raw_label)):
        print('k_fold', j, 'of', k_folds*k_fold_reps)
        train_data, train_labels = raw_data[train_index], raw_label[train_index]
        test_data, test_labels = raw_data[test_index], raw_label[test_index]
        train_labels = to_categorical(train_labels, num_classes=num_classes)
        test_labels = to_categorical(test_labels, num_classes=num_classes)
        # Drop features that are identically zero on the training split.
        valid_features = np.where(np.abs(train_data).sum(axis=0) > 0)[0]
        if len(valid_features) < train_data.shape[1]:
            print('Removing', train_data.shape[1] - len(valid_features),
                  'zero features')
            train_data = train_data[:, valid_features]
            test_data = test_data[:, valid_features]
        model_kwargs = {
            'regularization': regularization
        }
        n_accuracies = []
        n_BAs = []
        n_mAPs = []
        n_train_accuracies = []
        n_time = []
        # Use the full feature set (no selection step in this baseline).
        svc_train_data = train_data.copy()
        svc_test_data = test_data.copy()
        for r in range(reps):
            # Deterministic per-repetition seeding.
            np.random.seed(cont_seed)
            tf.set_random_seed(cont_seed)
            cont_seed += 1
            model = train_Keras(svc_train_data, train_labels, svc_test_data,
                                test_labels, model_kwargs)
            train_data_norm = model.normalization.transform(svc_train_data)
            test_data_norm = model.normalization.transform(svc_test_data)
            test_pred = model.predict(test_data_norm)
            n_BAs.append(balance_accuracy(test_labels, test_pred))
            n_mAPs.append(average_precision_score(test_labels[:, -1],
                                                  test_pred[:, -1]))
            n_accuracies.append(model.evaluate(test_data_norm, test_labels,
                                               verbose=0)[-1])
            n_train_accuracies.append(model.evaluate(train_data_norm,
                                                     train_labels,
                                                     verbose=0)[-1])
            del model
            K.clear_session()
            print(
                'acc : ', n_accuracies[-1],
                ', BA : ', n_BAs[-1],
                ', mAP : ', n_mAPs[-1],
                ', train_acc : ', n_train_accuracies[-1],
            )
        accuracies.append(n_accuracies)
        BAs.append(n_BAs)
        mAPs.append(n_mAPs)
        fs_time.append(n_time)
    # Persist raw and mean metrics for the naive baseline.
    output_filename = directory + 'three_layer_nn_naive.json'
    if not os.path.isdir(directory):
        os.makedirs(directory)
    info_data = {
        'regularization': regularization,
        'reps': reps,
        'classification': {
            'n_features': nfeats,
            'accuracy': accuracies,
            'mean_accuracy': np.array(accuracies).mean(axis=1).tolist(),
            'BA': BAs,
            'mean_BA': np.array(BAs).mean(axis=1).tolist(),
            'mAP': mAPs,
            'mean_mAP': np.array(mAPs).mean(axis=1).tolist(),
            'fs_time': fs_time
        }
    }
    for k, v in info_data['classification'].items():
        if 'mean' in k:
            print(k, v)
    with open(output_filename, 'w') as outfile:
        json.dump(info_data, outfile)