def main():
    """Rank MNIST features with ``get_rank``, then retrain ``wrn164`` models
    on progressively larger masked subsets of the top-ranked pixels and
    append the resulting test accuracies to a per-network JSON results file.

    Relies on module-level configuration defined elsewhere in this file:
    ``network_names``, ``batch_size``, ``reps``, ``method``, ``lasso``,
    ``gamma``, ``regularization``, ``rank_kwargs`` and ``directory``.
    """
    dataset = load_dataset()
    for network_name in network_names:
        # Model constructor is resolved from the prefix of the network
        # name, e.g. 'wrn164_xxx' -> network_models.wrn164.
        model_func = getattr(network_models, network_name.split('_')[0])

        train_data = np.asarray(dataset['train']['data'])
        train_labels = dataset['train']['label']
        num_classes = len(np.unique(train_labels))
        test_data = np.asarray(dataset['test']['data'])
        test_labels = dataset['test']['label']
        train_labels = to_categorical(train_labels, num_classes=num_classes)
        test_labels = to_categorical(test_labels, num_classes=num_classes)

        # Wide ResNets get a longer schedule than the other architectures.
        epochs = 130 if 'wrn' in network_name else 80
        fit_kwargs = {
            'epochs': epochs,
            'callbacks': [
                callbacks.LearningRateScheduler(
                    scheduler('wrn' in network_name))
            ],
            'verbose': 2
        }
        generator = dataset['generator']
        generator_kwargs = {'batch_size': batch_size}
        fit_kwargs['steps_per_epoch'] = len(train_data) // batch_size

        print('reps : ', reps)
        print('method : ', method)
        name = 'mnist_' + network_name + '_l_' + str(lasso) + '_g_' + \
            str(gamma) + '_r_' + str(regularization)
        print(name)

        model_kwargs = {
            'nclasses': num_classes,
            'lasso': lasso,
            'regularization': regularization
        }
        saliency_kwargs = {'horizontal_flip': True}
        rank = get_rank(method, data=train_data, label=train_labels,
                        model_func=model_func, model_kwargs=model_kwargs,
                        fit_kwargs=fit_kwargs, generator=generator,
                        generator_kwargs=generator_kwargs,
                        rank_kwargs=rank_kwargs,
                        saliency_kwargs=saliency_kwargs)

        nfeats = []
        accuracies = []
        # The lasso penalty is only used while ranking; the retraining
        # below is done without it.
        model_kwargs['lasso'] = 0.
        total_features = int(np.prod(train_data.shape[1:]))
        for factor in [.05, .1, .25, .5]:
            n_features = int(total_features * factor)
            # Binary mask keeping only the n_features best-ranked pixels.
            mask = np.zeros(train_data.shape[1:])
            mask.flat[rank[:n_features]] = 1.0
            n_accuracies = []
            for r in range(reps):
                print('factor : ', factor, ' , rep : ', r)
                # NOTE(review): retraining always uses wrn164 with 130
                # epochs regardless of network_name — confirm intentional.
                model = network_models.wrn164(train_data.shape[1:],
                                              **model_kwargs)
                model.fit_generator(
                    generator.flow(mask * train_data, train_labels,
                                   **generator_kwargs),
                    steps_per_epoch=train_data.shape[0] // batch_size,
                    epochs=130,
                    callbacks=[
                        callbacks.LearningRateScheduler(scheduler(True))
                    ],
                    validation_data=(mask * test_data, test_labels),
                    validation_steps=test_data.shape[0] // batch_size,
                    verbose=2)
                n_accuracies.append(
                    model.evaluate(mask * test_data, test_labels,
                                   verbose=0)[-1])
                del model
            print('n_features : ', n_features, ', acc : ', n_accuracies)
            accuracies.append(n_accuracies)
            nfeats.append(n_features)

        # Was a bare try/except pass, which also swallowed real errors
        # (e.g. permission failures); exist_ok covers the intended case.
        os.makedirs(directory, exist_ok=True)
        output_filename = directory + network_name + '_' + str(
            gamma) + '_dfs_results.json'
        # Load previous results if present; start fresh only when the file
        # is missing or corrupt (bare except narrowed).
        try:
            with open(output_filename) as outfile:
                info_data = json.load(outfile)
        except (FileNotFoundError, json.JSONDecodeError):
            info_data = {}
        if name not in info_data:
            info_data[name] = []
        info_data[name].append({
            'lasso': lasso,
            'gamma': gamma,
            'regularization': regularization,
            'rank': rank.tolist(),
            'reps': reps,
            'classification': {
                'n_features': nfeats,
                'accuracy': accuracies
            }
        })
        with open(output_filename, 'w') as outfile:
            json.dump(info_data, outfile)
        del rank
'_r_' + str(regularization) print(name) model_kwargs = { 'lasso': lasso, 'regularization': regularization } rank_kwargs = { 'gamma': gamma, 'reps': reps } fit_kwargs = { 'batch_size': batch_size, 'epochs': epochs, 'verbose': 0 } rank = get_rank(fs_mode, data=data, label=label, model_func=create_model, rank_kwargs=rank_kwargs, fit_kwargs=fit_kwargs, model_kwargs=model_kwargs) try: with open(fs_filename) as outfile: info_data = json.load(outfile) except: info_data = {} if fs_mode not in info_data: info_data[fs_mode] = [] info_data[fs_mode].append( { 'lasso': lasso, 'gamma': gamma, 'regularization': regularization,
'class_weight': 'balanced', 'cache_size': 4096, 'max_iter': 10000 } fit_kwargs = {} evaluate_kwargs = {'verbose': 0, 'batch_size': batch_size} rank_kwargs = {'gamma': gamma, 'reps': reps} saliency_kwargs = { 'batch_size': 16, } result = get_rank('sfs', data=data, label=label, model_func=SVC, rank_kwargs=rank_kwargs, fit_kwargs=fit_kwargs, model_kwargs=model_kwargs, saliency_kwargs=saliency_kwargs, return_info=True, valid_data=valid_data, valid_label=valid_label) rank = result['rank'] model_kwargs['C'] = 1.5 n_features = data.shape[-1] nfeats = [] accuracies = [] train_accuracies = [] while n_features: n_accuracies = []
def _load_results(path):
    """Return previously saved results from *path*, or {} when the file is
    missing or corrupt (bare ``except:`` narrowed to the relevant errors)."""
    try:
        with open(path) as infile:
            return json.load(infile)
    except (FileNotFoundError, json.JSONDecodeError):
        return {}


def main():
    """Run the SFS ranker with each SVC kernel on the NIPS feature-selection
    datasets, then re-evaluate the produced rank with every *other* kernel
    as classifier, appending all results to one JSON file per dataset.

    Relies on names defined elsewhere in this file: ``load_dataset``,
    ``get_rank``, ``SVC``, ``sklearn_SVC`` and ``kutils``.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # silence TensorFlow logging
    # (dataset name, regularization) pairs; regularization is only recorded
    # in the output here, not used for fitting.
    dataset_names = [
        ('arcene', 1e-1),
        ('dexter', 1e-1),
        ('madelon', 1e-1),
        ('dorothea', 1e-1),
        ('gisette', 1e-1),
    ]
    gamma = 0.975  # geometric decay factor for the retained feature count
    reps = 1
    lasso = 0.0
    fs_mode = 'sfs'
    np.random.seed(1)
    root_directory = './scripts/ablation/different_ranker_and_classifier/info/'
    datasets_directory = './datasets/nips/'
    for dataset_name, regularization in dataset_names:
        fs_filename = root_directory + dataset_name + '_gamma_' + \
            str(gamma) + '_ranks.json'
        print('loading dataset', dataset_name)
        # dexter/dorothea are left unnormalized (sparse data).
        dataset = load_dataset(
            dataset_name,
            directory=datasets_directory,
            normalize=dataset_name not in ['dexter', 'dorothea'],
            sparse=True
        )
        print('data loaded. labels =', dataset['train']['data'].shape)
        # NOTE(review): the original also computed unused locals
        # input_shape and batch_size; removed.
        nclasses = len(np.unique(dataset['train']['label']))
        data = dataset['train']['data']
        label = kutils.to_categorical(dataset['train']['label'], nclasses)
        valid_data = dataset['validation']['data']
        valid_label = kutils.to_categorical(
            dataset['validation']['label'], nclasses)
        # sklearn classifiers take integer class labels, not one-hot rows.
        label_argmax = np.argmax(label, axis=-1)
        valid_label_argmax = np.argmax(valid_label, axis=-1)
        for kernel in ['sigmoid', 'linear', 'poly', 'rbf']:
            print('kernel : ', kernel)
            # Ranker and classifier share the same kernel for this entry.
            name = fs_mode + '_k_' + kernel + '_c_' + kernel
            print(name)
            model_kwargs = {
                'C': 1.,
                'degree': 3.,
                # coef0 is only meaningful for the polynomial kernel.
                'coef0': 1. * (kernel == 'poly'),
                'kernel': kernel,
                'class_weight': 'balanced',
                'cache_size': 4096,
                'max_iter': 5000
            }
            fit_kwargs = {}
            rank_kwargs = {'gamma': gamma, 'reps': reps}
            saliency_kwargs = {'batch_size': 16}
            result = get_rank(fs_mode, data=data, label=label,
                              model_func=SVC, rank_kwargs=rank_kwargs,
                              fit_kwargs=fit_kwargs,
                              model_kwargs=model_kwargs,
                              saliency_kwargs=saliency_kwargs,
                              return_info=True, valid_data=valid_data,
                              valid_label=valid_label)
            info_data = _load_results(fs_filename)
            if name not in info_data:
                info_data[name] = []
            info_data[name].append({
                'lasso': lasso,
                'gamma': gamma,
                'regularization': regularization,
                'rank': result['rank'],
                'classification': result['classification'],
                'reps': reps,
                'model_kwargs': model_kwargs
            })
            # Replaces the isdir-then-makedirs race with exist_ok.
            os.makedirs(root_directory, exist_ok=True)
            with open(fs_filename, 'w') as outfile:
                json.dump(info_data, outfile)
            rank = result['rank']
            # Cross-evaluate the rank with every other kernel as classifier.
            for kernel_classifier in ['linear', 'poly', 'sigmoid', 'rbf']:
                if kernel == kernel_classifier:
                    continue
                model_kwargs['kernel'] = kernel_classifier
                model_kwargs['coef0'] = 1. * (kernel_classifier == 'poly')
                n_features = data.shape[-1]
                classifier_name = fs_mode + '_k_' + kernel + '_c_' + \
                    kernel_classifier
                print(classifier_name)
                nfeats = []
                accuracies = []
                # Shrink the retained feature count geometrically by gamma
                # until it reaches zero.
                while n_features:
                    n_accuracies = []
                    for _ in range(reps):
                        model = sklearn_SVC(**model_kwargs)
                        model.fit(data[:, rank[:n_features]], label_argmax,
                                  **fit_kwargs)
                        n_accuracies.append(
                            model.score(valid_data[:, rank[:n_features]],
                                        valid_label_argmax))
                        print('n_features : ', n_features,
                              ', acc : ', n_accuracies[-1])
                        del model
                    accuracies.append(n_accuracies)
                    nfeats.append(n_features)
                    n_features = int(n_features * gamma)
                info_data = _load_results(fs_filename)
                if classifier_name not in info_data:
                    info_data[classifier_name] = []
                info_data[classifier_name].append({
                    'lasso': lasso,
                    'gamma': gamma,
                    'regularization': regularization,
                    'rank': result['rank'],
                    'classification': {
                        'n_features': nfeats,
                        'accuracy': accuracies
                    },
                    'reps': reps,
                    # Snapshot: model_kwargs is mutated on each iteration
                    # of this loop, so store a copy of its current state.
                    'model_kwargs': dict(model_kwargs)
                })
                with open(fs_filename, 'w') as outfile:
                    json.dump(info_data, outfile)
            del result
def main():
    """For each dataset, run grouped K-fold feature ranking: splits are made
    over unique sample ids so every row of a given id lands on the same side
    of the split, then ``get_rank`` is run for each (method, lasso,
    regularization) combination and the ranks appended to a per-dataset
    JSON file.

    Relies on module-level configuration defined elsewhere in this file:
    ``dataset_names``, ``dataset_directory``, ``kfold``, ``network_name``,
    ``b_size``, ``epochs``, ``scheduler``, ``methods``, ``reps``,
    ``gamma``, ``rank_kwargs`` and ``sd_directory``.
    """
    # Loop-invariant: depends only on the module-level network_name, so
    # resolve the constructor once instead of once per fold.
    model_func = getattr(network_models, network_name.split('_')[0])
    for dataset_name in dataset_names:
        print('dataset =', dataset_name)
        dataset = load_dataset(dataset_name, directory=dataset_directory)
        ids = np.unique(dataset['id'])
        # Fixed: the message promises a count, but the original printed
        # the id array itself.
        print('NUMBER OF IDS : ', len(ids))
        data = dataset['data']
        result = dataset['result']
        for train_ids_index, test_ids_index in kfold.split(ids):
            train_ids, test_ids = ids[train_ids_index], ids[test_ids_index]
            # Gather row indices id by id so each id's rows stay together.
            train_index = np.concatenate(
                [np.where(dataset['id'] == train_id)[0]
                 for train_id in train_ids], axis=0)
            test_index = np.concatenate(
                [np.where(dataset['id'] == test_id)[0]
                 for test_id in test_ids], axis=0)
            train_data = data[train_index]
            train_result = result[train_index]
            # NOTE(review): the original also materialized data[test_index]
            # and result[test_index] but never used them; only the index
            # arrays are saved below, so those copies were dropped.
            batch_size = min(len(train_data), b_size)
            fit_kwargs = {
                'epochs': epochs,
                'callbacks': [
                    callbacks.LearningRateScheduler(scheduler)
                ],
                'verbose': 2
            }
            generator = dataset['generator']
            generator_kwargs = {'batch_size': batch_size}
            fit_kwargs['batch_size'] = batch_size
            for fs_mode in methods:
                for lasso in [0.0, 5e-4]:
                    # Presumably dfs requires a non-zero lasso penalty —
                    # TODO confirm; the original skipped this combination.
                    if lasso == 0.0 and fs_mode == 'dfs':
                        continue
                    print('reps : ', reps)
                    print('method : ', fs_mode)
                    for regularization in [5e-4]:
                        name = dataset_name + '_' + fs_mode + '_l_' + \
                            str(lasso) + '_g_' + str(gamma) + \
                            '_r_' + str(regularization)
                        print(name)
                        model_kwargs = {
                            'lasso': lasso,
                            'regularization': regularization
                        }
                        rank = get_rank(fs_mode, data=train_data,
                                        label=train_result,
                                        model_func=model_func,
                                        model_kwargs=model_kwargs,
                                        fit_kwargs=fit_kwargs,
                                        generator=generator,
                                        generator_kwargs=generator_kwargs,
                                        rank_kwargs=rank_kwargs,
                                        type='regression')
                        # Was a bare try/except pass around makedirs.
                        os.makedirs(sd_directory, exist_ok=True)
                        output_filename = sd_directory + dataset_name + \
                            '_' + str(gamma) + '_rank.json'
                        # Start fresh only when the results file is missing
                        # or corrupt (bare except narrowed).
                        try:
                            with open(output_filename) as outfile:
                                info_data = json.load(outfile)
                        except (FileNotFoundError, json.JSONDecodeError):
                            info_data = {}
                        key = fs_mode + '_' + str(lasso)
                        if key not in info_data:
                            info_data[key] = []
                        info_data[key].append({
                            'lasso': lasso,
                            'gamma': gamma,
                            'regularization': regularization,
                            'rank': rank.tolist(),
                            'reps': reps,
                            'train_index': train_index.tolist(),
                            'test_index': test_index.tolist()
                        })
                        with open(output_filename, 'w') as outfile:
                            json.dump(info_data, outfile)
                        del rank
rank_kwargs = {'gamma': gamma, 'reps': 5} fit_kwargs = { 'batch_size': batch_size, 'epochs': epochs, 'callbacks': [callbacks.LearningRateScheduler(get_scheduler(epochs))], 'verbose': 0 } rank = get_rank(fs_mode, data=data, label=label, model_func=create_model, rank_kwargs=rank_kwargs, fit_kwargs=fit_kwargs, model_kwargs=model_kwargs, return_info=False, valid_data=valid_data, valid_label=valid_label) for kernel_classifier in kernels_classifier: model_kwargs = { 'C': 1., 'degree': 3., 'coef0': 1. * (kernel_classifier == 'poly'), 'kernel': kernel_classifier, 'class_weight': 'balanced', 'cache_size': 4096, 'max_iter': 10000 }