Example #1
0
File: dfs.py — Project: braisCB/SFS
def main():
    """Run the DFS/MNIST feature-selection experiment for every network.

    For each name in the module-level ``network_names``: compute a feature
    ranking with ``get_rank``, then retrain a ``wrn164`` model on input masks
    keeping the top 5/10/25/50% of ranked features, recording the test
    accuracies in a JSON results file under ``directory``.

    Depends on module-level configuration: ``network_names``, ``batch_size``,
    ``reps``, ``method``, ``lasso``, ``gamma``, ``regularization``,
    ``rank_kwargs`` and ``directory``.
    """
    dataset = load_dataset()

    for network_name in network_names:

        # e.g. 'wrn164_...' -> network_models.wrn164
        model_func = getattr(network_models, network_name.split('_')[0])

        train_data = np.asarray(dataset['train']['data'])
        train_labels = dataset['train']['label']
        num_classes = len(np.unique(train_labels))

        test_data = np.asarray(dataset['test']['data'])
        test_labels = dataset['test']['label']

        train_labels = to_categorical(train_labels, num_classes=num_classes)
        test_labels = to_categorical(test_labels, num_classes=num_classes)

        # Wide ResNets get a longer training schedule than the other models.
        epochs = 130 if 'wrn' in network_name else 80

        fit_kwargs = {
            'epochs': epochs,
            'callbacks': [
                callbacks.LearningRateScheduler(scheduler('wrn' in network_name))
            ],
            'verbose': 2,
        }

        generator = dataset['generator']
        generator_kwargs = {'batch_size': batch_size}
        fit_kwargs['steps_per_epoch'] = len(train_data) // batch_size

        print('reps : ', reps)
        print('method : ', method)
        name = 'mnist_' + network_name + '_l_' + str(lasso) + '_g_' + str(gamma) + \
               '_r_' + str(regularization)
        print(name)
        model_kwargs = {
            'nclasses': num_classes,
            'lasso': lasso,
            'regularization': regularization
        }
        saliency_kwargs = {'horizontal_flip': True}
        rank = get_rank(method,
                        data=train_data,
                        label=train_labels,
                        model_func=model_func,
                        model_kwargs=model_kwargs,
                        fit_kwargs=fit_kwargs,
                        generator=generator,
                        generator_kwargs=generator_kwargs,
                        rank_kwargs=rank_kwargs,
                        saliency_kwargs=saliency_kwargs)

        nfeats = []
        accuracies = []
        model_kwargs['lasso'] = 0.  # no lasso penalty during re-training
        total_features = int(np.prod(train_data.shape[1:]))
        for factor in [.05, .1, .25, .5]:
            n_features = int(total_features * factor)
            # Binary mask that keeps only the top-ranked features.
            mask = np.zeros(train_data.shape[1:])
            mask.flat[rank[:n_features]] = 1.0
            n_accuracies = []
            for r in range(reps):
                print('factor : ', factor, ' , rep : ', r)
                model = network_models.wrn164(train_data.shape[1:],
                                              **model_kwargs)
                model.fit_generator(
                    generator.flow(mask * train_data, train_labels,
                                   **generator_kwargs),
                    steps_per_epoch=train_data.shape[0] // batch_size,
                    epochs=130,
                    callbacks=[
                        callbacks.LearningRateScheduler(scheduler(True))
                    ],
                    validation_data=(mask * test_data, test_labels),
                    validation_steps=test_data.shape[0] // batch_size,
                    verbose=2)
                # evaluate() returns [loss, ...metrics]; keep the last metric.
                n_accuracies.append(
                    model.evaluate(mask * test_data, test_labels,
                                   verbose=0)[-1])
                del model
            print('n_features : ', n_features, ', acc : ', n_accuracies)
            accuracies.append(n_accuracies)
            nfeats.append(n_features)

        # FIX: replaced `try: os.makedirs(...) except: pass` — a bare except
        # also swallows KeyboardInterrupt/SystemExit.
        os.makedirs(directory, exist_ok=True)
        output_filename = directory + network_name + '_' + str(
            gamma) + '_dfs_results.json'

        try:
            with open(output_filename) as outfile:
                info_data = json.load(outfile)
        except (FileNotFoundError, json.JSONDecodeError):
            # First run (or corrupt results file): start a fresh record.
            info_data = {}

        info_data.setdefault(name, []).append({
            'lasso': lasso,
            'gamma': gamma,
            'regularization': regularization,
            'rank': rank.tolist(),
            'reps': reps,
            'classification': {
                'n_features': nfeats,
                'accuracy': accuracies
            }
        })

        with open(output_filename, 'w') as outfile:
            json.dump(info_data, outfile)

        del rank
Example #2
0
                       '_r_' + str(regularization)
                print(name)
                model_kwargs = {
                    'lasso': lasso,
                    'regularization': regularization
                }
                rank_kwargs = {
                    'gamma': gamma,
                    'reps': reps
                }
                fit_kwargs = {
                    'batch_size': batch_size,
                    'epochs': epochs,
                    'verbose': 0
                }
                rank = get_rank(fs_mode, data=data, label=label, model_func=create_model,
                                rank_kwargs=rank_kwargs, fit_kwargs=fit_kwargs, model_kwargs=model_kwargs)

                try:
                    with open(fs_filename) as outfile:
                        info_data = json.load(outfile)
                except:
                    info_data = {}

                if fs_mode not in info_data:
                    info_data[fs_mode] = []

                info_data[fs_mode].append(
                    {
                        'lasso': lasso,
                        'gamma': gamma,
                        'regularization': regularization,
Example #3
0
                    'class_weight': 'balanced',
                    'cache_size': 4096,
                    'max_iter': 10000
                }
                fit_kwargs = {}
                evaluate_kwargs = {'verbose': 0, 'batch_size': batch_size}
                rank_kwargs = {'gamma': gamma, 'reps': reps}
                saliency_kwargs = {
                    'batch_size': 16,
                }
                result = get_rank('sfs',
                                  data=data,
                                  label=label,
                                  model_func=SVC,
                                  rank_kwargs=rank_kwargs,
                                  fit_kwargs=fit_kwargs,
                                  model_kwargs=model_kwargs,
                                  saliency_kwargs=saliency_kwargs,
                                  return_info=True,
                                  valid_data=valid_data,
                                  valid_label=valid_label)

                rank = result['rank']

                model_kwargs['C'] = 1.5
                n_features = data.shape[-1]
                nfeats = []
                accuracies = []
                train_accuracies = []
                while n_features:
                    n_accuracies = []
Example #4
0
def main():
    """Ablation: rank features with one SVM kernel, classify with another.

    For each NIPS feature-selection dataset, computes an SFS feature ranking
    with an ``SVC`` for every kernel, stores that ranking and its own
    classification results, then re-evaluates the ranked features with each
    *other* kernel as the classifier on geometrically shrinking feature
    subsets. All results are appended to a per-dataset JSON file under
    ``root_directory``.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # (dataset name, regularization strength) pairs to evaluate.
    dataset_names = [
        ('arcene', 1e-1),
        ('dexter', 1e-1),
        ('madelon', 1e-1),
        ('dorothea', 1e-1),
        ('gisette', 1e-1),
    ]

    gamma = 0.975  # per-step feature-count decay factor
    b_size = 100
    reps = 1
    lasso = 0.0
    fs_mode = 'sfs'

    np.random.seed(1)

    root_directory = './scripts/ablation/different_ranker_and_classifier/info/'
    datasets_directory = './datasets/nips/'

    for dataset_name, regularization in dataset_names:

        fs_filename = root_directory + dataset_name + '_gamma_' + str(gamma) + '_ranks.json'

        print('loading dataset', dataset_name)
        # dexter/dorothea are excluded from normalization; loaded sparse.
        dataset = load_dataset(
            dataset_name, directory=datasets_directory,
            normalize=dataset_name not in ['dexter', 'dorothea'],
            sparse=True
        )
        # FIX: original message said 'labels =' while printing the data shape.
        print('data loaded. data shape =', dataset['train']['data'].shape)

        uclasses = np.unique(dataset['train']['label'])
        nclasses = len(uclasses)

        data = dataset['train']['data']
        label = kutils.to_categorical(dataset['train']['label'], nclasses)
        valid_data = dataset['validation']['data']
        valid_label = kutils.to_categorical(dataset['validation']['label'], nclasses)

        # Integer class labels for the sklearn classifiers below.
        label_argmax = np.argmax(label, axis=-1)
        valid_label_argmax = np.argmax(valid_label, axis=-1)

        for kernel in ['sigmoid', 'linear', 'poly', 'rbf']:
            print('kernel : ', kernel)
            name = fs_mode + '_k_' + kernel + '_c_' + kernel
            print(name)
            model_kwargs = {
                'C': 1.,
                'degree': 3.,
                'coef0': 1. * (kernel == 'poly'),  # coef0 only matters for poly
                'kernel': kernel,
                'class_weight': 'balanced',
                'cache_size': 4096,
                'max_iter': 5000
            }
            fit_kwargs = {}
            rank_kwargs = {'gamma': gamma, 'reps': reps}
            saliency_kwargs = {'batch_size': 16}
            result = get_rank(fs_mode, data=data, label=label, model_func=SVC,
                              rank_kwargs=rank_kwargs, fit_kwargs=fit_kwargs,
                              model_kwargs=model_kwargs,
                              saliency_kwargs=saliency_kwargs,
                              return_info=True, valid_data=valid_data,
                              valid_label=valid_label)

            # FIX: bare `except:` narrowed to the two expected failure modes.
            try:
                with open(fs_filename) as outfile:
                    info_data = json.load(outfile)
            except (FileNotFoundError, json.JSONDecodeError):
                info_data = {}

            info_data.setdefault(name, []).append(
                {
                    'lasso': lasso,
                    'gamma': gamma,
                    'regularization': regularization,
                    'rank': result['rank'],
                    'classification': result['classification'],
                    'reps': reps,
                    'model_kwargs': model_kwargs
                }
            )

            os.makedirs(root_directory, exist_ok=True)

            with open(fs_filename, 'w') as outfile:
                json.dump(info_data, outfile)

            rank = result['rank']

            for kernel_classifier in ['linear', 'poly', 'sigmoid', 'rbf']:
                if kernel == kernel_classifier:
                    continue  # same-kernel results were stored above
                model_kwargs['kernel'] = kernel_classifier
                model_kwargs['coef0'] = 1. * (kernel_classifier == 'poly')
                n_features = data.shape[-1]
                classifier_name = fs_mode + '_k_' + kernel + '_c_' + kernel_classifier
                print(classifier_name)
                nfeats = []
                accuracies = []
                # Evaluate on geometrically shrinking feature subsets.
                while n_features:
                    n_accuracies = []
                    for _ in range(reps):
                        model = sklearn_SVC(**model_kwargs)
                        model.fit(data[:, rank[:n_features]], label_argmax, **fit_kwargs)
                        n_accuracies.append(
                            model.score(valid_data[:, rank[:n_features]], valid_label_argmax))
                        print(
                            'n_features : ', n_features, ', acc : ', n_accuracies[-1]
                        )
                        del model
                    accuracies.append(n_accuracies)
                    nfeats.append(n_features)
                    n_features = int(n_features * gamma)

                try:
                    with open(fs_filename) as outfile:
                        info_data = json.load(outfile)
                except (FileNotFoundError, json.JSONDecodeError):
                    info_data = {}

                info_data.setdefault(classifier_name, []).append(
                    {
                        'lasso': lasso,
                        'gamma': gamma,
                        'regularization': regularization,
                        'rank': result['rank'],
                        'classification': {
                            'n_features': nfeats,
                            'accuracy': accuracies
                        },
                        'reps': reps,
                        'model_kwargs': model_kwargs
                    }
                )

                with open(fs_filename, 'w') as outfile:
                    json.dump(info_data, outfile)

            del result
Example #5
0
def main():
    """K-fold feature-ranking experiment on subject-grouped regression data.

    For each dataset in the module-level ``dataset_names``, splits the unique
    subject ids with the module-level ``kfold``, computes a feature ranking of
    the training fold with ``get_rank`` for every selection method and lasso
    setting, and appends the rankings (plus the fold indices) to a JSON file
    under ``sd_directory``.

    Depends on module-level configuration: ``dataset_names``,
    ``dataset_directory``, ``kfold``, ``network_name``, ``b_size``,
    ``epochs``, ``scheduler``, ``methods``, ``reps``, ``gamma``,
    ``rank_kwargs`` and ``sd_directory``.
    """
    for dataset_name in dataset_names:
        print('dataset =', dataset_name)
        dataset = load_dataset(dataset_name, directory=dataset_directory)

        ids = np.unique(dataset['id'])
        print('NUMBER OF IDS : ', ids)
        data = dataset['data']
        result = dataset['result']

        # Split by subject id so one subject never appears in both folds.
        for train_ids_index, test_ids_index in kfold.split(ids):

            train_ids, test_ids = ids[train_ids_index], ids[test_ids_index]
            train_index = np.concatenate(
                [np.where(dataset['id'] == train_id)[0] for train_id in train_ids],
                axis=0)
            test_index = np.concatenate(
                [np.where(dataset['id'] == test_id)[0] for test_id in test_ids],
                axis=0)

            train_data, test_data = data[train_index], data[test_index]
            train_result, test_result = result[train_index], result[test_index]

            # e.g. 'wrn164_...' -> network_models.wrn164
            model_func = getattr(network_models, network_name.split('_')[0])

            batch_size = min(len(train_data), b_size)

            fit_kwargs = {
                'epochs': epochs,
                'callbacks': [
                    callbacks.LearningRateScheduler(scheduler)
                ],
                'verbose': 2
            }

            generator = dataset['generator']
            generator_kwargs = {
                'batch_size': batch_size
            }
            fit_kwargs['batch_size'] = batch_size

            for fs_mode in methods:
                for lasso in [0.0, 5e-4]:
                    # DFS needs a non-zero lasso penalty; skip that combination.
                    if lasso == 0.0 and fs_mode == 'dfs':
                        continue
                    print('reps : ', reps)
                    print('method : ', fs_mode)
                    for regularization in [5e-4]:
                        name = dataset_name + '_' + fs_mode + '_l_' + str(lasso) + '_g_' + str(gamma) + \
                               '_r_' + str(regularization)
                        print(name)
                        model_kwargs = {
                            'lasso': lasso,
                            'regularization': regularization
                        }
                        rank = get_rank(fs_mode, data=train_data, label=train_result, model_func=model_func,
                                        model_kwargs=model_kwargs, fit_kwargs=fit_kwargs, generator=generator,
                                        generator_kwargs=generator_kwargs, rank_kwargs=rank_kwargs, type='regression')

                        # FIX: replaced `try: os.makedirs(...) except: pass` —
                        # a bare except also swallows KeyboardInterrupt.
                        os.makedirs(sd_directory, exist_ok=True)
                        output_filename = sd_directory + dataset_name + '_' + str(gamma) + '_rank.json'

                        try:
                            with open(output_filename) as outfile:
                                info_data = json.load(outfile)
                        except (FileNotFoundError, json.JSONDecodeError):
                            # First run (or corrupt file): fresh record.
                            info_data = {}

                        key = fs_mode + '_' + str(lasso)
                        info_data.setdefault(key, []).append(
                            {
                                'lasso': lasso,
                                'gamma': gamma,
                                'regularization': regularization,
                                'rank': rank.tolist(),
                                'reps': reps,
                                'train_index': train_index.tolist(),
                                'test_index': test_index.tolist()
                            }
                        )

                        with open(output_filename, 'w') as outfile:
                            json.dump(info_data, outfile)

                        del rank
Example #6
0
File: dfs.py — Project: braisCB/SFS
            rank_kwargs = {'gamma': gamma, 'reps': 5}
            fit_kwargs = {
                'batch_size':
                batch_size,
                'epochs':
                epochs,
                'callbacks':
                [callbacks.LearningRateScheduler(get_scheduler(epochs))],
                'verbose':
                0
            }
            rank = get_rank(fs_mode,
                            data=data,
                            label=label,
                            model_func=create_model,
                            rank_kwargs=rank_kwargs,
                            fit_kwargs=fit_kwargs,
                            model_kwargs=model_kwargs,
                            return_info=False,
                            valid_data=valid_data,
                            valid_label=valid_label)

            for kernel_classifier in kernels_classifier:
                model_kwargs = {
                    'C': 1.,
                    'degree': 3.,
                    'coef0': 1. * (kernel_classifier == 'poly'),
                    'kernel': kernel_classifier,
                    'class_weight': 'balanced',
                    'cache_size': 4096,
                    'max_iter': 10000
                }