        # draw a random sample of rows from the test set
        indices = list(range(len(test_data)))
        random.shuffle(indices)
        indices_to_take = indices[:sample_size]

        test_data_sampled = test_data.iloc[indices_to_take, :]
        y_test_sampled = [y_test[index] for index in indices_to_take]
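        # note: test_data.sample(n=sample_size) would draw an equivalent row
        # sample directly; the shuffled index list is kept because the raw
        # positions are reused to subset y_test as well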

        performance_predictor = train_random_forest_regressor(
            test_data_sampled, y_test_sampled,
            outlier_perturbations(numerical_columns, num_repetitions), model,
            scoring)

        evaluate_regressor(
            target_data, y_target,
            outlier_perturbations(numerical_columns, num_repetitions), model,
            performance_predictor, scoring, scoring_name, dataset_name,
            'outlier', learner_name, experiment_name)

        # performance_predictor = train_random_forest_regressor(
        #     test_data_sampled, y_test_sampled,
        #     missing_perturbations(categorical_columns, 'NULL', num_repetitions),
        #     model, scoring)
        #
        # evaluate_regressor(
        #     target_data, y_target,
        #     missing_perturbations(categorical_columns, 'NULL', num_repetitions),
        #     model, performance_predictor, scoring, scoring_name, dataset_name,
        #     'missing', learner_name, experiment_name)
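Both the active outlier path and the commented-out alternative rely on perturbation generators from pp whose internals aren't shown in these snippets. A minimal sketch of what a generator like missing_perturbations(categorical_columns, 'NULL', num_repetitions) could look like, with all names below illustrative:

import random

import pandas as pd


class MissingValuePerturbation:
    """Replaces a random fraction of one column's values with a placeholder."""

    def __init__(self, column, placeholder, fraction):
        self.column = column
        self.placeholder = placeholder
        self.fraction = fraction

    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
        corrupted = df.copy()
        rows = random.sample(range(len(corrupted)), int(len(corrupted) * self.fraction))
        corrupted.iloc[rows, corrupted.columns.get_loc(self.column)] = self.placeholder
        return corrupted


def missing_perturbations_sketch(categorical_columns, placeholder, num_repetitions):
    # one perturbation per column, corruption fraction and repetition,
    # mirroring the fraction grid used elsewhere in these examples
    _perturbations = []
    for column in categorical_columns:
        for fraction in [0.0, 0.05, 0.25, 0.5, 0.75, 0.99]:
            for _ in range(num_repetitions):
                _perturbations.append(
                    MissingValuePerturbation(column, placeholder, fraction))
    return _perturbations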
Example #2
from pp import perturbations
from pp.meta_regressors import train_random_forest_regressor, evaluate_regressor
from pp.serialization import load_black_box


def noise_perturbations():
    _perturbations = []
    for fraction in [0.0, 0.05, 0.25, 0.5, 0.75, 0.99]:
        for _ in range(0, 100):
            _perturbations.append(perturbations.NoisyImage(fraction))

    return _perturbations


models_to_evaluate = []
for learner in ['convnet']:
    for dataset in ['mnist', 'fashion']:
        for score in ['roc_auc']:
            models_to_evaluate.append(learner + '-' + dataset + '-' + score)

for model_to_evaluate in models_to_evaluate:
    experiment_name = 'correct_shift'

    (model, scoring, scoring_name, train_data, y_train, test_data, y_test, target_data, y_target, learner_name,
     dataset_name) = load_black_box(model_to_evaluate)

    predictor = train_random_forest_regressor(test_data, y_test, noise_perturbations(), model, scoring)

    evaluate_regressor(target_data, y_target, noise_perturbations(), model,
                       predictor, scoring, scoring_name, dataset_name, 'noisy', learner_name, experiment_name)
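perturbations.NoisyImage is part of the pp package and its implementation is not shown in these snippets. A minimal stand-in that flips a given fraction of pixels to black or white (salt-and-pepper noise) might look as follows, all names illustrative:

import numpy as np


class NoisyImageSketch:
    """Sets a random fraction of each image's pixels to black or white."""

    def __init__(self, fraction):
        self.fraction = fraction

    def apply(self, images: np.ndarray) -> np.ndarray:
        corrupted = images.copy()
        num_pixels = corrupted[0].size
        num_noisy = int(num_pixels * self.fraction)
        for image in corrupted:
            flat = image.reshape(-1)
            positions = np.random.choice(num_pixels, size=num_noisy, replace=False)
            # salt-and-pepper: overwrite the chosen pixels with 0 or 1
            flat[positions] = np.random.choice([0.0, 1.0], size=num_noisy)
        return corrupted

noise_perturbations() above builds 100 such perturbations per corruption fraction, so the meta-regressor sees a spread of severities.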
Example #3

from pp import perturbations
from pp.meta_regressors import train_random_forest_regressor, evaluate_regressor
from pp.serialization import load_black_box


def leet_perturbations():
    _perturbations = []
    for fraction_of_troll_tweets in [0.0, 0.05, 0.25, 0.5, 0.75, 0.99]:
        for _ in range(0, 100):
            _perturbations.append(
                perturbations.Leetspeak(fraction_of_troll_tweets, 'content',
                                        'label', 1))

    return _perturbations


models_to_evaluate = []
for learner in ['lr', 'dnn', 'xgb']:
    for dataset in ['trolling']:
        for score in ['roc_auc']:
            models_to_evaluate.append(learner + '-' + dataset + '-' + score)

for model_to_evaluate in models_to_evaluate:
    experiment_name = 'correct_shift'

    (model, scoring, scoring_name, train_data, y_train, test_data, y_test,
     target_data, y_target, learner_name,
     dataset_name) = load_black_box(model_to_evaluate)

    predictor = train_random_forest_regressor(test_data, y_test,
                                              leet_perturbations(), model,
                                              scoring)

    evaluate_regressor(target_data, y_target, leet_perturbations(), model,
                       predictor, scoring, scoring_name, dataset_name,
                       'adversarial', learner_name, experiment_name)
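perturbations.Leetspeak(fraction, 'content', 'label', 1) rewrites the given fraction of rows in the 'content' column; its internals are likewise not part of the snippet. A minimal character-substitution sketch, assuming a pandas DataFrame with a text column (names illustrative):

import random

import pandas as pd

LEET_MAP = str.maketrans({'a': '4', 'e': '3', 'i': '1', 'o': '0', 's': '5', 't': '7'})


def leetspeak_rows(df: pd.DataFrame, text_column: str, fraction: float) -> pd.DataFrame:
    corrupted = df.copy()
    rows = random.sample(range(len(corrupted)), int(len(corrupted) * fraction))
    col = corrupted.columns.get_loc(text_column)
    for row in rows:
        corrupted.iat[row, col] = str(corrupted.iat[row, col]).translate(LEET_MAP)
    return corrupted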
Example #4
from pp.generators import missing_perturbations
from pp.meta_regressors import train_random_forest_regressor, evaluate_regressor
from pp.serialization import load_black_box

# DATASETS_CATEGORICAL_COLUMNS (a dataset-name -> categorical-column mapping)
# is assumed to come from the pp package as well

threshold = 0.05
num_repetitions = 100

models_to_evaluate = []
for learner in ['xgb']:
    for dataset in ['adult']:
        for score in ['accuracy']:
            models_to_evaluate.append(learner + '-' + dataset + '-' + score)

for model_to_evaluate in models_to_evaluate:
    experiment_name = 'playing_correct_shift'

    (model, scoring, scoring_name, train_data, y_train, test_data, y_test,
     target_data, y_target, learner_name,
     dataset_name) = load_black_box(model_to_evaluate)

    categorical_columns = DATASETS_CATEGORICAL_COLUMNS[dataset_name]

    performance_predictor = train_random_forest_regressor(
        test_data, y_test,
        missing_perturbations(categorical_columns, 'NULL', num_repetitions),
        model, scoring)

    evaluate_regressor(
        target_data, y_target, threshold,
        missing_perturbations(categorical_columns, 'NULL', num_repetitions),
        model, performance_predictor,
        scoring, scoring_name, dataset_name, 'missing', learner_name)
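All of these examples share one pattern: corrupt held-out data with each perturbation, score the black box on the corrupted copy, and fit a regressor that maps corruption statistics to the observed score. train_random_forest_regressor's real featurization is not shown here; the following is only a conceptual sketch (sklearn-based, featurize is a toy stand-in):

import numpy as np
from sklearn.ensemble import RandomForestRegressor


def featurize(model, data):
    # toy featurization: summary statistics of the black box's scores
    probs = model.predict_proba(data)[:, 1]
    return [probs.mean(), probs.std(),
            np.percentile(probs, 25), np.percentile(probs, 75)]


def train_performance_predictor(test_data, y_test, perturbation_list, model, scoring):
    features, scores = [], []
    for perturbation in perturbation_list:
        corrupted = perturbation.apply(test_data)
        features.append(featurize(model, corrupted))
        # scoring is assumed to follow the sklearn scorer convention:
        # scorer(estimator, X, y)
        scores.append(scoring(model, corrupted, y_test))
    regressor = RandomForestRegressor(n_estimators=100)
    regressor.fit(np.array(features), np.array(scores))
    return regressor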
Example #5

import itertools
import random

from pp.meta_regressors import evaluate_regressor
from pp.serialization import load_black_box

# train_random_forest_regressor_with_noise, swapped_perturbations and the
# DATASETS_* column mappings are assumed to come from the pp package as well

num_repetitions = 100

models_to_evaluate = []
for learner in ['lr']:
    for dataset in ['adult']:
        for score in ['accuracy']:
            models_to_evaluate.append(learner + '-' + dataset + '-' + score)

for model_to_evaluate in models_to_evaluate:

    experiment_name = 'noise_on_features__swapped'

    (model, scoring, scoring_name, train_data, y_train, test_data, y_test, target_data, y_target, learner_name,
     dataset_name) = load_black_box(model_to_evaluate)

    categorical_columns = DATASETS_CATEGORICAL_COLUMNS[dataset_name]
    numerical_columns = DATASETS_NUMERICAL_COLUMNS[dataset_name]

    affected_column_pairs = list(itertools.chain(
        itertools.combinations(numerical_columns, 2), itertools.combinations(categorical_columns, 2)))

    affected_column_pairs = random.sample(affected_column_pairs, 5)
    performance_predictor = train_random_forest_regressor_with_noise(test_data, y_test, num_repetitions,
                                                                     model, scoring)

    evaluate_regressor(target_data, y_target, swapped_perturbations(affected_column_pairs),
                       model, performance_predictor, scoring, scoring_name, dataset_name, 'swapped', learner_name,
                       experiment_name)
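swapped_perturbations(affected_column_pairs) presumably yields perturbations that exchange the values of each column pair; a minimal pandas sketch of one such swap (names illustrative):

import pandas as pd


def swap_columns(df: pd.DataFrame, column_a: str, column_b: str) -> pd.DataFrame:
    # exchange the two columns' values row-wise; .to_numpy() sidesteps
    # pandas' label alignment during assignment
    corrupted = df.copy()
    corrupted[[column_a, column_b]] = corrupted[[column_b, column_a]].to_numpy()
    return corrupted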
Example #6
from pp import perturbations
from pp.meta_regressors import train_random_forest_regressor, evaluate_regressor
from pp.serialization import load_black_box

# missing_perturbations('NULL') below is presumably a local helper not shown here


def custom_perturbations():
    _perturbations = []
    _perturbations.append(
        perturbations.LassoExperiment(
            'workclass', ['Local-gov', 'Never-worked', 'Without-pay']))
    return _perturbations


# workclass Local-gov is ignored by the classifier
# workclass Never-worked is ignored by the classifier
# workclass Without-pay is ignored by the classifier
# 99 of 1568 rows in the test data
# 109 of 1569 rows in the target data

models_to_evaluate = []
for learner in ['lasso']:
    for dataset in ['adult_minimal']:
        for score in ['accuracy']:
            models_to_evaluate.append(learner + '-' + dataset + '-' + score)

for model_to_evaluate in models_to_evaluate:
    experiment_name = 'correct_shift'

    (model, scoring, scoring_name, train_data, y_train, test_data, y_test,
     target_data, y_target, learner_name,
     dataset_name) = load_black_box(model_to_evaluate)

    performance_predictor = train_random_forest_regressor(
        test_data, y_test, missing_perturbations('NULL'), model, scoring)

    evaluate_regressor(target_data, y_target, custom_perturbations(), model,
                       performance_predictor, scoring, scoring_name,
                       dataset_name, 'missing', learner_name, experiment_name)
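The comments above record that three workclass categories are ignored by the classifier. With an L1-penalized model this can be verified by checking which one-hot features received a zero coefficient; a minimal sketch, assuming train_data still holds the raw column (the encoder and model below are illustrative, not the pipeline behind load_black_box):

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder

encoder = OneHotEncoder(handle_unknown='ignore')
features = encoder.fit_transform(train_data[['workclass']])

l1_model = LogisticRegression(penalty='l1', solver='liblinear')
l1_model.fit(features, y_train)

feature_names = encoder.get_feature_names_out(['workclass'])
ignored = feature_names[np.isclose(l1_model.coef_[0], 0.0)]
print(ignored)  # the three categories above should show up here if they carry no signal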