Example #1
def black_box_rejector(options):
    """
    Uses a black box attack to evade the rejector defense.

    Adversarial samples are generated to fool the defended model,
    which only provides the labels when queried.
    Note: Models with rejectors also have a special label 'reject',
    which does not represent a valid misclassification (i.e. the attack
    does not consider being rejected a success).
    """
    attack_p = options['attack_p']
    attack_name = options['attack_name']
    attack_workers = options['attack_workers']
    command = options['command']
    cuda = options['cuda']
    foolbox_model = options['foolbox_model']
    loader = options['loader']
    rejector = options['rejector']
    results_path = options['results_path']

    # The defended_model returns [y1, y2 ... yN, -inf] if it believes
    # that the sample is valid, otherwise it returns [0, 0 ... 0, 1]
    # This means that if the top label is the last one, it was classified as adversarial.
    # On a genuine dataset, this should never happen (if the rejector is perfect).

    defended_model = rejectors.RejectorModel(foolbox_model, rejector)

    # rejectors.Unrejected() adds the condition that the top label must not be the last
    # (i.e. the 'reject' label).
    # Note: combining two foolbox criteria with the Python "and" operator does not give
    # a combined criterion; the documentation recommends "&", which yields a CombinedCriteria.

    criterion = foolbox.criteria.CombinedCriteria(
        foolbox.criteria.Misclassification(), rejectors.Unrejected())

    # The attack will be against the defended model

    attack = parsing.parse_attack(attack_name, attack_p, criterion)

    samples_count, correct_count, successful_attack_count, distances, _, _ = tests.attack_test(
        defended_model,
        loader,
        attack,
        attack_p,
        cuda,
        attack_workers,
        name='Black-Box Rejector Attack')

    info = utils.attack_statistics_info(samples_count, correct_count,
                                        successful_attack_count, distances)

    header = ['Distances']

    utils.save_results(results_path,
                       table=[distances],
                       command=command,
                       info=info,
                       header=header)
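
The combined criterion is the key adaptive step here: the attack only counts a candidate as adversarial if it is misclassified and not sent to the reject class. As a rough illustration, a minimal Unrejected-style criterion could look like the sketch below, assuming the convention from the comments above (the reject score occupies the last position); the project's actual rejectors.Unrejected may be implemented differently.

import numpy as np
import foolbox


class UnrejectedSketch(foolbox.criteria.Criterion):
    """Hypothetical criterion: a candidate only counts as adversarial if the
    top prediction is NOT the trailing 'reject' entry."""

    def is_adversarial(self, predictions, label):
        # predictions has shape (num_classes + 1,): class scores plus the reject score
        return np.argmax(predictions) != len(predictions) - 1


# Combined with Misclassification(), a sample is only a success if it is
# both misclassified and not rejected (equivalently: criterion_a & criterion_b).
criterion = foolbox.criteria.CombinedCriteria(
    foolbox.criteria.Misclassification(), UnrejectedSketch())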
Example #2
def substitute_model(options):
    """
    Uses BPDA with a substitute model to attack the custom model.

    BPDA uses predictions from the defended model and gradients
    from the substitute model.
    Note: We could technically attack the custom model directly,
    since most models support gradient computation, but we are
    assuming that we do not have access to the gradients. 
    """
    attack_p = options['attack_p']
    attack_name = options['attack_name']
    attack_workers = options['attack_workers']
    command = options['command']
    cuda = options['cuda']
    custom_foolbox_model = options['custom_foolbox_model']
    loader = options['loader']
    results_path = options['results_path']
    substitute_foolbox_model = options['substitute_foolbox_model']

    if substitute_foolbox_model.num_classes() != custom_foolbox_model.num_classes():
        raise click.BadArgumentUsage(
            'The substitute model ({} classes) must have the same '
            'number of classes as the custom model ({} classes)'.format(
                substitute_foolbox_model.num_classes(),
                custom_foolbox_model.num_classes()))

    composite_model = foolbox.models.CompositeModel(custom_foolbox_model,
                                                    substitute_foolbox_model)

    criterion = foolbox.criteria.Misclassification()

    # The attack targets the custom model's predictions, using the
    # substitute model's gradients as an estimate (BPDA)

    attack = parsing.parse_attack(attack_name, attack_p, criterion)

    samples_count, correct_count, successful_attack_count, distances, _, _ = tests.attack_test(
        composite_model,
        loader,
        attack,
        attack_p,
        cuda,
        attack_workers,
        name='Substitute Model Attack')

    info = utils.attack_statistics_info(samples_count, correct_count,
                                        successful_attack_count, distances)

    header = ['Distances']

    utils.save_results(results_path,
                       table=[distances],
                       command=command,
                       info=info,
                       header=header)
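
For reference, this is roughly how such a composite model could be assembled from two plain PyTorch networks, assuming the Foolbox 1.x/2.x-style API used throughout these examples (where CompositeModel takes a forward and a backward model); the networks below are placeholders, since the real custom and substitute models come from the project's options.

import foolbox
import torchvision.models as models

# Placeholder networks for illustration only; both must agree on the number
# of classes, which is exactly what the check above enforces.
custom_net = models.resnet18().eval()
substitute_net = models.resnet34().eval()

custom_model = foolbox.models.PyTorchModel(
    custom_net, bounds=(0, 1), num_classes=1000)
substitute_model = foolbox.models.PyTorchModel(
    substitute_net, bounds=(0, 1), num_classes=1000)

# Predictions (forward passes) come from the custom model, gradients
# (backward passes) from the substitute model - the BPDA recipe.
composite_model = foolbox.models.CompositeModel(
    forward_model=custom_model, backward_model=substitute_model)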
Example #3
def substitute_preprocessor(options):
    """
    Uses BPDA with a substitute model to evade the preprocessor defense.

    BPDA uses predictions from the defended model and gradients
    from the substitute model.
    """
    attack_p = options['attack_p']
    attack_name = options['attack_name']
    attack_workers = options['attack_workers']
    command = options['command']
    cuda = options['cuda']
    foolbox_model = options['foolbox_model']
    loader = options['loader']
    results_path = options['results_path']
    preprocessor = options['preprocessor']
    substitute_foolbox_model = options['substitute_foolbox_model']

    defended_model = defenses.PreprocessorDefenseModel(foolbox_model,
                                                       preprocessor)

    if substitute_foolbox_model.num_classes() != defended_model.num_classes():
        raise click.BadArgumentUsage(
            'The substitute model ({} classes) must have the same '
            'number of classes as the defended model ({} classes)'.format(
                substitute_foolbox_model.num_classes(),
                defended_model.num_classes()))

    composite_model = foolbox.models.CompositeModel(defended_model,
                                                    substitute_foolbox_model)

    criterion = foolbox.criteria.Misclassification()

    # The attack targets the defended model's predictions, using the
    # substitute model's gradients as an estimate (BPDA)

    attack = parsing.parse_attack(attack_name, attack_p, criterion)

    samples_count, correct_count, successful_attack_count, distances, _, _ = tests.attack_test(
        composite_model,
        loader,
        attack,
        attack_p,
        cuda,
        attack_workers,
        name='Substitute Preprocessor Attack')

    info = utils.attack_statistics_info(samples_count, correct_count,
                                        successful_attack_count, distances)

    header = ['Distances']

    utils.save_results(results_path,
                       table=[distances],
                       command=command,
                       info=info,
                       header=header)
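
What the preprocessor actually does is decided elsewhere in the project; as a purely illustrative example, a feature-squeezing-style preprocessor could be as simple as the bit-depth reduction below, which PreprocessorDefenseModel would presumably apply to every input before the wrapped model classifies it.

import numpy as np


def bit_depth_reduction(image, bits=3):
    """Hypothetical preprocessor: quantize pixel values (assumed in [0, 1])
    to 2**bits levels. The project's real preprocessors may differ."""
    levels = 2 ** bits - 1
    return np.round(image * levels) / levels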
Example #4
def shallow_preprocessor(options):
    """
    Evaluates the effectiveness of the preprocessor defense on its own,
    without any adaptive attack strategy.

    Adversarial samples are generated to fool the undefended model.
    """
    attack_p = options['attack_p']
    attack_name = options['attack_name']
    attack_workers = options['attack_workers']
    command = options['command']
    cuda = options['cuda']
    foolbox_model = options['foolbox_model']
    loader = options['loader']
    results_path = options['results_path']
    preprocessor = options['preprocessor']

    criterion = foolbox.criteria.Misclassification()

    # The attack will be against the undefended model

    attack = parsing.parse_attack(attack_name, attack_p, criterion)

    defended_model = defenses.PreprocessorDefenseModel(foolbox_model,
                                                       preprocessor)

    samples_count, correct_count, successful_attack_count, distances = tests.shallow_defense_test(
        foolbox_model,
        loader,
        attack,
        attack_p,
        defended_model,
        cuda,
        attack_workers,
        name='Shallow Preprocessor Attack')

    info = utils.attack_statistics_info(samples_count, correct_count,
                                        successful_attack_count, distances)

    header = ['Distances']

    utils.save_results(results_path,
                       table=[distances],
                       command=command,
                       info=info,
                       header=header)
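
Conceptually, the shallow test crafts adversarials against the undefended model and then checks how many of them still fool the defended model. The sketch below captures that logic, assuming a Foolbox 1.x-style API (attack(image, label) returns the adversarial image or None, model.predictions(image) returns logits) and an attack bound to the undefended model; the real tests.shallow_defense_test may differ.

import numpy as np


def shallow_defense_sketch(defended_model, attack, images, labels):
    """Count how many adversarials crafted against the undefended model
    (the one the attack is bound to) transfer through the defense."""
    fooled_defended = 0
    for image, label in zip(images, labels):
        adversarial = attack(image, label)  # attack the undefended model
        if adversarial is None:
            continue  # no adversarial found for the undefended model
        defended_top1 = np.argmax(defended_model.predictions(adversarial))
        if defended_top1 != label:
            fooled_defended += 1  # the defense did not stop this adversarial
    return fooled_defended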
Example #5
def black_box_model(options):
    """
    Uses a black box attack against the custom model.

    Adversarial samples are generated to fool the custom model,
    which only provides the labels when queried.

    Note: We could technically use the gradients,
    since most models support gradient computation, but we are
    assuming that we do not have access to them. 
    """
    attack_p = options['attack_p']
    attack_name = options['attack_name']
    attack_workers = options['attack_workers']
    command = options['command']
    cuda = options['cuda']
    custom_foolbox_model = options['custom_foolbox_model']
    loader = options['loader']
    results_path = options['results_path']

    criterion = foolbox.criteria.Misclassification()

    # The attack will be against the custom model, using only its output labels

    attack = parsing.parse_attack(attack_name, attack_p, criterion)

    samples_count, correct_count, successful_attack_count, distances, _, _ = tests.attack_test(
        custom_foolbox_model,
        loader,
        attack,
        attack_p,
        cuda,
        attack_workers,
        name='Black-Box Model Attack')

    info = utils.attack_statistics_info(samples_count, correct_count,
                                        successful_attack_count, distances)
    header = ['Distances']

    utils.save_results(results_path,
                       table=[distances],
                       command=command,
                       info=info,
                       header=header)
Example #6
def black_box_preprocessor(options):
    """
    Uses a black box attack to evade the preprocessor defense.

    Adversarial samples are generated to fool the defended model,
    which only provides the labels when queried.
    """
    attack_p = options['attack_p']
    attack_name = options['attack_name']
    attack_workers = options['attack_workers']
    command = options['command']
    cuda = options['cuda']
    foolbox_model = options['foolbox_model']
    loader = options['loader']
    results_path = options['results_path']
    preprocessor = options['preprocessor']

    defended_model = defenses.PreprocessorDefenseModel(foolbox_model,
                                                       preprocessor)

    criterion = foolbox.criteria.Misclassification()

    # The attack will be against the defended model
    attack = parsing.parse_attack(attack_name, attack_p, criterion)

    samples_count, correct_count, successful_attack_count, distances, _, _ = tests.attack_test(
        defended_model,
        loader,
        attack,
        attack_p,
        cuda,
        attack_workers,
        name='Black-Box Preprocessor Attack')

    info = utils.attack_statistics_info(samples_count, correct_count,
                                        successful_attack_count, distances)

    header = ['Distances']

    utils.save_results(results_path,
                       table=[distances],
                       command=command,
                       info=info,
                       header=header)
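
Since the defended model only exposes labels in this setting, attack_name would typically refer to a decision-based attack. As an illustration (whether parse_attack exposes it under this name is project-specific), Foolbox's Boundary Attack needs nothing but the model's top label and reuses the defended_model built in the example above.

import foolbox

# Decision-based attack: it only queries the model's decision (top label),
# so it can run against the label-only defended model without gradients.
boundary_attack = foolbox.attacks.BoundaryAttack(
    model=defended_model, criterion=foolbox.criteria.Misclassification())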