Code example #1
def main():
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='a file containing a pretrained model. The filename should be in "<model>_<dataset>_e<max epochs>[_<date>].pt" format')
    parser.add_argument(
        '-p', '--param', type=str, required=True,
        help='a JSON config file which contains the parameters for the attacks')
    parser.add_argument(
        '-n', '--number', type=int, default=1000,
        help='the number of adv. examples to generate (if larger than the test set, all test examples are used)')
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for random number generator')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-w', '--overwrite', action='store_true', default=False,
        help='overwrite the existing file')
    parser.add_argument(
        '-B', '--bim', action='store_true', default=False,
        help='Apply BIM attack')
    parser.add_argument(
        '-C', '--carlini', action='store_true', default=False,
        help='Apply Carlini L2 attack')
    parser.add_argument(
        '-D', '--deepfool', action='store_true', default=False,
        help='Apply DeepFool attack')
    parser.add_argument(
        '-F', '--fgsm', action='store_true', default=False,
        help='Apply FGSM attack')
    parser.add_argument(
        '-S', '--saliency', action='store_true', default=False,
        help='Apply Saliency Map attack')
    args = parser.parse_args()
    model_file = args.model
    attack_param_file = args.param
    num_adv = args.number
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # Which attacks should be applied?
    # use binary encoding to select attacks
    my_attacks = np.zeros(5, dtype=np.int8)
    attack_list = np.array(
        ['FGSM', 'BIM', 'Carlini', 'DeepFool', 'Saliency'])
    my_attacks[0] = 1 if args.fgsm else 0
    my_attacks[1] = 1 if args.bim else 0
    my_attacks[2] = 1 if args.carlini else 0
    my_attacks[3] = 1 if args.deepfool else 0
    my_attacks[4] = 1 if args.saliency else 0
    selected_attacks = attack_list[np.where(my_attacks == 1)[0]]

    # check file
    for f in [model_file, attack_param_file]:
        if not os.path.exists(f):
            raise FileNotFoundError('{} does not exist!'.format(f))
    dirname = os.path.dirname(model_file)
    model_name, dname = parse_model_filename(model_file)

    with open(attack_param_file) as param_json:
        att_params = json.load(param_json)

    # set logging config. Run this before logging anything!
    set_logging('attack', dname, verbose, save_log)

    # show parameters
    print('[attack] Start generating {} adv. samples from {} model...'.format(
        num_adv, model_name))
    logger.info('Start at   : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('model file :%s', model_file)
    logger.info('model      :%s', model_name)
    logger.info('dataset    :%s', dname)
    logger.info('params     :%s', attack_param_file)
    logger.info('num_adv    :%r', num_adv)
    logger.info('seed       :%d', seed)
    logger.info('verbose    :%r', verbose)
    logger.info('save_log   :%r', save_log)
    logger.info('overwrite  :%r', overwrite)
    logger.info('dirname    :%r', dirname)
    logger.info('attacks    :%s', ', '.join(selected_attacks))

    if len(selected_attacks) == 0:
        logger.warning('No attack is selected. Exit.')
        sys.exit(0)

    # reset seed
    master_seed(seed)

    # set DataContainer and ModelContainer
    dc = get_data_container(dname)
    Model = get_model(model_name)
    # these models require extra keyword arguments
    if dname in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        num_classes = dc.num_classes
        num_features = dc.dim_data[0]
        kwargs = {
            'num_features': num_features,
            'hidden_nodes': num_features*4,
            'num_classes': num_classes,
        }
        model = Model(**kwargs)
    else:
        model = Model()
    logger.info('Use %s model', model.__class__.__name__)
    mc = ModelContainerPT(model, dc)
    mc.load(model_file)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    run_attacks(mc,
                selected_attacks,
                att_params,
                num_adv,
                model_name + '_' + dname,
                overwrite)
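
The scripts in these examples call parse_model_filename to recover the model and dataset names from the "<model>_<dataset>_e<max epochs>[_<date>].pt" convention described in the --model help text. The helper below is only a minimal sketch of how such a parser could look under that assumption; the repository's actual implementation may differ.

import os


def parse_model_filename(file_path):
    """Minimal sketch: split '<model>_<dataset>_e<max epochs>[_<date>].pt'
    (assumed convention) into the model name and the dataset name."""
    base = os.path.basename(file_path)   # e.g. 'MnistCnnV2_MNIST_e50.pt'
    stem = os.path.splitext(base)[0]     # drop the '.pt' extension
    parts = stem.split('_')              # ['MnistCnnV2', 'MNIST', 'e50', ...]
    return parts[0], parts[1]            # remaining parts: epochs and date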
Code example #2
def main():
    data_name = 'MNIST'
    set_logging('advTraining', data_name, True, True)

    model_file = os.path.join('save', 'MnistCnnV2_MNIST_e50.pt')
    Model = get_model('MnistCnnV2')
    classifier = Model()

    dc = DataContainer(DATASET_LIST[data_name], get_data_path())
    dc()
    classifier_mc = ModelContainerPT(classifier, dc)
    classifier_mc.load(model_file)
    accuracy = classifier_mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    attack = BIMContainer(classifier_mc,
                          eps=0.3,
                          eps_step=0.1,
                          max_iter=100,
                          targeted=False)

    adv_trainer = AdversarialTraining(classifier_mc, [attack])
    # adv_trainer.fit(max_epochs=30, batch_size=128, ratio=0.1)
    # adv_trainer.save('AdvTrain_MnistCnnV2_MNIST', overwrite=True)

    file_name = os.path.join('save', 'AdvTrain_MnistCnnV2_MNIST.pt')
    adv_trainer.load(file_name)

    x = np.load(os.path.join('save', 'MnistCnnV2_MNIST_BIM_x.npy'),
                allow_pickle=False)
    y = np.load(os.path.join('save', 'MnistCnnV2_MNIST_BIM_y.npy'),
                allow_pickle=False)
    blocked_indices = adv_trainer.detect(x, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s', len(blocked_indices), len(x),
                'clean')

    adv = np.load(os.path.join('save', 'MnistCnnV2_MNIST_BIM_adv.npy'),
                  allow_pickle=False)
    accuracy = classifier_mc.evaluate(adv, y)
    logger.info('Accuracy on %s set: %f', 'BIM', accuracy)
    blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s', len(blocked_indices), len(adv),
                'BIM')

    adv = np.load(os.path.join('save', 'MnistCnnV2_MNIST_Carlini_adv.npy'),
                  allow_pickle=False)
    accuracy = classifier_mc.evaluate(adv, y)
    logger.info('Accuracy on %s set: %f', 'Carlini', accuracy)
    blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s', len(blocked_indices), len(adv),
                'Carlini')

    adv = np.load(os.path.join('save', 'MnistCnnV2_MNIST_DeepFool_adv.npy'),
                  allow_pickle=False)
    accuracy = classifier_mc.evaluate(adv, y)
    logger.info('Accuracy on %s set: %f', 'DeepFool', accuracy)
    blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s', len(blocked_indices), len(adv),
                'DeepFool')

    adv = np.load(os.path.join('save', 'MnistCnnV2_MNIST_FGSM_adv.npy'),
                  allow_pickle=False)
    accuracy = classifier_mc.evaluate(adv, y)
    logger.info('Accuracy on %s set: %f', 'FGSM', accuracy)
    blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s', len(blocked_indices), len(adv),
                'FGSM')

    adv = np.load(os.path.join('save', 'MnistCnnV2_MNIST_Saliency_adv.npy'),
                  allow_pickle=False)
    accuracy = classifier_mc.evaluate(adv, y)
    logger.info('Accuracy on %s set: %f', 'Saliency', accuracy)
    blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s', len(blocked_indices), len(adv),
                'Saliency')
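
The five evaluate/detect blocks above differ only in the attack name. Purely as an illustration (not code from the repository), the same checks can be collapsed into a loop over the attack names, reusing the 'MnistCnnV2_MNIST_<attack>_adv.npy' naming convention already used above:

    # Sketch only: iterate over the attacks instead of repeating the block.
    for attack_name in ['BIM', 'Carlini', 'DeepFool', 'FGSM', 'Saliency']:
        adv = np.load(os.path.join(
            'save', f'MnistCnnV2_MNIST_{attack_name}_adv.npy'),
            allow_pickle=False)
        accuracy = classifier_mc.evaluate(adv, y)
        logger.info('Accuracy on %s set: %f', attack_name, accuracy)
        blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
        logger.info('Blocked %d/%d samples on %s',
                    len(blocked_indices), len(adv), attack_name)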
Code example #3
def main():
    parser = ap.ArgumentParser()
    parser.add_argument('-d',
                        '--dataset',
                        type=str,
                        required=True,
                        help='Name of the dataset')
    parser.add_argument(
        '-p',
        '--param',
        type=str,
        required=True,
        help='a JSON config file which contains the parameters for the attacks'
    )
    parser.add_argument('-s',
                        '--seed',
                        type=int,
                        default=4096,
                        help='the seed for random number generator')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        default=False,
                        help='set logger level to debug')
    parser.add_argument('-l',
                        '--savelog',
                        action='store_true',
                        default=False,
                        help='save logging file')
    parser.add_argument('-F',
                        '--fgsm',
                        action='store_true',
                        default=False,
                        help='Apply FGSM attack')
    parser.add_argument('-B',
                        '--bim',
                        action='store_true',
                        default=False,
                        help='Apply BIM attack')
    parser.add_argument('-D',
                        '--deepfool',
                        action='store_true',
                        default=False,
                        help='Apply DeepFool attack')
    parser.add_argument('-C',
                        '--carlini',
                        action='store_true',
                        default=False,
                        help='Apply Carlini L2 attack')
    args = parser.parse_args()
    data_name = args.dataset
    param_file = args.param
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog

    # set logging config. Run this before logging anything!
    set_logging(LOG_NAME, data_name, verbose, save_log)

    # Which attacks should be applied?
    attack_list = []
    if args.fgsm:
        attack_list.append('FGSM')
    if args.bim:
        attack_list.append('BIM')
    if args.deepfool:
        attack_list.append('DeepFool')
    if args.carlini:
        attack_list.append('Carlini')

    # Quit, if there is nothing to do.
    if len(attack_list) == 0:
        logger.warning('No attack was selected. Exit.')
        sys.exit(0)

    if data_name in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        model_name = 'IrisNN'
    elif data_name == 'BreastCancerWisconsin':
        model_name = 'BCNN'
    else:
        raise ValueError('Unsupported dataset: {}'.format(data_name))

    y_file = os.path.join('save',
                          f'{model_name}_{data_name}_{attack_list[0]}_y.npy')
    attack_files = [
        os.path.join('save',
                     f'{model_name}_{data_name}_{attack_list[0]}_x.npy')
    ]
    for attack_name in attack_list:
        attack_files.append(
            os.path.join('save',
                         f'{model_name}_{data_name}_{attack_name}_adv.npy'))
    # the 1st file is the clean inputs
    attack_list = ['clean'] + attack_list

    # load parameters for Applicability Domain
    with open(param_file) as param_json:
        params = json.load(param_json)

    # show parameters
    print(f'[{LOG_NAME}] Running tree model...')
    logger.info('Start at    :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('model       :%s', model_name)
    logger.info('dataset     :%s', data_name)
    logger.info('param file  :%s', param_file)
    logger.info('seed        :%d', seed)
    logger.info('verbose     :%r', verbose)
    logger.info('save_log    :%r', save_log)
    logger.info('attacks     :%s', ', '.join(attack_list))
    logger.debug('params     :%s', str(params))

    # check files
    for file_name in [y_file] + attack_files:
        if not os.path.exists(file_name):
            logger.error('%s does not exist!', file_name)
            raise FileNotFoundError('{} does not exist!'.format(file_name))

    # reset seed
    master_seed(seed)

    # select data
    dc = get_data_container(
        data_name,
        use_shuffle=True,
        use_normalize=True,
    )

    # train the model
    classifier = ExtraTreeClassifier(
        criterion='gini',
        splitter='random',
    )
    mc = ModelContainerTree(classifier, dc)
    mc.fit()

    x = np.load(attack_files[0], allow_pickle=False)
    art_classifier = SklearnClassifier(classifier)
    attack = DecisionTreeAttack(art_classifier)
    adv = attack.generate(x)

    ad = ApplicabilityDomainContainer(mc, mc.hidden_model, **params)
    ad.fit()

    # load the true labels for evaluation
    y = np.load(y_file, allow_pickle=False)

    accuracy = mc.evaluate(adv, y)
    logger.info('Accuracy on DecisionTreeAttack set: %f', accuracy)
    blocked_indices = ad.detect(adv)
    logger.info('Blocked %d/%d samples on DecisionTreeAttack',
                len(blocked_indices), len(adv))

    # traverse the clean set and every attack
    for i in range(len(attack_list)):
        adv_file = attack_files[i]
        adv_name = attack_list[i]
        logger.debug('Load %s...', adv_file)
        adv = np.load(adv_file, allow_pickle=False)
        accuracy = mc.evaluate(adv, y)
        logger.info('Accuracy on %s set: %f', adv_name, accuracy)
        blocked_indices = ad.detect(adv, return_passed_x=False)
        logger.info('Blocked %d/%d samples on %s', len(blocked_indices),
                    len(adv), adv_name)
Code example #4
def main():
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-d', '--dataset', type=str, required=True,
        help='Name of the dataset')
    parser.add_argument(
        '--depth', type=int, default=0,
        help='The image color depth for input images. Apply the binary-depth filter when a value is given')
    parser.add_argument(
        '-s', '--sigma', type=float, default=0,
        help='The standard deviation of the normal distribution. Apply the Gaussian noise filter when a value is given')
    parser.add_argument(
        '-k', '--kernelsize', type=int, default=0,
        help='The kernel size for the median filter. Apply the median filter when a value is given')
    parser.add_argument(
        '-i', '--iteration', type=int, default=MAX_ITERATIONS,
        help='the number of iterations that the experiment will repeat')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-w', '--overwrite', action='store_true', default=False,
        help='overwrite the existing file')
    args = parser.parse_args()
    data_name = args.dataset
    max_iterations = args.iteration
    bit_depth = args.depth
    sigma = args.sigma
    kernel_size = args.kernelsize
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    set_logging(LOG_NAME, data_name, verbose, save_log)

    # Which filters should be applied?
    filter_list = []
    if bit_depth > 0:
        filter_list.append('binary')
    if sigma > 0:
        filter_list.append('normal')
    if kernel_size > 0:
        filter_list.append('median')

    result_filename = name_handler(
        os.path.join('save', LOG_NAME + '_' + data_name + '_' + 'tree'),
        'csv',
        overwrite=overwrite)

    # show parameters
    print(f'[{LOG_NAME}] Running tree model...')
    logger.info('Start at    :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset     :%s', data_name)
    logger.info('iterations  :%d', max_iterations)
    logger.info('bit_depth   :%d', bit_depth)
    logger.info('sigma       :%f', sigma)
    logger.info('kernel_size :%d', kernel_size)
    logger.info('verbose     :%r', verbose)
    logger.info('save_log    :%r', save_log)
    logger.info('overwrite   :%r', overwrite)
    logger.info('filename    :%s', result_filename)

    # NOTE: Why doesn't training on all adversarial examples work?
    # The classification models depend on the training set. The models are not
    # identical, thus the adversarial examples are not the same either.

    with open(result_filename, 'w') as file:
        file.write(','.join(['Index', 'Clean', 'DecisionTreeAttack']) + '\n')
        i = 1
        while i <= max_iterations:
            num_blk_clean, num_blk_adv = experiment(
                data_name, filter_list, bit_depth, sigma, kernel_size)
            if num_blk_adv == -1:
                # the run failed; retry this iteration without advancing i
                continue
            file.write(f'{i},{num_blk_clean},{num_blk_adv}\n')
            i += 1
Code example #5
def main():
    parser = ap.ArgumentParser()
    parser.add_argument('-s',
                        '--size',
                        type=int,
                        required=True,
                        help='the sample size')
    parser.add_argument('-f',
                        '--features',
                        type=int,
                        required=True,
                        help='the number of features')
    parser.add_argument('-c',
                        '--classes',
                        type=int,
                        default=2,
                        help='the number of classes')
    parser.add_argument(
        '-i',
        '--iteration',
        type=int,
        default=MAX_ITERATIONS,
        help='the number of iterations that the experiment will repeat')
    parser.add_argument('-e',
                        '--epoch',
                        type=int,
                        required=True,
                        help='the number of max epochs for training')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        default=False,
                        help='set logger level to debug')
    parser.add_argument('-l',
                        '--savelog',
                        action='store_true',
                        default=False,
                        help='save logging file')
    parser.add_argument('-w',
                        '--overwrite',
                        action='store_true',
                        default=False,
                        help='overwrite the existing file')

    args = parser.parse_args()
    sample_size = args.size
    num_features = args.features
    num_classes = args.classes
    max_iterations = args.iteration
    max_epochs = args.epoch
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    dname = f'SyntheticS{sample_size}F{num_features}C{num_classes}'
    set_logging(LOG_NAME, dname, verbose, save_log)

    print('[{}] Start experiment on {}...'.format(LOG_NAME, dname))
    logger.info('Start at    :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset     :%s', dname)
    logger.info('train size  :%d', sample_size)
    logger.info('num features:%d', num_features)
    logger.info('num classes :%d', num_classes)
    logger.info('iterations  :%d', max_iterations)
    logger.info('max_epochs  :%d', max_epochs)
    logger.info('verbose     :%r', verbose)
    logger.info('save_log    :%r', save_log)
    logger.info('overwrite   :%r', overwrite)

    result_file = name_handler(os.path.join(
        'save', f'{LOG_NAME}_{dname}_i{max_iterations}'),
                               'csv',
                               overwrite=overwrite)

    adv_file = name_handler(os.path.join('save',
                                         f'{LOG_NAME}_{dname}_AdvExamples'),
                            'csv',
                            overwrite=overwrite)

    adv_file = open(adv_file, 'w')
    adv_file.write(','.join(TITLE_ADV) + '\n')
    res_file = open(result_file, 'w')
    res_file.write(','.join(TITLE_RESULTS) + '\n')
    for i in range(max_iterations):
        since = time.time()
        # generate synthetic data
        x, y = make_classification(
            n_samples=sample_size + 1000,
            n_features=num_features,
            n_informative=num_classes,
            n_redundant=0,
            n_classes=num_classes,
            n_clusters_per_class=1,
        )

        # normalize data
        x_max = np.max(x, axis=0)
        x_min = np.min(x, axis=0)
        # NOTE: Carlini attack expects the data in range [0, 1]
        # x_mean = np.mean(x, axis=0)
        # x = scale_normalize(x, x_min, x_max, x_mean)
        x = scale_normalize(x, x_min, x_max)

        # training/test split
        # NOTE: test set has fixed size
        x_train = np.array(x[:-1000], dtype=np.float32)
        y_train = np.array(y[:-1000], dtype=np.int64)
        x_test = np.array(x[-1000:], dtype=np.float32)
        y_test = np.array(y[-1000:], dtype=np.int64)

        # create data container
        data_dict = get_synthetic_dataset_dict(sample_size + 1000, num_classes,
                                               num_features)
        dc = DataContainer(data_dict, get_data_path())

        # assign data manually
        dc.x_train = x_train
        dc.y_train = y_train
        dc.x_test = x_test
        dc.y_test = y_test

        experiment(i, dc, max_epochs, adv_file, res_file)
        time_elapsed = time.time() - since
        print('Completed {} [{}/{}]: {:d}m {:2.1f}s'.format(
            dname, i + 1, max_iterations, int(time_elapsed // 60),
            time_elapsed % 60))

    adv_file.close()
    res_file.close()
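
scale_normalize is called here with the per-feature minimum and maximum so that the Carlini attack sees inputs in [0, 1], as the NOTE in the loop states. A plain min-max rescaling matching that call signature might look like the sketch below; this is an assumption about the helper, not the repository's implementation (which, judging by the commented-out call, can also take a mean argument).

def scale_normalize(x, x_min, x_max):
    """Sketch of a min-max rescaler that maps each feature into [0, 1]."""
    x_range = x_max - x_min
    x_range[x_range == 0] = 1.0  # avoid division by zero on constant features
    return (x - x_min) / x_range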
Code example #6
def main():
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-d', '--dataset', type=str, required=True,
        help='Name of the dataset')
    parser.add_argument(
        '-i', '--iteration', type=int, default=MAX_ITERATIONS,
        help='the number of iterations that the experiment will repeat')
    parser.add_argument(
        '-p', '--param', type=str, required=True,
        help='a JSON config file which contains the parameters for the applicability domain')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-w', '--overwrite', action='store_true', default=False,
        help='overwrite the existing file')
    args = parser.parse_args()
    data_name = args.dataset
    max_iterations = args.iteration
    param_file = args.param
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    set_logging(LOG_NAME, data_name, verbose, save_log)

    # load parameters for Applicability Domain
    with open(param_file) as param_json:
        params = json.load(param_json)

    result_filename = name_handler(
        os.path.join('save', LOG_NAME + '_' + data_name + '_' + 'tree'),
        'csv',
        overwrite=overwrite)

    # show parameters
    print(f'[{LOG_NAME}] Running tree model...')
    logger.info('Start at    :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset     :%s', data_name)
    logger.info('iterations  :%d', max_iterations)
    logger.info('param file  :%s', param_file)
    logger.info('verbose     :%r', verbose)
    logger.info('save_log    :%r', save_log)
    logger.info('overwrite   :%r', overwrite)
    logger.info('filename    :%s', result_filename)
    logger.debug('params     :%s', str(params))

    # NOTE: Why doesn't training on all adversarial examples work?
    # The classification models depend on the training set. The models are not
    # identical, thus the adversarial examples are not the same either.

    with open(result_filename, 'w') as file:
        file.write(','.join(['Index', 'Clean', 'DecisionTreeAttack']) + '\n')
        for i in range(max_iterations):
            num_blk_clean, num_blk_adv = experiment(data_name, params)
            file.write(f'{i},{num_blk_clean},{num_blk_adv}\n')
Code example #7
def main():
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='a file containing a pretrained model. The filename should be in "<model>_<dataset>_e<max epochs>[_<date>].pt" format')
    parser.add_argument(
        '-p', '--param', type=str, required=True,
        help='a JSON config file which contains the parameters for the cross validation')
    parser.add_argument(
        '-a', '--adv', type=str,
        help='file name of adv. examples for testing. If it is omitted, the program skips testing. The name should be in "<model>_<dataset>_<attack>_adv.npy" format')
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for random number generator')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-i', '--ignore', action='store_true', default=False,
        help='Do not save the results; only print them to the terminal.')
    parser.add_argument(
        '-w', '--overwrite', action='store_true', default=False,
        help='overwrite the existing file')
    args = parser.parse_args()
    model_file = args.model
    param_file = args.param
    adv_file = args.adv
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog
    does_ignore = args.ignore
    overwrite = args.overwrite

    model_name, data_name = parse_model_filename(model_file)

    # set logging config. Run this before logging anything!
    set_logging('cross_validation', data_name, verbose, save_log)

    # check files
    for file_path in [model_file, param_file]:
        if not os.path.exists(file_path):
            logger.warning('%s does not exist. Exit.', file_path)
            sys.exit(0)
    if adv_file is not None and not os.path.exists(adv_file):
        logger.warning('%s does not exist. Exit.', adv_file)
        sys.exit(0)

    # read parameters
    with open(param_file) as param_json:
        params = json.load(param_json)

    # show parameters
    print('[cv] Running cross validation on {} with {}...'.format(
        model_file, data_name))
    logger.info('Start at      : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('model file    :%s', model_file)
    logger.info('adv file      :%s', adv_file)
    logger.info('model         :%s', model_name)
    logger.info('dataset       :%s', data_name)
    logger.info('param file    :%s', param_file)
    logger.info('seed          :%d', seed)
    logger.info('verbose       :%r', verbose)
    logger.info('save_log      :%r', save_log)
    logger.info('Ignore saving :%r', does_ignore)
    logger.info('overwrite     :%r', overwrite)
    logger.debug('params       :%s', str(params))

    # load parameters
    k_range = params['k_range']
    z_range = params['z_range']
    kappa_range = params['kappa_range']
    gamma_range = params['gamma_range']
    epsilon = params['epsilon']
    num_folds = params['num_folds']
    batch_size = params['batch_size']
    sample_ratio = params['sample_ratio']
    logger.info('k_range       :%s', str(k_range))
    logger.info('z_range       :%s', str(z_range))
    logger.info('kappa_range   :%s', str(kappa_range))
    logger.info('gamma_range   :%s', str(gamma_range))
    logger.info('epsilon       :%.1f', epsilon)
    logger.info('num_folds     :%d', num_folds)
    logger.info('batch_size    :%d', batch_size)
    logger.info('sample_ratio  :%.1f', sample_ratio)

    # reset seed
    master_seed(seed)

    dc = DataContainer(DATASET_LIST[data_name], get_data_path())
    dc(shuffle=True, normalize=True, size_train=0.8)
    logger.info('Sample size: %d', len(dc))

    Model = get_model(model_name)
    # these models require extra keyword arguments
    if data_name in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        num_classes = dc.num_classes
        num_features = dc.dim_data[0]
        kwargs = {
            'num_features': num_features,
            'hidden_nodes': num_features*4,
            'num_classes': num_classes,
        }
        model = Model(**kwargs)
    else:
        model = Model()
    logger.info('Use %s model', model.__class__.__name__)
    mc = ModelContainerPT(model, dc)
    mc.load(model_file)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    ad = ApplicabilityDomainContainer(
        mc, hidden_model=model.hidden_model, sample_ratio=sample_ratio)
    cross_validation = CrossValidation(
        ad,
        num_folds=num_folds,
        k_range=k_range,
        z_range=z_range,
        kappa_range=kappa_range,
        gamma_range=gamma_range,
        epsilon=epsilon,
    )
    bim_attack = BIMContainer(
        mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False,
    )
    cross_validation.fit(bim_attack)

    # test optimal parameters
    if adv_file is not None:
        postfix = ['adv', 'pred', 'x', 'y']
        data_files = [adv_file.replace('_adv', '_' + s) for s in postfix]
        adv = np.load(data_files[0], allow_pickle=False)
        pred_adv = np.load(data_files[1], allow_pickle=False)
        x = np.load(data_files[2], allow_pickle=False)
        pred = np.load(data_files[3], allow_pickle=False)

        # fetch optimal parameters
        ad = ApplicabilityDomainContainer(
            mc,
            hidden_model=model.hidden_model,
            k2=cross_validation.k2,
            reliability=cross_validation.reliability,
            sample_ratio=sample_ratio,
            kappa=cross_validation.kappa,
            confidence=cross_validation.confidence,
        )
        logger.info('Params: %s', str(ad.params))
        ad.fit()
        blocked_indices = ad.detect(x, pred, return_passed_x=False)
        logger.info('Blocked %d/%d on clean data',
                    len(blocked_indices), len(x))
        blocked_indices = ad.detect(adv, pred_adv, return_passed_x=False)
        logger.info('Blocked %d/%d on adv. examples.',
                    len(blocked_indices), len(adv))

    # save results
    if not does_ignore:
        file_name = name_handler(
            model_name + '_' + data_name, 'csv', overwrite=overwrite)
        cross_validation.save(file_name)
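
The adv file name passed via --adv encodes everything needed to locate its companion files: replacing the '_adv' suffix yields the prediction, clean-input and label files. A quick illustration of the convention (the file name below is just an example):

adv_file = 'save/MnistCnnV2_MNIST_BIM_adv.npy'   # example name only
postfix = ['adv', 'pred', 'x', 'y']
data_files = [adv_file.replace('_adv', '_' + s) for s in postfix]
# -> ['save/MnistCnnV2_MNIST_BIM_adv.npy',
#     'save/MnistCnnV2_MNIST_BIM_pred.npy',
#     'save/MnistCnnV2_MNIST_BIM_x.npy',
#     'save/MnistCnnV2_MNIST_BIM_y.npy']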
Code example #8
def main():
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-a',
        '--adv',
        type=str,
        required=True,
        help=
        'file name for adv. examples. The name should be in "<model>_<dataset>_<attack>_adv.npy" format'
    )
    parser.add_argument(
        '-p',
        '--param',
        type=str,
        required=True,
        help='a JSON config file which contains the parameters for the attacks'
    )
    parser.add_argument(
        '-m',
        '--model',
        type=str,
        required=True,
        help=
        'a file containing a pretrained model. The filename should be in "<model>_<dataset>_e<max epochs>[_<date>].pt" format'
    )
    parser.add_argument('-s',
                        '--seed',
                        type=int,
                        default=4096,
                        help='the seed for random number generator')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        default=False,
                        help='set logger level to debug')
    parser.add_argument('-l',
                        '--savelog',
                        action='store_true',
                        default=False,
                        help='save logging file')
    args = parser.parse_args()
    adv_file = args.adv
    param_file = args.param
    model_file = args.model
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog
    check_clean = True

    # build filenames from the root file
    postfix = ['adv', 'pred', 'x', 'y']
    data_files = [adv_file.replace('_adv', '_' + s) for s in postfix]
    model_name, dname = parse_model_filename(adv_file)

    # set logging config. Run this before logging anything!
    set_logging('defence_ad', dname, verbose, save_log)

    # check adv. examples and parameter config files
    for f in data_files[:2] + [param_file]:
        if not os.path.exists(f):
            logger.warning('%s does not exist. Exit.', f)
            sys.exit(0)
    # check clean samples
    for f in data_files[-2:]:
        if not os.path.exists(f):
            logger.warning(
                'Cannot load files for clean samples. Skip checking clean set.'
            )
            check_clean = False

    with open(param_file) as param_json:
        params = json.load(param_json)

    # show parameters
    print(
        '[defend_ad] Running applicability domain on {}...'.format(model_name))
    logger.info('Start at    : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('model file  :%s', model_file)
    logger.info('adv file    :%s', adv_file)
    logger.info('model       :%s', model_name)
    logger.info('dataset     :%s', dname)
    logger.info('param file  :%s', param_file)
    logger.info('seed        :%d', seed)
    logger.info('verbose     :%r', verbose)
    logger.info('save_log    :%r', save_log)
    logger.info('check_clean :%r', check_clean)
    logger.debug('params     : %s', str(params))

    # reset seed
    master_seed(seed)

    # set DataContainer and ModelContainer
    dc = get_data_container(dname)
    Model = get_model(model_name)
    # these models require extra keyword arguments
    if dname in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        num_classes = dc.num_classes
        num_features = dc.dim_data[0]
        kwargs = {
            'num_features': num_features,
            'hidden_nodes': num_features * 4,
            'num_classes': num_classes,
        }
        model = Model(**kwargs)
    else:
        model = Model()
    logger.info('Use %s model', model.__class__.__name__)
    mc = ModelContainerPT(model, dc)

    mc.load(model_file)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    # perform defence
    ad = ApplicabilityDomainContainer(mc,
                                      hidden_model=model.hidden_model,
                                      **params)
    ad.fit()

    result_prefix = [
        model_file,
        adv_file,
        params['k2'],
        params['reliability'],
        params['sample_ratio'],
        params['confidence'],
        params['kappa'],
        params['disable_s2'],
    ]

    # check clean
    if check_clean:
        x = np.load(data_files[2], allow_pickle=False)
        y = np.load(data_files[3], allow_pickle=False)
        x_passed, blk_idx, blocked_counts = detect(ad, 'clean samples', x, y)
        result = result_prefix + ['clean'] + blocked_counts
        result_clean = '[result]' + ','.join([str(r) for r in result])

    # check adversarial examples
    adv = np.load(data_files[0], allow_pickle=False)
    pred = np.load(data_files[1], allow_pickle=False)
    adv_passed, adv_blk_idx, blocked_counts = detect(ad, 'adv. examples', adv,
                                                     pred)
    result = result_prefix + ['adv'] + blocked_counts
    result = '[result]' + ','.join([str(r) for r in result])
    if check_clean:
        logger.info(result_clean)
    logger.info(result)
Code example #9
def main():
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-d', '--dataset', type=str, required=True,
        help='Name of the dataset')
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='Name of the model')
    parser.add_argument(
        '-i', '--iteration', type=int, default=MAX_ITERATIONS,
        help='the number of iterations that the experiment will repeat')
    parser.add_argument(
        '-e', '--epoch', type=int, required=True,
        help='the number of max epochs for training')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-w', '--overwrite', action='store_true', default=False,
        help='overwrite the existing file')

    # NOTE: the JSON files for the parameters are hard-coded.
    # We expect to run multiple attacks and defences in one iteration.
    args = parser.parse_args()
    dname = args.dataset
    mname = args.model
    max_iterations = args.iteration
    max_epochs = args.epoch
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    set_logging(LOG_NAME, dname, verbose, save_log)

    print('[{}] Start experiment on {} {} i{} e{}...'.format(
        LOG_NAME, mname, dname, max_iterations, max_epochs))
    logger.info('Start at    :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset     :%s', dname)
    logger.info('model       :%s', mname)
    logger.info('iterations  :%d', max_iterations)
    logger.info('max_epochs  :%d', max_epochs)
    logger.info('verbose     :%r', verbose)
    logger.info('save_log    :%r', save_log)
    logger.info('overwrite   :%r', overwrite)

    adv_file = name_handler(
        os.path.join('save', f'{LOG_NAME}_{dname}_{mname}_acc'),
        'csv',
        overwrite=overwrite)
    result_file = name_handler(
        os.path.join(
            'save', f'{LOG_NAME}_{dname}_{mname}_res_i{max_iterations}'),
        'csv',
        overwrite=overwrite)

    adv_file = open(adv_file, 'w')
    adv_file.write(','.join(TITLE_ADV) + '\n')
    res_file = open(result_file, 'w')
    res_file.write(','.join(TITLE_RESULTS) + '\n')
    for i in range(max_iterations):
        since = time.time()
        experiment(i, dname, mname, max_epochs, adv_file, res_file)
        time_elapsed = time.time() - since
        print('Completed {} [{}/{}]: {:d}m {:2.1f}s'.format(
            dname,
            i+1,
            max_iterations,
            int(time_elapsed // 60),
            time_elapsed % 60))

    adv_file.close()
    res_file.close()
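
adv_file and res_file are opened and closed by hand around the experiment loop. An equivalent sketch using context managers (adv_filename and result_file stand for the names returned by name_handler above) would release the handles even if experiment raises; it is shown only to illustrate the idiom, not as code from the repository:

    with open(adv_filename, 'w') as adv_file, \
            open(result_file, 'w') as res_file:
        adv_file.write(','.join(TITLE_ADV) + '\n')
        res_file.write(','.join(TITLE_RESULTS) + '\n')
        for i in range(max_iterations):
            experiment(i, dname, mname, max_epochs, adv_file, res_file)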
Code example #10
def main():
    parser = ap.ArgumentParser()
    parser.add_argument('-d',
                        '--dataset',
                        type=str,
                        required=True,
                        choices=get_dataset_list(),
                        help='the dataset you want to train')
    parser.add_argument(
        '-o',
        '--ofile',
        type=str,
        help='the filename used to store the model parameters')
    parser.add_argument('-e',
                        '--epoch',
                        type=int,
                        default=5,
                        help='the number of max epochs for training')
    parser.add_argument('-b',
                        '--batchsize',
                        type=int,
                        default=128,
                        help='batch size')
    parser.add_argument('-s',
                        '--seed',
                        type=int,
                        default=4096,
                        help='the seed for random number generator')
    # NOTE: argparse's type=bool treats any non-empty string as True,
    # so passing '--shuffle False' still evaluates to True.
    parser.add_argument('-H',
                        '--shuffle',
                        type=bool,
                        default=True,
                        help='shuffle the dataset')
    parser.add_argument(
        '-n',
        '--normalize',
        type=bool,
        default=True,
        help=
        'apply zero mean and scaling to the dataset (for numeric datasets only)'
    )
    parser.add_argument('-m',
                        '--model',
                        type=str,
                        choices=AVALIABLE_MODELS,
                        help='select a model to train the data')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        default=False,
                        help='set logger level to debug')
    parser.add_argument('-l',
                        '--savelog',
                        action='store_true',
                        default=False,
                        help='save logging file')
    parser.add_argument('-w',
                        '--overwrite',
                        action='store_true',
                        default=False,
                        help='overwrite the existing file')
    args = parser.parse_args()
    dname = args.dataset
    filename = args.ofile
    max_epochs = args.epoch
    batch_size = args.batchsize
    seed = args.seed
    use_shuffle = args.shuffle
    use_normalize = args.normalize
    model_name = args.model
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    set_logging('train', dname, verbose, save_log)

    # show parameters
    print('[train] Start training {} model...'.format(model_name))
    logger.info('Start at      : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset       :%s', dname)
    logger.info('filename      :%s', filename)
    logger.info('max_epochs    :%d', max_epochs)
    logger.info('batch_size    :%d', batch_size)
    logger.info('seed          :%d', seed)
    logger.info('use_shuffle   :%r', use_shuffle)
    logger.info('use_normalize :%r', use_normalize)
    logger.info('model_name    :%s', model_name)
    logger.info('verbose       :%r', verbose)
    logger.info('save_log      :%r', save_log)
    logger.info('overwrite     :%r', overwrite)

    master_seed(seed)

    # set DataContainer
    dc = get_data_container(
        dname,
        use_shuffle=use_shuffle,
        use_normalize=use_normalize,
    )

    # select a model
    model = None
    if model_name is not None:
        Model = models.get_model(model_name)
        model = Model()
    else:
        if dname == 'MNIST':
            model = models.MnistCnnV2()
        elif dname == 'CIFAR10':
            model = models.CifarCnn()
        elif dname == 'BreastCancerWisconsin':
            model = models.BCNN()
        elif dname in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
            num_classes = dc.num_classes
            num_features = dc.dim_data[0]
            model = models.IrisNN(num_features=num_features,
                                  hidden_nodes=num_features * 4,
                                  num_classes=num_classes)

    if model is None:
        raise AttributeError('Cannot find model!')
    modelname = model.__class__.__name__
    logger.info('Selected %s model', modelname)

    # set ModelContainer and train the model
    mc = models.ModelContainerPT(model, dc)
    mc.fit(max_epochs=max_epochs, batch_size=batch_size)

    # save
    if not os.path.exists('save'):
        os.makedirs('save')
    if filename is None:
        filename = get_pt_model_filename(modelname, dname, max_epochs)
    logger.debug('File name: %s', filename)
    mc.save(filename, overwrite=overwrite)

    # test result
    file_path = os.path.join('save', filename)
    logger.debug('Use saved parameters from %s', filename)
    mc.load(file_path)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)
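
get_pt_model_filename builds the default output name from the model, dataset and epoch count, matching the '<model>_<dataset>_e<max epochs>.pt' convention that the other scripts parse. A minimal sketch under that assumption:

def get_pt_model_filename(model_name, data_name, max_epochs):
    """Sketch: e.g. get_pt_model_filename('MnistCnnV2', 'MNIST', 50)
    returns 'MnistCnnV2_MNIST_e50.pt' (assumed naming convention)."""
    return '{}_{}_e{}.pt'.format(model_name, data_name, max_epochs)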
Code example #11
def main():
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='a file containing a pretrained model. The filename should be in "<model>_<dataset>_e<max epochs>[_<date>].pt" format')
    parser.add_argument(
        '-e', '--epoch', type=int, required=True,
        help='the number of max epochs for training')
    parser.add_argument(
        '-r', '--ratio', type=float, required=True,
        help='the ratio of adversarial examples mixed into the training set')
    parser.add_argument(
        '-b', '--batchsize', type=int, default=128, help='batch size')
    parser.add_argument(
        '-t', '--train', action='store_true', default=False,
        help='Force retraining without looking for an existing pretrained file')
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for random number generator')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-B', '--bim', action='store_true', default=False,
        help='Apply BIM attack')
    parser.add_argument(
        '-C', '--carlini', action='store_true', default=False,
        help='Apply Carlini L2 attack')
    parser.add_argument(
        '-D', '--deepfool', action='store_true', default=False,
        help='Apply DeepFool attack')
    parser.add_argument(
        '-F', '--fgsm', action='store_true', default=False,
        help='Apply FGSM attack')
    parser.add_argument(
        '-S', '--saliency', action='store_true', default=False,
        help='Apply Saliency Map attack')
    args = parser.parse_args()
    model_file = args.model
    max_epochs = args.epoch
    ratio = args.ratio
    batch_size = args.batchsize
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog
    need_train = args.train

    model_name, data_name = parse_model_filename(model_file)

    # Which attacks should be applied?
    attack_list = []
    if args.bim:
        attack_list.append('BIM')
    if args.carlini:
        attack_list.append('Carlini')
    if args.deepfool:
        attack_list.append('DeepFool')
    if args.fgsm:
        attack_list.append('FGSM')
    if args.saliency:
        attack_list.append('Saliency')

    # Quit, if there is nothing to do.
    if len(attack_list) == 0:
        logger.warning('No attack was selected. Exit.')
        sys.exit(0)

    y_file = os.path.join(
        'save', f'{model_name}_{data_name}_{attack_list[0]}_y.npy')
    attack_files = [
        os.path.join(
            'save', f'{model_name}_{data_name}_{attack_list[0]}_x.npy')
    ]
    for attack_name in attack_list:
        attack_files.append(os.path.join(
            'save', f'{model_name}_{data_name}_{attack_name}_adv.npy'))
    # the 1st file is the clean inputs
    attack_list = ['clean'] + attack_list

    # Do we need to train the discriminator?
    pretrain_file = f'AdvTrain_{model_name}_{data_name}.pt'
    if not os.path.exists(os.path.join('save', pretrain_file)):
        need_train = True

    # set logging config. Run this before logging anything!
    set_logging(LOG_NAME, data_name, verbose, save_log)

    # show parameters
    print(f'[{LOG_NAME}] Running adversarial training on {model_name}...')
    logger.info('Start at    : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('model file  :%s', model_file)
    logger.info('model       :%s', model_name)
    logger.info('dataset     :%s', data_name)
    logger.info('max_epochs  :%d', max_epochs)
    logger.info('ratio       :%f', ratio)
    logger.info('batch_size  :%d', batch_size)
    logger.info('seed        :%d', seed)
    logger.info('verbose     :%r', verbose)
    logger.info('save_log    :%r', save_log)
    logger.info('need train  :%r', need_train)
    logger.info('attacks     :%s', ', '.join(attack_list))

    # check files
    for file_name in [model_file, y_file] + attack_files:
        if not os.path.exists(file_name):
            logger.error('%s does not exist!', file_name)
            raise FileNotFoundError('{} does not exist!'.format(file_name))

    # reset seed
    master_seed(seed)

    # select data
    dc = get_data_container(
        data_name,
        use_shuffle=True,
        use_normalize=True,
    )

    # select a model
    Model = get_model(model_name)
    model = Model()
    if data_name in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        num_classes = dc.num_classes
        num_features = dc.dim_data[0]
        model = IrisNN(
            num_features=num_features,
            hidden_nodes=num_features*4,
            num_classes=num_classes)
    classifier_mc = ModelContainerPT(model, dc)
    classifier_mc.load(model_file)
    accuracy = classifier_mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    attack = BIMContainer(
        classifier_mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False)

    adv_trainer = AdversarialTraining(classifier_mc, [attack])
    if need_train:
        adv_trainer.fit(max_epochs=max_epochs,
                        batch_size=batch_size, ratio=ratio)
        adv_trainer.save(pretrain_file, overwrite=True)
    else:
        adv_trainer.load(os.path.join('save', pretrain_file))

    y = np.load(y_file, allow_pickle=False)
    for i in range(len(attack_list)):
        adv_file = attack_files[i]
        adv_name = attack_list[i]
        logger.debug('Load %s...', adv_file)
        adv = np.load(adv_file, allow_pickle=False)
        accuracy = classifier_mc.evaluate(adv, y)
        logger.info('Accuracy on %s set: %f', adv_name, accuracy)
        blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
        logger.info('Blocked %d/%d samples on %s',
                    len(blocked_indices), len(adv), adv_name)
Code example #12
def main():
    data_name = 'Iris'
    set_logging('advTraining', data_name, True, True)

    dc = DataContainer(DATASET_LIST[data_name], get_data_path())
    dc()

    model_file = os.path.join('save', 'IrisNN_Iris_e200.pt')
    num_features = dc.dim_data[0]
    num_classes = dc.num_classes
    classifier = IrisNN(
        num_features=num_features,
        hidden_nodes=num_features*4,
        num_classes=num_classes,
    )

    classifier_mc = ModelContainerPT(classifier, dc)
    classifier_mc.load(model_file)
    accuracy = classifier_mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    attack = BIMContainer(
        classifier_mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False)

    adv_trainer = AdversarialTraining(classifier_mc, [attack])
    # adv_trainer.fit(max_epochs=100, batch_size=64, ratio=1)
    # adv_trainer.save('AdvTrain_IrisNN_Iris', overwrite=True)

    file_name = os.path.join('save', 'AdvTrain_IrisNN_Iris.pt')
    adv_trainer.load(file_name)

    x = np.load(os.path.join('save', 'IrisNN_Iris_BIM_x.npy'),
                allow_pickle=False)
    y = np.load(os.path.join('save', 'IrisNN_Iris_BIM_y.npy'),
                allow_pickle=False)
    blocked_indices = adv_trainer.detect(x, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s',
                len(blocked_indices), len(x), 'clean')

    adv = np.load(os.path.join(
        'save', 'IrisNN_Iris_BIM_adv.npy'), allow_pickle=False)
    accuracy = classifier_mc.evaluate(adv, y)
    logger.info('Accuracy on %s set: %f', 'BIM', accuracy)
    blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s',
                len(blocked_indices), len(adv), 'BIM')

    adv = np.load(os.path.join(
        'save', 'IrisNN_Iris_Carlini_adv.npy'), allow_pickle=False)
    accuracy = classifier_mc.evaluate(adv, y)
    logger.info('Accuracy on %s set: %f', 'Carlini', accuracy)
    blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s',
                len(blocked_indices), len(adv), 'Carlini')

    adv = np.load(os.path.join(
        'save', 'IrisNN_Iris_DeepFool_adv.npy'), allow_pickle=False)
    accuracy = classifier_mc.evaluate(adv, y)
    logger.info('Accuracy on %s set: %f', 'DeepFool', accuracy)
    blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s',
                len(blocked_indices), len(adv), 'DeepFool')

    adv = np.load(os.path.join(
        'save', 'IrisNN_Iris_FGSM_adv.npy'), allow_pickle=False)
    accuracy = classifier_mc.evaluate(adv, y)
    logger.info('Accuracy on %s set: %f', 'FGSM', accuracy)
    blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s',
                len(blocked_indices), len(adv), 'FGSM')
Code example #13
def main():
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-m',
        '--model',
        type=str,
        required=True,
        help=
        'a file containing a pretrained model. The filename should be in "<model>_<dataset>_e<max epochs>[_<date>].pt" format'
    )
    parser.add_argument('-e',
                        '--epoch',
                        type=int,
                        required=True,
                        help='the number of max epochs for training')
    parser.add_argument(
        '-d',
        '--depth',
        type=int,
        default=0,
        help=
        'The image color depth for input images. Apply the binary-depth filter when a value is given'
    )
    parser.add_argument(
        '--sigma',
        type=float,
        default=0,
        help=
        'The standard deviation of the normal distribution. Apply the Gaussian noise filter when a value is given'
    )
    parser.add_argument(
        '-k',
        '--kernelsize',
        type=int,
        default=0,
        help=
        'The kernel size for the median filter. Apply the median filter when a value is given'
    )
    parser.add_argument('-b',
                        '--batchsize',
                        type=int,
                        default=128,
                        help='batch size')
    parser.add_argument(
        '-T',
        '--train',
        action='store_true',
        default=False,
        help=
        'Force retraining without looking for an existing pretrained file'
    )
    parser.add_argument('-s',
                        '--seed',
                        type=int,
                        default=4096,
                        help='the seed for random number generator')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        default=False,
                        help='set logger level to debug')
    parser.add_argument('-l',
                        '--savelog',
                        action='store_true',
                        default=False,
                        help='save logging file')
    parser.add_argument('-B',
                        '--bim',
                        action='store_true',
                        default=False,
                        help='Apply BIM attack')
    parser.add_argument('-C',
                        '--carlini',
                        action='store_true',
                        default=False,
                        help='Apply Carlini L2 attack')
    parser.add_argument('-D',
                        '--deepfool',
                        action='store_true',
                        default=False,
                        help='Apply DeepFool attack')
    parser.add_argument('-F',
                        '--fgsm',
                        action='store_true',
                        default=False,
                        help='Apply FGSM attack')
    parser.add_argument('-S',
                        '--saliency',
                        action='store_true',
                        default=False,
                        help='Apply Saliency Map attack')
    args = parser.parse_args()
    model_file = args.model
    max_epochs = args.epoch
    bit_depth = args.depth
    sigma = args.sigma
    kernel_size = args.kernelsize
    batch_size = args.batchsize
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog
    need_train = args.train

    model_name, data_name = parse_model_filename(model_file)

    # Which filters should be applied?
    filter_list = []
    if bit_depth > 0:
        filter_list.append('binary')
    if sigma > 0:
        filter_list.append('normal')
    if kernel_size > 0:
        filter_list.append('median')

    # Which attacks should be applied?
    attack_list = []
    if args.fgsm:
        attack_list.append('FGSM')
    if args.bim:
        attack_list.append('BIM')
    if args.deepfool:
        attack_list.append('DeepFool')
    if args.carlini:
        attack_list.append('Carlini')
    if args.saliency:
        attack_list.append('Saliency')

    # Quit, if there is nothing to do.
    if len(filter_list) == 0 or len(attack_list) == 0:
        logger.warning('Neither received any filter nor any attack. Exit')
        sys.exit(0)

    y_file = os.path.join('save',
                          f'{model_name}_{data_name}_{attack_list[0]}_y.npy')
    attack_files = [
        os.path.join('save',
                     f'{model_name}_{data_name}_{attack_list[0]}_x.npy')
    ]
    for attack_name in attack_list:
        attack_files.append(
            os.path.join('save',
                         f'{model_name}_{data_name}_{attack_name}_adv.npy'))
    # the 1st file is the clean inputs
    attack_list = ['clean'] + attack_list

    # Do we need to train the squeezer models?
    pretrain_files = []
    for fname in filter_list:
        pretrain_file = build_squeezer_filename(model_name, data_name,
                                                max_epochs, fname)
        pretrain_files.append(pretrain_file)
        if not os.path.exists(os.path.join('save', pretrain_file)):
            need_train = True

    # set logging config. Run this before logging anything!
    set_logging(LOG_NAME, data_name, verbose, save_log)

    # show parameters
    print(f'[{LOG_NAME}] Running feature squeezing on {model_name}...')
    logger.info('Start at    : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('model file  :%s', model_file)
    logger.info('model       :%s', model_name)
    logger.info('dataset     :%s', data_name)
    logger.info('max_epochs  :%d', max_epochs)
    logger.info('bit_depth   :%d', bit_depth)
    logger.info('sigma       :%f', sigma)
    logger.info('kernel_size :%d', kernel_size)
    logger.info('batch_size  :%d', batch_size)
    logger.info('seed        :%d', seed)
    logger.info('verbose     :%r', verbose)
    logger.info('save_log    :%r', save_log)
    logger.info('need train  :%r', need_train)
    logger.info('filters     :%s', ', '.join(filter_list))
    logger.info('attacks     :%s', ', '.join(attack_list))
    logger.info('pretrained  :%s', ', '.join(pretrain_files))

    # check files
    for file_name in [model_file, y_file] + attack_files:
        if not os.path.exists(file_name):
            logger.error('%s does not exist!', file_name)
            raise FileNotFoundError('{} does not exist!'.format(file_name))

    # reset seed
    master_seed(seed)

    # select data
    dc = get_data_container(
        data_name,
        use_shuffle=True,
        use_normalize=True,
    )

    # select a model
    Model = get_model(model_name)
    model = Model()
    if data_name in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        num_classes = dc.num_classes
        num_features = dc.dim_data[0]
        model = IrisNN(num_features=num_features,
                       hidden_nodes=num_features * 4,
                       num_classes=num_classes)
    classifier_mc = ModelContainerPT(model, dc)
    classifier_mc.load(model_file)
    accuracy = classifier_mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    # initialize Squeezer
    squeezer = FeatureSqueezing(
        classifier_mc,
        filter_list,
        bit_depth=bit_depth,
        sigma=sigma,
        kernel_size=kernel_size,
        pretrained=True,
    )

    # train or load parameters for Squeezer
    if need_train:
        squeezer.fit(max_epochs=max_epochs, batch_size=batch_size)
        squeezer.save(model_file, True)
    else:
        squeezer.load(model_file)

    # traverse all attacks
    y = np.load(y_file, allow_pickle=False)
    for i in range(len(attack_list)):
        adv_file = attack_files[i]
        adv_name = attack_list[i]
        logger.debug('Load %s...', adv_file)
        adv = np.load(adv_file, allow_pickle=False)
        acc_og = classifier_mc.evaluate(adv, y)
        acc_squeezer = squeezer.evaluate(adv, y)
        logger.info('Accuracy on %s set - OG: %f, Squeezer: %f', adv_name,
                    acc_og, acc_squeezer)
        blocked_indices = squeezer.detect(adv, return_passed_x=False)
        logger.info('Blocked %d/%d samples on %s', len(blocked_indices),
                    len(adv), adv_name)