def batch_adv_testing(device, num_models, seed_data,
                      adv_folder,
                      mutated_models_path, model_start_num, seed_model):
    if seed_data == 'mnist':
        normalization = normalize_mnist
        img_mode = 'L'  # 8-bit pixels, black and white
    elif seed_data == 'cifar10':
        normalization = normalize_cifar10
        img_mode = None
    elif seed_data == 'ilsvrc12':
        normalization = normalize_imgNet
        img_mode = None
    else:
        raise Exception('Unknown data source!')

    adv_type = parseAdvType(adv_folder)

    tf = transforms.Compose([transforms.ToTensor(), normalization])

    logging.info('>>>>>>>>>>>seed data:{},mutated_models:{}<<<<<<<<<<'.format(seed_data, mutated_models_path))
    mutated_models = fetch_models(mutated_models_path, num_models, device=device,
                                  start_no=model_start_num,seed_model=seed_model)
    ensemble_model = EnsembleModel(mutated_models)

    dataset = MyDataset(root=adv_folder, transform=tf, img_mode=img_mode)
    dataloader = DataLoader(dataset=dataset)
    logging.info(
        '>>>Progress: {} mutated models for {}, samples {}'.format(len(mutated_models), adv_type,
                                                                   adv_folder))
    logging.info('>>Test-Details-start-{}>>>{}>>>{}'.format(num_models, adv_type,seed_data))
    samples_filter(ensemble_model, dataloader, '{} >> {} '.format(num_models, adv_type), size=-1,
                   show_progress=False, device=device, is_verbose=True)
    logging.info('>>Test-Details-end-{}>>>{}>>>{}'.format(num_models, adv_type,seed_data))
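

# Usage sketch for batch_adv_testing (hypothetical paths and argument values; the real
# ones depend on how this project stores its adversarial samples and mutated models):
#   device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#   batch_adv_testing(device, num_models=100, seed_data='mnist',
#                     adv_folder='<path-to-adversarial-samples>',
#                     mutated_models_path='<path-to-mutated-models>',
#                     model_start_num=1, seed_model=MnistNet4())
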
def batch_wl_testing(device, num_models, seed_data, raw_data_path, seed_model, mutated_models_path,
                       model_start_num, use_train=True):

    if seed_data == 'mnist':
        data_type = DATA_MNIST
    elif seed_data == 'cifar10':
        data_type = DATA_CIFAR10
    else:
        raise Exception('Unknown data source!')

    dataset, channel = load_data_set(data_type, raw_data_path, train=use_train)
    dataloader = DataLoader(dataset=dataset)

    wrong_labeled = samples_filter(seed_model, dataloader, return_type='adv', name='seed model', device=device,show_accuracy=False)
    data = datasetMutiIndx(dataset, [idx for idx, _, _ in wrong_labeled])
    wrong_labels = [wrong_label for idx, true_label, wrong_label in wrong_labeled]
    data = TensorDataset(data.tensors[0], data.tensors[1], torch.LongTensor(wrong_labels))

    logging.info(
        '>>>>>>>>>>>For {}({}),mutated Models Path: {} <<<<<<<<<<'.format(
            seed_data,"Training" if use_train else "Testing",mutated_models_path))

    mutated_models = fetch_models(mutated_models_path, num_models, device=device,
                                  start_no=model_start_num,seed_model=seed_model)

    ensemble_model = EnsembleModel(mutated_models)
    logging.info(
        '>>>Progress: {} mutated models for wl samples, '.format(len(mutated_models)))
    logging.info('>>Test-Details-start-{}>>> wrong labeled of {}'.format(num_models, seed_data))
    samples_filter(ensemble_model, DataLoader(dataset=data), 'legitimate {} >>'.format(seed_data), size=-1,
                   show_progress=False, device=device, is_verbose=True)
    logging.info('>>Test-Details-end-{}>>> wrong labeled of {}'.format(num_models, seed_data))
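

# Usage sketch for batch_wl_testing (hypothetical argument values): tests the mutated
# models on samples that the seed model itself labels wrongly ("wl" = wrongly labeled).
#   batch_wl_testing(device, num_models=100, seed_data='cifar10',
#                    raw_data_path='<path-to-cifar10-data>', seed_model=GoogLeNet(),
#                    mutated_models_path='<path-to-mutated-models>',
#                    model_start_num=1, use_train=False)
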
def batch_legitimate_testing(device, num_models, seed_data, raw_data_path, seed_model,
                             mutated_models_path, model_start_num, use_train=True):
    if seed_data == 'mnist':
        data_type = DATA_MNIST
    elif seed_data == 'cifar10':
        data_type = DATA_CIFAR10
    else:
        raise Exception('Unknown data source!')
    data, channel = load_data_set(data_type, raw_data_path, train=use_train)
    correct_labeled = samples_filter(seed_model, DataLoader(dataset=data), return_type='normal', name='seed model',
                                     device=device,show_accuracy=False)
    random_indices = np.arange(len(correct_labeled))
    np.random.seed(random_seed)
    np.random.shuffle(random_indices)
    random_indices = random_indices[:MAX_NUM_SAMPLES]
    data = datasetMutiIndx(data, np.array([idx for idx, _, _ in correct_labeled])[random_indices])

    logging.info(
        '>>>>>>>>>>>For {}({}) randomly choose {} samples with random seed {}. mutated_models:{}<<<<<<<<<<'.format(
            seed_data,"Training" if use_train else "Testing",MAX_NUM_SAMPLES,random_seed,mutated_models_path))

    mutated_models = fetch_models(mutated_models_path, num_models,device=device,
                                  start_no=model_start_num,seed_model=seed_model)

    ensemble_model = EnsembleModel(mutated_models)
    logging.info(
        '>>>Progress: {} mutated models for normal samples, samples path: {}'.format(len(mutated_models),
                                                                                     raw_data_path))
    logging.info('>>Test-Details-start-{}>>>{}'.format(num_models, seed_data))
    samples_filter(ensemble_model, DataLoader(dataset=data), 'legitimate {} >>'.format(seed_data), size=-1,
                   show_progress=False, device=device, is_verbose=True)
    logging.info('>>Test-Details-end-{}>>>{}'.format(num_models, seed_data))
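

# Usage sketch for batch_legitimate_testing (hypothetical argument values): tests the
# mutated models on MAX_NUM_SAMPLES randomly chosen samples that the seed model
# classifies correctly.
#   batch_legitimate_testing(device, num_models=100, seed_data='mnist',
#                            raw_data_path='<path-to-mnist-data>', seed_model=MnistNet4(),
#                            mutated_models_path='<path-to-mutated-models>',
#                            model_start_num=1, use_train=False)
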
def step_mutated_vote(models_folder, model_name_list, target_samples, samples_folder, useAttackSeed=True,
                      dataloader=None):
    '''
    Vote with mutated models in steps of 10, up to 100 models per seed model.
    :param models_folder:
    :param model_name_list:
    :param target_samples:
    :param samples_folder:
    :param useAttackSeed:
    :param dataloader:
    :return:
    '''

    for i, target_sample in enumerate(target_samples):

        # i += 3 just for mnist4, mnist5

        if dataloader is not None:
            sample_loader = dataloader
        else:
            # Build a fresh dataloader for each sample set; reusing the loader created for
            # the first set would evaluate every later set on the wrong data.
            adv_file_path = os.path.join(samples_folder, target_sample)
            torch.manual_seed(random_seed)
            dataset = MyDataset(root=adv_file_path,
                                transform=transforms.Compose([transforms.ToTensor(), normalize_mnist]))
            sample_loader = DataLoader(dataset=dataset, shuffle=True)

        print('>>>Progress: Test attacked samples of {} '.format(target_sample))
        logging.info('>>>Progress: Test attacked samples of {} '.format(target_sample))

        # for num_models in range(10, 110, 10):
        for num_models in [100]:
            # to do
            # 1. for each seed model, select the top [num_models] models
            # 2. ensemble 5*[num_models] models

            num_seed_models = len(model_name_list)
            models_list = []
            for i2, seed_name in enumerate(model_name_list):
                # When useAttackSeed is False, skip the seed at the same index as the
                # current attacked sample set.
                if useAttackSeed or i != i2:
                    models_list.extend(fetch_models(models_folder, num_models, seed_name))

            logging.info('>>>Progress: {} models for {}'.format(len(models_list), target_sample))
            print('>>>Progress: {} models for {}'.format(len(models_list), target_sample))

            vote_model = EnsembleModel(models_list)
            logging.info('>>Test-Details-start-{}>>>{}'.format(num_seed_models * num_models, target_sample))
            samples_filter(vote_model, sample_loader, '{} >> {} '.format(len(models_list), target_sample), size=-1,
                           show_progress=True)
            logging.info('>>Test-Details-end-{}>>>{}'.format(num_seed_models * num_models, target_sample))
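

# Usage sketch for step_mutated_vote (hypothetical model and sample-set names; each entry
# of target_samples is assumed to correspond to the seed model at the same index in
# model_name_list):
#   step_mutated_vote('<mutated-models-folder>',
#                     ['mnist1', 'mnist2', 'mnist3', 'mnist4', 'mnist5'],
#                     ['adv-mnist1', 'adv-mnist2', 'adv-mnist3', 'adv-mnist4', 'adv-mnist5'],
#                     '<adv-samples-folder>', useAttackSeed=False)

# The __init__ and fetch_single_model below are methods of a class (its class statement
# is not included in this fragment).
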
 def __init__(self,
              threshold,
              sigma,
              beta,
              alpha,
              seed_name,
              max_mutated_numbers,
              data_type,
              device='cpu',
              models_folder=None):
     self.threshold = threshold
     self.sigma = sigma
     self.beta = beta
     self.alpha = alpha
     self.device = device
     self.data_type = data_type
     self.models_folder = models_folder
     self.seed_name = seed_name
     self.max_mutated_numbers = max_mutated_numbers
     self.start_no = 1
     self.seed_model_shell = GoogLeNet() if seed_name == "googlenet" else MnistNet4()
     if data_type == DATA_MNIST:
         self.max_models_in_memory = self.max_mutated_numbers
         self.mutated_models = fetch_models(models_folder,
                                            self.max_models_in_memory,
                                            self.device,
                                            self.seed_model_shell,
                                            start_no=self.start_no)
     else:
         self.max_models_in_memory = 20
         self.mutated_models = fetch_models(models_folder,
                                            self.max_models_in_memory,
                                            self.device,
                                            self.seed_model_shell,
                                            start_no=self.start_no)
         self.start_no += self.max_models_in_memory
 def fetch_single_model(self, t):
     '''
     :param t: index of the model to fetch (1-indexed)
     :return: the t-th mutated model
     '''
     if self.data_type == DATA_MNIST:
         return self.mutated_models[t - 1]
     else:
         # Only self.max_models_in_memory (20) models are cached at a time;
         # (t - 1) % self.max_models_in_memory maps the 1-indexed t onto the cache window.
         if t < self.start_no:
             return self.mutated_models[(t - 1) % self.max_models_in_memory]
         else:
             self.mutated_models = fetch_models(self.models_folder,
                                                self.max_models_in_memory,
                                                self.device,
                                                self.seed_model_shell,
                                                start_no=self.start_no)
             self.start_no += self.max_models_in_memory
             return self.mutated_models[(t - 1) % self.max_models_in_memory]
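

# Usage sketch (hypothetical detector instance): models are assumed to be visited in
# ascending order so that the 20-model cache window advances correctly.
#   for t in range(1, detector.max_mutated_numbers + 1):
#       mutated_model = detector.fetch_single_model(t)
#       # ... query mutated_model on the sample under test ...

# The function below is an alternative batch_legitimate_testing that delegates the
# "correctly labeled + random subsample" selection to a load_natural_data helper.
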
def batch_legitimate_testing(device,
                             num_models,
                             seed_data,
                             raw_data_path,
                             seed_model,
                             mutated_models_path,
                             model_start_num,
                             use_train=True):
    if seed_data == 'mnist':
        data_type = DATA_MNIST
    elif seed_data == 'cifar10':
        data_type = DATA_CIFAR10
    else:
        raise Exception('Unknown data source!')
    data = load_natural_data(True,
                             data_type,
                             raw_data_path,
                             use_train=use_train,
                             seed_model=seed_model,
                             device=device,
                             MAX_NUM_SAMPLES=MAX_NUM_SAMPLES)

    logging.info(
        '>>>>>>>>>>>For {}({}) randomly choose {} samples with random seed {}. mutated_models:{}<<<<<<<<<<'
        .format(seed_data, "Training" if use_train else "Testing",
                MAX_NUM_SAMPLES, random_seed, mutated_models_path))

    mutated_models = fetch_models(mutated_models_path,
                                  num_models,
                                  device=device,
                                  start_no=model_start_num,
                                  seed_model=seed_model)

    ensemble_model = EnsembleModel(mutated_models)
    logging.info(
        '>>>Progress: {} mutated models for normal samples, samples path: {}'.
        format(len(mutated_models), raw_data_path))
    logging.info('>>Test-Details-start-{}>>>{}'.format(num_models, seed_data))
    samples_filter(ensemble_model,
                   DataLoader(dataset=data),
                   'legitimate {} >>'.format(seed_data),
                   size=-1,
                   show_progress=False,
                   device=device,
                   is_verbose=True)
    logging.info('>>Test-Details-end-{}>>>{}'.format(num_models, seed_data))