def train_on_poisoned_data(args, poisoned_data):
    callable_ds = datasets.dataset_helper(args.dataset)
    ds = callable_ds()
    model = dnn_utils.model_helper(args.model_arch)()
    model = dnn_utils.multi_gpu_wrap(model)

    for x, y in zip(*poisoned_data):
        ds.add_point_to_train(x, y)

    model = mtp_utils.train_clean_model(ds, args, epochs=15)

    # Compute metrics for said model
    train_loader, val_loader = callable_ds().get_loaders(args.batch_size)
    _, train_loss = dnn_utils.get_model_metrics(model, train_loader)
    test_acc, _ = dnn_utils.get_model_metrics(model, val_loader)

    (trn_sub_acc, _), (trn_nsub_acc, _) = dnn_utils.get_model_metrics(
        model, train_loader, args.poison_class)
    (tst_sub_acc, _), (tst_nsub_acc, _) = dnn_utils.get_model_metrics(
        model, val_loader, args.poison_class)

    # Print accuracies on target/non-target data
    # On seen (train) and unseen (val) data
    print("Total Acc: %.3f" % test_acc)
    print('Train Target Acc : %.3f' % trn_sub_acc)
    print('Train Collat Acc : %.3f' % trn_nsub_acc)
    print('Test Target Acc : %.3f' % tst_sub_acc)
    print('Test Collat Acc : %.3f' % tst_nsub_acc)
    print()
def get_pop_accs(poison_model_path):
    # Load target model theta_p, set to eval mode
    theta_p = dnn_utils.model_helper("flat")(n_classes=2)
    theta_p = theta_p.cuda()
    theta_p.load_state_dict(ch.load(poison_model_path))
    theta_p.eval()

    # Report performance of poisoned model
    train_loader, test_loader = datasets.dataset_helper("memory")(
        path="./data/datasets/MNIST17/split_1.pt").get_loaders(512)

    # Report accuracy on unseen population data
    (tst_sub_acc, _), _ = dnn_utils.get_model_metrics(
        model=theta_p, loader=test_loader, target_prop=0)
    return tst_sub_acc
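# A minimal usage sketch (an illustration, not part of the original script):
# sweep a directory of saved poisoned checkpoints and collect the population
# accuracy that get_pop_accs reports. The directory and glob pattern below are
# assumptions.
def _example_sweep_pop_accs(model_dir="./models/poisoned"):
    import glob
    accs = {}
    for ckpt in sorted(glob.glob(os.path.join(model_dir, "*.pth"))):
        accs[ckpt] = get_pop_accs(ckpt)
    return accs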
def modelTargetPoisoning(model_p, logger, args):
    # Implementation of Algorithm 1, modified for DNNs
    # Line number corresponding to the Algorithm is mentioned
    # along with each high-level function call

    # Fetch appropriate dataset
    ds = datasets.dataset_helper(args.dataset)()

    # Maintain copy of clean data (for seed sampling)
    ds_clean = datasets.dataset_helper(args.dataset)()

    # Line 1: Collect poisoning points
    D_p = [[], []]

    # Load poison data, if provided
    if args.poison_data:
        print(utils.green_print("Loading poison data"))
        data = np.load("./data/poison_data/poison_data.npz")
        # Normalize to [0, 1] for use by model
        all_poison_data_x = ch.from_numpy(data['x']).float() / 255.
        all_poison_data_x = ch.unsqueeze(all_poison_data_x, 1)
        all_poison_data_y = ch.from_numpy(data['y'])

    # Line 3: Since D_p is empty in the first iteration, train the model outside the loop
    model_t_pretrained, pretrain_optim = mtp_utils.train_clean_model(ds, args)

    # Report performance of clean model
    batch_size = args.batch_size
    if batch_size == -1:
        batch_size = len(ds.train)
    train_loader, test_loader = ds.get_loaders(batch_size)

    clean_acc, clean_total_loss = dnn_utils.get_model_metrics(
        model_t_pretrained, test_loader, lossfn=args.loss)
    print(utils.yellow_print("[Clean-model][Test] Total Acc: %.4f" % clean_acc))
    print(utils.yellow_print(
        "[Clean-model] Loss on train: %.4f" % clean_total_loss))
    (population_acc, _), (non_population_acc, _) = dnn_utils.get_model_metrics(
        model_t_pretrained, test_loader,
        lossfn=args.loss, target_prop=args.poison_class)
    print(utils.red_print(
        "[Clean-model][Test] Population Acc: %.4f" % population_acc))
    print(utils.red_print(
        "[Clean-model][Test] Non-Population Acc: %.4f" % non_population_acc))
    print()

    # Line 2: Iterate until stopping criteria met
    prev_loss, best_loss = np.inf, np.inf
    num_iters = 0
    condition = True
    while condition:

        if len(D_p[0]) > 0:
            # Line 3: theta_t = train(D_c U D_p)
            print(utils.yellow_print(
                "[Training model on Dc U Dp (on %d samples)]" % len(ds.train)))

            # Get loader for D_c U D_p
            batch_size = args.batch_size
            if batch_size == -1:
                batch_size = len(ds.train)
            data_loader, _ = ds.get_loaders(batch_size)

            # Do not re-initialize model if finetuning requested
            if not args.finetune:
                # Construct model
                model_t = dnn_utils.get_seeded_wrapped_model(
                    args, n_classes=ds.n_classes)
            else:
                # Start finetuning from the point where model
                # has seen only clean data
                model_t = copy.deepcopy(model_t_pretrained)

            # Set model to training mode
            model_t.train()

            # Define optimizer
            optim = ch.optim.Adam(model_t.parameters(),
                                  lr=args.pretrain_lr,
                                  weight_decay=args.pretrain_weight_decay)

            # Adjust starting point of optimizer
            # if finetuning is requested
            if args.finetune:
                optim.load_state_dict(pretrain_optim.state_dict())

            # Increase number of iterations theta_t is trained for
            # as size of its training set |D_c U D_p| increases
            iters = args.iters
            if args.increase_iters:
                iters += int((len(ds.train) - len(ds_clean.train)) /
                             args.increase_every)

            # Train model
            for e in range(iters):
                # Train epoch
                dnn_utils.epoch(model=model_t, loader=data_loader,
                                optimizer=optim, epoch_num=e + 1,
                                c_rule=None, n_classes=None,
                                verbose=True, lossfn=args.loss)
        else:
            model_t = model_t_pretrained

        # Make sure theta_t is in eval mode
        model_t.eval()

        start_with = None
        if args.start_opt_real:
            # If flag set, start with real data sampled from
            # (unpoisoned) train loader
            batch_size = args.batch_size
            if batch_size == -1:
                batch_size = len(ds.train)
            loader, _ = ds_clean.get_loaders(batch_size)
            start_with = datasets.get_sample_from_loader(
                loader, args.trials, ds_clean.n_classes)
        elif args.poison_data:
            # Sample 'num-trials' data from this
            perm = ch.randperm(all_poison_data_x.size(0))
            idx = perm[:args.trials]
            start_with = (all_poison_data_x[idx], all_poison_data_y[idx])

        # Line 4: Compute (x*, y*)
        if args.use_optim_for_optimal:
            find_optimal_function = mtp_utils.find_optimal_using_optim
        else:
            find_optimal_function = mtp_utils.find_optimal

        (x_opt, y_opt), best_loss = find_optimal_function(
            theta_t=model_t, theta_p=model_p,
            input_shape=ds.datum_shape,
            n_classes=ds.n_classes,
            trials=args.trials,
            num_steps=args.num_steps,
            step_size=args.optim_lr,
            verbose=True, start_with=start_with,
            lossfn=args.loss, dynamic_lr=args.dynamic_lr,
            filter=args.filter)

        # If loss increased, try optimization once more
        # with double the trials, to reduce chance of bad minima
        if args.skip_bad and best_loss > prev_loss:
            print(utils.red_print("Re-running optimization with more seeds"))
            (x_opt, y_opt), best_loss = find_optimal_function(
                theta_t=model_t, theta_p=model_p,
                input_shape=ds.datum_shape,
                n_classes=ds.n_classes,
                trials=args.trials * 2,
                num_steps=args.num_steps,
                step_size=args.optim_lr,
                verbose=True, start_with=start_with,
                lossfn=args.loss, dynamic_lr=args.dynamic_lr)

        # Log some information about x*, y*
        with ch.no_grad():
            pred_t, pred_p = model_t(x_opt), model_p(x_opt)
            if pred_t.argmax(1) == y_opt.item():
                print(utils.red_print("[BAD OPTIMIZATION. CHECK]"))
            print(utils.cyan_print(
                "Loss: %.3f Mt(x*): %d, Mp(x*): %d, y*: %d" %
                (best_loss.item(), pred_t.argmax(1), pred_p.argmax(1), y_opt)))

        # Line 5: Add (x*, y*) to D_p
        for _ in range(args.n_copies):
            D_p[0].append(x_opt.cpu())
            D_p[1].append(y_opt.cpu())
            ds.add_point_to_train(x_opt.cpu(), y_opt.cpu())
        print()

        # Calculate useful statistics
        (tst_sub_acc, _), (tst_nsub_acc, _) = dnn_utils.get_model_metrics(
            model=model_t, loader=test_loader,
            target_prop=args.poison_class, lossfn=args.loss)
        (trn_sub_acc, _), (trn_nsub_acc, _) = dnn_utils.get_model_metrics(
            model=model_t, loader=train_loader,
            target_prop=args.poison_class, lossfn=args.loss)
        norm_diffs = dnn_utils.model_l2_closeness(model_t, model_p)

        # Log information
        mtp_utils.log_information(
            logger=logger, best_loss=best_loss,
            x_opt=x_opt, model_t=model_t, norm_diffs=norm_diffs,
            trn_sub_acc=trn_sub_acc, trn_nsub_acc=trn_nsub_acc,
            tst_sub_acc=tst_sub_acc, tst_nsub_acc=tst_nsub_acc,
            num_iters=num_iters + 1, args=args)

        # Line 6: Get ready to check condition
        condition = stop_cond(args=args, best_loss=best_loss,
                              num_iters=num_iters,
                              tst_sub_acc=tst_sub_acc,
                              norm_diffs=norm_diffs)

        # Keep track of no. of iterations
        num_iters += 1

        # Keep track of loss from previous iteration
        prev_loss = best_loss.item()

    # Line 7: Return poison data
    return D_p, model_t
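# `stop_cond` is used above but not defined in this excerpt. Below is a minimal
# illustrative sketch of such a stopping criterion; it is an assumption, not
# the repository's actual implementation. `args.iter_limit` is a hypothetical
# field (only `args.err_threshold` appears elsewhere in these scripts).
def _example_stop_cond(args, best_loss, num_iters, tst_sub_acc, norm_diffs=None):
    # Keep iterating while the attack has neither driven the target
    # sub-population's accuracy below the requested error threshold
    # nor exhausted its iteration budget.
    reached_goal = tst_sub_acc <= (1 - args.err_threshold)
    out_of_budget = num_iters >= args.iter_limit
    return not (reached_goal or out_of_budget)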
# Print all arguments
utils.flash_utils(args)

# Get number of classes
n_classes = datasets.dataset_helper(args.dataset)().n_classes

# Load target model theta_p, set to eval mode
theta_p = dnn_utils.model_helper(args.model_arch)(n_classes=n_classes)
theta_p = theta_p.cuda()
theta_p.load_state_dict(ch.load(args.poison_model_path))
theta_p.eval()

# Report performance of poisoned model
train_loader, test_loader = datasets.dataset_helper(
    args.dataset)().get_loaders(512)

clean_acc, _ = dnn_utils.get_model_metrics(theta_p, test_loader)
print(utils.yellow_print("[Poisoned-model] Total Acc: %.4f" % clean_acc))
_, clean_total_loss = dnn_utils.get_model_metrics(theta_p, train_loader)
print(utils.yellow_print(
    "[Poisoned-model] Loss on train: %.4f" % clean_total_loss))

# Report weight norm for poisoned model
poisoned_norm = dnn_utils.get_model_l2_norm(theta_p).item()
print(utils.yellow_print(
    "[Poisoned-model] Weights norm: %.4f" % poisoned_norm))

# Report accuracy on unseen population data
(tst_sub_acc, _), (tst_nsub_acc, _) = dnn_utils.get_model_metrics(
    model=theta_p, loader=test_loader,
    target_prop=args.poison_class)
def modelTargetPoisoning(model_p, logger, args):
    # Implementation of Algorithm 1, modified for DNNs
    # Line number corresponding to the Algorithm is mentioned
    # along with each high-level function call

    # Fetch appropriate dataset
    ds = datasets.dataset_helper(args.dataset)()

    # Keep track of number of points model has seen (virtually)
    # For loss-normalization purposes
    points_seen_count = len(ds.train)

    # Line 1: Collect poisoning points
    D_p = [[], []]

    # Line 3: Since D_p is empty in the first iteration, train the model outside the loop
    model_t = mtp_utils.train_clean_model(ds, args)

    # Report performance of clean model
    train_loader, test_loader = ds.get_loaders(args.batch_size)
    clean_acc, _ = dnn_utils.get_model_metrics(model_t, test_loader)
    print(utils.yellow_print("[Clean-model] Total Acc: %.4f" % clean_acc))
    _, clean_total_loss = dnn_utils.get_model_metrics(model_t, train_loader)
    print(utils.yellow_print(
        "[Clean-model] Loss on train: %.4f" % clean_total_loss))
    print()

    # theta_1: (sum of) gradients of model weights
    # with respect to clean training set
    print(utils.yellow_print("[Computing gradients on clean training data]"))
    theta_curr = datasets.get_dataset_gradients(
        model=model_t, ds=ds,
        batch_size=args.batch_size,
        weight_decay=args.pretrain_weight_decay,
        verbose=args.verbose_precomp,
        is_train=True)

    # Line 2: Iterate until stopping criteria met
    best_loss = np.inf
    num_iters = 0
    condition = True
    while condition:

        # Line 4: Compute (x_opt, y_opt)
        opt_pair, best_loss = mtp_utils.find_optimal(
            theta_t=model_t, theta_p=model_p,
            input_shape=ds.datum_shape,
            n_classes=ds.n_classes,
            trials=args.trials,
            num_steps=args.num_steps,
            step_size=args.optim_lr,
            verbose=args.verbose_opt)
        x_opt, y_opt = opt_pair

        # Update theta (gradients for online learning) for use in next iteration
        print(utils.yellow_print("[Updating gradients]"))
        theta_curr = mtp_utils.update_gradients(
            model=model_t, thetas=theta_curr,
            weight_decay=args.update_weight_decay,
            x_opt=x_opt, y_opt=y_opt)

        # Calculate useful statistics
        (tst_sub_acc, _), _ = dnn_utils.get_model_metrics(
            model=model_t, loader=test_loader,
            target_prop=args.poison_class)
        _, (trn_nsub_acc, _) = dnn_utils.get_model_metrics(
            model=model_t, loader=train_loader,
            target_prop=args.poison_class)
        norm_diffs = dnn_utils.model_l2_closeness(model_t, model_p)

        # Log information
        mtp_utils.log_information(
            logger=logger, best_loss=best_loss,
            x_opt=x_opt, model_t=model_t,
            norm_diffs=norm_diffs,
            trn_nsub_acc=trn_nsub_acc,
            tst_sub_acc=tst_sub_acc,
            num_iters=num_iters, args=args)

        # Line 3: theta_t = train(D_c U D_p)
        # Instead of training from scratch, perform online mirror descent
        model_t = mtp_utils.w_optimal_gradient_ascent(
            model=model_t,
            thetas=theta_curr,
            num_points_seen_virtually=points_seen_count,
            method=args.method,
            lr=args.oga_lr,
            weight_decay=args.oga_weight_decay,
            # Unclear whether this should be the same weight decay used when
            # the model was pre-trained, or a larger value to prevent model
            # weights from exploding
            # weight_decay=args.pretrain_weight_decay,
            iters=args.iters,
            verbose=args.verbose_oga)

        # Line 5: Add (x*, y*) to D_p
        D_p[0].append(x_opt.cpu())
        D_p[1].append(y_opt.cpu())
        points_seen_count += 1

        # Log some information about x*, y*
        pred_t, pred_p = model_t(x_opt), model_p(x_opt)
        print(utils.cyan_print(
            "Mt(x*): %d, Mp(x*): %d, y*: %d" %
            (pred_t.argmax(1), pred_p.argmax(1), y_opt)))

        # Line 6: Get ready to check condition
        condition = stop_cond(args=args, best_loss=best_loss,
                              num_iters=num_iters,
                              model_t=model_t, model_p=model_p,
                              tst_sub_acc=tst_sub_acc,
                              norm_diffs=norm_diffs)

        # Keep track of no. of iterations
        num_iters += 1
        print()

    # Line 7: Return poison data
    return D_p, model_t
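# The returned D_p is a pair of lists (tensors x*, labels y*). As an
# illustrative sketch (an assumption, not part of the original pipeline), the
# collected points could be written out in the same .npz layout that the
# `args.poison_data` branch of the other variant loads: uint8 images under 'x'
# and integer labels under 'y'. Assumes single-channel images with values in [0, 1].
def _example_save_poison_data(D_p, out_path="./data/poison_data/poison_data.npz"):
    xs = ch.stack([x.squeeze() for x in D_p[0]], 0)
    ys = ch.stack([y.squeeze() for y in D_p[1]], 0)
    # De-normalize from [0, 1] floats back to uint8 pixel values
    xs = (xs * 255.).round().clamp(0, 255).to(ch.uint8)
    np.savez(out_path, x=xs.numpy(), y=ys.numpy().astype(np.int64))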
args.verbose_opt = True
args.verbose_precomp = True

# Print all arguments
utils.flash_utils(args)

# Load target model theta_p
theta_p = dnn_utils.model_helper(args.model_arch)()
theta_p = dnn_utils.multi_gpu_wrap(theta_p)
theta_p.load_state_dict(ch.load(args.poison_model_path))
theta_p.eval()

# Report performance of poisoned model
train_loader, test_loader = datasets.dataset_helper(
    args.dataset)().get_loaders(args.batch_size)

clean_acc, _ = dnn_utils.get_model_metrics(theta_p, test_loader)
print(utils.yellow_print("[Poisoned-model] Total Acc: %.4f" % clean_acc))
_, clean_total_loss = dnn_utils.get_model_metrics(theta_p, train_loader)
print(utils.yellow_print(
    "[Poisoned-model] Loss on train: %.4f" % clean_total_loss))

# Report weight norm for poisoned model
poisoned_norm = dnn_utils.get_model_l2_norm(theta_p).item()
print(utils.yellow_print(
    "[Poisoned-model] Weights norm: %.4f" % poisoned_norm))
print()

for valid_theta_err in args.theta_values:
    args.err_threshold = valid_theta_err
def modelTargetPoisoningEnsemble(models_p, logger, args):
    # Implementation of Algorithm 1, modified for DNNs
    # Line number corresponding to the Algorithm is mentioned
    # along with each high-level function call

    # Fetch appropriate dataset
    ds = datasets.dataset_helper("memory")(path=args.path_1)

    # Maintain copy of clean data (for seed sampling)
    ds_clean = datasets.dataset_helper("memory")(path=args.path_1)

    # Data to pick points from (for x* optimization)
    ds_second = datasets.dataset_helper("memory")(path=args.path_2)
    loader_optim, _ = ds_second.get_loaders(1000)

    # Line 1: Collect poisoning points
    D_p = [[], []]

    # Line 3: Since D_p is empty in the first iteration, train the model outside the loop
    model_t_pretrained, pretrain_optim = mtp_utils.train_clean_model(ds, args)

    # Report performance of clean model
    batch_size = args.batch_size
    if batch_size == -1:
        batch_size = len(ds.train)
    train_loader, test_loader = ds.get_loaders(batch_size)

    clean_acc, clean_total_loss = dnn_utils.get_model_metrics(
        model_t_pretrained, test_loader, lossfn=args.loss)
    print(utils.yellow_print("[Clean-model][Test] Total Acc: %.4f" % clean_acc))
    print(utils.yellow_print(
        "[Clean-model] Loss on train: %.4f" % clean_total_loss))
    (population_acc, _), (non_population_acc, _) = dnn_utils.get_model_metrics(
        model_t_pretrained, test_loader,
        lossfn=args.loss, target_prop=args.poison_class)
    print(utils.red_print(
        "[Clean-model][Test] Population Acc: %.4f" % population_acc))
    print(utils.red_print(
        "[Clean-model][Test] Non-Population Acc: %.4f" % non_population_acc))
    print()

    # Line 2: Iterate until stopping criteria met
    tst_sub_acc = 1.0
    best_loss = np.inf
    num_iters = 0
    condition = True
    while condition:

        if len(D_p[0]) > 0:
            # Line 3: theta_t = train(D_c U D_p)
            print(utils.yellow_print(
                "[Training model on Dc U Dp (on %d samples)]" % len(ds.train)))

            # Get loader for D_c U D_p
            batch_size = args.batch_size
            if batch_size == -1:
                batch_size = len(ds.train)
            data_loader, _ = ds.get_loaders(batch_size)

            # Do not re-initialize model if finetuning requested
            if not args.finetune:
                # Construct model
                model_t = dnn_utils.get_seeded_wrapped_model(
                    args, n_classes=ds.n_classes)
            else:
                # Start finetuning from the point where model
                # has seen only clean data
                model_t = copy.deepcopy(model_t_pretrained)

            # Set model to training mode
            model_t.train()

            # Define optimizer
            optim = ch.optim.Adam(model_t.parameters(),
                                  lr=args.pretrain_lr,
                                  weight_decay=args.pretrain_weight_decay)

            # Adjust starting point of optimizer
            # if finetuning is requested
            if args.finetune:
                optim.load_state_dict(pretrain_optim.state_dict())

            # Increase number of iterations theta_t is trained for
            # as size of its training set |D_c U D_p| increases
            iters = args.iters
            if args.increase_iters:
                iters += int((len(ds.train) - len(ds_clean.train)) /
                             args.increase_every)

            # Train model
            for e in range(iters):
                # Train epoch
                dnn_utils.epoch(model=model_t, loader=data_loader,
                                optimizer=optim, epoch_num=e + 1,
                                c_rule=None, n_classes=None,
                                verbose=True, lossfn=args.loss)
        else:
            model_t = model_t_pretrained

        # Make sure theta_t is in eval mode
        model_t.eval()

        # Line 4: Compute (x*, y*)
        if args.optim_type == "lookup":
            # Loss-difference based lookup method
            (x_opt, y_opt), best_loss = mtp_utils.lookup_based_optimal(
                theta_t=model_t, theta_p=models_p,
                loader=loader_optim,
                n_classes=ds_second.n_classes,
                random=args.random,
                lossfn=args.loss,
                filter=args.filter,
                verbose=True,
                ensemble_p=True)
        elif args.optim_type == "dataset_grad":
            # Dataset-gradient alignment loss based optimization
            (x_opt, y_opt), best_loss = mtp_utils.dataset_grad_optimal(
                theta_t=model_t, theta_p=models_p,
                input_shape=ds_second.datum_shape,
                n_classes=ds_second.n_classes,
                trials=args.optim_trials,
                ds=ds,
                num_steps=args.optim_steps,
                step_size=args.optim_lr,
                verbose=True,
                signed=args.signed,
                ensemble_p=True,
                batch_sample_estimate=args.batch_sample_estimate)
        elif args.optim_type == "loss_difference":
            # Loss-difference based optimization
            (x_opt, y_opt), best_loss = mtp_utils.find_optimal_using_optim(
                theta_t=model_t, theta_p=models_p,
                input_shape=ds_second.datum_shape,
                n_classes=ds_second.n_classes,
                num_steps=args.optim_steps,
                trials=args.optim_trials,
                step_size=args.optim_lr,
                filter=args.filter,
                verbose=True,
                ensemble_p=True)
        else:
            raise NotImplementedError("Loss optimization method not implemented")

        # Log some information about x*, y*
        with ch.no_grad():
            pred_t = model_t(x_opt)
            preds_p = ",".join(
                [str(mp(x_opt).argmax(1).item()) for mp in models_p])
            print(utils.cyan_print(
                "Mt(x*): %d, Mp(x*): %s, y*: %d" %
                (pred_t.argmax(1), preds_p, y_opt)))

        # Set n_copies dynamically, if requested
        n_copies = args.n_copies
        if args.dynamic_repeat:
            n_copies = mtp_utils.dynamic_n(tst_sub_acc, args.n_copies)

        # Line 5: Add (x*, y*) to D_p
        for _ in range(n_copies):
            D_p[0].append(x_opt.cpu())
            D_p[1].append(y_opt.cpu())
            ds.add_point_to_train(x_opt.cpu(), y_opt.cpu())
        print()

        # Calculate useful statistics
        (tst_sub_acc, _), (tst_nsub_acc, _) = dnn_utils.get_model_metrics(
            model=model_t, loader=test_loader,
            target_prop=args.poison_class, lossfn=args.loss)
        (trn_sub_acc, _), (trn_nsub_acc, _) = dnn_utils.get_model_metrics(
            model=model_t, loader=train_loader,
            target_prop=args.poison_class, lossfn=args.loss)
        norm_diffs = dnn_utils.model_l2_closeness(model_t, models_p,
                                                  ensemble=True)

        # Log information
        mtp_utils.log_information(
            logger=logger, best_loss=best_loss,
            x_opt=x_opt, norm_diffs=norm_diffs,
            trn_sub_acc=trn_sub_acc, trn_nsub_acc=trn_nsub_acc,
            tst_sub_acc=tst_sub_acc, tst_nsub_acc=tst_nsub_acc,
            num_iters=num_iters + 1, args=args,
            label=y_opt)

        # Line 6: Get ready to check condition
        condition = stop_cond(args=args, best_loss=best_loss,
                              num_iters=num_iters,
                              tst_sub_acc=tst_sub_acc,
                              norm_diffs=norm_diffs)

        # Keep track of no. of iterations
        num_iters += 1

    # Line 7: Return poison data
    return D_p, model_t
def indiscriminateAttack(logger, wanted_errors, args):
    # Fetch appropriate dataset
    ds = datasets.dataset_helper("memory")(path=args.path_1)

    # Maintain copy of clean data (for seed sampling)
    ds_clean = datasets.dataset_helper("memory")(path=args.path_1)

    # Data to pick points from (for x* optimization)
    ds_second = datasets.dataset_helper("memory")(path=args.path_2)
    loader_optim, _ = ds_second.get_loaders(1000)

    # Line 1: Collect poisoning points
    D_p = [[], []]

    # Line 3: Since D_p is empty in the first iteration, train the model outside the loop
    model_t_pretrained, pretrain_optim = mtp_utils.train_clean_model(ds, args)

    # Report performance of clean model
    batch_size = args.batch_size
    if batch_size == -1:
        batch_size = len(ds.train)
    train_loader, test_loader = ds.get_loaders(batch_size)

    clean_acc, _ = dnn_utils.get_model_metrics(model_t_pretrained,
                                               test_loader,
                                               lossfn=args.loss)
    print(utils.yellow_print("[Clean-model] Total Acc: %.4f" % clean_acc))
    _, clean_total_loss = dnn_utils.get_model_metrics(model_t_pretrained,
                                                      train_loader,
                                                      lossfn=args.loss)
    print(utils.yellow_print(
        "[Clean-model] Loss on train: %.4f" % clean_total_loss))
    print()

    # Keep track of which errors have been achieved so far
    achieved_so_far = 0

    # Line 2: Iterate until stopping criteria met
    best_loss = np.inf
    num_iters = 0
    while achieved_so_far < len(wanted_errors):

        if len(D_p[0]) > 0:
            # Line 3: theta_t = train(D_c U D_p)
            print(utils.yellow_print(
                "[Training model on Dc U Dp (on %d samples)]" % len(ds.train)))

            # Get loader for D_c U D_p
            batch_size = args.batch_size
            if batch_size == -1:
                batch_size = len(ds.train)
            data_loader, _ = ds.get_loaders(batch_size)

            # Do not re-initialize model if finetuning requested
            if not args.finetune:
                # Construct model
                model_t = dnn_utils.get_seeded_wrapped_model(
                    args, n_classes=ds.n_classes)
            else:
                # Start finetuning from the point where model
                # has seen only clean data
                model_t = copy.deepcopy(model_t_pretrained)

            # Set model to training mode
            model_t.train()

            # Define optimizer
            optim = ch.optim.Adam(model_t.parameters(),
                                  lr=args.pretrain_lr,
                                  weight_decay=args.pretrain_weight_decay)

            # Adjust starting point of optimizer
            # if finetuning is requested
            if args.finetune:
                optim.load_state_dict(pretrain_optim.state_dict())

            # Increase number of iterations theta_t is trained for
            # as size of its training set |D_c U D_p| increases
            iters = args.iters
            if args.increase_iters:
                iters += int((len(ds.train) - len(ds_clean.train)) /
                             args.increase_every)

            # Train model
            for e in range(iters):
                # Train epoch
                dnn_utils.epoch(model=model_t, loader=data_loader,
                                optimizer=optim, epoch_num=e + 1,
                                c_rule=None, n_classes=None,
                                verbose=True, lossfn=args.loss)
        else:
            model_t = model_t_pretrained

        # Make sure theta_t is in eval mode
        model_t.eval()

        # Line 4: Compute (x*, y*)
        (x_opt, y_opt), best_loss = lookup_based_optimal(
            theta_t=model_t,
            loader=loader_optim,
            lossfn=args.loss,
            filter=args.filter,
            n_classes=ds.n_classes,
            verbose=True)

        # Log some information about x*, y*
        with ch.no_grad():
            pred_t = model_t(x_opt)
            print(utils.cyan_print(
                "Loss: %.3f Mt(x*): %d, y*: %d" %
                (best_loss.item(), pred_t.argmax(1), y_opt)))

        # Line 5: Add (x*, y*) to D_p
        for _ in range(args.n_copies):
            D_p[0].append(x_opt.cpu())
            D_p[1].append(y_opt.cpu())
            ds.add_point_to_train(x_opt.cpu(), y_opt.cpu())
        print()

        # Calculate useful statistics
        (tst_acc, _) = dnn_utils.get_model_metrics(model=model_t,
                                                   loader=test_loader,
                                                   lossfn=args.loss)
        (trn_acc, _) = dnn_utils.get_model_metrics(model=model_t,
                                                   loader=train_loader,
                                                   lossfn=args.loss)

        # Log information
        # Log optimized image
        logger.add_image("X*", x_opt[0], (num_iters + 1) * args.n_copies)
        # Log weight norm
        logger.add_scalar("Weight norm",
                          dnn_utils.get_model_l2_norm(model_t).item(),
                          global_step=(num_iters + 1) * args.n_copies)
        # Log accuracies on train, test data
        logger.add_scalar("[Train] Accuracy", trn_acc,
                          global_step=(num_iters + 1) * args.n_copies)
        logger.add_scalar("[Test] Accuracy", tst_acc,
                          global_step=(num_iters + 1) * args.n_copies)
        # Log best loss
        logger.add_scalar("Loss on x*,y*", best_loss.item(),
                          global_step=(num_iters + 1) * args.n_copies)

        # Keep track of no. of iterations
        num_iters += 1

        # If wanted error achieved, switch to next goal
        if (1 - trn_acc) > wanted_errors[achieved_so_far]:
            # Save current model
            model_name = "seed-{}_error-{}_testacc-{}.pth".format(
                args.seed, wanted_errors[achieved_so_far], tst_acc)
            ch.save(copy.deepcopy(model_t).state_dict(),
                    os.path.join(args.save_dir, model_name))
            print(utils.pink_print(
                "Achieved %.3f train error!" % wanted_errors[achieved_so_far]))
            achieved_so_far += 1
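# An illustrative invocation sketch for the indiscriminate attack above
# (assumptions: a TensorBoard SummaryWriter as the logger and an ascending
# list of target train-error levels; neither appears in this excerpt).
def _example_run_indiscriminate_attack(args):
    from torch.utils.tensorboard import SummaryWriter
    logger = SummaryWriter(log_dir=args.save_dir)
    wanted_errors = [0.05, 0.1, 0.2]  # hypothetical error targets
    indiscriminateAttack(logger, wanted_errors, args)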
def train_model(model, loaders, epochs, c_rule, n_classes,
                save_path=None, corrupt_class=None,
                lr=1e-3, save_option='last',
                weight_decay=0.09,
                poison_ratio=1.0, verbose=True,
                no_val=False,
                get_metrics_at_epoch_end=None,
                clean_train_loader=None,
                study_mode=False,
                loss_fn="ce"):
    if save_path is None:
        save_option = 'none'
    if save_option not in ['best', 'last', 'none']:
        raise ValueError("Model-saving mode must be best/last/none")
    if save_option == 'best' and no_val:
        raise ValueError(
            "Cannot identify best-val-loss model if val loss not computed")

    optim = ch.optim.Adam(model.parameters(), lr=lr,
                          weight_decay=weight_decay)

    train_loader, val_loader = loaders
    # Define for first epoch
    train_loader_use = train_loader

    best_loss, best_vacc = np.inf, 0.0
    best_model = None

    if study_mode:
        collect_stats = []

    iterator = range(epochs)
    if not verbose:
        iterator = tqdm(iterator)
    for e in iterator:
        # Train epoch
        tr_loss, _ = epoch(model, train_loader_use, optim, e + 1,
                           c_rule, n_classes,
                           corrupt_class=corrupt_class,
                           poison_ratio=poison_ratio,
                           verbose=verbose, lossfn=loss_fn)

        if not no_val:
            # Validation epoch
            (loss, acc) = epoch(model, val_loader, None, e + 1,
                                c_rule, n_classes,
                                verbose=verbose, lossfn=loss_fn)

        if verbose or study_mode:
            if get_metrics_at_epoch_end is not None:
                (prop_acc, _), (noprop_acc, _) = get_model_metrics(
                    model, clean_train_loader,
                    target_prop=get_metrics_at_epoch_end,
                    lossfn=loss_fn)
                print(utils.yellow_print(
                    "[Train] Population acc: %.4f, Non-population acc: %.4f" %
                    (prop_acc, noprop_acc)))

                (val_prop_acc, _), (val_noprop_acc, _) = get_model_metrics(
                    model, val_loader,
                    target_prop=get_metrics_at_epoch_end,
                    lossfn=loss_fn)
                print(utils.yellow_print(
                    "[Val] Population acc: %.4f, Non-population acc: %.4f" %
                    (val_prop_acc, val_noprop_acc)))

                norm = get_model_l2_norm(model).item()
                print(utils.yellow_print("[Model] R(w): %.3f" % norm))

                if study_mode:
                    stats = {
                        "train_prop_acc": 100 * prop_acc,
                        "train_noprop_acc": 100 * noprop_acc,
                        "val_prop_acc": 100 * val_prop_acc,
                        "val_noprop_acc": 100 * val_noprop_acc,
                        "norm": norm,
                        # Scale losses (x100 for binary, x50 for multiclass)
                        # to visualize them better alongside accuracies
                        "lossx100": 100 * tr_loss,
                        "lossx50": 50 * tr_loss
                    }
                    collect_stats.append(stats)
            print()

        # Keep track of checkpoint with best validation loss so far,
        # if that option is picked
        if save_option == 'best':
            if loss < best_loss:
                best_model = copy.deepcopy(model)
                best_loss, best_vacc = loss, acc

        # Intervention on batch ordering
        train_loader_use = find_optimal_batch_order(
            model, train_loader, clean_train_loader,
            lr, weight_decay, get_metrics_at_epoch_end)

    # Save latest model state, if this option is picked
    if save_option == 'last':
        best_model = model

    if save_option != 'none':
        ch.save(best_model.state_dict(), os.path.join(save_path))

    # Keep track of everything, if asked
    if study_mode:
        return model, best_loss, best_vacc, collect_stats

    # Return best validation metrics
    return model, best_loss, best_vacc
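# An illustrative usage sketch for train_model (not from the original scripts).
# It mirrors argument names used elsewhere in these files; the epoch budget and
# save path are assumptions.
def _example_train_model(args):
    ds = datasets.dataset_helper(args.dataset)()
    model = get_seeded_wrapped_model(args, n_classes=ds.n_classes)
    train_loader, val_loader = ds.get_loaders(args.batch_size)
    model, val_loss, val_acc = train_model(
        model, (train_loader, val_loader),
        epochs=15,                          # assumed epoch budget
        c_rule=None, n_classes=ds.n_classes,
        save_path="./models/example.pth",   # assumed path
        save_option='last',
        lr=args.pretrain_lr,
        weight_decay=args.pretrain_weight_decay,
        get_metrics_at_epoch_end=args.poison_class,
        clean_train_loader=train_loader)
    return model, val_loss, val_acc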
callable_ds = datasets.dataset_helper(args.dataset)
n_classes = callable_ds().n_classes

# Construct model
model = get_seeded_wrapped_model(args, n_classes=n_classes)

# Train model
if args.study_mode:
    model, all_stats = train_poisoned_model(model, callable_ds, ratio, args)
else:
    model = train_poisoned_model(model, callable_ds, ratio, args)

# Compute metrics for said model
train_loader, val_loader = callable_ds().get_loaders(512)
_, train_loss = get_model_metrics(model, train_loader)
test_acc, _ = get_model_metrics(model, val_loader)

(trn_sub_acc, _), (trn_nsub_acc, _) = get_model_metrics(
    model, train_loader, args.poison_class)
(tst_sub_acc, _), (tst_nsub_acc, _) = get_model_metrics(
    model, val_loader, args.poison_class)

# Print accuracies on target/non-target data
# On seen (train) and unseen (val) data
if not args.use_given_data:
    print(utils.pink_print("Ratio %.3f" % ratio))
print("Total Acc: %.3f" % test_acc)
print('Train Target Acc : %.3f' % trn_sub_acc)
def modelTargetPoisoning(models_p, logger, args):
    # Implementation of Algorithm 1, modified for DNNs
    # Line number corresponding to the Algorithm is mentioned
    # along with each high-level function call

    # Fetch appropriate dataset
    ds = datasets.dataset_helper("memory")(path=args.path_1)

    # Maintain copy of clean data (for seed sampling)
    ds_clean = datasets.dataset_helper("memory")(path=args.path_1)

    # Data to pick points from (for x* optimization)
    ds_second = datasets.dataset_helper("memory")(path=args.path_2)
    loader_optim, _ = ds_second.get_loaders(1000)

    # Line 1: Collect poisoning points
    D_p = [[], []]

    # Line 3: Since D_p is empty in the first iteration, train the models outside the loop
    models_t_pretrained = []
    for seed in args.seeds:
        args.seed = seed
        print(utils.yellow_print("Training model with seed %d" % args.seed))
        model_t_pretrained, _ = mtp_utils.train_clean_model(ds, args)
        models_t_pretrained.append(model_t_pretrained)

    # Report performance of clean models
    batch_size = len(ds.train)
    train_loader, test_loader = ds.get_loaders(batch_size)

    clean_accs, clean_total_losses = [], []
    population_accs, non_population_accs = [], []
    for model_t_pretrained in models_t_pretrained:
        clean_acc, clean_total_loss = dnn_utils.get_model_metrics(
            model_t_pretrained, test_loader, lossfn=args.loss)
        clean_accs.append(clean_acc)
        clean_total_losses.append(clean_total_loss)

        (population_acc, _), (non_population_acc, _) = dnn_utils.get_model_metrics(
            model_t_pretrained, test_loader,
            lossfn=args.loss, target_prop=args.poison_class)
        population_accs.append(population_acc)
        non_population_accs.append(non_population_acc)

    print(utils.yellow_print(
        "[Clean-model][Test] Total Acc: %.4f" % np.mean(clean_accs)))
    print(utils.yellow_print(
        "[Clean-model] Loss on train: %.4f" % np.mean(clean_total_losses)))
    print(utils.red_print(
        "[Clean-model][Test] Population Acc: %.4f" % np.mean(population_accs)))
    print(utils.red_print(
        "[Clean-model][Test] Non-Population Acc: %.4f" %
        np.mean(non_population_accs)))
    print()

    # Line 2: Iterate until stopping criteria met
    best_loss = np.inf
    num_iters = 0
    condition = True
    while condition:

        if len(D_p[0]) > 0:
            # Line 3: theta_t = train(D_c U D_p)
            print(utils.yellow_print(
                "[Training model on Dc U Dp (on %d samples)]" % len(ds.train)))

            # Get loader for D_c U D_p
            batch_size = len(ds.train)
            data_loader, _ = ds.get_loaders(batch_size)

            # Increase number of iterations theta_t is trained for
            # as size of its training set |D_c U D_p| increases
            iters = args.iters
            if args.increase_iters:
                iters += int((len(ds.train) - len(ds_clean.train)) /
                             args.increase_every)

            # Construct and train one model per seed
            models_t = []
            for seed in args.seeds:
                args.seed = seed
                model_t = dnn_utils.get_seeded_wrapped_model(
                    args, n_classes=ds.n_classes)

                # Set model to training mode
                model_t.train()

                # Define optimizer
                optim = ch.optim.Adam(model_t.parameters(),
                                      lr=args.pretrain_lr,
                                      weight_decay=args.pretrain_weight_decay)

                # Train model
                print(utils.yellow_print(
                    "Training model with seed %d" % args.seed))
                for e in range(iters):
                    # Train epoch
                    dnn_utils.epoch(model=model_t, loader=data_loader,
                                    optimizer=optim, epoch_num=e + 1,
                                    c_rule=None, n_classes=None,
                                    verbose=True, lossfn=args.loss)
                models_t.append(model_t)
        else:
            models_t = models_t_pretrained

        # Make sure all theta_t are in eval mode
        for model_t in models_t:
            model_t.eval()

        # Line 4: Compute (x*, y*)
        (x_opt, y_opt), best_loss = mtp_utils.lookup_based_optimal(
            theta_t=models_t, theta_p=models_p,
            loader=loader_optim,
            n_classes=ds_second.n_classes,
            random=args.random,
            lossfn=args.loss,
            filter=args.filter,
            verbose=True,
            ensemble_t=True,
            ensemble_p=True,
            pick_optimal=args.pick_optimal)

        # Log some information about x*, y*
        with ch.no_grad():
            preds_p = [str(model_p(x_opt).argmax(1).item())
                       for model_p in models_p]
            preds_t = [str(model_t(x_opt).argmax(1).item())
                       for model_t in models_t]
        print(utils.cyan_print(
            "Loss: %.3f Mt(x*): %s, Mp(x*): %s, y*: %d" %
            (best_loss.item(), ",".join(preds_t), ",".join(preds_p), y_opt)))

        # Line 5: Add (x*, y*) to D_p
        for _ in range(args.n_copies):
            D_p[0].append(x_opt.cpu())
            D_p[1].append(y_opt.cpu())
            ds.add_point_to_train(x_opt.cpu(), y_opt.cpu())
        print()

        # Calculate useful statistics
        tst_sub_accs, tst_nsub_accs = [], []
        trn_sub_accs, trn_nsub_accs = [], []
        for model_t in models_t:
            (tst_sub_acc, _), (tst_nsub_acc, _) = dnn_utils.get_model_metrics(
                model=model_t, loader=test_loader,
                target_prop=args.poison_class, lossfn=args.loss)
            tst_sub_accs.append(tst_sub_acc)
            tst_nsub_accs.append(tst_nsub_acc)

            (trn_sub_acc, _), (trn_nsub_acc, _) = dnn_utils.get_model_metrics(
                model=model_t, loader=train_loader,
                target_prop=args.poison_class, lossfn=args.loss)
            trn_sub_accs.append(trn_sub_acc)
            trn_nsub_accs.append(trn_nsub_acc)

        # Get mean of these metrics
        trn_sub_acc = np.mean(trn_sub_accs)
        tst_sub_acc = np.mean(tst_sub_accs)
        trn_nsub_acc = np.mean(trn_nsub_accs)
        tst_nsub_acc = np.mean(tst_nsub_accs)

        # Log information
        mtp_utils.log_information(
            logger=logger, best_loss=best_loss,
            x_opt=x_opt, norm_diffs=None,
            trn_sub_acc=trn_sub_acc, trn_nsub_acc=trn_nsub_acc,
            tst_sub_acc=tst_sub_acc, tst_nsub_acc=tst_nsub_acc,
            num_iters=num_iters + 1, args=args,
            label=y_opt)

        # Line 6: Get ready to check condition
        condition = stop_cond(args=args, best_loss=best_loss,
                              num_iters=num_iters,
                              tst_sub_acc=tst_sub_acc)

        # Keep track of no. of iterations
        num_iters += 1

    # Line 7: Return poison data
    return D_p, models_t
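# `models_p` above is a list of poisoned target models, but this excerpt does
# not show how that ensemble is loaded. An illustrative sketch mirroring the
# single-model loading done elsewhere in these scripts (the paths and the
# n_classes default are assumptions):
def _example_load_poisoned_ensemble(model_paths, args, n_classes=2):
    models_p = []
    for path in model_paths:
        model_p = dnn_utils.model_helper(args.model_arch)(n_classes=n_classes)
        model_p = model_p.cuda()
        model_p.load_state_dict(ch.load(path))
        model_p.eval()
        models_p.append(model_p)
    return models_p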
def find_optimal_batch_order(model, train_loader, eval_loader, lr,
                             weight_decay, poison_class):
    """
        Assuming white-box access to a model, return an ordering of data
        batches that would get the model closest to the desired attacker
        objective.
    """
    print("Reordering batches")
    batch_losses = []
    batch_data = []
    loss_fn = nn.CrossEntropyLoss()
    for (x, y) in tqdm(train_loader):
        # Create temporary clone of model
        model_ = copy.deepcopy(model)
        # Create temporary optimizer
        optim = ch.optim.Adam(model_.parameters(),
                              lr=lr, weight_decay=weight_decay)
        model_.train()
        optim.zero_grad()

        batch_data.append((x, y))

        # Simulate training on only this batch of data
        x, y = x.cuda(), y.cuda()
        logits = model_(x)
        loss = loss_fn(logits, y)
        loss.backward()
        optim.step()

        # Record loss on target sub-population
        (_, prop_loss), (_, noprop_loss) = get_model_metrics(
            model_, eval_loader, target_prop=poison_class)
        batch_losses.append(prop_loss)

    batch_losses = np.array(batch_losses)

    # Oscillating out-in
    # order_ = np.argsort(batch_losses)
    # o1 = list(order_[len(order_)//2:][::-1])
    # o2 = list(order_[:len(order_)//2])
    # order = np.empty((len(order_),), dtype=int)
    # order[0::2] = o1
    # order[1::2] = o2

    # Oscillating in-out
    # order = order[::-1]

    # Low->High setting
    order = np.argsort(batch_losses)

    # High->Low setting
    # order = np.argsort(-batch_losses)

    # Create new loader with this order of batches
    new_loader = np.array(batch_data, dtype=object)
    new_loader = new_loader[order]

    return new_loader
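# Note: the "loader" returned above is a NumPy object array of (x, y) batch
# tuples rather than a torch DataLoader; downstream code only needs to iterate
# it. A minimal illustrative consumer (an assumption, not repository code):
def _example_mean_loss_over_reordered_batches(model, reordered_batches):
    loss_fn = nn.CrossEntropyLoss()
    total = 0.0
    with ch.no_grad():
        for (x, y) in reordered_batches:
            total += loss_fn(model(x.cuda()), y.cuda()).item()
    return total / len(reordered_batches)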