def generateRandomMutant(self) -> Tuple[Pattern, Occurrence, str, str]:
    """
    Produce a single mutant from a randomly drawn pattern and occurrence.

    The pool `self.pattern_generator` is reset to a copy of
    `self.mutation_patterns`; patterns are then drawn one at a time with
    `self.nextRandomPattern()` until one has at least one occurrence. One
    of those occurrences is picked with `self.rng.choice` and mutated.

    Because the result is random, this call is a poor fit for sequential
    runs where mutant order must be controlled; build such runs from the
    other API methods instead. Treat this method as a usage example.

    Returns:
        (pattern, occurrence, original_line, mutated_line) on success.
        When the pool empties without any occurrence being found, two
        hints are printed with `utils.red_print` and None is returned
        (the return annotation does not cover that case).
    """
    # Every call starts again from the complete set of patterns
    self.pattern_generator = self.mutation_patterns.copy()

    while len(self.pattern_generator) > 0:
        candidate = self.nextRandomPattern()
        matches = self.findOccurrences(candidate)
        if len(matches) == 0:
            # Nothing to mutate for this pattern; draw another one.
            # NOTE(review): termination relies on nextRandomPattern()
            # shrinking self.pattern_generator - confirm.
            continue
        chosen = self.rng.choice(matches)
        before, after = self.mutate(chosen)
        return candidate, chosen, before, after

    utils.red_print(
        "Could not create a mutant. Please make sure it is a C file.")
    utils.red_print("You may need to indent your C file.")
    return None
def train_poisoned_model(model, callable_ds, poison_ratio, args):
    """
    Poison the training data once, then train `model` on it normally.

    Two dataset objects are built from `callable_ds`: one receives the
    poison, the other stays untouched and is used to report how many
    points were added and to provide a clean train loader.

    Args:
        model: model handed to `train_model`.
        callable_ds: zero-argument callable returning a dataset object.
        poison_ratio: forwarded to `poison_train_data` when the poison is
            generated here rather than loaded.
        args: options namespace. Reads use_given_data, poison_path,
            poison_class, c_rule, save_poisoned_data, offset, batch_size,
            epochs, weight_decay, lr, verbose, study_mode and loss.

    Returns:
        The trained model, or (model, all_stats) when `args.study_mode`
        is truthy.
    """
    poisoned_ds = callable_ds()
    reference_ds = callable_ds()

    if args.use_given_data:
        # Poison points were prepared beforehand; load and attach them
        print(utils.yellow_print("Using given data"))
        loaded = np.load(args.poison_path)
        poisoned_ds.add_poison_data(loaded['x'], loaded['y'])
    else:
        poisoned_ds.poison_train_data(args.poison_class,
                                      poison_ratio,
                                      args.c_rule,
                                      selection=None,
                                      save_data=args.save_poisoned_data,
                                      offset=args.offset)

    print("Training on %d samples" % len(poisoned_ds.train))
    print(
        utils.red_print(
            "%d additional points" %
            (len(poisoned_ds.train) - len(reference_ds.train))))

    # batch_size == -1 requests one full-size batch, without shuffling
    full_batch = args.batch_size == -1
    batch_size = len(poisoned_ds.train) if full_batch else args.batch_size
    shuffle = not full_batch

    loaders = poisoned_ds.get_loaders(batch_size, shuffle=shuffle)
    train_loader, val_loader = loaders
    clean_train_loader, _ = reference_ds.get_loaders(batch_size,
                                                     shuffle=shuffle)

    outcome = train_model(model, (train_loader, val_loader),
                          epochs=args.epochs,
                          c_rule=args.c_rule,
                          n_classes=poisoned_ds.n_classes,
                          weight_decay=args.weight_decay,
                          lr=args.lr,
                          verbose=args.verbose,
                          no_val=True,
                          get_metrics_at_epoch_end=args.poison_class,
                          clean_train_loader=clean_train_loader,
                          study_mode=args.study_mode,
                          loss_fn=args.loss)

    # In study mode train_model yields a fourth element with statistics
    if args.study_mode:
        model, _, _, all_stats = outcome
        return model, all_stats

    model, _, _ = outcome
    return model
def stop_cond(args, best_loss, num_iters, tst_sub_acc):
    """
    Decide whether the poisoning loop should run another iteration.

    Callers assign the result to their `while` condition, so True means
    "keep going".

    Args:
        args: options namespace. Reads incre_tol_par, fixed_budget,
            require_acc and err_threshold.
        best_loss: loss of the most recent optimal point.
        num_iters: iteration counter compared against the fixed budget.
        tst_sub_acc: test accuracy on the targeted population.

    Returns:
        * fixed_budget > 0: num_iters < fixed_budget.
        * otherwise, with require_acc: tst_sub_acc > 1 - err_threshold
          (the accuracy is also printed).
        * otherwise: best_loss > incre_tol_par.
    """
    loss_threshold = args.incre_tol_par

    # A positive budget takes precedence over the other criteria
    budget_disabled = args.fixed_budget <= 0
    if not budget_disabled:
        return num_iters < args.fixed_budget

    if not args.require_acc:
        return best_loss > loss_threshold

    keep_going = tst_sub_acc > 1 - args.err_threshold
    print(
        utils.red_print("Current accuracy on population (test): %.4f" %
                        tst_sub_acc))
    return keep_going
def stop_cond(args, best_loss, num_iters, tst_sub_acc, norm_diffs):
    """
    Decide whether the poisoning loop should run another iteration.

    Callers assign the result to their `while` condition, so True means
    "keep going".

    Args:
        args: options namespace. Reads incre_tol_par, fixed_budget,
            require_acc, err_threshold and online_alg_criteria.
        best_loss: loss of the most recent optimal point.
        num_iters: iteration counter compared against the fixed budget.
        tst_sub_acc: test accuracy on the targeted population.
        norm_diffs: distance between the current and target model, used
            when the criterion is not "max_loss".

    Returns:
        * fixed_budget > 0: num_iters < fixed_budget.
        * otherwise, with require_acc: tst_sub_acc > 1 - err_threshold
          (the accuracy is also printed).
        * otherwise: the chosen quantity (best_loss for "max_loss",
          else norm_diffs) > incre_tol_par.
    """
    tolerance = args.incre_tol_par

    # A positive budget takes precedence over the other criteria
    budget_disabled = args.fixed_budget <= 0
    if not budget_disabled:
        return num_iters < args.fixed_budget

    if args.require_acc:
        keep_going = tst_sub_acc > 1 - args.err_threshold
        print(
            utils.red_print("Current accuracy on population (test): %.4f" %
                            tst_sub_acc))
        return keep_going

    # Either the max loss or the euclidean distance drives termination
    if args.online_alg_criteria == "max_loss":
        measured = best_loss
    else:
        measured = norm_diffs
    return measured > tolerance
def modelTargetPoisoning(model_p, logger, args):
    """
    Build a poisoning set D_p that steers a trained model towards `model_p`.

    Implementation of Algorithm 1, modified for DNNs. The "Line N"
    comments below refer to the corresponding line of that algorithm.

    Every iteration (re)trains theta_t on D_c U D_p (the first iteration
    reuses the clean pre-trained model), asks the selected `mtp_utils`
    optimizer for a point (x*, y*), appends `args.n_copies` copies of it
    to D_p and to the training data, logs statistics and finally asks
    `stop_cond` whether to continue.

    Args:
        model_p: target model passed to the optimizer as `theta_p`.
        logger: forwarded to `mtp_utils.log_information`.
        args: options namespace (dataset, poison_data, batch_size, loss,
            poison_class, finetune, pretrain_lr, pretrain_weight_decay,
            iters, increase_iters, increase_every, start_opt_real,
            trials, use_optim_for_optimal, num_steps, optim_lr,
            dynamic_lr, filter, skip_bad, n_copies, plus whatever
            `stop_cond` and the helpers read).

    Returns:
        (D_p, model_t): D_p is `[xs, ys]`, two parallel lists of the
        chosen points moved to CPU; model_t is the last theta_t used.
    """
    # Fetch appropriate dataset
    ds = datasets.dataset_helper(args.dataset)()

    # Maintain copy of clean data (for seed sampling)
    ds_clean = datasets.dataset_helper(args.dataset)()

    # Line 1: Collect poisoning points
    D_p = [[], []]

    # Load poison data, if provided
    if args.poison_data:
        print(utils.green_print("Loading poison data"))
        data = np.load("./data/poison_data/poison_data.npz")
        # Normalize to 0-1 for use by model
        all_poison_data_x = ch.from_numpy(data['x']).float() / 255.
        all_poison_data_x = ch.unsqueeze(all_poison_data_x, 1)
        all_poison_data_y = ch.from_numpy(data['y'])

    # Line 3: Since D_p is empty in first iteration, simply train it outside
    model_t_pretrained, pretrain_optim = mtp_utils.train_clean_model(ds, args)

    # Report performance of clean model
    batch_size = args.batch_size
    if batch_size == -1:
        batch_size = len(ds.train)

    train_loader, test_loader = ds.get_loaders(batch_size)
    # NOTE(review): the second message below says "train" although the
    # metrics are computed on test_loader - confirm which one is intended.
    clean_acc, clean_total_loss = dnn_utils.get_model_metrics(
        model_t_pretrained, test_loader, lossfn=args.loss)
    print(utils.yellow_print("[Clean-model][Test] Total Acc: %.4f" % clean_acc))
    print(
        utils.yellow_print("[Clean-model] Loss on train: %.4f" %
                           clean_total_loss))
    (population_acc,
     _), (non_population_acc,
          _) = dnn_utils.get_model_metrics(model_t_pretrained,
                                           test_loader,
                                           lossfn=args.loss,
                                           target_prop=args.poison_class)
    print(
        utils.red_print("[Clean-model][Test] Population Acc: %.4f" %
                        population_acc))
    print(
        utils.red_print("[Clean-model][Test] Non- Population Acc: %.4f" %
                        non_population_acc))
    print()

    # Line 2: Iterate until stopping criteria met
    prev_loss, best_loss = np.inf, np.inf
    num_iters = 0
    condition = True
    while condition:

        if len(D_p[0]) > 0:
            # Line 3: theta_t = train(D_c U D_p)
            print(
                utils.yellow_print("[Training model on Dc U Dp "
                                   "(on %d samples)]" % len(ds.train)))
            # Get loader for D_c U D_p
            batch_size = args.batch_size
            if batch_size == -1:
                batch_size = len(ds.train)
            data_loader, _ = ds.get_loaders(batch_size)

            # Do not re-initialize model if finetuning requested
            if not args.finetune:
                # Construct model
                model_t = dnn_utils.get_seeded_wrapped_model(
                    args, n_classes=ds.n_classes)
            else:
                # Start finetuning from the point where model
                # has seen only clean data
                model_t = copy.deepcopy(model_t_pretrained)
            # Set model to training mode
            model_t.train()

            # Define optimizer
            optim = ch.optim.Adam(model_t.parameters(),
                                  lr=args.pretrain_lr,
                                  weight_decay=args.pretrain_weight_decay)

            # Adjust starting point of optimizer
            # if finetuning is requested
            if args.finetune:
                optim.load_state_dict(pretrain_optim.state_dict())

            # Increase number of iterations theta_t is trained for
            # as size of its training set |D_c U D_p| increases
            iters = args.iters
            if args.increase_iters:
                iters += int((len(ds.train) - len(ds_clean.train)) /
                             args.increase_every)

            # Train model
            for e in range(iters):
                # Train epoch
                dnn_utils.epoch(model=model_t,
                                loader=data_loader,
                                optimizer=optim,
                                epoch_num=e + 1,
                                c_rule=None,
                                n_classes=None,
                                verbose=True,
                                lossfn=args.loss)
        else:
            model_t = model_t_pretrained

        # Make sure theta_t is in eval mode
        model_t.eval()

        start_with = None
        if args.start_opt_real:
            # If flag set, start with real data sampled from
            # (unpoisoned) train loader
            batch_size = args.batch_size
            if batch_size == -1:
                batch_size = len(ds.train)
            loader, _ = ds_clean.get_loaders(batch_size)
            start_with = datasets.get_sample_from_loader(
                loader, args.trials, ds_clean.n_classes)
        elif args.poison_data:
            # Sample 'num-trials' data from this
            perm = ch.randperm(all_poison_data_x.size(0))
            idx = perm[:args.trials]
            start_with = (all_poison_data_x[idx], all_poison_data_y[idx])

        # Line 4: Compute (x*, y*)
        if args.use_optim_for_optimal:
            find_optimal_function = mtp_utils.find_optimal_using_optim
        else:
            find_optimal_function = mtp_utils.find_optimal

        # Shared by the first attempt and the retry so that the two calls
        # can only differ in the number of trials. (Previously the retry
        # silently dropped `filter`.)
        optim_kwargs = dict(theta_t=model_t,
                            theta_p=model_p,
                            input_shape=ds.datum_shape,
                            n_classes=ds.n_classes,
                            num_steps=args.num_steps,
                            step_size=args.optim_lr,
                            verbose=True,
                            start_with=start_with,
                            lossfn=args.loss,
                            dynamic_lr=args.dynamic_lr,
                            filter=args.filter)

        (x_opt, y_opt), best_loss = find_optimal_function(trials=args.trials,
                                                          **optim_kwargs)

        # If loss increased, try optimization once more
        # With double trials, to reduce chance of bad minima
        if args.skip_bad and best_loss > prev_loss:
            print(utils.red_print("Re-running optimization with more seeds"))
            (x_opt, y_opt), best_loss = find_optimal_function(
                trials=args.trials * 2, **optim_kwargs)

        # Log some information about x*, y*
        with ch.no_grad():
            pred_t, pred_p = model_t(x_opt), model_p(x_opt)
            if pred_t.argmax(1) == y_opt.item():
                print(utils.red_print("[BAD OPTIMIZATION. CHECK]"))
        print(
            utils.cyan_print(
                "Loss: %.3f Mt(x*): %d, Mp(x*): %d, y*: %d" %
                (best_loss.item(), pred_t.argmax(1), pred_p.argmax(1), y_opt)))

        # Line 5: Add (x*, y*) to D_p
        for _ in range(args.n_copies):
            D_p[0].append(x_opt.cpu())
            D_p[1].append(y_opt.cpu())
            ds.add_point_to_train(x_opt.cpu(), y_opt.cpu())
        print()

        # Calculate useful statistics
        (tst_sub_acc,
         _), (tst_nsub_acc,
              _) = dnn_utils.get_model_metrics(model=model_t,
                                               loader=test_loader,
                                               target_prop=args.poison_class,
                                               lossfn=args.loss)
        (trn_sub_acc,
         _), (trn_nsub_acc,
              _) = dnn_utils.get_model_metrics(model=model_t,
                                               loader=train_loader,
                                               target_prop=args.poison_class,
                                               lossfn=args.loss)
        norm_diffs = dnn_utils.model_l2_closeness(model_t, model_p)

        # Log information
        mtp_utils.log_information(logger=logger,
                                  best_loss=best_loss,
                                  x_opt=x_opt,
                                  model_t=model_t,
                                  norm_diffs=norm_diffs,
                                  trn_sub_acc=trn_sub_acc,
                                  trn_nsub_acc=trn_nsub_acc,
                                  tst_sub_acc=tst_sub_acc,
                                  tst_nsub_acc=tst_nsub_acc,
                                  num_iters=num_iters + 1,
                                  args=args)

        # Line 6: Get ready to check condition
        condition = stop_cond(args=args,
                              best_loss=best_loss,
                              num_iters=num_iters,
                              tst_sub_acc=tst_sub_acc,
                              norm_diffs=norm_diffs)

        # Keep track of no. of iterations
        num_iters += 1

        # Keep track of loss from previous iteration
        prev_loss = best_loss.item()

    # Line 7: Return poison data
    return D_p, model_t
def _strip_old_suffix(path):
    """Return `path` without a trailing ".old" suffix, if it has one."""
    suffix = ".old"
    if path.endswith(suffix):
        return path[:-len(suffix)]
    return path


def run_task(task, args, config):
    """
    Runs a mutation testing task with settings in `args` and `config`.
    Collects results and produces CSV data.

    For every mutant occurrence the source is mutated, the project is
    built/flashed/read through `flash_and_read`, the outcome is recorded
    and the source is restored. Mutants that fail to compile are counted
    separately and do not count towards `args.mutants`.

    Args:
        task: mapping with 'src', 'patterns', 'name' and optionally
            'mutants_per_pattern'.
        args: options namespace. Reads port, timeout, csv, seed,
            randomize, mutants and line_coverage.
        config: mapping providing 'flash_command'.

    Side effects:
        Sets the PORT environment variable, prints progress, and (when
        `args.csv` is truthy) writes three CSV files. The score summary
        and CSV output are produced in the outer `finally`, so they are
        still emitted when the run stops early; any exception is printed
        and re-raised afterwards.
    """
    flash_command = config['flash_command']
    port = args.port if args.port else utils.get_default_serial_port()
    os.environ['PORT'] = port
    timeout = int(args.timeout)
    write_csv = args.csv
    rng = random.Random(args.seed)

    mutation = mutator.Mutator(src=task['src'],
                               mutation_patterns=task['patterns'],
                               rng=rng)
    mutations_list = mutation.generateMutants(
        mutants_per_pattern=(task['mutants_per_pattern']
                             if 'mutants_per_pattern' in task else None),
        random=args.randomize)

    data_record = []
    trials = []
    test_to_kills = {}  # (group, test) -> (times failed, times run)
    run_cnt = 0
    nc = 0  # mutants that did not compile
    total_failures = 0
    mutant_cnt = int(args.mutants)

    patterns = list(mutation.getPatterns())
    failures_per_pattern = dict.fromkeys(patterns, 0)
    total_per_pattern = dict.fromkeys(patterns, 0)

    # outer try is for finally generating csv if automation stops early
    try:
        for occurrence in mutations_list:
            if run_cnt == mutant_cnt:
                break
            mp = occurrence.pattern
            # mutate the code
            utils.yellow_print(occurrence)
            original_line, mutated_line = mutation.mutate(occurrence)
            # str.rstrip(".old") would strip any trailing '.', 'o', 'l',
            # 'd' characters (e.g. "world.old" -> "wor"); remove only the
            # literal suffix instead.
            file_changed = _strip_old_suffix(occurrence.file)
            line_number = occurrence.line
            # try is for catching compile failure to continue execution
            try:
                # cmake, build, flash, and read
                output, final_flag = flash_and_read(port, timeout,
                                                    flash_command)
                # reaching here means success, so change counters
                run_cnt += 1
                total_per_pattern[mp] += 1

                # tests expected to catch
                tests_expected_to_catch = "N/A"
                if args.line_coverage:
                    tests_expected_to_catch = ",".join(
                        get_expected_catch(args.line_coverage,
                                           int(line_number)))

                # mutant_status can either be "FAIL", "PASS", "CRASH", "TIMEOUT"
                mutant_status = "FAIL"
                if final_flag == FLAGS.PassFlag:
                    utils.red_print("Mutant is Alive")
                    utils.red_print(
                        "Tests that are expected to catch this mutant are: \n{}"
                        .format(tests_expected_to_catch))
                    mutant_status = "PASS"
                else:
                    failures_per_pattern[mp] += 1
                    total_failures += 1
                    utils.green_print("Mutant is Killed")
                    if final_flag == FLAGS.CrashFlag:
                        mutant_status = "CRASH"
                    elif final_flag == "TIMEOUT":
                        mutant_status = "TIMEOUT"

                # Analyze the output to count per test failures
                for group, test, result in re.findall(TestRegEx, output):
                    kills, total = test_to_kills.get((group, test), (0, 0))
                    if result == 'FAIL':
                        kills += 1
                    test_to_kills[(group, test)] = (kills, total + 1)

                # Add result to CSV queue
                trials.append({
                    'file': file_changed,
                    'line': line_number,
                    'original': original_line,
                    'mutant': mutated_line,
                    'result': "{}/KILLED".format(mutant_status)
                    if mutant_status != "PASS" else "PASS/LIVE",
                    'expected_catch': tests_expected_to_catch
                })
                utils.yellow_print("Successful Mutant Runs: {}/{}".format(
                    run_cnt, mutant_cnt))
            except CompileFailed:
                utils.yellow_print("Cannot compile, discard and move on")
                nc += 1
            finally:
                # Always put the original source back before the next mutant
                mutation.restore()
    except BaseException:
        # Show what interrupted the run, then let it propagate
        traceback.print_exc()
        raise
    finally:
        mutation.cleanup()

        # calculate mutant score
        score = percentage(total_failures, run_cnt)
        utils.yellow_print("Score: {}%".format(score))
        utils.yellow_print(
            "Alive: {} Killed: {} Mutants: {} No-Compile: {} Attempted Runs: {}"
            .format(run_cnt - total_failures, total_failures, run_cnt, nc,
                    run_cnt + nc))
        trials.append({
            'file': "RESULTS:",
            'line': "{} NO-COMPILE".format(nc),
            'mutant': "SCORE",
            'original': "{} KILLED/{} MUTANTS".format(total_failures, run_cnt),
            'result': "{}%".format(score)
        })

        # aggregate pass/fail counts for each found test in test group
        aggregates = [{
            'Group': group,
            'Test': test,
            'Fails': kills,
            'Passes': total - kills,
            'Total': total
        } for (group, test), (kills, total) in test_to_kills.items()]

        # pattern comparison
        for mp in total_per_pattern:
            data_record.append({
                'pattern': "{} => {}".format(mp.pattern, mp.transformation),
                'failures': failures_per_pattern[mp],
                'total': total_per_pattern[mp],
                # 2 marks a pattern with no successful run; presumably a
                # sentinel outside the valid 0..1 range - TODO confirm
                'percentage': float(
                    percentage(failures_per_pattern[mp],
                               total_per_pattern[mp])) * 0.01
                if total_per_pattern[mp] > 0 else 2
            })

        # log to csv
        if write_csv:
            csv_path = os.path.join(
                dir_path, "csvs/{}/{}".format(current_date, current_time))
            pattern_csv = os.path.join(
                csv_path, "{}_pattern_comparison.csv".format(task['name']))
            trials_csv = os.path.join(
                csv_path, "{}_mutants_created.csv".format(task['name']))
            per_test_csv = os.path.join(
                csv_path, "{}_test_aggregates.csv".format(task['name']))
            to_csv(pattern_csv, ['pattern', 'failures', 'total', 'percentage'],
                   data_record)
            to_csv(trials_csv, [
                'file', 'line', 'original', 'mutant', 'result',
                'expected_catch'
            ], trials)
            to_csv(per_test_csv, ['Group', 'Test', 'Fails', 'Passes', 'Total'],
                   aggregates)
def modelTargetPoisoningEnsemble(models_p, logger, args):
    """
    Build a poisoning set D_p against an ensemble of target models.

    Implementation of Algorithm 1, modified for DNNs. The "Line N"
    comments below refer to the corresponding line of that algorithm.

    Every iteration (re)trains theta_t on D_c U D_p (the first iteration
    reuses the clean pre-trained model), obtains (x*, y*) with the method
    selected by `args.optim_type`, appends copies of it to D_p and to the
    training data, logs statistics and asks `stop_cond` whether to
    continue.

    Args:
        models_p: iterable of target models (passed as `theta_p` with
            `ensemble_p=True`).
        logger: forwarded to `mtp_utils.log_information`.
        args: options namespace (path_1, path_2, batch_size, loss,
            poison_class, finetune, pretrain_lr, pretrain_weight_decay,
            iters, increase_iters, increase_every, optim_type, random,
            filter, optim_trials, optim_steps, optim_lr, signed,
            batch_sample_estimate, n_copies, dynamic_repeat, plus
            whatever `stop_cond` and the helpers read).

    Returns:
        (D_p, model_t): D_p is `[xs, ys]`, two parallel lists of the
        chosen points moved to CPU; model_t is the last theta_t used.

    Raises:
        NotImplementedError: if `args.optim_type` is not one of
            "lookup", "dataset_grad" or "loss_difference".
    """
    # Fetch appropriate dataset
    ds = datasets.dataset_helper("memory")(path=args.path_1)

    # Maintain copy of clean data (for seed sampling)
    ds_clean = datasets.dataset_helper("memory")(path=args.path_1)

    # Data to pick points from (for x* optimization)
    ds_second = datasets.dataset_helper("memory")(path=args.path_2)
    loader_optim, _ = ds_second.get_loaders(1000)

    # Line 1: Collect poisoning points
    D_p = [[], []]

    # Line 3: Since D_p is empty in first iteration, simply train it outside
    model_t_pretrained, pretrain_optim = mtp_utils.train_clean_model(ds, args)

    # Report performance of clean model
    batch_size = args.batch_size
    if batch_size == -1:
        batch_size = len(ds.train)

    train_loader, test_loader = ds.get_loaders(batch_size)
    clean_acc, clean_total_loss = dnn_utils.get_model_metrics(
        model_t_pretrained, test_loader, lossfn=args.loss)
    print(utils.yellow_print("[Clean-model][Test] Total Acc: %.4f" % clean_acc))
    print(
        utils.yellow_print("[Clean-model] Loss on train: %.4f" %
                           clean_total_loss))
    (population_acc,
     _), (non_population_acc,
          _) = dnn_utils.get_model_metrics(model_t_pretrained,
                                           test_loader,
                                           lossfn=args.loss,
                                           target_prop=args.poison_class)
    print(
        utils.red_print("[Clean-model][Test] Population Acc: %.4f" %
                        population_acc))
    print(
        utils.red_print("[Clean-model][Test] Non- Population Acc: %.4f" %
                        non_population_acc))
    print()

    # Line 2: Iterate until stopping criteria met
    tst_sub_acc = 1.0
    best_loss = np.inf
    num_iters = 0
    condition = True
    while condition:

        if len(D_p[0]) > 0:
            # Line 3: theta_t = train(D_c U D_p)
            print(
                utils.yellow_print("[Training model on Dc U Dp "
                                   "(on %d samples)]" % len(ds.train)))
            # Get loader for D_c U D_p
            batch_size = args.batch_size
            if batch_size == -1:
                batch_size = len(ds.train)
            data_loader, _ = ds.get_loaders(batch_size)

            # Do not re-initialize model if finetuning requested
            if not args.finetune:
                # Construct model
                model_t = dnn_utils.get_seeded_wrapped_model(
                    args, n_classes=ds.n_classes)
            else:
                # Start finetuning from the point where model
                # has seen only clean data
                model_t = copy.deepcopy(model_t_pretrained)
            # Set model to training mode
            model_t.train()

            # Define optimizer
            optim = ch.optim.Adam(model_t.parameters(),
                                  lr=args.pretrain_lr,
                                  weight_decay=args.pretrain_weight_decay)

            # Adjust starting point of optimizer
            # if finetuning is requested
            if args.finetune:
                optim.load_state_dict(pretrain_optim.state_dict())

            # Increase number of iterations theta_t is trained for
            # as size of its training set |D_c U D_p| increases
            iters = args.iters
            if args.increase_iters:
                iters += int((len(ds.train) - len(ds_clean.train)) /
                             args.increase_every)

            # Train model
            for e in range(iters):
                # Train epoch
                dnn_utils.epoch(model=model_t,
                                loader=data_loader,
                                optimizer=optim,
                                epoch_num=e + 1,
                                c_rule=None,
                                n_classes=None,
                                verbose=True,
                                lossfn=args.loss)
        else:
            model_t = model_t_pretrained

        # Make sure theta_t is in eval mode
        model_t.eval()

        # Line 4: Compute (x*, y*)
        if args.optim_type == "lookup":
            # Loss-difference based lookup method
            (x_opt, y_opt), best_loss = mtp_utils.lookup_based_optimal(
                theta_t=model_t,
                theta_p=models_p,
                loader=loader_optim,
                n_classes=ds_second.n_classes,
                random=args.random,
                lossfn=args.loss,
                filter=args.filter,
                verbose=True,
                ensemble_p=True)
        elif args.optim_type == "dataset_grad":
            # Dataset-gradient alignment loss based optimization
            (x_opt, y_opt), best_loss = mtp_utils.dataset_grad_optimal(
                theta_t=model_t,
                theta_p=models_p,
                input_shape=ds_second.datum_shape,
                n_classes=ds_second.n_classes,
                trials=args.optim_trials,
                ds=ds,
                num_steps=args.optim_steps,
                step_size=args.optim_lr,
                verbose=True,
                signed=args.signed,
                ensemble_p=True,
                batch_sample_estimate=args.batch_sample_estimate)
        elif args.optim_type == "loss_difference":
            # Loss difference based optimization
            (x_opt, y_opt), best_loss = mtp_utils.find_optimal_using_optim(
                theta_t=model_t,
                theta_p=models_p,
                input_shape=ds_second.datum_shape,
                n_classes=ds_second.n_classes,
                num_steps=args.optim_steps,
                trials=args.optim_trials,
                step_size=args.optim_lr,
                filter=args.filter,
                verbose=True,
                ensemble_p=True)
        else:
            # NotImplemented is a comparison sentinel, not an exception;
            # calling it would raise a confusing TypeError instead.
            raise NotImplementedError(
                "Loss optimization method not implemented")

        # Log some information about x*, y*
        with ch.no_grad():
            pred_t = model_t(x_opt)
            # Predictions of every target model on x*
            preds_p = ",".join(
                [str(mp(x_opt).argmax(1).item()) for mp in models_p])
        print(
            utils.cyan_print("Mt(x*): %d, Mp(x*): %s, y*: %d" %
                             (pred_t.argmax(1), preds_p, y_opt)))

        # Set n_copies dynamically, if requested
        n_copies = args.n_copies
        if args.dynamic_repeat:
            n_copies = mtp_utils.dynamic_n(tst_sub_acc, args.n_copies)

        # Line 5: Add (x*, y*) to D_p
        # Use the (possibly dynamic) n_copies; the loop previously read
        # args.n_copies and so ignored args.dynamic_repeat entirely.
        for _ in range(n_copies):
            D_p[0].append(x_opt.cpu())
            D_p[1].append(y_opt.cpu())
            ds.add_point_to_train(x_opt.cpu(), y_opt.cpu())
        print()

        # Calculate useful statistics
        (tst_sub_acc,
         _), (tst_nsub_acc,
              _) = dnn_utils.get_model_metrics(model=model_t,
                                               loader=test_loader,
                                               target_prop=args.poison_class,
                                               lossfn=args.loss)
        (trn_sub_acc,
         _), (trn_nsub_acc,
              _) = dnn_utils.get_model_metrics(model=model_t,
                                               loader=train_loader,
                                               target_prop=args.poison_class,
                                               lossfn=args.loss)
        norm_diffs = dnn_utils.model_l2_closeness(model_t,
                                                  models_p,
                                                  ensemble=True)

        # Log information
        mtp_utils.log_information(logger=logger,
                                  best_loss=best_loss,
                                  x_opt=x_opt,
                                  norm_diffs=norm_diffs,
                                  trn_sub_acc=trn_sub_acc,
                                  trn_nsub_acc=trn_nsub_acc,
                                  tst_sub_acc=tst_sub_acc,
                                  tst_nsub_acc=tst_nsub_acc,
                                  num_iters=num_iters + 1,
                                  args=args,
                                  label=y_opt)

        # Line 6: Get ready to check condition
        condition = stop_cond(args=args,
                              best_loss=best_loss,
                              num_iters=num_iters,
                              tst_sub_acc=tst_sub_acc,
                              norm_diffs=norm_diffs)

        # Keep track of no. of iterations
        num_iters += 1

    # Line 7: Return poison data
    return D_p, model_t
# Verbosity switches: --verbose implies --verbose_pretrain
parser.add_argument('--verbose',
                    action="store_true",
                    help='If true, print everything')
parser.add_argument('--verbose_pretrain',
                    action="store_true",
                    help='If true, print per-epoch training statistics')
args = parser.parse_args()

if args.verbose:
    args.verbose_pretrain = True

# --errors is a comma-separated list of target error rates
try:
    wanted_errors = list(map(float, args.errors.split(",")))
    print(utils.red_print("Target error rates: %s" % str(wanted_errors)))
except ValueError:
    raise ValueError("Wanted errors provided in invalid format")

# Ensure directory exists where model will be saved
utils.ensure_dir_exists(args.save_dir)

# Print all arguments
utils.flash_utils(args)

# Prepare logger; one log directory per (n_copies, seed) combination
run_tag = "_".join(["indiscriminate", str(args.n_copies), str(args.seed)])
log_dir = os.path.join(args.log_path, run_tag)
utils.ensure_dir_exists(log_dir)
logger = SummaryWriter(log_dir=log_dir, flush_secs=10)
# Purpose of this mode is just to train model once # Exit after that if args.use_given_data: exit(0) # Save current model model_name = "seed-{}_ratio-{}_loss-{}_bs-{}.pth".format( args.seed, ratio, train_loss, args.batch_size) ch.save( copy.deepcopy(model).state_dict(), os.path.join(model_dir, model_name)) print("Saved model to %s" % os.path.join(model_dir, model_name)) if tst_sub_acc <= args.attacker_goal and train_loss < best_loss: best_loss = train_loss best_model_obj = { "model": copy.deepcopy(model), "test_acc": test_acc, "train_loss": train_loss, "test_collat_acc": tst_nsub_acc, "test_target_acc": tst_sub_acc, "ratio": ratio } print( utils.yellow_print("Updated lowest train loss: %.4f" % train_loss)) if best_model_obj is None: print(utils.red_print("No model satisfied given adversary's goal!"))
def modelTargetPoisoning(models_p, logger, args):
    """
    Build a poisoning set D_p using ensembles on both sides.

    Implementation of Algorithm 1, modified for DNNs. The "Line N"
    comments below refer to the corresponding line of that algorithm.

    One theta_t is trained per entry of `args.seeds` (`args.seed` is
    overwritten with each seed in turn). Every iteration retrains those
    models on D_c U D_p (the first iteration reuses the clean pre-trained
    ones), picks (x*, y*) with `mtp_utils.lookup_based_optimal`, appends
    `args.n_copies` copies to D_p and to the training data, logs the mean
    statistics over the ensemble and asks `stop_cond` whether to continue.

    Args:
        models_p: iterable of target models.
        logger: forwarded to `mtp_utils.log_information`.
        args: options namespace (path_1, path_2, seeds, loss,
            poison_class, iters, increase_iters, increase_every,
            pretrain_lr, pretrain_weight_decay, random, filter,
            pick_optimal, n_copies, plus whatever `stop_cond` and the
            helpers read).

    Returns:
        (D_p, models_t): D_p is `[xs, ys]`, two parallel lists of the
        chosen points moved to CPU; models_t is the last list of theta_t.
    """
    # Fetch appropriate dataset
    ds = datasets.dataset_helper("memory")(path=args.path_1)

    # Maintain copy of clean data (for seed sampling)
    ds_clean = datasets.dataset_helper("memory")(path=args.path_1)

    # Data to pick points from (for x* optimization)
    ds_second = datasets.dataset_helper("memory")(path=args.path_2)
    loader_optim, _ = ds_second.get_loaders(1000)

    # Line 1: Collect poisoning points
    D_p = [[], []]

    # Line 3: Since D_p is empty in first iteration, simply train it outside
    models_t_pretrained = []
    for current_seed in args.seeds:
        args.seed = current_seed
        print(utils.yellow_print("Printing model with seed %d" % args.seed))
        clean_model, _ = mtp_utils.train_clean_model(ds, args)
        models_t_pretrained.append(clean_model)

    # Report performance of clean models (single full-size batch)
    train_loader, test_loader = ds.get_loaders(len(ds.train))

    clean_accs, clean_total_losses = [], []
    population_accs, non_population_accs = [], []
    for clean_model in models_t_pretrained:
        overall_acc, overall_loss = dnn_utils.get_model_metrics(
            clean_model, test_loader, lossfn=args.loss)
        clean_accs.append(overall_acc)
        clean_total_losses.append(overall_loss)

        (pop_acc, _), (non_pop_acc, _) = dnn_utils.get_model_metrics(
            clean_model,
            test_loader,
            lossfn=args.loss,
            target_prop=args.poison_class)
        population_accs.append(pop_acc)
        non_population_accs.append(non_pop_acc)

    print(
        utils.yellow_print("[Clean-model][Test] Total Acc: %.4f" %
                           np.mean(clean_accs)))
    print(
        utils.yellow_print("[Clean-model] Loss on train: %.4f" %
                           np.mean(clean_total_losses)))
    print(
        utils.red_print("[Clean-model][Test] Population Acc: %.4f" %
                        np.mean(population_accs)))
    print(
        utils.red_print("[Clean-model][Test] Non-Population Acc: %.4f" %
                        np.mean(non_population_accs)))
    print()

    # Line 2: Iterate until stopping criteria met
    best_loss = np.inf
    num_iters = 0
    condition = True
    while condition:

        if not D_p[0]:
            # Nothing collected yet: reuse the clean pre-trained models
            models_t = models_t_pretrained
        else:
            # Line 3: theta_t = train(D_c U D_p)
            print(
                utils.yellow_print("[Training model on Dc U Dp "
                                   "(on %d samples)]" % len(ds.train)))
            # Get loader for D_c U D_p
            data_loader, _ = ds.get_loaders(len(ds.train))

            # Increase number of iterations theta_t is trained for
            # as size of its training set |D_c U D_p| increases
            iters = args.iters
            if args.increase_iters:
                extra_points = len(ds.train) - len(ds_clean.train)
                iters += int(extra_points / args.increase_every)

            # Construct and train one model per seed
            models_t = []
            for current_seed in args.seeds:
                args.seed = current_seed
                fresh_model = dnn_utils.get_seeded_wrapped_model(
                    args, n_classes=ds.n_classes)
                # Set model to training mode
                fresh_model.train()

                # Define optimizer
                optimizer = ch.optim.Adam(
                    fresh_model.parameters(),
                    lr=args.pretrain_lr,
                    weight_decay=args.pretrain_weight_decay)

                # Train model
                print(
                    utils.yellow_print("Printing model with seed %d" %
                                       args.seed))
                for epoch_idx in range(iters):
                    # Train epoch
                    dnn_utils.epoch(model=fresh_model,
                                    loader=data_loader,
                                    optimizer=optimizer,
                                    epoch_num=epoch_idx + 1,
                                    c_rule=None,
                                    n_classes=None,
                                    verbose=True,
                                    lossfn=args.loss)

                models_t.append(fresh_model)

        # Make sure theta_t are in eval mode
        for member in models_t:
            member.eval()

        # Line 4: Compute (x*, y*)
        (x_opt, y_opt), best_loss = mtp_utils.lookup_based_optimal(
            theta_t=models_t,
            theta_p=models_p,
            loader=loader_optim,
            n_classes=ds_second.n_classes,
            random=args.random,
            lossfn=args.loss,
            filter=args.filter,
            verbose=True,
            ensemble_t=True,
            ensemble_p=True,
            pick_optimal=args.pick_optimal)

        # Log some information about x*, y*
        with ch.no_grad():
            preds_p = [
                str(target(x_opt).argmax(1).item()) for target in models_p
            ]
            preds_t = [
                str(member(x_opt).argmax(1).item()) for member in models_t
            ]
        print(
            utils.cyan_print("Loss: %.3f Mt(x*): %s, Mp(x*): %s, y*: %d" %
                             (best_loss.item(), ",".join(preds_t),
                              ",".join(preds_p), y_opt)))

        # Line 5: Add (x*, y*) to D_p
        for _ in range(args.n_copies):
            D_p[0].append(x_opt.cpu())
            D_p[1].append(y_opt.cpu())
            ds.add_point_to_train(x_opt.cpu(), y_opt.cpu())
        print()

        # Calculate useful statistics, one entry per ensemble member
        tst_sub_accs, tst_nsub_accs = [], []
        trn_sub_accs, trn_nsub_accs = [], []
        for member in models_t:
            (sub_acc, _), (nsub_acc, _) = dnn_utils.get_model_metrics(
                model=member,
                loader=test_loader,
                target_prop=args.poison_class,
                lossfn=args.loss)
            tst_sub_accs.append(sub_acc)
            tst_nsub_accs.append(nsub_acc)

            (sub_acc, _), (nsub_acc, _) = dnn_utils.get_model_metrics(
                model=member,
                loader=train_loader,
                target_prop=args.poison_class,
                lossfn=args.loss)
            trn_sub_accs.append(sub_acc)
            trn_nsub_accs.append(nsub_acc)

        # Get mean of these metrics
        trn_sub_acc = np.mean(trn_sub_accs)
        tst_sub_acc = np.mean(tst_sub_accs)
        trn_nsub_acc = np.mean(trn_nsub_accs)
        tst_nsub_acc = np.mean(tst_nsub_accs)

        # Log information
        mtp_utils.log_information(logger=logger,
                                  best_loss=best_loss,
                                  x_opt=x_opt,
                                  norm_diffs=None,
                                  trn_sub_acc=trn_sub_acc,
                                  trn_nsub_acc=trn_nsub_acc,
                                  tst_sub_acc=tst_sub_acc,
                                  tst_nsub_acc=tst_nsub_acc,
                                  num_iters=num_iters + 1,
                                  args=args,
                                  label=y_opt)

        # Line 6: Get ready to check condition
        condition = stop_cond(args=args,
                              best_loss=best_loss,
                              num_iters=num_iters,
                              tst_sub_acc=tst_sub_acc)

        # Keep track of no. of iterations
        num_iters += 1

    # Line 7: Return poison data
    return D_p, models_t