Ejemplo n.º 1
0
    def generateRandomMutant(self) -> Tuple[Pattern, Occurrence, str, str]:
        """Create a single mutant from a random pattern and a random occurrence.

        The candidate pool is reset to a copy of `self.mutation_patterns`.
        While the pool is non-empty, a pattern is obtained from
        `nextRandomPattern()`; the first pattern that has at least one
        occurrence gets one of its occurrences picked at random and mutated.

        Because the outcome is random, this call is a poor choice when mutants
        are run sequentially and their order must be controlled; assemble such
        runs from the other API methods instead. It mostly serves as an example
        of what this class can do.

        Returns:
            `(pattern, occurrence, original_line, mutated_line)` on success.
            If the pool is exhausted without finding any occurrence, two error
            messages are printed and `None` is returned.
        """
        # Start over with every known pattern
        self.pattern_generator = self.mutation_patterns.copy()

        # Try patterns until one of them matches somewhere in the source
        while len(self.pattern_generator) > 0:
            candidate = self.nextRandomPattern()
            matches = self.findOccurrences(candidate)
            if len(matches) == 0:
                continue
            chosen = self.rng.choice(matches)
            before, after = self.mutate(chosen)
            return candidate, chosen, before, after

        # No pattern produced an occurrence
        for message in (
                "Could not create a mutant. Please make sure it is a C file.",
                "You may need to indent your C file."):
            utils.red_print(message)
        return None
Ejemplo n.º 2
0
def train_poisoned_model(model, callable_ds, poison_ratio, args):
    """Poison the training data once up front, then train `model` normally.

    `callable_ds` is called twice: once for the dataset that receives the
    poison and once for an untouched copy used for reporting and for the
    clean train loader handed to `train_model`.

    Args:
        model: model object forwarded to `train_model`.
        callable_ds: zero-argument callable returning a dataset object.
        poison_ratio: forwarded to `poison_train_data` when the poison is
            generated here rather than loaded from `args.poison_path`.
        args: run configuration; the attributes read are visible below.

    Returns:
        The trained model, or `(model, all_stats)` when `args.study_mode`
        is set.
    """
    poisoned_ds = callable_ds()
    clean_ds = callable_ds()

    if not args.use_given_data:
        # Generate the poison from the dataset itself (no explicit selection)
        poisoned_ds.poison_train_data(args.poison_class,
                                      poison_ratio,
                                      args.c_rule,
                                      selection=None,
                                      save_data=args.save_poisoned_data,
                                      offset=args.offset)
    else:
        # Use pre-computed poison points stored on disk
        print(utils.yellow_print("Using given data"))
        stored = np.load(args.poison_path)
        poisoned_ds.add_poison_data(stored['x'], stored['y'])

    n_total = len(poisoned_ds.train)
    print("Training on %d samples" % n_total)
    n_added = len(poisoned_ds.train) - len(clean_ds.train)
    print(utils.red_print("%d additional points" % n_added))

    # A batch size of -1 means full-batch training without shuffling
    if args.batch_size == -1:
        batch_size, shuffle = len(poisoned_ds.train), False
    else:
        batch_size, shuffle = args.batch_size, True

    loaders = poisoned_ds.get_loaders(batch_size, shuffle=shuffle)
    train_loader, val_loader = loaders
    clean_train_loader, _ = clean_ds.get_loaders(batch_size, shuffle=shuffle)

    outcome = train_model(model, (train_loader, val_loader),
                          epochs=args.epochs,
                          c_rule=args.c_rule,
                          n_classes=poisoned_ds.n_classes,
                          weight_decay=args.weight_decay,
                          lr=args.lr,
                          verbose=args.verbose,
                          no_val=True,
                          get_metrics_at_epoch_end=args.poison_class,
                          clean_train_loader=clean_train_loader,
                          study_mode=args.study_mode,
                          loss_fn=args.loss)

    # train_model yields one extra element (the statistics) in study mode
    if args.study_mode:
        model, _, _, all_stats = outcome
        return model, all_stats

    model, _, _ = outcome
    return model
Ejemplo n.º 3
0
def stop_cond(args, best_loss, num_iters, tst_sub_acc):
    """Return whether the poisoning loop should keep iterating.

    Despite the name, a truthy result means "continue". The criterion is:

    * `args.fixed_budget` positive: continue while `num_iters` is below it.
    * otherwise, with `args.require_acc`: continue while `tst_sub_acc`
      exceeds `1 - args.err_threshold` (the accuracy is also printed).
    * otherwise: continue while `best_loss` exceeds `args.incre_tol_par`.
    """
    loss_threshold = args.incre_tol_par

    # A fixed budget takes precedence over every other criterion
    if not (args.fixed_budget <= 0):
        return num_iters < args.fixed_budget

    if not args.require_acc:
        # Loss-based criterion
        return best_loss > loss_threshold

    # Accuracy-based criterion
    keep_going = tst_sub_acc > 1 - args.err_threshold
    print(
        utils.red_print("Current accuracy on population (test): %.4f" %
                        tst_sub_acc))
    return keep_going
Ejemplo n.º 4
0
def stop_cond(args, best_loss, num_iters, tst_sub_acc, norm_diffs):
    """Return whether the poisoning loop should keep iterating.

    Despite the name, a truthy result means "continue". The criterion is:

    * `args.fixed_budget` positive: continue while `num_iters` is below it.
    * otherwise, with `args.require_acc`: continue while `tst_sub_acc`
      exceeds `1 - args.err_threshold` (the accuracy is also printed).
    * otherwise: continue while the monitored quantity exceeds
      `args.incre_tol_par`. That quantity is `best_loss` when
      `args.online_alg_criteria == "max_loss"` and `norm_diffs` for any
      other value.
    """
    tolerance = args.incre_tol_par

    # A fixed budget takes precedence over every other criterion
    if not (args.fixed_budget <= 0):
        return num_iters < args.fixed_budget

    if args.require_acc:
        # Accuracy-based criterion
        keep_going = tst_sub_acc > 1 - args.err_threshold
        print(
            utils.red_print("Current accuracy on population (test): %.4f" %
                            tst_sub_acc))
        return keep_going

    # Either the max loss or the euclidean distance drives the criterion
    use_loss = args.online_alg_criteria == "max_loss"
    monitored = best_loss if use_loss else norm_diffs
    return monitored > tolerance
Ejemplo n.º 5
0
def modelTargetPoisoning(model_p, logger, args):
    """Iteratively collect poison points that steer training toward `model_p`.

    Implementation of Algorithm 1, modified for DNNs. The line number of the
    corresponding step of the Algorithm is mentioned along with each
    high-level function call.

    Each iteration (re)trains a model `model_t` on the clean data plus the
    poison collected so far, searches for a point `(x*, y*)` via
    `mtp_utils.find_optimal` / `find_optimal_using_optim`, appends
    `args.n_copies` copies of it to both the poison set and the training
    data, logs statistics, and asks `stop_cond` whether to continue.

    Args:
        model_p: target model, passed as `theta_p` to the optimizer and
            compared against `model_t` for logging.
        logger: forwarded to `mtp_utils.log_information`.
        args: run configuration; the attributes read are visible below.

    Returns:
        `(D_p, model_t)`, where `D_p` is `[xs, ys]` (two parallel lists of
        the collected poison inputs and labels, moved to CPU) and `model_t`
        is the model used in the last iteration.
    """
    # Fetch appropriate dataset
    ds = datasets.dataset_helper(args.dataset)()

    # Maintain copy of clean data (for seed sampling)
    ds_clean = datasets.dataset_helper(args.dataset)()

    # Line 1: Collect poisoning points
    D_p = [[], []]

    # Load poison data, if provided
    if args.poison_data:
        print(utils.green_print("Loading poison data"))
        data = np.load("./data/poison_data/poison_data.npz")
        # Normalize to 0-1 for use by model
        all_poison_data_x = ch.from_numpy(data['x']).float() / 255.
        all_poison_data_x = ch.unsqueeze(all_poison_data_x, 1)
        all_poison_data_y = ch.from_numpy(data['y'])

    # Line 3: Since D_p is empty in first iteration, simply train it outside
    model_t_pretrained, pretrain_optim = mtp_utils.train_clean_model(ds, args)

    # Report performance of clean model
    batch_size = args.batch_size
    if batch_size == -1:
        batch_size = len(ds.train)
    # NOTE(review): these loaders are created before any poison is added and
    # train_loader is reused for the per-iteration train metrics below;
    # whether it reflects points added later depends on ds.get_loaders.
    train_loader, test_loader = ds.get_loaders(batch_size)
    clean_acc, clean_total_loss = dnn_utils.get_model_metrics(
        model_t_pretrained, test_loader, lossfn=args.loss)
    print(utils.yellow_print("[Clean-model][Test] Total Acc: %.4f" %
                             clean_acc))
    print(
        utils.yellow_print("[Clean-model] Loss on train: %.4f" %
                           clean_total_loss))
    (population_acc,
     _), (non_population_acc,
          _) = dnn_utils.get_model_metrics(model_t_pretrained,
                                           test_loader,
                                           lossfn=args.loss,
                                           target_prop=args.poison_class)
    print(
        utils.red_print("[Clean-model][Test] Population Acc: %.4f" %
                        population_acc))
    print(
        utils.red_print("[Clean-model][Test] Non- Population Acc: %.4f" %
                        non_population_acc))
    print()

    # Line 2: Iterate until stopping criteria met
    prev_loss, best_loss = np.inf, np.inf
    num_iters = 0
    condition = True
    while condition:

        if len(D_p[0]) > 0:
            # Line 3: theta_t = train(D_c U D_p)
            print(
                utils.yellow_print("[Training model on Dc U Dp "
                                   "(on %d samples)]" % len(ds.train)))
            # Get loader for D_c U D_p
            batch_size = args.batch_size
            if batch_size == -1:
                batch_size = len(ds.train)
            data_loader, _ = ds.get_loaders(batch_size)

            # Do not re-initialize model if finetuning requested
            if not args.finetune:
                # Construct model
                model_t = dnn_utils.get_seeded_wrapped_model(
                    args, n_classes=ds.n_classes)
            else:
                # Start finetuning from the point where model
                # has seen only clean data
                model_t = copy.deepcopy(model_t_pretrained)
            # Set model to training mode
            model_t.train()

            # Define optimizer
            optim = ch.optim.Adam(model_t.parameters(),
                                  lr=args.pretrain_lr,
                                  weight_decay=args.pretrain_weight_decay)

            # Adjust starting point of optimizer
            # if finetuning is requested
            if args.finetune:
                optim.load_state_dict(pretrain_optim.state_dict())

            # Increase number of iterations theta_t is trained for
            # as size of its training set |D_c U D_p| increases
            iters = args.iters
            if args.increase_iters:
                iters += int((len(ds.train) - len(ds_clean.train)) /
                             args.increase_every)

            # Train model
            for e in range(iters):
                # Train epoch
                dnn_utils.epoch(model=model_t,
                                loader=data_loader,
                                optimizer=optim,
                                epoch_num=e + 1,
                                c_rule=None,
                                n_classes=None,
                                verbose=True,
                                lossfn=args.loss)
        else:
            # First iteration: no poison yet, reuse the clean model as-is
            model_t = model_t_pretrained

        # Make sure theta_t is in eval mode
        model_t.eval()

        start_with = None
        if args.start_opt_real:
            # If flag set, start with real data sampled from
            # (unpoisoned) train loader
            batch_size = args.batch_size
            if batch_size == -1:
                batch_size = len(ds.train)
            loader, _ = ds_clean.get_loaders(batch_size)
            start_with = datasets.get_sample_from_loader(
                loader, args.trials, ds_clean.n_classes)
        elif args.poison_data:
            # Sample 'num-trials' data from this
            perm = ch.randperm(all_poison_data_x.size(0))
            idx = perm[:args.trials]
            start_with = (all_poison_data_x[idx], all_poison_data_y[idx])

        # Line 4: Compute (x*, y*)
        if args.use_optim_for_optimal:
            find_optimal_function = mtp_utils.find_optimal_using_optim
        else:
            find_optimal_function = mtp_utils.find_optimal

        # Arguments shared by the first attempt and the retry, so that the
        # two searches differ only in the number of trials. The retry
        # previously omitted `filter`, silently dropping the filter setting.
        optim_kwargs = dict(theta_t=model_t,
                            theta_p=model_p,
                            input_shape=ds.datum_shape,
                            n_classes=ds.n_classes,
                            num_steps=args.num_steps,
                            step_size=args.optim_lr,
                            verbose=True,
                            start_with=start_with,
                            lossfn=args.loss,
                            dynamic_lr=args.dynamic_lr,
                            filter=args.filter)

        (x_opt, y_opt), best_loss = find_optimal_function(
            trials=args.trials, **optim_kwargs)

        # If loss increased, try optimization once more
        # With double trials, to reduce chance of bad minima
        if args.skip_bad and best_loss > prev_loss:
            print(utils.red_print("Re-running optimization with more seeds"))
            (x_opt, y_opt), best_loss = find_optimal_function(
                trials=args.trials * 2, **optim_kwargs)

        # Log some information about x*, y*
        with ch.no_grad():
            pred_t, pred_p = model_t(x_opt), model_p(x_opt)
            # The current model already predicts y* for x*: flag it
            if pred_t.argmax(1) == y_opt.item():
                print(utils.red_print("[BAD OPTIMIZATION. CHECK]"))
        print(
            utils.cyan_print(
                "Loss: %.3f Mt(x*): %d, Mp(x*): %d, y*: %d" %
                (best_loss.item(), pred_t.argmax(1), pred_p.argmax(1), y_opt)))

        # Line 5: Add (x*, y*) to D_p
        for _ in range(args.n_copies):
            D_p[0].append(x_opt.cpu())
            D_p[1].append(y_opt.cpu())
            ds.add_point_to_train(x_opt.cpu(), y_opt.cpu())
        print()

        # Calculate useful statistics
        (tst_sub_acc,
         _), (tst_nsub_acc,
              _) = dnn_utils.get_model_metrics(model=model_t,
                                               loader=test_loader,
                                               target_prop=args.poison_class,
                                               lossfn=args.loss)
        (trn_sub_acc,
         _), (trn_nsub_acc,
              _) = dnn_utils.get_model_metrics(model=model_t,
                                               loader=train_loader,
                                               target_prop=args.poison_class,
                                               lossfn=args.loss)
        norm_diffs = dnn_utils.model_l2_closeness(model_t, model_p)

        # Log information
        mtp_utils.log_information(logger=logger,
                                  best_loss=best_loss,
                                  x_opt=x_opt,
                                  model_t=model_t,
                                  norm_diffs=norm_diffs,
                                  trn_sub_acc=trn_sub_acc,
                                  trn_nsub_acc=trn_nsub_acc,
                                  tst_sub_acc=tst_sub_acc,
                                  tst_nsub_acc=tst_nsub_acc,
                                  num_iters=num_iters + 1,
                                  args=args)

        # Line 6: Get ready to check condition
        condition = stop_cond(args=args,
                              best_loss=best_loss,
                              num_iters=num_iters,
                              tst_sub_acc=tst_sub_acc,
                              norm_diffs=norm_diffs)

        # Keep track of no. of iterations
        num_iters += 1

        # Keep track of loss from previous iteration
        prev_loss = best_loss.item()

    # Line 7: Return poison data
    return D_p, model_t
Ejemplo n.º 6
0
def run_task(task, args, config):
    """ Runs a mutation testing task with settings in `args` and `config`.

    For each mutant occurrence (up to `args.mutants` successful runs) the
    source is mutated, then built/flashed/read via `flash_and_read`, and the
    outcome is recorded; the source is restored after every attempt. Mutants
    that fail to compile are counted but do not count as runs.

    A summary (score, per-test aggregates, per-pattern comparison) is always
    produced in the outer `finally`, even if the automation stops early, and
    written as CSV files when `args.csv` is set.

    Args:
        task: mapping with at least 'src', 'patterns' and 'name'; may also
            carry 'mutants_per_pattern'.
        args: parsed command-line options (port, timeout, csv, seed,
            randomize, mutants, line_coverage).
        config: mapping providing 'flash_command'.

    Raises:
        Re-raises any exception from the run loop other than `CompileFailed`
        after printing its traceback.
    """
    flash_command = config['flash_command']

    port = args.port if args.port else utils.get_default_serial_port()
    os.environ['PORT'] = port
    timeout = int(args.timeout)
    csv = args.csv
    rng = random.Random(args.seed)

    mutation = mutator.Mutator(src=task['src'],
                               mutation_patterns=task['patterns'],
                               rng=rng)
    mutations_list = mutation.generateMutants(
        mutants_per_pattern=task.get('mutants_per_pattern'),
        random=args.randomize)

    data_record = []
    trials = []
    # (group, test) -> (times the test failed, times the test was seen)
    test_to_kills = {}
    run_cnt = 0
    nc = 0
    total_failures = 0
    mutant_cnt = int(args.mutants)

    patterns = list(mutation.getPatterns())
    failures_per_pattern = {mp: 0 for mp in patterns}
    total_per_pattern = {mp: 0 for mp in patterns}

    # outer try is for finally generating csv if automation stops early
    try:
        for occurrence in mutations_list:
            if run_cnt == mutant_cnt:
                break

            mp = occurrence.pattern
            # mutate the code
            utils.yellow_print(occurrence)
            original_line, mutated_line = mutation.mutate(occurrence)
            # Drop a trailing ".old" suffix. str.rstrip(".old") must not be
            # used here: it strips any trailing '.', 'o', 'l', 'd' characters
            # and would mangle names such as "foo.old" into "f".
            file_changed = occurrence.file
            if file_changed.endswith(".old"):
                file_changed = file_changed[:-len(".old")]
            line_number = occurrence.line
            # try is for catching compile failure to continue execution
            try:
                # cmake, build, flash, and read
                output, final_flag = flash_and_read(port, timeout,
                                                    flash_command)

                # reaching here means success, so change counters
                run_cnt += 1
                total_per_pattern[mp] += 1

                # tests expected to catch
                tests_expected_to_catch = "N/A"
                if args.line_coverage:
                    tests_expected_to_catch = ",".join(
                        get_expected_catch(args.line_coverage,
                                           int(line_number)))

                # mutant_status can either be "FAIL", "PASS", "CRASH", "TIMEOUT"
                mutant_status = "FAIL"
                if final_flag == FLAGS.PassFlag:
                    utils.red_print("Mutant is Alive")
                    utils.red_print(
                        "Tests that are expected to catch this mutant are: \n{}"
                        .format(tests_expected_to_catch))
                    mutant_status = "PASS"
                else:
                    # Anything but a pass counts as a kill
                    failures_per_pattern[mp] += 1
                    total_failures += 1
                    utils.green_print("Mutant is Killed")
                if final_flag == FLAGS.CrashFlag:
                    mutant_status = "CRASH"
                elif final_flag == "TIMEOUT":
                    mutant_status = "TIMEOUT"

                # Analyze the output to count per test failures
                for group, test, result in re.findall(TestRegEx, output):
                    kills, total = test_to_kills.get((group, test), (0, 0))
                    if result == 'FAIL':
                        kills += 1
                    test_to_kills[(group, test)] = (kills, total + 1)

                # Add result to CSV queue
                if mutant_status != "PASS":
                    result_label = "{}/KILLED".format(mutant_status)
                else:
                    result_label = "PASS/LIVE"
                trials.append({
                    'file': file_changed,
                    'line': line_number,
                    'original': original_line,
                    'mutant': mutated_line,
                    'result': result_label,
                    'expected_catch': tests_expected_to_catch
                })
                utils.yellow_print("Successful Mutant Runs: {}/{}".format(
                    run_cnt, mutant_cnt))
            except CompileFailed:
                utils.yellow_print("Cannot compile, discard and move on")
                nc += 1
            finally:
                # Always undo the mutation before trying the next one
                mutation.restore()
    except BaseException:
        # Deliberately broad (includes KeyboardInterrupt): show what stopped
        # the run, then let it propagate after the summary below is written.
        traceback.print_exc()
        raise
    finally:
        mutation.cleanup()
        # calculate mutant score
        score = percentage(total_failures, run_cnt)
        utils.yellow_print("Score: {}%".format(score))
        utils.yellow_print(
            "Alive: {} Killed: {} Mutants: {} No-Compile: {} Attempted Runs: {}"
            .format(run_cnt - total_failures, total_failures, run_cnt, nc,
                    run_cnt + nc))
        # Summary row appended after the per-mutant rows
        trials.append({
            'file': "RESULTS:",
            'line': "{} NO-COMPILE".format(nc),
            'mutant': "SCORE",
            'original': "{} KILLED/{} MUTANTS".format(total_failures, run_cnt),
            'result': "{}%".format(score)
        })

        # aggregate pass/fail counts for each found test in test group
        aggregates = [{
            'Group': group,
            'Test': test,
            'Fails': kills,
            'Passes': total - kills,
            'Total': total
        } for (group, test), (kills, total) in test_to_kills.items()]

        # pattern comparison
        for mp in total_per_pattern:
            if total_per_pattern[mp] > 0:
                ratio = float(
                    percentage(failures_per_pattern[mp],
                               total_per_pattern[mp])) * 0.01
            else:
                # Out-of-range marker for patterns that never ran
                ratio = 2
            data_record.append({
                'pattern': "{} => {}".format(mp.pattern, mp.transformation),
                'failures': failures_per_pattern[mp],
                'total': total_per_pattern[mp],
                'percentage': ratio
            })

        # log to csv
        if csv:
            csv_path = os.path.join(
                dir_path, "csvs/{}/{}".format(current_date, current_time))
            pattern_csv = os.path.join(
                csv_path, "{}_pattern_comparison.csv".format(task['name']))
            trials_csv = os.path.join(
                csv_path, "{}_mutants_created.csv".format(task['name']))
            per_test_csv = os.path.join(
                csv_path, "{}_test_aggregates.csv".format(task['name']))
            to_csv(pattern_csv, ['pattern', 'failures', 'total', 'percentage'],
                   data_record)
            to_csv(trials_csv, [
                'file', 'line', 'original', 'mutant', 'result',
                'expected_catch'
            ], trials)
            to_csv(per_test_csv, ['Group', 'Test', 'Fails', 'Passes', 'Total'],
                   aggregates)
def modelTargetPoisoningEnsemble(models_p, logger, args):
    """Iteratively collect poison points against an ensemble of target models.

    Implementation of Algorithm 1, modified for DNNs. The line number of the
    corresponding step of the Algorithm is mentioned along with each
    high-level function call.

    Each iteration (re)trains a model `model_t` on the clean data plus the
    poison collected so far, picks a point `(x*, y*)` with the method chosen
    by `args.optim_type`, appends copies of it to both the poison set and the
    training data, logs statistics, and asks `stop_cond` whether to continue.

    Args:
        models_p: iterable of target models; passed as `theta_p` with
            `ensemble_p=True` and evaluated one by one for logging.
        logger: forwarded to `mtp_utils.log_information`.
        args: run configuration; the attributes read are visible below.

    Returns:
        `(D_p, model_t)`, where `D_p` is `[xs, ys]` (two parallel lists of
        the collected poison inputs and labels, moved to CPU) and `model_t`
        is the model used in the last iteration.

    Raises:
        NotImplementedError: if `args.optim_type` is not one of "lookup",
            "dataset_grad" or "loss_difference".
    """
    # Fetch appropriate dataset
    ds = datasets.dataset_helper("memory")(path=args.path_1)

    # Maintain copy of clean data (for seed sampling)
    ds_clean = datasets.dataset_helper("memory")(path=args.path_1)

    # Data to pick points from (for x* optimization)
    ds_second = datasets.dataset_helper("memory")(path=args.path_2)
    loader_optim, _ = ds_second.get_loaders(1000)

    # Line 1: Collect poisoning points
    D_p = [[], []]

    # Line 3: Since D_p is empty in first iteration, simply train it outside
    model_t_pretrained, pretrain_optim = mtp_utils.train_clean_model(ds, args)

    # Report performance of clean model
    batch_size = args.batch_size
    if batch_size == -1:
        batch_size = len(ds.train)

    train_loader, test_loader = ds.get_loaders(batch_size)
    clean_acc, clean_total_loss = dnn_utils.get_model_metrics(
        model_t_pretrained, test_loader, lossfn=args.loss)
    print(utils.yellow_print("[Clean-model][Test] Total Acc: %.4f" %
                             clean_acc))
    print(
        utils.yellow_print("[Clean-model] Loss on train: %.4f" %
                           clean_total_loss))
    (population_acc,
     _), (non_population_acc,
          _) = dnn_utils.get_model_metrics(model_t_pretrained,
                                           test_loader,
                                           lossfn=args.loss,
                                           target_prop=args.poison_class)
    print(
        utils.red_print("[Clean-model][Test] Population Acc: %.4f" %
                        population_acc))
    print(
        utils.red_print("[Clean-model][Test] Non- Population Acc: %.4f" %
                        non_population_acc))
    print()

    # Line 2: Iterate until stopping criteria met
    # tst_sub_acc seeds the dynamic repeat count in the first iteration
    tst_sub_acc = 1.0
    best_loss = np.inf
    num_iters = 0
    condition = True
    while condition:

        if len(D_p[0]) > 0:
            # Line 3: theta_t = train(D_c U D_p)
            print(
                utils.yellow_print("[Training model on Dc U Dp "
                                   "(on %d samples)]" % len(ds.train)))
            # Get loader for D_c U D_p
            batch_size = args.batch_size
            if batch_size == -1:
                batch_size = len(ds.train)
            data_loader, _ = ds.get_loaders(batch_size)

            # Do not re-initialize model if finetuning requested
            if not args.finetune:
                # Construct model
                model_t = dnn_utils.get_seeded_wrapped_model(
                    args, n_classes=ds.n_classes)
            else:
                # Start finetuning from the point where model
                # has seen only clean data
                model_t = copy.deepcopy(model_t_pretrained)
            # Set model to training mode
            model_t.train()

            # Define optimizer
            optim = ch.optim.Adam(model_t.parameters(),
                                  lr=args.pretrain_lr,
                                  weight_decay=args.pretrain_weight_decay)

            # Adjust starting point of optimizer
            # if finetuning is requested
            if args.finetune:
                optim.load_state_dict(pretrain_optim.state_dict())

            # Increase number of iterations theta_t is trained for
            # as size of its training set |D_c U D_p| increases
            iters = args.iters
            if args.increase_iters:
                iters += int((len(ds.train) - len(ds_clean.train)) /
                             args.increase_every)

            # Train model
            for e in range(iters):
                # Train epoch
                dnn_utils.epoch(model=model_t,
                                loader=data_loader,
                                optimizer=optim,
                                epoch_num=e + 1,
                                c_rule=None,
                                n_classes=None,
                                verbose=True,
                                lossfn=args.loss)
        else:
            # First iteration: no poison yet, reuse the clean model as-is
            model_t = model_t_pretrained

        # Make sure theta_t is in eval mode
        model_t.eval()

        # Line 4: Compute (x*, y*)
        if args.optim_type == "lookup":
            # Loss-difference based lookup method
            (x_opt, y_opt), best_loss = mtp_utils.lookup_based_optimal(
                theta_t=model_t,
                theta_p=models_p,
                loader=loader_optim,
                n_classes=ds_second.n_classes,
                random=args.random,
                lossfn=args.loss,
                filter=args.filter,
                verbose=True,
                ensemble_p=True)
        elif args.optim_type == "dataset_grad":
            # Dataset-gradient alignment loss based optimization
            (x_opt, y_opt), best_loss = mtp_utils.dataset_grad_optimal(
                theta_t=model_t,
                theta_p=models_p,
                input_shape=ds_second.datum_shape,
                n_classes=ds_second.n_classes,
                trials=args.optim_trials,
                ds=ds,
                num_steps=args.optim_steps,
                step_size=args.optim_lr,
                verbose=True,
                signed=args.signed,
                ensemble_p=True,
                batch_sample_estimate=args.batch_sample_estimate)
        elif args.optim_type == "loss_difference":
            # Loss difference based optimization
            (x_opt, y_opt), best_loss = mtp_utils.find_optimal_using_optim(
                theta_t=model_t,
                theta_p=models_p,
                input_shape=ds_second.datum_shape,
                n_classes=ds_second.n_classes,
                num_steps=args.optim_steps,
                trials=args.optim_trials,
                step_size=args.optim_lr,
                filter=args.filter,
                verbose=True,
                ensemble_p=True)
        else:
            # NotImplemented is a non-callable constant; the exception type
            # is NotImplementedError.
            raise NotImplementedError(
                "Loss optimization method not implemented")

        # Log some information about x*, y*
        with ch.no_grad():
            pred_t = model_t(x_opt)
            # Predictions of every target model, comma-separated
            preds_p = ",".join(
                [str(mp(x_opt).argmax(1).item()) for mp in models_p])
        print(
            utils.cyan_print("Mt(x*): %d, Mp(x*): %s, y*: %d" %
                             (pred_t.argmax(1), preds_p, y_opt)))

        # Set n_copies dynamically, if requested
        n_copies = args.n_copies
        if args.dynamic_repeat:
            n_copies = mtp_utils.dynamic_n(tst_sub_acc, args.n_copies)

        # Line 5: Add (x*, y*) to D_p
        # Use the (possibly dynamic) n_copies computed above; iterating over
        # args.n_copies would ignore the dynamic_repeat setting.
        for _ in range(n_copies):
            D_p[0].append(x_opt.cpu())
            D_p[1].append(y_opt.cpu())
            ds.add_point_to_train(x_opt.cpu(), y_opt.cpu())
        print()

        # Calculate useful statistics
        (tst_sub_acc,
         _), (tst_nsub_acc,
              _) = dnn_utils.get_model_metrics(model=model_t,
                                               loader=test_loader,
                                               target_prop=args.poison_class,
                                               lossfn=args.loss)
        (trn_sub_acc,
         _), (trn_nsub_acc,
              _) = dnn_utils.get_model_metrics(model=model_t,
                                               loader=train_loader,
                                               target_prop=args.poison_class,
                                               lossfn=args.loss)

        norm_diffs = dnn_utils.model_l2_closeness(model_t,
                                                  models_p,
                                                  ensemble=True)

        # Log information
        mtp_utils.log_information(logger=logger,
                                  best_loss=best_loss,
                                  x_opt=x_opt,
                                  norm_diffs=norm_diffs,
                                  trn_sub_acc=trn_sub_acc,
                                  trn_nsub_acc=trn_nsub_acc,
                                  tst_sub_acc=tst_sub_acc,
                                  tst_nsub_acc=tst_nsub_acc,
                                  num_iters=num_iters + 1,
                                  args=args,
                                  label=y_opt)

        # Line 6: Get ready to check condition
        condition = stop_cond(args=args,
                              best_loss=best_loss,
                              num_iters=num_iters,
                              tst_sub_acc=tst_sub_acc,
                              norm_diffs=norm_diffs)

        # Keep track of no. of iterations
        num_iters += 1

    # Line 7: Return poison data
    return D_p, model_t
    # Different levels of verbose
    parser.add_argument('--verbose',
                        action="store_true",
                        help='If true, print everything')
    parser.add_argument('--verbose_pretrain',
                        action="store_true",
                        help='If true, print per-epoch training statistics')

    args = parser.parse_args()

    if args.verbose:
        args.verbose_pretrain = True

    try:
        wanted_errors = [float(x) for x in args.errors.split(",")]
        print(utils.red_print("Target error rates: %s" % str(wanted_errors)))
    except ValueError:
        raise ValueError("Wanted errors provided in invalid format")

    # Ensure directory exists where model will be saved
    utils.ensure_dir_exists(args.save_dir)

    # Print all arguments
    utils.flash_utils(args)

    # Prepare logger
    log_dir = os.path.join(
        args.log_path,
        "indiscriminate_" + str(args.n_copies) + "_" + str(args.seed))
    utils.ensure_dir_exists(log_dir)
    logger = SummaryWriter(log_dir=log_dir, flush_secs=10)
Ejemplo n.º 9
0
        # Purpose of this mode is just to train model once
        # Exit after that
        if args.use_given_data:
            exit(0)

        # Save current model
        model_name = "seed-{}_ratio-{}_loss-{}_bs-{}.pth".format(
            args.seed, ratio, train_loss, args.batch_size)
        ch.save(
            copy.deepcopy(model).state_dict(),
            os.path.join(model_dir, model_name))
        print("Saved model to %s" % os.path.join(model_dir, model_name))

        if tst_sub_acc <= args.attacker_goal and train_loss < best_loss:
            best_loss = train_loss
            best_model_obj = {
                "model": copy.deepcopy(model),
                "test_acc": test_acc,
                "train_loss": train_loss,
                "test_collat_acc": tst_nsub_acc,
                "test_target_acc": tst_sub_acc,
                "ratio": ratio
            }
            print(
                utils.yellow_print("Updated lowest train loss: %.4f" %
                                   train_loss))

    if best_model_obj is None:
        print(utils.red_print("No model satisfied given adversary's goal!"))
Ejemplo n.º 10
0
def modelTargetPoisoning(models_p, logger, args):
    """Generate poisoning points via Algorithm 1, adapted for DNNs.

    Each round trains one model per seed in ``args.seeds`` on the current
    training set (clean data plus the poison collected so far), asks
    ``mtp_utils.lookup_based_optimal`` for the next point ``(x*, y*)`` and
    adds ``args.n_copies`` copies of it to the training set. Rounds repeat
    for as long as ``stop_cond`` returns a truthy value.

    Args:
        models_p: Target models (theta_p); handed to the point search and
            queried on ``x*`` for logging.
        logger: Passed through to ``mtp_utils.log_information``.
        args: Experiment configuration. ``args.seed`` is overwritten with
            each entry of ``args.seeds`` while models are being built.

    Returns:
        A pair ``(D_p, models_t)``: ``D_p`` is ``[xs, ys]``, the collected
        poison inputs and labels; ``models_t`` holds the models from the
        last round (trained before that round's point was added).
    """
    # Working dataset: grows as poison points are appended to it
    ds = datasets.dataset_helper("memory")(path=args.path_1)

    # Untouched copy, used to measure how many points were added
    ds_clean = datasets.dataset_helper("memory")(path=args.path_1)

    # Pool of candidate points searched when optimising x*
    ds_second = datasets.dataset_helper("memory")(path=args.path_2)
    loader_optim, _ = ds_second.get_loaders(1000)

    # Line 1: containers for the poison inputs and their labels
    poison_xs, poison_ys = [], []

    # Line 3: no poison exists yet in the first round, so train the
    # clean models up-front, one per seed
    pretrained_models = []
    for seed in args.seeds:
        args.seed = seed
        print(utils.yellow_print("Printing model with seed %d" % args.seed))
        clean_model, _ = mtp_utils.train_clean_model(ds, args)
        pretrained_models.append(clean_model)

    # Evaluate the clean models using full-batch loaders
    train_loader, test_loader = ds.get_loaders(len(ds.train))
    clean_accs = []
    clean_total_losses = []
    population_accs = []
    non_population_accs = []
    for clean_model in pretrained_models:
        overall_acc, overall_loss = dnn_utils.get_model_metrics(
            clean_model, test_loader, lossfn=args.loss)
        clean_accs.append(overall_acc)
        clean_total_losses.append(overall_loss)

        population_stats, non_population_stats = dnn_utils.get_model_metrics(
            clean_model,
            test_loader,
            lossfn=args.loss,
            target_prop=args.poison_class)
        pop_acc, _ = population_stats
        non_pop_acc, _ = non_population_stats
        population_accs.append(pop_acc)
        non_population_accs.append(non_pop_acc)

    # (colour function, message template, values to average)
    clean_report = (
        (utils.yellow_print, "[Clean-model][Test] Total Acc: %.4f",
         clean_accs),
        (utils.yellow_print, "[Clean-model] Loss on train: %.4f",
         clean_total_losses),
        (utils.red_print, "[Clean-model][Test] Population Acc: %.4f",
         population_accs),
        (utils.red_print, "[Clean-model][Test] Non-Population Acc: %.4f",
         non_population_accs),
    )
    for colorize, template, values in clean_report:
        print(colorize(template % np.mean(values)))
    print()

    # Line 2: loop until the stopping criterion says otherwise
    best_loss = np.inf
    num_iters = 0
    keep_going = True
    while keep_going:

        if not poison_xs:
            # First round: reuse the models trained on clean data
            models_t = pretrained_models
        else:
            # Line 3: theta_t = train(D_c U D_p)
            n_train = len(ds.train)
            print(
                utils.yellow_print("[Training model on Dc U Dp "
                                   "(on %d samples)]" % n_train))

            # Full-batch loader over D_c U D_p
            data_loader, _ = ds.get_loaders(n_train)

            # Optionally train for more epochs as |D_c U D_p| grows
            n_epochs = args.iters
            if args.increase_iters:
                n_added = n_train - len(ds_clean.train)
                n_epochs += int(n_added / args.increase_every)

            # Build and train a fresh model for every seed
            models_t = []
            for seed in args.seeds:
                args.seed = seed
                fresh_model = dnn_utils.get_seeded_wrapped_model(
                    args, n_classes=ds.n_classes)
                # Switch to training mode before optimising
                fresh_model.train()

                optimizer = ch.optim.Adam(
                    fresh_model.parameters(),
                    lr=args.pretrain_lr,
                    weight_decay=args.pretrain_weight_decay)

                print(
                    utils.yellow_print("Printing model with seed %d" %
                                       args.seed))
                for epoch_num in range(1, n_epochs + 1):
                    dnn_utils.epoch(model=fresh_model,
                                    loader=data_loader,
                                    optimizer=optimizer,
                                    epoch_num=epoch_num,
                                    c_rule=None,
                                    n_classes=None,
                                    verbose=True,
                                    lossfn=args.loss)

                models_t.append(fresh_model)

        # Every theta_t must be in eval mode from here on
        for current_model in models_t:
            current_model.eval()

        # Line 4: Compute (x*, y*)
        (x_opt, y_opt), best_loss = mtp_utils.lookup_based_optimal(
            theta_t=models_t,
            theta_p=models_p,
            loader=loader_optim,
            n_classes=ds_second.n_classes,
            random=args.random,
            lossfn=args.loss,
            filter=args.filter,
            verbose=True,
            ensemble_t=True,
            ensemble_p=True,
            pick_optimal=args.pick_optimal)

        # Report what each target / current model predicts on x*
        def predicted_labels(models):
            return [str(m(x_opt).argmax(1).item()) for m in models]

        with ch.no_grad():
            preds_p = predicted_labels(models_p)
            preds_t = predicted_labels(models_t)
        summary = "Loss: %.3f Mt(x*): %s, Mp(x*): %s, y*: %d" % (
            best_loss.item(), ",".join(preds_t), ",".join(preds_p), y_opt)
        print(utils.cyan_print(summary))

        # Line 5: Add (x*, y*) to D_p (and to the training set)
        for _ in range(args.n_copies):
            poison_xs.append(x_opt.cpu())
            poison_ys.append(y_opt.cpu())
            ds.add_point_to_train(x_opt.cpu(), y_opt.cpu())
        print()

        # Per-model accuracies on the target sub-population and on the
        # rest, measured on the test loader first and the train loader next
        tst_sub_accs, tst_nsub_accs = [], []
        trn_sub_accs, trn_nsub_accs = [], []
        eval_plan = (
            (test_loader, tst_sub_accs, tst_nsub_accs),
            (train_loader, trn_sub_accs, trn_nsub_accs),
        )
        for current_model in models_t:
            for loader, sub_bucket, nsub_bucket in eval_plan:
                (sub_acc, _), (nsub_acc, _) = dnn_utils.get_model_metrics(
                    model=current_model,
                    loader=loader,
                    target_prop=args.poison_class,
                    lossfn=args.loss)
                sub_bucket.append(sub_acc)
                nsub_bucket.append(nsub_acc)

        # Average the metrics across models
        trn_sub_acc = np.mean(trn_sub_accs)
        tst_sub_acc = np.mean(tst_sub_accs)
        trn_nsub_acc = np.mean(trn_nsub_accs)
        tst_nsub_acc = np.mean(tst_nsub_accs)

        # Log information (iteration count is reported 1-based)
        mtp_utils.log_information(logger=logger,
                                  best_loss=best_loss,
                                  x_opt=x_opt,
                                  norm_diffs=None,
                                  trn_sub_acc=trn_sub_acc,
                                  trn_nsub_acc=trn_nsub_acc,
                                  tst_sub_acc=tst_sub_acc,
                                  tst_nsub_acc=tst_nsub_acc,
                                  num_iters=num_iters + 1,
                                  args=args,
                                  label=y_opt)

        # Line 6: decide whether another round is needed; the check sees
        # the iteration count before it is incremented
        keep_going = stop_cond(args=args,
                               best_loss=best_loss,
                               num_iters=num_iters,
                               tst_sub_acc=tst_sub_acc)
        num_iters += 1

    # Line 7: Return poison data
    return [poison_xs, poison_ys], models_t