Example #1
    def generateMutants(self,
                        mutants_per_pattern=10,
                        random=False) -> List[Occurrence]:
        """ Returns a list of occurrences including multiple different patterns

        For each pattern, the list contains a maximum of `mutants_per_pattern` number of 
        occurrences. Default is 10 if not provided.

        If `random` is True, then the list is shuffled using this mutator's rng
        """
        utils.yellow_print("Searching for patterns...")
        # list of mutations to execute, contains occurrence objects
        mutations_list = []
        # dictionary that maps a pattern to the list of occurrences
        mutations_dict = {}
        # go through each pattern
        for mp in self.getPatterns():
            # find their occurrences
            occurrences_with_mp = self.findOccurrences(mutation_pattern=mp)
            # record the occurrences found for this pattern
            if mp not in mutations_dict:
                mutations_dict[mp] = []
            if random:
                self.rng.shuffle(occurrences_with_mp)
            mutations_dict[mp] += occurrences_with_mp[0:mutants_per_pattern]
        for mp in mutations_dict:
            mutations_list += mutations_dict[mp]
        if random:
            self.rng.shuffle(mutations_list)
        return mutations_list
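
A minimal usage sketch (hypothetical values; the Mutator constructor is shown in
Example #5, and mutate()/restore() in Examples #20 and #3):

    import random

    mutation = Mutator(src={"code.c": []},
                       mutation_patterns={" == ": [" != "]},
                       rng=random.Random(42))  # seeded for reproducible runs
    for occurrence in mutation.generateMutants(mutants_per_pattern=5, random=True):
        original_line, mutated_line = mutation.mutate(occurrence)
        # ... build, flash, and run the test suite against the mutant ...
        mutation.restore()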
Example #2
def _read_until_complete(target, stop_flag, crash_flag, pass_flag, timeout):
    is_serial = isinstance(target, serial.Serial)
    target_read = _serial_read if is_serial else _subproc_read
    target_terminated = (lambda: False) if is_serial else target.stdout.at_eof

    final_output = ''
    final_flag = stop_flag
    timeout_end = time.time() + timeout
    # Read until the target terminates, the timeout expires, or a stop/crash
    # flag shows up in the accumulated output.
    while not target_terminated():
        if time.time() >= timeout_end:
            utils.yellow_print("TIMEOUT")
            final_flag = "TIMEOUT"
            break
        try:
            c = target_read(target).decode()
        except (UnicodeDecodeError, asyncio.TimeoutError,
                serial.SerialException):
            time.sleep(1)
        else:
            if not c:
                continue
            utils.raw_print(c)
            final_output += c
            if crash_flag is not None and crash_flag in final_output:
                utils.raw_print('\n')
                final_flag = crash_flag
                break
            if stop_flag is not None and stop_flag in final_output:
                utils.raw_print('\n')
                break
            if pass_flag is not None and pass_flag in final_output:
                final_flag = pass_flag

    return final_output, final_flag
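
A hypothetical invocation over a serial port; the FLAGS constants are the ones
referenced in Example #9 (their string values are not shown in these snippets),
and the port path is a placeholder:

    import serial

    with serial.Serial("/dev/ttyUSB0", 115200, timeout=1) as port:
        output, final_flag = _read_until_complete(port,
                                                  stop_flag=FLAGS.EndFlag,
                                                  crash_flag=FLAGS.CrashFlag,
                                                  pass_flag=FLAGS.PassFlag,
                                                  timeout=300)
    if final_flag == "TIMEOUT":
        print("device produced no terminating flag within 5 minutes")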
Example #3
    def restore(self):
        """ Restores source files by copying the `.old` backups over the modified files

        The working directory should be the project root (/freertos)
        """
        for i in range(len(self.olds)):
            shutil.copyfile(self.olds[i], self.modified[i])
        utils.yellow_print("Source Code Restored")
Example #4
def to_csv(csv_path, headers, rows):
    """
    Create a csv file at `csv_path` with `headers` using the data in `rows`, a list of
    dictionaries whose keys match `headers`; each dictionary becomes one CSV row.
    """
    directory = os.path.dirname(csv_path)
    Path(directory).mkdir(parents=True, exist_ok=True)
    # newline='' prevents the csv module from writing blank lines on Windows
    with open(csv_path, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=headers)
        writer.writeheader()
        for row in rows:
            writer.writerow(row)
        utils.yellow_print("Successfully wrote to CSV - {}".format(csv_path))
Example #5
    def __init__(self, src: dict, mutation_patterns: dict,
                 rng=random.Random()):
        """ Initializes various fields
        
        `src` is a dictionary mapping from a path to file to a list of line ranges. eg.

        { 
            "code.c" : [],
            "code2.c" : [[1,12],[50,150]]
        }

        `mutation_patterns` is a dictionary mapping a str to a list of str, or to a single str. eg.

        {
            " == " : [ " <= ", " != " ],
            " return " : [" return -2 * ", " return -1 + "],
            "++" : "--"
        }

        `rng` is a random.Random instance used for random operations. Note that the
        default instance is created once at definition time, so Mutators constructed
        without an explicit `rng` all share the same generator.
        """
        utils.yellow_print("CWD: {}".format(os.getcwd()))
        self.olds = []  # stores original file paths
        self.modified = []  # stores path to the modified file
        self.lines_to_mutate = {
        }  # maps file to the lines that file should mutate
        self.src = src

        for f in self.src:
            # create copies of the original
            old = '{}.old'.format(f)
            shutil.copyfile(f, old)
            self.olds.append(old)
            self.modified.append(f)
            # process the line intervals into a list of line numbers
            if len(self.src[f]) == 0:
                # no ranges given: every line in the file is eligible
                with open(f) as fh:  # close the handle instead of leaking it
                    num_lines = len(fh.read().split('\n'))
                self.lines_to_mutate[old] = list(range(1, num_lines + 1))
            else:
                self.lines_to_mutate[old] = utils.flatten(
                    utils.merge(self.src[f]))
        self.rng = rng
        self.mutation_patterns = self.flattenPatterns(mutation_patterns)
        self.pattern_generator = self.mutation_patterns.copy()
        self.file_index = None
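
A construction sketch with a seeded generator for reproducible mutant selection
(file names and patterns are placeholders):

    import random

    mutation = Mutator(src={"code.c": [], "code2.c": [[1, 12], [50, 150]]},
                       mutation_patterns={" == ": [" <= ", " != "], "++": "--"},
                       rng=random.Random(1234))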
Example #6
def train_poisoned_model(model, callable_ds, poison_ratio, args):
    # Poison data once at the start, train the model normally afterwards

    ds = callable_ds()
    ds_clean = callable_ds()

    indices = None

    if args.use_given_data:
        print(utils.yellow_print("Using given data"))
        poison_data = np.load(args.poison_path)
        poison_x, poison_y = poison_data['x'], poison_data['y']
        ds.add_poison_data(poison_x, poison_y)
    else:
        ds.poison_train_data(args.poison_class,
                             poison_ratio,
                             args.c_rule,
                             selection=indices,
                             save_data=args.save_poisoned_data,
                             offset=args.offset)

    print("Training on %d samples" % len(ds.train))
    print(
        utils.red_print("%d additional points" %
                        (len(ds.train) - len(ds_clean.train))))

    batch_size = args.batch_size
    shuffle = True
    if batch_size == -1:
        batch_size = len(ds.train)
        shuffle = False

    train_loader, val_loader = ds.get_loaders(batch_size, shuffle=shuffle)
    clean_train_loader, _ = ds_clean.get_loaders(batch_size, shuffle=shuffle)

    return_data = train_model(model, (train_loader, val_loader),
                              epochs=args.epochs,
                              c_rule=args.c_rule,
                              n_classes=ds.n_classes,
                              weight_decay=args.weight_decay,
                              lr=args.lr,
                              verbose=args.verbose,
                              no_val=True,
                              get_metrics_at_epoch_end=args.poison_class,
                              clean_train_loader=clean_train_loader,
                              study_mode=args.study_mode,
                              loss_fn=args.loss)

    if args.study_mode:
        model, _, _, all_stats = return_data
        return model, all_stats

    model, _, _ = return_data
    return model
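
A hypothetical invocation; the argparse fields are inferred from the attribute
accesses above, and every value is a placeholder:

    import argparse

    args = argparse.Namespace(use_given_data=False, poison_class=0, c_rule="cycle",
                              save_poisoned_data=False, offset=0, batch_size=-1,
                              epochs=10, weight_decay=0.09, lr=1e-3, verbose=True,
                              study_mode=False, loss="ce")
    model = train_poisoned_model(model, callable_ds, poison_ratio=0.1, args=args)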
Example #7
def read_device_output(port,
                       *,
                       start_flag=None,
                       end_flag=None,
                       crash_flag=None,
                       pass_flag=None,
                       start_timeout=360,
                       exec_timeout):
    """
    Read output from a board. Will look for `start_flag`, or wait for output if not provided, until
    `start_timeout` expires. If success, continue to read output until `exec_timeout` expires or
    end_flag is detected if provided.
    """
    try:
        utils.yellow_print("Monitoring...")
        # bind the open handle to a distinct name so it does not shadow `port`
        with serial.Serial(port, 115200, timeout=comm.SERIAL_TIMEOUT) as ser:
            output, final_flag = comm.read_target_output(
                ser, start_flag, crash_flag, end_flag, pass_flag,
                start_timeout, exec_timeout)
    except serial.SerialException as e:
        raise SerialPortError(str(e))
    return output, final_flag
Example #8
def flash(port):
    os.chdir(root_path)
    # ./vendors/espressif/esp-idf/tools/idf.py erase_flash flash monitor -B build
    cmd = "./vendors/espressif/esp-idf/tools/idf.py erase_flash flash -B build -p {}".format(
        port)
    utils.yellow_print(cmd)
    # Flash to device
    utils.yellow_print("Flashing to device...")
    subprocess.check_output(shlex.split(cmd))
    utils.yellow_print("Done Flashing")
Example #9
def flash_and_read(port, timeout, flash_command):
    """ Executes a subprocess by calling `flash_command` and reads serial output from `port`.

    `flash_command` is a str shell command that will be fed into a subprocess.

    `port` is the port to read serial output from.

    `timeout` is the max time before ending read process.
    """
    # begin recording time
    time_begin = time.time()
    try:
        subprocess.check_call(flash_command, shell=True)
    except subprocess.CalledProcessError as e:
        print(e)
        raise CompileFailed("Failed to compile")
    time_finish_build = time.time()
    utils.yellow_print(
        f"Build Time: {time_finish_build - time_begin:.2f} seconds")

    # Read device output through serial port
    output, final_flag = read_device_output(port=port,
                                            start_flag=None,
                                            crash_flag=FLAGS.CrashFlag,
                                            end_flag=FLAGS.EndFlag,
                                            pass_flag=FLAGS.PassFlag,
                                            exec_timeout=timeout)
    time_finish_read = time.time()
    utils.yellow_print(
        f"Serial Monitor Time: {time_finish_read - time_finish_build:.2f} seconds"
    )
    utils.yellow_print(
        f"Build and Read Time: {time_finish_read - time_begin:.2f} seconds")
    return output, final_flag
Example #10
        args.verbose_precomp = True

    # Print all arguments
    utils.flash_utils(args)

    # Load target model theta_p
    theta_p = dnn_utils.model_helper(args.model_arch)()
    theta_p = dnn_utils.multi_gpu_wrap(theta_p)
    theta_p.load_state_dict(ch.load(args.poison_model_path))
    theta_p.eval()

    # Report performance of poisoned model
    train_loader, test_loader = datasets.dataset_helper(
        args.dataset)().get_loaders(args.batch_size)
    clean_acc, _ = dnn_utils.get_model_metrics(theta_p, test_loader)
    print(utils.yellow_print("[Poisoned-model] Total Acc: %.4f" % clean_acc))
    _, clean_total_loss = dnn_utils.get_model_metrics(theta_p, train_loader)
    print(
        utils.yellow_print("[Poisoned-model] Loss on train: %.4f" %
                           clean_total_loss))
    # Report weight norm for poisoned model
    poisoned_norm = dnn_utils.get_model_l2_norm(theta_p).item()
    print(
        utils.yellow_print("[Poisoned-model] Weights norm: %.4f" %
                           poisoned_norm))
    print()

    for valid_theta_err in args.theta_values:
        args.err_threshold = valid_theta_err

        # Prepare logger
Example #11
def mutation_main(args, config):
    """ Function that is called when user specifies `mutation_runner.py start` from cmd

    Runs every task in configuration json.
    """
    os.chdir(dir_path)
    if args.jobfile:
        with open(args.jobfile, 'r') as f:
            jobfile = ast.literal_eval(f.read())
            args.port = jobfile['port']
            args.mutants = int(jobfile['mutants'])
            args.timeout = int(jobfile['timeout'])
            args.csv = jobfile['csv']
            args.seed = jobfile['seed']
            args.randomize = jobfile['randomize']
    if args.line_coverage:
        with open(args.line_coverage) as f:
            args.line_coverage = json.loads(f.read())

    # change to root path
    os.chdir(root_path)
    # create a rng for this run
    if not args.seed:
        args.seed = random.randrange(sys.maxsize)
    utils.yellow_print("Current test seed is: {}".format(args.seed))

    time_begin = time.time()
    utils.yellow_print("Running tasks")
    for task in config['tasks']:
        time_task_begin = time.time()
        utils.yellow_print("Running task: {}".format(task['name']))

        # Generate test runner to run only on those test groups
        utils.yellow_print(
            "Generating test runner based on supplied test groups...")
        backup = generate_test_runner(task['test_groups'])

        # look up a named default pattern set (e.g. "all") if `patterns` is a string
        if isinstance(task['patterns'], str):
            task['patterns'] = mutator.pattern_dict[task['patterns']]
        try:
            run_task(task, args, config)
        except:
            traceback.print_exc()
            raise
        finally:
            # restore aws_test_runner.c
            shutil.copy(backup, os.path.splitext(backup)[0])
            os.remove(backup)
            time_task_end = time.time()
            utils.yellow_print(
                f"Task Time: {time_task_end - time_task_begin:.2f} seconds")

    time_end = time.time()
    utils.yellow_print(f"Total Time: {time_end - time_begin:.2f} seconds")

    create_jobfile(mutants=args.mutants,
                   port=args.port,
                   timeout=args.timeout,
                   csv=args.csv,
                   seed=args.seed,
                   randomize=args.randomize)
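
The jobfile parsed above is a Python-literal dict (read via ast.literal_eval),
presumably the file that create_jobfile writes. The field names follow from the
reads above; the values here are placeholders:

    {
        'port': '/dev/ttyUSB0',
        'mutants': 20,
        'timeout': 300,
        'csv': True,
        'seed': 1234,
        'randomize': True,
    }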
Example #12
    utils.flash_utils(args)

    # Get number of classes
    n_classes = datasets.dataset_helper(args.dataset)().n_classes

    # Load target model theta_p, set to eval mode
    theta_p = dnn_utils.model_helper(args.model_arch)(n_classes=n_classes)
    theta_p = theta_p.cuda()
    theta_p.load_state_dict(ch.load(args.poison_model_path))
    theta_p.eval()

    # Report performance of poisoned model
    train_loader, test_loader = datasets.dataset_helper(
        args.dataset)().get_loaders(512)
    clean_acc, _ = dnn_utils.get_model_metrics(theta_p, test_loader)
    print(utils.yellow_print("[Poisoned-model] Total Acc: %.4f" % clean_acc))
    _, clean_total_loss = dnn_utils.get_model_metrics(theta_p, train_loader)
    print(
        utils.yellow_print("[Poisoned-model] Loss on train: %.4f" %
                           clean_total_loss))
    # Report weight norm for poisoned model
    poisoned_norm = dnn_utils.get_model_l2_norm(theta_p).item()
    print(
        utils.yellow_print("[Poisoned-model] Weights norm: %.4f" %
                           poisoned_norm))
    # Report accuracy on unseen population data
    (tst_sub_acc,
     _), (tst_nsub_acc,
          _) = dnn_utils.get_model_metrics(model=theta_p,
                                           loader=test_loader,
                                           target_prop=args.poison_class)
Example #13
def modelTargetPoisoningEnsemble(models_p, logger, args):
    # Implementation of Algorithm 1, modified for DNNs
    # Line number corresponding to the Algorithm is mentioned
    # Along with each high-level function call

    # Fetch appropriate dataset
    ds = datasets.dataset_helper("memory")(path=args.path_1)

    # Maintain copy of clean data (for seed sampling)
    ds_clean = datasets.dataset_helper("memory")(path=args.path_1)

    # Data to pick points from (for x* optimization)
    ds_second = datasets.dataset_helper("memory")(path=args.path_2)
    loader_optim, _ = ds_second.get_loaders(1000)

    # Line 1: Collect poisoning points
    D_p = [[], []]

    # Line 3: Since D_p is empty in first iteration, simply train it outside
    model_t_pretrained, pretrain_optim = mtp_utils.train_clean_model(ds, args)

    # Report performance of clean model
    batch_size = args.batch_size
    if batch_size == -1:
        batch_size = len(ds.train)

    train_loader, test_loader = ds.get_loaders(batch_size)
    clean_acc, clean_total_loss = dnn_utils.get_model_metrics(
        model_t_pretrained, test_loader, lossfn=args.loss)
    print(utils.yellow_print("[Clean-model][Test] Total Acc: %.4f" %
                             clean_acc))
    print(
        utils.yellow_print("[Clean-model] Loss on train: %.4f" %
                           clean_total_loss))
    (population_acc,
     _), (non_population_acc,
          _) = dnn_utils.get_model_metrics(model_t_pretrained,
                                           test_loader,
                                           lossfn=args.loss,
                                           target_prop=args.poison_class)
    print(
        utils.red_print("[Clean-model][Test] Population Acc: %.4f" %
                        population_acc))
    print(
        utils.red_print("[Clean-model][Test] Non- Population Acc: %.4f" %
                        non_population_acc))
    print()

    # Line 2: Iterate until stopping criteria met
    tst_sub_acc = 1.0
    best_loss = np.inf
    num_iters = 0
    condition = True
    while condition:

        if len(D_p[0]) > 0:
            # Line 3: theta_t = train(D_c U D_p)
            print(
                utils.yellow_print("[Training model on Dc U Dp "
                                   "(on %d samples)]" % len(ds.train)))
            # Get loader for D_c U D_p
            batch_size = args.batch_size
            if batch_size == -1:
                batch_size = len(ds.train)
            data_loader, _ = ds.get_loaders(batch_size)

            # Do not re-initialize model if finetuning requested
            if not args.finetune:
                # Construct model
                model_t = dnn_utils.get_seeded_wrapped_model(
                    args, n_classes=ds.n_classes)
            else:
                # Start finetuning from the point where model
                # has seen only clean data
                model_t = copy.deepcopy(model_t_pretrained)
            # Set model to training mode
            model_t.train()

            # Define optimizer
            optim = ch.optim.Adam(model_t.parameters(),
                                  lr=args.pretrain_lr,
                                  weight_decay=args.pretrain_weight_decay)

            # Adjust starting point of optimizer
            # if finetuning is requested
            if args.finetune:
                optim.load_state_dict(pretrain_optim.state_dict())

            # Increase the number of iterations theta_t is trained for
            # as the size of its training set |D_c U D_p| increases
            iters = args.iters
            if args.increase_iters:
                iters += int((len(ds.train) - len(ds_clean.train)) /
                             args.increase_every)

            # Train model
            for e in range(iters):
                # Train epoch
                dnn_utils.epoch(model=model_t,
                                loader=data_loader,
                                optimizer=optim,
                                epoch_num=e + 1,
                                c_rule=None,
                                n_classes=None,
                                verbose=True,
                                lossfn=args.loss)
        else:
            model_t = model_t_pretrained

        # Make sure theta_t is in eval mode
        model_t.eval()

        # Line 4: Compute (x*, y*)
        if args.optim_type == "lookup":
            # Loss-difference based lookup method
            (x_opt, y_opt), best_loss = mtp_utils.lookup_based_optimal(
                theta_t=model_t,
                theta_p=models_p,
                loader=loader_optim,
                n_classes=ds_second.n_classes,
                random=args.random,
                lossfn=args.loss,
                filter=args.filter,
                verbose=True,
                ensemble_p=True)
        elif args.optim_type == "dataset_grad":
            # Dataset-gradient alignment loss based optimization
            (x_opt, y_opt), best_loss = mtp_utils.dataset_grad_optimal(
                theta_t=model_t,
                theta_p=models_p,
                input_shape=ds_second.datum_shape,
                n_classes=ds_second.n_classes,
                trials=args.optim_trials,
                ds=ds,
                num_steps=args.optim_steps,
                step_size=args.optim_lr,
                verbose=True,
                signed=args.signed,
                ensemble_p=True,
                batch_sample_estimate=args.batch_sample_estimate)
        elif args.optim_type == "loss_difference":
            # Loss difference based optimization
            (x_opt, y_opt), best_loss = mtp_utils.find_optimal_using_optim(
                theta_t=model_t,
                theta_p=models_p,
                input_shape=ds_second.datum_shape,
                n_classes=ds_second.n_classes,
                num_steps=args.optim_steps,
                trials=args.optim_trials,
                step_size=args.optim_lr,
                filter=args.filter,
                verbose=True,
                ensemble_p=True)
        else:
            raise NotImplementedError("Loss optimization method not implemented")

        # Log some information about x*, y*
        with ch.no_grad():
            pred_t = model_t(x_opt)
            preds_t = ",".join(
                [str(mp(x_opt).argmax(1).item()) for mp in models_p])
        print(
            utils.cyan_print("Mt(x*): %d, Mp(x*): %s, y*: %d" %
                             (pred_t.argmax(1), preds_t, y_opt)))

        # Set n_copies dynamically, if requested
        n_copies = args.n_copies
        if args.dynamic_repeat:
            n_copies = mtp_utils.dynamic_n(tst_sub_acc, args.n_copies)

        # Line 5: Add (x*, y*) to D_p
        # (use the possibly-dynamic n_copies computed above, not args.n_copies)
        for _ in range(n_copies):
            D_p[0].append(x_opt.cpu())
            D_p[1].append(y_opt.cpu())
            ds.add_point_to_train(x_opt.cpu(), y_opt.cpu())
        print()

        # Calculate useful statistics
        (tst_sub_acc,
         _), (tst_nsub_acc,
              _) = dnn_utils.get_model_metrics(model=model_t,
                                               loader=test_loader,
                                               target_prop=args.poison_class,
                                               lossfn=args.loss)
        (trn_sub_acc,
         _), (trn_nsub_acc,
              _) = dnn_utils.get_model_metrics(model=model_t,
                                               loader=train_loader,
                                               target_prop=args.poison_class,
                                               lossfn=args.loss)

        norm_diffs = dnn_utils.model_l2_closeness(model_t,
                                                  models_p,
                                                  ensemble=True)

        # Log information
        mtp_utils.log_information(logger=logger,
                                  best_loss=best_loss,
                                  x_opt=x_opt,
                                  norm_diffs=norm_diffs,
                                  trn_sub_acc=trn_sub_acc,
                                  trn_nsub_acc=trn_nsub_acc,
                                  tst_sub_acc=tst_sub_acc,
                                  tst_nsub_acc=tst_nsub_acc,
                                  num_iters=num_iters + 1,
                                  args=args,
                                  label=y_opt)

        # Line 6: Get ready to check condition
        condition = stop_cond(args=args,
                              best_loss=best_loss,
                              num_iters=num_iters,
                              tst_sub_acc=tst_sub_acc,
                              norm_diffs=norm_diffs)

        # Keep track of no. of iterations
        num_iters += 1

    # Line 7: Return poison data
    return D_p, model_t
Example #14
def indiscriminateAttack(logger, wanted_errors, args):
    # Fetch appropriate dataset
    ds = datasets.dataset_helper("memory")(path=args.path_1)

    # Maintain copy of clean data (for seed sampling)
    ds_clean = datasets.dataset_helper("memory")(path=args.path_1)

    # Data to pick points from (for x* optimization)
    ds_second = datasets.dataset_helper("memory")(path=args.path_2)
    loader_optim, _ = ds_second.get_loaders(1000)

    # Line 1: Collect poisoning points
    D_p = [[], []]

    # Line 3: Since D_p is empty in first iteration, simply train it outside
    model_t_pretrained, pretrain_optim = mtp_utils.train_clean_model(ds, args)

    # Report performance of clean model
    batch_size = args.batch_size
    if batch_size == -1:
        batch_size = len(ds.train)

    train_loader, test_loader = ds.get_loaders(batch_size)
    clean_acc, _ = dnn_utils.get_model_metrics(model_t_pretrained,
                                               test_loader,
                                               lossfn=args.loss)
    print(utils.yellow_print("[Clean-model] Total Acc: %.4f" % clean_acc))
    _, clean_total_loss = dnn_utils.get_model_metrics(model_t_pretrained,
                                                      train_loader,
                                                      lossfn=args.loss)
    print(
        utils.yellow_print("[Clean-model] Loss on train: %.4f" %
                           clean_total_loss))
    print()

    # Keep track of which errors have been achieved so far
    achieved_so_far = 0

    # Line 2: Iterate until stopping criteria met
    best_loss = np.inf
    num_iters = 0
    while achieved_so_far < len(wanted_errors):

        if len(D_p[0]) > 0:
            # Line 3: theta_t = train(D_c U D_p)
            print(
                utils.yellow_print("[Training model on Dc U Dp "
                                   "(on %d samples)]" % len(ds.train)))
            # Get loader for D_c U D_p
            batch_size = args.batch_size
            if batch_size == -1:
                batch_size = len(ds.train)
            data_loader, _ = ds.get_loaders(batch_size)

            # Do not re-initialize model if finetuning requested
            if not args.finetune:
                # Construct model
                model_t = dnn_utils.get_seeded_wrapped_model(
                    args, n_classes=ds.n_classes)
            else:
                # Start finetuning from the point where model
                # has seen only clean data
                model_t = copy.deepcopy(model_t_pretrained)
            # Set model to training mode
            model_t.train()

            # Define optimizer
            optim = ch.optim.Adam(model_t.parameters(),
                                  lr=args.pretrain_lr,
                                  weight_decay=args.pretrain_weight_decay)

            # Adjust starting point of optimizer
            # if finetuning is requested
            if args.finetune:
                optim.load_state_dict(pretrain_optim.state_dict())

            # Increase the number of iterations theta_t is trained for
            # as the size of its training set |D_c U D_p| increases
            iters = args.iters
            if args.increase_iters:
                iters += int((len(ds.train) - len(ds_clean.train)) /
                             args.increase_every)

            # Train model
            for e in range(iters):
                # Train epoch
                dnn_utils.epoch(model=model_t,
                                loader=data_loader,
                                optimizer=optim,
                                epoch_num=e + 1,
                                c_rule=None,
                                n_classes=None,
                                verbose=True,
                                lossfn=args.loss)
        else:
            model_t = model_t_pretrained

        # Make sure theta_t is in eval mode
        model_t.eval()

        # Line 4: Compute (x*, y*)
        (x_opt,
         y_opt), best_loss = lookup_based_optimal(theta_t=model_t,
                                                  loader=loader_optim,
                                                  lossfn=args.loss,
                                                  filter=args.filter,
                                                  n_classes=ds.n_classes,
                                                  verbose=True)

        # Log some information about x*, y*
        with ch.no_grad():
            pred_t = model_t(x_opt)
        print(
            utils.cyan_print("Loss: %.3f Mt(x*): %d, y*: %d" %
                             (best_loss.item(), pred_t.argmax(1), y_opt)))

        # Line 5: Add (x*, y*) to D_p
        for _ in range(args.n_copies):
            D_p[0].append(x_opt.cpu())
            D_p[1].append(y_opt.cpu())
            ds.add_point_to_train(x_opt.cpu(), y_opt.cpu())
        print()

        # Calculate useful statistics
        (tst_acc, _) = dnn_utils.get_model_metrics(model=model_t,
                                                   loader=test_loader,
                                                   lossfn=args.loss)
        (trn_acc, _) = dnn_utils.get_model_metrics(model=model_t,
                                                   loader=train_loader,
                                                   lossfn=args.loss)

        # Log information
        # Log optimized image
        logger.add_image("X*", x_opt[0], (num_iters + 1) * args.n_copies)
        # Log weight Norm
        logger.add_scalar("Weight norm",
                          dnn_utils.get_model_l2_norm(model_t).item(),
                          global_step=(num_iters + 1) * args.n_copies)
        # Log population accuracies on train, test data
        logger.add_scalar("[Train] Accuracy",
                          trn_acc,
                          global_step=(num_iters + 1) * args.n_copies)
        logger.add_scalar("[Test] Accuracy",
                          tst_acc,
                          global_step=(num_iters + 1) * args.n_copies)
        # Log best loss
        logger.add_scalar("Loss on x*,y*",
                          best_loss.item(),
                          global_step=(num_iters + 1) * args.n_copies)

        # Keep track of no. of iterations
        num_iters += 1

        # If wanted error achieved, switch to next goal:
        if (1 - trn_acc) > wanted_errors[achieved_so_far]:
            # Save current model
            model_name = "seed-{}_error-{}_testacc-{}.pth".format(
                args.seed, wanted_errors[achieved_so_far], tst_acc)
            ch.save(
                copy.deepcopy(model_t).state_dict(),
                os.path.join(args.save_dir, model_name))
            print(
                utils.pink_print("Achieved %.3f loss!" %
                                 wanted_errors[achieved_so_far]))
            achieved_so_far += 1
Example #15
        # Purpose of this mode is just to train model once
        # Exit after that
        if args.use_given_data:
            exit(0)

        # Save current model
        model_name = "seed-{}_ratio-{}_loss-{}_bs-{}.pth".format(
            args.seed, ratio, train_loss, args.batch_size)
        ch.save(
            copy.deepcopy(model).state_dict(),
            os.path.join(model_dir, model_name))
        print("Saved model to %s" % os.path.join(model_dir, model_name))

        if tst_sub_acc <= args.attacker_goal and train_loss < best_loss:
            best_loss = train_loss
            best_model_obj = {
                "model": copy.deepcopy(model),
                "test_acc": test_acc,
                "train_loss": train_loss,
                "test_collat_acc": tst_nsub_acc,
                "test_target_acc": tst_sub_acc,
                "ratio": ratio
            }
            print(
                utils.yellow_print("Updated lowest train loss: %.4f" %
                                   train_loss))

    if best_model_obj is None:
        print(utils.red_print("No model satisfied given adversary's goal!"))
Example #16
def modelTargetPoisoning(model_p, logger, args):
    # Implementation of Algorithm 1, modified for DNNs
    # Line number corresponding to the Algorithm is mentioned
    # Along with each high-level function call

    # Fetch appropriate dataset
    ds = datasets.dataset_helper(args.dataset)()

    # Keep track of number of points model has seen (virtually)
    # For loss-normalization purposes
    points_seen_count = len(ds.train)

    # Line 1: Collect poisoning points
    D_p = [[], []]

    # Line 3: Since D_p is empty in first iteration, simply train it outside
    model_t = mtp_utils.train_clean_model(ds, args)

    # Report performance of clean model
    train_loader, test_loader = ds.get_loaders(args.batch_size)
    clean_acc, _ = dnn_utils.get_model_metrics(model_t, test_loader)
    print(utils.yellow_print("[Clean-model] Total Acc: %.4f" % clean_acc))
    _, clean_total_loss = dnn_utils.get_model_metrics(model_t, train_loader)
    print(
        utils.yellow_print("[Clean-model] Loss on train: %.4f" %
                           clean_total_loss))
    print()

    # theta_1: (sum of) gradients of model weights
    # with respect to clean training set
    print(utils.yellow_print("[Computing gradients on clean training data]"))
    theta_curr = datasets.get_dataset_gradients(
        model=model_t,
        ds=ds,
        batch_size=args.batch_size,
        weight_decay=args.pretrain_weight_decay,
        verbose=args.verbose_precomp,
        is_train=True)

    # Line 2: Iterate until stopping criteria met
    best_loss = np.inf
    num_iters = 0
    condition = True
    while condition:

        # Line 4: Compute (x_opt, y_opt)
        opt_pair, best_loss = mtp_utils.find_optimal(
            theta_t=model_t,
            theta_p=model_p,
            input_shape=ds.datum_shape,
            n_classes=ds.n_classes,
            trials=args.trials,
            num_steps=args.num_steps,
            step_size=args.optim_lr,
            verbose=args.verbose_opt)
        x_opt, y_opt = opt_pair

        # Update theta (gradients for online learning) for use in next iter
        print(utils.yellow_print("[Updating gradients]"))
        theta_curr = mtp_utils.update_gradients(
            model=model_t,
            thetas=theta_curr,
            weight_decay=args.update_weight_decay,
            x_opt=x_opt,
            y_opt=y_opt)

        # Calculate useful statistics
        (tst_sub_acc,
         _), _ = dnn_utils.get_model_metrics(model=model_t,
                                             loader=test_loader,
                                             target_prop=args.poison_class)
        _, (trn_nsub_acc,
            _) = dnn_utils.get_model_metrics(model=model_t,
                                             loader=train_loader,
                                             target_prop=args.poison_class)
        norm_diffs = dnn_utils.model_l2_closeness(model_t, model_p)

        # Log information
        mtp_utils.log_information(logger=logger,
                                  best_loss=best_loss,
                                  x_opt=x_opt,
                                  model_t=model_t,
                                  norm_diffs=norm_diffs,
                                  trn_nsub_acc=trn_nsub_acc,
                                  tst_sub_acc=tst_sub_acc,
                                  num_iters=num_iters,
                                  args=args)

        # Line 3: theta_t = train(D_c U D_p)
        # Instead of training from scratch, perform online mirror descent
        model_t = mtp_utils.w_optimal_gradient_ascent(
            model=model_t,
            thetas=theta_curr,
            num_points_seen_virtually=points_seen_count,
            method=args.method,
            lr=args.oga_lr,
            weight_decay=args.oga_weight_decay,
            # Not sure if should be same weight decay
            # when model was pre-trained
            # Or a larger value to prevent model weights from exploding
            # weight_decay=args.pretrain_weight_decay,
            iters=args.iters,
            verbose=args.verbose_oga)

        # Line 5: Add (x*, y*) to D_p
        D_p[0].append(x_opt.cpu())
        D_p[1].append(y_opt.cpu())
        points_seen_count += 1

        # Log some information about x*, y*
        pred_t, pred_p = model_t(x_opt), model_p(x_opt)
        print(
            utils.cyan_print("Mt(x*): %d, Mp(x*): %d, y*: %d" %
                             (pred_t.argmax(1), pred_p.argmax(1), y_opt)))

        # Line 6: Get ready to check condition
        condition = stop_cond(args=args,
                              best_loss=best_loss,
                              num_iters=num_iters,
                              model_t=model_t,
                              model_p=model_p,
                              tst_sub_acc=tst_sub_acc,
                              norm_diffs=norm_diffs)

        # Keep track of no. of iterations
        num_iters += 1
        print()

    # Line 7: Return poison data
    return D_p, model_t
Example #17
def cmake(vendor, board, compiler):
    # cmake -DVENDOR=espressif -DBOARD=esp32_wrover_kit -DCOMPILER=xtensa-esp32 -S . -B build -DAFR_ENABLE_TESTS=1
    cmd = "cmake -DVENDOR={} -DBOARD={} -DCOMPILER={} -S . -B build -DAFR_ENABLE_TESTS=1".format(
        vendor, board, compiler)
    utils.yellow_print(cmd)
    subprocess.check_call(shlex.split(cmd))
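
For reference, the call that reproduces the command shown in the comment above:

    cmake(vendor="espressif", board="esp32_wrover_kit", compiler="xtensa-esp32")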
Example #18
def run_coverage(args, config):
    flash_command = config['flash_command']

    port = args.port if args.port else utils.get_default_serial_port()
    os.environ['PORT'] = port
    timeout = int(args.timeout)

    os.chdir(root_path)

    for task in config['tasks']:
        utils.yellow_print("Running task: {}".format(task['name']))

        # Generate test runner to run only on those test groups
        utils.yellow_print(
            "Generating test runner based on supplied test groups...")
        backup = mutation_runner.generate_test_runner(task['test_groups'])
        try:
            for s in task['src']:
                shutil.copyfile(s, "{}.old".format(s))
                with open(s) as f:
                    text = f.read()
                    funcs = re.findall(FuncRegEx, text, re.MULTILINE)
                    write_line_prints(s, text, funcs)
                # run once
                output, _ = mutation_runner.flash_and_read(
                    port, timeout, flash_command)

                # process the output to determine functional coverage
                line_set = set()
                line_coverage_map = {}
                for line in output.split('\n'):
                    funcm = re.search(CovRegEx, line)
                    testm = re.search(TestRegEx, line)
                    if funcm:
                        line_set.add((funcm.group(2), funcm.group(3)))
                    if testm:
                        test = testm.group(2)
                        if test not in line_coverage_map:
                            line_coverage_map[test] = []
                        for line_range in line_set:
                            line_coverage_map[test].append(
                                [int(line_range[0]),
                                 int(line_range[1])])
                        line_set.clear()
                os.chdir(dir_path)
                with open(args.output, 'w', encoding='utf-8') as f:
                    json.dump(line_coverage_map,
                              f,
                              ensure_ascii=False,
                              indent=4,
                              sort_keys=True)
                utils.yellow_print("Written line coverage data to {}".format(
                    args.output))
        except Exception:
            traceback.print_exc()
            # re-raise as-is instead of wrapping, preserving the type and traceback
            raise
        finally:
            # restore aws_test_runner.c
            os.chdir(root_path)
            shutil.copy(backup, os.path.splitext(backup)[0])
            os.remove(backup)
            for s in task['src']:
                shutil.copyfile("{}.old".format(s), s)
                os.remove("{}.old".format(s))
                utils.yellow_print("Source code restored")
Example #19
def train_model(model,
                loaders,
                epochs,
                c_rule,
                n_classes,
                save_path=None,
                corrupt_class=None,
                lr=1e-3,
                save_option='last',
                weight_decay=0.09,
                early_stop=False,
                poison_ratio=1.0,
                verbose=True,
                no_val=False,
                low_confidence=False,
                get_metrics_at_epoch_end=None,
                clean_train_loader=None,
                use_plateau_scheduler=False,
                study_mode=False,
                loss_fn="ce"):
    if save_path is None:
        save_option = 'none'
    if save_option not in ['best', 'last', 'none']:
        raise ValueError("Model-saving mode must be best/last/none")
    if save_option == 'best' and no_val:
        raise ValueError(
            "Cannot identify best-val-loss model if val loss not computed")

    optim = ch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = ch.optim.lr_scheduler.ReduceLROnPlateau(optim,
                                                        patience=5,
                                                        verbose=True)
    stopper = EarlyStopper(patience=10, decimal=5)

    train_loader, val_loader = loaders

    best_loss, best_vacc = np.inf, 0.0
    best_model = None

    if study_mode:
        collect_stats = []

    iterator = range(epochs)
    if not verbose:
        iterator = tqdm(iterator)
    for e in iterator:
        # Train epoch
        tr_loss, _ = epoch(model,
                           train_loader,
                           optim,
                           e + 1,
                           c_rule,
                           n_classes,
                           corrupt_class=corrupt_class,
                           poison_ratio=poison_ratio,
                           verbose=verbose,
                           low_confidence=low_confidence,
                           lossfn=loss_fn)

        if not no_val:
            # Validation epoch
            (loss, acc) = epoch(model,
                                val_loader,
                                None,
                                e + 1,
                                c_rule,
                                n_classes,
                                verbose=verbose,
                                lossfn=loss_fn)
        if verbose or study_mode:
            if get_metrics_at_epoch_end is not None:
                (prop_acc, _), (noprop_acc, _) = get_model_metrics(
                    model,
                    clean_train_loader,
                    target_prop=get_metrics_at_epoch_end,
                    lossfn=loss_fn)
                print(
                    utils.yellow_print(
                        "[Train] Population acc: %.4f, Non-population acc: %.4f"
                        % (prop_acc, noprop_acc)))

                (val_prop_acc, _), (val_noprop_acc, _) = get_model_metrics(
                    model,
                    val_loader,
                    target_prop=get_metrics_at_epoch_end,
                    lossfn=loss_fn)
                print(
                    utils.yellow_print(
                        "[Val] Population acc: %.4f, Non-population acc: %.4f"
                        % (val_prop_acc, val_noprop_acc)))

                norm = get_model_l2_norm(model).item()
                print(utils.yellow_print("[Model] R(w): %.3f" % norm))

                if study_mode:
                    stats = {
                        "train_prop_acc": 100 * prop_acc,
                        "train_noprop_acc": 100 * noprop_acc,
                        "val_prop_acc": 100 * val_prop_acc,
                        "val_noprop_acc": 100 * val_noprop_acc,
                        "norm": norm,
                        # 100x scale for binary, 50x for multiclass
                        # (scaled up to visualize better)
                        "lossx100": 100 * tr_loss,
                        "lossx50": 50 * tr_loss
                    }
                    collect_stats.append(stats)
            print()

        # Keep track of checkpoint with best validation loss so far
        # If option is picked
        if save_option == 'best':
            if loss < best_loss:
                best_model = copy.deepcopy(model)
                best_loss, best_vacc = loss, acc

        # If early stopping, stop training
        if early_stop and stopper.track(tr_loss):
            print("Stopping early, as requested!")
            break

        # Take scheduler step, if enabled
        if use_plateau_scheduler:
            scheduler.step(tr_loss)

    # Save latest model state, if this option is picked
    if save_option == 'last':
        best_model = model

    if save_option != 'none':
        ch.save(best_model.state_dict(), save_path)

    # Keep track of everything, if asked
    if study_mode:
        return model, best_loss, best_vacc, collect_stats

    # Return best validation metrics
    return model, best_loss, best_vacc
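
A minimal hypothetical call: train with a validation split and keep the
checkpoint with the best validation loss (loaders and class count are
placeholders):

    model, best_loss, best_vacc = train_model(model,
                                              (train_loader, val_loader),
                                              epochs=10,
                                              c_rule=None,
                                              n_classes=10,
                                              save_path="best.pth",
                                              save_option='best')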
Example #20
def run_task(task, args, config):
    """ Runs a mutation testing task with settings in `args` and `config`.

    Collects results and produces CSV data.
    """
    flash_command = config['flash_command']

    port = args.port if args.port else utils.get_default_serial_port()
    os.environ['PORT'] = port
    timeout = int(args.timeout)
    csv = args.csv
    rng = random.Random(args.seed)

    mutation = mutator.Mutator(src=task['src'],
                               mutation_patterns=task['patterns'],
                               rng=rng)
    mutations_list = mutation.generateMutants(
        mutants_per_pattern=(task['mutants_per_pattern']
                             if 'mutants_per_pattern' in task else None),
        random=args.randomize)

    data_record = []
    trials = []
    test_to_kills = {}
    run_cnt = 0
    nc = 0
    total_failures = 0
    mutant_cnt = int(args.mutants)

    failures_per_pattern = {}
    total_per_pattern = {}
    for mp in mutation.getPatterns():
        if mp not in failures_per_pattern:
            failures_per_pattern[mp] = 0
        if mp not in total_per_pattern:
            total_per_pattern[mp] = 0

    # outer try is for finally generating csv if automation stops early
    try:
        for occurrence in mutations_list:
            if run_cnt == mutant_cnt:
                break

            mp = occurrence.pattern
            # mutate the code
            utils.yellow_print(occurrence)
            original_line, mutated_line = mutation.mutate(occurrence)
            # remove the '.old' suffix; str.rstrip would strip any trailing
            # '.', 'o', 'l', 'd' characters and mangle names like "fool.old" -> "f"
            file_changed = (occurrence.file[:-len(".old")]
                            if occurrence.file.endswith(".old") else occurrence.file)
            line_number = occurrence.line
            # try is for catching compile failure to continue execution
            try:
                # cmake, build, flash, and read
                output, final_flag = flash_and_read(port, timeout,
                                                    flash_command)

                # reaching here means success, so change counters
                run_cnt += 1
                total_per_pattern[mp] += 1

                # tests expected to catch
                tests_expected_to_catch = "N/A"
                if args.line_coverage:
                    tests_expected_to_catch = ",".join(
                        get_expected_catch(args.line_coverage,
                                           int(line_number)))

                # mutant_status can either be "FAIL", "PASS", "CRASH", "TIMEOUT"
                mutant_status = "FAIL"
                if final_flag == FLAGS.PassFlag:
                    utils.red_print("Mutant is Alive")
                    utils.red_print(
                        "Tests that are expected to catch this mutant are: \n{}"
                        .format(tests_expected_to_catch))
                    mutant_status = "PASS"
                else:
                    failures_per_pattern[mp] += 1
                    total_failures += 1
                    utils.green_print("Mutant is Killed")
                if final_flag == FLAGS.CrashFlag:
                    mutant_status = "CRASH"
                elif final_flag == "TIMEOUT":
                    mutant_status = "TIMEOUT"

                # Analyze the output to count per test failures
                results = re.findall(TestRegEx, output)
                for group, test, result in results:
                    kills, total = test_to_kills.get((group, test), (0, 0))
                    if result == 'FAIL':
                        kills += 1
                    test_to_kills[(group, test)] = (kills, total + 1)

                # Add result to CSV queue
                trials.append({
                    'file': file_changed,
                    'line': line_number,
                    'original': original_line,
                    'mutant': mutated_line,
                    'result': ("{}/KILLED".format(mutant_status)
                               if mutant_status != "PASS" else "PASS/LIVE"),
                    'expected_catch': tests_expected_to_catch
                })
                utils.yellow_print("Successful Mutant Runs: {}/{}".format(
                    run_cnt, mutant_cnt))
            except CompileFailed:
                utils.yellow_print("Cannot compile, discard and move on")
                nc += 1
            finally:
                mutation.restore()
    except:
        traceback.print_exc()
        raise
    finally:
        mutation.cleanup()
        # calculate mutant score
        score = percentage(total_failures, run_cnt)
        utils.yellow_print("Score: {}%".format(score))
        utils.yellow_print(
            "Alive: {} Killed: {} Mutants: {} No-Compile: {} Attempted Runs: {}"
            .format(run_cnt - total_failures, total_failures, run_cnt, nc,
                    run_cnt + nc))
        trials.append({
            'file': "RESULTS:",
            'line': "{} NO-COMPILE".format(nc),
            'mutant': "SCORE",
            'original': "{} KILLED/{} MUTANTS".format(total_failures, run_cnt),
            'result': "{}%".format(score)
        })

        # aggregate pass/fail counts for each found test in test group
        aggregates = []
        for group, test in test_to_kills:
            kills, total = test_to_kills[(group, test)]
            aggregates.append({
                'Group': group,
                'Test': test,
                'Fails': kills,
                'Passes': total - kills,
                'Total': total
            })

        # pattern comparison
        for mp in total_per_pattern:
            data_record.append({
                'pattern': "{} => {}".format(mp.pattern, mp.transformation),
                'failures': failures_per_pattern[mp],
                'total': total_per_pattern[mp],
                'percentage': (float(percentage(failures_per_pattern[mp],
                                                total_per_pattern[mp])) * 0.01
                               if total_per_pattern[mp] > 0 else 2)
            })

        # log to csv
        if csv:
            csv_path = os.path.join(
                dir_path, "csvs/{}/{}".format(current_date, current_time))
            pattern_csv = os.path.join(
                csv_path, "{}_pattern_comparison.csv".format(task['name']))
            trials_csv = os.path.join(
                csv_path, "{}_mutants_created.csv".format(task['name']))
            per_test_csv = os.path.join(
                csv_path, "{}_test_aggregates.csv".format(task['name']))
            to_csv(pattern_csv, ['pattern', 'failures', 'total', 'percentage'],
                   data_record)
            to_csv(trials_csv, [
                'file', 'line', 'original', 'mutant', 'result',
                'expected_catch'
            ], trials)
            to_csv(per_test_csv, ['Group', 'Test', 'Fails', 'Passes', 'Total'],
                   aggregates)
Example #21
def modelTargetPoisoning(model_p, logger, args):
    # Implementation of Algorithm 1, modified for DNNs
    # Line number corresponding to the Algorithm is mentioned
    # Along with each high-level function call

    # Fetch appropriate dataset
    ds = datasets.dataset_helper(args.dataset)()

    # Maintain copy of clean data (for seed sampling)
    ds_clean = datasets.dataset_helper(args.dataset)()

    # Line 1: Collect poisoning points
    D_p = [[], []]

    # Load poison data, if provided
    if args.poison_data:
        print(utils.green_print("Loading poison data"))
        data = np.load("./data/poison_data/poison_data.npz")
        # Normalize to 0-1 for use by model
        all_poison_data_x = ch.from_numpy(data['x']).float() / 255.
        all_poison_data_x = ch.unsqueeze(all_poison_data_x, 1)
        all_poison_data_y = ch.from_numpy(data['y'])

    # Line 3: Since D_p is empty in first iteration, simply train it outside
    model_t_pretrained, pretrain_optim = mtp_utils.train_clean_model(ds, args)

    # Report performance of clean model
    batch_size = args.batch_size
    if batch_size == -1:
        batch_size = len(ds.train)
    train_loader, test_loader = ds.get_loaders(batch_size)
    clean_acc, clean_total_loss = dnn_utils.get_model_metrics(
        model_t_pretrained, test_loader, lossfn=args.loss)
    print(utils.yellow_print("[Clean-model][Test] Total Acc: %.4f" %
                             clean_acc))
    print(
        utils.yellow_print("[Clean-model] Loss on train: %.4f" %
                           clean_total_loss))
    (population_acc,
     _), (non_population_acc,
          _) = dnn_utils.get_model_metrics(model_t_pretrained,
                                           test_loader,
                                           lossfn=args.loss,
                                           target_prop=args.poison_class)
    print(
        utils.red_print("[Clean-model][Test] Population Acc: %.4f" %
                        population_acc))
    print(
        utils.red_print("[Clean-model][Test] Non- Population Acc: %.4f" %
                        non_population_acc))
    print()

    # Line 2: Iterate until stopping criteria met
    prev_loss, best_loss = np.inf, np.inf
    num_iters = 0
    condition = True
    while condition:

        if len(D_p[0]) > 0:
            # Line 3: theta_t = train(D_c U D_p)
            print(
                utils.yellow_print("[Training model on Dc U Dp "
                                   "(on %d samples)]" % len(ds.train)))
            # Get loader for D_c U D_p
            batch_size = args.batch_size
            if batch_size == -1:
                batch_size = len(ds.train)
            data_loader, _ = ds.get_loaders(batch_size)

            # Do not re-initialize model if finetuning requested
            if not args.finetune:
                # Construct model
                model_t = dnn_utils.get_seeded_wrapped_model(
                    args, n_classes=ds.n_classes)
            else:
                # Start finetuning from the point where model
                # has seen only clean data
                model_t = copy.deepcopy(model_t_pretrained)
            # Set model to training mode
            model_t.train()

            # Define optimizer
            optim = ch.optim.Adam(model_t.parameters(),
                                  lr=args.pretrain_lr,
                                  weight_decay=args.pretrain_weight_decay)

            # Adjust starting point of optimizer
            # if finetuning is requested
            if args.finetune:
                optim.load_state_dict(pretrain_optim.state_dict())

            # Increase the number of iterations theta_t is trained for
            # as the size of its training set |D_c U D_p| increases
            iters = args.iters
            if args.increase_iters:
                iters += int((len(ds.train) - len(ds_clean.train)) /
                             args.increase_every)

            # Train model
            for e in range(iters):
                # Train epoch
                dnn_utils.epoch(model=model_t,
                                loader=data_loader,
                                optimizer=optim,
                                epoch_num=e + 1,
                                c_rule=None,
                                n_classes=None,
                                verbose=True,
                                lossfn=args.loss)
        else:
            model_t = model_t_pretrained

        # Make sure theta_t is in eval mode
        model_t.eval()

        start_with = None
        if args.start_opt_real:
            # If flag set, start with real data sampled from
            # (unpoisoned) train loader
            batch_size = args.batch_size
            if batch_size == -1:
                batch_size = len(ds.train)
            loader, _ = ds_clean.get_loaders(batch_size)
            start_with = datasets.get_sample_from_loader(
                loader, args.trials, ds_clean.n_classes)
        elif args.poison_data:
            # Sample `args.trials` points from the loaded poison data
            perm = ch.randperm(all_poison_data_x.size(0))
            idx = perm[:args.trials]
            start_with = (all_poison_data_x[idx], all_poison_data_y[idx])

        # Line 4: Compute (x*, y*)
        if args.use_optim_for_optimal:
            find_optimal_function = mtp_utils.find_optimal_using_optim
        else:
            find_optimal_function = mtp_utils.find_optimal
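        # Both variants presumably search input space for the point
        # (x*, y*) on which the current model theta_t and the attacker's
        # target model theta_p disagree the most, starting from
        # `start_with` if provided (interpretation; the helpers live in
        # mtp_utils and are not shown in this example).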

        (x_opt,
         y_opt), best_loss = find_optimal_function(theta_t=model_t,
                                                   theta_p=model_p,
                                                   input_shape=ds.datum_shape,
                                                   n_classes=ds.n_classes,
                                                   trials=args.trials,
                                                   num_steps=args.num_steps,
                                                   step_size=args.optim_lr,
                                                   verbose=True,
                                                   start_with=start_with,
                                                   lossfn=args.loss,
                                                   dynamic_lr=args.dynamic_lr,
                                                   filter=args.filter)

        # If the loss increased, retry the optimization once with
        # double the trials, to reduce the chance of a bad minimum
        if args.skip_bad and best_loss > prev_loss:
            print(utils.red_print("Re-running optimization with more seeds"))
            (x_opt, y_opt), best_loss = find_optimal_function(
                theta_t=model_t,
                theta_p=model_p,
                input_shape=ds.datum_shape,
                n_classes=ds.n_classes,
                trials=args.trials * 2,
                num_steps=args.num_steps,
                step_size=args.optim_lr,
                verbose=True,
                start_with=start_with,
                lossfn=args.loss,
                dynamic_lr=args.dynamic_lr)

        # Log some information about x*, y*
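        # If theta_t already predicts y* on x*, the candidate poison
        # point presumably adds no pressure toward theta_p, which the
        # warning below flags (interpretation, not from the source).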
        with ch.no_grad():
            pred_t, pred_p = model_t(x_opt), model_p(x_opt)
            if pred_t.argmax(1).item() == y_opt.item():
                print(utils.red_print("[BAD OPTIMIZATION. CHECK]"))
        print(
            utils.cyan_print(
                "Loss: %.3f Mt(x*): %d, Mp(x*): %d, y*: %d" %
                (best_loss.item(), pred_t.argmax(1).item(),
                 pred_p.argmax(1).item(), y_opt.item())))

        # Line 5: Add (x*, y*) to D_p
        for _ in range(args.n_copies):
            D_p[0].append(x_opt.cpu())
            D_p[1].append(y_opt.cpu())
            ds.add_point_to_train(x_opt.cpu(), y_opt.cpu())
        print()

        # Calculate useful statistics
        (tst_sub_acc,
         _), (tst_nsub_acc,
              _) = dnn_utils.get_model_metrics(model=model_t,
                                               loader=test_loader,
                                               target_prop=args.poison_class,
                                               lossfn=args.loss)
        (trn_sub_acc,
         _), (trn_nsub_acc,
              _) = dnn_utils.get_model_metrics(model=model_t,
                                               loader=train_loader,
                                               target_prop=args.poison_class,
                                               lossfn=args.loss)
        norm_diffs = dnn_utils.model_l2_closeness(model_t, model_p)

        # Log information
        mtp_utils.log_information(logger=logger,
                                  best_loss=best_loss,
                                  x_opt=x_opt,
                                  model_t=model_t,
                                  norm_diffs=norm_diffs,
                                  trn_sub_acc=trn_sub_acc,
                                  trn_nsub_acc=trn_nsub_acc,
                                  tst_sub_acc=tst_sub_acc,
                                  tst_nsub_acc=tst_nsub_acc,
                                  num_iters=num_iters + 1,
                                  args=args)

        # Line 6: Get ready to check condition
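        # stop_cond (defined elsewhere) presumably returns False once a
        # stopping criterion is met, e.g. an iteration cap or a target
        # accuracy on the poisoned sub-population; a hypothetical sketch
        # is given after this example.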
        condition = stop_cond(args=args,
                              best_loss=best_loss,
                              num_iters=num_iters,
                              tst_sub_acc=tst_sub_acc,
                              norm_diffs=norm_diffs)

        # Keep track of no. of iterations
        num_iters += 1

        # Keep track of loss from previous iteration
        prev_loss = best_loss.item()

    # Line 7: Return poison data
    return D_p, model_t
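
# For reference, a minimal hypothetical sketch of what `stop_cond` above
# might look like; the real implementation is not part of this example,
# and the flag names (max_iters, loss_threshold) are assumptions.
def stop_cond(args, best_loss, num_iters, tst_sub_acc, norm_diffs=None):
    # Stop once the iteration budget is exhausted
    if num_iters + 1 >= args.max_iters:
        return False
    # Otherwise keep iterating while the attack loss is still high
    return float(best_loss) > args.loss_threshold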
Beispiel #22
0
def modelTargetPoisoning(models_p, logger, args):
    # Implementation of Algorithm 1, modified for DNNs
    # The corresponding algorithm line numbers are noted
    # alongside each high-level function call

    # Fetch appropriate dataset
    ds = datasets.dataset_helper("memory")(path=args.path_1)

    # Maintain copy of clean data (for seed sampling)
    ds_clean = datasets.dataset_helper("memory")(path=args.path_1)

    # Data to pick points from (for x* optimization)
    ds_second = datasets.dataset_helper("memory")(path=args.path_2)
    loader_optim, _ = ds_second.get_loaders(1000)

    # Line 1: Collect poisoning points
    D_p = [[], []]

    # Line 3: D_p is empty in the first iteration,
    # so simply train theta_t outside the loop
    models_t_pretrained = []
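    # Ensemble variant: one pretrained target model per seed;
    # the metrics reported below are averaged over this ensemble.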
    for seed in args.seeds:
        args.seed = seed
        print(utils.yellow_print("Printing model with seed %d" % args.seed))
        model_t_pretrained, _ = mtp_utils.train_clean_model(ds, args)
        models_t_pretrained.append(model_t_pretrained)

    # Report performance of clean model
    batch_size = len(ds.train)

    train_loader, test_loader = ds.get_loaders(batch_size)
    clean_accs, clean_total_losses = [], []
    population_accs, non_population_accs = [], []
    for model_t_pretrained in models_t_pretrained:
        clean_acc, clean_total_loss = dnn_utils.get_model_metrics(
            model_t_pretrained, test_loader, lossfn=args.loss)
        clean_accs.append(clean_acc)
        clean_total_losses.append(clean_total_loss)

        (population_acc,
         _), (non_population_acc,
              _) = dnn_utils.get_model_metrics(model_t_pretrained,
                                               test_loader,
                                               lossfn=args.loss,
                                               target_prop=args.poison_class)
        population_accs.append(population_acc)
        non_population_accs.append(non_population_acc)

    print(
        utils.yellow_print("[Clean-model][Test] Total Acc: %.4f" %
                           np.mean(clean_accs)))
    print(
        utils.yellow_print("[Clean-model] Loss on train: %.4f" %
                           np.mean(clean_total_losses)))
    print(
        utils.red_print("[Clean-model][Test] Population Acc: %.4f" %
                        np.mean(population_accs)))
    print(
        utils.red_print("[Clean-model][Test] Non-Population Acc: %.4f" %
                        np.mean(non_population_accs)))
    print()

    # Line 2: Iterate until stopping criteria met
    best_loss = np.inf
    num_iters = 0
    condition = True
    while condition:

        if len(D_p[0]) > 0:
            # Line 3: theta_t = train(D_c U D_p)
            print(
                utils.yellow_print("[Training model on Dc U Dp "
                                   "(on %d samples)]" % len(ds.train)))

            # Get loader for D_c U D_p
            batch_size = len(ds.train)
            data_loader, _ = ds.get_loaders(batch_size)

            # Increase number of iterations theta_t is trained for
            # as size of its training set |D_c U D_p| increases
            iters = args.iters
            if args.increase_iters:
                iters += int((len(ds.train) - len(ds_clean.train)) /
                             args.increase_every)

            # Construct model
            models_t = []
            for seed in args.seeds:
                args.seed = seed
                model_t = dnn_utils.get_seeded_wrapped_model(
                    args, n_classes=ds.n_classes)
                # Set model to training mode
                model_t.train()

                # Define optimizer
                optim = ch.optim.Adam(model_t.parameters(),
                                      lr=args.pretrain_lr,
                                      weight_decay=args.pretrain_weight_decay)

                # Train model
                print(
                    utils.yellow_print("Printing model with seed %d" %
                                       args.seed))
                for e in range(iters):
                    # Train epoch
                    dnn_utils.epoch(model=model_t,
                                    loader=data_loader,
                                    optimizer=optim,
                                    epoch_num=e + 1,
                                    c_rule=None,
                                    n_classes=None,
                                    verbose=True,
                                    lossfn=args.loss)

                models_t.append(model_t)
        else:
            models_t = models_t_pretrained

        # Make sure all theta_t models are in eval mode
        for model_t in models_t:
            model_t.eval()

        # Line 4: Compute (x*, y*)
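        # Unlike the gradient-based search in the previous example, this
        # variant presumably looks up the best candidate among real
        # points drawn from `loader_optim` (the second data split),
        # scoring each against both model ensembles (interpretation).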
        (x_opt, y_opt), best_loss = mtp_utils.lookup_based_optimal(
            theta_t=models_t,
            theta_p=models_p,
            loader=loader_optim,
            n_classes=ds_second.n_classes,
            random=args.random,
            lossfn=args.loss,
            filter=args.filter,
            verbose=True,
            ensemble_t=True,
            ensemble_p=True,
            pick_optimal=args.pick_optimal)

        # Log some information about x*, y*
        with ch.no_grad():
            preds_p = [
                str(model_p(x_opt).argmax(1).item()) for model_p in models_p
            ]
            preds_t = [
                str(model_t(x_opt).argmax(1).item()) for model_t in models_t
            ]
        print(
            utils.cyan_print("Loss: %.3f Mt(x*): %s, Mp(x*): %s, y*: %d" %
                             (best_loss.item(), ",".join(preds_t),
                              ",".join(preds_p), y_opt.item())))

        # Line 5: Add (x*, y*) to D_p
        for _ in range(args.n_copies):
            D_p[0].append(x_opt.cpu())
            D_p[1].append(y_opt.cpu())
            ds.add_point_to_train(x_opt.cpu(), y_opt.cpu())
        print()

        # Calculate useful statistics
        tst_sub_accs, tst_nsub_accs = [], []
        trn_sub_accs, trn_nsub_accs = [], []
        for model_t in models_t:
            (tst_sub_acc, _), (tst_nsub_acc, _) = dnn_utils.get_model_metrics(
                model=model_t,
                loader=test_loader,
                target_prop=args.poison_class,
                lossfn=args.loss)
            tst_sub_accs.append(tst_sub_acc)
            tst_nsub_accs.append(tst_nsub_acc)

            (trn_sub_acc, _), (trn_nsub_acc, _) = dnn_utils.get_model_metrics(
                model=model_t,
                loader=train_loader,
                target_prop=args.poison_class,
                lossfn=args.loss)
            trn_sub_accs.append(trn_sub_acc)
            trn_nsub_accs.append(trn_nsub_acc)

        # Get mean of these metrics
        trn_sub_acc = np.mean(trn_sub_accs)
        tst_sub_acc = np.mean(tst_sub_accs)
        trn_nsub_acc = np.mean(trn_nsub_accs)
        tst_nsub_acc = np.mean(tst_nsub_accs)

        # Log information
        mtp_utils.log_information(logger=logger,
                                  best_loss=best_loss,
                                  x_opt=x_opt,
                                  norm_diffs=None,
                                  trn_sub_acc=trn_sub_acc,
                                  trn_nsub_acc=trn_nsub_acc,
                                  tst_sub_acc=tst_sub_acc,
                                  tst_nsub_acc=tst_nsub_acc,
                                  num_iters=num_iters + 1,
                                  args=args,
                                  label=y_opt)

        # Line 6: Get ready to check condition
        condition = stop_cond(args=args,
                              best_loss=best_loss,
                              num_iters=num_iters,
                              tst_sub_acc=tst_sub_acc)

        # Keep track of no. of iterations
        num_iters += 1

    # Line 7: Return poison data
    return D_p, models_t
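
# A minimal, hypothetical usage sketch for the ensemble variant above.
# The original driver script is not shown; every value below, as well as
# the existence of `models_p` and `logger`, is an illustrative assumption.
import argparse

args = argparse.Namespace(
    path_1="./data/split_1.npz", path_2="./data/split_2.npz",
    seeds=[0, 1, 2], loss="ce", poison_class=0,
    iters=20, increase_iters=False, increase_every=10,
    pretrain_lr=1e-3, pretrain_weight_decay=0.0,
    random=False, filter=False, pick_optimal=True, n_copies=1)
D_p, models_t = modelTargetPoisoning(models_p, logger, args)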