Example #1
    def train(
        self, train_dataset, output_dir, show_running_loss=True, eval_data=None, verbose=True, **kwargs,
    ):
        """
        Trains the model on train_dataset.

        Utility function to be used by the train_model() method. Not intended to be used directly.
        """

        model = self.model
        args = self.args

        tb_writer = SummaryWriter(logdir=args["tensorboard_dir"])
        train_sampler = RandomSampler(train_dataset)
        train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args["train_batch_size"])

        if args["max_steps"] > 0:
            t_total = args["max_steps"]
            args["num_train_epochs"] = (
                args["max_steps"] // (len(train_dataloader) // args["gradient_accumulation_steps"]) + 1
            )
        else:
            t_total = len(train_dataloader) // args["gradient_accumulation_steps"] * args["num_train_epochs"]

        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
                "weight_decay": args["weight_decay"],
            },
            {"params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)]},
        ]

        warmup_steps = math.ceil(t_total * args["warmup_ratio"])
        args["warmup_steps"] = warmup_steps if args["warmup_steps"] == 0 else args["warmup_steps"]

        # TODO: Use custom optimizer like with BertSum?
        optimizer = AdamW(optimizer_grouped_parameters, lr=args["learning_rate"], eps=args["adam_epsilon"])
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=args["warmup_steps"], num_training_steps=t_total
        )

        if (
            args["model_name"]
            and os.path.isfile(os.path.join(args["model_name"], "optimizer.pt"))
            and os.path.isfile(os.path.join(args["model_name"], "scheduler.pt"))
        ):
            # Load in optimizer and scheduler states
            optimizer.load_state_dict(torch.load(os.path.join(args["model_name"], "optimizer.pt")))
            scheduler.load_state_dict(torch.load(os.path.join(args["model_name"], "scheduler.pt")))

        if args["fp16"]:
            try:
                from apex import amp
            except ImportError:
                raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")

            model, optimizer = amp.initialize(model, optimizer, opt_level=args["fp16_opt_level"])

        if args["n_gpu"] > 1:
            model = torch.nn.DataParallel(model)

        logger.info(" Training started")

        global_step = 0
        tr_loss, logging_loss = 0.0, 0.0
        model.zero_grad()
        train_iterator = trange(int(args["num_train_epochs"]), desc="Epoch", disable=args["silent"], mininterval=0)
        epoch_number = 0
        best_eval_metric = None
        early_stopping_counter = 0
        steps_trained_in_current_epoch = 0
        epochs_trained = 0

        if args["model_name"] and os.path.exists(args["model_name"]):
            try:
                # set global_step to global_step of last saved checkpoint from model path
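                # e.g. ".../checkpoint-2000" -> "2000", ".../checkpoint-2000-epoch-2" -> "2000"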
                checkpoint_suffix = args["model_name"].split("/")[-1].split("-")
                if len(checkpoint_suffix) > 2:
                    checkpoint_suffix = checkpoint_suffix[1]
                else:
                    checkpoint_suffix = checkpoint_suffix[-1]
                global_step = int(checkpoint_suffix)
                epochs_trained = global_step // (len(train_dataloader) // args["gradient_accumulation_steps"])
                steps_trained_in_current_epoch = global_step % (
                    len(train_dataloader) // args["gradient_accumulation_steps"]
                )

                logger.info("   Continuing training from checkpoint, will skip to saved global_step")
                logger.info("   Continuing training from epoch %d", epochs_trained)
                logger.info("   Continuing training from global step %d", global_step)
                logger.info("   Will skip the first %d steps in the current epoch", steps_trained_in_current_epoch)
            except ValueError:
                logger.info("   Starting fine-tuning.")

        if args["evaluate_during_training"]:
            training_progress_scores = self._create_training_progress_scores(**kwargs)

        if args["wandb_project"]:
            wandb.init(project=args["wandb_project"], config={**args}, **args["wandb_kwargs"])
            wandb.watch(self.model)

        model.train()
        for current_epoch in train_iterator:
            if epochs_trained > 0:
                epochs_trained -= 1
                continue
            # epoch_iterator = tqdm(train_dataloader, desc="Iteration")
            for step, batch in enumerate(tqdm(train_dataloader, desc="Current iteration", disable=args["silent"])):
                if steps_trained_in_current_epoch > 0:
                    steps_trained_in_current_epoch -= 1
                    continue
                # batch = tuple(t.to(device) for t in batch)

                inputs = self._get_inputs_dict(batch)
                outputs = model(**inputs)
                # model outputs are always a tuple in pytorch-transformers (see docs)
                loss = outputs[0]

                if args["n_gpu"] > 1:
                    loss = loss.mean()  # mean() to average on multi-gpu parallel training

                current_loss = loss.item()

                if show_running_loss:
                    print("\rRunning loss: %f" % loss, end="")

                if args["gradient_accumulation_steps"] > 1:
                    loss = loss / args["gradient_accumulation_steps"]

                if args["fp16"]:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                    # torch.nn.utils.clip_grad_norm_(
                    #     amp.master_params(optimizer), args["max_grad_norm"]
                    # )
                else:
                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(
                    #     model.parameters(), args["max_grad_norm"]
                    # )

                tr_loss += loss.item()
                if (step + 1) % args["gradient_accumulation_steps"] == 0:
                    if args["fp16"]:
                        torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args["max_grad_norm"])
                    else:
                        torch.nn.utils.clip_grad_norm_(model.parameters(), args["max_grad_norm"])

                    optimizer.step()
                    scheduler.step()  # Update learning rate schedule
                    model.zero_grad()
                    global_step += 1

                    if args["logging_steps"] > 0 and global_step % args["logging_steps"] == 0:
                        # Log metrics
                        tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step)
                        tb_writer.add_scalar("loss", (tr_loss - logging_loss) / args["logging_steps"], global_step)
                        logging_loss = tr_loss
                        if args["wandb_project"]:
                            wandb.log(
                                {
                                    "Training loss": current_loss,
                                    "lr": scheduler.get_lr()[0],
                                    "global_step": global_step,
                                }
                            )

                    if args["save_steps"] > 0 and global_step % args["save_steps"] == 0:
                        # Save model checkpoint
                        output_dir_current = os.path.join(output_dir, "checkpoint-{}".format(global_step))

                        self._save_model(output_dir_current, optimizer, scheduler, model=model)

                    if args["evaluate_during_training"] and (
                        args["evaluate_during_training_steps"] > 0
                        and global_step % args["evaluate_during_training_steps"] == 0
                    ):
                        # Only evaluate when single GPU otherwise metrics may not average well
                        results = self.eval_model(
                            eval_data,
                            verbose=verbose and args["evaluate_during_training_verbose"],
                            silent=True,
                            **kwargs,
                        )
                        for key, value in results.items():
                            tb_writer.add_scalar("eval_{}".format(key), value, global_step)

                        output_dir_current = os.path.join(output_dir, "checkpoint-{}".format(global_step))

                        if args["save_eval_checkpoints"]:
                            self._save_model(output_dir_current, optimizer, scheduler, model=model, results=results)

                        training_progress_scores["global_step"].append(global_step)
                        training_progress_scores["train_loss"].append(current_loss)
                        for key in results:
                            training_progress_scores[key].append(results[key])
                        report = pd.DataFrame(training_progress_scores)
                        report.to_csv(
                            os.path.join(args["output_dir"], "training_progress_scores.csv"), index=False,
                        )

                        if args["wandb_project"]:
                            wandb.log(self._get_last_metrics(training_progress_scores))

                        if not best_eval_metric:
                            best_eval_metric = results[args["early_stopping_metric"]]
                            if args["save_best_model"]:
                                self._save_model(
                                    args["best_model_dir"], optimizer, scheduler, model=model, results=results
                                )
                        if best_eval_metric and args["early_stopping_metric_minimize"]:
                            if (
                                results[args["early_stopping_metric"]] - best_eval_metric
                                < args["early_stopping_delta"]
                            ):
                                best_eval_metric = results[args["early_stopping_metric"]]
                                if args["save_best_model"]:
                                    self._save_model(
                                        args["best_model_dir"], optimizer, scheduler, model=model, results=results
                                    )
                                early_stopping_counter = 0
                            else:
                                if args["use_early_stopping"]:
                                    if early_stopping_counter < args["early_stopping_patience"]:
                                        early_stopping_counter += 1
                                        if verbose:
                                            logger.info(f" No improvement in {args['early_stopping_metric']}")
                                            logger.info(f" Current step: {early_stopping_counter}")
                                            logger.info(f" Early stopping patience: {args['early_stopping_patience']}")
                                    else:
                                        if verbose:
                                            logger.info(
                                                f" Patience of {args['early_stopping_patience']} steps reached"
                                            )
                                            logger.info(" Training terminated.")
                                            train_iterator.close()
                                        return global_step, tr_loss / global_step
                        else:
                            if (
                                results[args["early_stopping_metric"]] - best_eval_metric
                                > args["early_stopping_delta"]
                            ):
                                best_eval_metric = results[args["early_stopping_metric"]]
                                if args["save_best_model"]:
                                    self._save_model(
                                        args["best_model_dir"], optimizer, scheduler, model=model, results=results
                                    )
                                early_stopping_counter = 0
                            else:
                                if args["use_early_stopping"]:
                                    if early_stopping_counter < args["early_stopping_patience"]:
                                        early_stopping_counter += 1
                                        if verbose:
                                            logger.info(f" No improvement in {args['early_stopping_metric']}")
                                            logger.info(f" Current step: {early_stopping_counter}")
                                            logger.info(f" Early stopping patience: {args['early_stopping_patience']}")
                                    else:
                                        if verbose:
                                            logger.info(
                                                f" Patience of {args['early_stopping_patience']} steps reached"
                                            )
                                            logger.info(" Training terminated.")
                                            train_iterator.close()
                                        return global_step, tr_loss / global_step

            epoch_number += 1
            output_dir_current = os.path.join(output_dir, "checkpoint-{}-epoch-{}".format(global_step, epoch_number))

            if args["save_model_every_epoch"] or args["evaluate_during_training"]:
                os.makedirs(output_dir_current, exist_ok=True)

            if args["save_model_every_epoch"]:
                self._save_model(output_dir_current, optimizer, scheduler, model=model)

            if args["evaluate_during_training"]:
                results = self.eval_model(
                    eval_data, verbose=verbose and args["evaluate_during_training_verbose"], silent=True, **kwargs
                )

                if args["save_eval_checkpoints"]:
                    self._save_model(output_dir_current, optimizer, scheduler, results=results)

                training_progress_scores["global_step"].append(global_step)
                training_progress_scores["train_loss"].append(current_loss)
                for key in results:
                    training_progress_scores[key].append(results[key])
                report = pd.DataFrame(training_progress_scores)
                report.to_csv(os.path.join(args["output_dir"], "training_progress_scores.csv"), index=False)

                if args["wandb_project"]:
                    wandb.log(self._get_last_metrics(training_progress_scores))

                if not best_eval_metric:
                    best_eval_metric = results[args["early_stopping_metric"]]
                    if args["save_best_model"]:
                        self._save_model(args["best_model_dir"], optimizer, scheduler, model=model, results=results)
                if best_eval_metric and args["early_stopping_metric_minimize"]:
                    if results[args["early_stopping_metric"]] - best_eval_metric < args["early_stopping_delta"]:
                        best_eval_metric = results[args["early_stopping_metric"]]
                        if args["save_best_model"]:
                            self._save_model(
                                args["best_model_dir"], optimizer, scheduler, model=model, results=results
                            )
                        early_stopping_counter = 0
                    else:
                        if args["use_early_stopping"] and args["early_stopping_consider_epochs"]:
                            if early_stopping_counter < args["early_stopping_patience"]:
                                early_stopping_counter += 1
                                if verbose:
                                    logger.info(f" No improvement in {args['early_stopping_metric']}")
                                    logger.info(f" Current step: {early_stopping_counter}")
                                    logger.info(f" Early stopping patience: {args['early_stopping_patience']}")
                            else:
                                if verbose:
                                    logger.info(f" Patience of {args['early_stopping_patience']} steps reached")
                                    logger.info(" Training terminated.")
                                    train_iterator.close()
                                return global_step, tr_loss / global_step
                else:
                    if results[args["early_stopping_metric"]] - best_eval_metric > args["early_stopping_delta"]:
                        best_eval_metric = results[args["early_stopping_metric"]]
                        if args["save_best_model"]:
                            self._save_model(
                                args["best_model_dir"], optimizer, scheduler, model=model, results=results
                            )
                        early_stopping_counter = 0
                    else:
                        if args["use_early_stopping"] and args["early_stopping_consider_epochs"]:
                            if early_stopping_counter < args["early_stopping_patience"]:
                                early_stopping_counter += 1
                                if verbose:
                                    logger.info(f" No improvement in {args['early_stopping_metric']}")
                                    logger.info(f" Current step: {early_stopping_counter}")
                                    logger.info(f" Early stopping patience: {args['early_stopping_patience']}")
                            else:
                                if verbose:
                                    logger.info(f" Patience of {args['early_stopping_patience']} steps reached")
                                    logger.info(" Training terminated.")
                                    train_iterator.close()
                                return global_step, tr_loss / global_step

        return global_step, tr_loss / global_step
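
For context, the loop above is only reached through the public train_model() method mentioned in the docstring. The sketch below is a minimal, hypothetical driver: the wrapper class and dataframes are placeholders, and only the args keys are taken from the code above.

# Hypothetical Simple Transformers-style usage; only the args keys are grounded in train() above.
train_args = {
    "output_dir": "outputs/",
    "best_model_dir": "outputs/best_model/",
    "num_train_epochs": 3,
    "train_batch_size": 8,
    "gradient_accumulation_steps": 1,
    "evaluate_during_training": True,
    "evaluate_during_training_steps": 100,
    "use_early_stopping": True,
    "early_stopping_metric": "eval_loss",
    "early_stopping_metric_minimize": True,
    "early_stopping_patience": 3,
}

model = SomeTaskModel("bert", "bert-base-cased", args=train_args)  # hypothetical wrapper class
model.train_model(train_df, eval_data=eval_df)  # builds train_dataset internally and calls train()
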
Example #2
    def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,). If `self.targeted` is True, then `y` represents the target labels. Otherwise, the
                  targets are the original class labels.
        :return: An array holding the adversarial examples.
        """
        y = check_and_transform_label_format(y, self.estimator.nb_classes)
        x_adv = x.astype(ART_NUMPY_DTYPE)

        if self.estimator.clip_values is not None:
            clip_min, clip_max = self.estimator.clip_values
        else:
            clip_min, clip_max = np.amin(x), np.amax(x)

        # If the attack is targeted, target labels y must be provided:
        if self.targeted and y is None:
            raise ValueError("Target labels `y` need to be provided for a targeted attack.")

        # No labels provided, use model prediction as correct class
        if y is None:
            y = get_labels_np_array(self.estimator.predict(x, batch_size=self.batch_size))

        # Compute perturbation with implicit batching
        nb_batches = int(np.ceil(x_adv.shape[0] / float(self.batch_size)))
        for batch_id in trange(nb_batches, desc="C&W L_2", disable=not self.verbose):
            batch_index_1, batch_index_2 = batch_id * self.batch_size, (batch_id + 1) * self.batch_size
            x_batch = x_adv[batch_index_1:batch_index_2]
            y_batch = y[batch_index_1:batch_index_2]

            # The optimization is performed in tanh space to keep the adversarial images bounded in correct range
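            # i.e. the attack variable lives in an unconstrained tanh space and is mapped back into [clip_min, clip_max]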
            x_batch_tanh = original_to_tanh(x_batch, clip_min, clip_max, self._tanh_smoother)

            # Initialize binary search:
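            # The constant c trades off the L2 distance against the classification loss; the update at the
            # end of the loop below moves it towards c_lower_bound on success and doubles it (or increases
            # it by half its gap to c_lower_bound) on failure.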
            c_current = self.initial_const * np.ones(x_batch.shape[0])
            c_lower_bound = np.zeros(x_batch.shape[0])
            c_double = np.ones(x_batch.shape[0]) > 0

            # Initialize placeholders for best l2 distance and attack found so far
            best_l2dist = np.inf * np.ones(x_batch.shape[0])
            best_x_adv_batch = x_batch.copy()

            for bss in range(self.binary_search_steps):
                logger.debug(
                    "Binary search step %i out of %i (c_mean==%f)", bss, self.binary_search_steps, np.mean(c_current),
                )
                nb_active = int(np.sum(c_current < self._c_upper_bound))
                logger.debug(
                    "Number of samples with c_current < _c_upper_bound: %i out of %i", nb_active, x_batch.shape[0],
                )
                if nb_active == 0:
                    break
                learning_rate = self.learning_rate * np.ones(x_batch.shape[0])

                # Initialize perturbation in tanh space:
                x_adv_batch = x_batch.copy()
                x_adv_batch_tanh = x_batch_tanh.copy()

                z_logits, l2dist, loss = self._loss(x_batch, x_adv_batch, y_batch, c_current)
                attack_success = loss - l2dist <= 0
                overall_attack_success = attack_success

                for i_iter in range(self.max_iter):
                    logger.debug("Iteration step %i out of %i", i_iter, self.max_iter)
                    logger.debug("Average Loss: %f", np.mean(loss))
                    logger.debug("Average L2Dist: %f", np.mean(l2dist))
                    logger.debug("Average Margin Loss: %f", np.mean(loss - l2dist))
                    logger.debug(
                        "Current number of succeeded attacks: %i out of %i",
                        int(np.sum(attack_success)),
                        len(attack_success),
                    )

                    improved_adv = attack_success & (l2dist < best_l2dist)
                    logger.debug("Number of improved L2 distances: %i", int(np.sum(improved_adv)))
                    if np.sum(improved_adv) > 0:
                        best_l2dist[improved_adv] = l2dist[improved_adv]
                        best_x_adv_batch[improved_adv] = x_adv_batch[improved_adv]

                    active = (c_current < self._c_upper_bound) & (learning_rate > 0)
                    nb_active = int(np.sum(active))
                    logger.debug(
                        "Number of samples with c_current < _c_upper_bound and learning_rate > 0: %i out of %i",
                        nb_active,
                        x_batch.shape[0],
                    )
                    if nb_active == 0:
                        break

                    # compute gradient:
                    logger.debug("Compute loss gradient")
                    perturbation_tanh = -self._loss_gradient(
                        z_logits[active],
                        y_batch[active],
                        x_batch[active],
                        x_adv_batch[active],
                        x_adv_batch_tanh[active],
                        c_current[active],
                        clip_min,
                        clip_max,
                    )

                    # perform line search to optimize perturbation
                    # first, halve the learning rate until perturbation actually decreases the loss:
                    prev_loss = loss.copy()
                    best_loss = loss.copy()
                    best_lr = np.zeros(x_batch.shape[0])
                    halving = np.zeros(x_batch.shape[0])

                    for i_halve in range(self.max_halving):
                        logger.debug(
                            "Perform halving iteration %i out of %i", i_halve, self.max_halving,
                        )
                        do_halving = loss[active] >= prev_loss[active]
                        logger.debug(
                            "Halving to be performed on %i samples", int(np.sum(do_halving)),
                        )
                        if np.sum(do_halving) == 0:
                            break
                        active_and_do_halving = active.copy()
                        active_and_do_halving[active] = do_halving

                        lr_mult = learning_rate[active_and_do_halving]
                        for _ in range(len(x.shape) - 1):
                            lr_mult = lr_mult[:, np.newaxis]

                        x_adv1 = x_adv_batch_tanh[active_and_do_halving]
                        new_x_adv_batch_tanh = x_adv1 + lr_mult * perturbation_tanh[do_halving]
                        new_x_adv_batch = tanh_to_original(new_x_adv_batch_tanh, clip_min, clip_max)
                        _, l2dist[active_and_do_halving], loss[active_and_do_halving] = self._loss(
                            x_batch[active_and_do_halving],
                            new_x_adv_batch,
                            y_batch[active_and_do_halving],
                            c_current[active_and_do_halving],
                        )

                        logger.debug("New Average Loss: %f", np.mean(loss))
                        logger.debug("New Average L2Dist: %f", np.mean(l2dist))
                        logger.debug("New Average Margin Loss: %f", np.mean(loss - l2dist))

                        best_lr[loss < best_loss] = learning_rate[loss < best_loss]
                        best_loss[loss < best_loss] = loss[loss < best_loss]
                        learning_rate[active_and_do_halving] /= 2
                        halving[active_and_do_halving] += 1
                    learning_rate[active] *= 2

                    # if no halving was actually required, double the learning rate as long as this
                    # decreases the loss:
                    for i_double in range(self.max_doubling):
                        logger.debug(
                            "Perform doubling iteration %i out of %i", i_double, self.max_doubling,
                        )
                        do_doubling = (halving[active] == 1) & (loss[active] <= best_loss[active])
                        logger.debug(
                            "Doubling to be performed on %i samples", int(np.sum(do_doubling)),
                        )
                        if np.sum(do_doubling) == 0:
                            break
                        active_and_do_doubling = active.copy()
                        active_and_do_doubling[active] = do_doubling
                        learning_rate[active_and_do_doubling] *= 2

                        lr_mult = learning_rate[active_and_do_doubling]
                        for _ in range(len(x.shape) - 1):
                            lr_mult = lr_mult[:, np.newaxis]

                        x_adv2 = x_adv_batch_tanh[active_and_do_doubling]
                        new_x_adv_batch_tanh = x_adv2 + lr_mult * perturbation_tanh[do_doubling]
                        new_x_adv_batch = tanh_to_original(new_x_adv_batch_tanh, clip_min, clip_max)
                        _, l2dist[active_and_do_doubling], loss[active_and_do_doubling] = self._loss(
                            x_batch[active_and_do_doubling],
                            new_x_adv_batch,
                            y_batch[active_and_do_doubling],
                            c_current[active_and_do_doubling],
                        )
                        logger.debug("New Average Loss: %f", np.mean(loss))
                        logger.debug("New Average L2Dist: %f", np.mean(l2dist))
                        logger.debug("New Average Margin Loss: %f", np.mean(loss - l2dist))
                        best_lr[loss < best_loss] = learning_rate[loss < best_loss]
                        best_loss[loss < best_loss] = loss[loss < best_loss]

                    learning_rate[halving == 1] /= 2

                    update_adv = best_lr[active] > 0
                    logger.debug(
                        "Number of adversarial samples to be finally updated: %i", int(np.sum(update_adv)),
                    )

                    if np.sum(update_adv) > 0:
                        active_and_update_adv = active.copy()
                        active_and_update_adv[active] = update_adv
                        best_lr_mult = best_lr[active_and_update_adv]
                        for _ in range(len(x.shape) - 1):
                            best_lr_mult = best_lr_mult[:, np.newaxis]

                        x_adv4 = x_adv_batch_tanh[active_and_update_adv]
                        best_lr1 = best_lr_mult * perturbation_tanh[update_adv]
                        x_adv_batch_tanh[active_and_update_adv] = x_adv4 + best_lr1

                        x_adv6 = x_adv_batch_tanh[active_and_update_adv]
                        x_adv_batch[active_and_update_adv] = tanh_to_original(x_adv6, clip_min, clip_max)
                        (
                            z_logits[active_and_update_adv],
                            l2dist[active_and_update_adv],
                            loss[active_and_update_adv],
                        ) = self._loss(
                            x_batch[active_and_update_adv],
                            x_adv_batch[active_and_update_adv],
                            y_batch[active_and_update_adv],
                            c_current[active_and_update_adv],
                        )
                        attack_success = loss - l2dist <= 0
                        overall_attack_success = overall_attack_success | attack_success

                # Update depending on attack success:
                improved_adv = attack_success & (l2dist < best_l2dist)
                logger.debug("Number of improved L2 distances: %i", int(np.sum(improved_adv)))

                if np.sum(improved_adv) > 0:
                    best_l2dist[improved_adv] = l2dist[improved_adv]
                    best_x_adv_batch[improved_adv] = x_adv_batch[improved_adv]

                c_double[overall_attack_success] = False
                c_current[overall_attack_success] = (c_lower_bound + c_current)[overall_attack_success] / 2

                c_old = c_current
                c_current[~overall_attack_success & c_double] *= 2

                c_current1 = (c_current - c_lower_bound)[~overall_attack_success & ~c_double]
                c_current[~overall_attack_success & ~c_double] += c_current1 / 2
                c_lower_bound[~overall_attack_success] = c_old[~overall_attack_success]

            x_adv[batch_index_1:batch_index_2] = best_x_adv_batch

        logger.info(
            "Success rate of C&W L_2 attack: %.2f%%",
            100 * compute_success(self.estimator, x, y, x_adv, self.targeted, batch_size=self.batch_size),
        )

        return x_adv
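
A minimal sketch of driving this generate() through ART, assuming `classifier` is an already-constructed ART classifier wrapping a trained model and `x_test` is a batch of inputs in the format that classifier expects; the constructor keywords mirror the attributes read above, but their exact names should be checked against the installed ART release.

import numpy as np
from art.attacks.evasion import CarliniL2Method

# `classifier` and `x_test` are assumed to be defined elsewhere (sketch only).
attack = CarliniL2Method(classifier, binary_search_steps=10, max_iter=10, batch_size=64, verbose=True)
x_adv = attack.generate(x=x_test)  # untargeted: labels default to the model's own predictions

preds_clean = np.argmax(classifier.predict(x_test), axis=1)
preds_adv = np.argmax(classifier.predict(x_adv), axis=1)
print("attack success rate:", float(np.mean(preds_adv != preds_clean)))
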
Example #3
    def train(self, model_path: Optional[str] = None):
        """
        Main training entry point.

        Args:
            model_path:
                (Optional) Local path to model if model to train has been instantiated from a local path
                If present, we will try reloading the optimizer/scheduler states from there.
        """
        train_dataloader = self.get_train_dataloader()
        if self.args.max_steps > 0:
            t_total = self.args.max_steps
            num_train_epochs = (
                self.args.max_steps // (len(train_dataloader) // self.args.gradient_accumulation_steps) + 1
            )
        else:
            t_total = int(len(train_dataloader) // self.args.gradient_accumulation_steps * self.args.num_train_epochs)
            num_train_epochs = self.args.num_train_epochs

        optimizer, scheduler = self.get_optimizers(num_training_steps=t_total)

        # Check if saved optimizer or scheduler states exist
        if (
            model_path is not None
            and os.path.isfile(os.path.join(model_path, "optimizer.pt"))
            and os.path.isfile(os.path.join(model_path, "scheduler.pt"))
        ):
            # Load in optimizer and scheduler states
            optimizer.load_state_dict(
                torch.load(os.path.join(model_path, "optimizer.pt"), map_location=self.args.device)
            )
            scheduler.load_state_dict(torch.load(os.path.join(model_path, "scheduler.pt")))

        model = self.model
        if self.args.fp16:
            if not is_apex_available():
                raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
            model, optimizer = amp.initialize(model, optimizer, opt_level=self.args.fp16_opt_level)

        # multi-gpu training (should be after apex fp16 initialization)
        if self.args.n_gpu > 1:
            model = torch.nn.DataParallel(model)

        # Distributed training (should be after apex fp16 initialization)
        if self.args.local_rank != -1:
            model = torch.nn.parallel.DistributedDataParallel(
                model,
                device_ids=[self.args.local_rank],
                output_device=self.args.local_rank,
                find_unused_parameters=True,
            )

        if self.tb_writer is not None:
            self.tb_writer.add_text("args", self.args.to_json_string())
            self.tb_writer.add_hparams(self.args.to_sanitized_dict(), metric_dict={})

        # Train!
        if is_tpu_available():
            total_train_batch_size = self.args.train_batch_size * xm.xrt_world_size()
        else:
            total_train_batch_size = (
                self.args.train_batch_size
                * self.args.gradient_accumulation_steps
                * (torch.distributed.get_world_size() if self.args.local_rank != -1 else 1)
            )
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", self.num_examples(train_dataloader))
        logger.info("  Num Epochs = %d", num_train_epochs)
        logger.info("  Instantaneous batch size per device = %d", self.args.per_gpu_train_batch_size)
        logger.info("  Total train batch size (w. parallel, distributed & accumulation) = %d", total_train_batch_size)
        logger.info("  Gradient Accumulation steps = %d", self.args.gradient_accumulation_steps)
        logger.info("  Total optimization steps = %d", t_total)

        self.global_step = 0
        self.epoch = 0
        epochs_trained = 0
        steps_trained_in_current_epoch = 0
        # Check if continuing training from a checkpoint
        if model_path is not None:
            # set global_step to global_step of last saved checkpoint from model path
            try:
                self.global_step = int(model_path.split("-")[-1].split("/")[0])
                epochs_trained = self.global_step // (len(train_dataloader) // self.args.gradient_accumulation_steps)
                steps_trained_in_current_epoch = self.global_step % (
                    len(train_dataloader) // self.args.gradient_accumulation_steps
                )

                logger.info("  Continuing training from checkpoint, will skip to saved global_step")
                logger.info("  Continuing training from epoch %d", epochs_trained)
                logger.info("  Continuing training from global step %d", self.global_step)
                logger.info("  Will skip the first %d steps in the first epoch", steps_trained_in_current_epoch)
            except ValueError:
                self.global_step = 0
                logger.info("  Starting fine-tuning.")

        tr_loss = 0.0
        logging_loss = 0.0
        model.zero_grad()
        train_iterator = trange(
            epochs_trained, int(num_train_epochs), desc="Epoch", disable=not self.is_local_master()
        )

        self.eval_history = []
        for epoch in train_iterator:
            if isinstance(train_dataloader, DataLoader) and isinstance(train_dataloader.sampler, DistributedSampler):
                train_dataloader.sampler.set_epoch(epoch)

            epoch_iterator = tqdm(train_dataloader, desc=f"Epoch-{epoch}", disable=not self.is_local_master())
            for step, inputs in enumerate(epoch_iterator):

                # Skip past any already trained steps if resuming training
                if steps_trained_in_current_epoch > 0:
                    steps_trained_in_current_epoch -= 1
                    continue

                if self.args.do_aug:
                    if self.args.aug_type == 'span_cutoff':
                        step_loss = self._training_step_with_span_cutoff(model, inputs, optimizer)
                    elif self.args.aug_type == 'token_cutoff':
                        step_loss = self._training_step_with_token_cutoff(model, inputs, optimizer)
                    elif self.args.aug_type == 'dim_cutoff':
                        step_loss = self._training_step_with_dim_cutoff(model, inputs, optimizer)
                    else:
                        raise NotImplementedError
                else:
                    step_loss = self._training_step(model, inputs, optimizer)

                tr_loss += step_loss

                if (step + 1) % self.args.gradient_accumulation_steps == 0 or (
                    # last step in epoch but step is always smaller than gradient_accumulation_steps
                    len(epoch_iterator) <= self.args.gradient_accumulation_steps
                    and (step + 1) == len(epoch_iterator)
                ):
                    if self.args.max_grad_norm > 0:
                        if self.args.fp16:
                            torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), self.args.max_grad_norm)
                        else:
                            torch.nn.utils.clip_grad_norm_(model.parameters(), self.args.max_grad_norm)

                    if is_tpu_available():
                        xm.optimizer_step(optimizer)
                    else:
                        optimizer.step()

                    scheduler.step()
                    model.zero_grad()
                    self.global_step += 1
                    self.epoch = epoch + (step + 1) / len(epoch_iterator)

                    if (self.args.logging_steps > 0 and self.global_step % self.args.logging_steps == 0) or (
                        self.global_step == 1 and self.args.logging_first_step
                    ):
                        logs: Dict[str, float] = {}
                        logs["loss"] = (tr_loss - logging_loss) / self.args.logging_steps
                        # backward compatibility for pytorch schedulers
                        logs["learning_rate"] = (
                            scheduler.get_last_lr()[0]
                            if version.parse(torch.__version__) >= version.parse("1.4")
                            else scheduler.get_lr()[0]
                        )
                        logging_loss = tr_loss

                        print()
                        self._log(logs)

                        # if self.args.evaluate_during_training and self.args.save_steps % self.args.logging_steps == 0:
                        #     self.evaluate()

                    if self.is_world_master() and self.args.evaluate_during_training and \
                            self.args.save_steps > 0 and self.global_step % self.args.save_steps == 0:
                        self.evaluate_and_save_model(model, optimizer, scheduler)

                if self.args.max_steps > 0 and self.global_step > self.args.max_steps:
                    epoch_iterator.close()
                    break
            if self.args.max_steps > 0 and self.global_step > self.args.max_steps:
                train_iterator.close()
                break

            if self.is_world_master() and self.args.evaluate_during_training:
                self.evaluate_and_save_model(model, optimizer, scheduler)

            if self.args.tpu_metrics_debug:
                # tpu-comment: Logging debug metrics for PyTorch/XLA (compile, execute times, ops, etc.)
                xm.master_print(met.metrics_report())

        if self.tb_writer:
            self.tb_writer.close()

        logger.info("\n\nTraining completed.\n\n")

        self.eval_history = sorted(self.eval_history, key=lambda x: x[0])
        for x in self.eval_history:
            del x[-1]
        report_results(self.eval_header, self.eval_history, axis=self.eval_key_axis)
        return TrainOutput(self.global_step, tr_loss / self.global_step)
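
The checkpoint-resume bookkeeping above (a global step parsed from model_path, converted into whole epochs plus leftover update steps) is easiest to see with concrete numbers; the figures below are purely illustrative.

# Illustrative numbers only, mirroring the resume arithmetic in train() above.
num_batches = 1000                                                # len(train_dataloader)
gradient_accumulation_steps = 4
updates_per_epoch = num_batches // gradient_accumulation_steps    # 250 optimizer updates per epoch

global_step = 600                                                 # parsed from a path like ".../checkpoint-600"
epochs_trained = global_step // updates_per_epoch                 # 2 -> skip two full epochs
steps_trained_in_current_epoch = global_step % updates_per_epoch  # 100 -> skip 100 updates in the next epoch
print(epochs_trained, steps_trained_in_current_epoch)             # 2 100
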
    def train(self, model_path: Optional[str] = None):
        """
        Main training entry point.

        Args:
            model_path (:obj:`str`, `optional`):
                Local path to the model if the model to train has been instantiated from a local path. If present,
                training will resume from the optimizer/scheduler states loaded here.
        """
        train_dataloader = self.get_train_dataloader()
        if self.args.max_steps > 0:
            t_total = self.args.max_steps
            num_train_epochs = (
                self.args.max_steps // (len(train_dataloader) // self.args.gradient_accumulation_steps) + 1
            )
        else:
            t_total = int(len(train_dataloader) // self.args.gradient_accumulation_steps * self.args.num_train_epochs)
            num_train_epochs = self.args.num_train_epochs

        optimizer, scheduler = self.get_optimizers(num_training_steps=t_total)

        # Check if saved optimizer or scheduler states exist
        if (
            model_path is not None
            and os.path.isfile(os.path.join(model_path, "optimizer.pt"))
            and os.path.isfile(os.path.join(model_path, "scheduler.pt"))
        ):
            # Load in optimizer and scheduler states
            optimizer.load_state_dict(
                torch.load(os.path.join(model_path, "optimizer.pt"), map_location=self.args.device)
            )
            scheduler.load_state_dict(torch.load(os.path.join(model_path, "scheduler.pt")))

        model = self.model
        if self.args.fp16:
            if not is_apex_available():
                raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
            model, optimizer = amp.initialize(model, optimizer, opt_level=self.args.fp16_opt_level)

        # multi-gpu training (should be after apex fp16 initialization)
        if self.args.n_gpu > 1:
            model = torch.nn.DataParallel(model)

        # Distributed training (should be after apex fp16 initialization)
        if self.args.local_rank != -1:
            model = torch.nn.parallel.DistributedDataParallel(
                model,
                device_ids=[self.args.local_rank],
                output_device=self.args.local_rank,
                find_unused_parameters=True,
            )

        if self.tb_writer is not None:
            self.tb_writer.add_text("args", self.args.to_json_string())
            self.tb_writer.add_hparams(self.args.to_sanitized_dict(), metric_dict={})

        # Train!
        if is_torch_tpu_available():
            total_train_batch_size = self.args.train_batch_size * xm.xrt_world_size()
        else:
            total_train_batch_size = (
                self.args.train_batch_size
                * self.args.gradient_accumulation_steps
                * (torch.distributed.get_world_size() if self.args.local_rank != -1 else 1)
            )
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", self.num_examples(train_dataloader))
        logger.info("  Num Epochs = %d", num_train_epochs)
        logger.info("  Instantaneous batch size per device = %d", self.args.per_device_train_batch_size)
        logger.info("  Total train batch size (w. parallel, distributed & accumulation) = %d", total_train_batch_size)
        logger.info("  Gradient Accumulation steps = %d", self.args.gradient_accumulation_steps)
        logger.info("  Total optimization steps = %d", t_total)

        self.global_step = 0
        self.epoch = 0
        epochs_trained = 0
        steps_trained_in_current_epoch = 0
        # Check if continuing training from a checkpoint
        if model_path is not None:
            # set global_step to global_step of last saved checkpoint from model path
            try:
                self.global_step = int(model_path.split("-")[-1].split("/")[0])
                epochs_trained = self.global_step // (len(train_dataloader) // self.args.gradient_accumulation_steps)
                steps_trained_in_current_epoch = self.global_step % (
                    len(train_dataloader) // self.args.gradient_accumulation_steps
                )

                logger.info("  Continuing training from checkpoint, will skip to saved global_step")
                logger.info("  Continuing training from epoch %d", epochs_trained)
                logger.info("  Continuing training from global step %d", self.global_step)
                logger.info("  Will skip the first %d steps in the first epoch", steps_trained_in_current_epoch)
            except ValueError:
                self.global_step = 0
                logger.info("  Starting fine-tuning.")

        tr_loss = 0.0
        logging_loss = 0.0
        model.zero_grad()
        train_iterator = trange(
            epochs_trained, int(num_train_epochs), desc="Epoch", disable=not self.is_local_master()
        )
        for epoch in train_iterator:
            if isinstance(train_dataloader, DataLoader) and isinstance(train_dataloader.sampler, DistributedSampler):
                train_dataloader.sampler.set_epoch(epoch)

            if is_torch_tpu_available():
                parallel_loader = pl.ParallelLoader(train_dataloader, [self.args.device]).per_device_loader(
                    self.args.device
                )
                epoch_iterator = tqdm(parallel_loader, desc="Iteration", disable=not self.is_local_master())
            else:
                epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=not self.is_local_master())

            # Reset the past mems state at the beginning of each epoch if necessary.
            if self.args.past_index >= 0:
                self._past = None

            for step, inputs in enumerate(epoch_iterator):

                # Skip past any already trained steps if resuming training
                if steps_trained_in_current_epoch > 0:
                    steps_trained_in_current_epoch -= 1
                    continue

                tr_loss += self.training_step(model, inputs, optimizer)

                if (step + 1) % self.args.gradient_accumulation_steps == 0 or (
                    # last step in epoch but step is always smaller than gradient_accumulation_steps
                    len(epoch_iterator) <= self.args.gradient_accumulation_steps
                    and (step + 1) == len(epoch_iterator)
                ):
                    if self.args.fp16:
                        torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), self.args.max_grad_norm)
                    else:
                        torch.nn.utils.clip_grad_norm_(model.parameters(), self.args.max_grad_norm)

                    if is_torch_tpu_available():
                        xm.optimizer_step(optimizer)
                    else:
                        optimizer.step()

                    scheduler.step()
                    model.zero_grad()
                    self.global_step += 1
                    self.epoch = epoch + (step + 1) / len(epoch_iterator)

                    if (self.args.logging_steps > 0 and self.global_step % self.args.logging_steps == 0) or (
                        self.global_step == 1 and self.args.logging_first_step
                    ):
                        logs: Dict[str, float] = {}
                        logs["loss"] = (tr_loss - logging_loss) / self.args.logging_steps
                        # backward compatibility for pytorch schedulers
                        logs["learning_rate"] = (
                            scheduler.get_last_lr()[0]
                            if version.parse(torch.__version__) >= version.parse("1.4")
                            else scheduler.get_lr()[0]
                        )
                        logging_loss = tr_loss

                        self.log(logs)

                    if self.args.evaluate_during_training and self.global_step % self.args.eval_steps == 0:
                        self.evaluate()

                    if self.args.save_steps > 0 and self.global_step % self.args.save_steps == 0:
                        # In all cases (even distributed/parallel), self.model is always a reference
                        # to the model we want to save.
                        if hasattr(model, "module"):
                            assert model.module is self.model
                        else:
                            assert model is self.model
                        # Save model checkpoint
                        output_dir = os.path.join(self.args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{self.global_step}")

                        self.save_model(output_dir)

                        if self.is_world_master():
                            self._rotate_checkpoints()

                        if is_torch_tpu_available():
                            xm.rendezvous("saving_optimizer_states")
                            xm.save(optimizer.state_dict(), os.path.join(output_dir, "optimizer.pt"))
                            xm.save(scheduler.state_dict(), os.path.join(output_dir, "scheduler.pt"))
                        elif self.is_world_master():
                            torch.save(optimizer.state_dict(), os.path.join(output_dir, "optimizer.pt"))
                            torch.save(scheduler.state_dict(), os.path.join(output_dir, "scheduler.pt"))

                if self.args.max_steps > 0 and self.global_step > self.args.max_steps:
                    epoch_iterator.close()
                    break
            if self.args.max_steps > 0 and self.global_step > self.args.max_steps:
                train_iterator.close()
                break
            if self.args.tpu_metrics_debug or self.args.debug:
                # tpu-comment: Logging debug metrics for PyTorch/XLA (compile, execute times, ops, etc.)
                xm.master_print(met.metrics_report())

        if self.tb_writer:
            self.tb_writer.close()
        if self.args.past_index and hasattr(self, "_past"):
            # Clean the state at the end of training
            delattr(self, "_past")

        logger.info("\n\nTraining completed. Do not forget to share your model on huggingface.co/models =)\n\n")
        return TrainOutput(self.global_step, tr_loss / self.global_step)
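
Both train() variants above follow the older transformers Trainer interface, where resuming is requested by passing model_path explicitly. The sketch below is written under that assumption; model, train_dataset and eval_dataset are presumed to exist, and argument names such as evaluate_during_training belong to those older TrainingArguments versions.

# Sketch only: assumes a transformers release whose Trainer.train() still accepts model_path
# and whose TrainingArguments still exposes evaluate_during_training, as used above.
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="output",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    gradient_accumulation_steps=1,
    logging_steps=100,
    save_steps=500,
    evaluate_during_training=True,
)

trainer = Trainer(model=model, args=training_args,
                  train_dataset=train_dataset, eval_dataset=eval_dataset)
trainer.train(model_path="output/checkpoint-500")  # resumes optimizer/scheduler state from that checkpoint
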
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs.
        :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape
                  (nb_samples,). Only provide this parameter if you'd like to use true labels when crafting adversarial
                  samples. Otherwise, model predictions are used as labels to avoid the "label leaking" effect
                  (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.
        :param mask: An array with a mask broadcastable to input `x` defining where to apply adversarial perturbations.
                     Shape needs to be broadcastable to the shape of x and can also be of the same shape as `x`. Any
                     features for which the mask is zero will not be adversarially perturbed.
        :type mask: `np.ndarray`
        :return: An array holding the adversarial examples.
        """
        mask = kwargs.get("mask")

        y = check_and_transform_label_format(y, self.estimator.nb_classes)

        if y is None:
            if self.targeted:
                raise ValueError(
                    "Target labels `y` need to be provided for a targeted attack."
                )
            y = get_labels_np_array(
                self.estimator.predict(x, batch_size=self.batch_size)).astype(
                    np.int32)

        x_adv = x.astype(ART_NUMPY_DTYPE)

        for _ in trange(max(1, self.nb_random_init),
                        desc="AutoPGD - restart",
                        disable=not self.verbose):
            # Determine correctly predicted samples
            y_pred = self.estimator.predict(x_adv)
            if self.targeted:
                sample_is_robust = np.argmax(y_pred, axis=1) != np.argmax(
                    y, axis=1)
            elif not self.targeted:
                sample_is_robust = np.argmax(y_pred,
                                             axis=1) == np.argmax(y, axis=1)

            if np.sum(sample_is_robust) == 0:
                break

            x_robust = x_adv[sample_is_robust]
            y_robust = y[sample_is_robust]
            x_init = x[sample_is_robust]

            n = x_robust.shape[0]
            m = np.prod(x_robust.shape[1:]).item()
            random_perturbation = (random_sphere(
                n, m, self.eps,
                self.norm).reshape(x_robust.shape).astype(ART_NUMPY_DTYPE))

            x_robust = x_robust + random_perturbation

            if self.estimator.clip_values is not None:
                clip_min, clip_max = self.estimator.clip_values
                x_robust = np.clip(x_robust, clip_min, clip_max)

            perturbation = projection(x_robust - x_init, self.eps, self.norm)
            x_robust = x_init + perturbation

            # Compute perturbation with implicit batching
            for batch_id in trange(
                    int(np.ceil(x_robust.shape[0] / float(self.batch_size))),
                    desc="AutoPGD - batch",
                    leave=False,
                    disable=not self.verbose,
            ):
                self.eta = 2 * self.eps_step
                batch_index_1, batch_index_2 = batch_id * self.batch_size, (
                    batch_id + 1) * self.batch_size
                x_k = x_robust[batch_index_1:batch_index_2].astype(
                    ART_NUMPY_DTYPE)
                x_init_batch = x_init[batch_index_1:batch_index_2].astype(
                    ART_NUMPY_DTYPE)
                y_batch = y_robust[batch_index_1:batch_index_2]

                p_0 = 0
                p_1 = 0.22
                W = [p_0, p_1]

                while True:
                    p_j_p_1 = W[-1] + max(W[-1] - W[-2] - 0.03, 0.06)
                    if p_j_p_1 > 1:
                        break
                    W.append(p_j_p_1)

                W = [math.ceil(p * self.max_iter) for p in W]
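                # W now holds the AutoPGD checkpoint iterations at which progress is re-assessed
                # (and, in the full algorithm, the step size may be halved)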

                eta = self.eps_step
                self.count_condition_1 = 0

                for k_iter in trange(self.max_iter,
                                     desc="AutoPGD - iteration",
                                     leave=False,
                                     disable=not self.verbose):

                    # Get perturbation, use small scalar to avoid division by 0
                    tol = 10e-8

                    # Get gradient wrt loss; invert it if attack is targeted
                    grad = self.estimator.loss_gradient(
                        x_k, y_batch) * (1 - 2 * int(self.targeted))

                    # Apply norm bound
                    if self.norm in [np.inf, "inf"]:
                        grad = np.sign(grad)
                    elif self.norm == 1:
                        ind = tuple(range(1, len(x_k.shape)))
                        grad = grad / (np.sum(
                            np.abs(grad), axis=ind, keepdims=True) + tol)
                    elif self.norm == 2:
                        ind = tuple(range(1, len(x_k.shape)))
                        grad = grad / (np.sqrt(
                            np.sum(np.square(grad), axis=ind, keepdims=True)) +
                                       tol)
                    assert x_k.shape == grad.shape

                    perturbation = grad

                    if mask is not None:
                        perturbation = perturbation * (
                            mask.astype(ART_NUMPY_DTYPE))

                    # Apply perturbation and clip
                    z_k_p_1 = x_k + eta * perturbation

                    if self.estimator.clip_values is not None:
                        clip_min, clip_max = self.estimator.clip_values
                        z_k_p_1 = np.clip(z_k_p_1, clip_min, clip_max)

                    if k_iter == 0:
                        x_1 = z_k_p_1
                        perturbation = projection(x_1 - x_init_batch, self.eps,
                                                  self.norm)
                        x_1 = x_init_batch + perturbation

                        f_0 = self.estimator.loss(x=x_k,
                                                  y=y_batch,
                                                  reduction="mean")
                        f_1 = self.estimator.loss(x=x_1,
                                                  y=y_batch,
                                                  reduction="mean")

                        self.eta_w_j_m_1 = eta
                        self.f_max_w_j_m_1 = f_0

                        if f_1 >= f_0:
                            self.f_max = f_1
                            self.x_max = x_1
                            self.x_max_m_1 = x_init_batch
                            self.count_condition_1 += 1
                        else:
                            self.f_max = f_0
                            self.x_max = x_k.copy()
                            self.x_max_m_1 = x_init_batch

                        # Settings for next iteration k
                        x_k_m_1 = x_k.copy()
                        x_k = x_1

                    else:
                        perturbation = projection(z_k_p_1 - x_init_batch,
                                                  self.eps, self.norm)
                        z_k_p_1 = x_init_batch + perturbation

                        alpha = 0.75

                        x_k_p_1 = x_k + alpha * (z_k_p_1 - x_k) + (
                            1 - alpha) * (x_k - x_k_m_1)

                        if self.estimator.clip_values is not None:
                            clip_min, clip_max = self.estimator.clip_values
                            x_k_p_1 = np.clip(x_k_p_1, clip_min, clip_max)

                        perturbation = projection(x_k_p_1 - x_init_batch,
                                                  self.eps, self.norm)
                        x_k_p_1 = x_init_batch + perturbation

                        f_k_p_1 = self.estimator.loss(x=x_k_p_1,
                                                      y=y_batch,
                                                      reduction="mean")

                        if f_k_p_1 == 0.0:
                            x_k = x_k_p_1.copy()
                            break

                        if (not self.targeted and f_k_p_1 > self.f_max) or (
                                self.targeted and f_k_p_1 < self.f_max):
                            self.count_condition_1 += 1
                            self.x_max = x_k_p_1
                            self.x_max_m_1 = x_k
                            self.f_max = f_k_p_1

                        if k_iter in W:

                            rho = 0.75

                            condition_1 = self.count_condition_1 < rho * (
                                k_iter - W[W.index(k_iter) - 1])
                            condition_2 = self.eta_w_j_m_1 == eta and self.f_max_w_j_m_1 == self.f_max

                            if condition_1 or condition_2:
                                eta = eta / 2
                                x_k_m_1 = self.x_max_m_1
                                x_k = self.x_max
                            else:
                                x_k_m_1 = x_k
                                x_k = x_k_p_1.copy()

                            self.count_condition_1 = 0
                            self.eta_w_j_m_1 = eta
                            self.f_max_w_j_m_1 = self.f_max

                        else:
                            x_k_m_1 = x_k
                            x_k = x_k_p_1.copy()

                y_pred_adv_k = self.estimator.predict(x_k)
                if self.targeted:
                    sample_is_not_robust_k = np.invert(
                        np.argmax(y_pred_adv_k, axis=1) != np.argmax(y_batch,
                                                                     axis=1))
                elif not self.targeted:
                    sample_is_not_robust_k = np.invert(
                        np.argmax(y_pred_adv_k, axis=1) == np.argmax(y_batch,
                                                                     axis=1))

                x_robust[batch_index_1:batch_index_2][
                    sample_is_not_robust_k] = x_k[sample_is_not_robust_k]

            x_adv[sample_is_robust] = x_robust

        return x_adv
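This generate() appears to come from the Adversarial Robustness Toolbox (ART), where it belongs to the AutoProjectedGradientDescent attack. Below is a minimal, hedged usage sketch under that assumption; the tiny PyTorch model and the hyperparameters are illustrative placeholders, not taken from the snippet above.

import numpy as np
import torch
from art.estimators.classification import PyTorchClassifier
from art.attacks.evasion import AutoProjectedGradientDescent

# Illustrative untrained model; in practice wrap your own trained classifier.
model = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(28 * 28, 10))
classifier = PyTorchClassifier(
    model=model,
    loss=torch.nn.CrossEntropyLoss(),
    input_shape=(1, 28, 28),
    nb_classes=10,
    clip_values=(0.0, 1.0),
)

attack = AutoProjectedGradientDescent(estimator=classifier, norm=np.inf, eps=0.3,
                                      eps_step=0.1, max_iter=10, batch_size=32)
x = np.random.rand(8, 1, 28, 28).astype(np.float32)
x_adv = attack.generate(x=x)  # y=None -> model predictions are used as labels
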
import pathlib
import sys

import numpy as np

from sten import Sten
from tqdm.auto import tqdm, trange
from multiprocessing.pool import ThreadPool as Pool


def create(x):
    st = Sten(x)
    pathlib.Path("./encodedArray/bit_{0}".format(x)).mkdir(parents=True, exist_ok=True)
    pathlib.Path("./decodedArray/bit_{0}".format(x)).mkdir(parents=True, exist_ok=True)
    for set1File in trange(1, int(sys.argv[1])+1):
        for set2File in trange(1, int(sys.argv[2])+1):
            name = str(set1File) + '_' + str(set2File)
            encImg = st.encode("./data/set1/{}.jpg".format(set1File), "./data/set2/{}.jpg".format(set2File), "./encodedArray/bit_{0}/{1}.npy".format(x, name))
            decImg = st.decode("./encodedArray/bit_{0}/{1}.npy".format(x, name), "./decodedArray/bit_{0}/{1}.npy".format(x, name))


pool_size = 9

pool = Pool(pool_size)

for x in trange(0, 9):
    pool.apply_async(create, (x,))

pool.close()
pool.join()
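Note that ThreadPool shares the interpreter lock; if Sten.encode/decode is CPU-bound, a process pool is likely to parallelise better. A minimal sketch of that variant, assuming `create` stays importable at module level:

from multiprocessing import Pool

if __name__ == "__main__":
    with Pool(processes=9) as pool:
        pool.map(create, range(9))  # one process per bit depth 0..8
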
Exemple #7
0
def main(config):
    pprint(config)

    batch_size = config['batch_size']

    epochs = config['epochs']

    hidden_dim = config['hidden_dim']
    embedding_dim = config['embed_dim']

    num_layers = config['num_layers']
    dropout = config['dropout']
    learning_rate = config['learning_rate']
    scale = config['scale']

    number_of_runs = config['num_runs']

    metrics_dict = {}

    data_dir = config['data_dir']

    epsilon = config['epsilon']

    for i in trange(number_of_runs):
        data_name = os.path.join(data_dir, 'reddit-bert.pkl')

        with open(data_name, 'rb') as f:
            df = pickle.load(f)

        df_train, df_test, _, __ = train_test_split(
            df,
            df['label'].tolist(),
            test_size=0.2,
            stratify=df['label'].tolist())

        train_dataset = RedditDataset(df_train.label.values,
                                      df_train.enc.values)
        train_dataloader = DataLoader(train_dataset,
                                      batch_size=batch_size,
                                      collate_fn=pad_collate_reddit,
                                      shuffle=True)

        test_dataset = RedditDataset(df_test.label.values, df_test.enc.values)
        test_dataloader = DataLoader(test_dataset,
                                     batch_size=batch_size,
                                     collate_fn=pad_collate_reddit)

        model = AdvRedditModel(embedding_dim, hidden_dim, num_layers, dropout,
                               epsilon)

        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        model.to(device)
        print(device)

        optimizer = AdamW(model.parameters(),
                          lr=learning_rate,
                          weight_decay=0.03)

        scheduler = get_cosine_schedule_with_warmup(optimizer,
                                                    num_warmup_steps=10,
                                                    num_training_steps=epochs)

        early_stop_counter = 0
        early_stop_limit = config['early_stop']

        best_model_wts = copy.deepcopy(model.state_dict())
        best_loss = np.inf

        for _ in trange(epochs, leave=False):
            loss, accuracy = train_loop(model, train_dataloader, optimizer,
                                        device, len(train_dataset), scale)

            if scheduler is not None:
                scheduler.step()

            if loss >= best_loss:
                early_stop_counter += 1
            else:
                best_model_wts = copy.deepcopy(model.state_dict())
                early_stop_counter = 0
                best_loss = loss

            if early_stop_counter == early_stop_limit:
                break

        model.load_state_dict(best_model_wts)
        _, _, y_pred, y_true, conf = eval_loop(model, test_dataloader, device,
                                               len(test_dataset), scale)

        m = gr_metrics(y_pred, y_true)

        if 'Precision' in metrics_dict:
            metrics_dict['Precision'].append(m[0])
            metrics_dict['Recall'].append(m[1])
            metrics_dict['FScore'].append(m[2])
            metrics_dict['OE'].append(m[3])
            metrics_dict['all'].append([y_pred, y_true])
        else:
            metrics_dict['Precision'] = [m[0]]
            metrics_dict['Recall'] = [m[1]]
            metrics_dict['FScore'] = [m[2]]
            metrics_dict['OE'] = [m[3]]
            metrics_dict['all'] = [[y_pred, y_true]]

    df = pd.DataFrame(metrics_dict)

    df.to_csv(f'{datetime.now().strftime("%d%m%y_%H%M%S")}_df.csv')

    return df['FScore'].median()
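A hedged sketch of the config dictionary this main() expects, inferred from the keys it reads above; the values are placeholders rather than the original settings, and data_dir must contain the pickled reddit-bert.pkl dataframe.

config = {
    'batch_size': 32,
    'epochs': 50,
    'hidden_dim': 128,
    'embed_dim': 768,
    'num_layers': 1,
    'dropout': 0.3,
    'learning_rate': 1e-3,
    'scale': 1.0,
    'num_runs': 5,
    'data_dir': './data',
    'epsilon': 0.1,
    'early_stop': 7,  # patience, read as config['early_stop'] above
}

median_fscore = main(config)
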
    def generate(  # pylint: disable=W0221
            self,
            x: np.ndarray,
            y: Optional[np.ndarray] = None,
            target_label: Optional[Union[int, List[int], np.ndarray]] = None,
            **kwargs) -> np.ndarray:
        """
        Generate DPatch.

        :param x: Sample images.
        :param y: Target labels for object detector.
        :param target_label: The target label of the DPatch attack.
        :param mask: A boolean array of shape equal to the shape of a single sample (1, H, W) or the shape of `x`
                     (N, H, W) without their channel dimensions. Any features for which the mask is True can be the
                     center location of the patch during sampling.
        :type mask: `np.ndarray`
        :return: Adversarial patch.
        """
        mask = kwargs.get("mask")
        if mask is not None:
            mask = mask.copy()
        if mask is not None and (mask.dtype != np.bool_ or not (
                mask.shape[0] == 1 or mask.shape[0] == x.shape[0]
        ) or not (
            (mask.shape[1] == x.shape[1] and mask.shape[2] == x.shape[2]) or
            (mask.shape[1] == x.shape[2] and mask.shape[2] == x.shape[3]))):
            raise ValueError(
                "The shape of `mask` has to be equal to the shape of a single samples (1, H, W) or the"
                "shape of `x` (N, H, W) without their channel dimensions.")

        channel_index = 1 if self.estimator.channels_first else x.ndim - 1
        if x.shape[channel_index] != self.patch_shape[channel_index - 1]:
            raise ValueError(
                "The color channel index of the images and the patch have to be identical."
            )
        if y is not None:
            raise ValueError("The DPatch attack does not use target labels.")
        if x.ndim != 4:
            raise ValueError(
                "The adversarial patch can only be applied to images.")
        if target_label is not None:
            if isinstance(target_label, int):
                self.target_label = [target_label] * x.shape[0]
            elif isinstance(target_label, np.ndarray):
                if not (target_label.shape == (x.shape[0], 1)
                        or target_label.shape == (x.shape[0], )):
                    raise ValueError(
                        "The target_label has to be a 1-dimensional array.")
                self.target_label = target_label.tolist()
            else:
                if not len(target_label) == x.shape[0] or not isinstance(
                        target_label, list):
                    raise ValueError(
                        "The target_label as list of integers needs to of length number of images in `x`."
                    )
                self.target_label = target_label

        patched_images, transforms = self._augment_images_with_patch(
            x,
            self._patch,
            random_location=True,
            channels_first=self.estimator.channels_first,
            mask=mask,
            transforms=None,
        )
        patch_target: List[Dict[str, np.ndarray]] = list()

        if self.target_label:

            for i_image in range(patched_images.shape[0]):
                if isinstance(self.target_label, int):
                    t_l = self.target_label
                else:
                    t_l = self.target_label[i_image]

                i_x_1 = transforms[i_image]["i_x_1"]
                i_x_2 = transforms[i_image]["i_x_2"]
                i_y_1 = transforms[i_image]["i_y_1"]
                i_y_2 = transforms[i_image]["i_y_2"]

                target_dict = dict()
                target_dict["boxes"] = np.asarray(
                    [[i_x_1, i_y_1, i_x_2, i_y_2]])
                target_dict["labels"] = np.asarray([
                    t_l,
                ])
                target_dict["scores"] = np.asarray([
                    1.0,
                ])

                patch_target.append(target_dict)

        else:

            predictions = self.estimator.predict(x=patched_images)

            for i_image in range(patched_images.shape[0]):
                target_dict = dict()
                target_dict["boxes"] = predictions[i_image]["boxes"]
                target_dict["labels"] = predictions[i_image]["labels"]
                target_dict["scores"] = predictions[i_image]["scores"]

                patch_target.append(target_dict)

        for i_step in trange(self.max_iter,
                             desc="DPatch iteration",
                             disable=not self.verbose):
            if i_step == 0 or (i_step + 1) % 100 == 0:
                logger.info("Training Step: %i", i_step + 1)

            num_batches = math.ceil(x.shape[0] / self.batch_size)
            patch_gradients = np.zeros_like(self._patch)

            for i_batch in range(num_batches):
                i_batch_start = i_batch * self.batch_size
                i_batch_end = min((i_batch + 1) * self.batch_size,
                                  patched_images.shape[0])

                gradients = self.estimator.loss_gradient(
                    x=patched_images[i_batch_start:i_batch_end],
                    y=patch_target[i_batch_start:i_batch_end],
                )

                for i_image in range(gradients.shape[0]):

                    i_x_1 = transforms[i_batch_start + i_image]["i_x_1"]
                    i_x_2 = transforms[i_batch_start + i_image]["i_x_2"]
                    i_y_1 = transforms[i_batch_start + i_image]["i_y_1"]
                    i_y_2 = transforms[i_batch_start + i_image]["i_y_2"]

                    if self.estimator.channels_first:
                        patch_gradients_i = gradients[i_image, :, i_x_1:i_x_2,
                                                      i_y_1:i_y_2]
                    else:
                        patch_gradients_i = gradients[i_image, i_x_1:i_x_2,
                                                      i_y_1:i_y_2, :]

                    patch_gradients = patch_gradients + patch_gradients_i

            if self.target_label:
                self._patch = self._patch - np.sign(
                    patch_gradients) * self.learning_rate
            else:
                self._patch = self._patch + np.sign(
                    patch_gradients) * self.learning_rate

            if self.estimator.clip_values is not None:
                self._patch = np.clip(
                    self._patch,
                    a_min=self.estimator.clip_values[0],
                    a_max=self.estimator.clip_values[1],
                )

            patched_images, _ = self._augment_images_with_patch(
                x,
                self._patch,
                random_location=False,
                channels_first=self.estimator.channels_first,
                mask=None,
                transforms=transforms,
            )

        return self._patch
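This looks like ART's DPatch attack against an object detector. A hedged usage sketch under that assumption; PyTorchFasterRCNN() downloads a pretrained torchvision detector, and the random images stand in for real data.

import numpy as np
from art.estimators.object_detection import PyTorchFasterRCNN
from art.attacks.evasion import DPatch

detector = PyTorchFasterRCNN(clip_values=(0, 255))
attack = DPatch(detector, patch_shape=(40, 40, 3), learning_rate=5.0,
                max_iter=20, batch_size=1)

x = np.random.randint(0, 255, size=(2, 224, 224, 3)).astype(np.float32)
patch = attack.generate(x=x)  # returns the optimised adversarial patch
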
Exemple #9
0
                # Create worker processes
                print("    - Creating worker processes")
                ps = [
                    Process(target=worker, args=(inQueue, outQueue))
                    for _ in range(njobs)
                ]

                # Start worker processes
                print("    - Starting worker processes")
                for p in ps:
                    p.start()

                # Fill the queue
                print("    - Filling up the queue")
                for i in trange(ncases):
                    inQueue.put((i))

                # Now running the processes
                print("    - Running the processes")
                output = [outQueue.get() for _ in trange(ncases)]

                # Send stop signal to stop iteration
                for _ in range(njobs):
                    inQueue.put('STOP')

                # Stop processes
                print("    - Stopping processes")
                for p in ps:
                    p.join()
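The fragment above assumes a worker function and two queues created elsewhere. A minimal sketch of that missing setup, following the 'STOP' sentinel convention used above; run_case is a hypothetical stand-in for the real per-case job.

from multiprocessing import Process, Queue

def run_case(i):
    # hypothetical placeholder for the actual per-case computation
    return i * i

def worker(inQueue, outQueue):
    # consume case indices until the 'STOP' sentinel arrives
    for i in iter(inQueue.get, 'STOP'):
        outQueue.put(run_case(i))

njobs, ncases = 4, 100
inQueue, outQueue = Queue(), Queue()
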
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :param y: An array with the original labels to be predicted.
        :return: An array holding the adversarial examples.
        """
        x_adv = x.astype(ART_NUMPY_DTYPE)
        preds = self.estimator.predict(x_adv, batch_size=self.batch_size)
        if (preds < 0.0).any() or (preds > 1.0).any():
            raise TypeError(
                "This attack requires a classifier predicting probabilities in the range [0, 1] as output."
                "Values smaller than 0.0 or larger than 1.0 have been detected."
            )
        # preds_rescaled = self._rescale(preds) # Rescaling needs more testing
        preds_rescaled = preds

        # Compute perturbation with implicit batching
        for batch_id in trange(int(
                np.ceil(x_adv.shape[0] / float(self.batch_size))),
                               desc="VAT",
                               disable=not self.verbose):
            batch_index_1, batch_index_2 = batch_id * self.batch_size, (
                batch_id + 1) * self.batch_size
            batch = x_adv[batch_index_1:batch_index_2]
            batch = batch.reshape((batch.shape[0], -1))

            # Main algorithm for each batch
            var_d = np.random.randn(*batch.shape).astype(ART_NUMPY_DTYPE)

            # Main loop of the algorithm
            for _ in range(self.max_iter):
                var_d = self._normalize(var_d)
                preds_new = self.estimator.predict(
                    (batch + var_d).reshape((-1, ) +
                                            self.estimator.input_shape))
                if (preds_new < 0.0).any() or (preds_new > 1.0).any():
                    raise TypeError(
                        "This attack requires a classifier predicting probabilities in the range [0, 1] as "
                        "output. Values smaller than 0.0 or larger than 1.0 have been detected."
                    )
                # preds_new_rescaled = self._rescale(preds_new) # Rescaling needs more testing
                preds_new_rescaled = preds_new

                from scipy.stats import entropy

                kl_div1 = entropy(
                    np.transpose(preds_rescaled[batch_index_1:batch_index_2]),
                    np.transpose(preds_new_rescaled),
                )

                var_d_new = np.zeros(var_d.shape).astype(ART_NUMPY_DTYPE)
                for current_index in range(var_d.shape[1]):
                    var_d[:, current_index] += self.finite_diff
                    preds_new = self.estimator.predict(
                        (batch + var_d).reshape((-1, ) +
                                                self.estimator.input_shape))
                    if (preds_new < 0.0).any() or (preds_new > 1.0).any():
                        raise TypeError(
                            "This attack requires a classifier predicting probabilities in the range [0, 1]"
                            "as output. Values smaller than 0.0 or larger than 1.0 have been detected."
                        )
                    # preds_new_rescaled = self._rescale(preds_new) # Rescaling needs more testing
                    preds_new_rescaled = preds_new

                    kl_div2 = entropy(
                        np.transpose(
                            preds_rescaled[batch_index_1:batch_index_2]),
                        np.transpose(preds_new_rescaled),
                    )
                    var_d_new[:, current_index] = (kl_div2 -
                                                   kl_div1) / self.finite_diff
                    var_d[:, current_index] -= self.finite_diff
                var_d = var_d_new

            # Apply perturbation and clip
            if self.estimator.clip_values is not None:
                clip_min, clip_max = self.estimator.clip_values
                x_adv[batch_index_1:batch_index_2] = np.clip(
                    batch + self.eps * self._normalize(var_d), clip_min,
                    clip_max).reshape((-1, ) + self.estimator.input_shape)
            else:
                x_adv[batch_index_1:batch_index_2] = (
                    batch + self.eps * self._normalize(var_d)
                ).reshape((-1, ) + self.estimator.input_shape)

        logger.info(
            "Success rate of virtual adversarial attack: %.2f%%",
            100 * compute_success(
                self.estimator, x, y, x_adv, batch_size=self.batch_size),
        )

        return x_adv
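This appears to be ART's VirtualAdversarialMethod, which only needs class probabilities from the classifier. A hedged sketch with a scikit-learn model wrapped for ART; the data and hyperparameters are illustrative.

import numpy as np
from sklearn.linear_model import LogisticRegression
from art.estimators.classification import SklearnClassifier
from art.attacks.evasion import VirtualAdversarialMethod

X = np.random.rand(100, 20).astype(np.float32)
y = np.random.randint(0, 2, size=100)
clf = SklearnClassifier(model=LogisticRegression().fit(X, y), clip_values=(0.0, 1.0))

attack = VirtualAdversarialMethod(clf, eps=0.1, max_iter=5, finite_diff=1e-6, batch_size=32)
x_adv = attack.generate(x=X)
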
Exemple #11
0
        int(steps_per_epoch * args.num_epochs * 5 / 6)
    ],
                                   values=[
                                       args.initial_lr, args.initial_lr * 0.1,
                                       args.initial_lr * 0.01
                                   ])

    optimizer = tf.keras.optimizers.SGD(learning_rate=lr_fn,
                                        momentum=args.momentum)

    train_log_dir = 'logs/train'
    val_log_dir = 'logs/val'
    train_summary_writer = tf.summary.create_file_writer(train_log_dir)
    val_summary_writer = tf.summary.create_file_writer(val_log_dir)

    for epoch in trange(args.num_epochs, desc='Epoch'):
        avg_loss = 0.0
        avg_conf_loss = 0.0
        avg_loc_loss = 0.0
        start = time.time()
        for i, (_, imgs, gt_confs, gt_locs) in tqdm(enumerate(batch_generator),
                                                    desc='Steps',
                                                    total=steps_per_epoch):
            loss, conf_loss, loc_loss, l2_loss = train_step(
                imgs, gt_confs, gt_locs, ssd, criterion, optimizer)
            avg_loss = (avg_loss * i + loss.numpy()) / (i + 1)
            avg_conf_loss = (avg_conf_loss * i + conf_loss.numpy()) / (i + 1)
            avg_loc_loss = (avg_loc_loss * i + loc_loss.numpy()) / (i + 1)

            if (i + 1) % 10 == 0:
                tqdm.write(
Exemple #12
0
            )
            anchors_total = sum(
                not (len(a["wikidata_ids"]) == 0 and a["wikidata_src"] == "simple")
                for page in wiki.values()
                for a in page["anchors"]
            )
            logging.info(
                "LANG: {} -- Solved {:.2%} of anchors".format(
                    lang, anchors_solved / anchors_total
                )
            )

    elif args.step == "prepare":
        for lang in args.langs.split("|"):
            results = {}
            for rank in trange(32):
                filename = os.path.join(
                    args.base_wikipedia,
                    "{}".format(lang),
                    "{}wiki{}.pkl".format(lang, rank),
                )
                if os.path.exists(filename):
                    logging.info("Loading {}".format(filename))
                    with open(filename, "rb") as f:
                        for k, v in pickle.load(f).items():
                            results[k] = v

            filename = os.path.join(
                args.base_wikipedia,
                "{}".format(lang),
                "{}wiki.pkl".format(lang),
Exemple #13
0
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs.
        :param y: An array with the original labels to be predicted.
        :return: An array holding the adversarial examples.
        """
        if len(x.shape) < 3:  # pragma: no cover
            raise ValueError(
                "Frame saliency attack works only on inputs of dimension greater than 2."
            )

        if self.frame_index >= len(x.shape):  # pragma: no cover
            raise ValueError(
                "Frame index is out of bounds for the given input shape.")

        if y is not None:
            y = check_and_transform_label_format(
                y, nb_classes=self.estimator.nb_classes)

        if self.method == "one_shot":
            if y is None:
                return self.attacker.generate(x)

            return self.attacker.generate(x, y)

        if y is None:
            # Throw error if attack is targeted, but no targets are provided
            if hasattr(
                    self.attacker, "targeted"
            ) and self.attacker.targeted:  # type: ignore  # pragma: no cover
                raise ValueError(
                    "Target labels `y` need to be provided for a targeted attack."
                )

            # Use model predictions as correct outputs
            targets = get_labels_np_array(
                self.estimator.predict(x, batch_size=self.batch_size))
        else:
            targets = y

        if self.estimator.nb_classes == 2 and targets.shape[
                1] == 1:  # pragma: no cover
            raise ValueError(
                "This attack has not yet been tested for binary classification with a single output classifier."
            )

        nb_samples = x.shape[0]
        nb_frames = x.shape[self.frame_index]
        x_adv = x.astype(ART_NUMPY_DTYPE)

        # Determine for which adversarial examples the attack fails:
        attack_failure = self._compute_attack_failure_array(x, targets, x_adv)

        # Determine the order in which to perturb frames, based on saliency scores:
        frames_to_perturb = self._compute_frames_to_perturb(x_adv, targets)

        # Generate adversarial perturbations. If the method is "iterative_saliency_refresh", we will use a mask so that
        # only the next frame to be perturbed is considered in the attack; moreover we keep track of the next frames to
        # be perturbed so they will not be perturbed again later on.
        mask = np.ones(x.shape)
        if self.method == "iterative_saliency_refresh":
            mask = np.zeros(x.shape)
            mask = np.swapaxes(mask, 1, self.frame_index)
            mask[:, frames_to_perturb[:, 0], ::] = 1
            mask = np.swapaxes(mask, 1, self.frame_index)
            disregard = np.zeros((nb_samples, nb_frames))
            disregard[:, frames_to_perturb[:, 0]] = np.inf

        x_adv_new = self.attacker.generate(x, targets, mask=mask)

        # Here starts the main iteration:
        for i in trange(nb_frames,
                        desc="Frame saliency",
                        disable=not self.verbose):
            # Check if attack has already succeeded for all inputs:
            if sum(attack_failure) == 0:
                break

            # Update designated frames with adversarial perturbations:
            x_adv = np.swapaxes(x_adv, 1, self.frame_index)
            x_adv_new = np.swapaxes(x_adv_new, 1, self.frame_index)
            x_adv[attack_failure,
                  frames_to_perturb[:, i][attack_failure], ::] = x_adv_new[
                      attack_failure, frames_to_perturb[:,
                                                        i][attack_failure], ::]
            x_adv = np.swapaxes(x_adv, 1, self.frame_index)
            x_adv_new = np.swapaxes(x_adv_new, 1, self.frame_index)

            # Update for which adversarial examples the attack still fails:
            attack_failure = self._compute_attack_failure_array(
                x, targets, x_adv)

            # For the "refresh" method, update the next frames to be perturbed (disregarding the frames that were
            # perturbed already) and also refresh the adversarial perturbations:
            if self.method == "iterative_saliency_refresh" and i < nb_frames - 1:
                frames_to_perturb = self._compute_frames_to_perturb(
                    x_adv, targets, disregard)
                mask = np.zeros(x.shape)
                mask = np.swapaxes(mask, 1, self.frame_index)
                mask[:, frames_to_perturb[:, i + 1], ::] = 1
                mask = np.swapaxes(mask, 1, self.frame_index)
                disregard[:, frames_to_perturb[:, i + 1]] = np.inf
                x_adv_new = self.attacker.generate(x_adv, targets, mask=mask)

        return x_adv
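This generate() matches ART's FrameSaliencyAttack, which wraps another evasion attack (FGSM/BIM/PGD) and perturbs one frame at a time in order of saliency. A hedged sketch under that assumption, with a throwaway model over inputs shaped (frames, features):

import numpy as np
import torch
from art.estimators.classification import PyTorchClassifier
from art.attacks.evasion import FastGradientMethod, FrameSaliencyAttack

model = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(5 * 20, 3))
classifier = PyTorchClassifier(model=model, loss=torch.nn.CrossEntropyLoss(),
                               input_shape=(5, 20), nb_classes=3)

inner = FastGradientMethod(classifier, eps=0.1)
attack = FrameSaliencyAttack(classifier, inner, method="iterative_saliency", frame_index=1)

x = np.random.rand(4, 5, 20).astype(np.float32)  # (batch, frames, features)
x_adv = attack.generate(x=x)
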
Exemple #14
0
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Generate adversarial examples and return them as an array.

        :param x: An array with the original inputs to be attacked.
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,).
        :return: An array holding the adversarial examples.
        """
        if y is not None:
            y = check_and_transform_label_format(y,
                                                 self.estimator.nb_classes,
                                                 return_one_hot=False)
        x_adv = x.copy()

        for index in trange(x_adv.shape[0],
                            desc="Decision tree attack",
                            disable=not self.verbose):
            path = self.estimator.get_decision_path(x_adv[index])
            legitimate_class = int(
                np.argmax(self.estimator.predict(x_adv[index].reshape(1, -1))))
            position = -2
            adv_path = [-1]
            ancestor = path[position]
            while np.abs(position) < (len(path) - 1) or adv_path[0] == -1:
                ancestor = path[position]
                current_child = path[position + 1]
                # search in right subtree
                if current_child == self.estimator.get_left_child(ancestor):
                    if y is None:
                        adv_path = self._df_subtree(
                            self.estimator.get_right_child(ancestor),
                            legitimate_class)
                    else:
                        adv_path = self._df_subtree(
                            self.estimator.get_right_child(ancestor),
                            legitimate_class,
                            y[index],
                        )
                else:  # search in left subtree
                    if y is None:
                        adv_path = self._df_subtree(
                            self.estimator.get_left_child(ancestor),
                            legitimate_class)
                    else:
                        adv_path = self._df_subtree(
                            self.estimator.get_left_child(ancestor),
                            legitimate_class,
                            y[index],
                        )
                position = position - 1  # move one level up the decision path
            adv_path.append(ancestor)
            # we have found the path towards the target, now perturb the features along it
            # the first entry is a leaf -> it has no threshold and cannot be perturbed
            for i in range(1, 1 + len(adv_path[1:])):
                go_for = adv_path[i - 1]
                threshold = self.estimator.get_threshold_at_node(adv_path[i])
                feature = self.estimator.get_feature_at_node(adv_path[i])
                # only perturb if the feature is actually wrong
                if x_adv[index][
                        feature] > threshold and go_for == self.estimator.get_left_child(
                            adv_path[i]):
                    x_adv[index][feature] = threshold - self.offset
                elif x_adv[index][
                        feature] <= threshold and go_for == self.estimator.get_right_child(
                            adv_path[i]):
                    x_adv[index][feature] = threshold + self.offset

        return x_adv
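This is the decision-tree evasion attack from ART. A hedged usage sketch with a scikit-learn tree wrapped in the matching ART estimator; the random data is illustrative only.

import numpy as np
from sklearn.tree import DecisionTreeClassifier
from art.estimators.classification.scikitlearn import ScikitlearnDecisionTreeClassifier
from art.attacks.evasion import DecisionTreeAttack

X = np.random.rand(200, 4)
y = np.random.randint(0, 3, size=200)
tree = ScikitlearnDecisionTreeClassifier(DecisionTreeClassifier().fit(X, y))

attack = DecisionTreeAttack(tree, offset=0.001)
x_adv = attack.generate(x=X[:10])
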
Exemple #15
0
    def _fit_cdr(self):
        import tensorflow as tf
        from .model import Model

        n_users = self.train_set.num_users
        n_items = self.train_set.num_items

        text_feature = self.train_set.item_text.batch_bow(
            np.arange(n_items))  # bag-of-words features
        text_feature = (text_feature - text_feature.min()) / (
            text_feature.max() - text_feature.min())  # normalization

        # Build model
        layer_sizes = ([self.vocab_size] + self.autoencoder_structure +
                       [self.k] + self.autoencoder_structure +
                       [self.vocab_size])
        tf.set_random_seed(self.seed)
        model = Model(
            n_users=n_users,
            n_items=n_items,
            n_vocab=self.vocab_size,
            k=self.k,
            layers=layer_sizes,
            lambda_u=self.lambda_u,
            lambda_v=self.lambda_v,
            lambda_w=self.lambda_w,
            lambda_n=self.lambda_n,
            lr=self.learning_rate,
            dropout_rate=self.dropout_rate,
            U=self.U,
            V=self.V,
            act_fn=self.act_fn,
            seed=self.seed,
        )

        # Training model
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())

            loop = trange(self.max_iter, disable=not self.verbose)
            for _ in loop:
                corruption_mask = self.rng.binomial(1,
                                                    1 - self.corruption_rate,
                                                    (n_items, self.vocab_size))
                sum_loss = 0
                count = 0
                batch_count = 0
                for batch_u, batch_i, batch_j in self.train_set.uij_iter(
                        batch_size=self.batch_size, shuffle=True):
                    feed_dict = {
                        model.mask_input: corruption_mask[batch_i, :],
                        model.text_input: text_feature[batch_i, :],
                        model.batch_u: batch_u,
                        model.batch_i: batch_i,
                        model.batch_j: batch_j,
                    }

                    sess.run(model.opt1, feed_dict)  # train U, V
                    _, _loss = sess.run([model.opt2, model.loss],
                                        feed_dict)  # train SDAE

                    sum_loss += _loss
                    count += len(batch_u)
                    batch_count += 1
                    if batch_count % 10 == 0:
                        loop.set_postfix(loss=(sum_loss / count))

            self.U, self.V = sess.run([model.U, model.V])

        tf.reset_default_graph()

        if self.verbose:
            print("\nLearning completed")
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,).
        :return: An array holding the adversarial examples.
        """
        y = check_and_transform_label_format(y, self.estimator.nb_classes)

        # Check that `y` is provided for targeted attacks
        if self.targeted and y is None:  # pragma: no cover
            raise ValueError(
                "Target labels `y` need to be provided for a targeted attack.")

        # No labels provided, use model prediction as correct class
        if y is None:
            y = get_labels_np_array(
                self.estimator.predict(x, batch_size=self.batch_size))

        if self.estimator.nb_classes == 2 and y.shape[
                1] == 1:  # pragma: no cover
            raise ValueError(
                "This attack has not yet been tested for binary classification with a single output classifier."
            )

        # Compute adversarial examples with implicit batching
        nb_batches = int(np.ceil(x.shape[0] / float(self.batch_size)))
        x_adv = []
        for batch_id in trange(nb_batches,
                               desc="ZOO",
                               disable=not self.verbose):
            batch_index_1, batch_index_2 = batch_id * self.batch_size, (
                batch_id + 1) * self.batch_size
            x_batch = x[batch_index_1:batch_index_2]
            y_batch = y[batch_index_1:batch_index_2]
            res = self._generate_batch(x_batch, y_batch)
            x_adv.append(res)
        x_adv = np.vstack(x_adv)

        # Apply clip
        if self.estimator.clip_values is not None:
            clip_min, clip_max = self.estimator.clip_values
            np.clip(x_adv, clip_min, clip_max, out=x_adv)

        # Log success rate of the ZOO attack
        logger.info(
            "Success rate of ZOO attack: %.2f%%",
            100 * compute_success(self.estimator,
                                  x,
                                  y,
                                  x_adv,
                                  self.targeted,
                                  batch_size=self.batch_size),
        )

        return x_adv
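This generate() matches ART's ZooAttack (zeroth-order optimisation), which is black-box and only queries the classifier's predictions. A hedged sketch on tabular data, with the image-specific options switched off:

import numpy as np
from sklearn.linear_model import LogisticRegression
from art.estimators.classification import SklearnClassifier
from art.attacks.evasion import ZooAttack

X = np.random.rand(50, 10).astype(np.float32)
y = np.random.randint(0, 2, size=50)
clf = SklearnClassifier(model=LogisticRegression().fit(X, y), clip_values=(0.0, 1.0))

attack = ZooAttack(classifier=clf, max_iter=10, nb_parallel=5, batch_size=1,
                   use_resize=False, use_importance=False)  # no resizing on non-image data
x_adv = attack.generate(x=X[:5])
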
Exemple #17
0
def predict_gender(audios, intervals, complex):

    step_seconds = 0.04

    model_path = 'model/weights/max_pooling__n_layers=7__n_filters=64__downsampling=1__n_seconds=3.torch'

    model_type = model_path.split('/')[-1].split('__')[0]
    model_name = model_path.split('/')[-1].split('.')[0]
    model_params = {
        i.split('=')[0]: float(i.split('=')[1])
        for i in model_name.split('__')[1:]
    }

    # Here we assume that the model was trained on the LibriSpeech dataset
    model_sampling_rate = LIBRISPEECH_SAMPLING_RATE / model_params[
        'downsampling']
    model_num_samples = int(model_params['n_seconds'] * model_sampling_rate)

    if model_type == 'max_pooling':
        model = ConvNet(int(model_params['n_filters']),
                        int(model_params['n_layers']))
    elif model_type == 'dilated':
        model = DilatedNet(int(model_params['n_filters']),
                           int(model_params['n_depth']),
                           int(model_params['n_stacks']))
    else:
        raise ValueError('Model type not recognised.')

    model.load_state_dict(torch.load(model_path))
    model.double()
    model.cuda()
    model.eval()
    for i in trange(len(audios), desc="speakers"):
        speaker = audios[i].replace('.wav', '')

        ##############
        # Load audio #
        ##############
        audio_path = PATH + '/raw/voc/simple_audio/' + audios[i]
        audio, audio_sampling_rate = sf.read(audio_path)
        audio_duration_seconds = audio.shape[0] * 1. / audio_sampling_rate
        audio_duration_minutes = audio_duration_seconds / 60.

        step_samples = int(step_seconds * model_sampling_rate)
        step_samples_at_audio_rate = int(step_seconds * audio_sampling_rate)
        default_shape = None
        batch = []
        start_min = []
        pred = []
        mean_pitch = []
        max_pitch = []
        min_pitch = []
        num_zeros = []
        std_pitch = []
        pitch_measurements = []

        for j in trange(len(intervals[speaker]), desc="intervals",
                        leave=False):
            start = float(intervals[speaker][j][0])
            end = float(intervals[speaker][j][1])
            start_samples = int(audio_sampling_rate * start)
            end_samples = int(audio_sampling_rate * end)
            step_samples = int(step_seconds * model_sampling_rate)
            step_samples_at_audio_rate = int(step_seconds *
                                             audio_sampling_rate)
            default_shape = None

            for lower in tqdm(range(start_samples, end_samples,
                                    step_samples_at_audio_rate),
                              desc="predictions",
                              leave=False):

                x = audio[lower:lower + (3 * audio_sampling_rate)]
                if x.shape[0] != 3 * audio_sampling_rate:
                    break

                sf.write(PATH + '/raw/clips/{}.wav'.format(speaker), x,
                         audio_sampling_rate)
                sound = parselmouth.Sound(PATH +
                                          '/raw/clips/{}.wav'.format(speaker))
                pitch = sound.to_pitch()
                pitch_values = pitch.selected_array['frequency']

                if pitch_values[pitch_values != 0].size != 0:
                    mean_pitch.append(np.mean(pitch_values[pitch_values != 0]))
                    std_pitch.append(np.std(pitch_values[pitch_values != 0]))
                    min_pitch.append(np.amin(pitch_values[pitch_values != 0]))
                    max_pitch.append(np.amax(pitch_values[pitch_values != 0]))
                    num_zeros.append(pitch_values[pitch_values == 0].size)
                    pitch_measurements.append(
                        pitch_values[pitch_values != 0].size)
                    start_min.append(lower / 44100.)

                else:
                    mean_pitch.append(0)
                    std_pitch.append(0)
                    min_pitch.append(0)
                    max_pitch.append(0)
                    num_zeros.append(pitch_values[pitch_values == 0].size)
                    pitch_measurements.append(0)
                    start_min.append(lower / 44100.)

                os.remove(PATH + '/raw/clips/{}.wav'.format(speaker))

                x = torch.from_numpy(x).reshape(1, -1)

                x = whiten(x)

                # For me the bottleneck is this scipy resample call, increasing batch size doesn't make it any faster
                x = torch.from_numpy(resample(x, model_num_samples,
                                              axis=1)).reshape(
                                                  (1, 1, model_num_samples))

                y_hat = model(x).item()

                pred.append(y_hat)

        df = pd.DataFrame(
            data={
                'speaker': speaker,
                'start_second': start_min,
                'p': pred,
                'mean_pitch': mean_pitch,
                'max_pitch': max_pitch,
                'min_pitch': min_pitch,
                'num_zeros': num_zeros,
                'std_pitch': std_pitch,
                'pitch_measurements': pitch_measurements
            })

        df = df.assign(
            # Time in seconds of the end of the prediction fragment
            t_end=df['start_second'] + model_params['n_seconds'] / 60,
            # Time in seconds of the center of the prediction fragment
            t_center=df['start_second'] * 60 + model_params['n_seconds'] / 2.)
        df.to_csv(PATH + 'analyses/results/results_for_' + speaker + '.csv',
                  index=False)
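A hedged sketch of the inputs this function appears to expect, inferred from the code above: audios holds .wav file names under PATH + '/raw/voc/simple_audio/', and intervals maps each speaker (file name without extension) to (start, end) pairs in seconds; the complex argument is unused. The call assumes the module-level PATH and the model weights file are in place.

audios = ['speaker1.wav', 'speaker2.wav']
intervals = {
    'speaker1': [(0.0, 12.5), (30.0, 45.2)],
    'speaker2': [(5.0, 20.0)],
}

predict_gender(audios, intervals, complex=None)
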
Exemple #18
0
def main():
    init_output_dir(output_dir)
    # prepare dataset
    task = get_task(task_name, dataset_path)
    label_list = task.get_labels()
    label_map = {v: i for i, v in enumerate(label_list)}

    print("loading raw data ... ")
    train_examples = task.get_train_examples()
    val_examples = task.get_dev_examples()
    test_examples = task.get_test_examples()

    print("converting to data loader ... ")
    train_loader = get_dataloader(train_examples, label_map)
    val_loader = get_dataloader(val_examples, label_map)
    test_loader = get_dataloader(test_examples, label_map)

    # load model
    print("loading model ... ")
    model = InferSent(config)
    model.load_state_dict(torch.load(model_path))
    model = model.cuda() if config['use_cuda'] else model
    model.set_w2v_path(word_emb_path)
    print("building model vocabs ... ")
    model.build_vocab_k_words(K=100000, verbose=True)

    # run embedding for train set
    print("Run embedding for train set")
    for _ in trange(1, desc="Epoch"):
        run_encoding(loader=train_loader,
                     model=model,
                     mode='train')

    print("Run embedding for dev set")
    for _ in trange(1, desc="Epoch"):
        run_encoding(loader=val_loader,
                     model=model,
                     mode='dev')

    print("Run embedding for test set")
    for _ in trange(1, desc="Epoch"):
        run_encoding(loader=test_loader,
                     model=model,
                     mode='test')

    # HACK FOR MNLI mis-matched
    if task_name == 'mnli':
        print("Run Embedding for MNLI Mis-Matched Datasets")
        print("loading raw data ... ")
        mm_val_example = MnliMismatchedProcessor().get_dev_examples(dataset_path)
        mm_test_examples = MnliMismatchedProcessor().get_test_examples(dataset_path)
        print("converting to data loader ... ")
        mm_val_loader = get_dataloader(mm_val_example, label_map)
        mm_test_loader = get_dataloader(mm_test_examples, label_map)

        print("Run embedding for mm_dev set")
        for _ in trange(1, desc="Epoch"):
            run_encoding(loader=mm_val_loader,
                         model=model,
                         mode='mm_dev')

        print("Run embedding for test set")
        for _ in trange(1, desc="Epoch"):
            run_encoding(loader=mm_test_loader,
                         model=model,
                         mode='mm_test')
Exemple #19
0
    def train(
        self,
        train_dataset,
        output_dir,
        multi_label=False,
        show_running_loss=True,
        eval_df=None,
        verbose=True,
        **kwargs,
    ):
        """
        Trains the model on train_dataset.

        Utility function to be used by the train_model() method. Not intended to be used directly.
        """

        device = self.device
        model = self.model
        args = self.args

        tb_writer = SummaryWriter(logdir=args["tensorboard_dir"])
        train_sampler = RandomSampler(train_dataset)
        train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args["train_batch_size"])

        t_total = len(train_dataloader) // args["gradient_accumulation_steps"] * args["num_train_epochs"]

        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
                "weight_decay": args["weight_decay"],
            },
            {
                "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
                "weight_decay": 0.0,
            },
        ]

        warmup_steps = math.ceil(t_total * args["warmup_ratio"])
        args["warmup_steps"] = warmup_steps if args["warmup_steps"] == 0 else args["warmup_steps"]

        optimizer = AdamW(optimizer_grouped_parameters, lr=args["learning_rate"], eps=args["adam_epsilon"])
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=args["warmup_steps"], num_training_steps=t_total
        )

        if args["fp16"]:
            try:
                from apex import amp
            except ImportError:
                raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")

            model, optimizer = amp.initialize(model, optimizer, opt_level=args["fp16_opt_level"])

        if args["n_gpu"] > 1:
            model = torch.nn.DataParallel(model)

        global_step = 0
        tr_loss, logging_loss = 0.0, 0.0
        model.zero_grad()
        train_iterator = trange(int(args["num_train_epochs"]), desc="Epoch", disable=args["silent"])
        epoch_number = 0
        best_eval_loss = None
        early_stopping_counter = 0

        if args["evaluate_during_training"]:
            training_progress_scores = self._create_training_progress_scores(multi_label, **kwargs)

        if args["wandb_project"]:
            wandb.init(project=args["wandb_project"], config={**args}, **args["wandb_kwargs"])
            wandb.watch(self.model)

        model.train()
        for _ in train_iterator:
            # epoch_iterator = tqdm(train_dataloader, desc="Iteration")
            for step, batch in enumerate(tqdm(train_dataloader, desc="Current iteration", disable=args["silent"])):
                batch = tuple(t.to(device) for t in batch)

                inputs = self._get_inputs_dict(batch)
                outputs = model(**inputs)
                # model outputs are always tuple in pytorch-transformers (see doc)
                loss = outputs[0]

                if args["n_gpu"] > 1:
                    loss = loss.mean()  # mean() to average on multi-gpu parallel training

                current_loss = loss.item()

                if show_running_loss:
                    print("\rRunning loss: %f" % loss, end="")

                if args["gradient_accumulation_steps"] > 1:
                    loss = loss / args["gradient_accumulation_steps"]

                if args["fp16"]:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                    # torch.nn.utils.clip_grad_norm_(
                    #     amp.master_params(optimizer), args["max_grad_norm"]
                    # )
                else:
                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(
                    #     model.parameters(), args["max_grad_norm"]
                    # )

                tr_loss += loss.item()
                if (step + 1) % args["gradient_accumulation_steps"] == 0:
                    if args["fp16"]:
                        torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args["max_grad_norm"])
                    else:
                        torch.nn.utils.clip_grad_norm_(model.parameters(), args["max_grad_norm"])

                    optimizer.step()
                    scheduler.step()  # Update learning rate schedule
                    model.zero_grad()
                    global_step += 1

                    if args["logging_steps"] > 0 and global_step % args["logging_steps"] == 0:
                        # Log metrics
                        tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step)
                        tb_writer.add_scalar("loss", (tr_loss - logging_loss) / args["logging_steps"], global_step)
                        logging_loss = tr_loss
                        if args["wandb_project"]:
                            wandb.log(
                                {
                                    "Training loss": current_loss,
                                    "lr": scheduler.get_lr()[0],
                                    "global_step": global_step,
                                }
                            )

                    if args["save_steps"] > 0 and global_step % args["save_steps"] == 0:
                        # Save model checkpoint
                        output_dir_current = os.path.join(output_dir, "checkpoint-{}".format(global_step))

                        self._save_model(output_dir_current, model=model)

                    if args["evaluate_during_training"] and (
                        args["evaluate_during_training_steps"] > 0
                        and global_step % args["evaluate_during_training_steps"] == 0
                    ):
                        # Only evaluate when single GPU otherwise metrics may not average well
                        results, _, _ = self.eval_model(
                            eval_df, verbose=verbose and args["evaluate_during_training_verbose"], silent=True, **kwargs
                        )
                        for key, value in results.items():
                            tb_writer.add_scalar("eval_{}".format(key), value, global_step)

                        output_dir_current = os.path.join(output_dir, "checkpoint-{}".format(global_step))

                        if args["save_eval_checkpoints"]:
                            self._save_model(output_dir_current, model=model, results=results)

                        training_progress_scores["global_step"].append(global_step)
                        training_progress_scores["train_loss"].append(current_loss)
                        for key in results:
                            training_progress_scores[key].append(results[key])
                        report = pd.DataFrame(training_progress_scores)
                        report.to_csv(
                            args["output_dir"] + "training_progress_scores.csv", index=False,
                        )

                        if args["wandb_project"]:
                            wandb.log(self._get_last_metrics(training_progress_scores))

                        if not best_eval_loss:
                            best_eval_loss = results["eval_loss"]
                            self._save_model(args["best_model_dir"], model=model, results=results)
                        elif results["eval_loss"] - best_eval_loss < args["early_stopping_delta"]:
                            best_eval_loss = results["eval_loss"]
                            self._save_model(args["best_model_dir"], model=model, results=results)
                            early_stopping_counter = 0
                        else:
                            if early_stopping_counter < args["early_stopping_patience"]:
                                early_stopping_counter += 1
                                if verbose:
                                    print()
                                    print(f"No improvement in eval_loss for {early_stopping_counter} steps.")
                                    print(f"Training will stop at {args['early_stopping_patience']} steps.")
                                    print()
                            else:
                                if verbose:
                                    print()
                                    print(f"Patience of {args['early_stopping_patience']} steps reached.")
                                    print("Training terminated.")
                                    print()
                                return global_step, tr_loss / global_step

            epoch_number += 1
            output_dir_current = os.path.join(output_dir, "checkpoint-{}-epoch-{}".format(global_step, epoch_number))

            if (args["save_model_every_epoch"] or args["evaluate_during_training"]) and not os.path.exists(
                output_dir_current
            ):
                os.makedirs(output_dir_current)

            if args["save_model_every_epoch"]:
                self._save_model(output_dir_current, model=model)

            if args["evaluate_during_training"]:
                results, _, _ = self.eval_model(
                    eval_df, verbose=verbose and args["evaluate_during_training_verbose"], silent=True, **kwargs
                )

                self._save_model(output_dir_current, results=results)

                training_progress_scores["global_step"].append(global_step)
                training_progress_scores["train_loss"].append(current_loss)
                for key in results:
                    training_progress_scores[key].append(results[key])
                report = pd.DataFrame(training_progress_scores)
                report.to_csv(args["output_dir"] + "training_progress_scores.csv", index=False)

        return global_step, tr_loss / global_step
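
The early-stopping bookkeeping above tracks the best eval_loss, treats an evaluation as an improvement when eval_loss - best_eval_loss is below early_stopping_delta, and terminates once the patience counter has been exceeded. A minimal, self-contained sketch of that logic (the function and argument names are illustrative, not part of the class above):

def should_stop(eval_losses, patience=3, delta=0.0):
    # Mirrors the logic above: an evaluation counts as an improvement when
    # eval_loss - best < delta; otherwise the patience counter grows, and the
    # run stops on the first non-improving evaluation after the counter has
    # already reached `patience`.
    best = None
    counter = 0
    for loss in eval_losses:
        if best is None or loss - best < delta:
            best = loss
            counter = 0
        else:
            if counter < patience:
                counter += 1
            else:
                return True
    return False

# Example: four consecutive evaluations without improvement trigger the stop.
print(should_stop([0.52, 0.48, 0.49, 0.50, 0.51, 0.53], patience=3))  # True
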
def train(args, train_dataset, model: PreTrainedModel,
          tokenizer: PreTrainedTokenizer) -> Tuple[int, float]:
    """ Train the model """
    if args.local_rank in [-1, 0]:
        tb_writer = SummaryWriter()

    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)

    def collate(examples: List[torch.Tensor]):
        if tokenizer._pad_token is None:
            return pad_sequence(examples, batch_first=True)
        return pad_sequence(examples,
                            batch_first=True,
                            padding_value=tokenizer.pad_token_id)

    train_sampler = RandomSampler(
        train_dataset) if args.local_rank == -1 else DistributedSampler(
            train_dataset)
    train_dataloader = DataLoader(train_dataset,
                                  sampler=train_sampler,
                                  batch_size=args.train_batch_size,
                                  collate_fn=collate)

    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (
            len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(
            train_dataloader
        ) // args.gradient_accumulation_steps * args.num_train_epochs

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            args.weight_decay,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.0
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.warmup_steps,
        num_training_steps=t_total)

    # Check if saved optimizer or scheduler states exist
    if (args.model_name_or_path and os.path.isfile(
            os.path.join(args.model_name_or_path, "optimizer.pt"))
            and os.path.isfile(
                os.path.join(args.model_name_or_path, "scheduler.pt"))):
        # Load in optimizer and scheduler states
        optimizer.load_state_dict(
            torch.load(os.path.join(args.model_name_or_path, "optimizer.pt")))
        scheduler.load_state_dict(
            torch.load(os.path.join(args.model_name_or_path, "scheduler.pt")))

    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=args.fp16_opt_level)

    # multi-gpu training (should be after apex fp16 initialization)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Distributed training (should be after apex fp16 initialization)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)

    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Instantaneous batch size per GPU = %d",
                args.per_gpu_train_batch_size)
    logger.info(
        "  Total train batch size (w. parallel, distributed & accumulation) = %d",
        args.train_batch_size * args.gradient_accumulation_steps *
        (torch.distributed.get_world_size() if args.local_rank != -1 else 1),
    )
    logger.info("  Gradient Accumulation steps = %d",
                args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)

    global_step = 0
    epochs_trained = 0
    steps_trained_in_current_epoch = 0
    # Check if continuing training from a checkpoint
    if args.model_name_or_path and os.path.exists(args.model_name_or_path):
        try:
            # set global_step to the global_step of the last saved checkpoint from the model path
            checkpoint_suffix = args.model_name_or_path.split("-")[-1].split(
                "/")[0]
            global_step = int(checkpoint_suffix)
            epochs_trained = global_step // (len(train_dataloader) //
                                             args.gradient_accumulation_steps)
            steps_trained_in_current_epoch = global_step % (
                len(train_dataloader) // args.gradient_accumulation_steps)

            logger.info(
                "  Continuing training from checkpoint, will skip to saved global_step"
            )
            logger.info("  Continuing training from epoch %d", epochs_trained)
            logger.info("  Continuing training from global step %d",
                        global_step)
            logger.info("  Will skip the first %d steps in the first epoch",
                        steps_trained_in_current_epoch)
        except ValueError:
            logger.info("  Starting fine-tuning.")

    tr_loss, logging_loss = 0.0, 0.0

    model_to_resize = model.module if hasattr(
        model,
        "module") else model  # Take care of distributed/parallel training
    model_to_resize.resize_token_embeddings(len(tokenizer))

    model.zero_grad()
    train_iterator = trange(epochs_trained,
                            int(args.num_train_epochs),
                            desc="Epoch",
                            disable=args.local_rank not in [-1, 0])
    set_seed(args)  # Added here for reproducibility
    for _ in train_iterator:
        epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=True)
        for step, batch in enumerate(epoch_iterator):

            # Skip past any already trained steps if resuming training
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue

            inputs, labels = mask_tokens(batch, tokenizer,
                                         args) if args.mlm else (batch, batch)
            inputs = inputs.to(args.device)
            labels = labels.to(args.device)
            model.train()
            outputs = model(inputs,
                            masked_lm_labels=labels) if args.mlm else model(
                                inputs, labels=labels)
            loss = outputs[
                0]  # model outputs are always tuple in transformers (see doc)

            if args.n_gpu > 1:
                loss = loss.mean(
                )  # mean() to average on multi-gpu parallel training
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                if args.fp16:
                    torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), args.max_grad_norm)
                else:
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   args.max_grad_norm)
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

                if args.local_rank in [
                        -1, 0
                ] and args.logging_steps > 0 and global_step % args.logging_steps == 0:
                    # Log metrics
                    if (
                            args.local_rank == -1
                            and args.evaluate_during_training
                    ):  # Only evaluate when single GPU otherwise metrics may not average well
                        results = evaluate(args, model, tokenizer)
                        for key, value in results.items():
                            tb_writer.add_scalar("eval_{}".format(key), value,
                                                 global_step)
                    tb_writer.add_scalar("lr",
                                         scheduler.get_lr()[0], global_step)
                    tb_writer.add_scalar("loss", (tr_loss - logging_loss) /
                                         args.logging_steps, global_step)
                    logging_loss = tr_loss

                if args.local_rank in [
                        -1, 0
                ] and args.save_steps > 0 and global_step % args.save_steps == 0:
                    checkpoint_prefix = "checkpoint"
                    # Save model checkpoint
                    output_dir = os.path.join(
                        args.output_dir,
                        "{}-{}".format(checkpoint_prefix, global_step))
                    os.makedirs(output_dir, exist_ok=True)
                    model_to_save = (
                        model.module if hasattr(model, "module") else model
                    )  # Take care of distributed/parallel training
                    model_to_save.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)

                    torch.save(args,
                               os.path.join(output_dir, "training_args.bin"))
                    logger.info("Saving model checkpoint to %s", output_dir)

                    _rotate_checkpoints(args, checkpoint_prefix)

                    torch.save(optimizer.state_dict(),
                               os.path.join(output_dir, "optimizer.pt"))
                    torch.save(scheduler.state_dict(),
                               os.path.join(output_dir, "scheduler.pt"))
                    logger.info("Saving optimizer and scheduler states to %s",
                                output_dir)

            if args.max_steps > 0 and global_step > args.max_steps:
                epoch_iterator.close()
                break
        if args.max_steps > 0 and global_step > args.max_steps:
            train_iterator.close()
            break

    if args.local_rank in [-1, 0]:
        tb_writer.close()

    return global_step, tr_loss / global_step
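
The inner loop above follows the usual gradient-accumulation pattern: divide the loss by gradient_accumulation_steps, backpropagate every micro-batch, and only clip, step the optimizer and scheduler, and zero the gradients once per accumulation window. A compact, self-contained sketch of that pattern with a toy model (the model, data, and hyperparameters here are illustrative, not the script's own):

import torch

# Toy model/data purely for illustration; the control flow matches the loop above.
model = torch.nn.Linear(10, 1)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda step: 1.0)  # placeholder schedule
accum_steps, max_grad_norm = 4, 1.0

for step in range(16):
    x, y = torch.randn(8, 10), torch.randn(8, 1)
    # Scale the loss so the accumulated gradient matches a full-batch gradient.
    loss = torch.nn.functional.mse_loss(model(x), y) / accum_steps
    loss.backward()
    if (step + 1) % accum_steps == 0:
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        scheduler.step()   # update the learning-rate schedule once per optimizer step
        model.zero_grad()
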
Example #21
                                       writer=writer,
                                       **(sampler_kwargs or {}))
 if equilibrate:
     log.info('Equilibrating...')
     with tqdm(count(), desc='equilibrating', disable=None) as steps:
         next(
             sample_wf(wf,
                       sampler.iter_with_info(),
                       steps,
                       equilibrate=equilibrate))
     log.info('Equilibrated')
 log.info('Initializing training')
 steps = trange(
     init_step,
     n_steps,
     initial=init_step,
     total=n_steps,
     desc='training',
     disable=None,
 )
 chkpts = chkpts if chkpts is not None else []
 last_log = 0
 try:
     for step, _ in fit_wf(
             wf,
             LossEnergy(),
             opt,
             sampler.iter_batches(
                 batch_size=batch_size,
                 epoch_size=epoch_size,
                 range=partial(trange,
                               desc='sampling',
    def train(
        self, train_dataset, output_dir, show_running_loss=True, eval_data=None, verbose=True, **kwargs,
    ):
        """
        Trains the model on train_dataset.

        Utility function to be used by the train_model() method. Not intended to be used directly.
        """

        model = self.model
        args = self.args

        tb_writer = SummaryWriter(logdir=args.tensorboard_dir)
        train_sampler = RandomSampler(train_dataset)
        train_dataloader = DataLoader(
            train_dataset,
            sampler=train_sampler,
            batch_size=args.train_batch_size,
            num_workers=self.args.dataloader_num_workers,
        )

        if args.max_steps > 0:
            t_total = args.max_steps
            args.num_train_epochs = args.max_steps // (len(train_dataloader) // args.gradient_accumulation_steps) + 1
        else:
            t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs

        no_decay = ["bias", "LayerNorm.weight"]

        optimizer_grouped_parameters = []
        custom_parameter_names = set()
        for group in self.args.custom_parameter_groups:
            params = group.pop("params")
            custom_parameter_names.update(params)
            param_group = {**group}
            param_group["params"] = [p for n, p in model.named_parameters() if n in params]
            optimizer_grouped_parameters.append(param_group)

        for group in self.args.custom_layer_parameters:
            layer_number = group.pop("layer")
            layer = f"layer.{layer_number}."
            group_d = {**group}
            group_nd = {**group}
            group_nd["weight_decay"] = 0.0
            params_d = []
            params_nd = []
            for n, p in model.named_parameters():
                if n not in custom_parameter_names and layer in n:
                    if any(nd in n for nd in no_decay):
                        params_nd.append(p)
                    else:
                        params_d.append(p)
                    custom_parameter_names.add(n)
            group_d["params"] = params_d
            group_nd["params"] = params_nd

            optimizer_grouped_parameters.append(group_d)
            optimizer_grouped_parameters.append(group_nd)

        if not self.args.train_custom_parameters_only:
            optimizer_grouped_parameters.extend(
                [
                    {
                        "params": [
                            p
                            for n, p in model.named_parameters()
                            if n not in custom_parameter_names and not any(nd in n for nd in no_decay)
                        ],
                        "weight_decay": args.weight_decay,
                    },
                    {
                        "params": [
                            p
                            for n, p in model.named_parameters()
                            if n not in custom_parameter_names and any(nd in n for nd in no_decay)
                        ],
                        "weight_decay": 0.0,
                    },
                ]
            )

        warmup_steps = math.ceil(t_total * args.warmup_ratio)
        args.warmup_steps = warmup_steps if args.warmup_steps == 0 else args.warmup_steps

        # TODO: Use custom optimizer like with BertSum?
        optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total
        )

        if (
            args.model_name
            and os.path.isfile(os.path.join(args.model_name, "optimizer.pt"))
            and os.path.isfile(os.path.join(args.model_name, "scheduler.pt"))
        ):
            # Load in optimizer and scheduler states
            optimizer.load_state_dict(torch.load(os.path.join(args.model_name, "optimizer.pt")))
            scheduler.load_state_dict(torch.load(os.path.join(args.model_name, "scheduler.pt")))

        if args.n_gpu > 1:
            model = torch.nn.DataParallel(model)

        logger.info(" Training started")

        global_step = 0
        tr_loss, logging_loss = 0.0, 0.0
        model.zero_grad()
        train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.silent, mininterval=0)
        epoch_number = 0
        best_eval_metric = None
        early_stopping_counter = 0
        steps_trained_in_current_epoch = 0
        epochs_trained = 0

        if args.model_name and os.path.exists(args.model_name):
            try:
                # set global_step to the global_step of the last saved checkpoint from the model path
                checkpoint_suffix = args.model_name.split("/")[-1].split("-")
                if len(checkpoint_suffix) > 2:
                    checkpoint_suffix = checkpoint_suffix[1]
                else:
                    checkpoint_suffix = checkpoint_suffix[-1]
                global_step = int(checkpoint_suffix)
                epochs_trained = global_step // (len(train_dataloader) // args.gradient_accumulation_steps)
                steps_trained_in_current_epoch = global_step % (
                    len(train_dataloader) // args.gradient_accumulation_steps
                )

                logger.info("   Continuing training from checkpoint, will skip to saved global_step")
                logger.info("   Continuing training from epoch %d", epochs_trained)
                logger.info("   Continuing training from global step %d", global_step)
                logger.info("   Will skip the first %d steps in the current epoch", steps_trained_in_current_epoch)
            except ValueError:
                logger.info("   Starting fine-tuning.")

        if args.evaluate_during_training:
            training_progress_scores = self._create_training_progress_scores(**kwargs)

        if args.wandb_project:
            wandb.init(project=args.wandb_project, config={**asdict(args)}, **args.wandb_kwargs)
            wandb.watch(self.model)

        if args.fp16:
            scaler = amp.GradScaler()

        model.train()
        for current_epoch in train_iterator:
            if epochs_trained > 0:
                epochs_trained -= 1
                continue
            train_iterator.set_description(f"Epoch {epoch_number + 1} of {args.num_train_epochs}")
            batch_iterator = tqdm(
                train_dataloader,
                desc=f"Running Epoch {epoch_number} of {args.num_train_epochs}",
                disable=args.silent,
                mininterval=0,
            )
            for step, batch in enumerate(batch_iterator):
                if steps_trained_in_current_epoch > 0:
                    steps_trained_in_current_epoch -= 1
                    continue
                # batch = tuple(t.to(device) for t in batch)

                inputs = self._get_inputs_dict(batch)
                with amp.autocast() if args.fp16 else nullcontext():
                    outputs = model(**inputs)
                    # model outputs are always tuple in pytorch-transformers (see doc)
                    loss = outputs[0]

                if args.n_gpu > 1:
                    loss = loss.mean()  # mean() to average on multi-gpu parallel training

                current_loss = loss.item()

                if show_running_loss:
                    batch_iterator.set_description(
                        f"Epochs {epoch_number}/{args.num_train_epochs}. Running Loss: {current_loss:9.4f}"
                    )

                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps

                if args.fp16:
                    scaler.scale(loss).backward()
                else:
                    loss.backward()

                tr_loss += loss.item()
                if (step + 1) % args.gradient_accumulation_steps == 0:
                    if args.fp16:
                        scaler.unscale_(optimizer)
                    torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)

                    if args.fp16:
                        scaler.step(optimizer)
                        scaler.update()
                    else:
                        optimizer.step()
                    scheduler.step()  # Update learning rate schedule
                    model.zero_grad()
                    global_step += 1

                    if args.logging_steps > 0 and global_step % args.logging_steps == 0:
                        # Log metrics
                        tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step)
                        tb_writer.add_scalar("loss", (tr_loss - logging_loss) / args.logging_steps, global_step)
                        logging_loss = tr_loss
                        if args.wandb_project:
                            wandb.log(
                                {
                                    "Training loss": current_loss,
                                    "lr": scheduler.get_lr()[0],
                                    "global_step": global_step,
                                }
                            )

                    if args.save_steps > 0 and global_step % args.save_steps == 0:
                        # Save model checkpoint
                        output_dir_current = os.path.join(output_dir, "checkpoint-{}".format(global_step))

                        self._save_model(output_dir_current, optimizer, scheduler, model=model)

                    if args.evaluate_during_training and (
                        args.evaluate_during_training_steps > 0
                        and global_step % args.evaluate_during_training_steps == 0
                    ):
                        # Only evaluate when single GPU otherwise metrics may not average well
                        results = self.eval_model(
                            eval_data,
                            verbose=verbose and args.evaluate_during_training_verbose,
                            silent=args.evaluate_during_training_silent,
                            **kwargs,
                        )
                        for key, value in results.items():
                            tb_writer.add_scalar("eval_{}".format(key), value, global_step)

                        output_dir_current = os.path.join(output_dir, "checkpoint-{}".format(global_step))

                        if args.save_eval_checkpoints:
                            self._save_model(output_dir_current, optimizer, scheduler, model=model, results=results)

                        training_progress_scores["global_step"].append(global_step)
                        training_progress_scores["train_loss"].append(current_loss)
                        for key in results:
                            training_progress_scores[key].append(results[key])
                        report = pd.DataFrame(training_progress_scores)
                        report.to_csv(
                            os.path.join(args.output_dir, "training_progress_scores.csv"), index=False,
                        )

                        if args.wandb_project:
                            wandb.log(self._get_last_metrics(training_progress_scores))

                        if not best_eval_metric:
                            best_eval_metric = results[args.early_stopping_metric]
                            if args.save_best_model:
                                self._save_model(
                                    args.best_model_dir, optimizer, scheduler, model=model, results=results
                                )
                        if best_eval_metric and args.early_stopping_metric_minimize:
                            if results[args.early_stopping_metric] - best_eval_metric < args.early_stopping_delta:
                                best_eval_metric = results[args.early_stopping_metric]
                                if args.save_best_model:
                                    self._save_model(
                                        args.best_model_dir, optimizer, scheduler, model=model, results=results
                                    )
                                early_stopping_counter = 0
                            else:
                                if args.use_early_stopping:
                                    if early_stopping_counter < args.early_stopping_patience:
                                        early_stopping_counter += 1
                                        if verbose:
                                            logger.info(f" No improvement in {args.early_stopping_metric}")
                                            logger.info(f" Current step: {early_stopping_counter}")
                                            logger.info(f" Early stopping patience: {args.early_stopping_patience}")
                                    else:
                                        if verbose:
                                            logger.info(f" Patience of {args.early_stopping_patience} steps reached")
                                            logger.info(" Training terminated.")
                                            train_iterator.close()
                                        return global_step, tr_loss / global_step
                        else:
                            if results[args.early_stopping_metric] - best_eval_metric > args.early_stopping_delta:
                                best_eval_metric = results[args.early_stopping_metric]
                                if args.save_best_model:
                                    self._save_model(
                                        args.best_model_dir, optimizer, scheduler, model=model, results=results
                                    )
                                early_stopping_counter = 0
                            else:
                                if args.use_early_stopping:
                                    if early_stopping_counter < args.early_stopping_patience:
                                        early_stopping_counter += 1
                                        if verbose:
                                            logger.info(f" No improvement in {args.early_stopping_metric}")
                                            logger.info(f" Current step: {early_stopping_counter}")
                                            logger.info(f" Early stopping patience: {args.early_stopping_patience}")
                                    else:
                                        if verbose:
                                            logger.info(f" Patience of {args.early_stopping_patience} steps reached")
                                            logger.info(" Training terminated.")
                                            train_iterator.close()
                                        return global_step, tr_loss / global_step

            epoch_number += 1
            output_dir_current = os.path.join(output_dir, "checkpoint-{}-epoch-{}".format(global_step, epoch_number))

            if args.save_model_every_epoch or args.evaluate_during_training:
                os.makedirs(output_dir_current, exist_ok=True)

            if args.save_model_every_epoch:
                self._save_model(output_dir_current, optimizer, scheduler, model=model)

            if args.evaluate_during_training:
                results = self.eval_model(
                    eval_data,
                    verbose=verbose and args.evaluate_during_training_verbose,
                    silent=args.evaluate_during_training_silent,
                    **kwargs,
                )

                if args.save_eval_checkpoints:
                    self._save_model(output_dir_current, optimizer, scheduler, results=results)

                training_progress_scores["global_step"].append(global_step)
                training_progress_scores["train_loss"].append(current_loss)
                for key in results:
                    training_progress_scores[key].append(results[key])
                report = pd.DataFrame(training_progress_scores)
                report.to_csv(os.path.join(args.output_dir, "training_progress_scores.csv"), index=False)

                if args.wandb_project:
                    wandb.log(self._get_last_metrics(training_progress_scores))

                if not best_eval_metric:
                    best_eval_metric = results[args.early_stopping_metric]
                    if args.save_best_model:
                        self._save_model(args.best_model_dir, optimizer, scheduler, model=model, results=results)
                if best_eval_metric and args.early_stopping_metric_minimize:
                    if results[args.early_stopping_metric] - best_eval_metric < args.early_stopping_delta:
                        best_eval_metric = results[args.early_stopping_metric]
                        if args.save_best_model:
                            self._save_model(args.best_model_dir, optimizer, scheduler, model=model, results=results)
                        early_stopping_counter = 0
                    else:
                        if args.use_early_stopping and args.early_stopping_consider_epochs:
                            if early_stopping_counter < args.early_stopping_patience:
                                early_stopping_counter += 1
                                if verbose:
                                    logger.info(f" No improvement in {args.early_stopping_metric}")
                                    logger.info(f" Current step: {early_stopping_counter}")
                                    logger.info(f" Early stopping patience: {args.early_stopping_patience}")
                            else:
                                if verbose:
                                    logger.info(f" Patience of {args.early_stopping_patience} steps reached")
                                    logger.info(" Training terminated.")
                                    train_iterator.close()
                                return global_step, tr_loss / global_step
                else:
                    if results[args.early_stopping_metric] - best_eval_metric > args.early_stopping_delta:
                        best_eval_metric = results[args.early_stopping_metric]
                        if args.save_best_model:
                            self._save_model(args.best_model_dir, optimizer, scheduler, model=model, results=results)
                        early_stopping_counter = 0
                    else:
                        if args.use_early_stopping and args.early_stopping_consider_epochs:
                            if early_stopping_counter < args.early_stopping_patience:
                                early_stopping_counter += 1
                                if verbose:
                                    logger.info(f" No improvement in {args.early_stopping_metric}")
                                    logger.info(f" Current step: {early_stopping_counter}")
                                    logger.info(f" Early stopping patience: {args.early_stopping_patience}")
                            else:
                                if verbose:
                                    logger.info(f" Patience of {args.early_stopping_patience} steps reached")
                                    logger.info(" Training terminated.")
                                    train_iterator.close()
                                return global_step, tr_loss / global_step

        return global_step, tr_loss / global_step
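
The fp16 branch above uses the torch.cuda.amp pattern: run the forward pass under autocast, backpropagate a scaled loss through GradScaler, unscale before gradient clipping, then let the scaler perform the optimizer step and update its scale factor. A minimal stand-alone sketch of that sequence with a toy model (illustrative only, not this class's code):

import torch

# Toy setup; autocast/GradScaler only take effect when a CUDA device is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = torch.nn.Linear(10, 1).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
scaler = torch.cuda.amp.GradScaler(enabled=(device == "cuda"))

x, y = torch.randn(8, 10, device=device), torch.randn(8, 1, device=device)
with torch.cuda.amp.autocast(enabled=(device == "cuda")):
    loss = torch.nn.functional.mse_loss(model(x), y)

scaler.scale(loss).backward()       # backprop on the scaled loss
scaler.unscale_(optimizer)          # unscale so clipping sees the true gradients
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
scaler.step(optimizer)              # skips the step if gradients overflowed
scaler.update()
model.zero_grad()
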
Example #23
def simulate_recon(
    measured_sino,
    ctim,
    scanner_params,
    simulate_3d=False,
    nitr=60,
    fwhm_rm=0.,
    slice_idx=-1,
    randoms=None,
    scatter=None,
    mu_input=False,
    msk_radius=29.,
    psf=None,
):
    '''
    Reconstruct PET image from simulated input data
    using the EM-ML (2D) or OSEM (3D) algorithm.

    measured_sino  : simulated emission data with photon attenuation
    ctim  : either a 2D CT image or a 3D CT image from which a 2D slice
        is chosen (slice_idx) for estimation of the attenuation factors
    slice_idx  : index to extract one 2D slice for this simulation
        if input image is 3D
    nitr  : number of iterations used for the EM-ML reconstruction algorithm
    scanner_params  : scanner parameters containing scanner constants and
        axial and transaxial look up tables (LUTs)
    randoms  : randoms and scatter events (optional)
    '''
    # > decompose the scanner constants and LUTs for easier access
    Cnt = scanner_params['Cnt']
    txLUT = scanner_params['txLUT']
    axLUT = scanner_params['axLUT']
    psfkernel = mmrrec.psf_config(psf, Cnt)

    if simulate_3d:
        if ctim.ndim!=3 \
                or ctim.shape!=(Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMX']):
            raise ValueError(
                'The CT/mu-map image does not match the scanner image shape.')
    else:
        # > 2D case with reduced rings
        if len(ctim.shape) == 3:
            # make sure that the shape of the input image matches the image size of the scanner
            if ctim.shape[1:] != (Cnt['SO_IMY'], Cnt['SO_IMX']):
                raise ValueError(
                    'The input image shape for x and y does not match the scanner image size.'
                )
            # pick the right slice index (slice_idx) if not given or mistaken
            if slice_idx < 0:
                log.warning(
                    'the axial index <slice_idx> is chosen to be in the middle of axial FOV.'
                )
                slice_idx = ctim.shape[0] // 2
            if slice_idx >= ctim.shape[0]:
                raise ValueError(
                    'The axial index for 2D slice selection is outside the image.'
                )
        elif len(ctim.shape) == 2:
            # make sure that the shape of the input image matches the image size of the scanner
            if ctim.shape != (Cnt['SO_IMY'], Cnt['SO_IMX']):
                raise ValueError(
                    'The input image shape for x and y does not match the scanner image size.'
                )
            ctim.shape = (1, ) + ctim.shape
            slice_idx = 0

        if 'rSZ_IMZ' not in Cnt:
            raise ValueError('Missing reduced axial FOV parameters.')

    # --------------------
    if mu_input:
        mui = ctim
    else:
        # > get the mu-map [1/cm] from CT [HU]
        mui = nimpa.ct2mu(ctim)

    # > get rid of negative values
    mui[mui < 0] = 0
    # --------------------

    if simulate_3d:
        rmu = mui
        # > number of axial sinograms
        nsinos = Cnt['NSN11']
    else:
        # --------------------
        # > create a number of slides of the same chosen image slice
        # for reduced (fast) 3D simulation
        rmu = mui[slice_idx, :, :]
        rmu.shape = (1, ) + rmu.shape
        rmu = np.repeat(rmu, Cnt['rSZ_IMZ'], axis=0)
        # --------------------
        # > number of axial sinograms
        nsinos = Cnt['rNSN1']

    # import pdb; pdb.set_trace()

    # > attenuation factor sinogram
    attsino = mmrprj.frwd_prj(rmu,
                              scanner_params,
                              attenuation=True,
                              dev_out=True)

    nrmsino = np.ones(attsino.shape, dtype=np.float32)

    # > randoms and scatter put together
    if isinstance(randoms,
                  np.ndarray) and measured_sino.shape == randoms.shape:
        rsng = mmraux.remgaps(randoms, txLUT, Cnt)
    else:
        rsng = 1e-5 * np.ones((Cnt['Naw'], nsinos), dtype=np.float32)

    if isinstance(scatter,
                  np.ndarray) and measured_sino.shape == scatter.shape:
        ssng = mmraux.remgaps(scatter, txLUT, Cnt)
    else:
        ssng = 1e-5 * np.ones((Cnt['Naw'], nsinos), dtype=np.float32)

    # resolution modelling
    Cnt['SIGMA_RM'] = mmrrec.fwhm2sig(fwhm_rm, voxsize=Cnt['SZ_VOXZ'] *
                                      10) if fwhm_rm else 0

    if simulate_3d:
        log.debug('------ OSEM (%d) -------', nitr)

        # measured sinogram in GPU-enabled shape
        psng = mmraux.remgaps(measured_sino.astype(np.uint16), txLUT, Cnt)

        # > mask for reconstructed image.  anything outside it is set to zero
        msk = mmrimg.get_cylinder(
            Cnt, rad=msk_radius, xo=0, yo=0, unival=1, gpu_dim=True) > 0.9

        # > init image
        eimg = np.ones((Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']),
                       dtype=np.float32)

        # ------------------------------------
        Sn = 14  # number of subsets

        # -get one subset to get number of projection bins in a subset
        Sprj, s = mmrrec.get_subsets14(0, scanner_params)
        Nprj = len(Sprj)

        # > init subset array and sensitivity image for a given subset
        sinoTIdx = np.zeros((Sn, Nprj + 1), dtype=np.int32)

        # > init sensitivity images for each subset
        sim = np.zeros((Sn, Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']),
                       dtype=np.float32)
        tmpsim = cu.zeros((Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']),
                          dtype=np.float32)

        for n in trange(Sn,
                        desc="sensitivity",
                        leave=log.getEffectiveLevel() < logging.INFO):
            # first number of projection for the given subset
            sinoTIdx[n, 0] = Nprj
            sinoTIdx[n, 1:], s = mmrrec.get_subsets14(n, scanner_params)

            # > sensitivity image
            petprj.bprj(tmpsim.cuvec,
                        cu.asarray(attsino[sinoTIdx[n, 1:], :]).cuvec, txLUT,
                        axLUT, sinoTIdx[n, 1:], Cnt)
            sim[n] = tmpsim
        del tmpsim
        # -------------------------------------

        for _ in trange(nitr,
                        desc="OSEM",
                        disable=log.getEffectiveLevel() > logging.INFO,
                        leave=log.getEffectiveLevel() < logging.INFO):
            petprj.osem(eimg, psng, rsng, ssng, nrmsino, attsino, sinoTIdx,
                        sim, msk, psfkernel, txLUT, axLUT, Cnt)
        eim = mmrimg.convert2e7(eimg, Cnt)

    else:

        def psf(x, output=None):
            if Cnt['SIGMA_RM']:
                x = ndi.gaussian_filter(x,
                                        sigma=Cnt['SIGMA_RM'],
                                        mode='constant',
                                        output=None)
            return x

        # > estimated image, initialised to ones
        eim = np.ones(rmu.shape, dtype=np.float32)

        msk = mmrimg.get_cylinder(
            Cnt, rad=msk_radius, xo=0, yo=0, unival=1, gpu_dim=False) > 0.9

        # > sensitivity image for the EM-ML reconstruction
        sim = mmrprj.back_prj(attsino, scanner_params)
        sim_inv = 1 / psf(sim)
        sim_inv[~msk] = 0

        rndsct = rsng + ssng
        for _ in trange(nitr,
                        desc="MLEM",
                        disable=log.getEffectiveLevel() > logging.INFO,
                        leave=log.getEffectiveLevel() < logging.INFO):
            # > remove gaps from the measured sinogram
            # > then forward project the estimated image
            # > after which divide the measured sinogram
            # by the estimated sinogram (forward projected)
            crrsino = (
                mmraux.remgaps(measured_sino, txLUT, Cnt) /
                (mmrprj.frwd_prj(psf(eim), scanner_params, dev_out=True) +
                 rndsct))

            # > back project the correction factors sinogram
            bim = mmrprj.back_prj(crrsino, scanner_params)
            bim = psf(bim, output=bim)

            # > divide the back-projected image by the sensitivity image
            # > update the estimated image and remove NaNs
            eim *= bim * sim_inv
            eim[np.isnan(eim)] = 0

    return eim
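
Underneath the projector and gap-handling details, the 2D branch above is a textbook MLEM loop: forward-project the current estimate, divide the measured sinogram by it (plus the randoms/scatter estimate), back-project the ratio, and scale by the inverse sensitivity image. A generic sketch of that update with placeholder projectors (forward_project and back_project are stand-ins, not the NiftyPET API):

import numpy as np

def mlem(measured, forward_project, back_project, n_iter=60, additive=0.0):
    # Sensitivity image: back-projection of a uniform sinogram (attenuation is
    # assumed to be folded into the projectors, as in the code above).
    sens = back_project(np.ones_like(measured))
    sens_inv = np.where(sens > 0, 1.0 / sens, 0.0)
    img = np.ones_like(sens)
    for _ in range(n_iter):
        # Ratio of measured to estimated data; `additive` stands in for the
        # randoms + scatter term added to the denominator above.
        ratio = measured / (forward_project(img) + additive + 1e-12)
        img *= back_project(ratio) * sens_inv   # multiplicative MLEM update
        img[np.isnan(img)] = 0
    return img

# Smoke test with an identity "scanner": the estimate converges to the data.
print(mlem(np.array([1.0, 2.0, 3.0]), lambda x: x, lambda y: y, n_iter=5))
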
Example #24
    def generate(  # pylint: disable=W0221
        self,
        x: np.ndarray,
        y: Optional[np.ndarray] = None,
        sample_sizes: Optional[np.ndarray] = None,
        automatically_append: bool = True,
        verify_input_data: bool = True,
        perturb_sizes: Optional[List[List[int]]] = None,
        perturb_starts: Optional[List[List[int]]] = None,
        **kwargs,
    ) -> np.ndarray:
        """
        Generates the adversarial examples. By default, x needs to be composed of valid files which can support the
        adversarial perturbation, i.e. files that are malicious and can support the assigned L0 budget. They can be
        obtained by using `pull_out_valid_samples` on the data.

        This check on the input data can be overridden by toggling the flag verify_input_data. In that case only the
        data which can be made adversarial is perturbed, so the resulting batch will be a mixture of adversarial and
        unperturbed data.

        To assign the L0 budget we go through each list in perturb_sizes and perturb_starts in order, and
        assign the budget based on the sizes given until the L0 budget is exhausted.

        After all the regions marked in perturb_sizes and perturb_starts have been assigned, any remaining L0 budget
        is, if automatically_append is set to True, added at the end of the file in an append-style attack.

        :param x: An array with input data.
        :param y: (N, 1) binary labels to make sure the benign files are zero masked.
        :param sample_sizes: The size of the original file, before it was padded to the input size required by MalConv
        :param automatically_append: Whether to automatically append extra spare perturbation at the end of the file.
        :param verify_input_data: If to check that all the data supplied is valid for adversarial perturbations.
        :param perturb_sizes: A list of length batch size, each element is in itself a list containing
                              the size of the allowable perturbation region
        :param perturb_starts: A list of length batch size, each element is in itself a list containing
                               the start of perturbation region.
        :return: The adversarial examples.
        """
        import tensorflow as tf  # lgtm [py/repeated-import]

        # make copy so original data is not modified.
        adv_x = x.copy()
        if sample_sizes is None:  # pragma: no cover
            raise ValueError(
                "The size of the original files needs to be supplied")
        if y is None:  # pragma: no cover
            raise ValueError(
                "Labels need to be provided so we only modify the malware")

        # check that the dimensions all match
        assert len(adv_x) == len(y)
        assert len(y) == len(sample_sizes)
        if perturb_sizes is not None:
            assert len(y) == len(perturb_sizes)
        if perturb_starts is not None:
            assert len(y) == len(perturb_starts)

        # check that if perturb_starts is provided perturb_sizes is also provided and vice versa
        if perturb_starts is not None:
            assert perturb_sizes is not None
        if perturb_sizes is not None:
            assert perturb_starts is not None

        # if we do not automatically append then make sure that we have supplied
        # start and end positions for the perturbation.
        if not automatically_append:
            assert perturb_sizes is not None
            assert perturb_starts is not None

        perturbation_size = np.zeros(len(sample_sizes), dtype=int)
        for i, sample_size in enumerate(sample_sizes):
            if self.l_0 < 1:  # l0 is a fraction of the filesize
                perturbation_size[i] = int(sample_size * self.l_0)
            else:  # or l0 is interpreted as total perturbation size
                perturbation_size[i] = int(self.l_0)
        self.total_perturbation = np.copy(perturbation_size)

        if perturb_sizes is not None and perturb_starts is not None:
            perturbation_size, perturb_sizes = self.compute_perturbation_regions(
                perturbation_size, perturb_sizes, automatically_append)

        y = self.check_valid_size(y, sample_sizes, perturbation_size)

        if verify_input_data:
            if np.sum(y) != len(y):
                raise ValueError(  # pragma: no cover
                    f"{len(y) - np.sum(y)} invalid samples found in batch which cannot support the assigned "
                    f"perturbation or are benign To filter for samples that can be processed use "
                    f"pull_out_valid_samples on the samples. Checking can be disabled by using verify_input_data"
                )

        adv_x = self.initialise_sample(adv_x,
                                       y,
                                       sample_sizes,
                                       perturbation_size,
                                       perturb_sizes=perturb_sizes,
                                       perturb_starts=perturb_starts)

        mask = self.generate_mask(adv_x,
                                  y,
                                  sample_sizes,
                                  perturbation_size,
                                  perturb_sizes=perturb_sizes,
                                  perturb_starts=perturb_starts)

        embeddings = tf.nn.embedding_lookup(params=self.embedding_weights,
                                            ids=adv_x.astype("int32"))

        for _ in trange(self.num_of_iterations,
                        desc="PE Adv. Malware",
                        disable=not self.verbose):
            gradients = self.estimator.class_gradient(embeddings, label=0)
            # go from (bsize x 1 x features x embedding size) -> (bsize x features x embedding size) in a
            # framework agnostic manner.
            gradients = gradients[:, 0, :, :]
            gradients = -1 * gradients
            embeddings = self.update_embeddings(embeddings, gradients, mask)

        adv_x = self.get_adv_malware(
            embeddings=embeddings,
            data=adv_x,
            labels=y,
            fsize=sample_sizes,
            perturbation_size=perturbation_size,
            perturb_sizes=perturb_sizes,
            perturb_starts=perturb_starts,
        )

        return adv_x
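
The optimisation loop above leaves the actual embedding update to self.update_embeddings. Purely for illustration, a hypothetical helper consistent with that loop could apply a plain gradient step restricted to the perturbable positions by the mask (this function and its step_size argument are assumptions, not ART's implementation):

import numpy as np

def update_embeddings(embeddings, gradients, mask, step_size=0.1):
    # Hypothetical sketch: move the embeddings along the supplied gradients
    # (the caller above has already negated them), but only where the mask
    # marks positions that are allowed to change; everything else is untouched.
    embeddings = np.asarray(embeddings, dtype=np.float32)
    gradients = np.asarray(gradients, dtype=np.float32)
    mask = np.asarray(mask, dtype=np.float32)   # assumed broadcastable to embeddings
    return embeddings + step_size * gradients * mask
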
    def train(self,
              train_dataset,
              output_dir,
              multi_label=False,
              show_running_loss=True,
              eval_df=None,
              **kwargs):
        """
        Trains the model on train_dataset.

        Utility function to be used by the train_model() method. Not intended to be used directly.
        """

        tokenizer = self.tokenizer
        device = self.device
        model = self.model
        args = self.args

        tb_writer = SummaryWriter(logdir=args["tensorboard_dir"])
        train_sampler = RandomSampler(train_dataset)
        train_dataloader = DataLoader(train_dataset,
                                      sampler=train_sampler,
                                      batch_size=args["train_batch_size"])

        t_total = len(train_dataloader) // args[
            "gradient_accumulation_steps"] * args["num_train_epochs"]

        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [{
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            args["weight_decay"]
        }, {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.0
        }]

        warmup_steps = math.ceil(t_total * args["warmup_ratio"])
        args["warmup_steps"] = warmup_steps if args[
            "warmup_steps"] == 0 else args["warmup_steps"]

        optimizer = AdamW(optimizer_grouped_parameters,
                          lr=args["learning_rate"],
                          eps=args["adam_epsilon"])
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=args["warmup_steps"],
            num_training_steps=t_total)

        if args["fp16"]:
            try:
                from apex import amp
            except ImportError:
                raise ImportError(
                    "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
                )

            model, optimizer = amp.initialize(model,
                                              optimizer,
                                              opt_level=args["fp16_opt_level"])

        if args["n_gpu"] > 1:
            model = torch.nn.DataParallel(model)

        global_step = 0
        tr_loss, logging_loss = 0.0, 0.0
        model.zero_grad()
        train_iterator = trange(int(args["num_train_epochs"]),
                                desc="Epoch",
                                disable=args['silent'])
        epoch_number = 0
        if args['evaluate_during_training']:
            extra_metrics = {key: [] for key in kwargs}
            if multi_label:
                training_progress_scores = {
                    'global_step': [],
                    'LRAP': [],
                    'train_loss': [],
                    'eval_loss': [],
                    **extra_metrics
                }
            else:
                if self.model.num_labels == 2:
                    training_progress_scores = {
                        'global_step': [],
                        'tp': [],
                        'tn': [],
                        'fp': [],
                        'fn': [],
                        'mcc': [],
                        'train_loss': [],
                        'eval_loss': [],
                        **extra_metrics
                    }
                else:
                    training_progress_scores = {
                        'global_step': [],
                        'mcc': [],
                        'train_loss': [],
                        'eval_loss': [],
                        **extra_metrics
                    }

        if args['wandb_project']:
            wandb.init(project=args['wandb_project'], config={**args})
            wandb.watch(self.model)

        model.train()
        for _ in train_iterator:
            # epoch_iterator = tqdm(train_dataloader, desc="Iteration")
            for step, batch in enumerate(
                    tqdm(train_dataloader,
                         desc="Current iteration",
                         disable=args['silent'])):
                batch = tuple(t.to(device) for t in batch)

                inputs = self._get_inputs_dict(batch)
                outputs = model(**inputs)
                # model outputs are always tuple in pytorch-transformers (see doc)
                loss = outputs[0]

                if args['n_gpu'] > 1:
                    loss = loss.mean(
                    )  # mean() to average on multi-gpu parallel training

                current_loss = loss.item()

                if show_running_loss:
                    print("\rRunning loss: %f" % loss, end="")

                if args["gradient_accumulation_steps"] > 1:
                    loss = loss / args["gradient_accumulation_steps"]

                if args["fp16"]:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                    torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), args["max_grad_norm"])
                else:
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   args["max_grad_norm"])

                tr_loss += loss.item()
                if (step + 1) % args["gradient_accumulation_steps"] == 0:
                    optimizer.step()
                    scheduler.step()  # Update learning rate schedule
                    model.zero_grad()
                    global_step += 1

                    if args["logging_steps"] > 0 and global_step % args[
                            "logging_steps"] == 0:
                        # Log metrics
                        tb_writer.add_scalar("lr",
                                             scheduler.get_lr()[0],
                                             global_step)
                        tb_writer.add_scalar("loss", (tr_loss - logging_loss) /
                                             args["logging_steps"],
                                             global_step)
                        logging_loss = tr_loss
                        if args['wandb_project']:
                            wandb.log({
                                'Training loss': current_loss,
                                'lr': scheduler.get_lr()[0],
                                'global_step': global_step
                            })

                    if args["save_steps"] > 0 and global_step % args[
                            "save_steps"] == 0:
                        # Save model checkpoint
                        output_dir_current = os.path.join(
                            output_dir, "checkpoint-{}".format(global_step))

                        if not os.path.exists(output_dir_current):
                            os.makedirs(output_dir_current)

                        # Take care of distributed/parallel training
                        model_to_save = model.module if hasattr(
                            model, "module") else model
                        model_to_save.save_pretrained(output_dir_current)
                        self.tokenizer.save_pretrained(output_dir_current)

                    if args['evaluate_during_training'] and (
                            args["evaluate_during_training_steps"] > 0
                            and global_step %
                            args["evaluate_during_training_steps"] == 0):
                        # Only evaluate when single GPU otherwise metrics may not average well
                        results, _, _ = self.eval_model(eval_df,
                                                        verbose=True,
                                                        **kwargs)
                        for key, value in results.items():
                            tb_writer.add_scalar('eval_{}'.format(key), value,
                                                 global_step)

                        output_dir_current = os.path.join(
                            output_dir, "checkpoint-{}".format(global_step))

                        if not os.path.exists(output_dir_current):
                            os.makedirs(output_dir_current)

                        if args['save_eval_checkpoints']:
                            model_to_save = model.module if hasattr(
                                model, "module") else model
                            model_to_save.save_pretrained(output_dir_current)
                            self.tokenizer.save_pretrained(output_dir_current)

                        output_eval_file = os.path.join(
                            output_dir_current, "eval_results.txt")
                        with open(output_eval_file, "w") as writer:
                            for key in sorted(results.keys()):
                                writer.write("{} = {}\n".format(
                                    key, str(results[key])))

                        training_progress_scores['global_step'].append(
                            global_step)
                        training_progress_scores['train_loss'].append(
                            current_loss)
                        for key in results:
                            training_progress_scores[key].append(results[key])
                        report = pd.DataFrame(training_progress_scores)
                        report.to_csv(os.path.join(args['output_dir'],
                                                   'training_progress_scores.csv'),
                                      index=False)

                        if args['wandb_project']:
                            wandb.log(
                                self._get_last_metrics(
                                    training_progress_scores))

            epoch_number += 1
            output_dir_current = os.path.join(output_dir,
                                              "epoch-{}".format(epoch_number))

            if not os.path.exists(output_dir_current):
                os.makedirs(output_dir_current)

            model_to_save = model.module if hasattr(model, "module") else model
            model_to_save.save_pretrained(output_dir_current)
            self.tokenizer.save_pretrained(output_dir_current)

            if args['evaluate_during_training']:
                results, _, _ = self.eval_model(eval_df,
                                                verbose=True,
                                                **kwargs)

                output_eval_file = os.path.join(output_dir_current,
                                                "eval_results.txt")
                with open(output_eval_file, "w") as writer:
                    for key in sorted(results.keys()):
                        writer.write("{} = {}\n".format(
                            key, str(results[key])))

        return global_step, tr_loss / global_step
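
The checkpointing logic above repeats one pattern worth isolating: when the model may be wrapped in torch.nn.DataParallel, the underlying module has to be unwrapped before save_pretrained is called. A minimal, hedged sketch of that pattern; the model, tokenizer, and output path below are illustrative stand-ins, not objects from the example above.

import os

def save_checkpoint(model, tokenizer, output_dir):
    # Hypothetical helper illustrating the save pattern used above.
    os.makedirs(output_dir, exist_ok=True)
    # DataParallel wraps the real model under `.module`; save the
    # unwrapped model so it can be reloaded without the wrapper.
    model_to_save = model.module if hasattr(model, "module") else model
    model_to_save.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
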
    def train(self,
              train_dataset,
              output_dir,
              show_running_loss=True,
              eval_data=None):
        """
        Trains the model on train_dataset.

        Utility function to be used by the train_model() method. Not intended to be used directly.
        """

        tokenizer = self.tokenizer
        device = self.device
        model = self.model
        args = self.args

        tb_writer = SummaryWriter()
        train_sampler = RandomSampler(train_dataset)
        train_dataloader = DataLoader(train_dataset,
                                      sampler=train_sampler,
                                      batch_size=args["train_batch_size"])

        t_total = len(train_dataloader) // args[
            "gradient_accumulation_steps"] * args["num_train_epochs"]

        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [{
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            args["weight_decay"]
        }, {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.0
        }]

        warmup_steps = math.ceil(t_total * args["warmup_ratio"])
        args["warmup_steps"] = warmup_steps if args[
            "warmup_steps"] == 0 else args["warmup_steps"]

        optimizer = AdamW(optimizer_grouped_parameters,
                          lr=args["learning_rate"],
                          eps=args["adam_epsilon"])
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=args["warmup_steps"],
            num_training_steps=t_total)

        if args["fp16"]:
            try:
                from apex import amp
            except ImportError:
                raise ImportError(
                    "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
                )

            model, optimizer = amp.initialize(model,
                                              optimizer,
                                              opt_level=args["fp16_opt_level"])

        if args["n_gpu"] > 1:
            model = torch.nn.DataParallel(model)

        global_step = 0
        tr_loss, logging_loss = 0.0, 0.0
        model.zero_grad()
        train_iterator = trange(int(args["num_train_epochs"]),
                                desc="Epoch",
                                disable=args['silent'])

        model.train()
        for _ in train_iterator:
            # epoch_iterator = tqdm(train_dataloader, desc="Iteration")
            for step, batch in enumerate(
                    tqdm(train_dataloader,
                         desc="Current iteration",
                         disable=args['silent'])):
                batch = tuple(t.to(device) for t in batch)

                inputs = {
                    'input_ids': batch[0],
                    'attention_mask': batch[1],
                    'start_positions': batch[3],
                    'end_positions': batch[4]
                }

                if args['model_type'] != 'distilbert':
                    inputs['token_type_ids'] = None if args[
                        'model_type'] == 'xlm' else batch[2]
                if args['model_type'] in ['xlnet', 'xlm']:
                    inputs.update({'cls_index': batch[5], 'p_mask': batch[6]})

                outputs = model(**inputs)
                # model outputs are always tuple in pytorch-transformers (see doc)
                loss = outputs[0]
                if show_running_loss:
                    print("\rRunning loss: %f" % loss, end="")

                if args['n_gpu'] > 1:
                    loss = loss.mean()  # mean() to average on multi-gpu parallel training
                if args["gradient_accumulation_steps"] > 1:
                    loss = loss / args["gradient_accumulation_steps"]

                if args["fp16"]:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                    torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), args["max_grad_norm"])
                else:
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   args["max_grad_norm"])

                tr_loss += loss.item()
                if (step + 1) % args["gradient_accumulation_steps"] == 0:
                    optimizer.step()
                    scheduler.step()  # Update learning rate schedule
                    model.zero_grad()
                    global_step += 1

                    if args["logging_steps"] > 0 and global_step % args[
                            "logging_steps"] == 0:
                        # Log metrics
                        if args['evaluate_during_training']:
                            # Only evaluate when single GPU otherwise metrics may not average well
                            results, _, _ = self.eval_model(eval_data,
                                                            verbose=True)
                            for key, value in results.items():
                                tb_writer.add_scalar('eval_{}'.format(key),
                                                     value, global_step)
                        tb_writer.add_scalar("lr",
                                             scheduler.get_lr()[0],
                                             global_step)
                        tb_writer.add_scalar("loss", (tr_loss - logging_loss) /
                                             args["logging_steps"],
                                             global_step)
                        logging_loss = tr_loss

                    if args["save_steps"] > 0 and global_step % args[
                            "save_steps"] == 0:
                        # Save model checkpoint
                        output_dir_current = os.path.join(
                            output_dir, "checkpoint-{}".format(global_step))

                        if not os.path.exists(output_dir_current):
                            os.makedirs(output_dir_current)

                        # Take care of distributed/parallel training
                        model_to_save = model.module if hasattr(
                            model, "module") else model
                        model_to_save.save_pretrained(output_dir_current)
                        self.tokenizer.save_pretrained(output_dir_current)

        return global_step, tr_loss / global_step
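
The schedule length in the example above is derived from the dataloader length, the gradient accumulation factor, and the epoch count, with warmup taken as a ratio of that total. A hedged sketch of the arithmetic with illustrative numbers (none of these values come from the example itself):

import math

batches_per_epoch = 1000           # stands in for len(train_dataloader)
gradient_accumulation_steps = 2
num_train_epochs = 3
warmup_ratio = 0.06

t_total = batches_per_epoch // gradient_accumulation_steps * num_train_epochs
warmup_steps = math.ceil(t_total * warmup_ratio)
print(t_total, warmup_steps)       # 1500 90
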
Example #27
0
    def train(self, model_path: Optional[str] = None):
        """
        Main training entry point.

        Args:
            model_path:
                (Optional) Local path to model if model to train has been instantiated from a local path
                If present, we will try reloading the optimizer/scheduler states from there.
        """
        train_dataloader = self.get_train_dataloader()
        if self.args.max_steps > 0:
            t_total = self.args.max_steps
            num_train_epochs = (
                self.args.max_steps // (len(train_dataloader) // self.args.gradient_accumulation_steps) + 1
            )
        else:
            t_total = int(len(train_dataloader) // self.args.gradient_accumulation_steps * self.args.num_train_epochs)
            num_train_epochs = self.args.num_train_epochs

        optimizer, scheduler = self.get_optimizers(num_training_steps=t_total)

        # Check if saved optimizer or scheduler states exist
        if (
            model_path is not None
            and os.path.isfile(os.path.join(model_path, "optimizer.pt"))
            and os.path.isfile(os.path.join(model_path, "scheduler.pt"))
        ):
            # Load in optimizer and scheduler states
            optimizer.load_state_dict(torch.load(os.path.join(model_path, "optimizer.pt")))
            scheduler.load_state_dict(torch.load(os.path.join(model_path, "scheduler.pt")))

        model = self.model
        model.to(self.args.device)
        if self.args.fp16:
            if not is_apex_available():
                raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
            model, optimizer = amp.initialize(model, optimizer, opt_level=self.args.fp16_opt_level)

        # multi-gpu training (should be after apex fp16 initialization)
        if self.args.n_gpu > 1:
            model = torch.nn.DataParallel(model)

        # Distributed training (should be after apex fp16 initialization)
        if self.args.local_rank != -1:
            model = torch.nn.parallel.DistributedDataParallel(
                model,
                device_ids=[self.args.local_rank],
                output_device=self.args.local_rank,
                find_unused_parameters=True,
            )

        if self.tb_writer is not None:
            self.tb_writer.add_text("args", self.args.to_json_string())
            self.tb_writer.add_hparams(self.args.to_sanitized_dict(), metric_dict={})
        if is_wandb_available():
            self._setup_wandb()

        # Train!
        if is_tpu_available():
            total_train_batch_size = self.args.train_batch_size * xm.xrt_world_size()
        else:
            total_train_batch_size = (
                self.args.train_batch_size
                * self.args.gradient_accumulation_steps
                * (torch.distributed.get_world_size() if self.args.local_rank != -1 else 1)
            )
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", self.num_examples(train_dataloader))
        logger.info("  Num Epochs = %d", num_train_epochs)
        logger.info("  Instantaneous batch size per device = %d", self.args.per_gpu_train_batch_size)
        logger.info("  Total train batch size (w. parallel, distributed & accumulation) = %d", total_train_batch_size)
        logger.info("  Gradient Accumulation steps = %d", self.args.gradient_accumulation_steps)
        logger.info("  Total optimization steps = %d", t_total)

        global_step = 0
        epochs_trained = 0
        steps_trained_in_current_epoch = 0
        # Check if continuing training from a checkpoint
        if model_path is not None:
            # set global_step to global_step of last saved checkpoint from model path
            try:
                global_step = int(model_path.split("-")[-1].split("/")[0])
                epochs_trained = global_step // (len(train_dataloader) // self.args.gradient_accumulation_steps)
                steps_trained_in_current_epoch = global_step % (
                    len(train_dataloader) // self.args.gradient_accumulation_steps
                )

                logger.info("  Continuing training from checkpoint, will skip to saved global_step")
                logger.info("  Continuing training from epoch %d", epochs_trained)
                logger.info("  Continuing training from global step %d", global_step)
                logger.info("  Will skip the first %d steps in the first epoch", steps_trained_in_current_epoch)
            except ValueError:
                global_step = 0
                logger.info("  Starting fine-tuning.")

        tr_loss = 0.0
        logging_loss = 0.0
        model.zero_grad()
        train_iterator = trange(
            epochs_trained, int(num_train_epochs), desc="Epoch", disable=not self.is_local_master()
        )
        for epoch in train_iterator:
            epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=not self.is_local_master())
            for step, inputs in enumerate(epoch_iterator):

                # Skip past any already trained steps if resuming training
                if steps_trained_in_current_epoch > 0:
                    steps_trained_in_current_epoch -= 1
                    continue

                tr_loss += self._training_step(model, inputs, optimizer)

                if (step + 1) % self.args.gradient_accumulation_steps == 0 or (
                    # last step in epoch but step is always smaller than gradient_accumulation_steps
                    len(epoch_iterator) <= self.args.gradient_accumulation_steps
                    and (step + 1) == len(epoch_iterator)
                ):
                    if self.args.fp16:
                        torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), self.args.max_grad_norm)
                    else:
                        torch.nn.utils.clip_grad_norm_(model.parameters(), self.args.max_grad_norm)

                    if is_tpu_available():
                        xm.optimizer_step(optimizer)
                    else:
                        optimizer.step()

                    scheduler.step()
                    model.zero_grad()
                    global_step += 1

                    if self.is_local_master():
                        if (self.args.logging_steps > 0 and global_step % self.args.logging_steps == 0) or (
                            global_step == 1 and self.args.logging_first_step
                        ):
                            logs = {}
                            if self.args.evaluate_during_training:
                                results = self.evaluate()
                                for key, value in results.items():
                                    eval_key = "eval_{}".format(key)
                                    logs[eval_key] = value

                            loss_scalar = (tr_loss - logging_loss) / self.args.logging_steps
                            learning_rate_scalar = scheduler.get_last_lr()[0]
                            logs["learning_rate"] = learning_rate_scalar
                            logs["loss"] = loss_scalar
                            logging_loss = tr_loss

                            if self.tb_writer:
                                for k, v in logs.items():
                                    self.tb_writer.add_scalar(k, v, global_step)
                            if is_wandb_available():
                                wandb.log(logs, step=global_step)

                            epoch_iterator.write(json.dumps({**logs, **{"step": global_step}}))

                        if self.args.save_steps > 0 and global_step % self.args.save_steps == 0:
                            # In all cases (even distributed/parallel), self.model is always a reference
                            # to the model we want to save.
                            if hasattr(model, "module"):
                                assert model.module is self.model
                            else:
                                assert model is self.model
                            # Save model checkpoint
                            output_dir = os.path.join(self.args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{global_step}")

                            self.save_model(output_dir)
                            self._rotate_checkpoints()
                            torch.save(optimizer.state_dict(), os.path.join(output_dir, "optimizer.pt"))
                            torch.save(scheduler.state_dict(), os.path.join(output_dir, "scheduler.pt"))
                            logger.info("Saving optimizer and scheduler states to %s", output_dir)

                if self.args.max_steps > 0 and global_step > self.args.max_steps:
                    epoch_iterator.close()
                    break
            if self.args.max_steps > 0 and global_step > self.args.max_steps:
                train_iterator.close()
                break
            if self.args.tpu_metrics_debug:
                # tpu-comment: Logging debug metrics for PyTorch/XLA (compile, execute times, ops, etc.)
                xm.master_print(met.metrics_report())

        if self.tb_writer:
            self.tb_writer.close()

        logger.info("\n\nTraining completed. Do not forget to share your model on huggingface.co/models =)\n\n")
        return TrainOutput(global_step, tr_loss / global_step)
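
The resume logic above parses the global step out of a "checkpoint-<step>" directory name and splits it into completed epochs plus steps already consumed in the current epoch. A hedged sketch of that arithmetic with made-up numbers; the path and step counts are illustrative only.

model_path = "output/checkpoint-1500"   # assumed directory layout
steps_per_epoch = 400                   # len(train_dataloader) // gradient_accumulation_steps

global_step = int(model_path.split("-")[-1].split("/")[0])      # 1500
epochs_trained = global_step // steps_per_epoch                 # 3
steps_trained_in_current_epoch = global_step % steps_per_epoch  # 300
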
    def _attack(
        self,
        initial_sample: np.ndarray,
        original_sample: np.ndarray,
        y_p: int,
        target: int,
        initial_delta: float,
        initial_epsilon: float,
        clip_min: float,
        clip_max: float,
    ) -> np.ndarray:
        """
        Main function for the boundary attack.

        :param initial_sample: An initial adversarial example.
        :param original_sample: The original input.
        :param y_p: The predicted label of the original input.
        :param target: The target label.
        :param initial_delta: Initial step size for the orthogonal step.
        :param initial_epsilon: Initial step size for the step towards the target.
        :param clip_min: Minimum value of an example.
        :param clip_max: Maximum value of an example.
        :return: an adversarial example.
        """
        # Get initialization for some variables
        x_adv = initial_sample
        self.curr_delta = initial_delta
        self.curr_epsilon = initial_epsilon

        self.curr_adv = x_adv

        # Main loop to wander around the boundary
        for _ in trange(self.max_iter,
                        desc="Boundary attack - iterations",
                        disable=not self.verbose):
            # Trust region method to adjust delta
            for _ in range(self.num_trial):
                potential_advs = []
                for _ in range(self.sample_size):
                    potential_adv = x_adv + self._orthogonal_perturb(
                        self.curr_delta, x_adv, original_sample)
                    potential_adv = np.clip(potential_adv, clip_min, clip_max)
                    potential_advs.append(potential_adv)

                preds = np.argmax(
                    self.estimator.predict(np.array(potential_advs),
                                           batch_size=self.batch_size),
                    axis=1,
                )

                if self.targeted:
                    satisfied = preds == target
                else:
                    satisfied = preds != y_p

                delta_ratio = np.mean(satisfied)

                if delta_ratio < 0.2:
                    self.curr_delta *= self.step_adapt
                elif delta_ratio > 0.5:
                    self.curr_delta /= self.step_adapt

                if delta_ratio > 0:
                    x_advs = np.array(potential_advs)[np.where(satisfied)[0]]
                    break
            else:
                logger.warning("Adversarial example found but not optimal.")
                return x_adv

            # Trust region method to adjust epsilon
            for _ in range(self.num_trial):
                perturb = np.repeat(
                    np.array([original_sample]), len(x_advs), axis=0) - x_advs
                perturb *= self.curr_epsilon
                potential_advs = x_advs + perturb
                potential_advs = np.clip(potential_advs, clip_min, clip_max)
                preds = np.argmax(
                    self.estimator.predict(potential_advs,
                                           batch_size=self.batch_size),
                    axis=1,
                )

                if self.targeted:
                    satisfied = preds == target
                else:
                    satisfied = preds != y_p

                epsilon_ratio = np.mean(satisfied)

                if epsilon_ratio < 0.2:
                    self.curr_epsilon *= self.step_adapt
                elif epsilon_ratio > 0.5:
                    self.curr_epsilon /= self.step_adapt

                if epsilon_ratio > 0:
                    x_adv = self._best_adv(
                        original_sample,
                        potential_advs[np.where(satisfied)[0]])
                    self.curr_adv = x_adv
                    break
            else:
                logger.warning("Adversarial example found but not optimal.")
                return self._best_adv(original_sample, x_advs)

            if self.min_epsilon is not None and self.curr_epsilon < self.min_epsilon:
                return x_adv

        return x_adv
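
Both trust-region loops in _attack adapt their step size with the same rule: shrink the step when fewer than 20% of the trial perturbations remain adversarial, grow it when more than 50% do. A hedged one-function sketch of that rule; adapt_step_size and its default step_adapt value are illustrative, not part of the class above.

def adapt_step_size(step, success_ratio, step_adapt=0.667):
    # Hypothetical helper mirroring the adaptation rule in _attack.
    if success_ratio < 0.2:
        return step * step_adapt   # too few successes: take smaller steps
    if success_ratio > 0.5:
        return step / step_adapt   # plenty of successes: take larger steps
    return step
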
Example #29
0
    def train(
        self,
        train_dataset,
        output_dir,
        files_list=None,
        image_path=None,
        text_label=None,
        labels_label=None,
        images_label=None,
        image_type_extension=None,
        data_type_extension=None,
        show_running_loss=True,
        eval_data=None,
        verbose=True,
        **kwargs,
    ):
        """
        Trains the model on train_dataset.

        Utility function to be used by the train_model() method. Not intended to be used directly.
        """

        device = self.device
        model = self.model
        args = self.args
        multi_label = self.multi_label

        tb_writer = SummaryWriter(logdir=args.tensorboard_dir)
        train_sampler = RandomSampler(train_dataset)
        train_dataloader = DataLoader(
            train_dataset,
            sampler=train_sampler,
            batch_size=args.train_batch_size,
            collate_fn=collate_fn,
            num_workers=args.process_count,
        )

        t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs

        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
                "weight_decay": args.weight_decay,
            },
            {
                "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
                "weight_decay": 0.0,
            },
        ]

        warmup_steps = math.ceil(t_total * args.warmup_ratio)
        args.warmup_steps = warmup_steps if args.warmup_steps == 0 else args.warmup_steps

        optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total
        )

        if args.fp16:
            try:
                from apex import amp
            except ImportError:
                raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")

            model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level)

        if args.n_gpu > 1:
            model = torch.nn.DataParallel(model)

        global_step = 0
        tr_loss, logging_loss = 0.0, 0.0
        model.zero_grad()
        train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.silent)
        epoch_number = 0
        best_eval_metric = None
        early_stopping_counter = 0

        if args.evaluate_during_training:
            training_progress_scores = self._create_training_progress_scores(multi_label, **kwargs)

        if args.wandb_project:
            wandb.init(project=args.wandb_project, config={**args}, **args.wandb_kwargs)
            wandb.watch(self.model)

        model.train()
        for _ in train_iterator:
            train_iterator.set_description(f"Epoch {epoch_number} of {args.num_train_epochs}")
            for step, batch in enumerate(
                tqdm(train_dataloader, desc=f"Running Epoch {epoch_number}", disable=args.silent)
            ):
                batch = tuple(t.to(device) for t in batch)
                labels = batch[5]

                inputs = self._get_inputs_dict(batch)
                outputs = model(**inputs)
                # model outputs are always tuple in pytorch-transformers (see doc)
                logits = outputs[0]  # Different from default behaviour
                loss = self.criterion(logits, labels)

                if args.n_gpu > 1:
                    loss = loss.mean()  # mean() to average on multi-gpu parallel training

                current_loss = loss.item()

                if show_running_loss:
                    print("\rRunning loss: %f" % loss, end="")

                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps

                if args.fp16:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                    # torch.nn.utils.clip_grad_norm_(
                    #     amp.master_params(optimizer), args.max_grad_norm
                    # )
                else:
                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(
                    #     model.parameters(), args.max_grad_norm
                    # )

                tr_loss += loss.item()
                if (step + 1) % args.gradient_accumulation_steps == 0:
                    if args.fp16:
                        torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.max_grad_norm)
                    else:
                        torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)

                    optimizer.step()
                    scheduler.step()  # Update learning rate schedule
                    model.zero_grad()
                    global_step += 1

                    if args.logging_steps > 0 and global_step % args.logging_steps == 0:
                        # Log metrics
                        tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step)
                        tb_writer.add_scalar("loss", (tr_loss - logging_loss) / args.logging_steps, global_step)
                        logging_loss = tr_loss
                        if args.wandb_project:
                            wandb.log(
                                {
                                    "Training loss": current_loss,
                                    "lr": scheduler.get_lr()[0],
                                    "global_step": global_step,
                                }
                            )

                    if args.save_steps > 0 and global_step % args.save_steps == 0:
                        # Save model checkpoint
                        output_dir_current = os.path.join(output_dir, "checkpoint-{}".format(global_step))

                        self._save_model(output_dir_current, model=model)

                    if args.evaluate_during_training and (
                        args.evaluate_during_training_steps > 0
                        and global_step % args.evaluate_during_training_steps == 0
                    ):
                        # Only evaluate when single GPU otherwise metrics may not average well
                        results, _ = self.eval_model(
                            eval_data,
                            files_list=files_list,
                            image_path=image_path,
                            text_label=text_label,
                            labels_label=labels_label,
                            images_label=images_label,
                            image_type_extension=image_type_extension,
                            data_type_extension=data_type_extension,
                            verbose=verbose and args.evaluate_during_training_verbose,
                            silent=args.evaluate_during_training_silent,
                            **kwargs,
                        )
                        for key, value in results.items():
                            tb_writer.add_scalar("eval_{}".format(key), value, global_step)

                        output_dir_current = os.path.join(output_dir, "checkpoint-{}".format(global_step))

                        if args.save_eval_checkpoints:
                            self._save_model(output_dir_current, model=model, results=results)

                        training_progress_scores["global_step"].append(global_step)
                        training_progress_scores["train_loss"].append(current_loss)
                        for key in results:
                            training_progress_scores[key].append(results[key])
                        report = pd.DataFrame(training_progress_scores)
                        report.to_csv(
                            os.path.join(args.output_dir, "training_progress_scores.csv"), index=False,
                        )

                        if args.wandb_project:
                            wandb.log(self._get_last_metrics(training_progress_scores))

                        if not best_eval_metric:
                            best_eval_metric = results[args.early_stopping_metric]
                            self._save_model(args.best_model_dir, model=model, results=results)
                        if best_eval_metric and args.early_stopping_metric_minimize:
                            if results[args.early_stopping_metric] - best_eval_metric < args.early_stopping_delta:
                                best_eval_metric = results[args.early_stopping_metric]
                                self._save_model(args.best_model_dir, model=model, results=results)
                                early_stopping_counter = 0
                            else:
                                if args.use_early_stopping:
                                    if early_stopping_counter < args.early_stopping_patience:
                                        early_stopping_counter += 1
                                        if verbose:
                                            logger.info(f" No improvement in {args.early_stopping_metric}")
                                            logger.info(f" Current step: {early_stopping_counter}")
                                            logger.info(f" Early stopping patience: {args.early_stopping_patience}")
                                    else:
                                        if verbose:
                                            logger.info(f" Patience of {args.early_stopping_patience} steps reached")
                                            logger.info(" Training terminated.")
                                            train_iterator.close()
                                        return global_step, tr_loss / global_step
                        else:
                            if results[args.early_stopping_metric] - best_eval_metric > args.early_stopping_delta:
                                best_eval_metric = results[args.early_stopping_metric]
                                self._save_model(args.best_model_dir, model=model, results=results)
                                early_stopping_counter = 0
                            else:
                                if args.use_early_stopping:
                                    if early_stopping_counter < args.early_stopping_patience:
                                        early_stopping_counter += 1
                                        if verbose:
                                            logger.info(f" No improvement in {args.early_stopping_metric}")
                                            logger.info(f" Current step: {early_stopping_counter}")
                                            logger.info(f" Early stopping patience: {args.early_stopping_patience}")
                                    else:
                                        if verbose:
                                            logger.info(f" Patience of {args.early_stopping_patience} steps reached")
                                            logger.info(" Training terminated.")
                                            train_iterator.close()
                                        return global_step, tr_loss / global_step

            epoch_number += 1
            output_dir_current = os.path.join(output_dir, "checkpoint-{}-epoch-{}".format(global_step, epoch_number))

            if args.save_model_every_epoch or args.evaluate_during_training:
                os.makedirs(output_dir_current, exist_ok=True)

            if args.save_model_every_epoch:
                self._save_model(output_dir_current, model=model)

            if args.evaluate_during_training:
                results, _ = self.eval_model(
                    eval_data,
                    files_list=files_list,
                    image_path=image_path,
                    text_label=text_label,
                    labels_label=labels_label,
                    images_label=images_label,
                    image_type_extension=image_type_extension,
                    data_type_extension=data_type_extension,
                    verbose=verbose and args.evaluate_during_training_verbose,
                    silent=args.evaluate_during_training_silent,
                    **kwargs,
                )

                self._save_model(output_dir_current, results=results)

                training_progress_scores["global_step"].append(global_step)
                training_progress_scores["train_loss"].append(current_loss)
                for key in results:
                    training_progress_scores[key].append(results[key])
                report = pd.DataFrame(training_progress_scores)
                report.to_csv(
                    os.path.join(args.output_dir, "training_progress_scores.csv"), index=False,
                )

                if not best_eval_metric:
                    best_eval_metric = results[args.early_stopping_metric]
                    self._save_model(args.best_model_dir, model=model, results=results)
                if best_eval_metric and args.early_stopping_metric_minimize:
                    if results[args.early_stopping_metric] - best_eval_metric < args.early_stopping_delta:
                        best_eval_metric = results[args.early_stopping_metric]
                        self._save_model(args.best_model_dir, model=model, results=results)
                        early_stopping_counter = 0
                    else:
                        if args.use_early_stopping and args.early_stopping_consider_epochs:
                            if early_stopping_counter < args.early_stopping_patience:
                                early_stopping_counter += 1
                                if verbose:
                                    logger.info(f" No improvement in {args.early_stopping_metric}")
                                    logger.info(f" Current step: {early_stopping_counter}")
                                    logger.info(f" Early stopping patience: {args.early_stopping_patience}")
                            else:
                                if verbose:
                                    logger.info(f" Patience of {args.early_stopping_patience} steps reached")
                                    logger.info(" Training terminated.")
                                    train_iterator.close()
                                return global_step, tr_loss / global_step
                else:
                    if results[args.early_stopping_metric] - best_eval_metric > args.early_stopping_delta:
                        best_eval_metric = results[args.early_stopping_metric]
                        self._save_model(args.best_model_dir, model=model, results=results)
                        early_stopping_counter = 0
                    else:
                        if args.use_early_stopping and args.early_stopping_consider_epochs:
                            if early_stopping_counter < args.early_stopping_patience:
                                early_stopping_counter += 1
                                if verbose:
                                    logger.info(f" No improvement in {args.early_stopping_metric}")
                                    logger.info(f" Current step: {early_stopping_counter}")
                                    logger.info(f" Early stopping patience: {args.early_stopping_patience}")
                            else:
                                if verbose:
                                    logger.info(f" Patience of {args.early_stopping_patience} steps reached")
                                    logger.info(" Training terminated.")
                                    train_iterator.close()
                                return global_step, tr_loss / global_step

        return global_step, tr_loss / global_step
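
The early-stopping branches above all perform the same bookkeeping: remember the best value of the monitored metric, reset a patience counter whenever it improves by more than the delta, and stop once the counter has exhausted the patience. A hedged sketch of that logic with hypothetical names, not the library's API:

def early_stopping_update(metric, best, counter, patience,
                          minimize=True, delta=0.0):
    # Returns (stop, new_best, new_counter); a sketch, not the library code.
    improved = (best is None or
                (metric - best < delta if minimize else metric - best > delta))
    if improved:
        return False, metric, 0            # new best: keep training, reset patience
    if counter < patience:
        return False, best, counter + 1    # no improvement yet, keep waiting
    return True, best, counter             # patience exhausted: stop
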
Example #30
0
    def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,). If `self.targeted` is true, then `y` represents the target labels. Otherwise, the
                  targets are the original class labels.
        :return: An array holding the adversarial examples.
        """
        y = check_and_transform_label_format(y, self.estimator.nb_classes)
        x_adv = x.astype(ART_NUMPY_DTYPE)

        if self.estimator.clip_values is not None:
            clip_min_per_pixel, clip_max_per_pixel = self.estimator.clip_values
        else:
            clip_min_per_pixel, clip_max_per_pixel = np.amin(x), np.amax(x)

        # Assert that, if attack is targeted, y_val is provided:
        if self.targeted and y is None:
            raise ValueError("Target labels `y` need to be provided for a targeted attack.")

        # No labels provided, use model prediction as correct class
        if y is None:
            y = get_labels_np_array(self.estimator.predict(x, batch_size=self.batch_size))

        # Compute perturbation with implicit batching
        nb_batches = int(np.ceil(x_adv.shape[0] / float(self.batch_size)))
        for batch_id in trange(nb_batches, desc="C&W L_inf", disable=not self.verbose):
            batch_index_1, batch_index_2 = batch_id * self.batch_size, (batch_id + 1) * self.batch_size
            x_batch = x_adv[batch_index_1:batch_index_2]
            y_batch = y[batch_index_1:batch_index_2]

            # Determine values for later clipping
            clip_min = np.clip(x_batch - self.eps, clip_min_per_pixel, clip_max_per_pixel)
            clip_max = np.clip(x_batch + self.eps, clip_min_per_pixel, clip_max_per_pixel)

            # The optimization is performed in tanh space to keep the
            # adversarial images bounded from clip_min and clip_max.
            x_batch_tanh = original_to_tanh(x_batch, clip_min, clip_max, self._tanh_smoother)

            # Initialize perturbation in tanh space:
            x_adv_batch = x_batch.copy()
            x_adv_batch_tanh = x_batch_tanh.copy()

            # Initialize optimization:
            z_logits, loss = self._loss(x_adv_batch, y_batch)
            attack_success = loss <= 0
            learning_rate = self.learning_rate * np.ones(x_batch.shape[0])

            for i_iter in range(self.max_iter):
                logger.debug("Iteration step %i out of %i", i_iter, self.max_iter)
                logger.debug("Average Loss: %f", np.mean(loss))

                logger.debug(
                    "Successful attack samples: %i out of %i", int(np.sum(attack_success)), x_batch.shape[0],
                )

                # only continue optimization for those samples where attack hasn't succeeded yet:
                active = ~attack_success
                if np.sum(active) == 0:
                    break

                # compute gradient:
                logger.debug("Compute loss gradient")
                perturbation_tanh = -self._loss_gradient(
                    z_logits[active],
                    y_batch[active],
                    x_adv_batch[active],
                    x_adv_batch_tanh[active],
                    clip_min[active],
                    clip_max[active],
                )

                # perform line search to optimize perturbation
                # first, halve the learning rate until perturbation actually decreases the loss:
                prev_loss = loss.copy()
                best_loss = loss.copy()
                best_lr = np.zeros(x_batch.shape[0])
                halving = np.zeros(x_batch.shape[0])

                for i_halve in range(self.max_halving):
                    logger.debug(
                        "Perform halving iteration %i out of %i", i_halve, self.max_halving,
                    )
                    do_halving = loss[active] >= prev_loss[active]
                    logger.debug("Halving to be performed on %i samples", int(np.sum(do_halving)))
                    if np.sum(do_halving) == 0:
                        break
                    active_and_do_halving = active.copy()
                    active_and_do_halving[active] = do_halving

                    lr_mult = learning_rate[active_and_do_halving]
                    for _ in range(len(x.shape) - 1):
                        lr_mult = lr_mult[:, np.newaxis]

                    adv_10 = x_adv_batch_tanh[active_and_do_halving]
                    new_x_adv_batch_tanh = adv_10 + lr_mult * perturbation_tanh[do_halving]

                    new_x_adv_batch = tanh_to_original(
                        new_x_adv_batch_tanh, clip_min[active_and_do_halving], clip_max[active_and_do_halving],
                    )
                    _, loss[active_and_do_halving] = self._loss(new_x_adv_batch, y_batch[active_and_do_halving])
                    logger.debug("New Average Loss: %f", np.mean(loss))
                    logger.debug("Loss: %s", str(loss))
                    logger.debug("Prev_loss: %s", str(prev_loss))
                    logger.debug("Best_loss: %s", str(best_loss))

                    best_lr[loss < best_loss] = learning_rate[loss < best_loss]
                    best_loss[loss < best_loss] = loss[loss < best_loss]
                    learning_rate[active_and_do_halving] /= 2
                    halving[active_and_do_halving] += 1
                learning_rate[active] *= 2

                # if no halving was actually required, double the learning rate as long as this
                # decreases the loss:
                for i_double in range(self.max_doubling):
                    logger.debug(
                        "Perform doubling iteration %i out of %i", i_double, self.max_doubling,
                    )
                    do_doubling = (halving[active] == 1) & (loss[active] <= best_loss[active])
                    logger.debug(
                        "Doubling to be performed on %i samples", int(np.sum(do_doubling)),
                    )
                    if np.sum(do_doubling) == 0:
                        break
                    active_and_do_doubling = active.copy()
                    active_and_do_doubling[active] = do_doubling
                    learning_rate[active_and_do_doubling] *= 2

                    lr_mult = learning_rate[active_and_do_doubling]
                    for _ in range(len(x.shape) - 1):
                        lr_mult = lr_mult[:, np.newaxis]

                    x_adv15 = x_adv_batch_tanh[active_and_do_doubling]
                    new_x_adv_batch_tanh = x_adv15 + lr_mult * perturbation_tanh[do_doubling]
                    new_x_adv_batch = tanh_to_original(
                        new_x_adv_batch_tanh, clip_min[active_and_do_doubling], clip_max[active_and_do_doubling],
                    )
                    _, loss[active_and_do_doubling] = self._loss(new_x_adv_batch, y_batch[active_and_do_doubling])
                    logger.debug("New Average Loss: %f", np.mean(loss))
                    best_lr[loss < best_loss] = learning_rate[loss < best_loss]
                    best_loss[loss < best_loss] = loss[loss < best_loss]

                learning_rate[halving == 1] /= 2

                update_adv = best_lr[active] > 0
                logger.debug(
                    "Number of adversarial samples to be finally updated: %i", int(np.sum(update_adv)),
                )

                if np.sum(update_adv) > 0:
                    active_and_update_adv = active.copy()
                    active_and_update_adv[active] = update_adv
                    best_lr_mult = best_lr[active_and_update_adv]
                    for _ in range(len(x.shape) - 1):
                        best_lr_mult = best_lr_mult[:, np.newaxis]

                    best_13 = best_lr_mult * perturbation_tanh[update_adv]
                    x_adv_batch_tanh[active_and_update_adv] = x_adv_batch_tanh[active_and_update_adv] + best_13
                    x_adv_batch[active_and_update_adv] = tanh_to_original(
                        x_adv_batch_tanh[active_and_update_adv],
                        clip_min[active_and_update_adv],
                        clip_max[active_and_update_adv],
                    )
                    (z_logits[active_and_update_adv], loss[active_and_update_adv],) = self._loss(
                        x_adv_batch[active_and_update_adv], y_batch[active_and_update_adv],
                    )
                    attack_success = loss <= 0

            # Update depending on attack success:
            x_adv_batch[~attack_success] = x_batch[~attack_success]
            x_adv[batch_index_1:batch_index_2] = x_adv_batch

        logger.info(
            "Success rate of C&W L_inf attack: %.2f%%",
            100 * compute_success(self.estimator, x, y, x_adv, self.targeted, batch_size=self.batch_size),
        )

        return x_adv
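
The optimization above runs in tanh space so the iterates stay within [clip_min, clip_max] by construction. A hedged sketch of the change of variables performed by original_to_tanh / tanh_to_original; the smoothing constant plays the role of self._tanh_smoother, and this is an illustration rather than the library's exact implementation.

import numpy as np

def to_tanh_space(x, clip_min, clip_max, smoother=0.999999):
    # Map [clip_min, clip_max] onto the real line via arctanh.
    x01 = (x - clip_min) / (clip_max - clip_min)      # rescale to [0, 1]
    return np.arctanh((x01 * 2.0 - 1.0) * smoother)   # smoother avoids +/-inf at the edges

def from_tanh_space(x_tanh, clip_min, clip_max, smoother=0.999999):
    # Inverse map back to (approximately) [clip_min, clip_max].
    x01 = (np.tanh(x_tanh) / smoother + 1.0) / 2.0
    return x01 * (clip_max - clip_min) + clip_min
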
Example #31
0
 def _sample_chain(self, rng, n_sample, init_state, chain_var_funcs,
                   chain_index, parallel_chains, memmap_enabled,
                   memmap_path):
     if not isinstance(init_state, ChainState):
         state = ChainState(init_state)
     else:
         state = init_state
     chain_stats = self._init_chain_stats(
         n_sample, memmap_enabled, memmap_path, chain_index)
     # Initialise chain variable trace arrays
     chains = {}
     for key, chain_func in chain_var_funcs.items():
         var = chain_func(state)
         if memmap_enabled:
             filename = self._generate_memmap_filename(
                 memmap_path, 'trace', key, chain_index)
             chains[key] = self._open_new_memmap(
                 filename, (n_sample,) + var.shape, np.float64, np.nan)
         else:
             chains[key] = np.full((n_sample,) + var.shape, np.nan)
     total_return_nbytes = get_size(chain_stats) + get_size(chains)
     # Check if running in parallel and if total number of bytes to be
     # returned exceeds pickle limit
     if parallel_chains and total_return_nbytes > 2**31 - 1:
         raise RuntimeError(
             f'Total number of bytes allocated for arrays to be returned '
             f'({total_return_nbytes / 2**30:.2f} GiB) exceeds size limit '
             f'for returning results of a process (2 GiB). Try rerunning '
             f'with chain memory-mapping enabled (`memmap_enabled=True`).')
     if TQDM_AVAILABLE:
         desc = ('Sampling' if chain_index is None
                 else f'Chain {chain_index}')
         position = chain_index if parallel_chains else None
         sample_range = tqdm.trange(
             n_sample, desc=desc, unit='it', dynamic_ncols=True,
             position=position)
     else:
         sample_range = range(n_sample)
     try:
         for sample_index in sample_range:
             for trans_key, transition in self.transitions.items():
                 state, trans_stats = transition.sample(state, rng)
                 if trans_stats is not None:
                     if trans_key not in chain_stats:
                         logger.warning(
                             f'Transition {trans_key} returned statistics '
                             f'but has no `statistic_types` attribute.')
                     for key, val in trans_stats.items():
                         if key in chain_stats[trans_key]:
                             chain_stats[trans_key][key][sample_index] = val
             for key, chain_func in chain_var_funcs.items():
                 var = chain_func(state)
                 chains[key][sample_index] = var
     except KeyboardInterrupt:
         if memmap_enabled:
             for chain in chains.values():
                 chain.flush()
             for trans_stats in chain_stats.values():
                 for stat in trans_stats.values():
                     stat.flush()
     else:
         # If not interrupted increment sample_index so that it equals
         # n_sample to flag chain completed sampling
         sample_index += 1
     if parallel_chains and memmap_enabled:
         trace_filenames = self._memmaps_to_filenames(chains)
         stats_filenames = self._memmaps_to_filenames(chain_stats)
         return trace_filenames, stats_filenames, sample_index
     return chains, chain_stats, sample_index
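
The trace allocation at the top of _sample_chain follows a simple pattern: evaluate each chain variable function once on the initial state to learn its shape, then pre-allocate an (n_sample, *shape) array filled with NaN so partially sampled chains are easy to spot. A hedged, in-memory-only sketch that ignores the memory-mapping branch; init_traces is a hypothetical helper.

import numpy as np

def init_traces(n_sample, state, chain_var_funcs):
    traces = {}
    for key, chain_func in chain_var_funcs.items():
        var = np.asarray(chain_func(state))
        # NaN-filled so entries left unwritten (e.g. after a
        # KeyboardInterrupt) are easy to distinguish from real samples.
        traces[key] = np.full((n_sample,) + var.shape, np.nan)
    return traces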