Exemple #1
0
    def train_queue(self,
                    queue,
                    optimizer,
                    criterion=lambda i, l, t: nn.CrossEntropyLoss()(l, t),
                    eval_criterions=None,
                    steps=1,
                    **kwargs):
        """Train on `steps` batches drawn from `queue`.

        Returns:
            list: per-criterion results of `eval_criterions`, averaged over
            the `steps` batches; empty list when `eval_criterions` is falsy.
        """
        assert steps > 0
        self._set_mode("train")

        running_sums = None
        for _ in range(steps):
            batch = next(queue)
            inputs = batch[0].to(self.get_device())
            targets = batch[1].to(self.get_device())
            outputs = self.forward_data(inputs, targets, **kwargs)
            loss = criterion(inputs, outputs, targets)
            if eval_criterions:
                step_res = utils.flatten_list(
                    [crit(inputs, outputs, targets) for crit in eval_criterions])
                if running_sums is None:
                    running_sums = step_res
                else:
                    running_sums = [acc + cur
                                    for acc, cur in zip(running_sums, step_res)]
            self.zero_grad()
            loss.backward()
            optimizer.step()
            self.clear_cache()

        if not eval_criterions:
            return []
        return [total / steps for total in running_sums]
Exemple #2
0
    def eval_queue(self,
                   queue,
                   criterions,
                   steps=1,
                   mode="eval",
                   aggregate_fns=None,
                   **kwargs):
        """Evaluate `steps` batches from `queue` and aggregate per criterion.

        Returns:
            list: one aggregated value per flattened criterion result
            (default aggregation: batch-wise mean).
        """
        self._set_mode(mode)

        per_step = []
        ctx = torch.no_grad if self.eval_no_grad else nullcontext
        with ctx():
            for _ in range(steps):
                batch = _to_device(next(queue), self.get_device())
                outputs = self.forward_data(batch[0], **kwargs)
                # mAP only is calculated in "eval" mode
                self._set_mode("eval")
                per_step.append(utils.flatten_list(
                    [crit(batch[0], outputs, batch[1]) for crit in criterions]))
                self._set_mode(mode)
        # rows: criterion results; columns: steps
        per_criterion = np.asarray(per_step).transpose()
        if aggregate_fns is None:
            # by default, aggregate batch rewards with MEAN
            aggregate_fns = [
                lambda perfs: np.mean(perfs) if len(perfs) > 0 else 0.
            ] * len(per_criterion)
        return [fn(vals) for fn, vals in zip(aggregate_fns, per_criterion)]
Exemple #3
0
    def eval_queue(self,
                   queue,
                   criterions,
                   steps=1,
                   mode="eval",
                   aggregate_fns=None,
                   **kwargs):
        """Evaluate `steps` batches drawn from `queue`, with BN calibration.

        Args:
            queue: iterator yielding (inputs, targets) batches.
            criterions: callables `c(inputs, outputs, targets)`; their
                (possibly nested) results are flattened per batch.
            steps: number of batches to evaluate.
            mode: mode passed to `_set_mode`.
            aggregate_fns: optional list of per-criterion aggregation
                callables; defaults to the batch-wise mean.
            **kwargs: forwarded to `forward_data`.

        Returns:
            list: one aggregated value per flattened criterion result.
        """
        # BN running statistics calibration
        if self.calib_bn_batch > 0:
            calib_data = [next(queue) for _ in range(self.calib_bn_batch)]
            self.calib_bn(calib_data)

        self._set_mode(mode)

        aggr_ans = []
        # Disable autograd during evaluation unless `eval_no_grad` is False.
        context = torch.no_grad if self.eval_no_grad else nullcontext
        with context():
            for i in range(steps):
                # Re-use the calibration batches first so they are also
                # evaluated instead of being discarded.
                if i < self.calib_bn_batch:
                    data = calib_data[i]
                else:
                    data = next(queue)
                data = _to_device(data, self.get_device())
                outputs = self.forward_data(data[0], **kwargs)
                ans = utils.flatten_list(
                    [c(data[0], outputs, data[1]) for c in criterions])
                aggr_ans.append(ans)
        # rows: criterion results; columns: steps
        aggr_ans = np.asarray(aggr_ans).transpose()
        if aggregate_fns is None:
            # by default, aggregate batch rewards with MEAN
            aggregate_fns = [lambda perfs: np.mean(perfs) if len(perfs) > 0 else 0.]\
                            * len(aggr_ans)
        return [aggr_fn(ans) for aggr_fn, ans in zip(aggregate_fns, aggr_ans)]
Exemple #4
0
    def eval_data(self, data, criterions, mode="eval", **kwargs): #pylint: disable=arguments-differ
        """
        Override eval_data, to enable gradient.

        Returns:
           results (list of results return by criterions)
        """
        self._set_mode(mode)

        inputs, targets = data[0], data[1]
        outputs = self.forward_data(inputs, **kwargs)
        results = [crit(inputs, outputs, targets) for crit in criterions]
        return utils.flatten_list(results)
Exemple #5
0
    def eval_data(self, data, criterions, mode="eval", **kwargs):
        """
        Returns:
           results (list of results return by criterions)
        """
        self._set_mode(mode)

        # skip autograd during evaluation unless `eval_no_grad` is disabled
        ctx = torch.no_grad if self.eval_no_grad else nullcontext
        with ctx():
            outputs = self.forward_data(data[0], **kwargs)
            results = [crit(data[0], outputs, data[1]) for crit in criterions]
            return utils.flatten_list(results)
    def eval_data(self, data, criterions, mode="eval", **kwargs):  # pylint: disable=arguments-differ
        """
        Override eval_data, to enable gradient.

        Returns:
           results (list of results return by criterions)
        """
        self._set_mode(mode)

        # kwargs (e.g. detach_arch) is intentionally NOT forwarded: this
        # forward has no such argument, so the kwargs are dropped here.
        outputs = self.forward_data(data[0])

        results = [crit(data[0], outputs, data[1]) for crit in criterions]
        return utils.flatten_list(results)
Exemple #7
0
    def gradient(self,
                 data,
                 criterion=lambda i, l, t: nn.CrossEntropyLoss()(l, t),
                 parameters=None,
                 eval_criterions=None,
                 mode="train",
                 zero_grads=True,
                 return_grads=True,
                 **kwargs):
        """Get the gradient with respect to the candidate net parameters.

        Args:
            data: (inputs, targets) pair.
            criterion: callable `criterion(inputs, outputs, targets) -> loss`.
            parameters (optional): if specificied, can be a dict of param_name: param,
            or a list of parameter name.
            eval_criterions (optional): extra callables evaluated on the same
                forward pass; results are flattened and returned alongside.
            mode: mode passed to `_set_mode` before the forward/backward.
            zero_grads: zero existing gradients before `backward`.
            return_grads: when False, skip collecting gradients (None returned).
        Returns:
            grads (list of (name, grad tensor) pairs — only parameters whose
            `.grad` is populated; None when `return_grads` is False). When
            `eval_criterions` is given, returns (grads, eval_res) instead.
        """
        self._set_mode(mode)

        if return_grads:
            active_parameters = dict(self.named_parameters())
            if parameters is not None:
                _parameters = dict(parameters)
                # requested parameters must be a subset of the active ones
                _addi = set(_parameters.keys()).difference(active_parameters)
                assert not _addi,\
                    ("Cannot get gradient of parameters that are not active "
                     "in this candidate net: {}")\
                        .format(", ".join(_addi))
            else:
                _parameters = active_parameters
        _, targets = data
        outputs = self.forward_data(*data, **kwargs)
        loss = criterion(data[0], outputs, targets)
        if zero_grads:
            self.zero_grad()
        loss.backward()

        if not return_grads:
            grads = None
        else:
            # clone so later backward passes don't mutate the returned grads
            grads = [(k, v.grad.clone()) for k, v in six.iteritems(_parameters)\
                     if v.grad is not None]

        if eval_criterions:
            eval_res = utils.flatten_list(
                [c(data[0], outputs, targets) for c in eval_criterions])
            return grads, eval_res
        return grads
Exemple #8
0
    def eval_queue(self, queue, criterions, steps=1, mode="eval", **kwargs):
        """Evaluate `steps` batches from `queue`; return per-criterion means."""
        self._set_mode(mode)

        running_sums = None
        ctx = torch.no_grad if self.eval_no_grad else nullcontext
        with ctx():
            for _ in range(steps):
                batch = next(queue)
                inputs = batch[0].to(self.get_device())
                targets = batch[1].to(self.get_device())
                outputs = self.forward_data(inputs, **kwargs)
                step_res = utils.flatten_list(
                    [crit(inputs, outputs, targets) for crit in criterions])
                if running_sums is None:
                    running_sums = step_res
                else:
                    running_sums = [acc + cur
                                    for acc, cur in zip(running_sums, step_res)]
        return [total / steps for total in running_sums]
Exemple #9
0
    def _init_criterions(self, rollout_type):
        """Set up the criterion callables / forward kwargs used when
        evaluating a rollout in `evaluate_rollout`.

        Only differentiable rollouts are supported here: the reward is the
        controller-regularized loss with the architecture kept attached to
        the graph (`detach_arch=False`).
        """
        # criterion and forward keyword arguments for evaluating rollout in `evaluate_rollout`

        # support compare rollout
        assert "differentiable" in rollout_type

        # NOTE: only handle differentiable rollout differently
        self._reward_func = partial(
            self.objective.get_loss,
            add_controller_regularization=True,
            add_evaluator_regularization=False,
        )
        self._reward_kwargs = {"detach_arch": False}
        # scalar version detaches the reward to a numpy value for logging
        self._scalar_reward_func = lambda *args, **kwargs: utils.get_numpy(
            self._reward_func(*args, **kwargs)
        )

        self._perf_names = self.objective.perf_names()
        self._all_perf_names = utils.flatten_list(["reward", "loss", self._perf_names])
        # criterion funcs for meta parameter training
        self._eval_loss_func = partial(
            self.objective.get_loss,
            add_controller_regularization=False,
            add_evaluator_regularization=True,
        )
        # criterion funcs for log/report
        self._report_loss_funcs = [
            partial(
                self.objective.get_loss_item,
                add_controller_regularization=False,
                add_evaluator_regularization=False,
            ),
            self.objective.get_perfs,
        ]
        # FIX: "_reward_kwargs" was listed twice in this attribute list.
        self._criterions_related_attrs = [
            "_reward_func",
            "_reward_kwargs",
            "_scalar_reward_func",
            "_perf_names",
            "_eval_loss_func",
            "_report_loss_funcs",
        ]
Exemple #10
0
    def train_queue(self,
                    queue,
                    optimizer,
                    criterion=lambda i, l, t: nn.CrossEntropyLoss()(l, t),
                    eval_criterions=None,
                    steps=1,
                    aggregate_fns=None,
                    **kwargs):
        """Train on `steps` batches from `queue`; return aggregated
        eval-criterion results (default aggregation: batch-wise mean)."""
        assert steps > 0

        self._set_mode("train")

        collected = []
        for _ in range(steps):
            batch = _to_device(next(queue), self.get_device())
            inputs, targets = batch
            outputs = self.forward_data(inputs, targets, **kwargs)
            loss = criterion(inputs, outputs, targets)
            if eval_criterions:
                collected.append(utils.flatten_list(
                    [crit(inputs, outputs, targets)
                     for crit in eval_criterions]))
            self.zero_grad()
            loss.backward()
            optimizer.step()
            self.clear_cache()

        if not eval_criterions:
            return []
        # rows: criterion results; columns: steps
        per_criterion = np.asarray(collected).transpose()
        if aggregate_fns is None:
            # by default, aggregate batch rewards with MEAN
            aggregate_fns = [
                lambda perfs: np.mean(perfs) if len(perfs) > 0 else 0.0
            ] * len(per_criterion)
        return [fn(vals) for fn, vals in zip(aggregate_fns, per_criterion)]
Exemple #11
0
    def _init_criterions(self, rollout_type):
        """Set up the criterion callables / forward kwargs used when
        evaluating a rollout in `evaluate_rollout`.

        For "compare" rollouts, the criterions are initialized according to
        the weights manager's underlying rollout type.
        """
        # criterion and forward keyword arguments for evaluating rollout in `evaluate_rollout`

        # support compare rollout
        if rollout_type == "compare":
            # init criterions according to weights manager's rollout type
            rollout_type = self.weights_manager.rollout_type

        self._reward_func = self.objective.get_reward
        self._reward_kwargs = {}
        self._scalar_reward_func = self._reward_func

        self._perf_names = self.objective.perf_names()
        self._all_perf_names = utils.flatten_list(["reward", "loss", self._perf_names])
        # criterion funcs for meta parameter training
        self._eval_loss_func = partial(
            self.objective.get_loss,
            add_controller_regularization=False,
            add_evaluator_regularization=True,
        )
        # criterion funcs for log/report
        self._report_loss_funcs = [
            partial(
                self.objective.get_loss_item,
                add_controller_regularization=False,
                add_evaluator_regularization=False,
            ),
            self.objective.get_perfs,
        ]
        # FIX: "_reward_kwargs" was listed twice in this attribute list.
        self._criterions_related_attrs = [
            "_reward_func",
            "_reward_kwargs",
            "_scalar_reward_func",
            "_perf_names",
            "_eval_loss_func",
            "_report_loss_funcs",
        ]
    def eval_queue(self,
                   queue,
                   criterions,
                   steps=1,
                   mode="eval",
                   aggregate_fns=None,
                   **kwargs):
        """Evaluate `steps` batches from `queue` after BN calibration.

        BN running statistics are calibrated on either `calib_bn_num`
        samples or `calib_bn_batch` batches (`calib_bn_num` is checked
        first); calibration batches are re-used for the evaluation loop.

        Args:
            queue: iterator yielding (inputs, targets) batches.
            criterions: callables `c(inputs, outputs, targets)`.
            steps: number of batches to evaluate.
            mode: accepted for interface compatibility; evaluation always
                runs in "eval" mode after BN calibration.
            aggregate_fns: optional per-criterion aggregation callables;
                defaults to the batch-wise mean.

        Returns:
            list: one aggregated value per flattened criterion result.
        """
        # BN running statistics calibration
        if self.calib_bn_num > 0:
            # check `calib_bn_num` first
            calib_num = 0
            calib_data = []
            calib_batch = 0
            while calib_num < self.calib_bn_num:
                if calib_batch == steps:
                    # FIX: format arguments were in the wrong order
                    # (calib_num, steps) for this message.
                    utils.getLogger("robustness plugin.{}".format(self.__class__.__name__)).warn(
                        "steps (%d) reached, true calib bn num (%d)", steps, calib_num)
                    break
                calib_data.append(next(queue))
                # count samples via the target tensor of the last batch
                calib_num += len(calib_data[-1][1])
                calib_batch += 1
            self.calib_bn(calib_data)
        elif self.calib_bn_batch > 0:
            if self.calib_bn_batch > steps:
                # FIX: referenced non-existent `self.calib_bn_steps`
                # (would raise AttributeError); use `calib_bn_batch`.
                utils.getLogger("robustness plugin.{}".format(self.__class__.__name__)).warn(
                    "eval steps (%d) < `calib_bn_batch` (%d). Only use %d batches.",
                    steps, self.calib_bn_batch, steps)
                calib_bn_batch = steps
            else:
                calib_bn_batch = self.calib_bn_batch
            # check `calib_bn_batch` then
            calib_data = [next(queue) for _ in range(calib_bn_batch)]
            self.calib_bn(calib_data)
        else:
            calib_data = []

        self._set_mode("eval") # Use eval mode after BN calibration

        aggr_ans = []
        context = torch.no_grad if self.eval_no_grad else nullcontext
        with context():
            for i in range(steps):
                # re-use the calibration batches first
                if i < len(calib_data):
                    data = calib_data[i]
                else:
                    data = next(queue)
                data = _to_device(data, self.get_device())
                outputs = self.forward_data(data[0], **kwargs)
                ans = utils.flatten_list(
                    [c(data[0], outputs, data[1]) for c in criterions])
                aggr_ans.append(ans)
                # free activation memory before the next batch
                del outputs
                # FIX: typo "eva step" -> "eval step" in the progress line
                print("\reval step {}/{} ".format(i, steps), end="", flush=True)

        # rows: criterion results; columns: steps
        aggr_ans = np.asarray(aggr_ans).transpose()

        if aggregate_fns is None:
            # by default, aggregate batch rewards with MEAN
            aggregate_fns = [lambda perfs: np.mean(perfs) if len(perfs) > 0 else 0.]\
                            * len(aggr_ans)
        return [aggr_fn(ans) for aggr_fn, ans in zip(aggregate_fns, aggr_ans)]
Exemple #13
0
    def gradient(self,
                 data,
                 criterion=lambda i, l, t: nn.CrossEntropyLoss()(l, t),
                 parameters=None,
                 eval_criterions=None,
                 mode="train",
                 zero_grads=True,
                 return_grads=True,
                 **kwargs):
        """Get the gradient with respect to the candidate net parameters.

        Inputs are resized to the rollout's image size and processed in
        mini-batches sized so the per-step pixel count roughly matches a
        full batch at the smallest searchable image size; gradients
        accumulate across the mini-batches.

        Args:
            parameters (optional): if specificied, can be a dict of param_name: param,
            or a list of parameter name.
        Returns:
            grads (list of (name, grad tensor) pairs, or None when
            `return_grads` is False). When `eval_criterions` is given,
            returns (grads, eval_res); note eval_res is computed on the
            LAST mini-batch only.
        """
        self._set_mode(mode)

        if return_grads:
            active_parameters = dict(self.named_parameters())
            if parameters is not None:
                _parameters = dict(parameters)
                # requested parameters must be a subset of the active ones
                _addi = set(_parameters.keys()).difference(active_parameters)
                assert not _addi,\
                    ("Cannot get gradient of parameters that are not active "
                     "in this candidate net: {}")\
                    .format(", ".join(_addi))
            else:
                _parameters = active_parameters
        inputs, targets = data
        batch_size = inputs.size(0)
        min_image_size = min(self.super_net.search_space.image_size_choice)
        cur_image_size = self.rollout.image_size
        # keep the per-mini-batch pixel count comparable across image sizes
        ratio = (min_image_size / cur_image_size)**2
        mini_batch_size = make_divisible(batch_size * ratio, 8)
        inputs = F.interpolate(inputs, (cur_image_size, cur_image_size),
                               mode="bilinear",
                               align_corners=False)
        if zero_grads:
            self.zero_grad()
        # FIX: iterate over sample offsets with step `mini_batch_size`.
        # The old `range(0, num_mini_batches, mini_batch_size)` used the
        # mini-batch COUNT as the stop value, so whenever mini_batch_size
        # exceeded that count only the first slice was ever processed.
        for start in range(0, batch_size, mini_batch_size):
            mini_inputs = inputs[start:start + mini_batch_size]
            mini_targets = targets[start:start + mini_batch_size]
            outputs = self.forward_data(mini_inputs, mini_targets, **kwargs)
            loss = criterion(mini_inputs, outputs, mini_targets)

            # gradients accumulate across mini-batches
            loss.backward()

        if not return_grads:
            grads = None
        else:
            grads = [(k, v.grad.clone()) for k, v in six.iteritems(_parameters)
                     if v.grad is not None]

        if eval_criterions:
            eval_res = utils.flatten_list([
                c(mini_inputs, outputs, mini_targets) for c in eval_criterions
            ])
            return grads, eval_res
        return grads