Example #1
    def set_mask(self, mask):
        # Propagate a channel mask into the SE sub-layers. `mask` is a
        # boolean/0-1 tensor over this module's channels; None means
        # no masking is requested.
        if mask is None:
            return
        channel = mask.sum().item()
        mid_channel = make_divisible(channel // self.reduction, 8)
        # Derive the bottleneck mask from the expand conv's weights, then
        # apply matching (input, output) masks to both 1x1 convs.
        exp_mask = _get_channel_mask(self.se.expand.weight.data, mid_channel)
        self.se.reduction.set_mask(mask, exp_mask)
        self.se.expand.set_mask(exp_mask, mask)
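`_get_channel_mask` is not shown in this snippet. For reference, a minimal sketch of what such a helper typically does, assuming it ranks the expand conv's input (bottleneck) channels by L1 importance and keeps the top `mid_channel` of them; the ranking criterion and exact signature are assumptions, not the project's confirmed implementation:

import torch

def _get_channel_mask(weight, num_keep):
    # weight: expand conv weight of shape (out_ch, in_ch, 1, 1); build a
    # mask over the in_ch (bottleneck) dimension keeping the `num_keep`
    # channels with the largest L1 norm.
    importance = weight.abs().sum(dim=(0, 2, 3))
    keep_idx = torch.topk(importance, num_keep).indices
    mask = torch.zeros(weight.shape[1], dtype=torch.bool, device=weight.device)
    mask[keep_idx] = True
    return mask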
Example #2
    def __init__(self, channel, reduction=4, reduction_layer=None, expand_layer=None):
        super(SEModule, self).__init__()
        self.channel = channel
        self.reduction = reduction
        # Bottleneck width: channel / reduction, rounded to a multiple of 8.
        mid_channel = make_divisible(channel // reduction, 8)

        # Squeeze-and-excitation gate: 1x1 reduction conv -> ReLU ->
        # 1x1 expand conv -> hard sigmoid. Custom reduction/expand layers
        # may be injected (e.g. the flexible variants in Example #3).
        self.se = nn.Sequential(OrderedDict([
            ("reduction", reduction_layer or nn.Conv2d(self.channel, mid_channel, 1, 1, 0)),
            ("relu", nn.ReLU(inplace=True)),
            ("expand", expand_layer or nn.Conv2d(mid_channel, self.channel, 1, 1, 0)),
            ("activation", get_op("h_sigmoid")())
        ]))
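`make_divisible` is used throughout these examples but never shown. The widely used MobileNet-style helper rounds a value to the nearest multiple of a divisor without dropping more than 10% below the original; a sketch along those lines (the project's exact version may differ):

def make_divisible(v, divisor, min_val=None):
    # Round `v` to the nearest multiple of `divisor`, never going below
    # `min_val` or more than 10% under the original value.
    if min_val is None:
        min_val = divisor
    new_v = max(min_val, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v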
Example #3
File: ops.py Project: zzzDavid/aw_nas
    def __init__(self, channel, reduction=4):
        mid_channel = make_divisible(channel // reduction, 8)
        # Use flexible (maskable) 1x1 point-wise layers in place of plain
        # nn.Conv2d so the SE bottleneck can be pruned via set_mask.
        reduction_layer = FlexiblePointLinear(channel, mid_channel, 1, 1, 0,
                                              bias=True)
        expand_layer = FlexiblePointLinear(mid_channel, channel, 1, 1, 0,
                                           bias=True)
        super(FlexibleSEModule, self).__init__(channel, reduction,
                                               reduction_layer, expand_layer)
        # Explicitly initialize the second base class (multiple inheritance).
        FlexibleLayer.__init__(self)
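A usage sketch tying Examples #1 through #3 together, assuming `FlexibleSEModule` subclasses both `SEModule` and `FlexibleLayer` (the surrounding class definition is not shown, so the construction details here are assumptions):

import torch

se = FlexibleSEModule(channel=64, reduction=4)

# Keep only the first 32 of the 64 channels active; set_mask (Example #1)
# derives the matching bottleneck mask and pushes both masks into the
# flexible reduction/expand convs.
mask = torch.zeros(64, dtype=torch.bool)
mask[:32] = True
se.set_mask(mask)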
Example #4
    def gradient(self,
                 data,
                 criterion=lambda i, l, t: nn.CrossEntropyLoss()(l, t),
                 parameters=None,
                 eval_criterions=None,
                 mode="train",
                 zero_grads=True,
                 return_grads=True,
                 **kwargs):
        """Get the gradient with respect to the candidate net parameters.

        Args:
            parameters (optional): if specificied, can be a dict of param_name: param,
            or a list of parameter name.
        Returns:
            grads (dict of name: grad tensor)
        """
        self._set_mode(mode)

        if return_grads:
            active_parameters = dict(self.named_parameters())
            if parameters is not None:
                _parameters = dict(parameters)
                _addi = set(_parameters.keys()).difference(active_parameters)
                assert not _addi,\
                    ("Cannot get gradient of parameters that are not active "
                     "in this candidate net: {}")\
                    .format(", ".join(_addi))
            else:
                _parameters = active_parameters
        inputs, targets = data
        batch_size = inputs.size(0)
        # Resize inputs to the rollout's image size, and shrink the
        # mini-batch size by the area ratio relative to the smallest
        # image size, so per-step memory cost stays roughly constant.
        min_image_size = min(self.super_net.search_space.image_size_choice)
        cur_image_size = self.rollout.image_size
        ratio = (min_image_size / cur_image_size)**2
        mini_batch_size = make_divisible(batch_size * ratio, 8)
        inputs = F.interpolate(inputs, (cur_image_size, cur_image_size),
                               mode="bilinear",
                               align_corners=False)
        if zero_grads:
            self.zero_grad()
        # Accumulate gradients over successive mini-batches; `i` is the
        # start index of each slice.
        for i in range(0, batch_size, mini_batch_size):
            mini_inputs = inputs[i:i + mini_batch_size]
            mini_targets = targets[i:i + mini_batch_size]
            outputs = self.forward_data(mini_inputs, mini_targets, **kwargs)
            loss = criterion(mini_inputs, outputs, mini_targets)
            loss.backward()

        if not return_grads:
            grads = None
        else:
            # Collect gradients only for parameters that received one.
            grads = [(k, v.grad.clone()) for k, v in six.iteritems(_parameters)
                     if v.grad is not None]

        if eval_criterions:
            # Note: the eval criterions see only the last mini-batch.
            eval_res = utils.flatten_list([
                c(mini_inputs, outputs, mini_targets) for c in eval_criterions
            ])
            return grads, eval_res
        return grads
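A sketch of how this method might be invoked on a candidate net, with `data` as an (inputs, targets) pair and a top-1 accuracy criterion for evaluation (`cand_net` and the criterion name are illustrative assumptions):

import torch

def top1_accuracy(inputs, logits, targets):
    # Criterions receive (inputs, outputs, targets); as noted above,
    # only the last mini-batch is evaluated.
    return (logits.argmax(dim=-1) == targets).float().mean().item()

inputs = torch.randn(32, 3, 224, 224)
targets = torch.randint(0, 1000, (32,))
grads, eval_res = cand_net.gradient(
    (inputs, targets),
    eval_criterions=[top1_accuracy],
    mode="train",
)
# `grads` is a list of (param_name, grad tensor) pairs.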