Example #1
 def _build_block(self,
                  c_in: int,
                  c_inner: int,
                  c_out: int,
                  has_first_act=False) -> nn.Module:
     padding0 = get_padding(self.padding, self.k_size, self.stride,
                            self.dilation)
     padding1 = get_padding('same', self.k_size, 1, self.dilation)
     ops = [
         nn.Conv2d(c_in,
                   c_inner,
                   self.k_size,
                   self.stride,
                   padding0,
                   self.dilation,
                   bias=False),
         nn.BatchNorm2d(c_inner, affine=self.bn_affine),
         Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
         nn.Conv2d(c_inner, c_out, self.k_size, 1, padding1, 1, bias=False),
         nn.BatchNorm2d(c_out, affine=self.bn_affine),
     ]
     if has_first_act:
         return nn.Sequential(
             Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
             *ops)
     return nn.Sequential(*ops)
Example #2
    def _build(self, s_in: Shape, c_out: int) -> Shape:
        assert not (c_out <= s_in.num_features() and self.stride > 1), "must increase num features when stride is >1"
        assert s_in.num_features() % 4 == 0 and c_out % 2 == 0, "num input features must be divisible by 4, num output features by 2"

        padding = get_padding(self.padding, self.k_size, self.stride, self.dilation)
        padding2 = get_padding(self.padding, self.k_size, 1, self.dilation)

        if self.stride >= 2:
            c_side = c_main_in = s_in.num_features()

            self.branch_proj = nn.Sequential(*[
                # dw
                nn.Conv2d(c_side, c_side, self.k_size, self.stride, padding, groups=c_side, bias=False),
                nn.BatchNorm2d(c_side, affine=self.bn_affine),
                # pw
                nn.Conv2d(c_side, c_side, 1, 1, 0, bias=False),
                nn.BatchNorm2d(c_side, affine=self.bn_affine),
                Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
            ])
        else:
            c_side = c_main_in = s_in.num_features() // 2
        c_main_out = c_out - c_side
        c_main_mid = int(c_out // 2 * self.expansion)

        bm = [
            # dw 1
            nn.Conv2d(c_main_in, c_main_in, self.k_size, self.stride, padding, groups=c_main_in, bias=False),
            nn.BatchNorm2d(c_main_in, affine=self.bn_affine),
            # pw 1
            nn.Conv2d(c_main_in, c_main_mid, 1, 1, 0, bias=False),
            nn.BatchNorm2d(c_main_mid, affine=self.bn_affine),
            Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
            # dw 2
            nn.Conv2d(c_main_mid, c_main_mid, self.k_size, 1, padding2, groups=c_main_mid, bias=False),
            nn.BatchNorm2d(c_main_mid, affine=self.bn_affine),
            # pw 2
            nn.Conv2d(c_main_mid, c_main_mid, 1, 1, 0, bias=False),
            nn.BatchNorm2d(c_main_mid, affine=self.bn_affine),
            Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
            # dw 3
            nn.Conv2d(c_main_mid, c_main_mid, self.k_size, 1, padding2, groups=c_main_mid, bias=False),
            nn.BatchNorm2d(c_main_mid, affine=self.bn_affine),
            # pw 3
            nn.Conv2d(c_main_mid, c_main_out, 1, 1, 0, bias=False),
            nn.BatchNorm2d(c_main_out, affine=self.bn_affine),
            Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        ]
        # optional attention module
        if isinstance(self.att_dict, dict):
            bm.append(AbstractAttentionModule.module_from_dict(c_main_out, c_substitute=c_main_in,
                                                               att_dict=self.att_dict))

        # self.branch_main = nn.Sequential(*bm)
        self.branch_main = DropPathModule(nn.Sequential(*bm))
        return self.probe_outputs(s_in)
Example #3
 def _build(self, s_in: Shape, c_out: int, weight_functions=()) -> Shape:
     padding = get_padding(self.padding, self.k_size, self.stride, 1)
     pool = (nn.AvgPool2d if self.pool_type == 'avg' else nn.MaxPool2d)(
         self.k_size, self.stride, padding)
     conv = nn.Conv2d(s_in.num_features(),
                      c_out,
                      kernel_size=1,
                      stride=1,
                      padding=0,
                      bias=self.bias)
     wf = list(weight_functions) + [pool, conv]
     return super()._build(s_in, c_out, weight_functions=wf)
Example #4
def get_conv2d(c_in: int, c_out: int, k_size, stride=1, groups=-1, dilation=1, padding='same') -> nn.Module:
    # multiple kernel sizes, mix conv
    if isinstance(k_size, (tuple, list)):
        if len(k_size) > 1:
            return MixConvModule(c_in, c_out, k_size=k_size, stride=stride,
                                 dilation=dilation, padding=padding, groups=groups, bias=False,
                                 mode='even', divisible=1)
        k_size = k_size[0]
    # one kernel size, regular conv
    padding = get_padding(padding, k_size, stride, dilation)
    groups = c_in if groups == -1 else groups
    return nn.Conv2d(c_in, c_out, k_size, stride, padding, groups=groups, bias=False)
Example #5
 def _build(self, s_in: Shape, c_out: int, weight_functions=()) -> Shape:
     padding = get_padding(self.padding, self.k_size, self.stride,
                           self.dilation)
     conv = nn.Conv2d(s_in.num_features(),
                      c_out,
                      kernel_size=self.k_size,
                      stride=self.stride,
                      padding=padding,
                      dilation=self.dilation,
                      groups=get_number(self.groups, s_in.num_features()),
                      bias=self.bias)
     wf = list(weight_functions) + [conv]
     return super()._build(s_in, c_out, weight_functions=wf)
Example #6
 def __init__(self,
              c_in: int,
              c_out: int,
              k_size=(3, 5, 7),
              stride=1,
              dilation=1,
              groups=-1,
              bias=False,
              padding='same',
              mode='even',
              divisible=1):
     super().__init__()
     assert isinstance(k_size, (tuple, list))
     assert c_in == c_out or groups == 1
     self.splits_in = get_splits(c_in,
                                 len(k_size),
                                 mode=mode,
                                 divisible=divisible)
     self.splits_out = get_splits(c_out,
                                  len(k_size),
                                  mode=mode,
                                  divisible=divisible)
     groups = [groups] * len(k_size) if groups > 0 else self.splits_in
     ops = []
     for k, g, si, so in zip(k_size, groups, self.splits_in,
                             self.splits_out):
         p = get_padding(padding, k, stride, dilation)
         ops.append(
             nn.Conv2d(si,
                       so,
                       k,
                       stride=stride,
                       padding=p,
                       groups=g,
                       bias=bias))
     self.ops = nn.ModuleList(ops)
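
The constructor above splits the input and output channels into one chunk per kernel size (via get_splits) and builds one convolution per chunk. A self-contained toy sketch of that mixed-kernel idea in plain PyTorch, with even splits and none of the example's groups/divisible handling; the real module's forward pass is not shown in the snippet and may differ:

import torch
import torch.nn as nn

class TinyMixConv(nn.Module):
    """Toy mixed-kernel conv: one conv per kernel size, each applied to its own channel chunk."""
    def __init__(self, c_in, c_out, k_sizes=(3, 5, 7)):
        super().__init__()
        self.splits_in = [c_in // len(k_sizes)] * len(k_sizes)
        splits_out = [c_out // len(k_sizes)] * len(k_sizes)
        self.ops = nn.ModuleList([
            nn.Conv2d(si, so, k, padding=k // 2, bias=False)   # 'same' padding for stride 1
            for k, si, so in zip(k_sizes, self.splits_in, splits_out)
        ])

    def forward(self, x):
        chunks = torch.split(x, self.splits_in, dim=1)
        return torch.cat([op(c) for op, c in zip(self.ops, chunks)], dim=1)

y = TinyMixConv(12, 12)(torch.randn(2, 12, 16, 16))
print(y.shape)   # torch.Size([2, 12, 16, 16])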
Example #7
 def _build(self, s_in: Shape, c_out: int, weight_functions=()) -> Shape:
     padding = get_padding(self.padding, self.k_size, self.stride, 1)
     pool = (nn.AvgPool2d if self.pool_type == 'avg' else nn.MaxPool2d)(
         self.k_size, self.stride, padding)
     wf = list(weight_functions) + [pool]
     return super()._build(s_in, c_out, weight_functions=wf)
Example #8
    def __init__(self,
                 c_in: int,
                 c_out: int,
                 name: str,
                 strategy_name='default',
                 k_sizes=(3, 5),
                 c_multipliers=(0.5, 1.0),
                 dilation=1,
                 stride=1,
                 padding='same',
                 groups=-1,
                 bias=False):
        """
        A super-kernel that applies convolution with a masked weight, using architecture weights to figure out the best
        masking, thus kernel size and num output channels. Since the architecture weights are applied to the mask rather
        than generating different outputs, this module can be used efficiently for differentiable weight strategies.

        :param c_in: num input channels
        :param c_out: num output channels
        :param name: name under which to register architecture weights
        :param strategy_name: name of the strategy for architecture weights
        :param k_sizes: kernel sizes
        :param c_multipliers:
        :param dilation: dilation for the kernel
        :param stride: stride for the kernel
        :param padding:
        :param padding: 'same' or number
        :param bias:
        """
        super().__init__()
        self.name_c = '%s/c' % name
        self.name_k = '%s/k' % name
        self.k_sizes = k_sizes
        self.c_multipliers = c_multipliers
        assert max(c_multipliers) <= 1.0, "Can only reduce max channels, choose a higher c_in/c_out"

        self._stride = stride
        self._groups = get_number(groups, c_out)
        self._dilation = dilation
        assert c_in % self._groups == 0

        max_k = max(k_sizes)
        channels = [int(c_out * ci) for ci in sorted(c_multipliers)]
        masks_c, masks_k = [], []

        # arc weights
        self.ws = StrategyManager().make_weight(strategy_name,
                                                self.name_k,
                                                only_single_path=True,
                                                num_choices=len(k_sizes))
        self.ws = StrategyManager().make_weight(strategy_name,
                                                self.name_c,
                                                only_single_path=True,
                                                num_choices=len(channels))

        # conv weight
        self._padding = get_padding(padding, max_k, stride, 1)
        self.weight = nn.Parameter(torch.Tensor(c_out, c_in // self._groups,
                                                max_k, max_k),
                                   requires_grad=True)
        nn.init.kaiming_normal_(self.weight, mode='fan_out')

        # bias
        if bias:
            self.bias = nn.Parameter(torch.Tensor(c_out))
            nn.init.zeros_(self.bias)
        else:
            self.bias = None

        # mask c
        for cs in channels:
            mask = torch.ones(size=(c_out, 1, 1, 1), dtype=self.weight.dtype)
            mask[cs:c_out, :, :, :].zero_()
            masks_c.append(mask)
        self.register_buffer('masks_c', torch.stack(masks_c, dim=0))

        # mask k
        for k in sorted(k_sizes):
            mask = torch.zeros(size=(1, 1, max_k, max_k),
                               dtype=self.weight.dtype)
            dk = (max_k - k) // 2
            if dk == 0:
                mask += 1
            else:
                mask[:, :, dk:-dk, dk:-dk] += 1
            masks_k.append(mask)
        self.register_buffer('masks_k', torch.stack(masks_k, dim=0))
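
The docstring above describes replacing several differently-sized convolutions with a single over-sized weight that is masked down to the chosen kernel size and channel count. A minimal standalone sketch of that masking idea in plain PyTorch (hypothetical sizes; this is not the module's actual forward pass, which also involves the registered architecture weights):

import torch
import torch.nn.functional as F

c_in, c_out, max_k = 8, 16, 7        # hypothetical channel counts and largest kernel
k, c_keep = 3, 8                     # hypothetical choice: 3x3 kernel, keep 8 of 16 output channels
weight = torch.randn(c_out, c_in, max_k, max_k)

# kernel mask: keep only the centered k x k window of the max_k x max_k weight
mask_k = torch.zeros(1, 1, max_k, max_k)
dk = (max_k - k) // 2
mask_k[:, :, dk:-dk, dk:-dk] = 1

# channel mask: keep only the first c_keep output channels
mask_c = torch.zeros(c_out, 1, 1, 1)
mask_c[:c_keep] = 1

x = torch.randn(2, c_in, 32, 32)
y = F.conv2d(x, weight * mask_k * mask_c, padding=max_k // 2)
print(y.shape)                       # torch.Size([2, 16, 32, 32]), channels beyond c_keep are zero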
Example #9
    def __init__(self, c_in: int, c_out: int, k_sizes=(3, 5, 7), c_multipliers=(0.5, 1.0),
                 dilation=1, stride=1, padding='same', groups=1, bias=False):
        """
        A super-kernel that applies convolution with a masked weight, using differentiable weights and thresholds
        to figure out the best masking, thus kernel size and num output channels.
        Since the mask is learned, rather than generating different outputs, this module can be used efficiently to
        learn the architecture of (huge) networks.

        :param c_in: num input channels
        :param c_out: num output channels
        :param k_sizes: kernel sizes
        :param c_multipliers:
        :param dilation: dilation for the kernel
        :param stride: stride for the kernel
        :param padding: 'same' or number
        :param bias: whether to use a bias
        """
        super().__init__()
        k_sizes = sorted(k_sizes)
        max_k = max(k_sizes)
        c_multipliers = sorted(c_multipliers)
        assert max(c_multipliers) == 1.0, "Can only reduce max channels, choose a higher c_in/c_out"

        self.c_in = c_in
        self.c_out = c_out
        self.k_sizes = k_sizes
        self.c_multipliers = c_multipliers
        self.c_out_list = [int(cm * c_out) for cm in c_multipliers]
        self._padding = get_padding(padding, max_k, stride, 1)
        self._stride = stride
        self._dilation = dilation
        self._groups = get_number(groups, c_out)
        assert c_in % self._groups == 0

        # conv and bias weights
        self.weight = nn.Parameter(torch.zeros(c_out, c_in // self._groups, max_k, max_k), requires_grad=True)
        self.bias = nn.Parameter(torch.zeros(c_out), requires_grad=True) if bias else None
        nn.init.kaiming_normal_(self.weight, mode='fan_out')

        # channel masks
        masks_c = []
        for cs in self.c_out_list:
            mask = torch.ones(size=(c_out, 1, 1, 1), dtype=self.weight.dtype)
            mask[cs:c_out, :, :, :].zero_()
            for prev_mask in masks_c:
                mask -= prev_mask
            masks_c.append(mask)
        self.mask_c = TrainableMask(masks_c)

        # kernel masks
        masks_k = []
        for k in sorted(k_sizes):
            mask = torch.zeros(size=(1, 1, max_k, max_k), dtype=self.weight.dtype)
            dk = (max_k - k) // 2
            if dk == 0:
                mask += 1
            else:
                mask[:, :, dk:-dk, dk:-dk] += 1
            for prev_mask in masks_k:
                mask -= prev_mask
            masks_k.append(mask)
        self.mask_k = TrainableMask(masks_k)
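
Note how each new mask has all previously built masks subtracted, so the stored tensors are disjoint increments rather than nested masks. A small sketch of why that decomposition is convenient, with a hand-picked 0/1 vector standing in for whatever gates TrainableMask actually learns (its API is not shown in the snippet):

import torch

c_out = 8
c_out_list = [4, 8]                  # from c_multipliers (0.5, 1.0), as in the example above

# build the disjoint channel-mask increments exactly as in __init__
masks_c = []
for cs in c_out_list:
    mask = torch.ones(c_out, 1, 1, 1)
    mask[cs:c_out].zero_()
    for prev in masks_c:
        mask -= prev
    masks_c.append(mask)

# summing the increments with cumulative 0/1 gates recovers the nested masks:
# gates = [1, 0] keeps the first 4 channels, gates = [1, 1] keeps all 8
gates = torch.tensor([1.0, 1.0])
full_mask = sum(g * m for g, m in zip(gates, masks_c))
print(full_mask.flatten())           # all ones for gates = [1, 1]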