def _build(self, s_in: Shape, c_out: int, weight_functions=()) -> Shape:
    steps = []
    for s in self.order.split('_'):
        if s == 'bn' and self.use_bn and self.batchnorm_fun is not None:
            bn = self._get_bn(s_in.num_features(), c_out)
            if bn is not None:
                steps.append(bn)
        if s == 'w':
            if (self.dropout_rate > 0 or self.dropout_keep) and self.dropout_fun is not None:
                steps.append(self.dropout_fun(self.dropout_rate, inplace=self.dropout_inplace))
            else:
                self.dropout_rate = 0.0
            steps.extend(weight_functions)
        if s == 'act':
            act = Register.act_funs.get(self.act_fun)(inplace=self.act_inplace)
            if act is not None:
                steps.append(act)
    if (c_out > s_in.num_features()) and not self.changes_c:
        steps.append(PaddingToValueModule(c_out, dim=1))
    self.steps = nn.ModuleList(steps)
    return self.probe_outputs(s_in, multiple_outputs=False)
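
# The builder above relies on PaddingToValueModule(c_out, dim=1) to raise the channel count
# to c_out when the wrapped weight functions do not change it themselves. Minimal standalone
# sketch of such a zero-padding module in plain PyTorch; the name ZeroPadDimTo and its exact
# behaviour are assumptions, not the repository's actual module.
import torch
import torch.nn as nn
import torch.nn.functional as F


class ZeroPadDimTo(nn.Module):
    """Zero-pad a tensor along `dim` until it has `target_size` entries there."""

    def __init__(self, target_size: int, dim: int = 1):
        super().__init__()
        self.target_size, self.dim = target_size, dim

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        missing = self.target_size - x.shape[self.dim]
        if missing <= 0:
            return x
        # F.pad takes (left, right) pairs starting from the last dimension
        pad = [0, 0] * (x.dim() - self.dim - 1) + [0, missing]
        return F.pad(x, pad)
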
def _build(self, s_in: Shape, s_out: Shape) -> Shape:
    before, after, squeeze = [], [], [nn.AdaptiveAvgPool2d(1), SqueezeModule()]
    if self.gap_first:
        after = [
            nn.Linear(s_in.num_features(), self.features, bias=True),  # no affine bn -> use bias
            Register.act_funs.get(self.act_fun)(inplace=True)
        ]
        self.cached['shape_inner'] = Shape([self.features])
    else:
        before = [
            nn.Conv2d(s_in.num_features(), self.features, 1, 1, 0, bias=False),
            nn.BatchNorm2d(self.features, affine=True),
            Register.act_funs.get(self.act_fun)(inplace=True)
        ]
        self.cached['shape_inner'] = Shape([self.features, s_in.shape[1], s_in.shape[2]])
    ops = before + squeeze + after + [
        nn.Dropout(p=self.dropout),
        nn.Linear(self.features, s_out.num_features(), bias=self.bias)
    ]
    self.head_module = nn.Sequential(*ops)
    return self.probe_outputs(s_in)
def _build(self, s_in: Shape, c_out: int) -> Shape:
    assert c_out >= s_in.num_features()
    self.conv = nn.Conv2d(s_in.num_features(), c_out, kernel_size=1, stride=1, padding=0, bias=False)
    return self.probe_outputs(s_in)
def _build(self, s_in: Shape, s_out: Shape) -> Shape:
    self.head_module = nn.Sequential(*[
        nn.BatchNorm2d(s_in.num_features()),
        nn.ReLU(inplace=True),
        nn.AdaptiveAvgPool2d(1),
        SqueezeModule(),
        nn.Dropout(p=0.0),
        nn.Linear(s_in.num_features(), s_out.num_features(), bias=True)
    ])
    return self.probe_outputs(s_in)
def _build(self, s_in: Shape, c_out: int) -> Shape:
    assert not (c_out <= s_in.num_features() and self.stride > 1), "must increase num features when stride is >1"
    assert s_in.num_features() % 4 == 0 and c_out % 2 == 0,\
        "input features must be divisible by 4, output features by 2"
    padding = get_padding(self.padding, self.k_size, self.stride, self.dilation)
    padding2 = get_padding(self.padding, self.k_size, 1, self.dilation)
    if self.stride >= 2:
        c_side = c_main_in = s_in.num_features()
        self.branch_proj = nn.Sequential(*[
            # dw
            nn.Conv2d(c_side, c_side, self.k_size, self.stride, padding, groups=c_side, bias=False),
            nn.BatchNorm2d(c_side, affine=self.bn_affine),
            # pw
            nn.Conv2d(c_side, c_side, 1, 1, 0, bias=False),
            nn.BatchNorm2d(c_side, affine=self.bn_affine),
            Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        ])
    else:
        c_side = c_main_in = s_in.num_features() // 2
    c_main_out = c_out - c_side
    c_main_mid = int(c_out // 2 * self.expansion)
    bm = [
        # dw 1
        nn.Conv2d(c_main_in, c_main_in, self.k_size, self.stride, padding, groups=c_main_in, bias=False),
        nn.BatchNorm2d(c_main_in, affine=self.bn_affine),
        # pw 1
        nn.Conv2d(c_main_in, c_main_mid, 1, 1, 0, bias=False),
        nn.BatchNorm2d(c_main_mid, affine=self.bn_affine),
        Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        # dw 2
        nn.Conv2d(c_main_mid, c_main_mid, self.k_size, 1, padding2, groups=c_main_mid, bias=False),
        nn.BatchNorm2d(c_main_mid, affine=self.bn_affine),
        # pw 2
        nn.Conv2d(c_main_mid, c_main_mid, 1, 1, 0, bias=False),
        nn.BatchNorm2d(c_main_mid, affine=self.bn_affine),
        Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        # dw 3
        nn.Conv2d(c_main_mid, c_main_mid, self.k_size, 1, padding2, groups=c_main_mid, bias=False),
        nn.BatchNorm2d(c_main_mid, affine=self.bn_affine),
        # pw 3
        nn.Conv2d(c_main_mid, c_main_out, 1, 1, 0, bias=False),
        nn.BatchNorm2d(c_main_out, affine=self.bn_affine),
        Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
    ]
    # optional attention module
    if isinstance(self.att_dict, dict):
        bm.append(AbstractAttentionModule.module_from_dict(c_main_out, c_substitute=c_main_in,
                                                           att_dict=self.att_dict))
    self.branch_main = DropPathModule(nn.Sequential(*bm))
    return self.probe_outputs(s_in)
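
# The block above follows the ShuffleNetV2 unit pattern: with stride >= 2 both branches
# process the full input and their outputs are concatenated; with stride == 1 the input is
# split in half along the channels and only one half goes through branch_main. A channel
# shuffle typically follows the concatenation. The forward pass is not part of this _build;
# the sketch below shows the usual combination logic and is an assumption, not the
# repository's actual forward implementation.
import torch


def channel_shuffle(x: torch.Tensor, groups: int = 2) -> torch.Tensor:
    """Interleave channels across `groups`, as in ShuffleNet."""
    n, c, h, w = x.shape
    return x.view(n, groups, c // groups, h, w).transpose(1, 2).reshape(n, c, h, w)


def shuffle_unit_forward(x, branch_main, branch_proj=None, stride=1):
    if stride >= 2:
        out = torch.cat([branch_proj(x), branch_main(x)], dim=1)
    else:
        c = x.shape[1] // 2
        out = torch.cat([x[:, :c], branch_main(x[:, c:])], dim=1)
    return channel_shuffle(out, groups=2)
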
def _build(self, s_in: Shape, c_out: int, weight_functions=()) -> Shape:
    padding = get_padding(self.padding, self.k_size, self.stride, self.dilation)
    conv = nn.Conv2d(s_in.num_features(), c_out, kernel_size=self.k_size, stride=self.stride, padding=padding,
                     dilation=self.dilation, groups=get_number(self.groups, s_in.num_features()), bias=self.bias)
    wf = list(weight_functions) + [conv]
    return super()._build(s_in, c_out, weight_functions=wf)
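
# get_padding(...) is used throughout to turn a padding policy (e.g. 'same') into a concrete
# padding value for the given kernel size, stride and dilation. A plausible sketch of the
# 'same' case for odd kernel sizes follows; it is an assumption about what the helper
# computes, not its actual implementation.
def same_padding(k_size: int, dilation: int = 1) -> int:
    """Padding that preserves the spatial size at stride 1 for odd kernel sizes."""
    effective_k = (k_size - 1) * dilation + 1
    return effective_k // 2
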
def _build(self, s_in: Shape, c_out: int, weight_functions=()) -> Shape:
    c_in = s_in.num_features()
    self.conv = SuperKernelConv(c_in, c_in, self.name, self.strategy_name, self.k_sizes, (1.0,),
                                self.dilation, self.stride, self.padding, self.groups, self.bias)
    point_conv = nn.Conv2d(c_in, c_out, kernel_size=1, groups=get_number(self.groups, c_in), bias=self.bias)
    wf = list(weight_functions) + [self.conv, point_conv]
    return super()._build(s_in, c_out, weight_functions=wf)
def _build(self, s_in: Shape, c_out: int) -> Shape:
    c_in = s_in.num_features()
    c_inner = int(c_out * self.expansion)
    self.block = self._build_block(c_in, c_inner, c_out, self.has_first_act)
    if self.shortcut_type in [None, 'None']:
        pass
    elif self.shortcut_type == 'id':
        self.shortcut = nn.Identity()
    elif self.shortcut_type == 'conv1x1':
        self.shortcut = nn.Sequential(*[
            nn.Conv2d(c_in, c_out, 1, self.stride, 0, bias=False),
            nn.BatchNorm2d(c_out, affine=self.bn_affine),
        ])
    elif self.shortcut_type == 'avg_conv':
        self.shortcut = nn.Sequential(*[
            nn.AvgPool2d(kernel_size=2, stride=self.stride, padding=0),
            nn.Conv2d(c_in, c_out, 1, 1, 0, bias=False),
        ])
    else:
        raise NotImplementedError('shortcut type "%s" is not implemented' % self.shortcut_type)
    self.has_shortcut = isinstance(self.shortcut, nn.Module)
    if self.has_shortcut:
        self.block = DropPathModule(self.block)
    return self.probe_outputs(s_in)
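
# The residual addition itself happens in the forward pass, which is not shown here.
# A hedged sketch of the usual combination, assuming the shortcut output is added to the
# main path whenever a shortcut module was built:
def residual_forward(x, block, shortcut=None):
    """Typical residual combination of a main block and an optional shortcut path."""
    out = block(x)
    if shortcut is not None:
        out = out + shortcut(x)
    return out
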
def _build(self, s_in: Shape, c_out: int, weight_functions=()) -> Shape:
    self.conv = SuperKernelConv(s_in.num_features(), c_out, self.name, self.strategy_name, self.k_sizes, (1.0,),
                                self.dilation, self.stride, self.padding, self.groups, self.bias)
    wf = list(weight_functions) + [self.conv]
    return super()._build(s_in, c_out, weight_functions=wf)
def _build(self, s_in: Shape, c_out: int) -> Shape:
    c_in = s_in.num_features()
    c_mid = make_divisible(int(c_in * self.expansion), divisible=8)
    self.has_skip = self.stride == 1 and c_in == c_out
    ops = []
    conv_kwargs = dict(dilation=self.dilation, padding=self.padding)
    if self.expansion > 1:
        # pw
        ops.extend([
            get_conv2d(c_in, c_mid, k_size=self.k_size_in, groups=1, **conv_kwargs),
            nn.BatchNorm2d(c_mid, affine=self.bn_affine),
            Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        ])
    # dw
    ops.extend([
        get_conv2d(c_mid, c_mid, k_size=self.k_size, stride=self.stride, groups=-1, **conv_kwargs),
        nn.BatchNorm2d(c_mid, affine=self.bn_affine),
        Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
    ])
    # optional squeeze+excitation module
    if isinstance(self.att_dict, dict):
        ops.append(AbstractAttentionModule.module_from_dict(c_mid, c_substitute=c_in, att_dict=self.att_dict))
    # pw
    ops.extend([
        get_conv2d(c_mid, c_out, k_size=self.k_size_out, groups=1, **conv_kwargs),
        nn.BatchNorm2d(c_out, affine=self.bn_affine),
    ])
    self.block = nn.Sequential(*ops)
    if self.has_skip:
        self.block = DropPathModule(self.block)
    return self.probe_outputs(s_in)
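
# The ops assembled above form a MobileNetV2-style inverted residual: optional 1x1 expansion
# -> depthwise conv -> optional squeeze+excitation -> 1x1 projection, with a skip connection
# when stride == 1 and the channel counts match. Minimal standalone sketch in plain PyTorch,
# without the repository's get_conv2d / Register / DropPathModule helpers (names and defaults
# are assumptions):
import torch.nn as nn


def inverted_residual(c_in: int, c_out: int, k_size: int = 3, stride: int = 1,
                      expansion: float = 6.0) -> nn.Sequential:
    c_mid = int(c_in * expansion)
    ops = []
    if expansion > 1:
        ops += [nn.Conv2d(c_in, c_mid, 1, bias=False),
                nn.BatchNorm2d(c_mid), nn.ReLU6(inplace=True)]
    ops += [nn.Conv2d(c_mid, c_mid, k_size, stride, k_size // 2, groups=c_mid, bias=False),
            nn.BatchNorm2d(c_mid), nn.ReLU6(inplace=True),
            nn.Conv2d(c_mid, c_out, 1, bias=False),
            nn.BatchNorm2d(c_out)]
    return nn.Sequential(*ops)
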
def _build2(self, s_in: Shape, s_out: Shape) -> ShapeList:
    """ build the network """
    assert s_in.num_dims() == s_out.num_dims() == 1
    s_cur = self.stem.build(s_in, c_out=self._layer_widths[0])
    for i in range(len(self._layer_widths) - 1):
        s_cur = self.cells[i].build(s_cur, c_out=self._layer_widths[i + 1])
    s_heads = [h.build(s_cur, c_out=s_out.num_features()) for h in self.heads]
    return ShapeList(s_heads)
def _build(self, s_in: Shape, c_out: int, weight_functions=()) -> Shape:
    padding = get_padding(self.padding, self.k_size, self.stride, 1)
    pool = (nn.AvgPool2d if self.pool_type == 'avg' else nn.MaxPool2d)(self.k_size, self.stride, padding)
    conv = nn.Conv2d(s_in.num_features(), c_out, kernel_size=1, stride=1, padding=0, bias=self.bias)
    wf = list(weight_functions) + [pool, conv]
    return super()._build(s_in, c_out, weight_functions=wf)
def _build(self, s_in: Shape, s_out: Shape) -> Shape:
    ops = [nn.AdaptiveAvgPool2d(1)]
    if self.se_cmul > 0:
        ops.append(SqueezeExcitationChannelModule(
            s_in.num_features(), c_mul=self.se_cmul, squeeze_act=self.se_act_fun,
            squeeze_bias=self.se_squeeze_bias and not self.se_bn, excite_bias=self.se_excite_bias,
            squeeze_bn=self.se_bn, squeeze_bn_affine=self.se_squeeze_bias))
    ops.extend([
        SqueezeModule(),
        nn.Linear(s_in.num_features(), self.features, bias=self.bias0),
        Register.act_funs.get(self.act_fun)(inplace=True),
        nn.Dropout(p=self.dropout),
        nn.Linear(self.features, s_out.num_features(), bias=self.bias1)
    ])
    self.head_module = nn.Sequential(*ops)
    self.cached['shape_inner'] = Shape([self.features])
    return self.probe_outputs(s_in)
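
# SqueezeExcitationChannelModule above follows the standard squeeze-and-excitation pattern:
# global average pooling to per-channel statistics, a bottleneck, and a sigmoid gate that
# rescales the channels. Minimal standalone sketch in plain PyTorch; the repository's module
# additionally exposes options such as BatchNorm in the squeeze path and configurable biases
# (see the arguments above), so treat this as an illustration only.
import torch.nn as nn


class SimpleSqueezeExcitation(nn.Module):
    def __init__(self, c: int, c_mul: float = 0.25):
        super().__init__()
        c_squeeze = max(1, int(c * c_mul))
        self.gate = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(c, c_squeeze, 1),
            nn.ReLU(inplace=True),
            nn.Conv2d(c_squeeze, c, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        return x * self.gate(x)
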
def _build(self, s_in: Shape, c_out: int) -> Shape:
    c_in = s_in.num_features()
    max_exp = max(self.expansions)
    exp_mults = [e / max_exp for e in self.expansions]
    c_mid = make_divisible(int(c_in * max_exp), divisible=8)
    self.has_skip = self.stride == 1 and c_in == c_out
    ops = []
    self.conv = SuperKernelConv(c_mid, c_mid, self.name, self.strategy_name, self.k_sizes, exp_mults,
                                self.dilation, self.stride, self.padding, c_mid, bias=False)
    if max_exp > 1:
        # pw
        ops.extend([
            nn.Conv2d(c_in, c_mid, 1, 1, 0, bias=False),
            nn.BatchNorm2d(c_mid, affine=self.bn_affine),
            Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        ])
    # dw
    ops.extend([
        self.conv,
        nn.BatchNorm2d(c_mid, affine=self.bn_affine),
        Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
    ])
    # optional attention module
    if isinstance(self.att_dict, dict):
        ops.append(AbstractAttentionModule.module_from_dict(c_mid, c_substitute=c_in, att_dict=self.att_dict))
    # pw
    ops.extend([
        nn.Conv2d(c_mid, c_out, 1, 1, 0, bias=False),
        nn.BatchNorm2d(c_out, affine=self.bn_affine),
    ])
    self.block = nn.Sequential(*ops)
    if self.has_skip:
        self.block = DropPathModule(self.block)
    return self.probe_outputs(s_in)
def _build(self, s_in: Shape, c_out: int) -> Shape:
    conv_kwargs = dict(dilation=self.dilation, padding=self.padding)
    c_in = s_in.num_features()
    self.has_skip = self.stride == 1 and c_in == c_out
    for e in range(len(self.expansions)):
        for k in range(len(self.k_sizes)):
            self._choices_by_idx.append((e, k))
    if self.has_skip and isinstance(self.skip_op, str):
        self.skip = Register.network_layers.get(self.skip_op)()
        self.skip.build(s_in, c_out)
        self._choices_by_idx.append(('skip', 'skip'))
    self.ws = StrategyManager().make_weight(self.strategy_name, self.name, only_single_path=True,
                                            num_choices=len(self._choices_by_idx))
    for e in self.expansions:
        c_mid = int(c_in * e)
        # pw in
        self.pw_in.append(nn.Sequential(
            get_conv2d(c_in, c_mid, k_size=self.k_size_in, groups=1, **conv_kwargs),
            nn.BatchNorm2d(c_mid, affine=self.bn_affine),
            Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        ))
        # dw conv ops with different kernel sizes
        convs = nn.ModuleList([])
        for k in self.k_sizes:
            convs.append(nn.Sequential(
                get_conv2d(c_mid, c_mid, k_size=k, stride=self.stride, groups=-1, **conv_kwargs),
                nn.BatchNorm2d(c_mid, affine=self.bn_affine),
                Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
            ))
        self.dw_conv.append(convs)
        # dw optional attention module
        if self.has_att:
            self.dw_att.append(AbstractAttentionModule.module_from_dict(c_mid, c_substitute=c_in,
                                                                        att_dict=self.att_dict))
        # pw out
        self.pw_out.append(nn.Sequential(
            get_conv2d(c_mid, c_out, k_size=self.k_size_out, groups=1, **conv_kwargs),
            nn.BatchNorm2d(c_out, affine=self.bn_affine),
        ))
    return self.probe_outputs(s_in)
def _build(self, s_in: Shape, c_out: int) -> Shape:
    c_in = s_in.num_features()
    max_exp = max(self.expansions)
    exp_mults = [e / max_exp for e in self.expansions]
    c_mid = int(c_in * max_exp)
    self.has_skip = self.stride == 1 and c_in == c_out
    ops = []
    if max_exp > 1:
        # pw
        ops.extend([
            nn.Conv2d(c_in, c_mid, 1, 1, 0, groups=1, bias=False),
            nn.BatchNorm2d(c_mid, affine=self.bn_affine),
            Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        ])
    # dw
    self.conv = SuperKernelThresholdConv(c_mid, c_mid, self.k_sizes, exp_mults, self.dilation,
                                         self.stride, self.padding, -1, bias=False)
    ops.extend([
        self.conv,
        nn.BatchNorm2d(c_mid, affine=self.bn_affine),
        Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
    ])
    # optional squeeze+excitation module with searchable width
    if isinstance(self.sse_dict, dict):
        self.learned_se = SuperSqueezeExcitationChannelThresholdModule(c_mid, c_substitute=c_in, **self.sse_dict)
        ops.append(self.learned_se)
    else:
        self.learned_se = None
    # pw
    ops.extend([
        nn.Conv2d(c_mid, c_out, 1, 1, 0, groups=1, bias=False),
        nn.BatchNorm2d(c_out, affine=self.bn_affine),
    ])
    self.block = nn.Sequential(*ops)
    if self.has_skip:
        self.block = DropPathModule(self.block)
    return self.probe_outputs(s_in)
def _build(self, s_in: Shape, s_out: Shape) -> Shape:
    """ assuming input size 14x14 """
    self.head_module = BasicDartsAuxHead(init_pool_stride=2)
    return self.head_module.build(s_in, s_out.num_features())
def _build(self, s_in: Shape, c_out: int) -> Shape:
    self.auxiliary = BasicDartsAuxHeadModule(c=s_in.num_features(), num_classes=c_out,
                                             init_pool_stride=self.init_pool_stride)
    return self.probe_outputs(s_in, multiple_outputs=False)
def _build(self, s_in: Shape, c_out: int, weight_functions=()) -> Shape:
    wf = list(weight_functions) + [nn.Linear(s_in.num_features(), c_out, bias=self.bias)]
    return super()._build(s_in, c_out, weight_functions=wf)
def _build(self, s_in: Shape, c_out: int) -> Shape:
    feature_diff = c_out - s_in.num_features()
    assert feature_diff >= 0
    self._add_to_print_kwargs(features=c_out, feature_diff=feature_diff)
    return self.probe_outputs(s_in)
def _build2(self, s_in: Shape, s_out: Shape) -> ShapeList:
    """ build the network """
    self.net = create_model(self.model_name, in_chans=s_in.num_features(), num_classes=s_out.num_features())
    return self.get_network_output_shapes()
def _build(self, s_in: Shape, c_out: int, weight_functions=()) -> Shape:
    assert c_out % 2 == 0
    wf = list(weight_functions) + [FactorizedReductionModule(s_in.num_features(), c_out, stride=2)]
    return super()._build(s_in, c_out, weight_functions=wf)
def _build(self, s_in: Shape, s_out: Shape) -> Shape:
    self.head_module = ClassificationLayer(bias=self.bias, use_bn=False, use_gap=True, dropout_rate=self.dropout)
    return self.head_module.build(s_in, s_out.num_features())
def _build(self, s_in: Shape, c_out: int) -> Shape:
    assert s_in.num_features() == c_out
    self.att_module = AbstractAttentionModule.module_from_dict(c_out, c_substitute=None, att_dict=self.att_dict)
    return s_in
def _build(self, s_in: Shape, c_out: int, weight_functions=()) -> Shape:
    wf = list(weight_functions)
    if self.use_gap:
        wf += [GapSqueezeModule()]
    wf += [nn.Linear(s_in.num_features(), c_out, bias=self.bias)]
    return super()._build(s_in, c_out, weight_functions=wf)
def _build(self, s_in: Shape, c_out: int, weight_functions=()) -> Shape:
    self.conv = SuperKernelThresholdConv(s_in.num_features(), s_in.num_features(), self.k_sizes, (1.0,),
                                         self.dilation, self.stride, self.padding, self.groups, self.bias)
    point_conv = nn.Conv2d(s_in.num_features(), c_out, kernel_size=1, groups=1, bias=self.bias)
    wf = list(weight_functions) + [self.conv, point_conv]
    return super()._build(s_in, c_out, weight_functions=wf)