Example #1
	def __init__(self, n_classes=1000, bn_param=(0.1, 1e-5), dropout_rate=0,
	             depth_list=2, expand_ratio_list=0.25, width_mult_list=1.0):

		self.depth_list = val2list(depth_list)
		self.expand_ratio_list = val2list(expand_ratio_list)
		self.width_mult_list = val2list(width_mult_list)
		# sort
		self.depth_list.sort()
		self.expand_ratio_list.sort()
		self.width_mult_list.sort()

		input_channel = [
			make_divisible(64 * width_mult, MyNetwork.CHANNEL_DIVISIBLE) for width_mult in self.width_mult_list
		]
		
		mid_input_channel = [
			make_divisible(channel // 2, MyNetwork.CHANNEL_DIVISIBLE) for channel in input_channel
		]

		stage_width_list = ResNets.STAGE_WIDTH_LIST.copy()
		for i, width in enumerate(stage_width_list):
			stage_width_list[i] = [
				make_divisible(width * width_mult, MyNetwork.CHANNEL_DIVISIBLE) for width_mult in self.width_mult_list
			]

		n_block_list = [base_depth + max(self.depth_list) for base_depth in ResNets.BASE_DEPTH_LIST]
		stride_list = [1, 2, 2, 2]

		# build input stem
		input_stem = [
			DynamicConvLayer(val2list(3), mid_input_channel, 3, stride=2, use_bn=True, act_func='relu'),
			ResidualBlock(
				DynamicConvLayer(mid_input_channel, mid_input_channel, 3, stride=1, use_bn=True, act_func='relu'),
				IdentityLayer(mid_input_channel, mid_input_channel)
			),
			DynamicConvLayer(mid_input_channel, input_channel, 3, stride=1, use_bn=True, act_func='relu')
		]

		# blocks
		blocks = []
		for d, width, s in zip(n_block_list, stage_width_list, stride_list):
			for i in range(d):
				stride = s if i == 0 else 1
				bottleneck_block = DynamicResNetBottleneckBlock(
					input_channel, width, expand_ratio_list=self.expand_ratio_list,
					kernel_size=3, stride=stride, act_func='relu', downsample_mode='avgpool_conv',
				)
				blocks.append(bottleneck_block)
				input_channel = width
		# classifier
		classifier = DynamicLinearLayer(input_channel, n_classes, dropout_rate=dropout_rate)

		super(OFAResNets, self).__init__(input_stem, blocks, classifier)

		# set bn param
		self.set_bn_param(*bn_param)

		# runtime_depth
		self.input_stem_skipping = 0
		self.runtime_depth = [0] * len(n_block_list)
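Most of these examples lean on a few small helpers from the OFA codebase (val2list, make_divisible, MyNetwork.CHANNEL_DIVISIBLE). As a rough reference only, here is a minimal sketch of what they are assumed to do, following the common MobileNet-style definitions rather than the exact ofa.utils source:

def val2list(val, repeat_time=1):
    # wrap a scalar into a list; pass lists/tuples through as lists
    if isinstance(val, (list, tuple)):
        return list(val)
    return [val for _ in range(repeat_time)]

def make_divisible(v, divisor, min_val=None):
    # round v to the nearest multiple of divisor without dropping below 90% of v
    if min_val is None:
        min_val = divisor
    new_v = max(min_val, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v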
Example #2
    def re_organize_middle_weights(self, expand_ratio_stage=0):
        # conv3 -> conv2
        importance = torch.sum(torch.abs(self.conv3.conv.conv.weight.data), dim=(0, 2, 3))
        if isinstance(self.conv2.bn, DynamicGroupNorm):
            channel_per_group = self.conv2.bn.channel_per_group
            importance_chunks = torch.split(importance, channel_per_group)
            for chunk in importance_chunks:
                chunk.data.fill_(torch.mean(chunk))
            importance = torch.cat(importance_chunks, dim=0)
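        # when elastic expand-ratio training has reached `expand_ratio_stage`, push channels
        # outside the smaller target widths down with large negative offsets, so the descending
        # sort keeps the channels already used by the smaller sub-networks at the front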
        if expand_ratio_stage > 0:
            sorted_expand_list = copy.deepcopy(self.expand_ratio_list)
            sorted_expand_list.sort(reverse=True)
            target_width_list = [
                make_divisible(round(max(self.out_channel_list) * expand), MyNetwork.CHANNEL_DIVISIBLE)
                for expand in sorted_expand_list
            ]
            right = len(importance)
            base = - len(target_width_list) * 1e5
            for i in range(expand_ratio_stage + 1):
                left = target_width_list[i]
                importance[left:right] += base
                base += 1e5
                right = left

        sorted_importance, sorted_idx = torch.sort(importance, dim=0, descending=True)
        self.conv3.conv.conv.weight.data = torch.index_select(self.conv3.conv.conv.weight.data, 1, sorted_idx)
        adjust_bn_according_to_idx(self.conv2.bn.bn, sorted_idx)
        self.conv2.conv.conv.weight.data = torch.index_select(self.conv2.conv.conv.weight.data, 0, sorted_idx)

        # conv2 -> conv1
        importance = torch.sum(torch.abs(self.conv2.conv.conv.weight.data), dim=(0, 2, 3))
        if isinstance(self.conv1.bn, DynamicGroupNorm):
            channel_per_group = self.conv1.bn.channel_per_group
            importance_chunks = torch.split(importance, channel_per_group)
            for chunk in importance_chunks:
                chunk.data.fill_(torch.mean(chunk))
            importance = torch.cat(importance_chunks, dim=0)
        if expand_ratio_stage > 0:
            sorted_expand_list = copy.deepcopy(self.expand_ratio_list)
            sorted_expand_list.sort(reverse=True)
            target_width_list = [
                make_divisible(round(max(self.out_channel_list) * expand), MyNetwork.CHANNEL_DIVISIBLE)
                for expand in sorted_expand_list
            ]
            right = len(importance)
            base = - len(target_width_list) * 1e5
            for i in range(expand_ratio_stage + 1):
                left = target_width_list[i]
                importance[left:right] += base
                base += 1e5
                right = left
        sorted_importance, sorted_idx = torch.sort(importance, dim=0, descending=True)

        self.conv2.conv.conv.weight.data = torch.index_select(self.conv2.conv.conv.weight.data, 1, sorted_idx)
        adjust_bn_according_to_idx(self.conv1.bn.bn, sorted_idx)
        self.conv1.conv.conv.weight.data = torch.index_select(self.conv1.conv.conv.weight.data, 0, sorted_idx)

        return None
Example #3
    def re_organize_middle_weights(self, expand_ratio_stage=0):
        importance = torch.sum(torch.abs(self.point_linear.conv.conv.weight.data), dim=(0, 2, 3))
        if isinstance(self.depth_conv.bn, DynamicGroupNorm):
            channel_per_group = self.depth_conv.bn.channel_per_group
            importance_chunks = torch.split(importance, channel_per_group)
            for chunk in importance_chunks:
                chunk.data.fill_(torch.mean(chunk))
            importance = torch.cat(importance_chunks, dim=0)
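        # offset the importance of channels beyond the smaller expand-ratio widths so that,
        # after the descending sort, channels already used by smaller sub-networks stay in front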
        if expand_ratio_stage > 0:
            sorted_expand_list = copy.deepcopy(self.expand_ratio_list)
            sorted_expand_list.sort(reverse=True)
            target_width_list = [
                make_divisible(round(max(self.in_channel_list) * expand), MyNetwork.CHANNEL_DIVISIBLE)
                for expand in sorted_expand_list
            ]

            right = len(importance)
            base = - len(target_width_list) * 1e5
            for i in range(expand_ratio_stage + 1):
                left = target_width_list[i]
                importance[left:right] += base
                base += 1e5
                right = left

        sorted_importance, sorted_idx = torch.sort(importance, dim=0, descending=True)
        self.point_linear.conv.conv.weight.data = torch.index_select(
            self.point_linear.conv.conv.weight.data, 1, sorted_idx
        )

        adjust_bn_according_to_idx(self.depth_conv.bn.bn, sorted_idx)
        self.depth_conv.conv.conv.weight.data = torch.index_select(
            self.depth_conv.conv.conv.weight.data, 0, sorted_idx
        )

        if self.use_se:
            # se expand: output dim 0 reorganize
            se_expand = self.depth_conv.se.fc.expand
            se_expand.weight.data = torch.index_select(se_expand.weight.data, 0, sorted_idx)
            se_expand.bias.data = torch.index_select(se_expand.bias.data, 0, sorted_idx)
            # se reduce: input dim 1 reorganize
            se_reduce = self.depth_conv.se.fc.reduce
            se_reduce.weight.data = torch.index_select(se_reduce.weight.data, 1, sorted_idx)
            # middle weight reorganize
            se_importance = torch.sum(torch.abs(se_expand.weight.data), dim=(0, 2, 3))
            se_importance, se_idx = torch.sort(se_importance, dim=0, descending=True)

            se_expand.weight.data = torch.index_select(se_expand.weight.data, 1, se_idx)
            se_reduce.weight.data = torch.index_select(se_reduce.weight.data, 0, se_idx)
            se_reduce.bias.data = torch.index_select(se_reduce.bias.data, 0, se_idx)

        if self.inverted_bottleneck is not None:
            adjust_bn_according_to_idx(self.inverted_bottleneck.bn.bn, sorted_idx)
            self.inverted_bottleneck.conv.conv.weight.data = torch.index_select(
                self.inverted_bottleneck.conv.conv.weight.data, 0, sorted_idx
            )
            return None
        else:
            return sorted_idx
Example #4
    def __init__(self,
                 in_channel_list,
                 out_channel_list,
                 kernel_size_list=3,
                 expand_ratio_list=6,
                 stride=1,
                 act_func='relu6',
                 use_se=False):
        super(DynamicMBConvLayer, self).__init__()

        self.in_channel_list = in_channel_list
        self.out_channel_list = out_channel_list

        self.kernel_size_list = val2list(kernel_size_list)
        self.expand_ratio_list = val2list(expand_ratio_list)

        self.stride = stride
        self.act_func = act_func
        self.use_se = use_se

        # build modules
        max_middle_channel = make_divisible(
            round(max(self.in_channel_list) * max(self.expand_ratio_list)),
            MyNetwork.CHANNEL_DIVISIBLE)
        if max(self.expand_ratio_list) == 1:
            self.inverted_bottleneck = None
        else:
            self.inverted_bottleneck = nn.Sequential(
                OrderedDict([
                    ('conv',
                     DynamicConv2d(max(self.in_channel_list),
                                   max_middle_channel)),
                    ('bn', DynamicBatchNorm2d(max_middle_channel)),
                    ('act', build_activation(self.act_func)),
                ]))

        self.depth_conv = nn.Sequential(
            OrderedDict([('conv',
                          DynamicSeparableConv2d(max_middle_channel,
                                                 self.kernel_size_list,
                                                 self.stride)),
                         ('bn', DynamicBatchNorm2d(max_middle_channel)),
                         ('act', build_activation(self.act_func))]))
        if self.use_se:
            self.depth_conv.add_module('se', DynamicSE(max_middle_channel))

        self.point_linear = nn.Sequential(
            OrderedDict([
                ('conv',
                 DynamicConv2d(max_middle_channel,
                               max(self.out_channel_list))),
                ('bn', DynamicBatchNorm2d(max(self.out_channel_list))),
            ]))

        self.active_kernel_size = max(self.kernel_size_list)
        self.active_expand_ratio = max(self.expand_ratio_list)
        self.active_out_channel = max(self.out_channel_list)
Example #5
    def __init__(self, in_channel_list, out_channel_list, expand_ratio_list=0.25,
                 kernel_size=3, stride=1, act_func='relu', downsample_mode='avgpool_conv'):
        super(DynamicResNetBottleneckBlock, self).__init__()

        self.in_channel_list = in_channel_list
        self.out_channel_list = out_channel_list
        self.expand_ratio_list = val2list(expand_ratio_list)

        self.kernel_size = kernel_size
        self.stride = stride
        self.act_func = act_func
        self.downsample_mode = downsample_mode

        # build modules
        max_middle_channel = make_divisible(
            round(max(self.out_channel_list) * max(self.expand_ratio_list)), MyNetwork.CHANNEL_DIVISIBLE)

        self.conv1 = nn.Sequential(OrderedDict([
            ('conv', DynamicConv2d(max(self.in_channel_list), max_middle_channel)),
            ('bn', DynamicBatchNorm2d(max_middle_channel)),
            ('act', build_activation(self.act_func, inplace=True)),
        ]))

        self.conv2 = nn.Sequential(OrderedDict([
            ('conv', DynamicConv2d(max_middle_channel, max_middle_channel, kernel_size, stride)),
            ('bn', DynamicBatchNorm2d(max_middle_channel)),
            ('act', build_activation(self.act_func, inplace=True))
        ]))

        self.conv3 = nn.Sequential(OrderedDict([
            ('conv', DynamicConv2d(max_middle_channel, max(self.out_channel_list))),
            ('bn', DynamicBatchNorm2d(max(self.out_channel_list))),
        ]))

        if self.stride == 1 and self.in_channel_list == self.out_channel_list:
            self.downsample = IdentityLayer(max(self.in_channel_list), max(self.out_channel_list))
        elif self.downsample_mode == 'conv':
            self.downsample = nn.Sequential(OrderedDict([
                ('conv', DynamicConv2d(max(self.in_channel_list), max(self.out_channel_list), stride=stride)),
                ('bn', DynamicBatchNorm2d(max(self.out_channel_list))),
            ]))
        elif self.downsample_mode == 'avgpool_conv':
            self.downsample = nn.Sequential(OrderedDict([
                ('avg_pool', nn.AvgPool2d(kernel_size=stride, stride=stride, padding=0, ceil_mode=True)),
                ('conv', DynamicConv2d(max(self.in_channel_list), max(self.out_channel_list))),
                ('bn', DynamicBatchNorm2d(max(self.out_channel_list))),
            ]))
        else:
            raise NotImplementedError

        self.final_act = build_activation(self.act_func, inplace=True)

        self.active_expand_ratio = max(self.expand_ratio_list)
        self.active_out_channel = max(self.out_channel_list)
Example #6
 def count_flops_given_config(net_config, image_size=224):
     flops = 0
     # first conv
     flops += count_conv_flop((image_size + 1) // 2, 3,
                              net_config['first_conv']['out_channels'], 3,
                              1)
     # blocks
     fsize = (image_size + 1) // 2
     for block in net_config['blocks']:
         mb_conv = block[
             'mobile_inverted_conv'] if 'mobile_inverted_conv' in block else block[
                 'conv']
         if mb_conv is None:
             continue
         out_fz = int((fsize - 1) / mb_conv['stride'] + 1)
         if mb_conv['mid_channels'] is None:
             mb_conv['mid_channels'] = round(mb_conv['in_channels'] *
                                             mb_conv['expand_ratio'])
         if mb_conv['expand_ratio'] != 1:
             # inverted bottleneck
             flops += count_conv_flop(fsize, mb_conv['in_channels'],
                                      mb_conv['mid_channels'], 1, 1)
         # depth conv
         flops += count_conv_flop(out_fz, mb_conv['mid_channels'],
                                  mb_conv['mid_channels'],
                                  mb_conv['kernel_size'],
                                  mb_conv['mid_channels'])
         if mb_conv['use_se']:
             # SE layer
             se_mid = make_divisible(mb_conv['mid_channels'] // 4,
                                     divisor=MyNetwork.CHANNEL_DIVISIBLE)
             flops += count_conv_flop(1, mb_conv['mid_channels'], se_mid, 1,
                                      1)
             flops += count_conv_flop(1, se_mid, mb_conv['mid_channels'], 1,
                                      1)
         # point linear
         flops += count_conv_flop(out_fz, mb_conv['mid_channels'],
                                  mb_conv['out_channels'], 1, 1)
         fsize = out_fz
     # final expand layer
     flops += count_conv_flop(
         fsize, net_config['final_expand_layer']['in_channels'],
         net_config['final_expand_layer']['out_channels'], 1, 1)
     # feature mix layer
     flops += count_conv_flop(
         1, net_config['feature_mix_layer']['in_channels'],
         net_config['feature_mix_layer']['out_channels'], 1, 1)
     # classifier
     flops += count_conv_flop(1, net_config['classifier']['in_features'],
                              net_config['classifier']['out_features'], 1,
                              1)
     return flops / 1e6  # MFLOPs
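The FLOP counters in this example and in Example #8 assume a count_conv_flop(out_size, in_channels, out_channels, kernel_size, groups) helper. A minimal sketch under that assumption (multiply-accumulate count of one convolution at the given output resolution; the real ofa.utils helper may differ in details):

def count_conv_flop(out_size, in_channels, out_channels, kernel_size, groups):
    # one (in_channels / groups) * k * k dot product per output pixel and output channel
    return in_channels * out_channels * kernel_size * kernel_size * out_size * out_size / groups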
Example #7
    def forward(self, x):
        in_channel = x.size(1)

        if self.inverted_bottleneck is not None:
            self.inverted_bottleneck.conv.active_out_channel = \
                make_divisible(round(in_channel * self.active_expand_ratio), MyNetwork.CHANNEL_DIVISIBLE)

        self.depth_conv.conv.active_kernel_size = self.active_kernel_size
        self.point_linear.conv.active_out_channel = self.active_out_channel

        if self.inverted_bottleneck is not None:
            x = self.inverted_bottleneck(x)
        x = self.depth_conv(x)
        x = self.point_linear(x)
        return x
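A hypothetical usage sketch for the forward above (the configuration values are illustrative and assume the DynamicMBConvLayer from Example #4 and its dynamic sub-layers are importable from the OFA codebase): the active_* attributes are set before the call, and the forward then runs only the selected slice of the maximal weights.

import torch

layer = DynamicMBConvLayer(in_channel_list=[24], out_channel_list=[40],
                           kernel_size_list=[3, 5, 7], expand_ratio_list=[3, 4, 6],
                           stride=2, act_func='relu6', use_se=True)
layer.active_kernel_size = 5    # one of kernel_size_list
layer.active_expand_ratio = 4   # at most max(expand_ratio_list)
layer.active_out_channel = 40   # at most max(out_channel_list)
out = layer(torch.randn(1, 24, 56, 56))  # -> shape (1, 40, 28, 28)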
Example #8
    def count_flops_given_config(net_config, image_size=224):
        flops = 0
        # input stem
        for layer_config in net_config['input_stem']:
            if layer_config['name'] != 'ConvLayer':
                layer_config = layer_config['conv']
            in_channel = layer_config['in_channels']
            out_channel = layer_config['out_channels']
            out_image_size = int((image_size - 1) / layer_config['stride'] + 1)

            flops += count_conv_flop(out_image_size, in_channel, out_channel,
                                     layer_config['kernel_size'],
                                     layer_config.get('groups', 1))
            image_size = out_image_size
        # max pooling
        image_size = int((image_size - 1) / 2 + 1)
        # ResNetBottleneckBlocks
        for block_config in net_config['blocks']:
            in_channel = block_config['in_channels']
            out_channel = block_config['out_channels']

            out_image_size = int((image_size - 1) / block_config['stride'] + 1)
            mid_channel = block_config['mid_channels'] if block_config['mid_channels'] is not None \
             else round(out_channel * block_config['expand_ratio'])
            mid_channel = make_divisible(mid_channel,
                                         MyNetwork.CHANNEL_DIVISIBLE)

            # conv1
            flops += count_conv_flop(image_size, in_channel, mid_channel, 1, 1)
            # conv2
            flops += count_conv_flop(out_image_size, mid_channel, mid_channel,
                                     block_config['kernel_size'],
                                     block_config['groups'])
            # conv3
            flops += count_conv_flop(out_image_size, mid_channel, out_channel,
                                     1, 1)
            # downsample
            if block_config['stride'] == 1 and in_channel == out_channel:
                pass
            else:
                flops += count_conv_flop(out_image_size, in_channel,
                                         out_channel, 1, 1)
            image_size = out_image_size
        # final classifier
        flops += count_conv_flop(1, net_config['classifier']['in_features'],
                                 net_config['classifier']['out_features'], 1,
                                 1)
        return flops / 1e6  # MFLOPs
Example #9
	def forward(self, x, groups=None):
		in_channel = x.size(1)
		num_mid = make_divisible(in_channel // self.reduction, divisor=MyNetwork.CHANNEL_DIVISIBLE)

		y = x.mean(3, keepdim=True).mean(2, keepdim=True)
		# reduce
		reduce_filter = self.get_active_reduce_weight(num_mid, in_channel, groups=groups).contiguous()
		reduce_bias = self.get_active_reduce_bias(num_mid)
		y = F.conv2d(y, reduce_filter, reduce_bias, 1, 0, 1, 1)
		# relu
		y = self.fc.relu(y)
		# expand
		expand_filter = self.get_active_expand_weight(num_mid, in_channel, groups=groups).contiguous()
		expand_bias = self.get_active_expand_bias(in_channel, groups=groups)
		y = F.conv2d(y, expand_filter, expand_bias, 1, 0, 1, 1)
		# hard sigmoid
		y = self.fc.h_sigmoid(y)

		return x * y
Example #10
    def get_active_subnet(self, in_channel, preserve_weight=True):
        # build the new layer
        sub_layer = set_layer_from_config(
            self.get_active_subnet_config(in_channel))
        sub_layer = sub_layer.to(get_net_device(self))
        if not preserve_weight:
            return sub_layer

        middle_channel = self.active_middle_channel(in_channel)
        # copy weight from current layer
        if sub_layer.inverted_bottleneck is not None:
            sub_layer.inverted_bottleneck.conv.weight.data.copy_(
                self.inverted_bottleneck.conv.get_active_filter(
                    middle_channel, in_channel).data, )
            copy_bn(sub_layer.inverted_bottleneck.bn,
                    self.inverted_bottleneck.bn.bn)

        sub_layer.depth_conv.conv.weight.data.copy_(
            self.depth_conv.conv.get_active_filter(
                middle_channel, self.active_kernel_size).data)
        copy_bn(sub_layer.depth_conv.bn, self.depth_conv.bn.bn)

        if self.use_se:
            se_mid = make_divisible(middle_channel // SEModule.REDUCTION,
                                    divisor=MyNetwork.CHANNEL_DIVISIBLE)
            sub_layer.depth_conv.se.fc.reduce.weight.data.copy_(
                self.depth_conv.se.get_active_reduce_weight(
                    se_mid, middle_channel).data)
            sub_layer.depth_conv.se.fc.reduce.bias.data.copy_(
                self.depth_conv.se.get_active_reduce_bias(se_mid).data)

            sub_layer.depth_conv.se.fc.expand.weight.data.copy_(
                self.depth_conv.se.get_active_expand_weight(
                    se_mid, middle_channel).data)
            sub_layer.depth_conv.se.fc.expand.bias.data.copy_(
                self.depth_conv.se.get_active_expand_bias(middle_channel).data)

        sub_layer.point_linear.conv.weight.data.copy_(
            self.point_linear.conv.get_active_filter(self.active_out_channel,
                                                     middle_channel).data)
        copy_bn(sub_layer.point_linear.bn, self.point_linear.bn.bn)

        return sub_layer
Example #11
    def __init__(self,
                 n_classes=1000,
                 width_mult=1.0,
                 bn_param=(0.1, 1e-3),
                 dropout_rate=0.2,
                 ks=None,
                 expand_ratio=None,
                 depth_param=None,
                 stage_width_list=None):

        ks = 3 if ks is None else ks
        expand_ratio = 6 if expand_ratio is None else expand_ratio

        input_channel = 32
        last_channel = 1280

        input_channel = make_divisible(input_channel * width_mult,
                                       MyNetwork.CHANNEL_DIVISIBLE)
        last_channel = make_divisible(last_channel * width_mult, MyNetwork.CHANNEL_DIVISIBLE) \
         if width_mult > 1.0 else last_channel

        inverted_residual_setting = [
            # t, c, n, s
            [1, 16, 1, 1],
            [expand_ratio, 24, 2, 2],
            [expand_ratio, 32, 3, 2],
            [expand_ratio, 64, 4, 2],
            [expand_ratio, 96, 3, 1],
            [expand_ratio, 160, 3, 2],
            [expand_ratio, 320, 1, 1],
        ]

        if depth_param is not None:
            assert isinstance(depth_param, int)
            for i in range(1, len(inverted_residual_setting) - 1):
                inverted_residual_setting[i][2] = depth_param

        if stage_width_list is not None:
            for i in range(len(inverted_residual_setting)):
                inverted_residual_setting[i][1] = stage_width_list[i]

        ks = val2list(ks,
                      sum([n for _, _, n, _ in inverted_residual_setting]) - 1)
        _pt = 0

        # first conv layer
        first_conv = ConvLayer(3,
                               input_channel,
                               kernel_size=3,
                               stride=2,
                               use_bn=True,
                               act_func='relu6',
                               ops_order='weight_bn_act')
        # inverted residual blocks
        blocks = []
        for t, c, n, s in inverted_residual_setting:
            output_channel = make_divisible(c * width_mult,
                                            MyNetwork.CHANNEL_DIVISIBLE)
            for i in range(n):
                if i == 0:
                    stride = s
                else:
                    stride = 1
                if t == 1:
                    kernel_size = 3
                else:
                    kernel_size = ks[_pt]
                    _pt += 1
                mobile_inverted_conv = MBConvLayer(
                    in_channels=input_channel,
                    out_channels=output_channel,
                    kernel_size=kernel_size,
                    stride=stride,
                    expand_ratio=t,
                )
                if stride == 1:
                    if input_channel == output_channel:
                        shortcut = IdentityLayer(input_channel, input_channel)
                    else:
                        shortcut = None
                else:
                    shortcut = None
                blocks.append(ResidualBlock(mobile_inverted_conv, shortcut))
                input_channel = output_channel
        # 1x1_conv before global average pooling
        feature_mix_layer = ConvLayer(
            input_channel,
            last_channel,
            kernel_size=1,
            use_bn=True,
            act_func='relu6',
            ops_order='weight_bn_act',
        )

        classifier = LinearLayer(last_channel,
                                 n_classes,
                                 dropout_rate=dropout_rate)

        super(MobileNetV2, self).__init__(first_conv, blocks,
                                          feature_mix_layer, classifier)

        # set bn param
        self.set_bn_param(*bn_param)
Example #12
    def __init__(self,
                 n_classes=1000,
                 width_mult=1.0,
                 bn_param=(0.1, 1e-5),
                 dropout_rate=0,
                 expand_ratio=None,
                 depth_param=None):

        expand_ratio = 0.25 if expand_ratio is None else expand_ratio

        input_channel = make_divisible(64 * width_mult,
                                       MyNetwork.CHANNEL_DIVISIBLE)
        mid_input_channel = make_divisible(input_channel // 2,
                                           MyNetwork.CHANNEL_DIVISIBLE)
        stage_width_list = ResNets.STAGE_WIDTH_LIST.copy()
        for i, width in enumerate(stage_width_list):
            stage_width_list[i] = make_divisible(width * width_mult,
                                                 MyNetwork.CHANNEL_DIVISIBLE)

        depth_list = [3, 4, 6, 3]
        if depth_param is not None:
            for i, depth in enumerate(ResNets.BASE_DEPTH_LIST):
                depth_list[i] = depth + depth_param

        stride_list = [1, 2, 2, 2]

        # build input stem
        input_stem = [
            ConvLayer(3,
                      mid_input_channel,
                      3,
                      stride=2,
                      use_bn=True,
                      act_func='relu'),
            ResidualBlock(
                ConvLayer(mid_input_channel,
                          mid_input_channel,
                          3,
                          stride=1,
                          use_bn=True,
                          act_func='relu'),
                IdentityLayer(mid_input_channel, mid_input_channel)),
            ConvLayer(mid_input_channel,
                      input_channel,
                      3,
                      stride=1,
                      use_bn=True,
                      act_func='relu')
        ]

        # blocks
        blocks = []
        for d, width, s in zip(depth_list, stage_width_list, stride_list):
            for i in range(d):
                stride = s if i == 0 else 1
                bottleneck_block = ResNetBottleneckBlock(
                    input_channel,
                    width,
                    kernel_size=3,
                    stride=stride,
                    expand_ratio=expand_ratio,
                    act_func='relu',
                    downsample_mode='avgpool_conv',
                )
                blocks.append(bottleneck_block)
                input_channel = width
        # classifier
        classifier = LinearLayer(input_channel,
                                 n_classes,
                                 dropout_rate=dropout_rate)

        super(ResNet50D, self).__init__(input_stem, blocks, classifier)

        # set bn param
        self.set_bn_param(*bn_param)
Example #13
    def __init__(self,
                 main_branch,
                 in_channels,
                 out_channels,
                 expand=1.0,
                 kernel_size=3,
                 act_func='relu',
                 n_groups=2,
                 downsample_ratio=2,
                 upsample_type='bilinear',
                 stride=1):
        super(LiteResidualModule, self).__init__()

        self.main_branch = main_branch

        self.lite_residual_config = {
            'in_channels': in_channels,
            'out_channels': out_channels,
            'expand': expand,
            'kernel_size': kernel_size,
            'act_func': act_func,
            'n_groups': n_groups,
            'downsample_ratio': downsample_ratio,
            'upsample_type': upsample_type,
            'stride': stride,
        }

        kernel_size = 1 if downsample_ratio is None else kernel_size

        padding = get_same_padding(kernel_size)
        if downsample_ratio is None:
            pooling = MyGlobalAvgPool2d()
        else:
            pooling = nn.AvgPool2d(downsample_ratio, downsample_ratio, 0)
        num_mid = make_divisible(int(in_channels * expand),
                                 divisor=MyNetwork.CHANNEL_DIVISIBLE)
        self.lite_residual = nn.Sequential(
            OrderedDict({
                'pooling':
                pooling,
                'conv1':
                nn.Conv2d(in_channels,
                          num_mid,
                          kernel_size,
                          stride,
                          padding,
                          groups=n_groups,
                          bias=False),
                'bn1':
                nn.BatchNorm2d(num_mid),
                'act':
                build_activation(act_func),
                'conv2':
                nn.Conv2d(num_mid, out_channels, 1, 1, 0, bias=False),
                'final_bn':
                nn.BatchNorm2d(out_channels),
            }))

        # initialize
        init_models(self.lite_residual)
        self.lite_residual.final_bn.weight.data.zero_()
Example #14
#for i, stage in enumerate(stages):
#    depth = sample['d'][i]
#    kernels = sample['ks'][4*i: 4*(i+1)]
#    expand_ratios = sample['e'][4*i: 4*(i+1)]

out_channels = [model.blocks[i].mobile_inverted_conv.point_linear.conv.conv.out_channels for i in range(1, 21)]
out_channels = [model.blocks[0].mobile_inverted_conv.point_linear.conv.out_channels] + out_channels
l2_squared = np.zeros([20, 3, 3], dtype=np.float64)
for i in range(20):
    for k in [3, 5, 7]:
        for e in [3, 4, 6]:
            l2 = 0.0
            module = model.blocks[i+1].mobile_inverted_conv
            in_channel = out_channels[i]
            mid_channels = make_divisible(round(in_channel * e), 8)

            if module.inverted_bottleneck is not None:
                l2 += torch.norm(module.inverted_bottleneck.conv.conv.weight[:mid_channels, :in_channel, :, :], p=1)
                l2 += torch.norm(module.inverted_bottleneck.bn.bn.weight[:mid_channels], p=1)
                if module.inverted_bottleneck.bn.bn.bias is not None:
                    l2 += torch.norm(module.inverted_bottleneck.bn.bn.bias[:mid_channels], p=1)

            l2 += torch.norm(module.depth_conv.conv.get_active_filter(mid_channels, k), p=1)
            l2 += torch.norm(module.depth_conv.bn.bn.weight[:mid_channels], p=1)
            if module.depth_conv.bn.bn.bias is not None:
                l2 += torch.norm(module.depth_conv.bn.bn.bias[:mid_channels], p=1)
            if hasattr(module.depth_conv, 'se'):
                se_channel = make_divisible(mid_channels // module.depth_conv.se.reduction, divisor=8)
                l2 += torch.norm(module.depth_conv.se.fc.reduce.weight[:se_channel, :mid_channels, :, :], p=1)
                if module.depth_conv.se.fc.reduce.bias is not None:
                    l2 += torch.norm(module.depth_conv.se.fc.reduce.bias[:se_channel], p=1)
Example #15
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=3,
                 stride=1,
                 expand_ratio=0.25,
                 mid_channels=None,
                 act_func='relu',
                 groups=1,
                 downsample_mode='avgpool_conv'):
        super(ResNetBottleneckBlock, self).__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels

        self.kernel_size = kernel_size
        self.stride = stride
        self.expand_ratio = expand_ratio
        self.mid_channels = mid_channels
        self.act_func = act_func
        self.groups = groups

        self.downsample_mode = downsample_mode

        if self.mid_channels is None:
            feature_dim = round(self.out_channels * self.expand_ratio)
        else:
            feature_dim = self.mid_channels

        feature_dim = make_divisible(feature_dim, MyNetwork.CHANNEL_DIVISIBLE)
        self.mid_channels = feature_dim

        # build modules
        self.conv1 = nn.Sequential(
            OrderedDict([
                ('conv',
                 nn.Conv2d(self.in_channels, feature_dim, 1, 1, 0,
                           bias=False)),
                ('bn', nn.BatchNorm2d(feature_dim)),
                ('act', build_activation(self.act_func, inplace=True)),
            ]))

        pad = get_same_padding(self.kernel_size)
        self.conv2 = nn.Sequential(
            OrderedDict([('conv',
                          nn.Conv2d(feature_dim,
                                    feature_dim,
                                    kernel_size,
                                    stride,
                                    pad,
                                    groups=groups,
                                    bias=False)),
                         ('bn', nn.BatchNorm2d(feature_dim)),
                         ('act', build_activation(self.act_func,
                                                  inplace=True))]))

        self.conv3 = nn.Sequential(
            OrderedDict([
                ('conv',
                 nn.Conv2d(feature_dim, self.out_channels, 1, 1, 0,
                           bias=False)),
                ('bn', nn.BatchNorm2d(self.out_channels)),
            ]))

        if stride == 1 and in_channels == out_channels:
            self.downsample = IdentityLayer(in_channels, out_channels)
        elif self.downsample_mode == 'conv':
            self.downsample = nn.Sequential(
                OrderedDict([
                    ('conv',
                     nn.Conv2d(in_channels,
                               out_channels,
                               1,
                               stride,
                               0,
                               bias=False)),
                    ('bn', nn.BatchNorm2d(out_channels)),
                ]))
        elif self.downsample_mode == 'avgpool_conv':
            self.downsample = nn.Sequential(
                OrderedDict([
                    ('avg_pool',
                     nn.AvgPool2d(kernel_size=stride,
                                  stride=stride,
                                  padding=0,
                                  ceil_mode=True)),
                    ('conv',
                     nn.Conv2d(in_channels, out_channels, 1, 1, 0,
                               bias=False)),
                    ('bn', nn.BatchNorm2d(out_channels)),
                ]))
        else:
            raise NotImplementedError

        self.final_act = build_activation(self.act_func, inplace=True)
Example #16
	def __init__(self, n_classes=1000, bn_param=(0.1, 1e-3), dropout_rate=0.1, base_stage_width=None, width_mult=1.0,
	             ks_list=3, expand_ratio_list=6, depth_list=4):

		self.width_mult = width_mult
		self.ks_list = val2list(ks_list, 1)
		self.expand_ratio_list = val2list(expand_ratio_list, 1)
		self.depth_list = val2list(depth_list, 1)

		self.ks_list.sort()
		self.expand_ratio_list.sort()
		self.depth_list.sort()

		if base_stage_width == 'google':
			# MobileNetV2 Stage Width
			base_stage_width = [32, 16, 24, 32, 64, 96, 160, 320, 1280]
		else:
			# ProxylessNAS Stage Width
			base_stage_width = [32, 16, 24, 40, 80, 96, 192, 320, 1280]

		input_channel = make_divisible(base_stage_width[0] * self.width_mult, MyNetwork.CHANNEL_DIVISIBLE)
		first_block_width = make_divisible(base_stage_width[1] * self.width_mult, MyNetwork.CHANNEL_DIVISIBLE)
		last_channel = make_divisible(base_stage_width[-1] * self.width_mult, MyNetwork.CHANNEL_DIVISIBLE)

		# first conv layer
		first_conv = ConvLayer(
			3, input_channel, kernel_size=3, stride=2, use_bn=True, act_func='relu6', ops_order='weight_bn_act'
		)
		# first block
		first_block_conv = MBConvLayer(
			in_channels=input_channel, out_channels=first_block_width, kernel_size=3, stride=1,
			expand_ratio=1, act_func='relu6',
		)
		first_block = ResidualBlock(first_block_conv, None)

		input_channel = first_block_width
		# inverted residual blocks
		self.block_group_info = []
		blocks = [first_block]
		_block_index = 1

		stride_stages = [2, 2, 2, 1, 2, 1]
		n_block_list = [max(self.depth_list)] * 5 + [1]

		width_list = []
		for base_width in base_stage_width[2:-1]:
			width = make_divisible(base_width * self.width_mult, MyNetwork.CHANNEL_DIVISIBLE)
			width_list.append(width)

		for width, n_block, s in zip(width_list, n_block_list, stride_stages):
			self.block_group_info.append([_block_index + i for i in range(n_block)])
			_block_index += n_block

			output_channel = width
			for i in range(n_block):
				if i == 0:
					stride = s
				else:
					stride = 1

				mobile_inverted_conv = DynamicMBConvLayer(
					in_channel_list=val2list(input_channel, 1), out_channel_list=val2list(output_channel, 1),
					kernel_size_list=ks_list, expand_ratio_list=expand_ratio_list, stride=stride, act_func='relu6',
				)

				if stride == 1 and input_channel == output_channel:
					shortcut = IdentityLayer(input_channel, input_channel)
				else:
					shortcut = None

				mb_inverted_block = ResidualBlock(mobile_inverted_conv, shortcut)

				blocks.append(mb_inverted_block)
				input_channel = output_channel
		# 1x1_conv before global average pooling
		feature_mix_layer = ConvLayer(
			input_channel, last_channel, kernel_size=1, use_bn=True, act_func='relu6',
		)
		classifier = LinearLayer(last_channel, n_classes, dropout_rate=dropout_rate)

		super(OFAProxylessNASNets, self).__init__(first_conv, blocks, feature_mix_layer, classifier)

		# set bn param
		self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

		# runtime_depth
		self.runtime_depth = [len(block_idx) for block_idx in self.block_group_info]
Example #17
    def __init__(self,
                 n_classes=1000,
                 bn_param=(0.1, 1e-5),
                 dropout_rate=0.1,
                 base_stage_width=None,
                 width_mult=1.0,
                 ks_list=3,
                 expand_ratio_list=6,
                 depth_list=4):

        self.width_mult = width_mult
        self.ks_list = val2list(ks_list, 1)
        self.expand_ratio_list = val2list(expand_ratio_list, 1)
        self.depth_list = val2list(depth_list, 1)

        self.ks_list.sort()
        self.expand_ratio_list.sort()
        self.depth_list.sort()

        base_stage_width = [16, 16, 24, 40, 80, 112, 160, 960, 1280]

        final_expand_width = make_divisible(
            base_stage_width[-2] * self.width_mult,
            MyNetwork.CHANNEL_DIVISIBLE)
        last_channel = make_divisible(base_stage_width[-1] * self.width_mult,
                                      MyNetwork.CHANNEL_DIVISIBLE)

        stride_stages = [1, 2, 2, 2, 1, 2]
        act_stages = ['relu', 'relu', 'relu', 'h_swish', 'h_swish', 'h_swish']
        se_stages = [False, False, True, False, True, True]
        n_block_list = [1] + [max(self.depth_list)] * 5
        width_list = []
        for base_width in base_stage_width[:-2]:
            width = make_divisible(base_width * self.width_mult,
                                   MyNetwork.CHANNEL_DIVISIBLE)
            width_list.append(width)

        input_channel, first_block_dim = width_list[0], width_list[1]
        # first conv layer
        first_conv = ConvLayer(3,
                               input_channel,
                               kernel_size=3,
                               stride=2,
                               act_func='h_swish')
        first_block_conv = MBConvLayer(
            in_channels=input_channel,
            out_channels=first_block_dim,
            kernel_size=3,
            stride=stride_stages[0],
            expand_ratio=1,
            act_func=act_stages[0],
            use_se=se_stages[0],
        )
        first_block = ResidualBlock(
            first_block_conv,
            IdentityLayer(first_block_dim, first_block_dim)
            if input_channel == first_block_dim else None,
        )

        # inverted residual blocks
        self.block_group_info = []
        blocks = [first_block]
        _block_index = 1
        feature_dim = first_block_dim

        for width, n_block, s, act_func, use_se in zip(width_list[2:],
                                                       n_block_list[1:],
                                                       stride_stages[1:],
                                                       act_stages[1:],
                                                       se_stages[1:]):
            self.block_group_info.append(
                [_block_index + i for i in range(n_block)])
            _block_index += n_block

            output_channel = width
            for i in range(n_block):
                if i == 0:
                    stride = s
                else:
                    stride = 1
                mobile_inverted_conv = DynamicMBConvLayer(
                    in_channel_list=val2list(feature_dim),
                    out_channel_list=val2list(output_channel),
                    kernel_size_list=ks_list,
                    expand_ratio_list=expand_ratio_list,
                    stride=stride,
                    act_func=act_func,
                    use_se=use_se,
                )
                if stride == 1 and feature_dim == output_channel:
                    shortcut = IdentityLayer(feature_dim, feature_dim)
                else:
                    shortcut = None
                blocks.append(ResidualBlock(mobile_inverted_conv, shortcut))
                feature_dim = output_channel
        # final expand layer, feature mix layer & classifier
        final_expand_layer = ConvLayer(feature_dim,
                                       final_expand_width,
                                       kernel_size=1,
                                       act_func='h_swish')
        feature_mix_layer = ConvLayer(
            final_expand_width,
            last_channel,
            kernel_size=1,
            bias=False,
            use_bn=False,
            act_func='h_swish',
        )

        classifier = LinearLayer(last_channel,
                                 n_classes,
                                 dropout_rate=dropout_rate)

        super(OFAMobileNetV3,
              self).__init__(first_conv, blocks, final_expand_layer,
                             feature_mix_layer, classifier)

        # set bn param
        self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

        # runtime_depth
        self.runtime_depth = [
            len(block_idx) for block_idx in self.block_group_info
        ]
Example #18
 def active_middle_channel(self, in_channel):
     return make_divisible(round(in_channel * self.active_expand_ratio), MyNetwork.CHANNEL_DIVISIBLE)
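For instance, assuming MyNetwork.CHANNEL_DIVISIBLE is 8 (the same divisor that appears literally in Example #14), an active expand ratio of 6 on a 40-channel input gives make_divisible(round(40 * 6), 8) == 240.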
Example #19
    def __init__(self,
                 n_classes=1000,
                 width_mult=1.0,
                 bn_param=(0.1, 1e-5),
                 dropout_rate=0.2,
                 ks=None,
                 expand_ratio=None,
                 depth_param=None,
                 stage_width_list=None):
        input_channel = 16
        last_channel = 1280

        input_channel = make_divisible(input_channel * width_mult,
                                       MyNetwork.CHANNEL_DIVISIBLE)
        last_channel = make_divisible(last_channel * width_mult, MyNetwork.CHANNEL_DIVISIBLE) \
         if width_mult > 1.0 else last_channel

        cfg = {
            #    k,     exp,    c,      se,         nl,         s,      e,
            '0': [
                [3, 16, 16, False, 'relu', 1, 1],
            ],
            '1': [
                [3, 64, 24, False, 'relu', 2, None],  # 4
                [3, 72, 24, False, 'relu', 1, None],  # 3
            ],
            '2': [
                [5, 72, 40, True, 'relu', 2, None],  # 3
                [5, 120, 40, True, 'relu', 1, None],  # 3
                [5, 120, 40, True, 'relu', 1, None],  # 3
            ],
            '3': [
                [3, 240, 80, False, 'h_swish', 2, None],  # 6
                [3, 200, 80, False, 'h_swish', 1, None],  # 2.5
                [3, 184, 80, False, 'h_swish', 1, None],  # 2.3
                [3, 184, 80, False, 'h_swish', 1, None],  # 2.3
            ],
            '4': [
                [3, 480, 112, True, 'h_swish', 1, None],  # 6
                [3, 672, 112, True, 'h_swish', 1, None],  # 6
            ],
            '5': [
                [5, 672, 160, True, 'h_swish', 2, None],  # 6
                [5, 960, 160, True, 'h_swish', 1, None],  # 6
                [5, 960, 160, True, 'h_swish', 1, None],  # 6
            ]
        }

        cfg = self.adjust_cfg(cfg, ks, expand_ratio, depth_param,
                              stage_width_list)
        # apply the width multiplier to `exp` (index 1) and `c` (index 2) of each block config
        for stage_id, block_config_list in cfg.items():
            for block_config in block_config_list:
                if block_config[1] is not None:
                    block_config[1] = make_divisible(
                        block_config[1] * width_mult,
                        MyNetwork.CHANNEL_DIVISIBLE)
                block_config[2] = make_divisible(block_config[2] * width_mult,
                                                 MyNetwork.CHANNEL_DIVISIBLE)

        first_conv, blocks, final_expand_layer, feature_mix_layer, classifier = self.build_net_via_cfg(
            cfg, input_channel, last_channel, n_classes, dropout_rate)
        super(MobileNetV3Large,
              self).__init__(first_conv, blocks, final_expand_layer,
                             feature_mix_layer, classifier)
        # set bn param
        self.set_bn_param(*bn_param)
Example #20
 def active_middle_channels(self):
     feature_dim = round(self.active_out_channel * self.active_expand_ratio)
     feature_dim = make_divisible(feature_dim, MyNetwork.CHANNEL_DIVISIBLE)
     return feature_dim