Example #1
    def get_active_net_config(self):
        input_stem_config = [self.input_stem[0].get_active_subnet_config(3)]
        if self.input_stem_skipping <= 0:
            input_stem_config.append({
                'name': ResidualBlock.__name__,
                'conv': self.input_stem[1].conv.get_active_subnet_config(
                    self.input_stem[0].active_out_channel),
                'shortcut': IdentityLayer(self.input_stem[0].active_out_channel,
                                          self.input_stem[0].active_out_channel),
            })
        input_stem_config.append(self.input_stem[2].get_active_subnet_config(
            self.input_stem[0].active_out_channel))
        input_channel = self.input_stem[2].active_out_channel

        blocks_config = []
        for stage_id, block_idx in enumerate(self.grouped_block_index):
            depth_param = self.runtime_depth[stage_id]
            active_idx = block_idx[:len(block_idx) - depth_param]
            for idx in active_idx:
                blocks_config.append(
                    self.blocks[idx].get_active_subnet_config(input_channel))
                input_channel = self.blocks[idx].active_out_channel
        classifier_config = self.classifier.get_active_subnet_config(
            input_channel)
        return {
            'name': ResNets.__name__,
            'bn': self.get_bn_param(),
            'input_stem': input_stem_config,
            'blocks': blocks_config,
            'classifier': classifier_config,
        }
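
The per-stage loop above drops the last `runtime_depth[stage_id]` blocks of each stage. A minimal, runnable illustration of that slicing, using hypothetical index and depth values:

block_idx = [4, 5, 6, 7]          # hypothetical block indices owned by one stage
depth_param = 2                   # hypothetical runtime_depth entry for that stage
active_idx = block_idx[:len(block_idx) - depth_param]
print(active_idx)                 # [4, 5] -> only the first two blocks stay active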
Example #2
    def get_active_subnet(self, preserve_weight=True):
        input_stem = [self.input_stem[0].get_active_subnet(3, preserve_weight)]
        if self.input_stem_skipping <= 0:
            input_stem.append(
                ResidualBlock(
                    self.input_stem[1].conv.get_active_subnet(
                        self.input_stem[0].active_out_channel,
                        preserve_weight),
                    IdentityLayer(self.input_stem[0].active_out_channel,
                                  self.input_stem[0].active_out_channel)))
        input_stem.append(self.input_stem[2].get_active_subnet(
            self.input_stem[0].active_out_channel, preserve_weight))
        input_channel = self.input_stem[2].active_out_channel

        blocks = []
        for stage_id, block_idx in enumerate(self.grouped_block_index):
            depth_param = self.runtime_depth[stage_id]
            active_idx = block_idx[:len(block_idx) - depth_param]
            for idx in active_idx:
                blocks.append(self.blocks[idx].get_active_subnet(
                    input_channel, preserve_weight))
                input_channel = self.blocks[idx].active_out_channel
        classifier = self.classifier.get_active_subnet(input_channel,
                                                       preserve_weight)
        subnet = ResNets(input_stem, blocks, classifier)

        subnet.set_bn_param(**self.get_bn_param())
        return subnet
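
Taken together, Examples #1 and #2 suggest a simple export helper. This is a hedged sketch, not part of the listing: `supernet` is assumed to be any super-network instance exposing the two methods shown above (for instance the OFAResNets of Example #8).

def export_active_subnet(supernet, preserve_weight=True):
    # Standalone module with the currently active architecture; weights are
    # copied from the super-network when preserve_weight is True (Example #2).
    subnet = supernet.get_active_subnet(preserve_weight=preserve_weight)
    # Serializable description of the same architecture (Example #1).
    config = supernet.get_active_net_config()
    return subnet, config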
Example #3
    def build_net_via_cfg(cfg, input_channel, last_channel, n_classes,
                          dropout_rate):
        # first conv layer
        first_conv = ConvLayer(3,
                               input_channel,
                               kernel_size=3,
                               stride=2,
                               use_bn=True,
                               act_func='h_swish',
                               ops_order='weight_bn_act')
        # build mobile blocks
        feature_dim = input_channel
        blocks = []
        for stage_id, block_config_list in cfg.items():
            for k, mid_channel, out_channel, use_se, act_func, stride, expand_ratio in block_config_list:
                mb_conv = MBConvLayer(feature_dim, out_channel, k, stride,
                                      expand_ratio, mid_channel, act_func,
                                      use_se)
                if stride == 1 and out_channel == feature_dim:
                    shortcut = IdentityLayer(out_channel, out_channel)
                else:
                    shortcut = None
                blocks.append(ResidualBlock(mb_conv, shortcut))
                feature_dim = out_channel
        # final expand layer
        final_expand_layer = ConvLayer(
            feature_dim,
            feature_dim * 6,
            kernel_size=1,
            use_bn=True,
            act_func='h_swish',
            ops_order='weight_bn_act',
        )
        # feature mix layer
        feature_mix_layer = ConvLayer(
            feature_dim * 6,
            last_channel,
            kernel_size=1,
            bias=False,
            use_bn=False,
            act_func='h_swish',
        )
        # classifier
        classifier = LinearLayer(last_channel,
                                 n_classes,
                                 dropout_rate=dropout_rate)

        return first_conv, blocks, final_expand_layer, feature_mix_layer, classifier
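
The shape of `cfg` is only implied by the tuple unpacking in the stage loop. A hedged sketch of the expected layout, with placeholder numbers:

# stage_id -> list of (kernel, mid_channel, out_channel, use_se, act_func, stride, expand_ratio)
cfg = {
    0: [(3, 64, 24, False, 'relu', 2, 4)],
    1: [(5, 96, 40, True, 'h_swish', 2, 4),
        (5, 120, 40, True, 'h_swish', 1, 3)],
}

With such a dict, `build_net_via_cfg(cfg, input_channel, last_channel, n_classes, dropout_rate)` returns the five sub-modules in construction order.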
Example #4
	def __init__(self, n_classes=1000, width_mult=1.0, bn_param=(0.1, 1e-5), dropout_rate=0,
	             expand_ratio=None, depth_param=None):

		expand_ratio = 0.25 if expand_ratio is None else expand_ratio

		input_channel = make_divisible(64 * width_mult, MyNetwork.CHANNEL_DIVISIBLE)
		mid_input_channel = make_divisible(input_channel // 2, MyNetwork.CHANNEL_DIVISIBLE)
		stage_width_list = ResNets.STAGE_WIDTH_LIST.copy()
		for i, width in enumerate(stage_width_list):
			stage_width_list[i] = make_divisible(width * width_mult, MyNetwork.CHANNEL_DIVISIBLE)

		depth_list = [3, 4, 6, 3]
		if depth_param is not None:
			for i, depth in enumerate(ResNets.BASE_DEPTH_LIST):
				depth_list[i] = depth + depth_param

		stride_list = [1, 2, 2, 2]

		# build input stem
		input_stem = [
			ConvLayer(3, mid_input_channel, 3, stride=2, use_bn=True, act_func='relu'),
			ResidualBlock(
				ConvLayer(mid_input_channel, mid_input_channel, 3, stride=1, use_bn=True, act_func='relu'),
				IdentityLayer(mid_input_channel, mid_input_channel)
			),
			ConvLayer(mid_input_channel, input_channel, 3, stride=1, use_bn=True, act_func='relu')
		]

		# blocks
		blocks = []
		for d, width, s in zip(depth_list, stage_width_list, stride_list):
			for i in range(d):
				stride = s if i == 0 else 1
				bottleneck_block = ResNetBottleneckBlock(
					input_channel, width, kernel_size=3, stride=stride, expand_ratio=expand_ratio,
					act_func='relu', downsample_mode='avgpool_conv',
				)
				blocks.append(bottleneck_block)
				input_channel = width
		# classifier
		classifier = LinearLayer(input_channel, n_classes, dropout_rate=dropout_rate)

		super(ResNet50D, self).__init__(input_stem, blocks, classifier)

		# set bn param
		self.set_bn_param(*bn_param)
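
Most constructors in this listing round channel counts with `make_divisible(v, MyNetwork.CHANNEL_DIVISIBLE)`, which is not itself shown. The sketch below is the common MobileNet-style rounding helper and is an assumption about how this codebase defines it:

def make_divisible(v, divisor, min_val=None):
    # Assumed implementation: round v to the nearest multiple of `divisor`
    # without falling below 90% of the original value.
    if min_val is None:
        min_val = divisor
    new_v = max(min_val, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v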
Example #5
    def __init__(self,
                 in_channel_list,
                 out_channel_list,
                 expand_ratio_list=0.25,
                 kernel_size=3,
                 stride=1,
                 act_func='relu',
                 downsample_mode='avgpool_conv'):
        super(DynamicResNetBottleneckBlock, self).__init__()

        self.in_channel_list = in_channel_list
        self.out_channel_list = out_channel_list
        self.expand_ratio_list = val2list(expand_ratio_list)

        self.kernel_size = kernel_size
        self.stride = stride
        self.act_func = act_func
        self.downsample_mode = downsample_mode

        # build modules
        max_middle_channel = make_divisible(
            round(max(self.out_channel_list) * max(self.expand_ratio_list)),
            MyNetwork.CHANNEL_DIVISIBLE)

        self.conv1 = nn.Sequential(
            OrderedDict([
                ('conv',
                 DynamicConv2d(max(self.in_channel_list), max_middle_channel)),
                ('bn', DynamicBatchNorm2d(max_middle_channel)),
                ('act', build_activation(self.act_func, inplace=True)),
            ]))

        self.conv2 = nn.Sequential(
            OrderedDict([
                ('conv',
                 DynamicConv2d(max_middle_channel, max_middle_channel,
                               kernel_size, stride)),
                ('bn', DynamicBatchNorm2d(max_middle_channel)),
                ('act', build_activation(self.act_func, inplace=True)),
            ]))

        self.conv3 = nn.Sequential(
            OrderedDict([
                ('conv',
                 DynamicConv2d(max_middle_channel,
                               max(self.out_channel_list))),
                ('bn', DynamicBatchNorm2d(max(self.out_channel_list))),
            ]))

        if self.stride == 1 and self.in_channel_list == self.out_channel_list:
            self.downsample = IdentityLayer(max(self.in_channel_list),
                                            max(self.out_channel_list))
        elif self.downsample_mode == 'conv':
            self.downsample = nn.Sequential(
                OrderedDict([
                    ('conv',
                     DynamicConv2d(max(self.in_channel_list),
                                   max(self.out_channel_list),
                                   stride=stride)),
                    ('bn', DynamicBatchNorm2d(max(self.out_channel_list))),
                ]))
        elif self.downsample_mode == 'avgpool_conv':
            self.downsample = nn.Sequential(
                OrderedDict([
                    ('avg_pool',
                     nn.AvgPool2d(kernel_size=stride,
                                  stride=stride,
                                  padding=0,
                                  ceil_mode=True)),
                    ('conv',
                     DynamicConv2d(max(self.in_channel_list),
                                   max(self.out_channel_list))),
                    ('bn', DynamicBatchNorm2d(max(self.out_channel_list))),
                ]))
        else:
            raise NotImplementedError

        self.final_act = build_activation(self.act_func, inplace=True)

        self.active_expand_ratio = max(self.expand_ratio_list)
        self.active_out_channel = max(self.out_channel_list)
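
The last two lines leave the block in its widest configuration. A hedged usage sketch of narrowing it before a forward pass, assuming that assigning to these two attributes directly is a supported way to pick a sub-block:

import random

def sample_block_config(block):
    # Choose one of the widths/expansions the block was built to cover.
    block.active_expand_ratio = random.choice(block.expand_ratio_list)
    block.active_out_channel = random.choice(block.out_channel_list)
    return block.active_expand_ratio, block.active_out_channel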
Example #6
    def __init__(self,
                 n_classes=1000,
                 bn_param=(0.1, 1e-3),
                 dropout_rate=0.1,
                 base_stage_width=None,
                 width_mult=1.0,
                 ks_list=3,
                 expand_ratio_list=6,
                 depth_list=4):

        self.width_mult = width_mult
        self.ks_list = val2list(ks_list, 1)
        self.expand_ratio_list = val2list(expand_ratio_list, 1)
        self.depth_list = val2list(depth_list, 1)

        self.ks_list.sort()
        self.expand_ratio_list.sort()
        self.depth_list.sort()

        if base_stage_width == 'google':
            # MobileNetV2 Stage Width
            base_stage_width = [32, 16, 24, 32, 64, 96, 160, 320, 1280]
        else:
            # ProxylessNAS Stage Width
            base_stage_width = [32, 16, 24, 40, 80, 96, 192, 320, 1280]

        input_channel = make_divisible(base_stage_width[0] * self.width_mult,
                                       MyNetwork.CHANNEL_DIVISIBLE)
        first_block_width = make_divisible(
            base_stage_width[1] * self.width_mult, MyNetwork.CHANNEL_DIVISIBLE)
        last_channel = make_divisible(base_stage_width[-1] * self.width_mult,
                                      MyNetwork.CHANNEL_DIVISIBLE)

        # first conv layer
        first_conv = ConvLayer(3,
                               input_channel,
                               kernel_size=3,
                               stride=2,
                               use_bn=True,
                               act_func='relu6',
                               ops_order='weight_bn_act')
        # first block
        first_block_conv = MBConvLayer(
            in_channels=input_channel,
            out_channels=first_block_width,
            kernel_size=3,
            stride=1,
            expand_ratio=1,
            act_func='relu6',
        )
        first_block = ResidualBlock(first_block_conv, None)

        input_channel = first_block_width
        # inverted residual blocks
        self.block_group_info = []
        blocks = [first_block]
        _block_index = 1

        stride_stages = [2, 2, 2, 1, 2, 1]
        n_block_list = [max(self.depth_list)] * 5 + [1]

        width_list = []
        for base_width in base_stage_width[2:-1]:
            width = make_divisible(base_width * self.width_mult,
                                   MyNetwork.CHANNEL_DIVISIBLE)
            width_list.append(width)

        for width, n_block, s in zip(width_list, n_block_list, stride_stages):
            self.block_group_info.append(
                [_block_index + i for i in range(n_block)])
            _block_index += n_block

            output_channel = width
            for i in range(n_block):
                if i == 0:
                    stride = s
                else:
                    stride = 1

                mobile_inverted_conv = DynamicMBConvLayer(
                    in_channel_list=val2list(input_channel, 1),
                    out_channel_list=val2list(output_channel, 1),
                    kernel_size_list=ks_list,
                    expand_ratio_list=expand_ratio_list,
                    stride=stride,
                    act_func='relu6',
                )

                if stride == 1 and input_channel == output_channel:
                    shortcut = IdentityLayer(input_channel, input_channel)
                else:
                    shortcut = None

                mb_inverted_block = ResidualBlock(mobile_inverted_conv,
                                                  shortcut)

                blocks.append(mb_inverted_block)
                input_channel = output_channel
        # 1x1_conv before global average pooling
        feature_mix_layer = ConvLayer(
            input_channel,
            last_channel,
            kernel_size=1,
            use_bn=True,
            act_func='relu6',
        )
        classifier = LinearLayer(last_channel,
                                 n_classes,
                                 dropout_rate=dropout_rate)

        super(OFAProxylessNASNets,
              self).__init__(first_conv, blocks, feature_mix_layer, classifier)

        # set bn param
        self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

        # runtime_depth
        self.runtime_depth = [
            len(block_idx) for block_idx in self.block_group_info
        ]
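
How `block_group_info` partitions the block indices can be reproduced in isolation. A runnable illustration with a hypothetical `depth_list`:

depth_list = [2, 3, 4]                       # hypothetical elastic depths
n_block_list = [max(depth_list)] * 5 + [1]   # [4, 4, 4, 4, 4, 1]
block_group_info, _block_index = [], 1       # index 0 is the fixed first block
for n_block in n_block_list:
    block_group_info.append([_block_index + i for i in range(n_block)])
    _block_index += n_block
print(block_group_info)
# [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16], [17, 18, 19, 20], [21]]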
Example #7
    def __init__(self,
                 n_classes=1000,
                 bn_param=(0.1, 1e-5),
                 dropout_rate=0.1,
                 base_stage_width=None,
                 width_mult=1.0,
                 ks_list=3,
                 expand_ratio_list=6,
                 depth_list=4,
                 dropblock=False,
                 block_size=0):

        self.width_mult = width_mult
        self.ks_list = val2list(ks_list, 1)
        self.expand_ratio_list = val2list(expand_ratio_list, 1)
        self.depth_list = val2list(depth_list, 1)

        self.ks_list.sort()
        self.expand_ratio_list.sort()
        self.depth_list.sort()

        base_stage_width = [16, 16, 24, 40, 80, 112, 160, 960, 1280]

        final_expand_width = make_divisible(
            base_stage_width[-2] * self.width_mult,
            MyNetwork.CHANNEL_DIVISIBLE)
        last_channel = make_divisible(base_stage_width[-1] * self.width_mult,
                                      MyNetwork.CHANNEL_DIVISIBLE)

        stride_stages = [1, 2, 2, 2, 1, 2]
        act_stages = ['relu', 'relu', 'relu', 'h_swish', 'h_swish', 'h_swish']
        se_stages = [False, False, True, False, True, True]
        n_block_list = [1] + [max(self.depth_list)] * 5
        width_list = []
        for base_width in base_stage_width[:-2]:
            width = make_divisible(base_width * self.width_mult,
                                   MyNetwork.CHANNEL_DIVISIBLE)
            width_list.append(width)

        input_channel, first_block_dim = width_list[0], width_list[1]
        # first conv layer
        first_conv = ConvLayer(3,
                               input_channel,
                               kernel_size=3,
                               stride=2,
                               act_func='h_swish')
        first_block_conv = MBConvLayer(
            in_channels=input_channel,
            out_channels=first_block_dim,
            kernel_size=3,
            stride=stride_stages[0],
            expand_ratio=1,
            act_func=act_stages[0],
            use_se=se_stages[0],
        )
        first_block = ResidualBlock(
            first_block_conv,
            IdentityLayer(first_block_dim, first_block_dim)
            if input_channel == first_block_dim else None,
            dropout_rate, dropblock, block_size)

        # inverted residual blocks
        self.block_group_info = []
        blocks = [first_block]
        _block_index = 1
        feature_dim = first_block_dim

        for width, n_block, s, act_func, use_se in zip(width_list[2:],
                                                       n_block_list[1:],
                                                       stride_stages[1:],
                                                       act_stages[1:],
                                                       se_stages[1:]):
            self.block_group_info.append(
                [_block_index + i for i in range(n_block)])
            _block_index += n_block

            output_channel = width
            for i in range(n_block):
                if i == 0:
                    stride = s
                else:
                    stride = 1
                mobile_inverted_conv = DynamicMBConvLayer(
                    in_channel_list=val2list(feature_dim),
                    out_channel_list=val2list(output_channel),
                    kernel_size_list=ks_list,
                    expand_ratio_list=expand_ratio_list,
                    stride=stride,
                    act_func=act_func,
                    use_se=use_se,
                )
                if stride == 1 and feature_dim == output_channel:
                    shortcut = IdentityLayer(feature_dim, feature_dim)
                else:
                    shortcut = None
                blocks.append(
                    ResidualBlock(mobile_inverted_conv, shortcut, dropout_rate,
                                  dropblock, block_size))
                feature_dim = output_channel
        # final expand layer, feature mix layer & classifier
        final_expand_layer = ConvLayer(feature_dim,
                                       final_expand_width,
                                       kernel_size=1,
                                       act_func='h_swish')
        feature_mix_layer = ConvLayer(
            final_expand_width,
            last_channel,
            kernel_size=1,
            bias=False,
            use_bn=False,
            act_func='h_swish',
        )

        classifier = LinearLayer(last_channel,
                                 n_classes,
                                 dropout_rate=dropout_rate)

        super(OFAMobileNetV3,
              self).__init__(first_conv, blocks, final_expand_layer,
                             feature_mix_layer, classifier)

        # set bn param
        self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

        # runtime_depth
        self.runtime_depth = [
            len(block_idx) for block_idx in self.block_group_info
        ]
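
A hedged construction sketch: the elastic dimensions are passed as lists, and `runtime_depth` starts at the full depth of every stage. The kernel/expansion/depth values below are example choices, and the import path is an assumption about the package layout.

from ofa.imagenet_classification.elastic_nn.networks import OFAMobileNetV3  # assumed path

ofa_net = OFAMobileNetV3(
    n_classes=1000,
    dropout_rate=0.1,
    ks_list=[3, 5, 7],            # elastic kernel sizes
    expand_ratio_list=[3, 4, 6],  # elastic expansion ratios
    depth_list=[2, 3, 4],         # elastic per-stage depths
)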
Example #8
    def __init__(self,
                 n_classes=1000,
                 bn_param=(0.1, 1e-5),
                 dropout_rate=0,
                 depth_list=2,
                 expand_ratio_list=0.25,
                 width_mult_list=1.0):

        self.depth_list = val2list(depth_list)
        self.expand_ratio_list = val2list(expand_ratio_list)
        self.width_mult_list = val2list(width_mult_list)
        # sort
        self.depth_list.sort()
        self.expand_ratio_list.sort()
        self.width_mult_list.sort()

        input_channel = [
            make_divisible(64 * width_mult, MyNetwork.CHANNEL_DIVISIBLE)
            for width_mult in self.width_mult_list
        ]
        mid_input_channel = [
            make_divisible(channel // 2, MyNetwork.CHANNEL_DIVISIBLE)
            for channel in input_channel
        ]

        stage_width_list = ResNets.STAGE_WIDTH_LIST.copy()
        for i, width in enumerate(stage_width_list):
            stage_width_list[i] = [
                make_divisible(width * width_mult, MyNetwork.CHANNEL_DIVISIBLE)
                for width_mult in self.width_mult_list
            ]

        n_block_list = [
            base_depth + max(self.depth_list)
            for base_depth in ResNets.BASE_DEPTH_LIST
        ]
        stride_list = [1, 2, 2, 2]

        # build input stem
        input_stem = [
            DynamicConvLayer(val2list(3),
                             mid_input_channel,
                             3,
                             stride=2,
                             use_bn=True,
                             act_func='relu'),
            ResidualBlock(
                DynamicConvLayer(mid_input_channel,
                                 mid_input_channel,
                                 3,
                                 stride=1,
                                 use_bn=True,
                                 act_func='relu'),
                IdentityLayer(mid_input_channel, mid_input_channel)),
            DynamicConvLayer(mid_input_channel,
                             input_channel,
                             3,
                             stride=1,
                             use_bn=True,
                             act_func='relu')
        ]

        # blocks
        blocks = []
        for d, width, s in zip(n_block_list, stage_width_list, stride_list):
            for i in range(d):
                stride = s if i == 0 else 1
                bottleneck_block = DynamicResNetBottleneckBlock(
                    input_channel,
                    width,
                    expand_ratio_list=self.expand_ratio_list,
                    kernel_size=3,
                    stride=stride,
                    act_func='relu',
                    downsample_mode='avgpool_conv',
                )
                blocks.append(bottleneck_block)
                input_channel = width
        # classifier
        classifier = DynamicLinearLayer(input_channel,
                                        n_classes,
                                        dropout_rate=dropout_rate)

        super(OFAResNets, self).__init__(input_stem, blocks, classifier)

        # set bn param
        self.set_bn_param(*bn_param)

        # runtime_depth
        self.input_stem_skipping = 0
        self.runtime_depth = [0] * len(n_block_list)
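
The depth bookkeeping ties back to Examples #1 and #2: each stage is built with `base_depth + max(depth_list)` blocks, and a `runtime_depth` entry of 0 keeps all of them. A runnable illustration, with `BASE_DEPTH_LIST = [2, 2, 4, 2]` assumed purely for the arithmetic:

BASE_DEPTH_LIST = [2, 2, 4, 2]    # assumed stand-in for ResNets.BASE_DEPTH_LIST
depth_list = [0, 1, 2]            # hypothetical elastic depth offsets
n_block_list = [base_depth + max(depth_list) for base_depth in BASE_DEPTH_LIST]
print(n_block_list)               # [4, 4, 6, 4] blocks built per stage
runtime_depth = [0] * len(n_block_list)   # 0 = keep every block (see Examples #1/#2)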
Example #9
    def __init__(self,
                 n_classes=1000,
                 width_mult=1.0,
                 bn_param=(0.1, 1e-3),
                 dropout_rate=0.2,
                 ks=None,
                 expand_ratio=None,
                 depth_param=None,
                 stage_width_list=None):

        ks = 3 if ks is None else ks
        expand_ratio = 6 if expand_ratio is None else expand_ratio

        input_channel = 32
        last_channel = 1280

        input_channel = make_divisible(input_channel * width_mult,
                                       MyNetwork.CHANNEL_DIVISIBLE)
        if width_mult > 1.0:
            last_channel = make_divisible(last_channel * width_mult,
                                          MyNetwork.CHANNEL_DIVISIBLE)

        inverted_residual_setting = [
            # t, c, n, s
            [1, 16, 1, 1],
            [expand_ratio, 24, 2, 2],
            [expand_ratio, 32, 3, 2],
            [expand_ratio, 64, 4, 2],
            [expand_ratio, 96, 3, 1],
            [expand_ratio, 160, 3, 2],
            [expand_ratio, 320, 1, 1],
        ]

        if depth_param is not None:
            assert isinstance(depth_param, int)
            for i in range(1, len(inverted_residual_setting) - 1):
                inverted_residual_setting[i][2] = depth_param

        if stage_width_list is not None:
            for i in range(len(inverted_residual_setting)):
                inverted_residual_setting[i][1] = stage_width_list[i]

        ks = val2list(ks,
                      sum([n for _, _, n, _ in inverted_residual_setting]) - 1)
        _pt = 0

        # first conv layer
        first_conv = ConvLayer(3,
                               input_channel,
                               kernel_size=3,
                               stride=2,
                               use_bn=True,
                               act_func='relu6',
                               ops_order='weight_bn_act')
        # inverted residual blocks
        blocks = []
        for t, c, n, s in inverted_residual_setting:
            output_channel = make_divisible(c * width_mult,
                                            MyNetwork.CHANNEL_DIVISIBLE)
            for i in range(n):
                if i == 0:
                    stride = s
                else:
                    stride = 1
                if t == 1:
                    kernel_size = 3
                else:
                    kernel_size = ks[_pt]
                    _pt += 1
                mobile_inverted_conv = MBConvLayer(
                    in_channels=input_channel,
                    out_channels=output_channel,
                    kernel_size=kernel_size,
                    stride=stride,
                    expand_ratio=t,
                )
                if stride == 1:
                    if input_channel == output_channel:
                        shortcut = IdentityLayer(input_channel, input_channel)
                    else:
                        shortcut = None
                else:
                    shortcut = None
                blocks.append(ResidualBlock(mobile_inverted_conv, shortcut))
                input_channel = output_channel
        # 1x1_conv before global average pooling
        feature_mix_layer = ConvLayer(
            input_channel,
            last_channel,
            kernel_size=1,
            use_bn=True,
            act_func='relu6',
            ops_order='weight_bn_act',
        )

        classifier = LinearLayer(last_channel,
                                 n_classes,
                                 dropout_rate=dropout_rate)

        super(MobileNetV2, self).__init__(first_conv, blocks,
                                          feature_mix_layer, classifier)

        # set bn param
        self.set_bn_param(*bn_param)
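
The effect of `depth_param` on the `(t, c, n, s)` table is easy to check in isolation; only the first and last stages keep their original repeat count. A runnable illustration with the default widths:

setting = [[1, 16, 1, 1], [6, 24, 2, 2], [6, 32, 3, 2], [6, 64, 4, 2],
           [6, 96, 3, 1], [6, 160, 3, 2], [6, 320, 1, 1]]
depth_param = 3
for i in range(1, len(setting) - 1):
    setting[i][2] = depth_param
print([n for _, _, n, _ in setting])  # [1, 3, 3, 3, 3, 3, 1]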