Esempio n. 1
0
    def set_active_subnet(self, d=None, e=None, w=None, **kwargs):
        depth = val2list(d, len(ResNets.BASE_DEPTH_LIST) + 1)
        expand_ratio = val2list(e, len(self.blocks))
        width_mult = val2list(w, len(ResNets.BASE_DEPTH_LIST) + 2)

        for block, e in zip(self.blocks, expand_ratio):
            if e is not None:
                block.active_expand_ratio = e

        if width_mult[0] is not None:
            self.input_stem[1].conv.active_out_channel = self.input_stem[0].active_out_channel = \
             self.input_stem[0].out_channel_list[width_mult[0]]
        if width_mult[1] is not None:
            self.input_stem[2].active_out_channel = self.input_stem[
                2].out_channel_list[width_mult[1]]

        if depth[0] is not None:
            self.input_stem_skipping = (depth[0] != max(self.depth_list))
        for stage_id, (block_idx, d, w) in enumerate(
                zip(self.grouped_block_index, depth[1:], width_mult[2:])):
            if d is not None:
                self.runtime_depth[stage_id] = max(self.depth_list) - d
            if w is not None:
                for idx in block_idx:
                    self.blocks[idx].active_out_channel = self.blocks[
                        idx].out_channel_list[w]
Esempio n. 2
0
	def set_active_subnet(self, ks=None, e=None, d=None, **kwargs):
		ks = val2list(ks, len(self.blocks) - 1)
		expand_ratio = val2list(e, len(self.blocks) - 1)
		depth = val2list(d, len(self.block_group_info))

		for block, k, e in zip(self.blocks[1:], ks, expand_ratio):
			if k is not None:
				block.conv.active_kernel_size = k
			if e is not None:
				block.conv.active_expand_ratio = e

		for i, d in enumerate(depth):
			if d is not None:
				self.runtime_depth[i] = min(len(self.block_group_info[i]), d)
Esempio n. 3
0
def validate(run_manager, epoch=0, is_test=False, image_size_list=None,
             ks_list=None, expand_ratio_list=None, depth_list=None, width_mult_list=None, additional_setting=None):
	dynamic_net = run_manager.net
	if isinstance(dynamic_net, nn.DataParallel):
		dynamic_net = dynamic_net.module

	dynamic_net.eval()

	if image_size_list is None:
		image_size_list = val2list(run_manager.run_config.data_provider.image_size, 1)
	if ks_list is None:
		ks_list = dynamic_net.ks_list
	if expand_ratio_list is None:
		expand_ratio_list = dynamic_net.expand_ratio_list
	if depth_list is None:
		depth_list = dynamic_net.depth_list
	if width_mult_list is None:
		if 'width_mult_list' in dynamic_net.__dict__:
			width_mult_list = list(range(len(dynamic_net.width_mult_list)))
		else:
			width_mult_list = [0]

	subnet_settings = []
	for d in depth_list:
		for e in expand_ratio_list:
			for k in ks_list:
				for w in width_mult_list:
					for img_size in image_size_list:
						subnet_settings.append([{
							'image_size': img_size,
							'd': d,
							'e': e,
							'ks': k,
							'w': w,
						}, 'R%s-D%s-E%s-K%s-W%s' % (img_size, d, e, k, w)])
	if additional_setting is not None:
		subnet_settings += additional_setting

	losses_of_subnets, top1_of_subnets, top5_of_subnets = [], [], []

	valid_log = ''
	for setting, name in subnet_settings:
		run_manager.write_log('-' * 30 + ' Validate %s ' % name + '-' * 30, 'train', should_print=False)
		run_manager.run_config.data_provider.assign_active_img_size(setting.pop('image_size'))
		dynamic_net.set_active_subnet(**setting)
		run_manager.write_log(dynamic_net.module_str, 'train', should_print=False)

		run_manager.reset_running_statistics(dynamic_net)
		loss, (top1, top5) = run_manager.validate(epoch=epoch, is_test=is_test, run_str=name, net=dynamic_net)
		losses_of_subnets.append(loss)
		top1_of_subnets.append(top1)
		top5_of_subnets.append(top5)
		valid_log += '%s (%.3f), ' % (name, top1)

	return list_mean(losses_of_subnets), list_mean(top1_of_subnets), list_mean(top5_of_subnets), valid_log
Esempio n. 4
0
    def __init__(self, in_channel_list, out_channel_list,
                 kernel_size_list=3, expand_ratio_list=6, stride=1, act_func='relu6', use_se=False):
        super(DynamicMBConvLayer, self).__init__()

        self.in_channel_list = in_channel_list
        self.out_channel_list = out_channel_list

        self.kernel_size_list = val2list(kernel_size_list)
        self.expand_ratio_list = val2list(expand_ratio_list)

        self.stride = stride
        self.act_func = act_func
        self.use_se = use_se

        # build modules
        max_middle_channel = make_divisible(
            round(max(self.in_channel_list) * max(self.expand_ratio_list)), MyNetwork.CHANNEL_DIVISIBLE)
        if max(self.expand_ratio_list) == 1:
            self.inverted_bottleneck = None
        else:
            self.inverted_bottleneck = nn.Sequential(OrderedDict([
                ('conv', DynamicConv2d(max(self.in_channel_list), max_middle_channel)),
                ('bn', DynamicBatchNorm2d(max_middle_channel)),
                ('act', build_activation(self.act_func)),
            ]))

        self.depth_conv = nn.Sequential(OrderedDict([
            ('conv', DynamicSeparableConv2d(max_middle_channel, self.kernel_size_list, self.stride)),
            ('bn', DynamicBatchNorm2d(max_middle_channel)),
            ('act', build_activation(self.act_func))
        ]))
        if self.use_se:
            self.depth_conv.add_module('se', DynamicSE(max_middle_channel))

        self.point_linear = nn.Sequential(OrderedDict([
            ('conv', DynamicConv2d(max_middle_channel, max(self.out_channel_list))),
            ('bn', DynamicBatchNorm2d(max(self.out_channel_list))),
        ]))

        self.active_kernel_size = max(self.kernel_size_list)
        self.active_expand_ratio = max(self.expand_ratio_list)
        self.active_out_channel = max(self.out_channel_list)
Esempio n. 5
0
    def __init__(self, in_channel_list, out_channel_list, expand_ratio_list=0.25,
                 kernel_size=3, stride=1, act_func='relu', downsample_mode='avgpool_conv'):
        super(DynamicResNetBottleneckBlock, self).__init__()

        self.in_channel_list = in_channel_list
        self.out_channel_list = out_channel_list
        self.expand_ratio_list = val2list(expand_ratio_list)

        self.kernel_size = kernel_size
        self.stride = stride
        self.act_func = act_func
        self.downsample_mode = downsample_mode

        # build modules
        max_middle_channel = make_divisible(
            round(max(self.out_channel_list) * max(self.expand_ratio_list)), MyNetwork.CHANNEL_DIVISIBLE)

        self.conv1 = nn.Sequential(OrderedDict([
            ('conv', DynamicConv2d(max(self.in_channel_list), max_middle_channel)),
            ('bn', DynamicBatchNorm2d(max_middle_channel)),
            ('act', build_activation(self.act_func, inplace=True)),
        ]))

        self.conv2 = nn.Sequential(OrderedDict([
            ('conv', DynamicConv2d(max_middle_channel, max_middle_channel, kernel_size, stride)),
            ('bn', DynamicBatchNorm2d(max_middle_channel)),
            ('act', build_activation(self.act_func, inplace=True))
        ]))

        self.conv3 = nn.Sequential(OrderedDict([
            ('conv', DynamicConv2d(max_middle_channel, max(self.out_channel_list))),
            ('bn', DynamicBatchNorm2d(max(self.out_channel_list))),
        ]))

        if self.stride == 1 and self.in_channel_list == self.out_channel_list:
            self.downsample = IdentityLayer(max(self.in_channel_list), max(self.out_channel_list))
        elif self.downsample_mode == 'conv':
            self.downsample = nn.Sequential(OrderedDict([
                ('conv', DynamicConv2d(max(self.in_channel_list), max(self.out_channel_list), stride=stride)),
                ('bn', DynamicBatchNorm2d(max(self.out_channel_list))),
            ]))
        elif self.downsample_mode == 'avgpool_conv':
            self.downsample = nn.Sequential(OrderedDict([
                ('avg_pool', nn.AvgPool2d(kernel_size=stride, stride=stride, padding=0, ceil_mode=True)),
                ('conv', DynamicConv2d(max(self.in_channel_list), max(self.out_channel_list))),
                ('bn', DynamicBatchNorm2d(max(self.out_channel_list))),
            ]))
        else:
            raise NotImplementedError

        self.final_act = build_activation(self.act_func, inplace=True)

        self.active_expand_ratio = max(self.expand_ratio_list)
        self.active_out_channel = max(self.out_channel_list)
Esempio n. 6
0
	def __init__(self, n_classes=1000, bn_param=(0.1, 1e-5), dropout_rate=0.1, base_stage_width=None, width_mult=1.0,
	             ks_list=3, expand_ratio_list=6, depth_list=4):

		self.width_mult = width_mult
		self.ks_list = val2list(ks_list, 1)
		self.expand_ratio_list = val2list(expand_ratio_list, 1)
		self.depth_list = val2list(depth_list, 1)

		self.ks_list.sort()
		self.expand_ratio_list.sort()
		self.depth_list.sort()

		base_stage_width = [16, 16, 24, 40, 80, 112, 160, 960, 1280]

		final_expand_width = make_divisible(base_stage_width[-2] * self.width_mult, MyNetwork.CHANNEL_DIVISIBLE)
		last_channel = make_divisible(base_stage_width[-1] * self.width_mult, MyNetwork.CHANNEL_DIVISIBLE)

		stride_stages = [1, 2, 2, 2, 1, 2]
		act_stages = ['relu', 'relu', 'relu', 'h_swish', 'h_swish', 'h_swish']
		se_stages = [False, False, True, False, True, True]
		n_block_list = [1] + [max(self.depth_list)] * 5
		width_list = []
		for base_width in base_stage_width[:-2]:
			width = make_divisible(base_width * self.width_mult, MyNetwork.CHANNEL_DIVISIBLE)
			width_list.append(width)

		input_channel, first_block_dim = width_list[0], width_list[1]
		# first conv layer
		first_conv = ConvLayer(3, input_channel, kernel_size=3, stride=2, act_func='h_swish')
		first_block_conv = MBConvLayer(
			in_channels=input_channel, out_channels=first_block_dim, kernel_size=3, stride=stride_stages[0],
			expand_ratio=1, act_func=act_stages[0], use_se=se_stages[0],
		)
		first_block = ResidualBlock(
			first_block_conv,
			IdentityLayer(first_block_dim, first_block_dim) if input_channel == first_block_dim else None,
		)

		# inverted residual blocks
		self.block_group_info = []
		blocks = [first_block]
		_block_index = 1
		feature_dim = first_block_dim

		for width, n_block, s, act_func, use_se in zip(width_list[2:], n_block_list[1:],
		                                               stride_stages[1:], act_stages[1:], se_stages[1:]):
			self.block_group_info.append([_block_index + i for i in range(n_block)])
			_block_index += n_block

			output_channel = width
			for i in range(n_block):
				if i == 0:
					stride = s
				else:
					stride = 1
				mobile_inverted_conv = DynamicMBConvLayer(
					in_channel_list=val2list(feature_dim), out_channel_list=val2list(output_channel),
					kernel_size_list=ks_list, expand_ratio_list=expand_ratio_list,
					stride=stride, act_func=act_func, use_se=use_se,
				)
				if stride == 1 and feature_dim == output_channel:
					shortcut = IdentityLayer(feature_dim, feature_dim)
				else:
					shortcut = None
				blocks.append(ResidualBlock(mobile_inverted_conv, shortcut))
				feature_dim = output_channel
		# final expand layer, feature mix layer & classifier
		final_expand_layer = ConvLayer(feature_dim, final_expand_width, kernel_size=1, act_func='h_swish')
		feature_mix_layer = ConvLayer(
			final_expand_width, last_channel, kernel_size=1, bias=False, use_bn=False, act_func='h_swish',
		)

		classifier = LinearLayer(last_channel, n_classes, dropout_rate=dropout_rate)

		super(OFAMobileNetV3, self).__init__(first_conv, blocks, final_expand_layer, feature_mix_layer, classifier)

		# set bn param
		self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

		# runtime_depth
		self.runtime_depth = [len(block_idx) for block_idx in self.block_group_info]
Esempio n. 7
0
    def __init__(self,
                 n_classes=1000,
                 bn_param=(0.1, 1e-5),
                 dropout_rate=0,
                 depth_list=2,
                 expand_ratio_list=0.25,
                 width_mult_list=1.0):

        self.depth_list = val2list(depth_list)
        self.expand_ratio_list = val2list(expand_ratio_list)
        self.width_mult_list = val2list(width_mult_list)
        # sort
        self.depth_list.sort()
        self.expand_ratio_list.sort()
        self.width_mult_list.sort()

        input_channel = [
            make_divisible(64 * width_mult, MyNetwork.CHANNEL_DIVISIBLE)
            for width_mult in self.width_mult_list
        ]
        mid_input_channel = [
            make_divisible(channel // 2, MyNetwork.CHANNEL_DIVISIBLE)
            for channel in input_channel
        ]

        stage_width_list = ResNets.STAGE_WIDTH_LIST.copy()
        for i, width in enumerate(stage_width_list):
            stage_width_list[i] = [
                make_divisible(width * width_mult, MyNetwork.CHANNEL_DIVISIBLE)
                for width_mult in self.width_mult_list
            ]

        n_block_list = [
            base_depth + max(self.depth_list)
            for base_depth in ResNets.BASE_DEPTH_LIST
        ]
        stride_list = [1, 2, 2, 2]

        # build input stem
        input_stem = [
            DynamicConvLayer(val2list(3),
                             mid_input_channel,
                             3,
                             stride=2,
                             use_bn=True,
                             act_func='relu'),
            ResidualBlock(
                DynamicConvLayer(mid_input_channel,
                                 mid_input_channel,
                                 3,
                                 stride=1,
                                 use_bn=True,
                                 act_func='relu'),
                IdentityLayer(mid_input_channel, mid_input_channel)),
            DynamicConvLayer(mid_input_channel,
                             input_channel,
                             3,
                             stride=1,
                             use_bn=True,
                             act_func='relu')
        ]

        # blocks
        blocks = []
        for d, width, s in zip(n_block_list, stage_width_list, stride_list):
            for i in range(d):
                stride = s if i == 0 else 1
                bottleneck_block = DynamicResNetBottleneckBlock(
                    input_channel,
                    width,
                    expand_ratio_list=self.expand_ratio_list,
                    kernel_size=3,
                    stride=stride,
                    act_func='relu',
                    downsample_mode='avgpool_conv',
                )
                blocks.append(bottleneck_block)
                input_channel = width
        # classifier
        classifier = DynamicLinearLayer(input_channel,
                                        n_classes,
                                        dropout_rate=dropout_rate)

        super(OFAResNets, self).__init__(input_stem, blocks, classifier)

        # set bn param
        self.set_bn_param(*bn_param)

        # runtime_depth
        self.input_stem_skipping = 0
        self.runtime_depth = [0] * len(n_block_list)
Esempio n. 8
0
	def __init__(self, n_classes=1000, bn_param=(0.1, 1e-3), dropout_rate=0.1, base_stage_width=None, width_mult=1.0,
	             ks_list=3, expand_ratio_list=6, depth_list=4):

		self.width_mult = width_mult
		self.ks_list = val2list(ks_list, 1)
		self.expand_ratio_list = val2list(expand_ratio_list, 1)
		self.depth_list = val2list(depth_list, 1)

		self.ks_list.sort()
		self.expand_ratio_list.sort()
		self.depth_list.sort()

		if base_stage_width == 'google':
			# MobileNetV2 Stage Width
			base_stage_width = [32, 16, 24, 32, 64, 96, 160, 320, 1280]
		else:
			# ProxylessNAS Stage Width
			base_stage_width = [32, 16, 24, 40, 80, 96, 192, 320, 1280]

		input_channel = make_divisible(base_stage_width[0] * self.width_mult, MyNetwork.CHANNEL_DIVISIBLE)
		first_block_width = make_divisible(base_stage_width[1] * self.width_mult, MyNetwork.CHANNEL_DIVISIBLE)
		last_channel = make_divisible(base_stage_width[-1] * self.width_mult, MyNetwork.CHANNEL_DIVISIBLE)

		# first conv layer
		first_conv = ConvLayer(
			3, input_channel, kernel_size=3, stride=2, use_bn=True, act_func='relu6', ops_order='weight_bn_act'
		)
		# first block
		first_block_conv = MBConvLayer(
			in_channels=input_channel, out_channels=first_block_width, kernel_size=3, stride=1,
			expand_ratio=1, act_func='relu6',
		)
		first_block = ResidualBlock(first_block_conv, None)

		input_channel = first_block_width
		# inverted residual blocks
		self.block_group_info = []
		blocks = [first_block]
		_block_index = 1

		stride_stages = [2, 2, 2, 1, 2, 1]
		n_block_list = [max(self.depth_list)] * 5 + [1]

		width_list = []
		for base_width in base_stage_width[2:-1]:
			width = make_divisible(base_width * self.width_mult, MyNetwork.CHANNEL_DIVISIBLE)
			width_list.append(width)

		for width, n_block, s in zip(width_list, n_block_list, stride_stages):
			self.block_group_info.append([_block_index + i for i in range(n_block)])
			_block_index += n_block

			output_channel = width
			for i in range(n_block):
				if i == 0:
					stride = s
				else:
					stride = 1

				mobile_inverted_conv = DynamicMBConvLayer(
					in_channel_list=val2list(input_channel, 1), out_channel_list=val2list(output_channel, 1),
					kernel_size_list=ks_list, expand_ratio_list=expand_ratio_list, stride=stride, act_func='relu6',
				)

				if stride == 1 and input_channel == output_channel:
					shortcut = IdentityLayer(input_channel, input_channel)
				else:
					shortcut = None

				mb_inverted_block = ResidualBlock(mobile_inverted_conv, shortcut)

				blocks.append(mb_inverted_block)
				input_channel = output_channel
		# 1x1_conv before global average pooling
		feature_mix_layer = ConvLayer(
			input_channel, last_channel, kernel_size=1, use_bn=True, act_func='relu6',
		)
		classifier = LinearLayer(last_channel, n_classes, dropout_rate=dropout_rate)

		super(OFAProxylessNASNets, self).__init__(first_conv, blocks, feature_mix_layer, classifier)

		# set bn param
		self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

		# runtime_depth
		self.runtime_depth = [len(block_idx) for block_idx in self.block_group_info]
Esempio n. 9
0
	def __init__(self, n_classes=1000, width_mult=1.0, bn_param=(0.1, 1e-3), dropout_rate=0.2,
	             ks=None, expand_ratio=None, depth_param=None, stage_width_list=None):

		ks = 3 if ks is None else ks
		expand_ratio = 6 if expand_ratio is None else expand_ratio

		input_channel = 32
		last_channel = 1280

		input_channel = make_divisible(input_channel * width_mult, MyNetwork.CHANNEL_DIVISIBLE)
		last_channel = make_divisible(last_channel * width_mult, MyNetwork.CHANNEL_DIVISIBLE) \
			if width_mult > 1.0 else last_channel

		inverted_residual_setting = [
			# t, c, n, s
			[1, 16, 1, 1],
			[expand_ratio, 24, 2, 2],
			[expand_ratio, 32, 3, 2],
			[expand_ratio, 64, 4, 2],
			[expand_ratio, 96, 3, 1],
			[expand_ratio, 160, 3, 2],
			[expand_ratio, 320, 1, 1],
		]

		if depth_param is not None:
			assert isinstance(depth_param, int)
			for i in range(1, len(inverted_residual_setting) - 1):
				inverted_residual_setting[i][2] = depth_param

		if stage_width_list is not None:
			for i in range(len(inverted_residual_setting)):
				inverted_residual_setting[i][1] = stage_width_list[i]

		ks = val2list(ks, sum([n for _, _, n, _ in inverted_residual_setting]) - 1)
		_pt = 0

		# first conv layer
		first_conv = ConvLayer(
			3, input_channel, kernel_size=3, stride=2, use_bn=True, act_func='relu6', ops_order='weight_bn_act'
		)
		# inverted residual blocks
		blocks = []
		for t, c, n, s in inverted_residual_setting:
			output_channel = make_divisible(c * width_mult, MyNetwork.CHANNEL_DIVISIBLE)
			for i in range(n):
				if i == 0:
					stride = s
				else:
					stride = 1
				if t == 1:
					kernel_size = 3
				else:
					kernel_size = ks[_pt]
					_pt += 1
				mobile_inverted_conv = MBConvLayer(
					in_channels=input_channel, out_channels=output_channel, kernel_size=kernel_size, stride=stride,
					expand_ratio=t,
				)
				if stride == 1:
					if input_channel == output_channel:
						shortcut = IdentityLayer(input_channel, input_channel)
					else:
						shortcut = None
				else:
					shortcut = None
				blocks.append(
					ResidualBlock(mobile_inverted_conv, shortcut)
				)
				input_channel = output_channel
		# 1x1_conv before global average pooling
		feature_mix_layer = ConvLayer(
			input_channel, last_channel, kernel_size=1, use_bn=True, act_func='relu6', ops_order='weight_bn_act',
		)

		classifier = LinearLayer(last_channel, n_classes, dropout_rate=dropout_rate)

		super(MobileNetV2, self).__init__(first_conv, blocks, feature_mix_layer, classifier)

		# set bn param
		self.set_bn_param(*bn_param)