Example #1
    def __init__(self,
                 num_classes=1000,
                 input_size=224,
                 input_channel=32,
                 last_channel=1280,
                 width_mult=1.0,
                 inverted_residual_setting=None,
                 dropout_ratio=0.2,
                 batch_norm_momentum=0.1,
                 batch_norm_epsilon=1e-5,
                 active_fn='nn.ReLU6',
                 block='InvertedResidualChannels',
                 blockFriendly='InvertedResidualFriendly',
                 round_nearest=8):
        """Build the network.

        Args:
            num_classes (int): Number of classes.
            input_size (int): Input resolution.
            input_channel (int): Number of channels for stem convolution.
            last_channel (int): Number of channels for the final convolution
                before the classifier.
            width_mult (float): Width multiplier - adjusts the number of
                channels in each layer by this amount.
            inverted_residual_setting (list): A list of
                [expand ratio, output channel, num repeat,
                stride of first block, A list of kernel sizes].
            dropout_ratio (float): Dropout ratio for linear classifier.
            batch_norm_momentum (float): Momentum for batch normalization.
            batch_norm_epsilon (float): Epsilon for batch normalization.
            active_fn (str): Specify which activation function to use.
            block (str): Specify which MobileNetV2 block implementation to use.
            blockFriendly (str): Specify which friendly block implementation
                to use.
            round_nearest (int): Round the number of channels in each layer to
                be a multiple of this number. Set to 1 to turn off rounding.
        """
        super(MobileNetV2, self).__init__()
        batch_norm_kwargs = {
            'momentum': batch_norm_momentum,
            'eps': batch_norm_epsilon
        }

        self.input_channel = input_channel
        self.last_channel = last_channel
        self.width_mult = width_mult
        self.round_nearest = round_nearest
        self.inverted_residual_setting = inverted_residual_setting
        self.active_fn = active_fn
        self.block = block
        self.blockFriendly = blockFriendly

        if len(inverted_residual_setting) == 0 or (
                len(inverted_residual_setting[0]) not in [5, 8]):
            raise ValueError("inverted_residual_setting should be non-empty "
                             "and consist of 5- or 8-element lists, got "
                             "{}".format(inverted_residual_setting))
        if input_size % 32 != 0:
            raise ValueError('Input size must be divisible by 32')
        active_fn = get_active_fn(active_fn)
        block = get_block_wrapper(block)
        blockFriendly = get_block_wrapper_friendly(blockFriendly)
        # building first layer
        input_channel = _make_divisible(input_channel * width_mult,
                                        round_nearest)
        last_channel = _make_divisible(last_channel * max(1.0, width_mult),
                                       round_nearest)
        features = [
            ConvBNReLU(3,
                       input_channel,
                       stride=2,
                       batch_norm_kwargs=batch_norm_kwargs,
                       active_fn=active_fn)
        ]
        # building inverted residual blocks
        for t, c, n, s, ks, *extra in inverted_residual_setting:
            output_channel = _make_divisible(c * width_mult, round_nearest)
            _extra_kwargs = {}
            if len(extra) == 3:
                _extra_kwargs = {
                    k: v for k, v in zip(['nl_c', 'nl_s', 'se_ratio'], extra)
                }
            for i in range(n):
                stride = s if i == 0 else 1
                #if c == 24 or c == 32 or c == 64:
                features.append(blockFriendly(input_channel, output_channel, stride, t))
                #else:
                #    features.append(
                #        block(input_channel,
                #            output_channel,
                #            stride,
                #            t,
                #            ks,
                #            active_fn=active_fn,
                #            batch_norm_kwargs=batch_norm_kwargs,
                #            **_extra_kwargs))
                input_channel = output_channel
        # building last several layers
        features.append(
            ConvBNReLU(input_channel,
                       last_channel,
                       kernel_size=1,
                       batch_norm_kwargs=batch_norm_kwargs,
                       active_fn=active_fn))
        avg_pool_size = input_size // 32
        features.append(nn.AvgPool2d(avg_pool_size))
        # make it nn.Sequential
        self.features = nn.Sequential(*features)

        # building classifier
        self.classifier = nn.Sequential(
            nn.Dropout(dropout_ratio),
            nn.Linear(last_channel, num_classes),
        )
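All of the constructors on this page round channel counts through _make_divisible, which is not defined in these excerpts. A minimal sketch, assuming the common torchvision-style helper that rounds to the nearest multiple of round_nearest while never dropping more than 10% below the requested width:

def _make_divisible(v, divisor, min_value=None):
    # Round v to the nearest multiple of divisor (torchvision-style sketch).
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Never round down by more than 10% of the original value.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

# e.g. with width_mult=0.75 on a 32-channel stem:
# _make_divisible(32 * 0.75, 8) -> 24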
Example #2
    def __init__(self,
                 num_classes=1000,
                 input_size=224,
                 input_stride=4,
                 input_channel=16,
                 last_channel=1024,
                 head_channels=[36, 72, 144, 288],
                 bn_momentum=0.1,
                 bn_epsilon=1e-5,
                 dropout_ratio=0.2,
                 active_fn='nn.ReLU6',
                 block='InvertedResidualChannels',
                 width_mult=1.0,
                 round_nearest=8,
                 expand_ratio=4,
                 kernel_sizes=[3, 5, 7],
                 inverted_residual_setting=None,
                 STAGE1=None,
                 STAGE2=None,
                 STAGE3=None,
                 STAGE4=None,
                 **kwargs):
        super(HighResolutionNetBase, self).__init__()

        batch_norm_kwargs = {'momentum': bn_momentum, 'eps': bn_epsilon}

        self.avg_pool_size = input_size // 32
        self.input_stride = input_stride
        self.input_channel = _make_divisible(input_channel * width_mult,
                                             round_nearest)
        self.last_channel = _make_divisible(
            last_channel * max(1.0, width_mult), round_nearest)
        self.batch_norm_kwargs = batch_norm_kwargs
        self.active_fn = get_active_fn(active_fn)
        self.kernel_sizes = kernel_sizes
        self.expand_ratio = expand_ratio
        self.head_channels = head_channels

        self.block = get_block_wrapper(block)
        self.inverted_residual_setting = inverted_residual_setting

        self.conv1 = nn.Conv2d(3,
                               self.input_channel,
                               kernel_size=3,
                               stride=2,
                               padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(self.input_channel, **batch_norm_kwargs)
        self.conv2 = nn.Conv2d(self.input_channel,
                               self.input_channel,
                               kernel_size=3,
                               stride=2,
                               padding=1,
                               bias=False)
        self.bn2 = nn.BatchNorm2d(self.input_channel, **batch_norm_kwargs)
        self.relu = nn.ReLU(inplace=True)

        self.stage1_cfg = STAGE1
        num_channels = self.stage1_cfg['NUM_CHANNELS'][0]
        block = blocks_dict[self.stage1_cfg['BLOCK']]
        num_blocks = self.stage1_cfg['NUM_BLOCKS'][0]
        self.layer1 = self._make_layer(block, self.input_channel, num_channels,
                                       num_blocks)
        stage1_out_channel = block.expansion * num_channels

        self.stage2_cfg = STAGE2
        num_channels = self.stage2_cfg['NUM_CHANNELS']
        block = blocks_dict[self.stage2_cfg['BLOCK']]
        num_channels = [
            num_channels[i] * block.expansion for i in range(len(num_channels))
        ]
        self.transition1 = self._make_transition_layer([stage1_out_channel],
                                                       num_channels)
        self.stage2, pre_stage_channels = self._make_stage(
            self.stage2_cfg, num_channels)

        self.stage3_cfg = STAGE3
        num_channels = self.stage3_cfg['NUM_CHANNELS']
        block = blocks_dict[self.stage3_cfg['BLOCK']]
        num_channels = [
            num_channels[i] * block.expansion for i in range(len(num_channels))
        ]
        self.transition2 = self._make_transition_layer(pre_stage_channels,
                                                       num_channels)
        self.stage3, pre_stage_channels = self._make_stage(
            self.stage3_cfg, num_channels)

        self.stage4_cfg = STAGE4
        num_channels = self.stage4_cfg['NUM_CHANNELS']
        block = blocks_dict[self.stage4_cfg['BLOCK']]
        num_channels = [
            num_channels[i] * block.expansion for i in range(len(num_channels))
        ]
        self.transition3 = self._make_transition_layer(pre_stage_channels,
                                                       num_channels)
        self.stage4, pre_stage_channels = self._make_stage(
            self.stage4_cfg, num_channels, multi_scale_output=True)

        # Classification Head
        self.incre_modules, self.downsamp_modules, \
            self.final_layer = self._make_head(pre_stage_channels)

        self.classifier = nn.Linear(self.last_channel, num_classes)
        self.init_weights()
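The STAGE1..STAGE4 arguments are HRNet-style config dicts. Only the keys NUM_CHANNELS, BLOCK and NUM_BLOCKS are read directly in this constructor; the 'BOTTLENECK'/'BASIC' block names and the channel values below are assumptions borrowed from the reference HRNet configs, shown as a hypothetical HRNet-W18-like setup:

# Hypothetical stage configs; _make_stage may consume additional keys
# not visible in this snippet.
STAGE1 = {'NUM_CHANNELS': [64], 'BLOCK': 'BOTTLENECK', 'NUM_BLOCKS': [4]}
STAGE2 = {'NUM_CHANNELS': [18, 36], 'BLOCK': 'BASIC', 'NUM_BLOCKS': [4, 4]}
STAGE3 = {'NUM_CHANNELS': [18, 36, 72], 'BLOCK': 'BASIC',
          'NUM_BLOCKS': [4, 4, 4]}
STAGE4 = {'NUM_CHANNELS': [18, 36, 72, 144], 'BLOCK': 'BASIC',
          'NUM_BLOCKS': [4, 4, 4, 4]}

net = HighResolutionNetBase(STAGE1=STAGE1, STAGE2=STAGE2,
                            STAGE3=STAGE3, STAGE4=STAGE4)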
Example #3
    def __init__(self,
                 num_classes=1000,
                 input_size=224,
                 input_stride=4,
                 input_channel=[16, 16],
                 last_channel=1024,
                 head_channels=None,
                 bn_momentum=0.1,
                 bn_epsilon=1e-5,
                 dropout_ratio=0.2,
                 active_fn='nn.ReLU6',
                 block='InvertedResidualChannels',
                 width_mult=1.0,
                 round_nearest=8,
                 expand_ratio=4,
                 kernel_sizes=[3, 5, 7],
                 inverted_residual_setting=None,
                 task='classification',
                 align_corners=False,
                 start_with_atomcell=False,
                 fcn_head_for_seg=False,
                 initial_for_heatmap=False,
                 **kwargs):
        super(HighResolutionNet, self).__init__()

        batch_norm_kwargs = {'momentum': bn_momentum, 'eps': bn_epsilon}

        self.avg_pool_size = input_size // 32
        self.input_stride = input_stride
        self.input_channel = [
            _make_divisible(item * width_mult, round_nearest)
            for item in input_channel
        ]
        self.last_channel = _make_divisible(
            last_channel * max(1.0, width_mult), round_nearest)
        self.batch_norm_kwargs = batch_norm_kwargs
        self.active_fn = get_active_fn(active_fn)
        self.kernel_sizes = kernel_sizes
        self.expand_ratio = expand_ratio
        self.task = task
        self.align_corners = align_corners
        self.initial_for_heatmap = initial_for_heatmap

        self.block = get_block_wrapper(block)
        self.inverted_residual_setting = inverted_residual_setting

        downsamples = []
        if self.input_stride > 1:
            downsamples.append(
                ConvBNReLU(3,
                           input_channel[0],
                           kernel_size=3,
                           stride=2,
                           batch_norm_kwargs=self.batch_norm_kwargs,
                           active_fn=self.active_fn))
        if self.input_stride > 2:
            if start_with_atomcell:
                downsamples.append(
                    InvertedResidual(input_channel[0], input_channel[0], 1, 1,
                                     [3], self.active_fn,
                                     self.batch_norm_kwargs))
            downsamples.append(
                ConvBNReLU(input_channel[0],
                           input_channel[1],
                           kernel_size=3,
                           stride=2,
                           batch_norm_kwargs=self.batch_norm_kwargs,
                           active_fn=self.active_fn))
        self.downsamples = nn.Sequential(*downsamples)

        features = []
        for index in range(len(inverted_residual_setting)):
            in_branches = (1 if index == 0 else
                           inverted_residual_setting[index - 1][0])
            in_channels = ([input_channel[1]] if index == 0 else
                           inverted_residual_setting[index - 1][-1])
            features.append(
                FuseModule(in_branches=in_branches,
                           out_branches=inverted_residual_setting[index][0],
                           in_channels=in_channels,
                           out_channels=inverted_residual_setting[index][-1],
                           block=self.block,
                           expand_ratio=self.expand_ratio,
                           kernel_sizes=self.kernel_sizes,
                           batch_norm_kwargs=self.batch_norm_kwargs,
                           active_fn=self.active_fn))
            features.append(
                ParallelModule(
                    num_branches=inverted_residual_setting[index][0],
                    num_blocks=inverted_residual_setting[index][1],
                    num_channels=inverted_residual_setting[index][2],
                    block=self.block,
                    expand_ratio=self.expand_ratio,
                    kernel_sizes=self.kernel_sizes,
                    batch_norm_kwargs=self.batch_norm_kwargs,
                    active_fn=self.active_fn))

        if self.task == 'classification':
            features.append(
                HeadModule(pre_stage_channels=inverted_residual_setting[-1][2],
                           head_channels=head_channels,
                           last_channel=last_channel,
                           avg_pool_size=self.avg_pool_size,
                           block=self.block,
                           expand_ratio=self.expand_ratio,
                           kernel_sizes=self.kernel_sizes,
                           batch_norm_kwargs=self.batch_norm_kwargs,
                           active_fn=self.active_fn))

            self.classifier = nn.Sequential(
                nn.Dropout(dropout_ratio),
                nn.Linear(last_channel, num_classes),
            )
        elif self.task == 'segmentation':
            if fcn_head_for_seg:
                self.transform = ConvBNReLU(
                    sum(inverted_residual_setting[-1][-1]),
                    last_channel,
                    kernel_size=1,
                    batch_norm_kwargs=self.batch_norm_kwargs,
                    active_fn=self.active_fn)
            else:
                self.transform = self.block(
                    sum(inverted_residual_setting[-1][-1]),
                    last_channel,
                    expand_ratio=self.expand_ratio,
                    kernel_sizes=self.kernel_sizes,
                    stride=1,
                    batch_norm_kwargs=self.batch_norm_kwargs,
                    active_fn=self.active_fn,
                )
            self.classifier = nn.Conv2d(last_channel,
                                        num_classes,
                                        kernel_size=1)

        self.features = nn.Sequential(*features)

        self.init_weights()
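In this variant, inverted_residual_setting describes HRNet-style stages rather than MobileNetV2 rows: index [0] is the branch count, [1] the blocks per branch, [2] the channels per branch, and the last element also feeds the FuseModule output widths. A hypothetical two-stage setting and instantiation (all values illustrative):

# Hypothetical row layout: [num_branches, num_blocks per branch,
#                           num_channels per branch (also fuse outputs)]
inverted_residual_setting = [
    [2, [2, 2], [16, 32]],
    [3, [2, 2, 2], [16, 32, 64]],
]

net = HighResolutionNet(inverted_residual_setting=inverted_residual_setting,
                        head_channels=[64, 128, 256],
                        task='classification')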
Example #4
    def __init__(self,
                 num_classes=1000,
                 input_size=224,
                 input_channel=16,
                 last_channel=1280,
                 width_mult=1.0,
                 inverted_bottleneck_setting_small=None,
                 inverted_bottleneck_setting_large=None,
                 dropout_ratio=0.2,
                 batch_norm_momentum=0.1,
                 batch_norm_epsilon=1e-5,
                 active_fn='nn.ReLU6',
                 mode='large',
                 block='MobileBottleneck',
                 blockFriendly='MobileBottleneckFriendly2',
                 round_nearest=8):
        """Build the network.

        Args:
            num_classes (int): Number of classes.
            input_size (int): Input resolution.
            input_channel (int): Number of channels for stem convolution.
            last_channel (int): Number of channels for the final convolution
                before the classifier.
            width_mult (float): Width multiplier - adjusts the number of
                channels in each layer by this amount.
            inverted_bottleneck_setting_small (list): Bottleneck settings for
                the 'small' variant; each row is
                [kernel size, expansion channels, output channels, use SE,
                nonlinearity, stride].
            inverted_bottleneck_setting_large (list): Bottleneck settings for
                the 'large' variant, same row layout.
            dropout_ratio (float): Dropout ratio for linear classifier.
            batch_norm_momentum (float): Momentum for batch normalization.
            batch_norm_epsilon (float): Epsilon for batch normalization.
            active_fn (str): Specify which activation function to use.
            mode (str): Which MobileNetV3 variant to build, 'large' or 'small'.
            block (str): Specify which MobileNetV3 block implementation to use.
            blockFriendly (str): Specify which friendly block implementation
                to use.
            round_nearest (int): Round the number of channels in each layer to
                be a multiple of this number. Set to 1 to turn off rounding.
        """
        super(MobileNetV3, self).__init__()
        batch_norm_kwargs = {
            'momentum': batch_norm_momentum,
            'eps': batch_norm_epsilon
        }

        self.input_channel = input_channel
        self.last_channel = last_channel
        self.width_mult = width_mult
        self.round_nearest = round_nearest
        self.inverted_bottleneck_setting_large = inverted_bottleneck_setting_large
        self.inverted_bottleneck_setting_small = inverted_bottleneck_setting_small
        self.active_fn = active_fn
        self.block = block
        self.blockFriendly = blockFriendly
        self.mode = mode

        if self.mode == 'large':
            self.inverted_bottleneck_setting = inverted_bottleneck_setting_large
        else:
            self.inverted_bottleneck_setting = inverted_bottleneck_setting_small

        if input_size % 32 != 0:
            raise ValueError('Input size must be divisible by 32')
        active_fn = get_active_fn(active_fn)
        #block = get_block_wrapper(block)
        blockFriendly = get_block_wrapper_friendly2(blockFriendly)
        # building first layer
        input_channel = _make_divisible(input_channel * width_mult,
                                        round_nearest)
        last_channel = _make_divisible(last_channel * max(1.0, width_mult),
                                       round_nearest)
        features = [
            ConvBNReLU(3,
                       input_channel,
                       stride=2,
                       batch_norm_kwargs=batch_norm_kwargs,
                       active_fn=Hswish_new)
        ]
        # building inverted residual blocks
        for k, exp, c, se, nl, s in self.inverted_bottleneck_setting:
            output_channel = _make_divisible(c * width_mult, round_nearest)
            exp_channel = _make_divisible(exp * width_mult, round_nearest)
            features.append(
                blockFriendly(input_channel,
                              output_channel,
                              k,
                              s,
                              exp_channel,
                              se,
                              nl,
                              batch_norm_kwargs=batch_norm_kwargs))
            input_channel = output_channel

        if self.mode == 'large':
            last_conv = _make_divisible(960 * width_mult, round_nearest)
        else:
            last_conv = _make_divisible(576 * width_mult, round_nearest)
        features.append(
            ConvBNReLU(input_channel,
                       last_conv,
                       kernel_size=1,
                       batch_norm_kwargs=batch_norm_kwargs,
                       active_fn=functools.partial(nn.ReLU, inplace=True)))
        features.append(nn.AdaptiveAvgPool2d(1))
        #avg_pool_size = input_size // 32
        #features.append(nn.AvgPool2d(avg_pool_size))
        features.append(nn.Conv2d(last_conv, last_channel, 1, 1, 0))
        features.append(Hswish_new())
        # make it nn.Sequential
        self.features = nn.Sequential(*features)

        # building classifier
        self.classifier = nn.Sequential(
            nn.Dropout(dropout_ratio),
            nn.Linear(last_channel, num_classes),
        )
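Hswish_new is referenced above but not defined on this page; a minimal sketch, assuming it implements the standard h-swish activation from the MobileNetV3 paper, x * ReLU6(x + 3) / 6:

import torch.nn as nn

class Hswish_new(nn.Module):
    # h-swish activation: x * relu6(x + 3) / 6 (sketch of the assumed class).
    def __init__(self, inplace=True):
        super(Hswish_new, self).__init__()
        self.relu6 = nn.ReLU6(inplace=inplace)

    def forward(self, x):
        return x * self.relu6(x + 3.0) / 6.0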