Example #1
    def set_swish(self, memory_efficient=True):
        """
        Sets swish function as memory efficient (for training) or standard.
        """
        self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
        for block in self._blocks:
            block.set_swish(memory_efficient)
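A minimal usage sketch, assuming `model` is an already-constructed EfficientNet-style module exposing this method (the variable name and the export step are illustrative, not part of the example): the memory-efficient variant relies on a custom autograd function, so it is commonly switched off before tracing or ONNX export and restored afterwards.

    # Hypothetical usage; `model` is assumed to expose `set_swish` as above.
    model.set_swish(memory_efficient=False)   # plain Swish, friendlier to trace/export
    # ... run torch.jit.trace or ONNX export here ...
    model.set_swish(memory_efficient=True)    # restore memory-efficient Swish for training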
Example #2
    def __init__(self, block_args, global_params):
        """
        Args:
            block_args (EasyDict): block args, see :class:`EfficientNet`.
            global_params (EasyDict): global args, see :class:`EfficientNet`.
        """
        super().__init__()
        self._block_args = block_args
        self.has_se = (block_args.se_ratio
                       is not None) and (0 < block_args.se_ratio <= 1)
        self.id_skip = block_args.id_skip

        # Expansion phase
        # number of input channels
        inp = block_args.in_channels
        # number of output channels
        oup = block_args.in_channels * block_args.expand_ratio
        if block_args.expand_ratio != 1:
            self._expand_conv = Conv2d(in_channels=inp,
                                       out_channels=oup,
                                       kernel_size=1,
                                       padding=0,
                                       bias=False)
            self._bn0 = get_norm(global_params.norm, out_channels=oup)

        # Depthwise convolution phase
        k = block_args.kernel_size
        s = block_args.stride
        self._depthwise_conv = Conv2d(in_channels=oup,
                                      out_channels=oup,
                                      groups=oup,
                                      kernel_size=k,
                                      stride=s,
                                      padding="SAME",
                                      bias=False)
        self._bn1 = get_norm(global_params.norm, out_channels=oup)

        # Squeeze and Excitation layer, if desired
        if self.has_se:
            num_squeezed_channels = max(
                1, int(block_args.in_channels * block_args.se_ratio))
            self._se_reduce = Conv2d(in_channels=oup,
                                     out_channels=num_squeezed_channels,
                                     kernel_size=1,
                                     padding=0)
            self._se_expand = Conv2d(in_channels=num_squeezed_channels,
                                     out_channels=oup,
                                     kernel_size=1,
                                     padding=0)

        # Output phase
        final_oup = block_args.out_channels
        self._project_conv = Conv2d(in_channels=oup,
                                    out_channels=final_oup,
                                    kernel_size=1,
                                    padding=0,
                                    bias=False)
        self._bn2 = get_norm(global_params.norm, final_oup)
        self._swish = MemoryEfficientSwish()
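A minimal construction sketch, assuming this constructor belongs to the `MBConvBlock` class referenced in the `EfficientNet` docstring below and that arguments use the `EasyDict` format described there; the concrete values and `norm="BN"` are illustrative assumptions, and `global_params` only carries the field this block actually reads.

    from easydict import EasyDict

    # Hypothetical block/global args; field names follow the EfficientNet docstring below.
    block_args = EasyDict(
        in_channels=16, out_channels=24, kernel_size=3, stride=2,
        expand_ratio=6, id_skip=True, se_ratio=0.25, num_repeat=1,
    )
    global_params = EasyDict(norm="BN")   # assumed normalization name
    block = MBConvBlock(block_args, global_params)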
Example #3
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        super().__init__()
        in_channels = input_shape[0].channels
        num_classes = cfg.MODEL.EFFICIENTDET.NUM_CLASSES
        norm = cfg.MODEL.EFFICIENTDET.HEAD.NORM
        bn_momentum = cfg.MODEL.EFFICIENTDET.HEAD.BN_MOMENTUM
        bn_eps = cfg.MODEL.EFFICIENTDET.HEAD.BN_EPS
        prior_prob = cfg.MODEL.EFFICIENTDET.HEAD.PRIOR_PROB
        memory_efficient = cfg.MODEL.EFFICIENTDET.HEAD.MEMORY_EFFICIENT_SWISH
        num_conv_layers = cfg.MODEL.EFFICIENTDET.HEAD.NUM_CONV
        num_anchors = cfg.build_anchor_generator(cfg,
                                                 input_shape).num_cell_anchors

        self.bn_momentum = bn_momentum
        self.bn_eps = bn_eps
        self.prior_prob = prior_prob

        assert (
            len(set(num_anchors)) == 1
        ), "Using different number of anchors between levels is not currently supported!"

        num_anchors = num_anchors[0]
        self.cls_subnet = nn.ModuleList([])
        self.bbox_subnet = nn.ModuleList([])
        for _ in range(num_conv_layers):
            self.cls_subnet.append(
                SeparableConvBlock(in_channels,
                                   in_channels,
                                   kernel_size=3,
                                   padding="SAME"))
            self.bbox_subnet.append(
                SeparableConvBlock(in_channels,
                                   in_channels,
                                   kernel_size=3,
                                   padding="SAME"))

        num_levels = len(input_shape)
        self.bn_cls_subnet = nn.ModuleList()
        self.bn_bbox_subnet = nn.ModuleList()
        for _ in range(num_levels):
            self.bn_cls_subnet.append(
                nn.ModuleList([
                    get_norm(norm, in_channels) for _ in range(num_conv_layers)
                ]))
            self.bn_bbox_subnet.append(
                nn.ModuleList([
                    get_norm(norm, in_channels) for _ in range(num_conv_layers)
                ]))

        self.cls_score = SeparableConvBlock(in_channels,
                                            num_anchors * num_classes,
                                            kernel_size=3,
                                            padding="SAME")
        self.bbox_pred = SeparableConvBlock(in_channels,
                                            num_anchors * 4,
                                            kernel_size=3,
                                            padding="SAME")
        self.act = MemoryEfficientSwish() if memory_efficient else Swish()
        self._init_weights()
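The stored `prior_prob` is the usual focal-loss prior for the classification branch. A sketch of the conventional initialization (an assumption here, since the body of `_init_weights` is not shown in this example): the bias of the final classification conv is filled so that the predicted sigmoid scores start out close to `prior_prob`.

    import math

    # Conventional focal-loss prior initialization (sketch; `_init_weights` itself is not shown).
    prior_prob = 0.01
    bias_value = -math.log((1.0 - prior_prob) / prior_prob)   # about -4.6 for prior_prob=0.01
    # e.g. the bias of the final conv inside `cls_score` would be filled with `bias_value`.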
Example #4
    def __init__(self,
                 in_channels=3,
                 blocks_args=None,
                 global_params=None,
                 out_features=None):
        """
        Args:
            in_channels (int): Number of input image channels.
            blocks_args (list[EasyDict]): a list of EasyDicts used to construct blocks.
                Each item in the list contains:

                * num_repeat: int, the number of `MBConvBlock` in the stage.
                * in_channels: int, the number of input tensor channels in the stage.
                * out_channels: int, the number of output tensor channels in the stage.
                * kernel_size: int, the kernel size of conv layer in the stage.
                * stride: int or list or tuple, the stride of conv layer in the stage.
                * expand_ratio: int, the channel expansion ratio at expansion phase
                    in `MBConvBlock`.
                * id_skip: bool, if `True`, apply skip connection in `MBConvBlock`
                    when stride is equal to 1 and the input and output channels are equal.
                * se_ratio: float, Squeeze layer channel reduction ratio in SE module,
                    between 0 and 1.

            global_params (EasyDict): an EasyDict containing global params shared between
                blocks, which contains:

                * norm: str, the normalization to use.
                * bn_momentum: float, the `momentum` parameter of the norm module.
                * bn_eps: float, the `eps` parameter of the norm module.
                * dropout_rate: dropout rate.
                * num_classes: None or int, if None, no classification head is built.
                * width_coefficient: float, coefficient of width.
                * depth_coefficient: float, coefficient of depth.
                * depth_divisor: int, when the number of channels of each stage is scaled by
                    the width coefficient and rounded, the result must be an integer
                    multiple of `depth_divisor`.
                * min_depth: int, the lower bound of the number of channels in each stage.
                * drop_connect_rate: float, between 0 to 1, drop connect rate.
                * image_size: int, input image size.

            out_features (list[str]): name of the layers whose outputs should be returned
                in forward. Can be anything in "stage1", "stage2", ..., "stage8" or "linear".
                If None, will return the output of the last layer.
        """
        super().__init__()
        assert isinstance(blocks_args, list), "blocks_args should be a list"
        assert len(blocks_args) > 0, "blocks_args must not be empty"
        self._size_divisibility = 0
        self._global_params = global_params
        self._blocks_args = blocks_args
        self._out_features = list()
        self._out_feature_strides = dict()
        self._out_feature_channels = dict()
        self.num_classes = global_params.num_classes

        # Stem
        # number of output channels
        out_channels = round_filters(32,
                                     global_params,
                                     skip=global_params.fix_head_stem)
        self._conv_stem = Conv2d(in_channels,
                                 out_channels,
                                 kernel_size=3,
                                 stride=2,
                                 padding="SAME",
                                 bias=False)
        self._bn0 = get_norm(global_params.norm, out_channels=out_channels)

        # Build blocks
        self._blocks = nn.ModuleList([])
        curr_stride = 2
        curr_block_idx = 0
        self.block_idx_to_name = dict()
        for stage_idx, block_args in enumerate(blocks_args):
            # Update block input and output filters based on depth multiplier.
            block_args.update(
                in_channels=round_filters(block_args.in_channels,
                                          global_params),
                out_channels=round_filters(block_args.out_channels,
                                           global_params),
                num_repeat=round_repeats(block_args.num_repeat, global_params))

            name = "stage{}".format(stage_idx + 2)
            curr_stride *= block_args.stride
            self._out_feature_strides[name] = curr_stride
            self._out_feature_channels[name] = block_args.out_channels
            curr_block_idx += block_args.num_repeat
            self.block_idx_to_name[curr_block_idx - 1] = name

            # The first block needs to take care of stride and
            # filter size increase.
            self._blocks.append(MBConvBlock(block_args, global_params))
            if block_args.num_repeat > 1:
                next_block_args = deepcopy(block_args)
                next_block_args.update(in_channels=block_args.out_channels,
                                       stride=1)
            for _ in range(block_args.num_repeat - 1):
                self._blocks.append(MBConvBlock(next_block_args,
                                                global_params))

        # Head
        if self.num_classes is not None:
            in_channels = block_args.out_channels  # output of final block
            out_channels = round_filters(1280,
                                         global_params,
                                         skip=global_params.fix_head_stem)
            self._conv_head = Conv2d(in_channels,
                                     out_channels,
                                     kernel_size=1,
                                     padding=0,
                                     bias=False)
            self._bn1 = get_norm(global_params.norm, out_channels=out_channels)

            # Final linear layers
            self._avg_pooling = nn.AdaptiveAvgPool2d(1)
            self._dropout = nn.Dropout(global_params.dropout_rate)
            self._fc = nn.Linear(out_channels, global_params.num_classes)
            name = "linear"

        self._swish = MemoryEfficientSwish()

        if out_features is None:
            out_features = [name]
        self._out_features = out_features
        assert len(self._out_features)

        # init bn params
        bn_mom = global_params.bn_momentum
        bn_eps = global_params.bn_eps
        if bn_mom is not None and bn_eps is not None:
            for m in self.modules():
                if isinstance(m, nn.BatchNorm2d):
                    m.momentum = bn_mom
                    m.eps = bn_eps
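The `depth_divisor` / `min_depth` bookkeeping described in the docstring corresponds to the standard EfficientNet channel-rounding rule. A sketch under that assumption (the repo's own `round_filters`, including its `skip` flag, is not shown and may differ in detail):

    def round_filters_sketch(filters, width_coefficient, depth_divisor=8, min_depth=None):
        # Scale by the width coefficient, then round to the nearest multiple of
        # depth_divisor, never going below min_depth or more than 10% below the
        # scaled value.
        filters *= width_coefficient
        min_depth = min_depth or depth_divisor
        new_filters = max(min_depth,
                          int(filters + depth_divisor / 2) // depth_divisor * depth_divisor)
        if new_filters < 0.9 * filters:
            new_filters += depth_divisor
        return int(new_filters)

    round_filters_sketch(32, width_coefficient=1.4)   # -> 48, e.g. the B4 stem width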
Example #5
    def set_swish(self, memory_efficient=True):
        """
        Sets swish function as memory efficient or standard.
        """
        self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
Example #6
    def __init__(self, input_size, in_channels_list, out_channels,
                 fuse_type="fast", norm="BN", memory_efficient=True):
        """
        input_size (int): the input image size.
        in_channels_list (list): the number of input tensor channels per level.
        out_channels (int): the number of output tensor channels.
        fuse_type (str): now only support three weighted fusion approaches:

            * fast:    Output = sum(Input_i * w_i / sum(w_j))
            * softmax: Output = sum(Input_i * e ^ w_i / sum(e ^ w_j))
            * sum:     Output = sum(Input_i) / len(Input_i)

        norm (str): the normalization to use.
        memory_efficient (bool): use `MemoryEfficientSwish` or `Swish` as activation function.
        """
        super(BiFPNLayer, self).__init__()
        assert fuse_type in ("fast", "softmax", "sum"), f"Unknown fuse method: {fuse_type}." \
            " Please select from [fast, softmax, sum]."

        self.input_size = input_size
        self.in_channels_list = in_channels_list
        self.fuse_type = fuse_type
        self.levels = len(in_channels_list)
        self.nodes_input_offsets = [
            [3, 4],
            [2, 5],
            [1, 6],
            [0, 7],
            [1, 7, 8],
            [2, 6, 9],
            [3, 5, 10],
            [4, 11],
        ]
        self.nodes_strides = [
            2 ** x
            for x in [6, 5, 4, 3, 4, 5, 6, 7]
        ]

        # Change input feature map to have target number of channels.
        self.resample_convs = nn.ModuleList()
        for node_i_input_offsets in self.nodes_input_offsets:
            resample_convs_i = nn.ModuleList()
            for input_offset in node_i_input_offsets:
                if self.in_channels_list[input_offset] != out_channels:
                    resample_conv = Conv2d(
                        self.in_channels_list[input_offset],
                        out_channels,
                        kernel_size=1,
                        stride=1,
                        padding=0,
                        norm=get_norm(norm, out_channels),
                        activation=None,
                    )
                else:
                    resample_conv = nn.Identity()
                self.in_channels_list.append(out_channels)
                resample_convs_i.append(resample_conv)
            self.resample_convs.append(resample_convs_i)

        # fpn combine weights
        self.edge_weights = nn.ParameterList()
        for node_i_input_offsets in self.nodes_input_offsets:
            # combine weight
            if fuse_type == "fast" or fuse_type == "softmax":
                weights_i = nn.Parameter(
                    torch.ones(len(node_i_input_offsets), dtype=torch.float32),
                    requires_grad=True,
                )
            elif fuse_type == "sum":
                weights_i = nn.Parameter(
                    torch.ones(len(node_i_input_offsets), dtype=torch.float32),
                    requires_grad=False,
                )
            else:
                raise ValueError("Unknown fuse method: {}".format(self.fuse_type))
            self.edge_weights.append(weights_i)

        # Convs for combine edge features
        self.combine_convs = nn.ModuleList()
        for node_i_input_offsets in self.nodes_input_offsets:
            combine_conv = SeparableConvBlock(
                out_channels,
                out_channels,
                kernel_size=3,
                padding="SAME",
                norm=get_norm(norm, out_channels),
                activation=None,
            )
            self.combine_convs.append(combine_conv)

        self.act = MemoryEfficientSwish() if memory_efficient else Swish()
        self.down_sampling = MaxPool2d(kernel_size=3, stride=2, padding="SAME")
        self.up_sampling = nn.Upsample(scale_factor=2, mode='nearest')
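A small illustration of the "fast" weighted fusion described in the docstring, for a single node with two inputs (this only demonstrates the formula; the layer's actual forward pass is not shown here, and the epsilon value is an assumption):

    import torch
    import torch.nn.functional as F

    inputs = [torch.randn(1, 64, 32, 32) for _ in range(2)]   # two resampled node inputs
    w = F.relu(torch.ones(2))                                  # edge weights, kept non-negative
    w = w / (w.sum() + 1e-4)                                   # normalize: w_i / sum(w_j)
    fused = sum(w[i] * inputs[i] for i in range(len(inputs)))  # Output = sum(Input_i * w_i / sum(w_j))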