Example #1
    def __init__(
        self,
        layout: List[Tuple[int, int]],
        num_classes: int = 80,
        in_channels: int = 3,
        stem_channels: int = 32,
        anchors: Optional[Tensor] = None,
        act_layer: Optional[nn.Module] = None,
        norm_layer: Optional[Callable[[int], nn.Module]] = None,
        drop_layer: Optional[Callable[..., nn.Module]] = None,
        conv_layer: Optional[Callable[..., nn.Module]] = None,
        backbone_norm_layer: Optional[Callable[[int],
                                               nn.Module]] = None) -> None:
        super().__init__()

        if act_layer is None:
            act_layer = nn.LeakyReLU(0.1, inplace=True)
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if backbone_norm_layer is None:
            backbone_norm_layer = norm_layer

        # backbone
        self.backbone = DarknetBodyV4(layout, in_channels, stem_channels, 3,
                                      Mish(), backbone_norm_layer, drop_layer,
                                      conv_layer)
        # neck
        self.neck = Neck([1024, 512, 256], act_layer, norm_layer, drop_layer,
                         conv_layer)
        # head
        self.head = Yolov4Head(num_classes, anchors, act_layer, norm_layer,
                               drop_layer, conv_layer)

        init_module(self.neck, 'leaky_relu')
        init_module(self.head, 'leaky_relu')
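A minimal instantiation sketch for the constructor above, assuming the enclosing class is named YOLOv4 and a CSP-Darknet-53-style layout of (channels, repeats) pairs; the class name, layout values, and input size are assumptions, since the snippet only shows __init__:

import torch

# Hypothetical usage; class name, layout values and input size are assumptions.
layout = [(64, 1), (128, 2), (256, 8), (512, 8), (1024, 4)]  # (out_channels, num_blocks) per stage
model = YOLOv4(layout, num_classes=80)
model.eval()
with torch.no_grad():
    detections = model(torch.rand(1, 3, 608, 608))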
Example #2
    def __init__(
        self,
        layout: List[List[int]],
        num_classes: int = 20,
        in_channels: int = 3,
        stem_channels: int = 64,
        num_anchors: int = 2,
        lambda_obj: float = 1,
        lambda_noobj: float = 0.5,
        lambda_class: float = 1,
        lambda_coords: float = 5.,
        rpn_nms_thresh: float = 0.7,
        box_score_thresh: float = 0.05,
        head_hidden_nodes: int = 512,  # In the original paper, 4096
        act_layer: Optional[nn.Module] = None,
        norm_layer: Optional[Callable[[int], nn.Module]] = None,
        drop_layer: Optional[Callable[..., nn.Module]] = None,
        conv_layer: Optional[Callable[..., nn.Module]] = None,
        backbone_norm_layer: Optional[Callable[[int], nn.Module]] = None
    ) -> None:

        super().__init__(
            num_classes,
            rpn_nms_thresh,
            box_score_thresh,
            lambda_obj,
            lambda_noobj,
            lambda_class,
            lambda_coords
        )

        if act_layer is None:
            act_layer = nn.LeakyReLU(0.1, inplace=True)

        if backbone_norm_layer is None and norm_layer is not None:
            backbone_norm_layer = norm_layer

        self.backbone = DarknetBodyV1(layout, in_channels, stem_channels, act_layer, backbone_norm_layer)

        self.block4 = nn.Sequential(
            *conv_sequence(1024, 1024, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=3, padding=1, bias=(norm_layer is None)),
            *conv_sequence(1024, 1024, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=3, padding=1, stride=2, bias=(norm_layer is None)),
            *conv_sequence(1024, 1024, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=3, padding=1, bias=(norm_layer is None)),
            *conv_sequence(1024, 1024, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=3, padding=1, bias=(norm_layer is None)))

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(1024 * 7 ** 2, head_hidden_nodes),
            act_layer,
            nn.Dropout(0.5),
            nn.Linear(head_hidden_nodes, 7 ** 2 * (num_anchors * 5 + num_classes)))
        self.num_anchors = num_anchors

        init_module(self.block4, 'leaky_relu')
        init_module(self.classifier, 'leaky_relu')
Example #3
    def test_init(self):

        module = nn.Sequential(nn.Conv2d(3, 32, 3), nn.BatchNorm2d(32),
                               nn.LeakyReLU(inplace=True))

        # Check that each layer was initialized correctly
        init_module(module, 'leaky_relu')
        self.assertTrue(torch.all(module[0].bias.data == 0))
        self.assertTrue(torch.all(module[1].weight.data == 1))
        self.assertTrue(torch.all(module[1].bias.data == 0))
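The assertions above pin down only the biases and batch-norm parameters after init_module(module, 'leaky_relu'). A minimal sketch of a helper that would satisfy them follows; the Kaiming scheme for convolution and linear weights is an assumption, not something the test checks:

import torch.nn as nn

def init_module(module: nn.Module, nonlinearity: str = 'relu') -> None:
    # Hypothetical sketch: only the zero biases and unit batch-norm weights
    # are implied by the assertions above; the weight init scheme is assumed.
    for m in module.modules():
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity=nonlinearity)
            if m.bias is not None:
                nn.init.zeros_(m.bias)
        elif isinstance(m, nn.BatchNorm2d):
            nn.init.ones_(m.weight)
            nn.init.zeros_(m.bias)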
Example #4
    def __init__(self, layout, num_classes=20, in_channels=3, stem_chanels=32, anchors=None, passthrough_ratio=8,
                 lambda_noobj=0.5, lambda_coords=5., rpn_nms_thresh=0.7, box_score_thresh=0.05,
                 act_layer=None, norm_layer=None, drop_layer=None, conv_layer=None, backbone_norm_layer=None):

        super().__init__(rpn_nms_thresh, box_score_thresh)

        if act_layer is None:
            act_layer = nn.LeakyReLU(0.1, inplace=True)
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if backbone_norm_layer is None:
            backbone_norm_layer = norm_layer

        # Priors computed using K-means
        if anchors is None:
            anchors = torch.tensor([[1.3221, 1.73145], [3.19275, 4.00944], [5.05587, 8.09892],
                                    [9.47112, 4.84053], [11.2364, 10.0071]])
        self.num_classes = num_classes

        self.backbone = DarknetBodyV2(layout, in_channels, stem_chanels, True, act_layer,
                                      backbone_norm_layer, drop_layer, conv_layer)

        self.block5 = nn.Sequential(
            *conv_sequence(layout[-1][0], layout[-1][0], act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=3, padding=1, bias=False),
            *conv_sequence(layout[-1][0], layout[-1][0], act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=3, padding=1, bias=False))

        self.passthrough_layer = nn.Sequential(*conv_sequence(layout[-2][0], layout[-2][0] // passthrough_ratio,
                                                              act_layer, norm_layer, drop_layer, conv_layer,
                                                              kernel_size=1, bias=False),
                                               ConcatDownsample2d(scale_factor=2))

        self.block6 = nn.Sequential(
            *conv_sequence(layout[-1][0] + layout[-2][0] // passthrough_ratio * 2 ** 2, layout[-1][0],
                           act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=3, padding=1, bias=False))

        # Each box has P_objectness, 4 coords, and score for each class
        self.head = nn.Conv2d(layout[-1][0], anchors.shape[0] * (5 + num_classes), 1)

        # Register anchor priors as a buffer
        self.register_buffer('anchors', anchors)

        # Loss coefficients
        self.lambda_noobj = lambda_noobj
        self.lambda_coords = lambda_coords

        init_module(self.block5, 'leaky_relu')
        init_module(self.passthrough_layer, 'leaky_relu')
        init_module(self.block6, 'leaky_relu')
        init_module(self.head, 'leaky_relu')
Example #5
    def __init__(self, in_planes, act_layer=None, norm_layer=None, drop_layer=None, conv_layer=None):
        super().__init__()

        self.fpn = nn.Sequential(
            *conv_sequence(in_planes[0], in_planes[0] // 2, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=1, bias=False),
            *conv_sequence(in_planes[0] // 2, in_planes[0], act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=3, padding=1, bias=False),
            *conv_sequence(in_planes[0], in_planes[0] // 2, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=1, bias=False),
            SPP([5, 9, 13]),
            *conv_sequence(4 * in_planes[0] // 2, in_planes[0] // 2, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=1, bias=False),
            *conv_sequence(in_planes[0] // 2, in_planes[0], act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=3, padding=1, bias=False),
            *conv_sequence(in_planes[0], in_planes[0] // 2, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=1, bias=False)
        )

        self.pan1 = PAN(in_planes[1], act_layer, norm_layer, drop_layer, conv_layer)
        self.pan2 = PAN(in_planes[2], act_layer, norm_layer, drop_layer, conv_layer)
        init_module(self, 'leaky_relu')
Example #6
    def __init__(self, layout, num_classes=20, in_channels=3, stem_channels=64, num_anchors=2,
                 lambda_noobj=0.5, lambda_coords=5., rpn_nms_thresh=0.7, box_score_thresh=0.05,
                 act_layer=None, norm_layer=None, drop_layer=None, conv_layer=None, backbone_norm_layer=None):

        super().__init__(rpn_nms_thresh, box_score_thresh)

        if act_layer is None:
            act_layer = nn.LeakyReLU(0.1, inplace=True)

        if backbone_norm_layer is None and norm_layer is not None:
            backbone_norm_layer = norm_layer

        self.backbone = DarknetBodyV1(layout, in_channels, stem_channels, act_layer, backbone_norm_layer)

        self.block4 = nn.Sequential(
            *conv_sequence(1024, 1024, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=3, padding=1, bias=False),
            *conv_sequence(1024, 1024, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=3, padding=1, stride=2, bias=False),
            *conv_sequence(1024, 1024, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=3, padding=1, bias=False),
            *conv_sequence(1024, 1024, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=3, padding=1, bias=False))

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(1024 * 7 ** 2, 4096),
            nn.LeakyReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 7 ** 2 * (num_anchors * 5 + num_classes)))
        self.num_anchors = num_anchors
        self.num_classes = num_classes
        # Loss coefficients
        self.lambda_noobj = lambda_noobj
        self.lambda_coords = lambda_coords

        init_module(self.block4, 'leaky_relu')
        init_module(self.classifier, 'leaky_relu')
Example #7
    def __init__(self, layout, num_classes=80, in_channels=3, stem_channels=32, anchors=None,
                 act_layer=None, norm_layer=None, drop_layer=None, conv_layer=None, backbone_norm_layer=None):
        super().__init__()

        if act_layer is None:
            act_layer = nn.LeakyReLU(0.1, inplace=True)
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if backbone_norm_layer is None:
            backbone_norm_layer = norm_layer
        if drop_layer is None:
            drop_layer = DropBlock2d

        # backbone
        self.backbone = DarknetBodyV4(layout, in_channels, stem_channels, 3, Mish(),
                                      backbone_norm_layer, drop_layer, conv_layer)
        # neck
        self.neck = Neck([1024, 512, 256], act_layer, norm_layer, drop_layer, conv_layer)
        # head
        self.head = Yolov4Head(num_classes, anchors, act_layer, norm_layer, drop_layer, conv_layer)

        init_module(self.neck, 'leaky_relu')
        init_module(self.head, 'leaky_relu')
Example #8
    def __init__(self, num_classes=80, anchors=None,
                 act_layer=None, norm_layer=None, drop_layer=None, conv_layer=None):

        # cf. https://github.com/AlexeyAB/darknet/blob/master/cfg/yolov4.cfg#L1143
        if anchors is None:
            anchors = torch.tensor([[[12, 16], [19, 36], [40, 28]],
                                    [[36, 75], [76, 55], [72, 146]],
                                    [[142, 110], [192, 243], [459, 401]]], dtype=torch.float32) / 608
        elif not isinstance(anchors, torch.Tensor):
            anchors = torch.tensor(anchors, dtype=torch.float32)

        if anchors.shape[0] != 3:
            raise AssertionError(f"The number of anchors is expected to be 3, received: {anchors.shape[0]}")

        super().__init__()

        self.head1 = nn.Sequential(
            *conv_sequence(128, 256, act_layer, norm_layer, None, conv_layer,
                           kernel_size=3, padding=1, bias=False),
            *conv_sequence(256, (5 + num_classes) * 3, None, None, None, conv_layer,
                           kernel_size=1, bias=True))

        self.yolo1 = YoloLayer(anchors[0], num_classes=num_classes, scale_xy=1.2)

        self.pre_head2 = nn.Sequential(*conv_sequence(128, 256, act_layer, norm_layer, drop_layer, conv_layer,
                                                      kernel_size=3, padding=1, stride=2, bias=False))
        self.head2_1 = nn.Sequential(
            *conv_sequence(512, 256, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=1, bias=False),
            *conv_sequence(256, 512, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=3, padding=1, bias=False),
            *conv_sequence(512, 256, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=1, bias=False),
            *conv_sequence(256, 512, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=3, padding=1, bias=False),
            *conv_sequence(512, 256, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=1, bias=False))
        self.head2_2 = nn.Sequential(
            *conv_sequence(256, 512, act_layer, norm_layer, None, conv_layer,
                           kernel_size=3, padding=1, bias=False),
            *conv_sequence(512, (5 + num_classes) * 3, None, None, None, conv_layer,
                           kernel_size=1, bias=True))

        self.yolo2 = YoloLayer(anchors[1], num_classes=num_classes, scale_xy=1.1)

        self.pre_head3 = nn.Sequential(*conv_sequence(256, 512, act_layer, norm_layer, drop_layer, conv_layer,
                                                      kernel_size=3, padding=1, stride=2, bias=False))
        self.head3 = nn.Sequential(
            *conv_sequence(1024, 512, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=1, bias=False),
            *conv_sequence(512, 1024, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=3, padding=1, bias=False),
            *conv_sequence(1024, 512, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=1, bias=False),
            *conv_sequence(512, 1024, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=3, padding=1, bias=False),
            *conv_sequence(1024, 512, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=1, bias=False),
            *conv_sequence(512, 1024, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=3, padding=1, bias=False),
            *conv_sequence(1024, (5 + num_classes) * 3, None, None, None, conv_layer,
                           kernel_size=1, bias=True))

        self.yolo3 = YoloLayer(anchors[2], num_classes=num_classes, scale_xy=1.05)
        init_module(self, 'leaky_relu')
        # Zero init
        self.head1[-1].weight.data.zero_()
        self.head1[-1].bias.data.zero_()
        self.head2_2[-1].weight.data.zero_()
        self.head2_2[-1].bias.data.zero_()
        self.head3[-1].weight.data.zero_()
        self.head3[-1].bias.data.zero_()
Example #9
    def __init__(self,
                 block,
                 num_blocks,
                 planes,
                 num_classes=10,
                 in_channels=3,
                 zero_init_residual=False,
                 width_per_group=64,
                 conv_layer=None,
                 act_layer=None,
                 norm_layer=None,
                 drop_layer=None,
                 deep_stem=False,
                 stem_pool=True,
                 avg_downsample=False,
                 num_repeats=1,
                 block_args=None):

        if conv_layer is None:
            conv_layer = nn.Conv2d
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if act_layer is None:
            act_layer = nn.ReLU(inplace=True)
        self.dilation = 1

        in_planes = 64
        # Deep stem from ResNet-C
        if deep_stem:
            _layers = [
                *conv_sequence(in_channels,
                               in_planes // 2,
                               act_layer,
                               norm_layer,
                               drop_layer,
                               conv_layer,
                               kernel_size=3,
                               stride=2,
                               padding=1,
                               bias=False), *conv_sequence(in_planes // 2,
                                                           in_planes // 2,
                                                           act_layer,
                                                           norm_layer,
                                                           drop_layer,
                                                           conv_layer,
                                                           kernel_size=3,
                                                           stride=1,
                                                           padding=1,
                                                           bias=False),
                *conv_sequence(in_planes // 2,
                               in_planes,
                               act_layer,
                               norm_layer,
                               drop_layer,
                               conv_layer,
                               kernel_size=3,
                               stride=1,
                               padding=1,
                               bias=False)
            ]
        else:
            _layers = conv_sequence(in_channels,
                                    in_planes,
                                    act_layer,
                                    norm_layer,
                                    drop_layer,
                                    conv_layer,
                                    kernel_size=7,
                                    stride=2,
                                    padding=3,
                                    bias=False)
        if stem_pool:
            _layers.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

        # Optional tensor repetitions along channel axis (mainly for TridentNet)
        if num_repeats > 1:
            _layers.append(ChannelRepeat(num_repeats))

        # Consecutive convolutional blocks
        stride = 1
        # Block args
        if block_args is None:
            block_args = dict(groups=1)
        if not isinstance(block_args, list):
            block_args = [block_args] * len(num_blocks)
        for _num_blocks, _planes, _block_args in zip(num_blocks, planes,
                                                     block_args):
            _layers.append(
                self._make_layer(block,
                                 _num_blocks,
                                 in_planes,
                                 _planes,
                                 stride,
                                 width_per_group,
                                 act_layer=act_layer,
                                 norm_layer=norm_layer,
                                 drop_layer=drop_layer,
                                 avg_downsample=avg_downsample,
                                 num_repeats=num_repeats,
                                 block_args=_block_args))
            in_planes = block.expansion * _planes
            stride = 2

        super().__init__(
            OrderedDict([('features', nn.Sequential(*_layers)),
                         ('pool', GlobalAvgPool2d(flatten=True)),
                         ('head',
                          nn.Linear(num_repeats * in_planes, num_classes))]))

        # Init all layers
        init.init_module(self, nonlinearity='relu')

        # Init shortcut
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    m.convs[2][1].weight.data.zero_()
                elif isinstance(m, BasicBlock):
                    m.convs[1][1].weight.data.zero_()
Example #10
    def __init__(
            self,
            in_planes: List[int],
            act_layer: Optional[nn.Module] = None,
            norm_layer: Optional[Callable[[int], nn.Module]] = None,
            drop_layer: Optional[Callable[..., nn.Module]] = None,
            conv_layer: Optional[Callable[..., nn.Module]] = None) -> None:
        super().__init__()

        self.fpn = nn.Sequential(
            *conv_sequence(in_planes[0],
                           in_planes[0] // 2,
                           act_layer,
                           norm_layer,
                           drop_layer,
                           conv_layer,
                           kernel_size=1,
                           bias=False),
            *conv_sequence(in_planes[0] // 2,
                           in_planes[0],
                           act_layer,
                           norm_layer,
                           drop_layer,
                           conv_layer,
                           kernel_size=3,
                           padding=1,
                           bias=False),
            *conv_sequence(in_planes[0],
                           in_planes[0] // 2,
                           act_layer,
                           norm_layer,
                           drop_layer,
                           conv_layer,
                           kernel_size=1,
                           bias=False), SPP([5, 9, 13]),
            *conv_sequence(4 * in_planes[0] // 2,
                           in_planes[0] // 2,
                           act_layer,
                           norm_layer,
                           drop_layer,
                           conv_layer,
                           kernel_size=1,
                           bias=False),
            *conv_sequence(in_planes[0] // 2,
                           in_planes[0],
                           act_layer,
                           norm_layer,
                           drop_layer,
                           conv_layer,
                           kernel_size=3,
                           padding=1,
                           bias=False),
            *conv_sequence(in_planes[0],
                           in_planes[0] // 2,
                           act_layer,
                           norm_layer,
                           drop_layer,
                           conv_layer,
                           kernel_size=1,
                           bias=False))

        self.pan1 = PAN(in_planes[1], act_layer, norm_layer, drop_layer,
                        conv_layer)
        self.pan2 = PAN(in_planes[2], act_layer, norm_layer, drop_layer,
                        conv_layer)
        init_module(self, 'leaky_relu')
Example #11
    def __init__(self,
                 width_mult=1.0,
                 depth_mult=1.0,
                 num_classes=1000,
                 in_channels=3,
                 in_planes=16,
                 final_planes=180,
                 use_se=True,
                 se_ratio=12,
                 dropout_ratio=0.2,
                 bn_momentum=0.9,
                 act_layer=None,
                 norm_layer=None,
                 drop_layer=None):
        """Mostly adapted from https://github.com/clovaai/rexnet/blob/master/rexnetv1.py"""
        super().__init__()

        if act_layer is None:
            act_layer = nn.SiLU(inplace=True)
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        num_blocks = [1, 2, 2, 3, 3, 5]
        strides = [1, 2, 2, 2, 1, 2]
        num_blocks = [ceil(element * depth_mult) for element in num_blocks]
        strides = sum([[element] + [1] * (num_blocks[idx] - 1)
                       for idx, element in enumerate(strides)], [])
        depth = sum(num_blocks)

        stem_channel = 32 / width_mult if width_mult < 1.0 else 32
        inplanes = in_planes / width_mult if width_mult < 1.0 else in_planes

        # The following channel configuration is a simple instance to make each layer become an expand layer
        chans = [int(round(width_mult * stem_channel))]
        chans.extend([
            int(round(width_mult * (inplanes + idx * final_planes / depth)))
            for idx in range(depth)
        ])

        ses = [False] * (num_blocks[0] + num_blocks[1]) + [use_se] * sum(
            num_blocks[2:])

        _layers = conv_sequence(in_channels,
                                chans[0],
                                act_layer,
                                norm_layer,
                                drop_layer,
                                kernel_size=3,
                                stride=2,
                                padding=1,
                                bias=(norm_layer is None))

        t = 1
        for in_c, c, s, se in zip(chans[:-1], chans[1:], strides, ses):
            _layers.append(
                ReXBlock(in_channels=in_c,
                         channels=c,
                         t=t,
                         stride=s,
                         use_se=se,
                         se_ratio=se_ratio))
            t = 6

        pen_channels = int(width_mult * 1280)
        _layers.extend(
            conv_sequence(chans[-1],
                          pen_channels,
                          act_layer,
                          norm_layer,
                          drop_layer,
                          kernel_size=1,
                          stride=1,
                          padding=0,
                          bias=(norm_layer is None)))

        super().__init__(
            OrderedDict([('features', nn.Sequential(*_layers)),
                         ('pool', GlobalAvgPool2d(flatten=True)),
                         ('head',
                          nn.Sequential(nn.Dropout(dropout_ratio),
                                        nn.Linear(pen_channels,
                                                  num_classes)))]))

        # Init all layers
        init.init_module(self, nonlinearity='relu')
Example #12
    def __init__(
        self,
        layout: List[Tuple[int, int]],
        num_classes: int = 20,
        in_channels: int = 3,
        stem_chanels: int = 32,
        anchors: Optional[Tensor] = None,
        passthrough_ratio: int = 8,
        lambda_obj: float = 5,
        lambda_noobj: float = 1,
        lambda_class: float = 1,
        lambda_coords: float = 1,
        rpn_nms_thresh: float = 0.7,
        box_score_thresh: float = 0.05,
        act_layer: Optional[nn.Module] = None,
        norm_layer: Optional[Callable[[int], nn.Module]] = None,
        drop_layer: Optional[Callable[..., nn.Module]] = None,
        conv_layer: Optional[Callable[..., nn.Module]] = None,
        backbone_norm_layer: Optional[Callable[[int],
                                               nn.Module]] = None) -> None:

        super().__init__(num_classes, rpn_nms_thresh, box_score_thresh,
                         lambda_obj, lambda_noobj, lambda_class, lambda_coords)

        if act_layer is None:
            act_layer = nn.LeakyReLU(0.1, inplace=True)
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if backbone_norm_layer is None:
            backbone_norm_layer = norm_layer

        # Priors computed using K-means
        if anchors is None:
            # cf. https://github.com/pjreddie/darknet/blob/master/cfg/yolov2-voc.cfg#L242
            anchors = torch.tensor([[1.3221, 1.73145], [3.19275, 4.00944],
                                    [5.05587, 8.09892], [9.47112, 4.84053],
                                    [11.2364, 10.0071]]) / 13

        self.backbone = DarknetBodyV2(layout, in_channels, stem_chanels, True,
                                      act_layer, backbone_norm_layer,
                                      drop_layer, conv_layer)

        self.block5 = nn.Sequential(
            *conv_sequence(layout[-1][0],
                           layout[-1][0],
                           act_layer,
                           norm_layer,
                           drop_layer,
                           conv_layer,
                           kernel_size=3,
                           padding=1,
                           bias=(norm_layer is None)),
            *conv_sequence(layout[-1][0],
                           layout[-1][0],
                           act_layer,
                           norm_layer,
                           drop_layer,
                           conv_layer,
                           kernel_size=3,
                           padding=1,
                           bias=(norm_layer is None)))

        self.passthrough_layer = nn.Sequential(
            *conv_sequence(layout[-2][0],
                           layout[-2][0] // passthrough_ratio,
                           act_layer,
                           norm_layer,
                           drop_layer,
                           conv_layer,
                           kernel_size=1,
                           bias=(norm_layer is None)),
            ConcatDownsample2d(scale_factor=2))

        self.block6 = nn.Sequential(
            *conv_sequence(layout[-1][0] +
                           layout[-2][0] // passthrough_ratio * 2**2,
                           layout[-1][0],
                           act_layer,
                           norm_layer,
                           drop_layer,
                           conv_layer,
                           kernel_size=3,
                           padding=1,
                           bias=(norm_layer is None)))

        # Each box has P_objectness, 4 coords, and score for each class
        self.head = nn.Conv2d(layout[-1][0],
                              anchors.shape[0] * (5 + num_classes), 1)

        # Register anchor priors as a buffer
        self.register_buffer('anchors', anchors)

        init_module(self.block5, 'leaky_relu')
        init_module(self.passthrough_layer, 'leaky_relu')
        init_module(self.block6, 'leaky_relu')
        # Initialize the head like a linear (default Conv2D init is the same as Linear)
        if self.head.bias is not None:
            self.head.bias.data.zero_()