Example #1
    def forward(self, x):
        """ The input should be of size [batch_size, 3, img_h, img_w] """
        _, _, img_h, img_w = x.size()
        cfg._tmp_img_h = img_h
        cfg._tmp_img_w = img_w

        #A
        with timer.env('backbone'):
            outs = self.backbone(x)

        # outs[0].size() = (n, 256, 138, 138)
        # outs[1].size() = (n, 512, 69, 69)
        # outs[2].size() = (n, 1024, 34, 34)
        # outs[3].size() = (n, 2048, 17, 17)
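        # (These are the ResNet C2-C5 feature maps; the sizes above assume the
        #  default 550x550 input, i.e. strides 4, 8, 16 and 32.)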

        #B
        if cfg.fpn is not None:
            with timer.env('fpn'):
                # Use backbone.selected_layers because we overwrote self.selected_layers
                # Note: selected_layers was redefined while the backbone was being built!
                # outs = [outs[1], outs[2], outs[3]]
                outs = [outs[i] for i in cfg.backbone.selected_layers]
                outs = self.fpn(outs)
        #   outs[0] #(n, 256, 69, 69)
        #   outs[1] #(n, 256, 34, 34)
        #   outs[2] #(n, 256, 17, 17)
        #   outs[3] #(n, 256, 8, 8)
        #   outs[4] #(n, 256, 4, 4)
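        # (P3-P7 of the FPN: every level reduced to 256 channels, with two extra
        #  downsampled levels appended after P5.)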

        #C
        proto_out = None
        if cfg.mask_type == mask_type.lincomb and cfg.eval_mask_branch:  #True
            with timer.env('proto'):
                proto_x = x if self.proto_src is None else outs[
                    self.proto_src]  # proto_x = outs[0] = P3

                if self.num_grids > 0:  # self.num_grids = 0
                    grids = self.grid.repeat(proto_x.size(0), 1, 1, 1)
                    proto_x = torch.cat([proto_x, grids], dim=1)

                #   proto_x = (n, 256, 69, 69)
                proto_out = self.proto_net(
                    proto_x)  # Pass P3 through proto_net -> (n, 32, 138, 138)
                proto_out = cfg.mask_proto_prototype_activation(
                    proto_out)  # relu

                if cfg.mask_proto_prototypes_as_features:  #False
                    # Clone here because we don't want to permute this, though idk if contiguous makes this unnecessary
                    proto_downsampled = proto_out.clone()

                    if cfg.mask_proto_prototypes_as_features_no_grad:
                        proto_downsampled = proto_out.detach()

                # Move the features last so the multiplication is easy
                proto_out = proto_out.permute(0, 2, 3, 1).contiguous(
                )  # Convert to (n, h, w, c) -> (n, 138, 138, 32)

                if cfg.mask_proto_bias:  #False
                    bias_shape = [x for x in proto_out.size()]
                    bias_shape[-1] = 1
                    proto_out = torch.cat(
                        [proto_out, torch.ones(*bias_shape)], -1)

        with timer.env('pred_heads'):
            pred_outs = {'loc': [], 'conf': [], 'mask': [], 'priors': []}

            if cfg.use_mask_scoring:  #False
                pred_outs['score'] = []

            if cfg.use_instance_coeff:  #False
                pred_outs['inst'] = []

            for idx, pred_layer in zip(
                    self.selected_layers,
                    self.prediction_layers):  # the 5 prediction layers, one per FPN level
                pred_x = outs[idx]

                if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_prototypes_as_features:  #False
                    # Scale the prototypes down to the current prediction layer's size and add it as inputs
                    proto_downsampled = F.interpolate(
                        proto_downsampled,
                        size=outs[idx].size()[2:],
                        mode='bilinear',
                        align_corners=False)
                    pred_x = torch.cat([pred_x, proto_downsampled], dim=1)

                # A hack for the way dataparallel works
                if cfg.share_prediction_module and pred_layer is not self.prediction_layers[
                        0]:  # True for every layer except the first
                    pred_layer.parent = [self.prediction_layers[0]]

                p = pred_layer(pred_x)

                for k, v in p.items():
                    pred_outs[k].append(v)

        # Concatenate the outputs of every prediction layer along the prior dimension.
        # Since the priors have fixed scales and aspect ratios, concatenating is fine
        # as long as the index order is preserved.
        for k, v in pred_outs.items():
            pred_outs[k] = torch.cat(v, -2)

        # Also store the extracted prototypes in pred_outs.
        if proto_out is not None:
            pred_outs['proto'] = proto_out

        if self.training:
            # For the extra loss functions
            if cfg.use_class_existence_loss:  #False
                pred_outs['classes'] = self.class_existence_fc(
                    outs[-1].mean(dim=(2, 3)))

            if cfg.use_semantic_segmentation_loss:  #True
                pred_outs['segm'] = self.semantic_seg_conv(outs[0])

            return pred_outs
        else:
            if cfg.use_mask_scoring:  #False
                pred_outs['score'] = torch.sigmoid(pred_outs['score'])

            if cfg.use_focal_loss:  #False
                if cfg.use_sigmoid_focal_loss:
                    # Note: even though conf[0] exists, this mode doesn't train it so don't use it
                    pred_outs['conf'] = torch.sigmoid(pred_outs['conf'])
                    if cfg.use_mask_scoring:
                        pred_outs['conf'] *= pred_outs['score']
                elif cfg.use_objectness_score:
                    # See focal_loss_sigmoid in multibox_loss.py for details
                    objectness = torch.sigmoid(pred_outs['conf'][:, :, 0])
                    pred_outs['conf'][:, :,
                                      1:] = objectness[:, :, None] * F.softmax(
                                          pred_outs['conf'][:, :, 1:], -1)
                    pred_outs['conf'][:, :, 0] = 1 - objectness
                else:
                    pred_outs['conf'] = F.softmax(pred_outs['conf'], -1)
            else:

                if cfg.use_objectness_score:  #False
                    objectness = torch.sigmoid(pred_outs['conf'][:, :, 0])

                    pred_outs['conf'][:, :, 1:] = (objectness > 0.10)[..., None] \
                        * F.softmax(pred_outs['conf'][:, :, 1:], dim=-1)

                else:
                    pred_outs['conf'] = F.softmax(pred_outs['conf'], -1)

            return self.detect(pred_outs, self)
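
The pred_outs dictionary built above feeds the lincomb mask head: at inference, each detection's 32 mask coefficients are linearly combined with the (138, 138, 32) prototypes from proto_net and passed through a sigmoid. A minimal sketch of that combination, using the shapes from the annotations above (the tensors here are placeholders, not YOLACT's actual post-processing code):

    import torch

    # Placeholder shapes: 32 prototypes of size 138x138 and 100 detections
    proto = torch.randn(138, 138, 32)    # pred_outs['proto'] for one image
    coeffs = torch.randn(100, 32)        # mask coefficients for 100 detections

    # One mask per detection: linear combination of the prototypes, then sigmoid
    masks = torch.sigmoid(proto @ coeffs.t())   # (138, 138, 100)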
Example #2
    def forward(self, x):
        """ The input should be of size [batch_size, 3, img_h, img_w] """
        with timer.env('backbone'):
            outs = self.backbone(x)

        if cfg.fpn is not None:
            with timer.env('fpn'):
                # Use backbone.selected_layers because we overwrote self.selected_layers
                outs = [outs[i] for i in cfg.backbone.selected_layers]
                outs = self.fpn(outs)

        proto_out = None
        if cfg.mask_type == mask_type.lincomb and cfg.eval_mask_branch:
            with timer.env('proto'):
                proto_x = x if self.proto_src is None else outs[self.proto_src]

                if self.num_grids > 0:
                    grids = self.grid.repeat(proto_x.size(0), 1, 1, 1)
                    proto_x = torch.cat([proto_x, grids], dim=1)

                proto_out = self.proto_net(proto_x)
                proto_out = cfg.mask_proto_prototype_activation(proto_out)

                if cfg.mask_proto_prototypes_as_features:
                    # Clone here because we don't want to permute this, though idk if contiguous makes this unnecessary
                    proto_downsampled = proto_out.clone()

                    if cfg.mask_proto_prototypes_as_features_no_grad:
                        proto_downsampled = proto_out.detach()

                # Move the features last so the multiplication is easy
                proto_out = proto_out.permute(0, 2, 3, 1).contiguous()

                if cfg.mask_proto_bias:
                    bias_shape = [x for x in proto_out.size()]
                    bias_shape[-1] = 1
                    proto_out = torch.cat(
                        [proto_out, torch.ones(*bias_shape)], -1)

        with timer.env('pred_heads'):
            pred_outs = {'loc': [], 'conf': [], 'mask': [], 'priors': []}

            if cfg.use_instance_coeff:
                pred_outs['inst'] = []

            for idx, pred_layer in zip(self.selected_layers,
                                       self.prediction_layers):
                pred_x = outs[idx]

                if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_prototypes_as_features:
                    # Scale the prototypes down to the current prediction layer's size and add it as inputs
                    proto_downsampled = F.interpolate(
                        proto_downsampled,
                        size=outs[idx].size()[2:],
                        mode='bilinear',
                        align_corners=False)
                    pred_x = torch.cat([pred_x, proto_downsampled], dim=1)

                # A hack for the way dataparallel works
                if cfg.share_prediction_module and pred_layer is not self.prediction_layers[
                        0]:
                    pred_layer.parent = [self.prediction_layers[0]]

                p = pred_layer(pred_x)

                for k, v in p.items():
                    pred_outs[k].append(v)

        for k, v in pred_outs.items():
            pred_outs[k] = torch.cat(v, -2)

        if proto_out is not None:
            pred_outs['proto'] = proto_out

        if self.training:

            # For the extra loss functions
            if cfg.use_class_existence_loss:
                pred_outs['classes'] = self.class_existence_fc(
                    outs[-1].mean(dim=(2, 3)))

            if cfg.use_semantic_segmentation_loss:
                pred_outs['segm'] = self.semantic_seg_conv(outs[0])

            return pred_outs
        else:
            if cfg.use_sigmoid_focal_loss:
                # Note: even though conf[0] exists, this mode doesn't train it so don't use it
                pred_outs['conf'] = torch.sigmoid(pred_outs['conf'])
            elif cfg.use_objectness_score:
                # See focal_loss_sigmoid in multibox_loss.py for details
                objectness = torch.sigmoid(pred_outs['conf'][:, :, 0])
                pred_outs['conf'][:, :,
                                  1:] = objectness[:, :, None] * F.softmax(
                                      pred_outs['conf'][:, :, 1:], -1)
                pred_outs['conf'][:, :, 0] = 1 - objectness
            else:
                pred_outs['conf'] = F.softmax(pred_outs['conf'], -1)

            return self.detect(pred_outs)
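
The loop "for k, v in pred_outs.items(): pred_outs[k] = torch.cat(v, -2)" stacks each layer's predictions along the prior dimension. A small sketch with made-up prior counts (the counts are illustrative, not the real YOLACT numbers):

    import torch

    n = 2  # batch size
    # Suppose three prediction layers contribute 100, 25 and 9 priors each
    loc_per_layer = [torch.randn(n, 100, 4),
                     torch.randn(n, 25, 4),
                     torch.randn(n, 9, 4)]

    loc = torch.cat(loc_per_layer, -2)  # (2, 134, 4): all priors, layer order preserved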
Example #3
    def forward(self, x):
        """ The input should be of size [batch_size, 3, img_h, img_w] """

        # plt.imshow(x.permute(0,2,3,1)[0,:,:,:].detach().cpu().numpy())
        # plt.savefig('visual_test/input.png')
        # plt.cla()

        with timer.env('backbone'):
            outs = self.backbone(x)

        if cfg.fpn is not None:
            with timer.env('fpn'):
                # Use backbone.selected_layers because we overwrote self.selected_layers
                outs = [outs[i] for i in cfg.backbone.selected_layers]
                outs = self.fpn(outs)

        proto_out = None
        if cfg.fpn_fusion is True:
            fusion_maps = self.fusion_module(
                outs[:self.fusion_layers]
            )  # fuse the feature maps from all selected levels into a single map

        if cfg.mask_type == mask_type.lincomb and cfg.eval_mask_branch:
            with timer.env('proto'):
                proto_x = x if self.proto_src is None else outs[self.proto_src]

                # FPN Fusion
                if cfg.proto_src_fusion is True:
                    proto_x = fusion_maps

                if cfg.cross_attention_fusion is True:
                    P_query = outs[0]
                    proto_x = P_query

                    for layer in range(self.fusion_layers):
                        z = self.CALayer(x_query=P_query,
                                         x_key=outs[layer]) - P_query
                        proto_x = proto_x + z

                if self.num_grids > 0:
                    grids = self.grid.repeat(proto_x.size(0), 1, 1, 1)
                    proto_x = torch.cat([proto_x, grids], dim=1)

                if cfg.proto_coordconv:
                    proto_x = self.addcoords(proto_x)

                proto_out = self.proto_net(proto_x)
                proto_out = cfg.mask_proto_prototype_activation(proto_out)

                if cfg.mask_proto_prototypes_as_features:
                    # Clone here because we don't want to permute this, though idk if contiguous makes this unnecessary
                    proto_downsampled = proto_out.clone()

                    if cfg.mask_proto_prototypes_as_features_no_grad:
                        proto_downsampled = proto_out.detach()

                # Move the features last so the multiplication is easy
                proto_out = proto_out.permute(0, 2, 3, 1).contiguous()

                if cfg.mask_proto_bias:
                    bias_shape = [x for x in proto_out.size()]
                    bias_shape[-1] = 1
                    proto_out = torch.cat(
                        [proto_out, torch.ones(*bias_shape)], -1)

        with timer.env('pred_heads'):
            pred_outs = {'loc': [], 'conf': [], 'mask': [], 'priors': []}

            if cfg.use_instance_coeff:
                pred_outs['inst'] = []

            for idx, pred_layer in zip(self.selected_layers,
                                       self.prediction_layers):
                pred_x = outs[idx]

                if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_prototypes_as_features:
                    # Scale the prototypes down to the current prediction layer's size and add it as inputs
                    proto_downsampled = F.interpolate(
                        proto_downsampled,
                        size=outs[idx].size()[2:],
                        mode='bilinear',
                        align_corners=False)
                    pred_x = torch.cat([pred_x, proto_downsampled], dim=1)

                # A hack for the way dataparallel works
                if cfg.share_prediction_module and pred_layer is not self.prediction_layers[
                        0]:
                    pred_layer.parent = [self.prediction_layers[0]]

                if cfg.ins_coordconv:
                    pred_x = self.addcoords(pred_x)

                p = pred_layer(pred_x)

                for k, v in p.items():
                    pred_outs[k].append(v)

        # ===revised===
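        # Collect how many priors each prediction layer contributes
        # (stored below as pred_outs['layer'], presumably so the concatenated
        # outputs can later be split back per FPN level).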
        num_priors = []
        for k, v in pred_outs.items():
            if k == 'loc':
                for _v in v:
                    num_priors.append(_v.size(1))
            pred_outs[k] = torch.cat(v, -2)
        pred_outs['layer'] = num_priors

        if proto_out is not None:
            pred_outs['proto'] = proto_out

        if self.training:

            # For the extra loss functions
            if cfg.use_class_existence_loss:
                pred_outs['classes'] = self.class_existence_fc(
                    outs[-1].mean(dim=(2, 3)))

            with timer.env('segm'):
                if cfg.use_semantic_segmentation_loss:
                    sem_in = None
                    if cfg.sem_src_fusion is True:
                        sem_in = fusion_maps
                    elif cfg.sem_lincomb is True:
                        sem_in = outs[-1]

                    if cfg.sem_coordconv:
                        sem_in = self.addcoords(sem_in)

                    pred_outs['segm'] = self.semantic_seg_conv(sem_in)
                    # pred_outs['segm'] = self.semantic_seg_conv(outs[-1]) #lincomb version

            return pred_outs
        else:
            if cfg.use_sigmoid_focal_loss:
                # Note: even though conf[0] exists, this mode doesn't train it so don't use it
                pred_outs['conf'] = torch.sigmoid(pred_outs['conf'])
            elif cfg.use_objectness_score:
                # See focal_loss_sigmoid in multibox_loss.py for details
                objectness = torch.sigmoid(pred_outs['conf'][:, :, 0])
                pred_outs['conf'][:, :,
                                  1:] = objectness[:, :, None] * F.softmax(
                                      pred_outs['conf'][:, :, 1:], -1)
                pred_outs['conf'][:, :, 0] = 1 - objectness
            else:
                pred_outs['conf'] = F.softmax(pred_outs['conf'], -1)

            if cfg.use_sem_output is True:
                sem_in = None
                if cfg.sem_src_fusion is True:
                    sem_in = fusion_maps
                elif cfg.sem_lincomb is True:
                    sem_in = outs[-1]

                if cfg.sem_coordconv:
                    sem_in = self.addcoords(sem_in)

                pred_outs['segm'] = self.semantic_seg_conv(sem_in)

            return self.detect(pred_outs)
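
Example #3's "===revised===" block records the per-layer prior counts in pred_outs['layer']. A hypothetical sketch of how such counts could be used to split the concatenated predictions back per FPN level (split_per_layer and the shapes below are illustrative, not part of the repository):

    import torch

    def split_per_layer(conf, num_priors):
        """Split a concatenated (batch, total_priors, C) tensor back into per-layer chunks."""
        return list(torch.split(conf, num_priors, dim=1))

    conf = torch.randn(2, 134, 81)                # e.g. pred_outs['conf'] after concatenation
    chunks = split_per_layer(conf, [100, 25, 9])  # one (2, priors_i, 81) tensor per layer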