Example #1
    def __init__(self, cfg: CfgNode, input_channels: int):
        """
        Initialize predictor using configuration options

        Args:
            cfg (CfgNode): configuration options
            input_channels (int): input tensor size along the channel dimension
        """
        super().__init__()
        dim_in = input_channels
        n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
        dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
        kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
        self.ann_index_lowres = ConvTranspose2d(
            dim_in, n_segm_chan, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
        )
        self.index_uv_lowres = ConvTranspose2d(
            dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
        )
        self.u_lowres = ConvTranspose2d(
            dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
        )
        self.v_lowres = ConvTranspose2d(
            dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
        )
        self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE
        initialize_module_params(self)
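A note on the geometry: for ConvTranspose2d, H_out = (H_in - 1) * stride - 2 * padding + kernel_size, so stride=2 together with padding = kernel_size / 2 - 1 makes every lowres head above exactly double the spatial resolution before any further interpolation. A minimal sanity check in plain PyTorch (nn.ConvTranspose2d takes the same positional arguments as the detectron2 wrapper; the channel counts and kernel size are assumptions for illustration):

    import torch
    import torch.nn as nn

    kernel_size = 4  # an assumed DECONV_KERNEL value
    deconv = nn.ConvTranspose2d(512, 25, kernel_size, stride=2,
                                padding=int(kernel_size / 2 - 1))
    x = torch.randn(1, 512, 14, 14)
    assert deconv(x).shape[-2:] == (28, 28)  # spatial size exactly doubled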
Example #2
    def __init__(self, cfg: CfgNode, input_channels: int):
        """
        Initialize predictor using configuration options

        Args:
            cfg (CfgNode): configuration options
            input_channels (int): input tensor size along the channel dimension
        """
        super().__init__()
        dim_in = input_channels
        n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
        embed_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE
        kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
        # coarse segmentation
        self.coarse_segm_lowres = ConvTranspose2d(dim_in,
                                                  n_segm_chan,
                                                  kernel_size,
                                                  stride=2,
                                                  padding=int(kernel_size / 2 -
                                                              1))
        # embedding
        self.embed_lowres = ConvTranspose2d(dim_in,
                                            embed_size,
                                            kernel_size,
                                            stride=2,
                                            padding=int(kernel_size / 2 - 1))
        self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE
        initialize_module_params(self)
Example #3
 def __init__(self, cfg, input_channels):
     super(DensePosePredictor, self).__init__()
     dim_in = input_channels
     dim_out_ann_index = self.NUM_ANN_INDICES
     dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
     kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
     self.ann_index_lowres = ConvTranspose2d(dim_in,
                                             dim_out_ann_index,
                                             kernel_size,
                                             stride=2,
                                             padding=int(kernel_size / 2 -
                                                         1))
     self.index_uv_lowres = ConvTranspose2d(dim_in,
                                            dim_out_patches,
                                            kernel_size,
                                            stride=2,
                                            padding=int(kernel_size / 2 -
                                                        1))
     self.u_lowres = ConvTranspose2d(dim_in,
                                     dim_out_patches,
                                     kernel_size,
                                     stride=2,
                                     padding=int(kernel_size / 2 - 1))
     self.v_lowres = ConvTranspose2d(dim_in,
                                     dim_out_patches,
                                     kernel_size,
                                     stride=2,
                                     padding=int(kernel_size / 2 - 1))
     self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE
     initialize_module_params(self)
Example #4
    def __init__(self, cfg, input_channels):

        super(DensePosePredictor, self).__init__()
        dim_in = input_channels
        n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
        dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
        kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
        self.ann_index_lowres = ConvTranspose2d(dim_in,
                                                n_segm_chan,
                                                kernel_size,
                                                stride=2,
                                                padding=int(kernel_size / 2 -
                                                            1))
        self.index_uv_lowres = ConvTranspose2d(dim_in,
                                               dim_out_patches,
                                               kernel_size,
                                               stride=2,
                                               padding=int(kernel_size / 2 -
                                                           1))
        self.u_lowres = ConvTranspose2d(dim_in,
                                        dim_out_patches,
                                        kernel_size,
                                        stride=2,
                                        padding=int(kernel_size / 2 - 1))
        self.v_lowres = ConvTranspose2d(dim_in,
                                        dim_out_patches,
                                        kernel_size,
                                        stride=2,
                                        padding=int(kernel_size / 2 - 1))
        self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE
        self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(
            cfg)
        self._initialize_confidence_estimation_layers(
            cfg, self.confidence_model_cfg, dim_in)
        initialize_module_params(self)
Example #5
    def __init__(self, cfg, input_shape: ShapeSpec):
        """
        The following attributes are parsed from config:
            conv_dims: an iterable of output channel counts for each conv in the head
                         e.g. (512, 512, 512) for three convs outputting 512 channels.
            num_keypoints: number of keypoint heatmaps to predict; determines the number of
                           channels in the final output.
        """
        super(KeypointPRHead, self).__init__()

        # fmt: off
        # default up_scale to 2 (this can eventually be moved to config)
        up_scale      = 2
        layer_channels     = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_HEAD_DIM
        relation_dims = cfg.MODEL.ROI_KEYPOINT_HEAD.RELATION_DIM
        num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS
        self.n_stacked_convs = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_STACKED_CONVS
        num_parts = num_keypoints // 2 + 1
        in_channels   = input_shape.channels
        deconv_kernel = 4
        # fmt: on

        self.blocks = []
        for i in range(self.n_stacked_convs):
            module = Conv2d(in_channels, layer_channels, 3, stride=1, padding=1)
            self.add_module(self._get_layer_name(i), module)
            self.blocks.append(module)
            in_channels = layer_channels
        for i in range(2):
            layer = ConvTranspose2d(
                layer_channels,
                layer_channels,
                kernel_size=deconv_kernel,
                stride=2,
                padding=int(deconv_kernel / 2 - 1),
            )
            layer_name = self._get_deconv_layer_name(i, 'PM')
            self.add_module(layer_name, layer)
        for i in range(2):
            layer = ConvTranspose2d(
                layer_channels,
                layer_channels,
                kernel_size=deconv_kernel,
                stride=2,
                padding=int(deconv_kernel / 2 - 1),
            )
            layer_name = self._get_deconv_layer_name(i, 'KM')
            self.add_module(layer_name, layer)
        self.inter_part_score = Conv2d(layer_channels, num_parts, 3, stride=1, padding=1)
        self.kpt_score = Conv2d(layer_channels + layer_channels, num_keypoints, 3, stride=1, padding=1)
        # self.kpt_score = Conv2d(relation_dims + layer_channels, num_keypoints, 3, stride=1, padding=1)

        for name, param in self.named_parameters():
            if "bias" in name:
                nn.init.constant_(param, 0)
            elif "weight" in name:
                # Caffe2 implementation uses MSRAFill, which in fact
                # corresponds to kaiming_normal_ in PyTorch
                nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
Example #6
    def __init__(self,
                 input_shape: ShapeSpec,
                 *,
                 num_classes,
                 conv_dims,
                 conv_norm="",
                 **kwargs):
        """
        NOTE: this interface is experimental.

        Args:
            input_shape (ShapeSpec): shape of the input feature
            num_classes (int): the number of foreground classes (i.e. background is not
                included). 1 if using class agnostic prediction.
            conv_dims (list[int]): a list of N>0 integers representing the output dimensions
                of N-1 conv layers and the last upsample layer.
            conv_norm (str or callable): normalization for the conv layers.
                See :func:`detectron2.layers.get_norm` for supported types.
        """
        super().__init__(**kwargs)
        assert len(conv_dims) >= 1, "conv_dims have to be non-empty!"

        self.conv_norm_relus = []

        cur_channels = input_shape.channels
        for k, conv_dim in enumerate(conv_dims[:-1]):
            conv = Conv2d(
                cur_channels,
                conv_dim,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=not conv_norm,
                norm=get_norm(conv_norm, conv_dim),
                activation=nn.ReLU(),
            )
            self.add_module("mask_fcn{}".format(k + 1), conv)
            self.conv_norm_relus.append(conv)
            cur_channels = conv_dim

        self.deconv = ConvTranspose2d(cur_channels,
                                      conv_dims[-1],
                                      kernel_size=2,
                                      stride=2,
                                      padding=0)
        self.add_module("deconv_relu", nn.ReLU())
        cur_channels = conv_dims[-1]

        self.predictor = Conv2d(cur_channels,
                                num_classes,
                                kernel_size=1,
                                stride=1,
                                padding=0)

        for layer in self.conv_norm_relus + [self.deconv]:
            weight_init.c2_msra_fill(layer)
        # use normal distribution initialization for mask prediction layer
        nn.init.normal_(self.predictor.weight, std=0.001)
        if self.predictor.bias is not None:
            nn.init.constant_(self.predictor.bias, 0)
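Note how conv_dims is consumed above: the first len(conv_dims) - 1 entries size the 3x3 convs and the last entry sizes the 2x deconv. A hypothetical instantiation (MaskHead stands in for the excerpt's unnamed class; the registry plumbing is omitted):

    # hypothetical usage: four 3x3 convs at 256 channels, then a 2x deconv to 256
    head = MaskHead(input_shape=ShapeSpec(channels=256),
                    num_classes=80,
                    conv_dims=[256] * 5)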
Example #7
    def __init__(self, input_shape, *, num_keypoints, conv_dims, **kwargs):
        """
        NOTE: this interface is experimental.

        Args:
            input_shape (ShapeSpec): shape of the input feature
            conv_dims: an iterable of output channel counts for each conv in the head
                         e.g. (512, 512, 512) for three convs outputting 512 channels.
        """
        super().__init__(num_keypoints=num_keypoints, **kwargs)

        # default up_scale to 2.0 (this can be made an option)
        up_scale = 2.0
        in_channels = input_shape.channels

        for idx, layer_channels in enumerate(conv_dims, 1):
            module = Conv2d(in_channels, layer_channels, 3, stride=1, padding=1)
            self.add_module("conv_fcn{}".format(idx), module)
            self.add_module("conv_fcn_relu{}".format(idx), nn.ReLU())
            in_channels = layer_channels

        deconv_kernel = 4
        self.score_lowres = ConvTranspose2d(
            in_channels, num_keypoints, deconv_kernel, stride=2, padding=deconv_kernel // 2 - 1
        )
        self.up_scale = up_scale

        for name, param in self.named_parameters():
            if "bias" in name:
                nn.init.constant_(param, 0)
            elif "weight" in name:
                # Caffe2 implementation uses MSRAFill, which in fact
                # corresponds to kaiming_normal_ in PyTorch
                nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
Example #8
    def __init__(self, cfg, input_shape: ShapeSpec):
        """
        The following attributes are parsed from config:
            num_conv: the number of conv layers
            conv_dim: the dimension of the conv layers
            norm: normalization for the conv layers
        """
        super().__init__(cfg, input_shape)

        # fmt: off
        num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES
        conv_dims = cfg.MODEL.ROI_VISIBLE_MASK_HEAD.CONV_DIM
        self.norm = cfg.MODEL.ROI_VISIBLE_MASK_HEAD.NORM
        num_conv = cfg.MODEL.ROI_VISIBLE_MASK_HEAD.NUM_CONV
        input_channels = input_shape.channels
        cls_agnostic_mask = cfg.MODEL.ROI_VISIBLE_MASK_HEAD.CLS_AGNOSTIC_MASK
        # fmt: on

        self.conv_norm_relus = []

        for k in range(num_conv):
            conv = Conv2d(
                input_channels if k == 0 else conv_dims,
                conv_dims,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=not self.norm,
                norm=get_norm(self.norm, conv_dims),
                activation=F.relu,
            )
            self.add_module("visible_mask_fcn{}".format(k + 1),
                            conv)  # this mask_fcn means visible_mask_fcn
            self.conv_norm_relus.append(conv)

        self.deconv = ConvTranspose2d(
            conv_dims if num_conv > 0 else input_channels,
            conv_dims,
            kernel_size=2,
            stride=2,
            padding=0,
        )

        num_mask_classes = 1 if cls_agnostic_mask else num_classes
        self.predictor = Conv2d(conv_dims,
                                num_mask_classes,
                                kernel_size=1,
                                stride=1,
                                padding=0)

        for layer in self.conv_norm_relus + [self.deconv]:
            weight_init.c2_msra_fill(layer)

        # use normal distribution initialization for mask prediction layer
        nn.init.normal_(self.predictor.weight, std=0.001)
        if self.predictor.bias is not None:
            nn.init.constant_(self.predictor.bias, 0)
Example #9
    def __init__(self,
                 input_shape: ShapeSpec,
                 num_classes,
                 num_conv,
                 conv_dim,
                 conv_norm="",
                 vis_period=0):
        """
        Args:
            input_shape (ShapeSpec): shape of the input feature
            num_classes (int): the number of classes. 1 if using class agnostic prediction.
            num_conv (int): the number of conv layers
            conv_dim (int): the dimension of the conv layers
            conv_norm (str or callable): normalization for the conv layers.
                See :func:`detectron2.layers.get_norm` for supported types.
            vis_period (int): visualization period. 0 to disable visualization.
        """
        super().__init__(vis_period)
        input_channels = input_shape.channels

        self.conv_norm_relus = []

        for k in range(num_conv):
            conv = Conv2d(
                input_channels if k == 0 else conv_dim,
                conv_dim,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=not conv_norm,
                norm=get_norm(conv_norm, conv_dim),
                activation=F.relu,
            )
            self.add_module("mask_fcn{}".format(k + 1), conv)
            self.conv_norm_relus.append(conv)

        self.deconv = ConvTranspose2d(
            conv_dim if num_conv > 0 else input_channels,
            conv_dim,
            kernel_size=2,
            stride=2,
            padding=0,
        )

        self.predictor = Conv2d(conv_dim,
                                num_classes,
                                kernel_size=1,
                                stride=1,
                                padding=0)

        for layer in self.conv_norm_relus + [self.deconv]:
            weight_init.c2_msra_fill(layer)
        # use normal distribution initialization for mask prediction layer
        nn.init.normal_(self.predictor.weight, std=0.001)
        if self.predictor.bias is not None:
            nn.init.constant_(self.predictor.bias, 0)
Example #10
    def __init__(self, cfg, input_shape: ShapeSpec):
        super(DefGridHead, self).__init__()

        self.device = cfg.MODEL.DEVICE
        self.grid_size = cfg.MODEL.DEFGRID_MASK_HEAD.GRID_SIZE  # [20,20]
        self.grid_type = cfg.MODEL.DEFGRID_MASK_HEAD.GRID_TYPE  # dense_quad
        self.state_dim = cfg.MODEL.DEFGRID_MASK_HEAD.STATE_DIM  # 128
        self.out_dim = cfg.MODEL.DEFGRID_MASK_HEAD.OUT_DIM
        self.num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES
        self.sigma = cfg.MODEL.DEFGRID_MASK_HEAD.SIGMA
        self.mask_coef = cfg.MODEL.DEFGRID_MASK_HEAD.MASK_COEF

        self.w_variance = cfg.MODEL.DEFGRID_MASK_HEAD.W_VARIANCE
        self.w_area = cfg.MODEL.DEFGRID_MASK_HEAD.W_AREA
        self.w_laplacian = cfg.MODEL.DEFGRID_MASK_HEAD.W_LAPLACIAN
        self.w_reconstruct_loss = cfg.MODEL.DEFGRID_MASK_HEAD.W_RECONSTRUCT_LOSS

        self.matrix = MatrixUtils(1, self.grid_size, self.grid_type, self.device)

        self.model = DeformableGrid(cfg, self.device)

        self.to_three_channel = ConvTranspose2d(
            cfg.MODEL.ROI_MASK_HEAD.CONV_DIM, 3, kernel_size=2, stride=2, padding=0
        )
        # self.to_three_channel = Conv2d(cfg.MODEL.ROI_MASK_HEAD.CONV_DIM, 3, kernel_size=1, stride=1, padding=0)

        self.superpixel = LatticeVariance(
            28,
            28,
            sigma=self.sigma,
            device=self.device,
            add_seg=True,
            mask_coef=self.mask_coef,
        )

        self.mask_deconv = ConvTranspose2d(
            self.out_dim, self.out_dim, kernel_size=2, stride=2, padding=0
        )
        self.mask_predictor = Conv2d(
            self.out_dim, self.num_classes, kernel_size=1, stride=1, padding=0
        )
Example #11
    def __init__(self, cfg):
        super(VoxelHead, self).__init__()

        # fmt: off
        self.voxel_size = cfg.MODEL.VOXEL_HEAD.VOXEL_SIZE
        conv_dims = cfg.MODEL.VOXEL_HEAD.CONV_DIM
        num_conv = cfg.MODEL.VOXEL_HEAD.NUM_CONV
        input_channels = cfg.MODEL.VOXEL_HEAD.COMPUTED_INPUT_CHANNELS
        self.norm = cfg.MODEL.VOXEL_HEAD.NORM
        # fmt: on

        assert self.voxel_size % 2 == 0

        self.conv_norm_relus = []
        prev_dim = input_channels
        for k in range(num_conv):
            conv = Conv2d(
                prev_dim,
                conv_dims,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=not self.norm,
                norm=get_norm(self.norm, conv_dims),
                activation=F.relu,
            )
            self.add_module("voxel_fcn{}".format(k + 1), conv)
            self.conv_norm_relus.append(conv)
            prev_dim = conv_dims

        self.deconv = ConvTranspose2d(
            conv_dims if num_conv > 0 else input_channels,
            conv_dims,
            kernel_size=2,
            stride=2,
            padding=0,
        )

        self.predictor = Conv2d(conv_dims,
                                self.voxel_size,
                                kernel_size=1,
                                stride=1,
                                padding=0)

        for layer in self.conv_norm_relus + [self.deconv]:
            weight_init.c2_msra_fill(layer)
        # use normal distribution initialization for voxel prediction layer
        nn.init.normal_(self.predictor.weight, std=0.001)
        if self.predictor.bias is not None:
            nn.init.constant_(self.predictor.bias, 0)
Example #12
    def __init__(self, cfg, input_shape: ShapeSpec, K=100):
        self.name = "CVAE"
        self.num_conv = cfg.MODEL.ROI_MASK_HEAD.RECON_NET.NUM_CONV
        self.conv_dims = cfg.MODEL.ROI_MASK_HEAD.RECON_NET.CONV_DIM
        self.norm = cfg.MODEL.ROI_MASK_HEAD.RECON_NET.NORM
        self.num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES
        self.rescoring = cfg.MODEL.ROI_MASK_HEAD.RECON_NET.RESCORING
        self.lambda_kl = cfg.MODEL.ROI_MASK_HEAD.RECON_NET.LAMBDA_KL
        self.latent_dim = 128
        input_channels = 1
        super(CVAE, self).__init__()
        self.encoder = []
        self.decoder = []
        for k in range(self.num_conv):
            conv = Conv2d(
                input_channels if k == 0 else self.conv_dims,
                self.conv_dims,
                kernel_size=3,
                stride=2,
                padding=1,
                bias=not self.norm,
                norm=get_norm(self.norm, self.conv_dims),
                activation=F.relu,
            )
            self.add_module("mask_fcn_enc{}".format(k + 1), conv)

            self.encoder.append(conv)

            deconv = nn.Sequential(
                ConvTranspose2d(
                    self.conv_dims if self.num_conv > 0 else input_channels,
                    self.conv_dims,
                    kernel_size=2,
                    stride=2,
                    padding=1
                    if k == self.num_conv - 2 and self.num_conv > 2 else 0),
                nn.ReLU(inplace=True))
            self.add_module("mask_fcn_dec{}".format(k + 1), deconv)
            self.decoder.append(deconv)
        self.outconv = nn.Sequential(
            nn.Conv2d(self.conv_dims, 1, kernel_size=3, stride=1, padding=1),
            nn.Sigmoid())
        d = {1: 14, 2: 7, 3: 4, 4: 2}
        self.mean = nn.Linear(self.conv_dims * d[self.num_conv]**2,
                              self.latent_dim)
        self.log_var = nn.Linear(self.conv_dims * d[self.num_conv]**2,
                                 self.latent_dim)
        self.fc = nn.Linear(
            self.conv_dims * d[self.num_conv]**2 + self.num_classes,
            self.latent_dim)
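The hard-coded table d = {1: 14, 2: 7, 3: 4, 4: 2} is the spatial size remaining after num_conv stride-2 encoder convs (kernel 3, padding 1), assuming the usual 28x28 mask input: each conv maps H to (H + 2 - 3) // 2 + 1, hence the conv_dims * d[num_conv]**2 flattened features fed to the linear layers. A quick check of that recurrence:

    # verify the size table for a 28x28 input under Conv2d(kernel=3, stride=2, padding=1)
    h, sizes = 28, {}
    for k in range(1, 5):
        h = (h + 2 * 1 - 3) // 2 + 1
        sizes[k] = h
    assert sizes == {1: 14, 2: 7, 3: 4, 4: 2}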
Example #13
    def __init__(self, cfg, input_shape):
        super(VoxelRCNNConvUpsampleHead, self).__init__()

        # fmt: off
        num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES
        conv_dims = cfg.MODEL.ROI_VOXEL_HEAD.CONV_DIM
        self.norm = cfg.MODEL.ROI_VOXEL_HEAD.NORM
        num_conv = cfg.MODEL.ROI_VOXEL_HEAD.NUM_CONV
        input_channels = input_shape.channels
        cls_agnostic_voxel = cfg.MODEL.ROI_VOXEL_HEAD.CLS_AGNOSTIC_VOXEL
        # fmt: on

        self.conv_norm_relus = []
        self.num_depth = cfg.MODEL.ROI_VOXEL_HEAD.NUM_DEPTH
        self.num_classes = 1 if cls_agnostic_voxel else num_classes

        for k in range(num_conv):
            conv = Conv2d(
                input_channels if k == 0 else conv_dims,
                conv_dims,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=not self.norm,
                norm=get_norm(self.norm, conv_dims),
                activation=F.relu,
            )
            self.add_module("voxel_fcn{}".format(k + 1), conv)
            self.conv_norm_relus.append(conv)

        self.deconv = ConvTranspose2d(
            conv_dims if num_conv > 0 else input_channels,
            conv_dims,
            kernel_size=2,
            stride=2,
            padding=0,
        )

        self.predictor = Conv2d(conv_dims,
                                self.num_classes * self.num_depth,
                                kernel_size=1,
                                stride=1,
                                padding=0)

        for layer in self.conv_norm_relus + [self.deconv]:
            weight_init.c2_msra_fill(layer)
        # use normal distribution initialization for voxel prediction layer
        nn.init.normal_(self.predictor.weight, std=0.001)
        if self.predictor.bias is not None:
            nn.init.constant_(self.predictor.bias, 0)
Example #14
 def _initialize_confidence_estimation_layers(
         self, cfg: CfgNode,
         confidence_model_cfg: DensePoseConfidenceModelConfig, dim_in: int):
     dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
     kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
     if confidence_model_cfg.uv_confidence.enabled:
         if confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
             self.sigma_2_lowres = ConvTranspose2d(
                 dim_in,
                 dim_out_patches,
                 kernel_size,
                 stride=2,
                 padding=int(kernel_size / 2 - 1))
         elif confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.INDEP_ANISO:
             self.sigma_2_lowres = ConvTranspose2d(
                 dim_in,
                 dim_out_patches,
                 kernel_size,
                 stride=2,
                 padding=int(kernel_size / 2 - 1))
             self.kappa_u_lowres = ConvTranspose2d(
                 dim_in,
                 dim_out_patches,
                 kernel_size,
                 stride=2,
                 padding=int(kernel_size / 2 - 1))
             self.kappa_v_lowres = ConvTranspose2d(
                 dim_in,
                 dim_out_patches,
                 kernel_size,
                 stride=2,
                 padding=int(kernel_size / 2 - 1))
         else:
             raise ValueError(
                 f"Unknown confidence model type: {confidence_model_cfg.confidence_model_type}"
             )
Example #15
    def _initialize_confidence_estimation_layers(self, cfg: CfgNode,
                                                 dim_in: int):
        """
        Initialize confidence estimation layers based on configuration options

        Args:
            cfg (CfgNode): configuration options
            dim_in (int): number of input channels
        """
        dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
        kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
        if self.confidence_model_cfg.uv_confidence.enabled:
            if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
                self.sigma_2_lowres = ConvTranspose2d(
                    dim_in,
                    dim_out_patches,
                    kernel_size,
                    stride=2,
                    padding=int(kernel_size / 2 - 1))
            elif (self.confidence_model_cfg.uv_confidence.type ==
                  DensePoseUVConfidenceType.INDEP_ANISO):
                self.sigma_2_lowres = ConvTranspose2d(
                    dim_in,
                    dim_out_patches,
                    kernel_size,
                    stride=2,
                    padding=int(kernel_size / 2 - 1))
                self.kappa_u_lowres = ConvTranspose2d(
                    dim_in,
                    dim_out_patches,
                    kernel_size,
                    stride=2,
                    padding=int(kernel_size / 2 - 1))
                self.kappa_v_lowres = ConvTranspose2d(
                    dim_in,
                    dim_out_patches,
                    kernel_size,
                    stride=2,
                    padding=int(kernel_size / 2 - 1))
            else:
                raise ValueError(
                    f"Unknown confidence model type: "
                    f"{self.confidence_model_cfg.confidence_model_type}")
        if self.confidence_model_cfg.segm_confidence.enabled:
            self.fine_segm_confidence_lowres = ConvTranspose2d(
                dim_in,
                1,
                kernel_size,
                stride=2,
                padding=int(kernel_size / 2 - 1))
            self.coarse_segm_confidence_lowres = ConvTranspose2d(
                dim_in,
                1,
                kernel_size,
                stride=2,
                padding=int(kernel_size / 2 - 1))
Example #16
 def get_conv_up(self, name, num_convs_up, conv_channels):
     """
     Function to create and register a set of ConvTranspose2d layers.
     """
     layers = []
     for k in range(num_convs_up):
         deconv = ConvTranspose2d(
             conv_channels,
             conv_channels,
             kernel_size=2,
             stride=2,
             padding=0,
         )
         self.add_module(name + "{}".format(k + 1), deconv)
         weight_init.c2_msra_fill(deconv)
         layers.append(deconv)
     return layers
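Each returned layer doubles the spatial size (kernel 2, stride 2, padding 0), so num_convs_up stacked calls upsample by a factor of 2**num_convs_up. A hypothetical call from a head's __init__ (the name "mask_up" is illustrative):

    # hypothetical: registers mask_up1 and mask_up2, each a 2x ConvTranspose2d
    self.up_layers = self.get_conv_up("mask_up", num_convs_up=2, conv_channels=256)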
Example #17
    def __init__(self, cfg, input_shape: ShapeSpec):
        """
        The following attributes are parsed from config:
            conv_dims: an iterable of output channel counts for each conv in the head
                         e.g. (512, 512, 512) for three convs outputting 512 channels.
            num_keypoints: number of keypoint heatmaps to predict; determines the number of
                           channels in the final output.
        """
        super(KRCNNConvDeconvUpsampleHead, self).__init__()

        # fmt: off
        # default up_scale to 2 (this can eventually be moved to config)
        up_scale = 2
        conv_dims = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS
        num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS
        in_channels = input_shape.channels
        # fmt: on

        self.blocks = []
        for idx, layer_channels in enumerate(conv_dims, 1):
            module = Conv2d(in_channels,
                            layer_channels,
                            3,
                            stride=1,
                            padding=1)
            self.add_module("conv_fcn{}".format(idx), module)
            self.blocks.append(module)
            in_channels = layer_channels

        deconv_kernel = 4
        self.score_lowres = ConvTranspose2d(in_channels,
                                            num_keypoints,
                                            deconv_kernel,
                                            stride=2,
                                            padding=deconv_kernel // 2 - 1)
        self.up_scale = up_scale

        for name, param in self.named_parameters():
            if "bias" in name:
                nn.init.constant_(param, 0)
            elif "weight" in name:
                # Caffe2 implementation uses MSRAFill, which in fact
                # corresponds to kaiming_normal_ in PyTorch
                nn.init.kaiming_normal_(param,
                                        mode="fan_out",
                                        nonlinearity="relu")
Example #18
    def _initialize_confidence_estimation_layers(self, cfg: CfgNode,
                                                 dim_in: int):
        """
        Initialize confidence estimation layers based on configuration options

        Args:
            cfg (CfgNode): configuration options
            dim_in (int): number of input channels
        """
        kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
        if self.confidence_model_cfg.segm_confidence.enabled:
            self.coarse_segm_confidence_lowres = ConvTranspose2d(  # pyre-ignore[16]
                dim_in,
                1,
                kernel_size,
                stride=2,
                padding=int(kernel_size / 2 - 1))
Example #19
    def __init__(self, cur_channels, num_classes, conv_dims, conv_norm=""):
        super().__init__()
        assert len(conv_dims) >= 1, "conv_dims have to be non-empty!"

        self.conv_norm_relus = []
        for k, conv_dim in enumerate(conv_dims[:-1]):
            conv = Conv2d(
                cur_channels,
                conv_dim,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=not conv_norm,
                norm=get_norm(conv_norm, conv_dim),
                activation=nn.ReLU(),
            )
            self.add_module("mask_fcn{}".format(k + 1), conv)
            self.conv_norm_relus.append(conv)
            cur_channels = conv_dim

        self.deconv = ConvTranspose2d(cur_channels,
                                      conv_dims[-1],
                                      kernel_size=2,
                                      stride=2,
                                      padding=0)
        self.add_module("deconv_relu", nn.ReLU())
        cur_channels = conv_dims[-1]

        self.predictor = Conv2d(cur_channels,
                                num_classes,
                                kernel_size=1,
                                stride=1,
                                padding=0)

        for layer in self.conv_norm_relus + [self.deconv]:
            weight_init.c2_msra_fill(layer)
        # use normal distribution initialization for mask prediction layer
        nn.init.normal_(self.predictor.weight, std=0.001)
        if self.predictor.bias is not None:
            nn.init.constant_(self.predictor.bias, 0)
Example #20
    def __init__(self, cfg, input_shape: ShapeSpec):
        """
        The following attributes are parsed from config:
            conv_dims: an iterable of output channel counts for each conv in the head
                         e.g. (512, 512, 512) for three convs outputting 512 channels.
            num_keypoints: number of keypoint heatmaps to predict; determines the number of
                           channels in the final output.
        """
        super().__init__()

        # fmt: off
        # default up_scale to 2 (this can eventually be moved to config)
        up_scale = 2
        conv_dims = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS
        num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS
        dwexpand_factor = cfg.MODEL.ROI_KEYPOINT_HEAD.DWEXPAND_FACTOR
        norm = cfg.MODEL.ROI_KEYPOINT_HEAD.NORM
        in_channels = input_shape.channels
        # fmt: on
        norm = ""
        self.conv_fcns = []
        for idx, layer_channel in enumerate(conv_dims):
            expand_channel = in_channels * dwexpand_factor
            conv_fcn = EDWConv(in_channels, expand_channel, layer_channel,
                               norm)
            in_channels = layer_channel
            self.add_module("conv_fcn{}".format(idx + 1), conv_fcn)
            self.conv_fcns.append(conv_fcn)

        deconv_kernel = 4
        self.score_lowres = ConvTranspose2d(in_channels,
                                            num_keypoints,
                                            deconv_kernel,
                                            stride=2,
                                            padding=deconv_kernel // 2 - 1)
        self.up_scale = up_scale
        for layer in [self.score_lowres]:
            weight_init.c2_msra_fill(layer)
Example #21
    def __init__(self, cfg, input_shape: ShapeSpec):
        """
        The following attributes are parsed from config:
            conv_dims: an iterable of output channel counts for each conv in the head
                         e.g. (512, 512, 512) for three convs outputting 512 channels.
            num_keypoints: number of keypoint heatmaps to predict; determines the number of
                           channels in the final output.
        """
        super().__init__()

        # fmt: off
        # default up_scale to 2 (this can eventually be moved to config)
        up_scale = 2
        conv_dims = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS
        num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS
        expand_factor = cfg.MODEL.ROI_KEYPOINT_HEAD.DWEXPAND_FACTOR
        norm = cfg.MODEL.ROI_KEYPOINT_HEAD.NORM
        in_channels = input_shape.channels
        # fmt: on
        norm = ""
        conv_fcn = []
        for layer_channel in conv_dims:
            conv_fcn.append(Conv2d(in_channels, layer_channel, kernel_size=1, bias=not norm,
                                   norm=get_norm(norm, layer_channel), activation=F.relu))
            conv_fcn.append(Conv2d(layer_channel, layer_channel, kernel_size=3, padding=1, bias=not norm,
                                   groups=layer_channel, norm=get_norm(norm, layer_channel), activation=F.relu))
            in_channels = layer_channel
        self.add_module('conv_fcn', nn.Sequential(*conv_fcn))

        deconv_kernel = 4
        self.score_lowres = ConvTranspose2d(in_channels,
                                            num_keypoints,
                                            deconv_kernel,
                                            stride=2,
                                            padding=deconv_kernel // 2 - 1)
        self.up_scale = up_scale
        for layer in [*self.conv_fcn, self.score_lowres]:
            weight_init.c2_msra_fill(layer)
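The loop above pairs a 1x1 conv with a 3x3 conv whose groups equal its channel count, i.e. a depthwise-separable block: the pointwise conv mixes channels while the grouped conv applies one spatial filter per channel. A minimal standalone equivalent in plain PyTorch (names are illustrative; normalization is omitted, matching the norm = "" override above):

    import torch.nn as nn

    def separable_block(in_ch, out_ch):
        # 1x1 pointwise conv mixes channels; 3x3 depthwise conv (groups=out_ch)
        # filters each channel independently, as in the head above
        return nn.Sequential(
            nn.Conv2d(in_ch, out_ch, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1, groups=out_ch),
            nn.ReLU(inplace=True),
        )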
Example #22
    def __init__(self,
                 in_channels,
                 num_keypoints,
                 conv_dims,
                 loss_weight=1.0,
                 loss_normalizer=1.0):
        super().__init__()
        # default up_scale to 2.0 (this can be made an option)
        up_scale = 2.0

        for idx, layer_channels in enumerate(conv_dims, 1):
            module = Conv2d(in_channels,
                            layer_channels,
                            3,
                            stride=1,
                            padding=1)
            self.add_module("conv_fcn{}".format(idx), module)
            self.add_module("conv_fcn_relu{}".format(idx), nn.ReLU())
            in_channels = layer_channels

        deconv_kernel = 4
        self.score_lowres = ConvTranspose2d(in_channels,
                                            num_keypoints,
                                            deconv_kernel,
                                            stride=2,
                                            padding=deconv_kernel // 2 - 1)
        self.up_scale = up_scale

        for name, param in self.named_parameters():
            if "bias" in name:
                nn.init.constant_(param, 0)
            elif "weight" in name:
                # Caffe2 implementation uses MSRAFill, which in fact
                # corresponds to kaiming_normal_ in PyTorch
                nn.init.kaiming_normal_(param,
                                        mode="fan_out",
                                        nonlinearity="relu")
Example #23
    def __init__(self, cfg, use_rel_coords=True):
        super().__init__()
        self.num_outputs = cfg.MODEL.CONDINST.IUVHead.OUT_CHANNELS
        norm = cfg.MODEL.CONDINST.IUVHead.NORM
        num_convs = cfg.MODEL.CONDINST.IUVHead.NUM_CONVS
        num_lambda_layer = cfg.MODEL.CONDINST.IUVHead.NUM_LAMBDA_LAYER
        lambda_layer_r = cfg.MODEL.CONDINST.IUVHead.LAMBDA_LAYER_R
        num_dcn_layer = cfg.MODEL.CONDINST.IUVHead.NUM_DCN_LAYER
        assert num_lambda_layer <= num_convs

        agg_channels = cfg.MODEL.CONDINST.MASK_BRANCH.AGG_CHANNELS
        channels = cfg.MODEL.CONDINST.IUVHead.CHANNELS
        self.norm_feat = cfg.MODEL.CONDINST.IUVHead.NORM_FEATURES
        soi = cfg.MODEL.FCOS.SIZES_OF_INTEREST
        self.register_buffer("sizes_of_interest", torch.tensor(soi + [soi[-1] * 2]))
        self.iuv_out_stride = cfg.MODEL.CONDINST.MASK_OUT_STRIDE
        self.use_rel_coords = cfg.MODEL.CONDINST.IUVHead.REL_COORDS
        self.use_abs_coords = cfg.MODEL.CONDINST.IUVHead.ABS_COORDS
        self.use_down_up_sampling = cfg.MODEL.CONDINST.IUVHead.DOWN_UP_SAMPLING
        self.use_partial_conv = cfg.MODEL.CONDINST.IUVHead.PARTIAL_CONV
        self.use_partial_norm = cfg.MODEL.CONDINST.IUVHead.PARTIAL_NORM
        # pdb.set_trace()
        # if self.use_rel_coords:
        #     self.in_channels = channels + 2
        # else:
        self.pos_emb_num_freqs = cfg.MODEL.CONDINST.IUVHead.POSE_EMBEDDING_NUM_FREQS
        self.use_pos_emb = self.pos_emb_num_freqs > 0
        if self.use_pos_emb:
            self.position_embedder, self.position_emb_dim = get_embedder(multires=self.pos_emb_num_freqs, input_dims=2)
            self.in_channels = agg_channels + self.position_emb_dim
        else:
            self.in_channels = agg_channels + 2

        if self.use_abs_coords:
            if self.use_pos_emb:
                self.in_channels += self.position_emb_dim
            else:
                self.in_channels += 2


        conv_block = conv_with_kaiming_uniform(norm, activation=True)

        partial_conv_block = conv_with_kaiming_uniform(norm, activation=True, use_partial_conv=True)
        deform_conv_block = conv_with_kaiming_uniform(norm, activation=True, use_deformable=True)

        tower = []
        if self.use_partial_conv:
            # pdb.set_trace()
            layer = partial_conv_block(self.in_channels, channels, 3, 1)
            tower.append(layer)
            self.in_channels = channels

        if num_lambda_layer > 0:
            layer = LambdaLayer(
                dim = self.in_channels,
                dim_out = channels,
                r = lambda_layer_r,         # the receptive field for relative positional encoding (23 x 23)
                dim_k = 16,
                heads = 4,
                dim_u = 4
            )
            tower.append(layer)
        else:
            tower.append(conv_block(
                self.in_channels, channels, 3, 1
            ))
        if num_dcn_layer > 0:
            tower.append(deform_conv_block(
                    channels, channels, 3, 1
            ))

        if self.use_down_up_sampling:
            for i in range(1, num_convs):
                if i == 1:
                    tower.append(conv_block(
                        channels, channels * 2, 3, 2
                    ))
                else:
                    tower.append(conv_block(
                        channels * 2, channels * 2, 3, 1
                    ))

            tower.append(ConvTranspose2d(
                channels * 2, self.num_outputs, 4, stride=2, padding=int(4 / 2 - 1)
            ))
        else:
            for i in range(1, num_convs):
                tower.append(conv_block(
                    channels, channels, 3, 1
                ))
            tower.append(nn.Conv2d(
                channels, max(self.num_outputs, 1), 1
            ))

        self.add_module('tower', nn.Sequential(*tower))
Example #24
    def __init__(self, cfg, input_shape: ShapeSpec):
        """
        The following attributes are parsed from config:
            num_conv: the number of conv layers
            conv_dim: the dimension of the conv layers
            norm: normalization for the conv layers
        """
        super(MaskRCNNConvUpsampleHead, self).__init__()

        # fmt: off
        num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES
        conv_dims = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM
        self.norm = cfg.MODEL.ROI_MASK_HEAD.NORM
        num_conv = cfg.MODEL.ROI_MASK_HEAD.NUM_CONV
        input_channels = input_shape.channels
        cls_agnostic_mask = cfg.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK
        # fmt: on

        self.conv_norm_relus = []

        for k in range(num_conv):
            conv = Conv2d(
                input_channels if k == 0 else conv_dims,
                conv_dims,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=not self.norm,
                norm=get_norm(self.norm, conv_dims),
                activation=F.relu,
            )
            self.add_module("mask_fcn{}".format(k + 1), conv)
            self.conv_norm_relus.append(conv)
        self.cfg = cfg
        self.deconv = ConvTranspose2d(
            conv_dims if num_conv > 0 else input_channels,
            conv_dims,
            kernel_size=2,
            stride=2,
            padding=0,
        )
        if self.cfg.MODEL.TRANSFER_FUNCTION:
            self.in_feat_dim = 256 * (
                self.cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION *
                2) * (self.cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION * 2)
            self.out_feat_dim = (
                self.cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION *
                2) * (self.cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION * 2)
            self.MLP = nn.Sequential(
                nn.Linear(self.in_feat_dim, 1024),
                nn.LeakyReLU(inplace=True),
                nn.Linear(1024, self.out_feat_dim),
            )

        else:
            self.mask_weights = None
            num_mask_classes = 1 if cls_agnostic_mask else num_classes
            self.predictor = Conv2d(conv_dims,
                                    num_mask_classes,
                                    kernel_size=1,
                                    stride=1,
                                    padding=0)
            # use normal distribution initialization for mask prediction layer
            nn.init.normal_(self.predictor.weight, std=0.001)
            if self.predictor.bias is not None:
                nn.init.constant_(self.predictor.bias, 0)

        for layer in self.conv_norm_relus + [self.deconv]:
            weight_init.c2_msra_fill(layer)
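The MLP dimensions in the TRANSFER_FUNCTION branch follow from the deconv doubling: features enter at POOLER_RESOLUTION * 2 per side, so each ROI is flattened to 256 * side**2 inputs and mapped back to one side x side map. A quick arithmetic check (the resolution value 14 is an assumption):

    pooler_resolution = 14  # assumed cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
    side = pooler_resolution * 2
    assert 256 * side * side == 200704  # in_feat_dim
    assert side * side == 784           # out_feat_dim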
Example #25
    def __init__(self,
                 input_shape: ShapeSpec,
                 *,
                 num_classes,
                 conv_dims,
                 conv_norm="",
                 **kwargs):
        """
        NOTE: this interface is experimental.

        Args:
            input_shape (ShapeSpec): shape of the input feature
            num_classes (int): the number of classes. 1 if using class agnostic prediction.
            conv_dims (list[int]): a list of N>0 integers representing the output dimensions
                of N-1 conv layers and the last upsample layer.
            conv_norm (str or callable): normalization for the conv layers.
                See :func:`detectron2.layers.get_norm` for supported types.
        """
        super().__init__(**kwargs)
        assert len(conv_dims) >= 1, "conv_dims have to be non-empty!"

        print("initializing with SoftPlusSigmoid activation function")
        self.conv_norm_relus = []

        cur_channels = input_shape.channels
        # this makes the 4 fc layers under (mask head) in the training
        for k, conv_dim in enumerate(conv_dims[:-1]):
            # do convolution layers with our custom activation function
            conv = Conv2d(
                cur_channels,
                conv_dim,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=not conv_norm,
                norm=get_norm(conv_norm, conv_dim),
                #activation=nn.ReLU(),
                activation=softplussigmoid(),
            )
            self.add_module("mask_fcn{}".format(k + 1), conv)
            self.conv_norm_relus.append(conv)
            cur_channels = conv_dim

        # as you might imagine, the deconv layer
        # corresponds to:
        # (deconv) - output in training when showing the model
        self.deconv = ConvTranspose2d(cur_channels,
                                      conv_dims[-1],
                                      kernel_size=2,
                                      stride=2,
                                      padding=0)
        # (deconv_relu)
        self.add_module("deconv_relu", nn.ReLU())
        cur_channels = conv_dims[-1]

        # (predictor): Conv2d
        self.predictor = Conv2d(cur_channels,
                                num_classes,
                                kernel_size=1,
                                stride=1,
                                padding=0)

        for layer in self.conv_norm_relus + [self.deconv]:
            weight_init.c2_msra_fill(layer)
        # use normal distribution initialization for mask prediction layer
        nn.init.normal_(self.predictor.weight, std=0.001)
        if self.predictor.bias is not None:
            nn.init.constant_(self.predictor.bias, 0)
Example #26
    def __init__(self, cfg):
        super(RecurrentVoxelHead, self).__init__()

        # fmt: off
        self.voxel_size = cfg.MODEL.VOXEL_HEAD.VOXEL_SIZE  #48
        conv_dims = cfg.MODEL.VOXEL_HEAD.CONV_DIM  #256
        num_conv = cfg.MODEL.VOXEL_HEAD.NUM_CONV  #4
        input_channels = cfg.MODEL.VOXEL_HEAD.COMPUTED_INPUT_CHANNELS  #resnet50 - 2048
        self.norm = cfg.MODEL.VOXEL_HEAD.NORM
        # fmt: on

        assert self.voxel_size % 2 == 0

        self.conv_norm_relus = []
        # From ResNet backbone feature extractor
        prev_dim = input_channels

        # Recurrent layers
        self.batch_size = cfg.SOLVER.BATCH_SIZE
        #define the FCConv3DLayers in 3d convolutional gru unit
        #self.n_convfilter = [96, 128, 256, 256, 256, 256]

        #number of filters for each 3d convolution layer in the decoder
        #self.n_deconvfilter = [128, 128, 128, 64, 32, 2]

        self.input_shape = None  #unused
        self.n_gru_vox = 4
        self.n_fc_filters = [
            1024
        ]  #the filter shape of the 3d convolutional gru unit
        self.n_h_feat = 128  #number of features for output tensor

        self.h_shape = (self.batch_size, self.n_h_feat, self.n_gru_vox,
                        self.n_gru_vox, self.n_gru_vox
                        )  #the size of the hidden state
        self.conv3d_filter_shape = (
            self.n_h_feat, self.n_h_feat, 3, 3, 3
        )  #the filter shape of the 3d convolutional gru unit

        self.recurrent_layer = recurrent_layer(self.input_shape, input_channels,
                                               self.n_fc_filters, self.h_shape,
                                               self.conv3d_filter_shape)

        self.reduce_dim = Conv2d(
            self.n_h_feat * self.n_gru_vox,
            conv_dims,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=not self.norm,
            norm=get_norm(self.norm, conv_dims),
            activation=F.relu,
        )
        '''
        for k in range(num_conv):
            conv = Conv2d(
                prev_dim,
                conv_dims,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=not self.norm,
                norm=get_norm(self.norm, conv_dims),
                activation=F.relu,
            )
            self.add_module("voxel_fcn{}".format(k + 1), conv)
            self.conv_norm_relus.append(conv)
            prev_dim = conv_dims

        '''

        # Deconvolutional layers
        self.deconv = ConvTranspose2d(
            conv_dims if num_conv > 0 else input_channels,
            conv_dims,
            kernel_size=2,
            stride=2,
            padding=0,
        )

        self.predictor = Conv2d(conv_dims,
                                self.voxel_size,
                                kernel_size=1,
                                stride=1,
                                padding=0)

        for layer in self.conv_norm_relus + [self.deconv]:
            weight_init.c2_msra_fill(layer)
        # use normal distribution initialization for voxel prediction layer
        nn.init.normal_(self.predictor.weight, std=0.001)
        if self.predictor.bias is not None:
            nn.init.constant_(self.predictor.bias, 0)
Example #27
    def __init__(self, cfg, input_shape: ShapeSpec):
        """
        The following attributes are parsed from config:
            conv_dims: an iterable of output channel counts for each conv in the head
                         e.g. (512, 512, 512) for three convs outputting 512 channels.
            num_keypoints: number of keypoint heatmaps to predict; determines the number of
                           channels in the final output.
        """
        super(KRCNNPoseRelationConvHead, self).__init__()

        # fmt: off
        # default up_scale to 2 (this can eventually be moved to config)
        # rel_matrix_dir     = cfg.MODEL.ROI_KEYPOINT_HEAD.PART_RELATION_DIR
        # rel_matrix = pickle.load(open(rel_matrix_dir,'rb'))
        # rel_matrix = torch.FloatTensor(rel_matrix)
        # self.rel_matrix = nn.Parameter(data=rel_matrix, requires_grad=False)
        # kpt relation matrix
        kpt_rel_matrix_dir = cfg.MODEL.ROI_KEYPOINT_HEAD.KPT_RELATION_DIR
        kpt_rel_matrix = pickle.load(open(kpt_rel_matrix_dir, 'rb'))
        kpt_rel_matrix = torch.FloatTensor(kpt_rel_matrix)
        self.kpt_rel_matrix = nn.Parameter(data=kpt_rel_matrix, requires_grad=True)

        # word emb
        # word_emb_dir = cfg.MODEL.ROI_KEYPOINT_HEAD.WORD_EMB_DIR
        # word_emb = pickle.load(open(word_emb_dir, 'rb'))
        # word_emb = torch.FloatTensor(word_emb)
        # self.word_emb = nn.Parameter(data=word_emb, requires_grad=True)
        self.rel_scale = 0.5
        # kpt word emb
        # kpt_word_emb_dir = cfg.MODEL.ROI_KEYPOINT_HEAD.KPT_WORD_EMB_DIR
        # kpt_word_emb = pickle.load(open(kpt_word_emb_dir, 'rb'))
        # kpt_word_emb = torch.FloatTensor(kpt_word_emb)
        # self.kpt_word_emb = nn.Parameter(data=kpt_word_emb, requires_grad=True)

        self.up_scale      = 2
        layer_channels     = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_HEAD_DIM
        self.relation_dims = cfg.MODEL.ROI_KEYPOINT_HEAD.RELATION_DIM
        # self.kpt_relation_dim = cfg.MODEL.ROI_KEYPOINT_HEAD.KPT_RELATION_DIM
        num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS
        self.n_stacked_convs = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_STACKED_CONVS
        num_parts = num_keypoints // 2 + 1
        in_channels   = input_shape.channels
        deconv_kernel = 4
        self.feat_channels = layer_channels
        self.deconv_kernel = deconv_kernel
        self.num_keypoints = num_keypoints
        # fmt: on

        self.blocks = []
        for i in range(self.n_stacked_convs):
            module = Conv2d(in_channels, layer_channels, 3, stride=1, padding=1)
            self.add_module(self._get_layer_name(i), module)
            self.blocks.append(module)
            in_channels = layer_channels

        # self.inter_part_score = ConvTranspose2d(
        #     layer_channels, num_parts, deconv_kernel, stride=2, padding=deconv_kernel // 2 - 1
        # )
        # self.inter_part_score = Conv2d(layer_channels, num_parts, 3, stride=1, padding=1)
        self.R_emb = nn.Sequential(
            nn.Conv2d(layer_channels + layer_channels, layer_channels, 1, stride=1, padding=0))
            # nn.ReLU(inplace=True))
        # self.kpt_score = Conv2d(layer_channels+ layer_channels, num_keypoints, 3, stride=1, padding=1)
        self.kpt_score = ConvTranspose2d(
            layer_channels, num_keypoints + 1, deconv_kernel, stride=2, padding=deconv_kernel // 2 - 1
        )
        self.final_kpt_score = ConvTranspose2d(
            layer_channels, num_keypoints, deconv_kernel, stride=2, padding=deconv_kernel // 2 - 1
        )
        # self.fcs = []
        # for k in range(2):
        #     fc1 = nn.Linear(layer_channels, layer_channels)
        #     self.add_module("fc1_levels_{}".format(k + 1), fc1)
        #     fc2 = nn.Linear(layer_channels, layer_channels)
        #     self.add_module("att_levels_{}".format(k + 1), fc2)
        #     self.fcs.append([fc1, nn.ReLU(), fc2, nn.Sigmoid()])
        # ama_conv_emb = Conv2d(layer_channels, layer_channels, 3, stride=1, padding=1)
        # self.add_module('ama_dynamic_conv_emb', ama_conv_emb)
        # weight_generator = []
        # param_size = (2 * layer_channels)*deconv_kernel*deconv_kernel
        # weight_generator.append(nn.Linear(self.kpt_relation_dim, param_size))
        # weight_generator.append(nn.LeakyReLU(0.02))
        # for i in range(3):
        #     weight_generator.append(nn.Linear(param_size, param_size))
        #     weight_generator.append(nn.LeakyReLU(0.02))
        # weight_generator.append(
        #     nn.Linear(param_size, param_size))
        # self.weight_generator = nn.Sequential(*weight_generator)
        for name, param in self.named_parameters():
            if name =="kpt_rel_matrix" or name == "kpt_word_emb":
                continue
            print("init:", name)
            if "bias" in name:
                nn.init.constant_(param, 0)
            elif "weight" in name:
                # Caffe2 implementation uses MSRAFill, which in fact
                # corresponds to kaiming_normal_ in PyTorch
                nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
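
# --- Illustrative sketch (not part of the example above) ---
# The deconv padding `deconv_kernel // 2 - 1` with stride=2 exactly doubles
# the spatial resolution: out = (in - 1)*2 - 2*(k//2 - 1) + k = 2*in.
# Channel and keypoint counts below are assumptions for illustration.
import torch
from torch import nn

deconv_kernel = 4
up = nn.ConvTranspose2d(512, 17, deconv_kernel, stride=2,
                        padding=deconv_kernel // 2 - 1)
x = torch.randn(2, 512, 14, 14)
assert up(x).shape == (2, 17, 28, 28)  # 14x14 RoI features -> 28x28 heatmaps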
Example #28
    def __init__(self,
                 cfg,
                 input_shape: ShapeSpec,
                 vis_period: int = 0,
                 loss_weight: float = 1.0):
        super(ContextualFusionHead, self).__init__()

        self.vis_period = vis_period
        self.loss_weight = loss_weight

        conv_dim = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM
        num_conv = cfg.MODEL.ROI_MASK_HEAD.NUM_CONV
        conv_norm = cfg.MODEL.ROI_MASK_HEAD.NORM
        num_context_conv = cfg.MODEL.CONTEXT_MASK_HEAD.NUM_CONV
        num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES
        if cfg.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK:
            num_classes = 1

        self.mask_fcns = []
        cur_channels = input_shape.channels
        for k in range(num_conv):
            conv = Conv2d(
                cur_channels,
                conv_dim,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=not conv_norm,
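                # conv bias is redundant when a norm layer follows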
                norm=get_norm(conv_norm, conv_dim),
                activation=F.relu,
            )
            self.add_module("mask_fcn{}".format(k + 1), conv)
            self.mask_fcns.append(conv)
            cur_channels = conv_dim

        self.mask_final_fusion = Conv2d(conv_dim,
                                        conv_dim,
                                        kernel_size=3,
                                        padding=1,
                                        stride=1,
                                        bias=not conv_norm,
                                        norm=get_norm(conv_norm, conv_dim),
                                        activation=F.relu)

        self.downsample = Conv2d(conv_dim,
                                 conv_dim,
                                 kernel_size=3,
                                 padding=1,
                                 stride=2,
                                 bias=not conv_norm,
                                 norm=get_norm(conv_norm, conv_dim),
                                 activation=F.relu)
        self.context_fcns = []
        cur_channels = input_shape.channels
        for k in range(num_context_conv):
            conv = Conv2d(
                cur_channels,
                conv_dim,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=not conv_norm,
                norm=get_norm(conv_norm, conv_dim),
                activation=F.relu,
            )
            self.add_module("context_fcn{}".format(k + 1), conv)
            self.context_fcns.append(conv)
            cur_channels = conv_dim

        self.context_to_mask = Conv2d(conv_dim,
                                      conv_dim,
                                      kernel_size=1,
                                      padding=0,
                                      stride=1,
                                      bias=not conv_norm,
                                      norm=get_norm(conv_norm, conv_dim),
                                      activation=F.relu)

        self.mask_deconv = ConvTranspose2d(conv_dim,
                                           conv_dim,
                                           kernel_size=2,
                                           stride=2,
                                           padding=0)
        self.mask_predictor = Conv2d(cur_channels,
                                     num_classes,
                                     kernel_size=1,
                                     stride=1,
                                     padding=0)

        for layer in (self.mask_fcns + self.context_fcns +
                      [self.mask_deconv, self.context_to_mask,
                       self.mask_final_fusion, self.downsample]):
            weight_init.c2_msra_fill(layer)
        # use normal distribution initialization for mask prediction layer
        nn.init.normal_(self.mask_predictor.weight, std=0.001)
        if self.mask_predictor.bias is not None:
            nn.init.constant_(self.mask_predictor.bias, 0)
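
# --- Illustrative sketch (not part of the example above) ---
# What `weight_init.c2_msra_fill` amounts to: per the comments in the keypoint
# heads, Caffe2's MSRAFill corresponds to kaiming_normal_ with mode="fan_out"
# and nonlinearity="relu", plus a zero-initialized bias. Use the real fvcore
# helper in practice; this is a sketch of the same initialization.
from torch import nn

def msra_fill_sketch(module: nn.Module) -> None:
    nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
    if module.bias is not None:
        nn.init.constant_(module.bias, 0)

msra_fill_sketch(nn.Conv2d(256, 256, kernel_size=3, padding=1))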
Example #29
    def __init__(self, cfg, input_shape: ShapeSpec):
        """
        The following attributes are parsed from config:
            conv_dims: an iterable of output channel counts for each conv in the head
                         e.g. (512, 512, 512) for three convs outputting 512 channels.
            num_keypoints: number of keypoint heatmaps to predict; determines the number of
                           channels in the final output.
        """
        super(KRCNNPoseRelationHead, self).__init__()

        # fmt: off
        # default up_scale to 2 (this can eventually be moved to config)
        rel_matrix_dir     = cfg.MODEL.ROI_KEYPOINT_HEAD.RELATION_DIR
        with open(rel_matrix_dir, 'rb') as f:
            rel_matrix = torch.FloatTensor(pickle.load(f))
        self.rel_matrix = nn.Parameter(data=rel_matrix, requires_grad=True)
        # word emb
        word_emb_dir = cfg.MODEL.ROI_KEYPOINT_HEAD.WORD_EMB_DIR
        with open(word_emb_dir, 'rb') as f:
            word_emb = torch.FloatTensor(pickle.load(f))
        self.word_emb = nn.Parameter(data=word_emb, requires_grad=True)
        self.rel_scale = 0.5

        self.up_scale      = 2
        layer_channels     = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_HEAD_DIM
        self.relation_dims = cfg.MODEL.ROI_KEYPOINT_HEAD.RELATION_DIM
        num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS
        self.n_stacked_convs = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_STACKED_CONVS
        num_parts = num_keypoints // 2 + 1
        in_channels   = input_shape.channels
        deconv_kernel = 4
        # fmt: on

        self.blocks = []
        for i in range(self.n_stacked_convs):
            module = Conv2d(in_channels, layer_channels, 3, stride=1, padding=1)
            self.add_module(self._get_layer_name(i), module)
            self.blocks.append(module)
            in_channels = layer_channels

        self.inter_part_score = ConvTranspose2d(
            layer_channels, num_parts, deconv_kernel, stride=2, padding=deconv_kernel // 2 - 1
        )
        # self.inter_part_score = Conv2d(layer_channels, num_parts, 3, stride=1, padding=1)
        self.R_emb = nn.Sequential(
            nn.Conv2d(self.relation_dims, input_shape.channels, 1, stride=1, padding=0),
            # nn.ReLU(inplace=True),
        )
        # self.kpt_score = Conv2d(layer_channels+ layer_channels, num_keypoints, 3, stride=1, padding=1)
        self.kpt_score = ConvTranspose2d(
            layer_channels + layer_channels, num_keypoints, deconv_kernel, stride=2, padding=deconv_kernel // 2 - 1
        )

        for name, param in self.named_parameters():
            if name in ("rel_matrix", "word_emb"):
                continue
            print("init:", name)
            if "bias" in name:
                nn.init.constant_(param, 0)
            elif "weight" in name:
                # Caffe2 implementation uses MSRAFill, which in fact
                # corresponds to kaiming_normal_ in PyTorch
                nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
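
# --- Illustrative sketch (not part of the example above) ---
# Stand-ins for the pickled tensors loaded above: a 17x17 keypoint relation
# matrix and 300-d word embeddings (both shapes are assumptions, not the
# original data). The forward wiring below is likewise an assumption: the
# embeddings are propagated over the relation graph and projected to feature
# channels with a 1x1 conv, as the R_emb module's shapes suggest.
import torch
from torch import nn

num_keypoints, relation_dims, channels = 17, 300, 256
rel_matrix = nn.Parameter(torch.eye(num_keypoints))
word_emb = nn.Parameter(torch.randn(num_keypoints, relation_dims))

rel_emb = rel_matrix @ word_emb                      # (17, 300)
R_emb = nn.Conv2d(relation_dims, channels, 1, stride=1, padding=0)
x = rel_emb.t().reshape(1, relation_dims, num_keypoints, 1)
assert R_emb(x).shape == (1, channels, num_keypoints, 1)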
Example #30
    def __init__(self, cfg, input_shape: ShapeSpec):
        """
        The following attributes are parsed from config:
            num_conv: the number of conv layers
            conv_dim: the dimension of the conv layers
            norm: normalization for the conv layers
        """
        super(Parallel_Amodal_Visible_Head, self).__init__()

        # fmt: off
        self.cfg = cfg
        num_classes       = cfg.MODEL.ROI_HEADS.NUM_CLASSES
        conv_dims         = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM
        self.norm         = cfg.MODEL.ROI_MASK_HEAD.NORM
        num_conv          = cfg.MODEL.ROI_MASK_HEAD.NUM_CONV
        # num_vis_conv      = cfg.MODEL.ROI_MASK_HEAD.NUM_VIS_CONV
        self.fm           = cfg.MODEL.ROI_MASK_HEAD.AMODAL_FEATURE_MATCHING
        self.fm_beta      = cfg.MODEL.ROI_MASK_HEAD.AMODAL_FM_BETA
        input_channels    = input_shape.channels
        cls_agnostic_mask = cfg.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK
        self.SPRef        = cfg.MODEL.ROI_MASK_HEAD.RECON_NET.MEMORY_REFINE
        self.SPk          = cfg.MODEL.ROI_MASK_HEAD.RECON_NET.MEMORY_REFINE_K
        self.version      = cfg.MODEL.ROI_MASK_HEAD.VERSION
        self._output_size = (input_shape.channels, input_shape.height, input_shape.width)
        self.attention_mode = cfg.MODEL.ROI_MASK_HEAD.ATTENTION_MODE
        # fmt: on

        self.amodal_conv_norm_relus = []
        self.visible_conv_norm_relus = []
        for k in range(num_conv):
            a_conv = Conv2d(
                input_channels if k == 0 else conv_dims,
                conv_dims,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=not self.norm,
                norm=get_norm(self.norm, conv_dims),
                activation=F.relu,
            )
            self.add_module("amodal_mask_fcn{}".format(k + 1), a_conv)
            self.amodal_conv_norm_relus.append(a_conv)

            v_conv = Conv2d(
                input_channels if k == 0 else conv_dims,
                conv_dims,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=not self.norm,
                norm=get_norm(self.norm, conv_dims),
                activation=F.relu,
            )
            self.add_module("visible_mask_fcn{}".format(k + 1), v_conv)
            self.visible_conv_norm_relus.append(v_conv)

        self.amodal_deconv = ConvTranspose2d(
            conv_dims if num_conv > 0 else input_channels,
            conv_dims,
            kernel_size=2,
            stride=2,
            padding=0,
        )

        self.visible_deconv = ConvTranspose2d(
            conv_dims if num_conv > 0 else input_channels,
            conv_dims,
            kernel_size=2,
            stride=2,
            padding=0,
        )

        num_mask_classes = 1 if cls_agnostic_mask else num_classes
        self.amodal_predictor = Conv2d(conv_dims, num_mask_classes, kernel_size=1, stride=1, padding=0)
        self.visible_predictor = Conv2d(conv_dims, num_mask_classes, kernel_size=1, stride=1, padding=0)

        # use normal distribution initialization for the mask prediction layers
        nn.init.normal_(self.amodal_predictor.weight, std=0.001)
        if self.amodal_predictor.bias is not None:
            nn.init.constant_(self.amodal_predictor.bias, 0)
        nn.init.normal_(self.visible_predictor.weight, std=0.001)
        if self.visible_predictor.bias is not None:
            nn.init.constant_(self.visible_predictor.bias, 0)

        for layer in self.amodal_conv_norm_relus + [self.amodal_deconv] + self.visible_conv_norm_relus + [self.visible_deconv]:
            weight_init.c2_msra_fill(layer)
        # self.amodal_pool = nn.MaxPool2d(kernel_size=2)
        self.amodal_pool = nn.AvgPool2d(kernel_size=2)
        self.visible_pool = nn.AvgPool2d(kernel_size=2)

        if self.SPRef:
            self.fuse_layer = Conv2d(
                input_channels + self.cfg.MODEL.ROI_MASK_HEAD.RECON_NET.MEMORY_REFINE_K,
                input_channels,
                kernel_size=3,
                stride=1,
                padding=1
            )
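
# --- Illustrative sketch (not part of the example above) ---
# The forward pass of the parallel amodal/visible head is not shown here; this
# assumes the standard mask-head flow (conv stack -> 2x deconv -> 1x1
# predictor) for one branch, which matches the layers constructed above.
import torch
import torch.nn.functional as F
from torch import nn

conv_dims, num_mask_classes = 256, 1
convs = nn.ModuleList(nn.Conv2d(256, conv_dims, 3, padding=1) for _ in range(4))
deconv = nn.ConvTranspose2d(conv_dims, conv_dims, kernel_size=2, stride=2)
predictor = nn.Conv2d(conv_dims, num_mask_classes, kernel_size=1)

def branch(x):
    for conv in convs:
        x = F.relu(conv(x))
    return predictor(F.relu(deconv(x)))

amodal_logits = branch(torch.randn(2, 256, 14, 14))
assert amodal_logits.shape == (2, 1, 28, 28)  # 14x14 RoI -> 28x28 mask logits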