Ejemplo n.º 1
0
    def __init__(self, in_channels, layers, dilation):
        """
        Build a stack of 3x3 convolution + ReLU pairs, one pair per entry
        of ``layers``, and re-initialise the convolution weights.

        Arguments:
            in_channels (int): number of input channels
            layers (list): feature dimensions of each FCN layer
            dilation (int): dilation rate of kernel; the same value is used
                as the padding of every convolution
        """
        blocks = OrderedDict()
        channels_in = in_channels
        # Layer names are 1-based: mask_fcn1/relu1, mask_fcn2/relu2, ...
        for idx, channels_out in enumerate(layers, 1):
            conv = misc_nn_ops.Conv2d(
                channels_in,
                channels_out,
                kernel_size=3,
                stride=1,
                padding=dilation,
                dilation=dilation)
            blocks["mask_fcn{}".format(idx)] = conv
            blocks["relu{}".format(idx)] = nn.ReLU(inplace=True)
            # The next convolution consumes what this one produced.
            channels_in = channels_out

        super(MaskRCNNHeads, self).__init__(blocks)

        # Only parameters whose name contains "weight" are re-initialised;
        # every other parameter keeps the value its layer constructor set.
        for name, param in self.named_parameters():
            if "weight" not in name:
                continue
            nn.init.kaiming_normal_(param,
                                    mode="fan_out",
                                    nonlinearity="relu")
Ejemplo n.º 2
0
    def __init__(self, in_channels, layers, dilation):
        """
        Assemble ``len(layers)`` consecutive (3x3 convolution, ReLU) pairs
        and apply Kaiming-normal initialisation to the weights.

        Arguments:
            in_channels (int): number of channels fed to the first
                convolution
            layers (list): output feature dimension of each convolution, in
                order
            dilation (int): dilation rate of every convolution kernel; also
                passed as the convolution padding
        """
        named_modules = OrderedDict()
        prev_width = in_channels
        for position, width in enumerate(layers, 1):
            conv_name = "mask_fcn{}".format(position)
            relu_name = "relu{}".format(position)
            named_modules[conv_name] = misc_nn_ops.Conv2d(
                prev_width, width,
                kernel_size=3, stride=1,
                padding=dilation, dilation=dilation)
            named_modules[relu_name] = nn.ReLU(inplace=True)
            prev_width = width

        super(MaskRCNNHeads, self).__init__(named_modules)

        # Re-initialise each parameter whose name contains "weight"; all
        # other parameters are left as created by the layer constructors.
        for param_name, tensor in self.named_parameters():
            if "weight" in param_name:
                nn.init.kaiming_normal_(
                    tensor, mode="fan_out", nonlinearity="relu")
Ejemplo n.º 3
0
    def __init__(self, in_channels, dim_reduced, num_classes):
        """
        Build the three-stage predictor: a transposed convolution, a ReLU,
        and a 1x1 convolution producing ``num_classes`` output channels.

        Arguments:
            in_channels: number of channels of the incoming features
            dim_reduced: number of channels produced by the transposed
                convolution and consumed by the final convolution
            num_classes: number of output channels of the final convolution
        """
        stages = OrderedDict()
        # Positional arguments after the channel counts are 2, 2, 0.
        stages["conv5_mask"] = misc_nn_ops.ConvTranspose2d(
            in_channels, dim_reduced, 2, 2, 0)
        stages["relu"] = nn.ReLU(inplace=True)
        # Positional arguments after the channel counts are 1, 1, 0.
        stages["mask_fcn_logits"] = misc_nn_ops.Conv2d(
            dim_reduced, num_classes, 1, 1, 0)
        super(MaskRCNNPredictor, self).__init__(stages)

        # Re-initialise only the parameters whose name contains "weight".
        for param_name, tensor in self.named_parameters():
            if "weight" not in param_name:
                continue
            nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")
 def __init__(self, in_channels, layers):
     """
     Build one (3x3 convolution, ReLU) pair per entry of ``layers`` and
     initialise the convolutions.

     Arguments:
         in_channels: number of channels fed to the first convolution
         layers: output channel count of each convolution, in order
     """
     modules = []
     channels_in = in_channels
     for channels_out in layers:
         conv = misc_nn_ops.Conv2d(channels_in, channels_out, 3, stride=1, padding=1)
         modules.extend((conv, nn.ReLU(inplace=True)))
         channels_in = channels_out
     super(KeypointRCNNHeads, self).__init__(*modules)

     # Only the convolution children are touched: Kaiming-normal weights
     # and zero biases. The ReLU children are skipped.
     for child in self.children():
         if not isinstance(child, misc_nn_ops.Conv2d):
             continue
         nn.init.kaiming_normal_(child.weight, mode="fan_out", nonlinearity="relu")
         nn.init.constant_(child.bias, 0)
Ejemplo n.º 5
0
 def __init__(
         self,
         nin: int,
         nout: int,
         kernel_size: int = 3,
         activation: Activation = nn.ReLU(),
         stride: int = 1,
         padding: int = 1,
         dilation: int = 1,
         dropout: float = 0.1):
     """
     Create the convolution, batch-norm, dropout and activation
     sub-modules and store them as attributes.

     Arguments:
         nin: number of input channels of the convolution
         nout: number of output channels of the convolution; also the
             feature count of the batch norm
         kernel_size: convolution kernel size
         activation: module stored as ``self.activation``. The default
             ``nn.ReLU()`` is created once, when the method is defined,
             so every instance built with the default holds the same
             module object.
         stride: convolution stride
         padding: convolution padding
         dilation: convolution dilation
         dropout: value passed to ``nn.Dropout``
     """
     super(StdConv, self).__init__()
     conv_options = dict(stride=stride, padding=padding, dilation=dilation)
     # Attribute assignment order is kept: conv, bn, drop, activation.
     self.conv = misc_nn_ops.Conv2d(nin, nout, kernel_size, **conv_options)
     self.bn = misc_nn_ops.BatchNorm2d(nout)
     self.drop = nn.Dropout(dropout)
     self.activation = activation
Ejemplo n.º 6
0
    def __init__(
            self,
            networklist,
            upsample_mode='bilinear',
            output_activation='relu',
            kernel_size=3,
            padding_n=1,
            depth=False,
            encoder_hook_index=None,
            decoder_hook_index=None):
        """
        Build a sequential network from a compact layer specification.

        Arguments:
            networklist: sequence whose first element is the input channel
                count. Every following element describes one layer, chosen
                by which letter ``str(element)`` contains (checked in this
                order):
                  * ``'M'`` -- max pooling,
                  * ``'U'`` -- upsampling,
                  * ``'S'`` -- pixel shuffle, which also divides the
                    tracked channel count by ``scale * scale``,
                  * anything else -- a convolution followed by a ReLU,
                    with the element used as the output channel count.
                For the three letter forms the scale is
                ``int(element[1])``, i.e. only the single character at
                index 1 is read.
            upsample_mode: ``mode`` passed to ``nn.Upsample``
            output_activation: how to finish the stack. ``'sigmoid'``,
                ``'batchnorm'`` and ``'logsoftmax'`` remove the most
                recently added layer and append the named layer instead;
                ``'remove'`` only removes it; any other value (including
                the default ``'relu'``) leaves the stack unchanged.
            kernel_size: kernel size of the regular (non-depth)
                convolutions
            padding_n: padding of the regular (non-depth) convolutions
            depth: when true, each convolution entry becomes a
                ``ConvBNReLU`` with ``kernel_size=1`` and ``groups`` equal
                to the output channel count, instead of a plain
                convolution
            encoder_hook_index: if not ``None``, passed to
                ``register_layers`` together with the layer list and the
                tag ``'student_encoder'``
            decoder_hook_index: if not ``None``, passed to
                ``register_layers`` together with the layer list and the
                tag ``'student_decoder'``
        """
        modules = OrderedDict()
        channels = networklist[0]
        networklist = networklist[1:]

        for layer_idx, layer_conv in enumerate(networklist):
            spec = str(layer_conv)

            if 'M' in spec:
                factor = int(layer_conv[1])
                modules["maxpool2d{}".format(layer_idx)] = nn.MaxPool2d(
                    kernel_size=factor,
                    stride=factor,
                    padding=0,
                    dilation=1,
                    ceil_mode=False)
                continue

            if 'U' in spec:
                factor = int(layer_conv[1])
                modules["upsample2d{}".format(layer_idx)] = nn.Upsample(
                    scale_factor=factor, mode=upsample_mode, align_corners=True)
                continue

            if 'S' in spec:
                factor = int(layer_conv[1])
                modules["shuffle2d{}".format(layer_idx)] = nn.PixelShuffle(factor)
                channels = int(channels / (factor * factor))
                continue

            # Convolution entry: layer_conv is the output channel count.
            if depth:
                modules["conv1{}".format(layer_idx)] = ConvBNReLU(
                    channels,
                    layer_conv,
                    kernel_size=1,
                    groups=layer_conv)
            else:
                modules["conv{}".format(layer_idx)] = misc_nn_ops.Conv2d(
                    channels,
                    layer_conv,
                    kernel_size=kernel_size,
                    stride=1,
                    padding=padding_n)
            modules["relu{}".format(layer_idx)] = nn.ReLU(inplace=True)
            channels = layer_conv

        # popitem() drops whichever entry was inserted last; it raises
        # KeyError if nothing was added.
        if output_activation in ('sigmoid', 'batchnorm', 'remove', 'logsoftmax'):
            modules.popitem()
            if output_activation == 'sigmoid':
                modules['sigmoid'] = nn.Sigmoid()
            elif output_activation == 'batchnorm':
                modules['bn'] = nn.BatchNorm2d(channels)
            elif output_activation == 'logsoftmax':
                modules['logsoftmax'] = nn.LogSoftmax(dim=1)

        # The keys are discarded here: the layers are handed to the parent
        # constructor positionally.
        layer_list = list(modules.values())

        # NOTE(review): register_layers is defined elsewhere; presumably it
        # attaches hooks to the selected layers -- confirm.
        if encoder_hook_index is not None:
            register_layers(layer_list, encoder_hook_index, 'student_encoder')
        if decoder_hook_index is not None:
            register_layers(layer_list, decoder_hook_index, 'student_decoder')

        super(CNN_constructer, self).__init__(*layer_list)

        # Parameters are matched by name: anything containing "weight" is
        # drawn from a normal distribution with std 0.01, otherwise
        # anything containing "bias" is set to zero.
        for param_name, tensor in self.named_parameters():
            if "weight" in param_name:
                nn.init.normal_(tensor, std=0.01)
            elif "bias" in param_name:
                nn.init.constant_(tensor, 0)
Ejemplo n.º 7
0
    def __init__(
            self,
            backbone,
            num_maskrcnn_classes=None,
            # transform parameters
            min_size=800,
            max_size=1333,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.7,
            rpn_bg_iou_thresh=0.3,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.5,
            box_detections_per_img=100,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=512,
            box_positive_fraction=0.25,
            bbox_reg_weights=None):
        """
        Construct the parent detector, then attach the DensePose RoI
        pooler, head and three predictors to ``self.roi_heads``.

        Every argument is forwarded unchanged and positionally, in
        declaration order, to the parent constructor; see the parent class
        for their meaning. ``backbone.out_channels`` is additionally read
        to size the DensePose head.
        """
        # The parent's parameter names are not assumed here, so the
        # arguments are passed by position in declaration order.
        parent_args = (
            backbone,
            num_maskrcnn_classes,
            # transform parameters
            min_size,
            max_size,
            image_mean,
            image_std,
            # RPN-specific parameters
            rpn_anchor_generator,
            rpn_head,
            rpn_pre_nms_top_n_train,
            rpn_pre_nms_top_n_test,
            rpn_post_nms_top_n_train,
            rpn_post_nms_top_n_test,
            rpn_nms_thresh,
            rpn_fg_iou_thresh,
            rpn_bg_iou_thresh,
            rpn_batch_size_per_image,
            rpn_positive_fraction,
            # Box parameters
            box_roi_pool,
            box_head,
            box_predictor,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
        )
        super(DensePoseRCNN, self).__init__(*parent_args)

        def build_predictor(out_channels):
            # Two stride-2 transposed convolutions (each followed by a
            # ReLU), then a 1x1 convolution to ``out_channels`` channels.
            return nn.Sequential(
                misc_nn_ops.ConvTranspose2d(256, 256, kernel_size=2, stride=2),
                nn.ReLU(inplace=True),
                misc_nn_ops.ConvTranspose2d(256, 256, kernel_size=2, stride=2),
                nn.ReLU(inplace=True),
                misc_nn_ops.Conv2d(256, out_channels, 1, 1, 0),
            )

        heads = self.roi_heads
        heads.densepose_roi_pool = MultiScaleRoIAlign(
            featmap_names=[0, 1, 2, 3], output_size=14, sampling_ratio=2)
        # Four 256-channel conv layers with dilation 1.
        heads.densepose_head = MaskRCNNHeads(
            backbone.out_channels, (256, 256, 256, 256), 1)

        # The predictors are created in the order uv, class, mask.
        # TODO: maybe we should put sigmoid on top (UV coords are always in [0,1] range)?
        heads.densepose_uv_predictor = build_predictor(48)
        heads.densepose_class_predictor = build_predictor(25)
        heads.densepose_mask_predictor = build_predictor(15)