Example #1
0
 def __init__(self, args):
     """Assemble the lambda-layer stack and the fully connected head.

     Args:
         args: Configuration object. This constructor reads ``inputs_type``,
             ``word_size``, ``Nbatch``, ``out_channel0``, ``out_channel1``,
             ``numLayers`` and ``hidden1`` from it.
     """
     super(ModelPaperBaselineN_batch_lambda, self).__init__()
     self.args = args
     self.word_size = args.word_size
     self.numLayers = args.numLayers

     # Options shared by all lambda layers: the ``n`` argument, the key
     # dimension, the number of heads (multi-query) and the 'intra-depth'
     # dimension.
     lambda_opts = dict(n=args.word_size * args.Nbatch,
                        dim_k=16,
                        heads=4,
                        dim_u=1)
     # Every BatchNorm in this model uses the same eps / momentum.
     # NOTE(review): PyTorch's ``momentum`` weights the *new* batch
     # statistic, so 0.99 may be a Keras-style value -- confirm intent.
     bn_opts = dict(eps=0.01, momentum=0.99)

     # Input stage: one input channel per entry of ``args.inputs_type``.
     self.layer0 = LambdaLayer(dim=len(self.args.inputs_type),
                               dim_out=args.out_channel0,
                               **lambda_opts)
     self.BN0 = nn.BatchNorm2d(args.out_channel0, **bn_opts)

     # Remaining ``numLayers - 1`` stages, each paired with a BatchNorm.
     # NOTE(review): these expect ``out_channel1`` input channels while
     # layer0 emits ``out_channel0`` -- presumably equal; verify.
     self.layers_conv = nn.ModuleList()
     self.layers_batch = nn.ModuleList()
     for _ in range(args.numLayers - 1):
         stage = LambdaLayer(dim=args.out_channel1,
                             dim_out=args.out_channel1,
                             **lambda_opts)
         self.layers_conv.append(stage)
         self.layers_batch.append(
             nn.BatchNorm2d(args.out_channel1, **bn_opts))

     # Fully connected head; fc1 takes out_channel1 * word_size * Nbatch
     # input features.
     flat_features = args.out_channel1 * args.word_size * args.Nbatch
     self.fc1 = nn.Linear(flat_features, args.hidden1)
     self.BN5 = nn.BatchNorm1d(args.hidden1, **bn_opts)
     self.fc2 = nn.Linear(args.hidden1, args.hidden1)
     self.BN6 = nn.BatchNorm1d(args.hidden1, **bn_opts)
     self.fc3 = nn.Linear(args.hidden1, 1)
Example #2
0
def lambda_conv(in_channels, out_channels, kernel_size, bias=True, dilation=1):
    """Return a LambdaLayer mapping ``in_channels`` to ``out_channels``.

    ``kernel_size``, ``bias`` and ``dilation`` are accepted but not read by
    this function. The head count is obtained from
    ``get_heads_count(out_channels)``; ``r``, ``dim_k`` and ``dim_u`` are
    fixed at 23, 16 and 1.
    """
    num_heads = get_heads_count(out_channels)
    layer_config = {
        'dim': in_channels,
        'dim_out': out_channels,
        'r': 23,
        'dim_k': 16,
        'heads': num_heads,
        'dim_u': 1,
    }
    return LambdaLayer(**layer_config)
def lambda_conv(in_channels, out_channels, **kwargs):
    """Return a LambdaLayer mapping ``in_channels`` to ``out_channels``.

    Extra keyword arguments are accepted and ignored. The head count is
    obtained from ``get_heads_count(out_channels)``; ``r``, ``dim_k`` and
    ``dim_u`` are fixed at 23, 16 and 1.
    """
    num_heads = get_heads_count(out_channels)
    layer_config = {
        'dim': in_channels,
        'dim_out': out_channels,
        'r': 23,
        'dim_k': 16,
        'heads': num_heads,
        'dim_u': 1,
    }
    return LambdaLayer(**layer_config)
Example #4
0
    def __init__(
            self,
            in_channels: int,
            out_channels: int,
            kernel_size: int,
            switch_breadth: int,
            stride: int = 1,
            padding: int = 0,
            dilation: int = 1,
            groups: int = 1,
            bias: bool = True,
            padding_mode: str = 'zeros',
            include_coupler: bool = False,
            coupler_mode: str = 'standard',
            coupler_dim_in: int = 0):
        """Create ``switch_breadth`` convolution kernels and an optional coupler.

        Args:
            in_channels: Input channels of every kernel.
            out_channels: Output channels of every kernel.
            kernel_size: Square kernel size.
            switch_breadth: Number of kernels held in ``self.weights``.
            stride, padding, dilation, groups, padding_mode: Stored on the
                module unchanged.
            bias: When true a bias parameter of size ``out_channels`` is
                created; otherwise ``bias`` is registered as ``None``.
            include_coupler: A 'coupler' is a latent converter which can make
                any bxcxhxw tensor a compatible switchedconv selector by
                performing a linear 1x1 conv, softmax and interpolate.
            coupler_mode: ``'standard'`` (1x1 ``Conv2d``) or ``'lambda'``
                (``LambdaLayer``). Only consulted when ``include_coupler``.
            coupler_dim_in: Input channels of the coupler.

        Raises:
            ValueError: If ``include_coupler`` is set and ``coupler_mode`` is
                not one of the supported modes.
        """
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.padding_mode = padding_mode
        self.groups = groups

        if not include_coupler:
            self.coupler = None
        elif coupler_mode == 'standard':
            self.coupler = Conv2d(coupler_dim_in,
                                  switch_breadth,
                                  kernel_size=1)
        elif coupler_mode == 'lambda':
            self.coupler = LambdaLayer(dim=coupler_dim_in,
                                       dim_out=switch_breadth,
                                       r=23,
                                       dim_k=16,
                                       heads=2,
                                       dim_u=1)
        else:
            # Previously an unknown mode left ``self.coupler`` unassigned,
            # which only surfaced later as an AttributeError.
            raise ValueError(
                "Unknown coupler_mode {!r}; expected 'standard' or "
                "'lambda'".format(coupler_mode))

        # One uninitialised kernel per switch; values are filled in by
        # reset_parameters() below.
        self.weights = nn.ParameterList([
            nn.Parameter(
                torch.Tensor(out_channels, in_channels // groups, kernel_size,
                             kernel_size)) for _ in range(switch_breadth)
        ])
        if bias:
            self.bias = nn.Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()
Example #5
0
    def __init__(self):
        """Stack two lambda layers (3 -> 16 -> 3 channels) and a 1x1 conv."""
        super().__init__()

        # Both lambda layers use a single head and an 'intra-depth'
        # dimension of 4; they differ in channels, ``r`` and key dimension.
        shared_opts = dict(heads=1, dim_u=4)

        self.layer1 = LambdaLayer(dim=3,
                                  dim_out=16,
                                  r=23,
                                  dim_k=32,
                                  **shared_opts)
        self.layer2 = LambdaLayer(dim=16,
                                  dim_out=3,
                                  r=15,
                                  dim_k=16,
                                  **shared_opts)

        # Bias-free 1x1 convolution over the 3 output channels.
        self.last_conv = torch.nn.Conv2d(3, 3, 1, bias=False)
Example #6
0
    def __init__(self, in_channels, middle_channels, out_channels):
        """Build a two-conv block followed by a lambda layer.

        Args:
            in_channels: Input channels of the first 3x3 convolution.
            middle_channels: Accepted but not used by this constructor.
            out_channels: Channels produced by both convolutions and by the
                lambda layer.
        """
        super(DecoderLambda, self).__init__()

        # conv -> BN -> ReLU -> conv -> BN (no activation after the last BN).
        conv_stack = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
        ]
        self.block = nn.Sequential(*conv_stack)

        lambda_opts = {
            'dim': out_channels,
            'dim_out': out_channels,
            'r': 7,
            'dim_k': 16,
            'heads': 1,
            'dim_u': 1,
        }
        self.lambda_layer = LambdaLayer(**lambda_opts)

        self._initialize_weights()
        def conv_dw(inp: int, oup: int, stride: int,
                    layer_type: str) -> nn.Module:
            """Return the layer selected by ``layer_type``.

            Args:
                inp: Number of input channels.
                oup: Number of output channels.
                stride: Stride of the 3x3 grouped convolution (``"c"`` only).
                layer_type: ``"c"`` for the conv/BN/ReLU stack, ``"l"`` for a
                    ``LambdaLayer``; any other value yields ``Identity()``.

            Returns:
                The constructed module.
            """
            if layer_type == "c":
                # 3x3 conv with groups=inp, then a 1x1 conv to ``oup``
                # channels; each conv is followed by BatchNorm and ReLU.
                return nn.Sequential(
                    nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
                    nn.BatchNorm2d(inp),
                    nn.ReLU(inplace=True),
                    nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
                    nn.BatchNorm2d(oup),
                    nn.ReLU(inplace=True),
                )
            if layer_type == "l":
                return LambdaLayer(
                    dim=inp,
                    dim_out=oup,
                    r=3,
                    dim_k=4,
                    heads=4,
                    dim_u=1,
                )
            return Identity()
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], in_features: torch.Tensor, pe_dim=0):
        """Build the decoder: feature/PE fusion layer, DensePose head, predictor.

        Args:
            cfg: Config node; values are read from
                ``cfg.MODEL.ROI_DENSEPOSE_HEAD`` and ``cfg.MODEL.CONDINST``.
            input_shape: Accepted for interface compatibility; not read by
                this constructor.
            in_features: Stored as ``self.in_features`` without inspection.
            pe_dim: Extra input channels added to ``AGG_CHANNELS`` for the
                fusion layer ``comb_pe_conv``.

        Raises:
            ValueError: If ``USE_SAN`` is enabled and ``SAN_TYPE`` is not one
                of the supported block names.
        """
        super(DecoderSparse, self).__init__()

        densepose_cfg = cfg.MODEL.ROI_DENSEPOSE_HEAD
        condinst_cfg = cfg.MODEL.CONDINST
        iuv_cfg = condinst_cfg.IUVHead

        self.in_features = in_features
        self.common_stride = densepose_cfg.DECODER_COMMON_STRIDE
        norm = densepose_cfg.DECODER_NORM
        num_lambda_layer = iuv_cfg.NUM_LAMBDA_LAYER
        lambda_layer_r = iuv_cfg.LAMBDA_LAYER_R
        self.use_agg_feat = iuv_cfg.USE_AGG_FEATURES
        self.use_ins_gn = iuv_cfg.INSTANCE_AWARE_GN
        self.checkpoint_grad_num = condinst_cfg.CHECKPOINT_GRAD_NUM
        agg_channels = condinst_cfg.MASK_BRANCH.AGG_CHANNELS
        self.use_aux_global_s = condinst_cfg.AUX_SUPERVISION_GLOBAL_S
        self.use_aux_global_skeleton = condinst_cfg.AUX_SUPERVISION_GLOBAL_SKELETON
        self.use_aux_body_semantics = condinst_cfg.AUX_SUPERVISION_BODY_SEMANTICS

        # The base class count is hard-coded here rather than read from
        # DECODER_NUM_CLASSES; each auxiliary supervision adds outputs.
        num_classes = 75
        if self.use_aux_global_s:
            num_classes += 1
        if self.use_aux_global_skeleton:
            # to check
            num_classes += 55
        if self.use_aux_body_semantics:
            num_classes += 15

        self.predictor_conv_type = iuv_cfg.PREDICTOR_TYPE
        self.use_dropout = iuv_cfg.DROPOUT
        self.use_san = iuv_cfg.USE_SAN
        self.san_type = condinst_cfg.SAN_TYPE

        # Layer that maps (agg_channels + pe_dim) channels back to
        # agg_channels: a lambda layer when requested, else a 3x3 conv.
        if num_lambda_layer > 0:
            self.comb_pe_conv = LambdaLayer(
                dim=agg_channels + pe_dim,
                dim_out=agg_channels,
                r=lambda_layer_r,
                dim_k=16,
                heads=4,
                dim_u=4,
            )
        else:
            self.comb_pe_conv = Conv2d(
                agg_channels + pe_dim,
                agg_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=not norm,
                norm=get_norm(norm, agg_channels),
                activation=F.relu,
            )

        if self.use_san:
            # sa_type: 0 = pairwise, 1 = patchwise
            sa_type = 1
            if self.san_type == "SAN_BottleneckGN":
                san_func = SAN_BottleneckGN
            elif self.san_type == "SAN_BottleneckGN_GatedEarly":
                san_func = SAN_BottleneckGN_GatedEarly
            elif self.san_type == "SAN_BottleneckGN_Gated":
                san_func = SAN_BottleneckGN_Gated
            else:
                # Previously this fell through and crashed on the unbound
                # ``san_func`` name below.
                raise ValueError(
                    "Unsupported cfg.MODEL.CONDINST.SAN_TYPE: {!r}".format(
                        self.san_type))
            self.san_blk_1 = san_func(sa_type, agg_channels, agg_channels // 16, agg_channels // 4, agg_channels, 8, kernel_size=7, stride=1)

        if self.use_dropout:
            self.dropout_layer = nn.Dropout2d(0.25)

        self.densepose_head = build_densepose_head(cfg, agg_channels)

        # Output layer, selected by PREDICTOR_TYPE.
        # NOTE(review): an unrecognised type leaves ``self.predictor`` unset;
        # confirm whether that is ever intended before making it an error.
        if self.predictor_conv_type == "conv":
            self.predictor = Conv2d(
                densepose_cfg.CONV_HEAD_DIM, num_classes, 1, stride=1, padding=0
            )
            initialize_module_params(self.predictor)
        elif self.predictor_conv_type == "dcnv1":
            self.predictor = deform_conv.DFConv2d(
                densepose_cfg.CONV_HEAD_DIM, num_classes,
                with_modulated_dcn=False, kernel_size=3
            )
        elif self.predictor_conv_type == "dcnv2":
            self.predictor = deform_conv.DFConv2d(
                densepose_cfg.CONV_HEAD_DIM, num_classes,
                with_modulated_dcn=True, kernel_size=3
            )
        elif self.predictor_conv_type == "dcnv2Conv":
            # Modulated deformable conv followed by a 1x1 conv; only the
            # 1x1 conv goes through initialize_module_params.
            stages = [
                deform_conv.DFConv2d(
                    densepose_cfg.CONV_HEAD_DIM, densepose_cfg.CONV_HEAD_DIM,
                    with_modulated_dcn=True, kernel_size=3
                ),
                Conv2d(
                    densepose_cfg.CONV_HEAD_DIM, num_classes, 1, stride=1, padding=0
                ),
            ]
            initialize_module_params(stages[-1])
            self.predictor = nn.Sequential(*stages)
        elif self.predictor_conv_type == "dcnv2ResConv":
            # Deformable bottleneck block followed by a 1x1 conv; only the
            # 1x1 conv goes through initialize_module_params.
            stages = [
                deform_conv.DeformBottleneckBlock(
                    densepose_cfg.CONV_HEAD_DIM, densepose_cfg.CONV_HEAD_DIM,
                    bottleneck_channels=densepose_cfg.CONV_HEAD_DIM,
                    deform_modulated=True
                ),
                Conv2d(
                    densepose_cfg.CONV_HEAD_DIM, num_classes, 1, stride=1, padding=0
                ),
            ]
            initialize_module_params(stages[-1])
            self.predictor = nn.Sequential(*stages)
        elif self.predictor_conv_type == "sparse":
            conv = sparse_conv_with_kaiming_uniform(norm=None, activation=None, use_sep=False,
                                use_submconv=True, use_deconv=False)
            self.predictor = conv(
                densepose_cfg.CONV_HEAD_DIM,
                num_classes,
                kernel_size=3,
                stride=1,
                dilation=1,
                indice_key="subm0",
            )
Example #9
0
    def __init__(self, cfg, use_rel_coords=True):
        """Build the IUV head as a single ``tower`` of layers.

        Args:
            cfg: Config node; settings come from
                ``cfg.MODEL.CONDINST.IUVHead`` plus a few sibling entries.
            use_rel_coords: Accepted but not read; ``self.use_rel_coords`` is
                taken from ``cfg`` instead.
        """
        super().__init__()
        head_cfg = cfg.MODEL.CONDINST.IUVHead

        self.num_outputs = head_cfg.OUT_CHANNELS
        norm = head_cfg.NORM
        num_convs = head_cfg.NUM_CONVS
        num_lambda_layer = head_cfg.NUM_LAMBDA_LAYER
        lambda_layer_r = head_cfg.LAMBDA_LAYER_R
        num_dcn_layer = head_cfg.NUM_DCN_LAYER
        assert num_lambda_layer<=num_convs

        agg_channels = cfg.MODEL.CONDINST.MASK_BRANCH.AGG_CHANNELS
        channels = head_cfg.CHANNELS
        self.norm_feat = head_cfg.NORM_FEATURES
        soi = cfg.MODEL.FCOS.SIZES_OF_INTEREST
        # Buffer holds the configured sizes plus one extra entry equal to
        # twice the last size.
        self.register_buffer("sizes_of_interest", torch.tensor(soi + [soi[-1] * 2]))
        self.iuv_out_stride = cfg.MODEL.CONDINST.MASK_OUT_STRIDE
        self.use_rel_coords = head_cfg.REL_COORDS
        self.use_abs_coords = head_cfg.ABS_COORDS
        self.use_down_up_sampling = head_cfg.DOWN_UP_SAMPLING
        self.use_partial_conv = head_cfg.PARTIAL_CONV
        self.use_partial_norm = head_cfg.PARTIAL_NORM

        self.pos_emb_num_freqs = head_cfg.POSE_EMBEDDING_NUM_FREQS
        self.use_pos_emb = self.pos_emb_num_freqs>0
        if self.use_pos_emb:
            self.position_embedder, self.position_emb_dim = get_embedder(
                multires=self.pos_emb_num_freqs, input_dims=2)
            coord_channels = self.position_emb_dim
        else:
            coord_channels = 2

        # One coordinate block is always added to the aggregated features;
        # a second one of the same width is added when ABS_COORDS is set.
        self.in_channels = agg_channels + coord_channels
        if self.use_abs_coords:
            self.in_channels += coord_channels

        conv_block = conv_with_kaiming_uniform(norm, activation=True)
        partial_conv_block = conv_with_kaiming_uniform(norm, activation=True, use_partial_conv=True)
        deform_conv_block = conv_with_kaiming_uniform(norm, activation=True, use_deformable=True)

        tower = []
        if self.use_partial_conv:
            tower.append(partial_conv_block(self.in_channels, channels, 3, 1))
            # Whatever follows the partial conv sees ``channels`` inputs.
            self.in_channels = channels

        # First regular layer: lambda layer if any were requested, else conv.
        if num_lambda_layer>0:
            first_layer = LambdaLayer(
                dim=self.in_channels,
                dim_out=channels,
                r=lambda_layer_r,
                dim_k=16,
                heads=4,
                dim_u=4,
            )
        else:
            first_layer = conv_block(self.in_channels, channels, 3, 1)
        tower.append(first_layer)

        if num_dcn_layer>0:
            tower.append(deform_conv_block(channels, channels, 3, 1))

        if self.use_down_up_sampling:
            wide = channels*2
            for idx in range(1, num_convs):
                # Layer 1 doubles the channels with stride 2; later layers
                # keep the doubled width at stride 1.
                fan_in, step = (channels, 2) if idx == 1 else (wide, 1)
                tower.append(conv_block(fan_in, wide, 3, step))
            # Stride-2 transposed conv (kernel 4, padding 1) as output layer.
            tower.append(ConvTranspose2d(
                wide, self.num_outputs, 4, stride=2, padding=int(4 / 2 - 1)
            ))
        else:
            tower.extend(
                conv_block(channels, channels, 3, 1)
                for _ in range(1, num_convs))
            # 1x1 output conv with at least one output channel.
            tower.append(nn.Conv2d(channels, max(self.num_outputs, 1), 1))

        self.tower = nn.Sequential(*tower)
Example #10
0
    def __init__(self, cfg, use_rel_coords=True):
        """Build the IUV head as individually registered ``layer_<i>`` modules.

        Every layer is attached as attribute ``layer_<i>`` and also kept, in
        order, in the plain list ``self.layers``.

        Args:
            cfg: Config node; settings come from
                ``cfg.MODEL.CONDINST.IUVHead`` plus a few sibling entries.
            use_rel_coords: Accepted but not read; ``self.use_rel_coords`` is
                taken from ``cfg`` instead.
        """
        super().__init__()
        head_cfg = cfg.MODEL.CONDINST.IUVHead

        self.num_outputs = head_cfg.OUT_CHANNELS
        norm = head_cfg.NORM
        num_convs = head_cfg.NUM_CONVS
        num_lambda_layer = head_cfg.NUM_LAMBDA_LAYER
        lambda_layer_r = head_cfg.LAMBDA_LAYER_R
        assert num_lambda_layer <= num_convs

        agg_channels = cfg.MODEL.CONDINST.MASK_BRANCH.AGG_CHANNELS
        channels = head_cfg.CHANNELS
        self.norm_feat = head_cfg.NORM_FEATURES
        soi = cfg.MODEL.FCOS.SIZES_OF_INTEREST
        # Buffer holds the configured sizes plus one extra entry equal to
        # twice the last size.
        self.register_buffer("sizes_of_interest",
                             torch.tensor(soi + [soi[-1] * 2]))
        self.iuv_out_stride = cfg.MODEL.CONDINST.MASK_OUT_STRIDE
        self.use_rel_coords = head_cfg.REL_COORDS
        self.use_abs_coords = head_cfg.ABS_COORDS

        self.pos_emb_num_freqs = head_cfg.POSE_EMBEDDING_NUM_FREQS
        self.use_pos_emb = self.pos_emb_num_freqs > 0
        if self.use_pos_emb:
            self.position_embedder, self.position_emb_dim = get_embedder(
                multires=self.pos_emb_num_freqs, input_dims=2)
            per_coord = self.position_emb_dim
        else:
            per_coord = 2

        # One coordinate block is always counted; a second one of the same
        # width is added when ABS_COORDS is set.
        extra_channels = per_coord
        if self.use_abs_coords:
            extra_channels += per_coord

        conv_block = conv_with_kaiming_uniform(norm, activation=True)

        self.layers = []

        def attach(module):
            # Register under the next free ``layer_<i>`` name and record it.
            setattr(self, 'layer_{}'.format(len(self.layers)), module)
            self.layers.append(module)

        # Every layer after the first takes ``channels + extra_channels``.
        stage_in = channels + extra_channels

        # NOTE(review): the lambda variant of the first layer expects
        # ``agg_channels + extra_channels`` inputs while the conv variant
        # expects ``channels + extra_channels`` -- confirm which is intended.
        if num_lambda_layer > 0:
            first = LambdaLayer(dim=agg_channels + extra_channels,
                                dim_out=channels,
                                r=lambda_layer_r,
                                dim_k=16,
                                heads=4,
                                dim_u=4)
        else:
            first = conv_block(stage_in, channels, 3, 1)
        attach(first)

        for idx in range(1, num_convs):
            if idx < num_lambda_layer:
                attach(LambdaLayer(dim=stage_in,
                                   dim_out=channels,
                                   r=lambda_layer_r,
                                   dim_k=16,
                                   heads=4,
                                   dim_u=4))
            else:
                attach(conv_block(stage_in, channels, 3, 1))

        # 1x1 output conv with at least one output channel.
        attach(nn.Conv2d(stage_in, max(self.num_outputs, 1), 1))
    def __init__(
        self,
        in_c,
        out_c,
        kernel_sz,
        breadth,
        stride=1,
        bias=True,
        dropout_rate=0.0,
        include_coupler: bool = False,
        coupler_mode: str = 'standard',
        coupler_dim_in: int = 0,
        hard_en=True,
        emulate_swconv=True,
    ):
        """Create the switched-convolution weights, gate and optional coupler.

        Args:
            in_c: Input channels.
            out_c: Output channels.
            kernel_sz: Square kernel size.
            breadth: Number of switchable kernels.
            stride: Stored on the module and used as the stride of the
                coupler's final/only ``Conv2d``.
            bias: When true a bias parameter is created; otherwise a plain
                zero tensor is stored.
            dropout_rate: Stored as ``self.dropout_rate``.
            include_coupler: A 'coupler' is a latent converter which can make
                any bxcxhxw tensor a compatible switchedconv selector by
                performing a linear 1x1 conv, softmax and interpolate.
            coupler_mode: ``'standard'``, ``'lambda'`` or ``'lambda2'``. Only
                consulted when ``include_coupler`` is set.
            coupler_dim_in: Input channels of the coupler.
            hard_en: A test switch that, when used in 'emulation mode' (where
                all convs are calculated using torch functions) computes
                soft-attention instead of hard-attention.
            emulate_swconv: When set, performs a nn.Conv2d operation for each
                breadth. When false, uses the native cuda implementation
                which computes all switches concurrently. Not read by this
                constructor.

        Raises:
            ValueError: If ``include_coupler`` is set and ``coupler_mode`` is
                not one of the supported modes.
        """
        super().__init__()
        self.in_channels = in_c
        self.out_channels = out_c
        self.kernel_size = kernel_sz
        self.stride = stride
        self.has_bias = bias
        self.breadth = breadth
        self.dropout_rate = dropout_rate

        if not include_coupler:
            self.coupler = None
        elif coupler_mode == 'standard':
            self.coupler = Conv2d(coupler_dim_in,
                                  breadth,
                                  kernel_size=1,
                                  stride=self.stride)
        elif coupler_mode == 'lambda':
            # 1x1 conv -> BN -> ReLU -> lambda layer -> BN -> ReLU -> 1x1 conv
            self.coupler = nn.Sequential(
                nn.Conv2d(coupler_dim_in, coupler_dim_in, 1),
                nn.BatchNorm2d(coupler_dim_in),
                nn.ReLU(),
                LambdaLayer(dim=coupler_dim_in,
                            dim_out=breadth,
                            r=23,
                            dim_k=16,
                            heads=2,
                            dim_u=1),
                nn.BatchNorm2d(breadth),
                nn.ReLU(),
                Conv2d(breadth, breadth, 1, stride=self.stride))
        elif coupler_mode == 'lambda2':
            # Same layout as 'lambda' but with GroupNorm and two stacked
            # lambda layers.
            self.coupler = nn.Sequential(
                nn.Conv2d(coupler_dim_in, coupler_dim_in, 1),
                nn.GroupNorm(num_groups=2, num_channels=coupler_dim_in),
                nn.ReLU(),
                LambdaLayer(dim=coupler_dim_in,
                            dim_out=coupler_dim_in,
                            r=23,
                            dim_k=16,
                            heads=2,
                            dim_u=1),
                nn.GroupNorm(num_groups=2, num_channels=coupler_dim_in),
                nn.ReLU(),
                LambdaLayer(dim=coupler_dim_in,
                            dim_out=breadth,
                            r=23,
                            dim_k=16,
                            heads=2,
                            dim_u=1),
                nn.GroupNorm(num_groups=1, num_channels=breadth),
                nn.ReLU(),
                Conv2d(breadth, breadth, 1, stride=self.stride))
        else:
            # Previously an unknown mode left ``self.coupler`` unassigned,
            # which only surfaced later as an AttributeError.
            raise ValueError(
                "Unknown coupler_mode {!r}; expected 'standard', 'lambda' "
                "or 'lambda2'".format(coupler_mode))

        # NOTE(review): the gate is always built with hard_en=True regardless
        # of the ``hard_en`` argument -- confirm this is intentional.
        self.gate = HardRoutingGate(breadth, hard_en=True)
        self.hard_en = hard_en

        # Uninitialised storage; values are filled in by reset_parameters().
        self.weight = nn.Parameter(
            torch.empty(out_c, in_c, breadth, kernel_sz, kernel_sz))
        if bias:
            self.bias = nn.Parameter(torch.empty(out_c))
        else:
            # NOTE(review): a plain tensor attribute is neither a parameter
            # nor a registered buffer, so Module.to()/cuda() will not move
            # it -- verify the forward pass handles device placement.
            self.bias = torch.zeros(out_c)
        self.reset_parameters()
Example #12
0
    def __init__(self,
                 cfg,
                 input_shape: Dict[str, ShapeSpec],
                 in_features: torch.Tensor,
                 pe_dim=0):
        """Build per-feature scale heads, the fusion layer and the predictor.

        Args:
            cfg: Config node; values are read from
                ``cfg.MODEL.ROI_DENSEPOSE_HEAD`` and
                ``cfg.MODEL.CONDINST.IUVHead``.
            input_shape: Maps feature name to a spec exposing ``stride`` and
                ``channels``.
            in_features: Iterated here as a collection of keys into
                ``input_shape`` (despite the tensor annotation); each key
                also becomes the name of a registered sub-module.
            pe_dim: Extra input channels added to ``DECODER_CONV_DIMS`` for
                the fusion layer ``comb_pe_conv``.
        """
        super(Decoder, self).__init__()

        densepose_cfg = cfg.MODEL.ROI_DENSEPOSE_HEAD
        iuv_cfg = cfg.MODEL.CONDINST.IUVHead

        self.in_features = in_features
        feature_strides = {name: spec.stride for name, spec in input_shape.items()}
        feature_channels = {name: spec.channels for name, spec in input_shape.items()}
        # The class count is hard-coded rather than read from
        # DECODER_NUM_CLASSES.
        num_classes = 75
        conv_dims = densepose_cfg.DECODER_CONV_DIMS
        self.common_stride = densepose_cfg.DECODER_COMMON_STRIDE
        norm = densepose_cfg.DECODER_NORM
        num_lambda_layer = iuv_cfg.NUM_LAMBDA_LAYER
        lambda_layer_r = iuv_cfg.LAMBDA_LAYER_R
        self.use_ins_gn = iuv_cfg.INSTANCE_AWARE_GN

        self.scale_heads = []
        for in_feature in self.in_features:
            # At least one conv per feature; more when the feature stride is
            # above the common stride (difference of the log2 values).
            log_gap = int(
                np.log2(feature_strides[in_feature]) -
                np.log2(self.common_stride))
            head_length = max(1, log_gap)

            head_ops = []
            for k in range(head_length):
                conv = Conv2d(
                    conv_dims if k != 0 else feature_channels[in_feature],
                    conv_dims,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    bias=not norm,
                    norm=get_norm(norm, conv_dims),
                    activation=F.relu,
                )
                weight_init.c2_msra_fill(conv)
                head_ops.append(conv)
                # Each conv is followed by a 2x bilinear upsample unless the
                # feature already sits at the common stride.
                if feature_strides[in_feature] != self.common_stride:
                    head_ops.append(
                        nn.Upsample(scale_factor=2,
                                    mode="bilinear",
                                    align_corners=False))

            scale_head = nn.Sequential(*head_ops)
            self.scale_heads.append(scale_head)
            # The list above is a plain Python list, so the head is also
            # registered as a sub-module under the feature's name.
            self.add_module(in_feature, scale_head)

        # Layer that maps (conv_dims + pe_dim) channels back to conv_dims:
        # a lambda layer when requested, else a 3x3 conv.
        if num_lambda_layer > 0:
            self.comb_pe_conv = LambdaLayer(dim=conv_dims + pe_dim,
                                            dim_out=conv_dims,
                                            r=lambda_layer_r,
                                            dim_k=16,
                                            heads=4,
                                            dim_u=4)
        else:
            self.comb_pe_conv = Conv2d(
                conv_dims + pe_dim,
                conv_dims,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=not norm,
                norm=get_norm(norm, conv_dims),
                activation=F.relu,
            )

        self.densepose_head = build_densepose_head(cfg, conv_dims)

        # 1x1 output conv over the DensePose head features.
        self.predictor = Conv2d(densepose_cfg.CONV_HEAD_DIM,
                                num_classes,
                                1,
                                stride=1,
                                padding=0)
        initialize_module_params(self.predictor)
Example #13
0
    def __init__(self, cfg, use_rel_coords=True):
        """Build three parallel towers plus a merging ``tower_out``.

        Args:
            cfg: Config node; settings come from
                ``cfg.MODEL.CONDINST.IUVHead`` plus a few sibling entries.
            use_rel_coords: Accepted but not read; ``self.use_rel_coords`` is
                taken from ``cfg`` instead.
        """
        super().__init__()
        head_cfg = cfg.MODEL.CONDINST.IUVHead

        self.num_outputs = head_cfg.OUT_CHANNELS
        norm = head_cfg.NORM
        num_convs = head_cfg.NUM_CONVS
        num_lambda_layer = head_cfg.NUM_LAMBDA_LAYER
        lambda_layer_r = head_cfg.LAMBDA_LAYER_R
        assert num_lambda_layer <= num_convs

        agg_channels = cfg.MODEL.CONDINST.MASK_BRANCH.AGG_CHANNELS
        channels = head_cfg.CHANNELS
        self.norm_feat = head_cfg.NORM_FEATURES
        soi = cfg.MODEL.FCOS.SIZES_OF_INTEREST
        # Buffer holds the configured sizes plus one extra entry equal to
        # twice the last size.
        self.register_buffer("sizes_of_interest",
                             torch.tensor(soi + [soi[-1] * 2]))
        self.iuv_out_stride = cfg.MODEL.CONDINST.MASK_OUT_STRIDE
        self.use_rel_coords = head_cfg.REL_COORDS
        self.use_abs_coords = head_cfg.ABS_COORDS
        self.use_partial_conv = head_cfg.PARTIAL_CONV
        self.use_partial_norm = head_cfg.PARTIAL_NORM

        self.pos_emb_num_freqs = head_cfg.POSE_EMBEDDING_NUM_FREQS
        self.use_pos_emb = self.pos_emb_num_freqs > 0
        if self.use_pos_emb:
            self.position_embedder, self.position_emb_dim = get_embedder(
                multires=self.pos_emb_num_freqs, input_dims=2)
            coord_channels = self.position_emb_dim
        else:
            coord_channels = 2

        # One coordinate block is always added to the aggregated features;
        # a second one of the same width is added when ABS_COORDS is set.
        self.in_channels = agg_channels + coord_channels
        if self.use_abs_coords:
            self.in_channels += coord_channels

        if self.use_partial_conv:
            conv_block = conv_with_kaiming_uniform(norm,
                                                   activation=True,
                                                   use_partial_conv=True)
        else:
            conv_block = conv_with_kaiming_uniform(norm, activation=True)
        # Created as in the original code but not referenced afterwards.
        conv_block_bn = conv_with_kaiming_uniform("BN", activation=True)

        # Number of plain conv blocks following the first layer of a tower.
        num_layer = 3

        # The three towers share one recipe: a lambda layer (or conv) from
        # ``in_channels`` to ``channels`` followed by ``num_layer`` convs.
        for tower_name in ('tower0', 'tower1', 'tower2'):
            if num_lambda_layer > 0:
                stem = LambdaLayer(dim=self.in_channels,
                                   dim_out=channels,
                                   r=lambda_layer_r,
                                   dim_k=8,
                                   heads=4,
                                   dim_u=4)
            else:
                stem = conv_block(self.in_channels, channels, 3, 1)
            branch = [stem]
            for _ in range(num_layer):
                branch.append(conv_block(channels, channels, 3, 1))
            self.add_module(tower_name, nn.Sequential(*branch))

        # Merge tower: its first layer is a 1x1 conv taking ``channels * 3``
        # inputs; any further layers are 3x3 convs.
        tower_out = [
            conv_block(channels * 3, channels, 1, 1)
            if idx == 0 else conv_block(channels, channels, 3, 1)
            for idx in range(num_convs - num_layer - 1)
        ]
        self.add_module('tower_out', nn.Sequential(*tower_out))
Example #14
0
    def __init__(self, cfg, use_rel_coords=True):
        """Build a shared tower and mid/low/high resolution output branches.

        Args:
            cfg: Config node; settings come from
                ``cfg.MODEL.CONDINST.IUVHead`` plus a few sibling entries.
            use_rel_coords: Accepted but not read; ``self.use_rel_coords`` is
                taken from ``cfg`` instead.
        """
        super().__init__()
        head_cfg = cfg.MODEL.CONDINST.IUVHead

        self.num_outputs = head_cfg.OUT_CHANNELS
        norm = head_cfg.NORM
        num_convs = head_cfg.NUM_CONVS
        num_lambda_layer = head_cfg.NUM_LAMBDA_LAYER
        assert num_lambda_layer <= num_convs
        channels = head_cfg.CHANNELS
        self.norm_feat = head_cfg.NORM_FEATURES
        soi = cfg.MODEL.FCOS.SIZES_OF_INTEREST
        # Buffer holds the configured sizes plus one extra entry equal to
        # twice the last size.
        self.register_buffer("sizes_of_interest",
                             torch.tensor(soi + [soi[-1] * 2]))
        self.iuv_out_stride = cfg.MODEL.CONDINST.MASK_OUT_STRIDE
        self.use_rel_coords = head_cfg.REL_COORDS
        self.use_abs_coords = head_cfg.ABS_COORDS

        self.pos_emb_num_freqs = head_cfg.POSE_EMBEDDING_NUM_FREQS
        self.use_pos_emb = self.pos_emb_num_freqs > 0
        if self.use_pos_emb:
            self.position_embedder, self.position_emb_dim = get_embedder(
                multires=self.pos_emb_num_freqs, input_dims=2)
            coord_channels = self.position_emb_dim
        else:
            coord_channels = 2

        # One coordinate block is always added to ``channels``; a second one
        # of the same width is added when ABS_COORDS is set.
        self.in_channels = channels + coord_channels
        if self.use_abs_coords:
            self.in_channels += coord_channels

        conv_block = conv_with_kaiming_uniform(norm, activation=True)

        def lambda_block(dim_in):
            # Lambda layer producing ``channels`` outputs with the fixed
            # settings used throughout this head.
            return LambdaLayer(dim=dim_in,
                               dim_out=channels,
                               r=23,
                               dim_k=16,
                               heads=4,
                               dim_u=4)

        # Shared tower: the first layer consumes ``in_channels``; layers
        # 1 .. num_convs - 2 keep ``channels`` and are lambda layers while
        # their index is below ``num_lambda_layer``.
        if num_lambda_layer > 0:
            tower = [lambda_block(self.in_channels)]
        else:
            tower = [conv_block(self.in_channels, channels, 3, 1)]
        for idx in range(1, num_convs - 1):
            if idx < num_lambda_layer:
                tower.append(lambda_block(channels))
            else:
                tower.append(conv_block(channels, channels, 3, 1))
        self.add_module('tower', nn.Sequential(*tower))

        # Output branches: a conv (stride 1), a conv (stride 2) and a
        # deconv block (stride 2), each followed by a 1x1 output conv.
        self.mid_res_conv = conv_block(channels, channels, 3, 1)
        self.mid_res_out = nn.Conv2d(channels, self.num_outputs, 1)

        self.low_res_conv = conv_block(channels, channels, 3, 2)
        self.low_res_out = nn.Conv2d(channels, self.num_outputs, 1)

        deconv_block = conv_with_kaiming_uniform(norm,
                                                 activation=True,
                                                 use_deconv=True)
        self.high_res_conv = deconv_block(channels, channels, 3, 2)
        self.high_res_out = nn.Conv2d(channels, self.num_outputs, 1)