def __init__(self, in_channels, layers, dilation):
    """
    Stack one convolution + in-place ReLU pair per entry of ``layers``.

    Arguments:
        in_channels (int): number of input channels
        layers (list): feature dimensions of each FCN layer
        dilation (int): dilation rate of kernel
    """
    modules = OrderedDict()
    channels = in_channels
    for idx, out_channels in enumerate(layers, 1):
        conv_key = "mask_fcn{}".format(idx)
        relu_key = "relu{}".format(idx)
        # The padding is set equal to the dilation rate.
        modules[conv_key] = misc_nn_ops.Conv2d(
            channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=dilation,
            dilation=dilation)
        modules[relu_key] = nn.ReLU(inplace=True)
        # The next convolution consumes what this one produced.
        channels = out_channels

    super(MaskRCNNHeads, self).__init__(modules)

    # Only parameters whose name contains "weight" are re-initialised;
    # everything else keeps its constructor-time initialisation.
    for param_name, tensor in self.named_parameters():
        if "weight" not in param_name:
            continue
        nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")
def __init__(self, in_channels, layers, dilation):
    """
    Build the mask head: for every entry of ``layers`` a convolution
    named ``mask_fcn<i>`` followed by an in-place ReLU named ``relu<i>``
    (``i`` starts at 1).

    Arguments:
        in_channels (int): number of input channels
        layers (list): feature dimensions of each FCN layer
        dilation (int): dilation rate of kernel (also used as padding)
    """
    d = OrderedDict()
    next_feature = in_channels
    for layer_idx, layer_features in enumerate(layers, 1):
        d["mask_fcn{}".format(layer_idx)] = misc_nn_ops.Conv2d(
            next_feature, layer_features,
            kernel_size=3, stride=1, padding=dilation, dilation=dilation)
        d["relu{}".format(layer_idx)] = nn.ReLU(inplace=True)
        # Chain the layers: this output width is the next input width.
        next_feature = layer_features

    super(MaskRCNNHeads, self).__init__(d)

    # Re-initialise every parameter whose name contains "weight";
    # other parameters are left as created.
    weights = (p for n, p in self.named_parameters() if "weight" in n)
    for param in weights:
        nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
def __init__(self, in_channels, dim_reduced, num_classes):
    """
    Assemble the predictor as three named stages:
    ``conv5_mask`` (transposed convolution), ``relu`` and
    ``mask_fcn_logits`` (convolution producing ``num_classes`` channels).

    Arguments:
        in_channels: channel count fed to the transposed convolution
        dim_reduced: channel count between the two convolutions
        num_classes: channel count of the final convolution's output
    """
    stages = OrderedDict()
    # NOTE(review): the trailing positional arguments are presumably
    # (kernel_size, stride, padding) as in torch.nn - confirm against
    # misc_nn_ops.
    stages["conv5_mask"] = misc_nn_ops.ConvTranspose2d(
        in_channels, dim_reduced, 2, 2, 0)
    stages["relu"] = nn.ReLU(inplace=True)
    stages["mask_fcn_logits"] = misc_nn_ops.Conv2d(
        dim_reduced, num_classes, 1, 1, 0)

    super(MaskRCNNPredictor, self).__init__(stages)

    # Re-initialise only parameters whose name contains "weight".
    for param_name, tensor in self.named_parameters():
        if "weight" not in param_name:
            continue
        nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")
def __init__(self, in_channels, layers):
    """
    Build a sequence of convolution + in-place ReLU pairs, one pair per
    entry of ``layers``.

    Arguments:
        in_channels: channel count of the input to the first convolution
        layers: iterable of output channel counts, one per convolution
    """
    modules = []
    channels = in_channels
    for out_channels in layers:
        modules += [
            misc_nn_ops.Conv2d(channels, out_channels, 3, stride=1, padding=1),
            nn.ReLU(inplace=True),
        ]
        channels = out_channels

    super(KeypointRCNNHeads, self).__init__(*modules)

    # Only the direct Conv2d children are re-initialised: Kaiming-normal
    # weights and zero biases.
    for child in self.children():
        if not isinstance(child, misc_nn_ops.Conv2d):
            continue
        nn.init.kaiming_normal_(child.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(child.bias, 0)
def __init__(self, nin: int, nout: int, kernel_size: int = 3,
             activation: Activation = nn.ReLU(), stride: int = 1,
             padding: int = 1, dilation: int = 1, dropout: float = 0.1):
    """
    Create the four sub-modules of the block: ``conv``, ``bn``, ``drop``
    and ``activation``.

    Arguments:
        nin: input channels of the convolution
        nout: output channels of the convolution (and size of ``bn``)
        kernel_size: kernel size of the convolution
        activation: module stored as ``self.activation``. The default
            ``nn.ReLU()`` is evaluated once when the function is defined,
            so every instance relying on the default shares that one
            module object.
        stride: stride of the convolution
        padding: padding of the convolution
        dilation: dilation of the convolution
        dropout: probability passed to ``nn.Dropout``
    """
    super(StdConv, self).__init__()

    conv_options = dict(stride=stride, padding=padding, dilation=dilation)

    # Attribute assignment order determines sub-module registration
    # order, so it is kept as conv -> bn -> drop -> activation.
    self.conv = misc_nn_ops.Conv2d(nin, nout, kernel_size, **conv_options)
    self.bn = misc_nn_ops.BatchNorm2d(nout)
    self.drop = nn.Dropout(dropout)
    self.activation = activation
def __init__(self, networklist, upsample_mode='bilinear', output_activation='relu',
             kernel_size=3, padding_n=1, depth=False,
             encoder_hook_index=None, decoder_hook_index=None):
    """
    Build a sequential network from a compact layer specification.

    Arguments:
        networklist (list): ``networklist[0]`` is the input channel count.
            Every following entry is either a channel count (adds a
            convolution followed by an in-place ReLU) or a string whose
            second character is a single-digit scale and which contains
            'M' (max-pool), 'U' (upsample) or 'S' (pixel shuffle; divides
            the tracked channel count by ``scale * scale``).
        upsample_mode (str): ``mode`` given to every ``nn.Upsample``.
            ``align_corners=True`` is passed only for the interpolating
            modes; other modes (e.g. 'nearest') get ``align_corners=None``
            because PyTorch rejects the option for them.
        output_activation (str): 'sigmoid', 'batchnorm' and 'logsoftmax'
            replace the last registered layer with the named module;
            'remove' just drops the last registered layer; any other
            value (including the default 'relu') leaves the layers as
            built.
        kernel_size (int): kernel size of the regular convolutions.
        padding_n (int): padding of the regular convolutions.
        depth (bool): if true, channel entries are built with
            ``ConvBNReLU(..., kernel_size=1, groups=<out channels>)``
            instead of ``misc_nn_ops.Conv2d``.
        encoder_hook_index: if not None, forwarded to ``register_layers``
            together with the layer list and the tag 'student_encoder'.
        decoder_hook_index: if not None, forwarded to ``register_layers``
            together with the layer list and the tag 'student_decoder'.
    """
    modules = OrderedDict()
    channels = networklist[0]
    specs = networklist[1:]

    # F.interpolate raises if align_corners is set for a non-interpolating
    # mode, so only request it where it is accepted.
    interpolating_modes = ('linear', 'bilinear', 'bicubic', 'trilinear')
    align_corners = True if upsample_mode in interpolating_modes else None

    for layer_idx, spec in enumerate(specs):
        tag = str(spec)
        if 'M' in tag:
            scale = int(spec[1])
            modules["maxpool2d{}".format(layer_idx)] = nn.MaxPool2d(
                kernel_size=scale, stride=scale, padding=0, dilation=1,
                ceil_mode=False)
        elif 'U' in tag:
            scale = int(spec[1])
            modules["upsample2d{}".format(layer_idx)] = nn.Upsample(
                scale_factor=scale, mode=upsample_mode,
                align_corners=align_corners)
        elif 'S' in tag:
            scale = int(spec[1])
            modules["shuffle2d{}".format(layer_idx)] = nn.PixelShuffle(scale)
            # The tracked channel count shrinks by scale**2 after the shuffle.
            channels = int(channels / (scale * scale))
        else:
            if depth:
                modules["conv1{}".format(layer_idx)] = ConvBNReLU(
                    channels, spec, kernel_size=1, groups=spec)
            else:
                modules["conv{}".format(layer_idx)] = misc_nn_ops.Conv2d(
                    channels, spec, kernel_size=kernel_size, stride=1,
                    padding=padding_n)
            modules["relu{}".format(layer_idx)] = nn.ReLU(inplace=True)
            channels = spec

    # The special output activations drop whatever layer was registered
    # last (the trailing ReLU when the spec ends with a channel entry)
    # and optionally append a replacement.
    if output_activation == 'sigmoid':
        modules.popitem()
        modules['sigmoid'] = nn.Sigmoid()
    elif output_activation == 'batchnorm':
        modules.popitem()
        modules['bn'] = nn.BatchNorm2d(channels)
    elif output_activation == 'remove':
        modules.popitem()
    elif output_activation == 'logsoftmax':
        modules.popitem()
        modules['logsoftmax'] = nn.LogSoftmax(dim=1)

    # From here on the layers are positional; the dict keys are discarded.
    layer_list = list(modules.values())

    if encoder_hook_index is not None:
        register_layers(layer_list, encoder_hook_index, 'student_encoder')
    if decoder_hook_index is not None:
        register_layers(layer_list, decoder_hook_index, 'student_decoder')

    super(CNN_constructer, self).__init__(*layer_list)

    # Name-based initialisation: anything named "*weight*" gets
    # N(0, 0.01), anything else named "*bias*" is zeroed.
    for name, param in self.named_parameters():
        if "weight" in name:
            nn.init.normal_(param, std=0.01)
        elif "bias" in name:
            nn.init.constant_(param, 0)
def __init__(
        self, backbone, num_maskrcnn_classes=None,
        # transform parameters
        min_size=800, max_size=1333,
        image_mean=None, image_std=None,
        # RPN parameters
        rpn_anchor_generator=None, rpn_head=None,
        rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
        rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
        rpn_nms_thresh=0.7,
        rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
        rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
        # Box parameters
        box_roi_pool=None, box_head=None, box_predictor=None,
        box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
        box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
        box_batch_size_per_image=512, box_positive_fraction=0.25,
        bbox_reg_weights=None):
    """
    Initialise the parent detector with the given arguments (forwarded
    positionally, in signature order) and attach the DensePose modules
    to ``self.roi_heads``: a RoI pooler, a shared head, and three
    predictors (``uv``, ``class`` and ``mask``) that differ only in
    their number of output channels (48, 25 and 15).
    """
    transform_args = (min_size, max_size, image_mean, image_std)
    rpn_args = (
        rpn_anchor_generator, rpn_head,
        rpn_pre_nms_top_n_train, rpn_pre_nms_top_n_test,
        rpn_post_nms_top_n_train, rpn_post_nms_top_n_test,
        rpn_nms_thresh,
        rpn_fg_iou_thresh, rpn_bg_iou_thresh,
        rpn_batch_size_per_image, rpn_positive_fraction,
    )
    box_args = (
        box_roi_pool, box_head, box_predictor,
        box_score_thresh, box_nms_thresh, box_detections_per_img,
        box_fg_iou_thresh, box_bg_iou_thresh,
        box_batch_size_per_image, box_positive_fraction,
        bbox_reg_weights,
    )
    super(DensePoseRCNN, self).__init__(
        backbone, num_maskrcnn_classes,
        *transform_args, *rpn_args, *box_args)

    def make_predictor(out_channels):
        # Two stride-2 transposed convolutions, each followed by an
        # in-place ReLU, then a final convolution to ``out_channels``.
        return nn.Sequential(
            misc_nn_ops.ConvTranspose2d(256, 256, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            misc_nn_ops.ConvTranspose2d(256, 256, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            misc_nn_ops.Conv2d(256, out_channels, 1, 1, 0),
        )

    roi_heads = self.roi_heads
    roi_heads.densepose_roi_pool = MultiScaleRoIAlign(
        featmap_names=[0, 1, 2, 3],
        output_size=14,
        sampling_ratio=2)
    roi_heads.densepose_head = MaskRCNNHeads(
        backbone.out_channels, (256, 256, 256, 256), 1)

    # TODO: maybe we should put sigmoid on top (UV coords are always in [0,1] range)?
    # Predictors are created in the order uv -> class -> mask.
    roi_heads.densepose_uv_predictor = make_predictor(48)
    roi_heads.densepose_class_predictor = make_predictor(25)
    roi_heads.densepose_mask_predictor = make_predictor(15)