def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
    """
    Text recognition head: a BezierAlign pooler that samples each text
    instance into a fixed-size feature map, a conv tower, and a
    recognizer selected by config.

    Args:
        cfg: config node; reads MODEL.BATEXT.* and MODEL.TOP_MODULE.DIM.
        input_shape: mapping from feature name to its ShapeSpec.
    """
    super(TextHead, self).__init__()
    # fmt: off
    batext = cfg.MODEL.BATEXT
    pooler_resolution = batext.POOLER_RESOLUTION
    pooler_scales     = batext.POOLER_SCALES
    sampling_ratio    = batext.SAMPLING_RATIO
    conv_dim          = batext.CONV_DIM
    num_conv          = batext.NUM_CONV
    canonical_size    = batext.CANONICAL_SIZE
    self.in_features  = batext.IN_FEATURES
    self.voc_size     = batext.VOC_SIZE
    recognizer        = batext.RECOGNIZER
    self.top_size     = cfg.MODEL.TOP_MODULE.DIM
    # fmt: on

    # Pool curved text regions along their Bezier control points.
    self.pooler = TopPooler(
        output_size=pooler_resolution,
        scales=pooler_scales,
        sampling_ratio=sampling_ratio,
        pooler_type="BezierAlign",
        canonical_box_size=canonical_size,
        canonical_level=3,
        assign_crit="bezier",
    )

    make_conv = conv_with_kaiming_uniform(norm="BN", activation=True)
    self.tower = nn.Sequential(
        *[make_conv(conv_dim, conv_dim, 3, 1) for _ in range(num_conv)]
    )
    self.recognizer = build_recognizer(cfg, recognizer)
def __init__(self, cfg, in_channels):
    """
    CRNN recognizer stem: two stride-(2, 1) convs (each halves the
    feature height while keeping the width) followed by a bidirectional
    LSTM over the width axis.

    Args:
        cfg: config node (unused here beyond the signature).
        in_channels: channel count, preserved through convs and LSTM.
    """
    super(CRNN, self).__init__()
    make_conv = conv_with_kaiming_uniform(norm="GN", activation=True)
    layers = [
        make_conv(in_channels, in_channels, 3, stride=(2, 1))
        for _ in range(2)
    ]
    self.convs = nn.Sequential(*layers)
    self.rnn = BidirectionalLSTM(in_channels, in_channels, in_channels)
def __init__(self, cfg):
    """
    Mask head: a four-conv tower that first projects a 258-channel
    input down to conv_dim, then applies three conv_dim -> conv_dim
    convs.

    Args:
        cfg: config node; reads MODEL.BATEXT.CONV_DIM.
    """
    super(MaskHead, self).__init__()
    conv_dim = cfg.MODEL.BATEXT.CONV_DIM
    make_conv = conv_with_kaiming_uniform(norm="BN", activation=True)
    # NOTE(review): 258 input channels is hard-coded; presumably it
    # matches the caller's feature concatenation — confirm upstream.
    layers = [make_conv(258, conv_dim, 3, 1)]
    layers.extend(make_conv(conv_dim, conv_dim, 3, 1) for _ in range(3))
    self.mask_convs = nn.Sequential(*layers)
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
    """
    Protonet-style basis module: one refinement conv per input level,
    an upsampling tower that emits the mask bases, and an optional
    semantic-segmentation head for the auxiliary loss.

    Args:
        cfg: config node; reads MODEL.BASIS_MODULE.* and
            MODEL.BLENDMASK.VISUALIZE.
        input_shape: mapping from feature name to its ShapeSpec.
    """
    # official protonet has a relu after each conv
    super().__init__()
    # fmt: off
    basis_cfg        = cfg.MODEL.BASIS_MODULE
    mask_dim         = basis_cfg.NUM_BASES
    planes           = basis_cfg.CONVS_DIM
    self.in_features = basis_cfg.IN_FEATURES
    self.loss_on     = basis_cfg.LOSS_ON
    norm             = basis_cfg.NORM
    num_convs        = basis_cfg.NUM_CONVS
    self.visualize   = cfg.MODEL.BLENDMASK.VISUALIZE
    # fmt: on

    feature_channels = {name: spec.channels for name, spec in input_shape.items()}
    make_conv = conv_with_kaiming_uniform(norm, True)  # conv relu bn

    # One 3x3 conv per input level, projecting each to `planes` channels.
    self.refine = nn.ModuleList(
        [make_conv(feature_channels[name], planes, 3, 1)
         for name in self.in_features]
    )

    tower = [make_conv(planes, planes, 3, 1) for _ in range(num_convs)]
    tower.append(
        nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False))
    tower.append(make_conv(planes, planes, 3, 1))
    tower.append(nn.Conv2d(planes, mask_dim, 1))
    self.add_module('tower', nn.Sequential(*tower))

    if self.loss_on:
        # fmt: off
        self.common_stride   = basis_cfg.COMMON_STRIDE
        num_classes          = basis_cfg.NUM_CLASSES + 1  # +1: background class
        self.sem_loss_weight = basis_cfg.LOSS_WEIGHT
        # fmt: on
        inplanes = feature_channels[self.in_features[0]]

        def _conv_bn_relu(cin, cout):
            # 3x3 conv (no bias, BN follows) + BN + ReLU.
            return [
                nn.Conv2d(cin, cout, kernel_size=3, stride=1,
                          padding=1, bias=False),
                nn.BatchNorm2d(cout),
                nn.ReLU(),
            ]

        self.seg_head = nn.Sequential(
            *_conv_bn_relu(inplanes, planes),
            *_conv_bn_relu(planes, planes),
            nn.Conv2d(planes, num_classes, kernel_size=1, stride=1),
        )
def __init__(self, conv_dim, roi_size):
    """
    Height-collapsing encoder: stride-(2, 1) convs repeatedly halve the
    RoI height down to 4, then a (4, 1) conv squeezes it to a single
    row, leaving a width-indexed feature sequence.

    Args:
        conv_dim: channel count, preserved throughout.
        roi_size: (height, width); height must be a power of two so the
            halvings land exactly on 4 (presumably height >= 4 — the
            assert only checks integrality).
    """
    super().__init__()
    height = roi_size[0]
    n_down = math.log2(height) - 2
    # Power-of-two check: log2(height) - 2 must be a whole number.
    assert math.isclose(n_down, int(n_down))
    n_down = int(n_down)

    make_conv = conv_with_kaiming_uniform(norm="BN", activation=True)
    layers = [
        make_conv(conv_dim, conv_dim, 3, stride=(2, 1))
        for _ in range(n_down)
    ]
    layers.append(
        nn.Conv2d(conv_dim, conv_dim, kernel_size=(4, 1), bias=False))
    self.convs = nn.Sequential(*layers)
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
    """
    CondInst mask branch: projects the chosen FPN levels to a common
    width, fuses them through a conv tower into the mask features, and
    optionally builds a semantic-segmentation head trained with a
    focal loss.

    Args:
        cfg: config node; reads MODEL.CONDINST.MASK_BRANCH.* and
            MODEL.FCOS.*.
        input_shape: mapping from feature name to its ShapeSpec.
    """
    super().__init__()
    branch_cfg = cfg.MODEL.CONDINST.MASK_BRANCH
    self.in_features = branch_cfg.IN_FEATURES
    self.sem_loss_on = branch_cfg.SEMANTIC_LOSS_ON
    self.num_outputs = branch_cfg.OUT_CHANNELS
    norm = branch_cfg.NORM
    num_convs = branch_cfg.NUM_CONVS
    channels = branch_cfg.CHANNELS
    # Output stride follows the first (finest) input level.
    self.out_stride = input_shape[self.in_features[0]].stride

    feature_channels = {name: spec.channels for name, spec in input_shape.items()}
    make_conv = conv_with_kaiming_uniform(norm, activation=True)

    self.refine = nn.ModuleList(
        [make_conv(feature_channels[name], channels, 3, 1)
         for name in self.in_features]
    )

    tower = [make_conv(channels, channels, 3, 1) for _ in range(num_convs)]
    # Emit at least one channel even when OUT_CHANNELS is 0.
    tower.append(nn.Conv2d(channels, max(self.num_outputs, 1), 1))
    self.add_module('tower', nn.Sequential(*tower))

    if self.sem_loss_on:
        num_classes = cfg.MODEL.FCOS.NUM_CLASSES
        self.focal_loss_alpha = cfg.MODEL.FCOS.LOSS_ALPHA
        self.focal_loss_gamma = cfg.MODEL.FCOS.LOSS_GAMMA

        in_channels = feature_channels[self.in_features[0]]
        self.seg_head = nn.Sequential(
            make_conv(in_channels, channels, kernel_size=3, stride=1),
            make_conv(channels, channels, kernel_size=3, stride=1),
        )
        self.logits = nn.Conv2d(channels, num_classes, kernel_size=1, stride=1)

        # Focal-loss prior: bias set so initial sigmoid output equals
        # PRIOR_PROB (RetinaNet-style initialization).
        prior_prob = cfg.MODEL.FCOS.PRIOR_PROB
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        torch.nn.init.constant_(self.logits.bias, bias_value)
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
    """
    FCPose basis module: refines the p3-p5 features, predicts mask
    bases plus per-keypoint offset maps, and builds a two-stage heatmap
    head (p3-resolution logits, then a 4x learned upsampler producing
    p1-resolution logits) for the auxiliary heatmap loss.

    Args:
        cfg: config node; reads MODEL.FCPOSE.BASIS_MODULE.* and
            MODEL.DEVICE.
        input_shape: mapping from feature name to its ShapeSpec.
    """
    # official protonet has a relu after each conv
    super().__init__()
    # fmt: off
    mask_dim         = cfg.MODEL.FCPOSE.BASIS_MODULE.NUM_BASES
    planes           = cfg.MODEL.FCPOSE.BASIS_MODULE.CONVS_DIM
    self.device      = torch.device(cfg.MODEL.DEVICE)
    self.in_features = ["p3", "p4", "p5"]
    self.loss_on     = True
    norm             = cfg.MODEL.FCPOSE.BASIS_MODULE.BN_TYPE  # e.g. "SyncBN"
    num_convs        = 3
    self.visualize   = False
    # fmt: on

    feature_channels = {k: v.channels for k, v in input_shape.items()}
    conv_block = conv_with_kaiming_uniform(norm, True)  # conv relu bn

    self.refine = nn.ModuleList()
    for in_feature in self.in_features:
        self.refine.append(
            conv_block(feature_channels[in_feature], planes, 3, 1))

    tower = []
    for _ in range(num_convs):
        tower.append(conv_block(planes, planes, 3, 1))
    tower.append(conv_block(planes, planes, 3, 1))
    # Output channels: mask bases plus an (x, y) offset map per keypoint
    # (17 COCO keypoints -> 2 * 17 extra channels).
    tower.append(nn.Conv2d(planes, mask_dim + (2 * 17), 1))
    self.add_module('tower', nn.Sequential(*tower))

    if self.loss_on:
        # fmt: off
        self.common_stride       = cfg.MODEL.FCPOSE.BASIS_MODULE.COMMON_STRIDE
        self.num_classes         = cfg.MODEL.FCPOSE.BASIS_MODULE.NUM_CLASSES
        self.heatmap_loss_weight = cfg.MODEL.FCPOSE.BASIS_MODULE.LOSS_WEIGHT
        # fmt: on

        self.seg_head = nn.Sequential(
            conv_block(planes, planes, 3, 1),
            conv_block(planes, planes, 3, 1),
        )
        self.p3_logits = nn.Conv2d(planes, self.num_classes,
                                   kernel_size=1, stride=1)
        # 4x learned upsampling of the seg features concatenated with the
        # p3 logits: output size = 4*(in-1) + 8 - 2*2 = 4*in.
        self.upsampler = nn.Sequential(
            ConvTranspose2d(planes + self.num_classes, planes, 8,
                            stride=4, padding=2),
            nn.ReLU(),
        )
        self.p1_logits = nn.Conv2d(planes, self.num_classes,
                                   kernel_size=3, stride=1, padding=1)

        # Both logit layers start from zero bias and near-zero weights.
        # (The original computed a RetinaNet-style focal-loss prior bias
        # from cfg.MODEL.FCOS.PRIOR_PROB but never applied it; that dead
        # code — along with an unused `inplanes` local — is removed.)
        torch.nn.init.constant_(self.p3_logits.bias, 0.0)
        torch.nn.init.normal_(self.p3_logits.weight, std=0.0001)
        torch.nn.init.constant_(self.p1_logits.bias, 0.0)
        torch.nn.init.normal_(self.p1_logits.weight, std=0.0001)