def __init__(self, cfg: YolactConfig):
    """Build the mask-IoU network described by ``cfg``.

    The layer specification handed to ``make_net`` is ``cfg.maskiou_net``
    followed by one extra entry, ``(cfg.num_classes - 1, 1, {})``. The
    network takes a single input channel and keeps its final ReLU. Only the
    first value returned by ``make_net`` is kept.

    Args:
        cfg: Configuration object; ``num_classes`` and ``maskiou_net`` are read.
    """
    super().__init__()

    # Final entry of the layer spec: one output per class, minus one.
    # NOTE(review): presumably the subtracted class is the background and the
    # tuple is (out_channels, kernel_size, conv kwargs) - confirm in make_net.
    head_spec = (cfg.num_classes - 1, 1, {})
    layer_specs = cfg.maskiou_net + [head_spec]

    self.maskiou_net, _ = make_net(1, layer_specs, include_last_relu=True)
def __init__(self, in_channels, out_channels=1024, aspect_ratios=None, scales=None, parent=None, index=0):
    """Set up one prediction head, or a child head that is given a ``parent``.

    Settings are read from the module-level ``cfg``. When ``parent`` is None
    this instance creates its own layers; otherwise no layers are created here.

    Args:
        in_channels: Channel count of the incoming feature map. Increased by
            ``self.mask_dim`` when ``cfg.mask_proto_prototypes_as_features``
            is set.
        out_channels: Channel count used for the head layers. When ``parent``
            is None the passed value is replaced: by ``in_channels`` if
            ``cfg.extra_head_net`` is None, otherwise by the second value
            returned from ``make_net``.
        aspect_ratios: Sequence of aspect-ratio sequences. ``None`` (the
            default) means ``[[1]]``.
        scales: Sequence of scales. ``None`` (the default) means ``[1]``.
        parent: Another head, or None. Stored wrapped in a list.
        index: Stored as ``self.index``; presumably this head's position
            among the prediction layers.
    """
    super().__init__()

    # The defaults used to be list literals in the signature, which made every
    # default-constructed instance share (and store) the very same list objects.
    # Build fresh lists per instance instead.
    if aspect_ratios is None:
        aspect_ratios = [[1]]
    if scales is None:
        scales = [1]

    self.num_classes = cfg.num_classes
    self.mask_dim = cfg.mask_dim  # Defined by Yolact
    self.num_priors = sum(len(x) * len(scales) for x in aspect_ratios)
    # Wrapped in a list so the parent is not included in the state dict.
    self.parent = [parent]
    self.index = index
    self.num_heads = cfg.num_heads  # Defined by Yolact

    if cfg.mask_proto_split_prototypes_by_head and cfg.mask_type == mask_type.lincomb:
        self.mask_dim = self.mask_dim // self.num_heads

    if cfg.mask_proto_prototypes_as_features:
        in_channels += self.mask_dim

    # NOTE(review): the nesting below was reconstructed from whitespace-collapsed
    # source; confirm that all layer construction belongs under `parent is None`.
    if parent is None:
        if cfg.extra_head_net is None:
            out_channels = in_channels
        else:
            self.upfeature, out_channels = make_net(in_channels, cfg.extra_head_net)

        if cfg.use_prediction_module:
            self.block = Bottleneck(out_channels, out_channels // 4)
            self.conv = nn.Conv2d(out_channels, out_channels, kernel_size=1, bias=True)
            self.bn = nn.BatchNorm2d(out_channels)

        self.bbox_layer = nn.Conv2d(out_channels, self.num_priors * 4, **cfg.head_layer_params)
        self.conf_layer = nn.Conv2d(out_channels, self.num_priors * self.num_classes, **cfg.head_layer_params)
        self.mask_layer = nn.Conv2d(out_channels, self.num_priors * self.mask_dim, **cfg.head_layer_params)

        if cfg.use_mask_scoring:
            self.score_layer = nn.Conv2d(out_channels, self.num_priors, **cfg.head_layer_params)

        if cfg.use_instance_coeff:
            self.inst_layer = nn.Conv2d(
                out_channels, self.num_priors * cfg.num_instance_coeffs, **cfg.head_layer_params)

        def make_extra(num_layers):
            """Return ``num_layers`` conv(3x3)+ReLU pairs, or a pass-through function for 0."""
            if num_layers == 0:
                return lambda x: x

            # Alternating conv / relu, created in the same order as before.
            layers = []
            for _ in range(num_layers):
                layers.append(nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1))
                layers.append(nn.ReLU(inplace=True))
            return nn.Sequential(*layers)

        # cfg.extra_layers must hold exactly three counts: bbox, conf, mask.
        self.bbox_extra, self.conf_extra, self.mask_extra = [make_extra(x) for x in cfg.extra_layers]

        if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_coeff_gate:
            self.gate_layer = nn.Conv2d(out_channels, self.num_priors * self.mask_dim, kernel_size=3, padding=1)

    self.aspect_ratios = aspect_ratios
    self.scales = scales

    # Initialised empty here; presumably filled in lazily elsewhere.
    self.priors = None
    self.last_conv_size = None
    self.last_img_size = None
def __init__(self, cfg: YolactConfig):
    """Assemble the full model: backbone, optional proto net / FPN, heads, detector.

    Besides setting attributes on ``self``, this constructor writes two values
    back into ``cfg``: ``cfg.mask_dim`` (derived from the mask type) and
    ``cfg.num_heads`` (number of selected layers). Both are written before the
    prediction modules are built.

    Args:
        cfg: Configuration object; stored as ``self.cfg``. ``cfg.backbone``
            must not be None.
    """
    super().__init__()
    self.cfg = cfg

    assert cfg.backbone is not None
    self.backbone = construct_backbone(cfg.backbone)
    self._current_img_size = ImageSize(-1, -1)

    if cfg.freeze_bn:
        self.freeze_bn()

    # Compute mask_dim here and add it back to the config.
    # Make sure Yolact's constructor is called early!
    if cfg.mask_type == MaskType.DIRECT:
        cfg.mask_dim = cfg.mask_size**2
    elif cfg.mask_type == MaskType.LINCOMB:
        if not cfg.mask_proto_use_grid:
            self.num_grids = 0
        else:
            self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
            self.num_grids = self.grid.size(0)

        proto_src = cfg.mask_proto_src
        self.proto_src = proto_src

        if proto_src is None:
            # No source layer configured; presumably the raw 3-channel image is used.
            proto_in_channels = 3
        elif cfg.fpn is not None:
            proto_in_channels = cfg.fpn.num_features
        else:
            proto_in_channels = self.backbone.channels[proto_src]
        proto_in_channels += self.num_grids

        # include_last_relu=False because we might want to change it to another function.
        proto_net, proto_dim = make_net(proto_in_channels, cfg.mask_proto_net, include_last_relu=False)
        self.proto_net = proto_net
        cfg.mask_dim = proto_dim

        if cfg.mask_proto_bias:
            cfg.mask_dim += 1

    self.selected_layers = cfg.backbone.selected_layers
    feature_channels = self.backbone.channels

    if cfg.use_maskiou:
        self.maskiou_net = FastMaskIoUNet(cfg)

    if cfg.fpn is not None:
        # Some hacky rewiring to accommodate the FPN: after this, the selected
        # layers are simply 0..N-1 and every one has cfg.fpn.num_features channels.
        fpn_inputs = [feature_channels[i] for i in self.selected_layers]
        self.fpn = FPN(fpn_inputs, cfg)

        num_levels = len(self.selected_layers) + cfg.fpn.num_downsample
        self.selected_layers = list(range(num_levels))
        feature_channels = [cfg.fpn.num_features] * len(self.selected_layers)

    self.prediction_layers = nn.ModuleList()
    cfg.num_heads = len(self.selected_layers)

    for head_idx, src_idx in enumerate(self.selected_layers):
        # If we're sharing prediction module weights, every module after the
        # first gets the first one as its parent.
        share_with_first = cfg.share_prediction_module and head_idx > 0
        parent = self.prediction_layers[0] if share_with_first else None

        channels = feature_channels[src_idx]
        # NOTE(review): four positional arguments are passed here; confirm they
        # match the parameter order of PredictionModule's constructor.
        head = PredictionModule(
            cfg,
            channels,
            self._current_img_size,
            channels,
            aspect_ratios=cfg.backbone.pred_aspect_ratios[head_idx],
            scales=cfg.backbone.pred_scales[head_idx],
            parent=parent,
            index=head_idx,
        )
        self.prediction_layers.append(head)

    # Extra parameters for the extra losses.
    if cfg.use_class_existence_loss:
        # This comes from the smallest layer selected.
        # Also note that cfg.num_classes includes background.
        self.class_existence_fc = nn.Linear(feature_channels[-1], cfg.num_classes - 1)

    if cfg.use_semantic_segmentation_loss:
        self.semantic_seg_conv = nn.Conv2d(feature_channels[0], cfg.num_classes - 1, kernel_size=1)

    # For use in evaluation.
    self.detect = Detect(
        cfg.num_classes,
        bkg_label=0,
        top_k=cfg.nms_top_k,
        conf_thresh=cfg.nms_conf_thresh,
        nms_thresh=cfg.nms_thresh,
        cfg=self.cfg,
    )

    self.mask_proto_prototype_activation = get_activation_function(
        cfg.mask_proto_prototype_activation)