def __init__(self):
    """Assemble the network from the module-level ``cfg``.

    Builds the backbone, the mask prototype branch, the FPN and the shared
    prediction head, optionally the semantic segmentation conv, and finally
    precomputes the anchor tensor and moves it to the CUDA device.

    Side effect: ``cfg.coef_dim`` is overwritten with the second value
    returned by ``make_net``.
    """
    super().__init__()

    self.backbone = construct_backbone(cfg.backbone)
    if cfg.freeze_bn:
        self.freeze_bn()

    fpn_channels = cfg.fpn.num_features  # 256

    # Mask prototype branch. Layer dump recorded by the original author:
    #   Sequential(
    #     (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    #     (1): ReLU(inplace)
    #     (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    #     (3): ReLU(inplace)
    #     (4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    #     (5): ReLU(inplace)
    #     (6): InterpolateModule()
    #     (7): ReLU(inplace)
    #     (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    #     (9): ReLU(inplace)
    #     (10): Conv2d(256, 32, kernel_size=(1, 1), stride=(1, 1)))
    #   cfg.coef_dim: 32
    proto_net, coef_dim = make_net(fpn_channels, cfg.mask_proto_net, include_last_relu=False)
    self.proto_net = proto_net
    cfg.coef_dim = coef_dim

    self.fpn = FPN([512, 1024, 2048])
    self.selected_layers = [0, 1, 2, 3, 4]

    # The single head is wrapped in a ModuleList so that parameter names
    # match the original pre-trained weights (original model state_dict).
    # Layer dump recorded by the original author:
    #   ModuleList(
    #     (0): PredictionModule(
    #       (upfeature): Sequential(
    #         (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    #         (1): ReLU(inplace))
    #       (bbox_layer): Conv2d(256, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    #       (conf_layer): Conv2d(256, 243, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    #       (mask_layer): Conv2d(256, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))))
    self.prediction_layers = nn.ModuleList([PredictionModule(fpn_channels)])

    if cfg.train_semantic:  # True
        self.semantic_seg_conv = nn.Conv2d(256, cfg.num_classes - 1, kernel_size=1)

    # One batch of anchors per entry of cfg.hws; make_anchors receives
    # element 1 of the entry first, then element 0, then the level's scales.
    anchor_list = []
    for level, fm_hw in enumerate(cfg.hws):
        anchor_list.extend(make_anchors(fm_hw[1], fm_hw[0], cfg.backbone.scales[level]))

    self.anchors = torch.Tensor(anchor_list).view(-1, 4).cuda()
def forward(self, x):
    """Run backbone, FPN, prototype branch and shared head on a batch.

    Args:
        x: image batch passed straight to ``self.backbone`` (the original
            notes record an (n, 3, 550, 550) input).

    Returns:
        dict with 'box', 'class' and 'coef' (per-level head outputs
        concatenated along dim -2), 'proto' (prototype output permuted to
        channel-last) and 'anchors'. In training mode 'segm' is added when
        ``cfg.train_semantic`` is truthy; in eval mode 'class' is replaced
        by its softmax over the last dim.
    """
    feats = self.backbone(x)  # changed
    feats = [feats[idx] for idx in [1, 2, 3]]
    feats = self.fpn(feats[0], feats[1], feats[2])
    # Shapes recorded by the original author:
    #   (n, 3, 550, 550) -> backbone -> (n, 256, 138, 138)
    #                                   (n, 512, 69, 69)    -> fpn -> (n, 256, 69, 69) P3
    #                                   (n, 1024, 35, 35)             (n, 256, 35, 35) P4
    #                                   (n, 2048, 18, 18)             (n, 256, 18, 18) P5
    #                                                                 (n, 256, 9, 9)   P6
    #                                                                 (n, 256, 5, 5)   P7

    # Anchors are materialised once: while self.anchors is still a list it
    # is filled from the FPN output sizes and then frozen into an (-1, 4) tensor.
    if isinstance(self.anchors, list):  # changed
        for level, fmap in enumerate(feats):
            fm_shape = list(fmap.shape)
            self.anchors.extend(
                make_anchors(torch.tensor(fm_shape[2]), torch.tensor(fm_shape[3]), cfg.scales[level])
            )
        self.anchors = torch.Tensor(self.anchors).view(-1, 4)

    # feats[0]: [2, 256, 69, 69], the feature map from P3
    proto_out = F.relu(self.proto_net(feats[0]), inplace=True)  # (n, 32, 138, 138)
    proto_out = proto_out.permute(0, 2, 3, 1).contiguous()

    # The same head (index 0) is applied to every selected level.
    # self.selected_layers: [0, 1, 2, 3, 4]
    per_level = [self.prediction_layers[0](feats[idx]) for idx in self.selected_layers]

    collected = {'box': [], 'class': [], 'coef': []}
    for head_out in per_level:
        for key, value in head_out.items():
            collected[key].append(value)

    predictions = {key: torch.cat(parts, -2) for key, parts in collected.items()}
    predictions['proto'] = proto_out
    predictions['anchors'] = self.anchors

    if not self.training:
        predictions['class'] = F.softmax(predictions['class'], -1)
        return predictions

    if cfg.train_semantic:  # True
        predictions['segm'] = self.semantic_seg_conv(feats[0])
    return predictions
def forward(self, x):
    """Run the timed backbone/FPN/proto/head pipeline on a batch of images.

    Args:
        x: image batch passed straight to ``self.backbone`` (the original
            notes record an (n, 3, 550, 550) input).

    Returns:
        dict with 'box', 'class' and 'coef' (per-level head outputs
        concatenated along dim -2), 'proto' (prototype output permuted to
        channel-last) and 'anchors'. In training mode 'segm' is added when
        ``cfg.train_semantic`` is truthy; in eval mode 'class' is replaced
        by its softmax over the last dim.
    """
    with timer.env('backbone'):
        outs = self.backbone(x)

    with timer.env('fpn'):
        outs = [outs[i] for i in cfg.backbone.selected_layers]
        outs = self.fpn(outs)
    # Shapes recorded by the original author:
    #   (n, 3, 550, 550) -> backbone -> (n, 256, 138, 138)
    #                                   (n, 512, 69, 69)    -> fpn -> [n, 256, 69, 69] P3
    #                                   (n, 1024, 35, 35)             [n, 256, 35, 35] P4
    #                                   (n, 2048, 18, 18)             [n, 256, 18, 18] P5
    #                                                                 [n, 256, 9, 9]   P6
    #                                                                 [n, 256, 5, 5]   P7

    # Anchors are materialised once: while self.anchors is still a list it
    # is filled from the FPN output sizes and then frozen into an (-1, 4) tensor.
    if isinstance(self.anchors, list):
        for i, fmap in enumerate(outs):
            shape = list(fmap.shape)
            self.anchors += make_anchors(shape[2], shape[3], cfg.backbone.scales[i])

        # Follow the device of the feature maps instead of a hard-coded
        # .cuda(): identical on a GPU model, and no longer fails when the
        # model runs on CPU.
        self.anchors = torch.Tensor(self.anchors).view(-1, 4).to(outs[0].device)

    with timer.env('proto'):
        # outs[0]: [2, 256, 69, 69], the feature map from P3
        proto_out = self.proto_net(outs[0])  # proto_out: [2, 32, 138, 138]
        proto_out = F.relu(proto_out, inplace=True)
        proto_out = proto_out.permute(0, 2, 3, 1).contiguous()

    with timer.env('pred_heads'):
        predictions = {'box': [], 'class': [], 'coef': []}

        # The same head (index 0) is applied to every selected level.
        for i in self.selected_layers:  # self.selected_layers [0, 1, 2, 3, 4]
            p = self.prediction_layers[0](outs[i])

            for k, v in p.items():
                predictions[k].append(v)

    # Only values of existing keys are replaced, so the dict size is stable
    # while iterating.
    for k, v in predictions.items():
        predictions[k] = torch.cat(v, -2)

    predictions['proto'] = proto_out
    predictions['anchors'] = self.anchors

    if self.training:
        if cfg.train_semantic:  # True
            predictions['segm'] = self.semantic_seg_conv(outs[0])
        return predictions

    predictions['class'] = F.softmax(predictions['class'], -1)
    return predictions
def forward(self, img, box_classes=None, masks_gt=None):
    """Compute the loss (training) or raw predictions (eval) for a batch.

    Args:
        img: image batch passed straight to ``self.backbone`` (the original
            notes record an (n, 3, 550, 550) input).
        box_classes: forwarded unchanged to ``self.compute_loss``; only
            used in training mode.
        masks_gt: forwarded unchanged to ``self.compute_loss``; only used
            in training mode.

    Returns:
        Training mode: whatever ``self.compute_loss`` returns.
        Eval mode: the tuple ``(class_pred, box_pred, coef_pred, proto_out,
        self.anchors)`` with ``class_pred`` softmaxed over the last dim.
    """
    feats = self.backbone(img)
    feats = self.fpn(feats[1:4])
    # Shapes recorded by the original author:
    #   (n, 3, 550, 550) -> backbone -> (n, 256, 138, 138)
    #                                   (n, 512, 69, 69)    -> fpn -> (n, 256, 69, 69) P3
    #                                   (n, 1024, 35, 35)             (n, 256, 35, 35) P4
    #                                   (n, 2048, 18, 18)             (n, 256, 18, 18) P5
    #                                                                 (n, 256, 9, 9)   P6
    #                                                                 (n, 256, 5, 5)   P7

    # Anchors are materialised once: while self.anchors is still a list it
    # is extended from the FPN output sizes, then turned into an (-1, 4)
    # tensor on the same device as the feature maps.
    if isinstance(self.anchors, list):
        for level, fmap in enumerate(feats):
            fm_shape = list(fmap.shape)
            self.anchors.extend(
                make_anchors(self.cfg, fm_shape[2], fm_shape[3], self.cfg.scales[level])
            )
        self.anchors = torch.tensor(self.anchors, device=feats[0].device).reshape(-1, 4)

    # feats[0]: [2, 256, 69, 69], the feature map from P3
    proto_out = F.relu(self.proto_net(feats[0]), inplace=True)  # (n, 32, 138, 138)
    proto_out = proto_out.permute(0, 2, 3, 1).contiguous()

    # The same head (index 0) is applied to every FPN level; each call
    # yields a (class, box, coef) triple.
    per_level = [self.prediction_layers[0](fmap) for fmap in feats]
    class_pred = torch.cat([cls_p for cls_p, _, _ in per_level], dim=1)
    box_pred = torch.cat([box_p for _, box_p, _ in per_level], dim=1)
    coef_pred = torch.cat([coef_p for _, _, coef_p in per_level], dim=1)

    if not self.training:
        return F.softmax(class_pred, -1), box_pred, coef_pred, proto_out, self.anchors

    seg_pred = self.semantic_seg_conv(feats[0])
    return self.compute_loss(class_pred, box_pred, coef_pred, proto_out, seg_pred, box_classes, masks_gt)
def __init__(self, cfg):
    """Build the model for the backbone implied by the config class name.

    Args:
        cfg: configuration object. Its class name must start with
            'res101', 'res50' or 'swin_tiny'. The attributes read here are
            ``img_size``, ``scales``, ``mode`` and ``num_classes``; ``cfg``
            is also handed to ``PredictionModule`` and ``make_anchors``.

    Raises:
        ValueError: if the config class name matches none of the supported
            prefixes. Previously this fell through silently and left
            ``self.backbone`` / ``self.fpn`` undefined.
    """
    super().__init__()
    self.cfg = cfg
    self.coef_dim = 32

    cfg_name = cfg.__class__.__name__
    if cfg_name.startswith('res101'):
        self.backbone = ResNet(layers=(3, 4, 23, 3))
        self.fpn = FPN(in_channels=(512, 1024, 2048))
    elif cfg_name.startswith('res50'):
        self.backbone = ResNet(layers=(3, 4, 6, 3))
        self.fpn = FPN(in_channels=(512, 1024, 2048))
    elif cfg_name.startswith('swin_tiny'):
        self.backbone = SwinTransformer()
        self.fpn = FPN(in_channels=(192, 384, 768))
    else:
        raise ValueError(
            f"Unsupported config class '{cfg_name}': expected a name starting "
            "with 'res101', 'res50' or 'swin_tiny'."
        )

    self.proto_net = ProtoNet(coef_dim=self.coef_dim)
    self.prediction_layers = PredictionModule(cfg, coef_dim=self.coef_dim)

    # Anchors are kept as a plain Python list here; this block does not
    # convert them to a tensor. One square feature map per stride.
    self.anchors = []
    fpn_fm_shape = [math.ceil(cfg.img_size / stride) for stride in (8, 16, 32, 64, 128)]
    for i, size in enumerate(fpn_fm_shape):
        self.anchors += make_anchors(self.cfg, size, size, self.cfg.scales[i])

    # The segmentation conv only exists when the config is in train mode.
    if cfg.mode == 'train':
        self.semantic_seg_conv = nn.Conv2d(256, cfg.num_classes - 1, kernel_size=1)

    # init weights, backbone weights will be covered later
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            nn.init.xavier_uniform_(module.weight.data)

            if module.bias is not None:
                module.bias.data.zero_()
self.host = host_mem self.device = device_mem def __str__(self): return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device) def __repr__(self): return self.__str__() anchors = [] fpn_fm_shape = [ math.ceil(cfg.img_size / stride) for stride in (8, 16, 32, 64, 128) ] for i, size in enumerate(fpn_fm_shape): anchors += make_anchors(cfg, size, size, cfg.scales[i]) # prepare engine with open(cfg.weight, 'rb') as f, trt.Runtime(trt.Logger(trt.Logger.WARNING)) as runtime: engine = runtime.deserialize_cuda_engine(f.read()) inputs, outputs, bindings = [], [], [] stream = cuda.Stream() for binding in engine: size = trt.volume( engine.get_binding_shape(binding)) * engine.max_batch_size dtype = trt.nptype(engine.get_binding_dtype(binding)) # Allocate host and device buffers host_mem = cuda.pagelocked_empty(size, dtype)