def evalimage(net: Yolact, path: str, save_path: str = None):
    """Run ``net`` on one image, export it to ONNX and save the rendering.

    The five network outputs are passed to ``Detect`` as ``loc``, ``conf``,
    ``mask``, ``priors`` and ``proto`` (in that order). As a side effect the
    network is exported to ``yolact.onnx`` on every call.

    Args:
        net: Network called on the transformed batch and exported to ONNX.
        path: Image file read with ``cv2.imread``.
        save_path: Destination handed to ``cv2.imwrite``. When ``None``,
            nothing is written and the rendered image is returned instead.

    Returns:
        The rendered image with its channel axis reversed when ``save_path``
        is ``None``; otherwise ``None``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``path``.
    """
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising,
        # which would otherwise surface as an opaque torch.from_numpy error.
        raise FileNotFoundError('Could not read image: {}'.format(path))

    frame = torch.from_numpy(image).float()
    batch = FastBaseTransform()(frame.unsqueeze(0))
    pred_outs = net(batch)

    detect = Detect(cfg.num_classes, bkg_label=0, top_k=200,
                    conf_thresh=0.05, nms_thresh=0.5)
    preds = detect({
        'loc': pred_outs[0],
        'conf': pred_outs[1],
        'mask': pred_outs[2],
        'priors': pred_outs[3],
        'proto': pred_outs[4]
    })

    # Export uses a fixed 1x3x550x550 dummy input.
    dummy_input = Variable(torch.randn(1, 3, 550, 550))
    torch.onnx.export(net, dummy_input, "yolact.onnx", verbose=False,
                      opset_version=11)

    img_numpy = prep_display(preds, frame, None, None, undo_transform=False)

    if save_path is None:
        # Previously cv2.imwrite was still called with a None path here.
        return img_numpy[:, :, (2, 1, 0)]

    cv2.imwrite(save_path, img_numpy)
def evalimage(net:Yolact, path:str, save_path:str=None):
    """Run the exported ``yolact.onnx`` model on one image and show or save it.

    The image is read with ``cv2.imread``, transformed with
    ``FastBaseTransform`` and fed to an ONNX Runtime session. The first five
    session outputs are handed to ``Detect`` as ``loc``, ``conf``, ``mask``,
    ``priors`` and ``proto``.

    Args:
        net: Unused here; inference goes through the ONNX session.
        path: Image file to read; also used as the plot title.
        save_path: If given, the rendering is written there with
            ``cv2.imwrite``; otherwise it is displayed with matplotlib after
            reversing the channel axis.
    """
    frame = torch.from_numpy(cv2.imread(path)).float()
    batch = FastBaseTransform()(frame.unsqueeze(0))

    session = rt.InferenceSession("yolact.onnx")
    feed_name = session.get_inputs()[0].name
    output_names = [session.get_outputs()[idx].name for idx in range(5)]
    pred_onx = session.run(output_names,
                           {feed_name: batch.cpu().detach().numpy()})

    detect = Detect(cfg.num_classes, bkg_label=0, top_k=200,
                    conf_thresh=0.05, nms_thresh=0.5)
    keys = ('loc', 'conf', 'mask', 'priors', 'proto')
    preds = detect({
        key: torch.from_numpy(pred_onx[idx]) for idx, key in enumerate(keys)
    })

    img_numpy = prep_display(preds, frame, None, None, undo_transform=False)

    if save_path is not None:
        cv2.imwrite(save_path, img_numpy)
        return

    # Display path: reverse the channel axis before handing to matplotlib.
    img_numpy = img_numpy[:, :, (2, 1, 0)]
    plt.imshow(img_numpy)
    plt.title(path)
    plt.show()
def __init__(self, config, phase, base, extras, head, num_classes, top_k=200):
    """Assemble the SSD network from pre-built layer lists.

    Args:
        config: Passed to ``PriorBox``; ``config['variance']`` is forwarded
            to ``Detect`` in the test phase.
        phase: Stored on the instance; ``'test'`` additionally creates the
            softmax and ``Detect`` post-processing members.
        base: Layers wrapped into ``self.vgg``.
        extras: Layers wrapped into ``self.extras``.
        head: Pair ``(loc_layers, conf_layers)``.
        num_classes: Stored and forwarded to ``Detect``.
        top_k: Forwarded to ``Detect`` as its third positional argument.
    """
    super(SSD, self).__init__()
    self.phase = phase
    self.num_classes = num_classes
    # TODO: implement __call__ in PriorBox
    self.priorbox = PriorBox(config)
    # `volatile` was removed in PyTorch 0.4 (it only emits a warning and has
    # no effect); requires_grad=False keeps the priors out of autograd.
    self.priors = Variable(self.priorbox.forward(), requires_grad=False)
    # SSD network
    self.vgg = nn.ModuleList(base)
    # Layer learns to scale the l2 normalized features from conv4_3
    self.L2Norm = L2Norm(512, 20)
    self.extras = nn.ModuleList(extras)

    self.loc = nn.ModuleList(head[0])
    self.conf = nn.ModuleList(head[1])

    if phase == 'test':
        # Explicit dim: the implicit choice is deprecated and depends on the
        # input rank. NOTE(review): assumes class scores are on the last axis
        # in forward() - confirm.
        self.softmax = nn.Softmax(dim=-1)
        self.detect = Detect(num_classes, 0, top_k, 0.01, 0.45,
                             variance=config['variance'])
def __init__(self, model_path, device='cuda'):
    """Open an ONNX Runtime session for ``model_path`` and cache its I/O names.

    Args:
        model_path: Path handed to ``onnxruntime.InferenceSession``.
        device: Stored on the instance; not used when creating the session.
    """
    self.sess = onnxruntime.InferenceSession(model_path)
    self.device = device

    # First five session outputs, in order: loc, conf, mask, priors, proto.
    self.names = [self.sess.get_outputs()[idx].name for idx in range(5)]
    self.input_name = self.sess.get_inputs()[0].name

    # For use in evaluation
    self.detect = Detect(cfg.num_classes,
                         bkg_label=0,
                         top_k=200,
                         conf_thresh=0.05,
                         nms_thresh=0.5)
def __init__(self, cfg, phase='train'):
    """Build the FaceBox layers.

    Args:
        cfg: Only used to construct ``Detect`` when ``phase == 'test'``.
        phase: Stored on the instance; ``'test'`` additionally creates the
            softmax and detection members.
    """
    super(FaceBox, self).__init__()
    self.phase = phase
    # model
    self.conv1 = nn.Conv2d(3, 24, kernel_size=7, stride=4, padding=3, bias=False)
    self.bn1 = nn.BatchNorm2d(24)
    # NOTE(review): conv2 takes 48 input channels although conv1 emits 24 -
    # presumably forward() doubles the channels in between (e.g. CReLU);
    # confirm against forward().
    self.conv2 = nn.Conv2d(48, 64, kernel_size=5, stride=2, padding=2, bias=False)
    self.bn2 = nn.BatchNorm2d(64)

    self.inception1 = Inception()
    self.inception2 = Inception()
    self.inception3 = Inception()

    self.conv3_1 = conv_bn_relu(128, 128, kernel_size=1)
    self.conv3_2 = conv_bn_relu(128, 256, kernel_size=3, stride=2, padding=1)
    self.conv4_1 = conv_bn_relu(256, 128, kernel_size=1)
    self.conv4_2 = conv_bn_relu(128, 256, kernel_size=3, stride=2, padding=1)

    self.multilbox = MultiBoxLayer()

    # Post-processing members exist only in the test phase.
    if self.phase == 'test':
        self.softmax = nn.Softmax(dim=-1)
        self.test_det = Detect(cfg)
def __init__(self, phase, size, base, extras, head, num_classes):
    """Assemble the SSD network from pre-built layer lists.

    Args:
        phase: Stored on the instance; ``'test'`` additionally creates the
            softmax and ``Detect`` members.
        size: Stored on the instance.
        base: Layers wrapped into ``self.vgg``.
        extras: Layers wrapped into ``self.extras``.
        head: Pair ``(loc_layers, conf_layers)``.
        num_classes: Selects the ``voc`` config when equal to 21, otherwise
            ``coco``; also forwarded to ``Detect``.
    """
    super(SSD, self).__init__()
    self.phase = phase
    self.num_classes = num_classes
    self.cfg = voc if num_classes == 21 else coco
    self.priorbox = PriorBox(self.cfg)
    # `volatile` was removed in PyTorch 0.4 (it only emits a warning and has
    # no effect); requires_grad=False keeps the priors out of autograd.
    self.priors = Variable(self.priorbox.forward(), requires_grad=False)
    self.size = size

    # SSD network
    self.vgg = nn.ModuleList(base)
    # Layer learns to scale the l2 normalized features from conv4_3
    self.L2Norm = L2Norm(512, 20)
    self.extras = nn.ModuleList(extras)

    self.loc = nn.ModuleList(head[0])
    self.conf = nn.ModuleList(head[1])

    if phase == 'test':
        self.softmax = nn.Softmax(dim=-1)
        self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
def evalimage(net:Yolact, path:str, save_path:str=None):
    """Render detections from pre-dumped network outputs and display them.

    The five raw outputs are loaded from ``0.npy`` ... ``4.npy`` in the
    working directory (instead of running an ONNX session), their shapes are
    printed, and they are passed to ``Detect`` as ``loc``, ``conf``, ``mask``,
    ``priors`` and ``proto`` together with ``net``.

    Args:
        net: Forwarded as the second argument of the ``Detect`` call.
        path: Image file to read; also used as the plot title.
        save_path: Accepted for interface compatibility; not used.
    """
    frame = torch.from_numpy(cv2.imread(path)).float()
    # The transformed batch is still computed although the outputs below come
    # from disk.
    batch = FastBaseTransform()(frame.unsqueeze(0))

    pred_onx = [np.load('{}.npy'.format(idx)) for idx in range(5)]
    for array in pred_onx:
        print(array.shape)

    detect = Detect(81, bkg_label=0, top_k=200, conf_thresh=0.05,
                    nms_thresh=0.5)
    keys = ('loc', 'conf', 'mask', 'priors', 'proto')
    tensors = {
        key: torch.from_numpy(pred_onx[idx]) for idx, key in enumerate(keys)
    }
    preds = detect(tensors, net)

    img_numpy = prep_display(preds, frame, None, None, undo_transform=False)

    plt.imshow(img_numpy)
    plt.title(path)
    plt.show()
def __init__(self, classes, size):
    """Build the Retina detector on a pretrained ResNet-101.

    Args:
        classes: Sequence of class labels; ``num_classes`` is its length
            plus one.
        size: Stored on the instance and passed to ``prior_box``.
    """
    super(Retina, self).__init__()
    self.size = size
    self.priors = torch.autograd.Variable(prior_box(size), requires_grad=False)
    self.classes = classes
    self.num_classes = len(classes) + 1

    self._backbone = resnet101(pretrained=True)
    names, layers = zip(*list(
        self._backbone.named_children())[:-2])  # leave off avgpool and fc

    # Group the backbone children into stages: each group starts at index i
    # and extends up to (not including) the next child whose name starts
    # with 'layer'.
    # NOTE(review): this is a plain Python list, so the groups are registered
    # as submodules only through self._backbone - confirm this is intended.
    self.backbone = []
    i = 0
    while i < len(names):
        j = i + 1
        while j < len(names) and not (names[j].startswith('layer')):
            j += 1
        self.backbone.append(torch.nn.Sequential(*layers[i:j]))
        i = j

    self.conv6 = torch.nn.Conv2d(2048, 256, 3, stride=2, padding=1)
    self.conv7 = torch.nn.Conv2d(256, 256, 3, stride=2, padding=1)
    self.conv5 = torch.nn.Conv2d(2048, 256, 3, padding=1)
    self.conv4 = torch.nn.Conv2d(1024, 256, 1)
    self.conv3 = torch.nn.Conv2d(512, 256, 1)
    self.conv2 = torch.nn.Conv2d(256, 256, 1)

    self.loc = self.mk_subnet(4, include_sigmoid=False)
    self.conf = self.mk_subnet(self.num_classes, include_sigmoid=False)
    self.detect = Detect(self.num_classes, 0, 200, 0.01, 0.45)
def __init__(self, phase, size, base, extras, head, num_classes):
    """Assemble the TBPP network from pre-built layer lists.

    The prior-box configuration is hard-coded in ``self.cfg`` below.

    Args:
        phase: Stored on the instance; ``'test'`` additionally creates the
            softmax and ``Detect`` members.
        size: Stored on the instance.
        base: Layers wrapped into ``self.vgg``.
        extras: Layers wrapped into ``self.extras``.
        head: Pair ``(loc_layers, conf_layers)``.
        num_classes: Stored and forwarded to ``Detect``.
    """
    super(TBPP, self).__init__()
    self.phase = phase
    self.num_classes = num_classes
    self.cfg = {
        'num_classes': 2,
        'lr_steps': (80000, 100000, 120000),
        'max_iter': 120000,
        'feature_maps': [64, 32, 16, 8, 4, 2, 1],
        'min_dim': 512,
        'steps': [8, 16, 32, 64, 128, 256, 512],
        'min_sizes': [20, 51, 133, 215, 296, 378, 460],
        'max_sizes': [51, 133, 215, 296, 378, 460, 542],
        'aspect_ratios': [[2, 3], [2, 3, 5], [2, 3, 5], [2, 3, 5],
                          [2, 3, 5], [2, 3], [2, 3]],  # TODO
        'variance': [0.1, 0.2],
        'clip': True,
        'name': 'MINE'
    }
    # calculate the size of prior boxes, i.e. defaults boxes
    self.priorbox = PriorBox(self.cfg)
    # `volatile` was removed in PyTorch 0.4 (it only emits a warning and has
    # no effect); requires_grad=False keeps the priors out of autograd.
    self.priors = Variable(self.priorbox.forward(), requires_grad=False)
    self.size = size

    # TBPP network
    self.vgg = nn.ModuleList(base)
    self.L2Norm = L2Norm(512, 20)
    self.extras = nn.ModuleList(extras)

    self.loc = nn.ModuleList(head[0])
    self.conf = nn.ModuleList(head[1])

    if phase == 'test':
        self.softmax = nn.Softmax(dim=-1)
        self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
def __init__(self, num_classes, phase, pretrain=False, finetune=None):
    """Build SSD300 and optionally load pretrained or fine-tune weights.

    Args:
        num_classes: Stored and forwarded to ``Detect``.
        phase: Stored on the instance; ``'test'`` additionally creates the
            softmax and ``Detect`` members.
        pretrain: When truthy, ``self._load_weight()`` runs after weight
            initialisation.
        finetune: When not ``None``, passed to ``self._finetune``.
    """
    super(SSD300, self).__init__()
    self.num_classes = num_classes
    self.phase = phase

    self.base_net = self._base_net()
    self.extra_net = self._extra_net()
    self.loc_pred, self.cls_pred = self._predict_net()
    self.L2Norm = L2Norm(512, 20)

    self.priorbox = PriorBox(v2)
    # `volatile` was removed in PyTorch 0.4 (it only emits a warning and has
    # no effect); requires_grad=False keeps the priors out of autograd.
    self.priors = Variable(self.priorbox.forward(), requires_grad=False)

    if phase == 'test':
        # Explicit dim: the implicit choice is deprecated and depends on the
        # input rank. NOTE(review): assumes class scores are on the last axis
        # in forward() - confirm.
        self.softmax = nn.Softmax(dim=-1)
        self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    self._init_weight()
    if pretrain:
        self._load_weight()
    if finetune is not None:
        self._finetune(finetune)
def __init__(self, phase, base, extras, lfpn_cpm, head, num_classes):
    """Assemble PyramidBox from pre-built layer lists.

    Args:
        phase: ``'test'`` enables inference mode (softmax + ``Detect``).
        base: Layers wrapped into ``self.vgg``.
        extras: Layers wrapped into ``self.extras``.
        lfpn_cpm: Indexable triple ``(topdown, later, cpm)`` of layer lists.
        head: Pair ``(loc_layers, conf_layers)``.
        num_classes: Accepted but not used in this constructor.
    """
    super(PyramidBox, self).__init__()
    #self.use_transposed_conv2d = use_transposed_conv2d
    self.vgg = nn.ModuleList(base)
    self.extras = nn.ModuleList(extras)

    self.L2Norm3_3 = L2Norm(256, 10)
    self.L2Norm4_3 = L2Norm(512, 8)
    self.L2Norm5_3 = L2Norm(512, 5)

    # The LFPN / CPM layers used to be hard-coded here:
    #   lfpn_topdown: Conv2d(1024, 512, 1, 1), Conv2d(512, 512, 1, 1),
    #                 Conv2d(512, 256, 1, 1)
    #   lfpn_later:   Conv2d(512, 512, 1, 1), Conv2d(512, 512, 1, 1),
    #                 Conv2d(256, 256, 1, 1)
    #   cpm:          CPM(256), CPM(512), CPM(512), CPM(1024), CPM(512),
    #                 CPM(256)
    # They are now supplied by the caller through `lfpn_cpm`.
    self.lfpn_topdown = nn.ModuleList(lfpn_cpm[0])
    self.lfpn_later = nn.ModuleList(lfpn_cpm[1])
    self.cpm = nn.ModuleList(lfpn_cpm[2])

    self.loc_layers = nn.ModuleList(head[0])
    self.conf_layers = nn.ModuleList(head[1])

    self.is_infer = phase == 'test'
    if self.is_infer:
        self.softmax = nn.Softmax(dim=-1)
        self.detect = Detect(cfg)
def __init__(self):
    """Build the network from the global ``cfg``.

    Creates the backbone, the prototype mask net (lincomb mask type), the
    optional FPN and MaskIoU net, one ``PredictionModule_FC`` per selected
    layer, the optional temporal fusion net and the detection/tracking
    helpers. Also writes ``cfg.mask_dim`` and ``cfg.num_heads`` back into the
    global config.
    """
    super().__init__()

    self.backbone = construct_backbone(cfg.backbone)

    if cfg.freeze_bn:
        self.freeze_bn()

    # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early!
    if cfg.mask_type == mask_type.direct:
        cfg.mask_dim = cfg.mask_size**2
    elif cfg.mask_type == mask_type.lincomb:
        if cfg.mask_proto_use_grid:
            self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
            self.num_grids = self.grid.size(0)
        else:
            self.num_grids = 0

        self.proto_src = cfg.mask_proto_src
        # NOTE(review): cfg.fpn is dereferenced here although the branch
        # below still allows cfg.fpn to be None - confirm fpn is mandatory.
        self.interpolation_mode = cfg.fpn.interpolation_mode

        if self.proto_src is None:
            in_channels = 3
        elif cfg.fpn is not None:
            in_channels = cfg.fpn.num_features
        else:
            in_channels = self.backbone.channels[self.proto_src]
        in_channels += self.num_grids

        # The include_last_relu=false here is because we might want to change it to another function
        self.proto_net, cfg.mask_dim = make_net(in_channels, cfg.mask_proto_net, include_last_relu=False)

        if cfg.mask_proto_bias:
            cfg.mask_dim += 1

    self.selected_layers = cfg.backbone.selected_layers
    self.pred_scales = cfg.backbone.pred_scales
    self.pred_aspect_ratios = cfg.backbone.pred_aspect_ratios
    self.num_priors = len(self.pred_scales[0])
    src_channels = self.backbone.channels

    if cfg.use_maskiou:
        self.maskiou_net = FastMaskIoUNet()

    if cfg.fpn is not None:
        # Some hacky rewiring to accomodate the FPN
        self.fpn = FPN([src_channels[i] for i in self.selected_layers])
        if cfg.backbone_C2_as_features:
            # Selected layers start at index 1, so src_channels gets one
            # extra entry to keep src_channels[layer_idx] in range.
            self.selected_layers = list(
                range(1, len(self.selected_layers) + cfg.fpn.num_downsample))
            src_channels = [cfg.fpn.num_features ] * (len(self.selected_layers) + 1)
        else:
            self.selected_layers = list(
                range(len(self.selected_layers) + cfg.fpn.num_downsample))
            src_channels = [cfg.fpn.num_features] * len(
                self.selected_layers)

    # prediction layers for loc, conf, mask
    self.prediction_layers = nn.ModuleList()
    cfg.num_heads = len(self.selected_layers)  # yolact++
    for idx, layer_idx in enumerate(self.selected_layers):
        # If we're sharing prediction module weights, have every module's parent be the first one
        # (parent_t is assigned but not used in this constructor.)
        parent, parent_t = None, None
        if cfg.share_prediction_module and idx > 0:
            parent = self.prediction_layers[0]

        pred = PredictionModule_FC(
            src_channels[layer_idx], src_channels[layer_idx],
            deform_groups=1,
            pred_aspect_ratios=self.pred_aspect_ratios[idx],
            pred_scales=self.pred_scales[idx],
            parent=parent)
        self.prediction_layers.append(pred)

    # parameters in temporal correlation net
    if cfg.temporal_fusion_module:
        # NOTE(review): in_channels is only bound in the lincomb branch
        # above; with mask_type.direct this line would fail - confirm.
        corr_channels = 2 * in_channels + cfg.correlation_patch_size**2
        self.TemporalNet = TemporalNet(corr_channels, cfg.mask_proto_n)
        self.correlation_selected_layer = cfg.correlation_selected_layer

    # evaluation for frame-level tracking
    # NOTE(review): the flattened source does not show whether the next two
    # assignments belong inside the temporal_fusion_module branch; they are
    # kept unconditional here - confirm against the original file.
    self.Detect_TF = Detect_TF(cfg.num_classes, bkg_label=0, top_k=cfg.nms_top_k,
                               conf_thresh=cfg.nms_conf_thresh, nms_thresh=cfg.nms_thresh)
    self.Track_TF = Track_TF()

    # Extra parameters for the extra losses
    if cfg.use_class_existence_loss:
        # This comes from the smallest layer selected
        # Also note that cfg.num_classes includes background
        self.class_existence_fc = nn.Linear(src_channels[-1], cfg.num_classes - 1)

    if cfg.use_semantic_segmentation_loss:
        self.semantic_seg_conv = nn.Conv2d(src_channels[0], cfg.num_classes - 1, kernel_size=1)

    # For use in evaluation
    self.detect = Detect(cfg.num_classes, bkg_label=0, top_k=cfg.nms_top_k,
                         conf_thresh=cfg.nms_conf_thresh, nms_thresh=cfg.nms_thresh)
    self.Track = Track()
set_type = 'test' # load data dataset = VOCDetection(args.voc_root, [('2007', set_type)], dataset_name='VOC0712') # load net torch.set_grad_enabled(False) load_to_cpu = not args.cuda cudnn.benchmark = True device = torch.device('cuda' if args.cuda else 'cpu') if args.wo_refined_anchor: detect = Detect(num_classes, int(args.input_size), 0, confidence_threshold=args.confidence_threshold, nms_threshold=args.nms_threshold, top_k=args.top_k, keep_top_k=args.keep_top_k) else: detect = Detect_RefineDet( num_classes, int(args.input_size), 0, objectness_threshold, confidence_threshold=args.confidence_threshold, nms_threshold=args.nms_threshold, top_k=args.top_k, keep_top_k=args.keep_top_k) net = build_refinedet('test', int(args.input_size), num_classes, backbone_dict)
def train():
    """Train SSD on the dataset selected by ``args.dataset``.

    Builds the dataset and network, then runs SGD from ``args.start_iter`` to
    ``cfg['max_iter']``. Losses are logged through ``writer`` every 10
    iterations, a prediction / ground-truth image pair every 100 iterations
    and a gradient-flow figure every 500. A checkpoint is written every 5000
    iterations and once more after the loop.

    All inputs come from module-level state (``args``, ``parser``,
    ``writer``); nothing is returned.
    """
    if args.dataset == 'COCO':
        if args.dataset_root == VOC_ROOT:
            if not os.path.exists(COCO_ROOT):
                parser.error('Must specify dataset_root if specifying dataset')
            print("WARNING: Using default COCO dataset_root because " +
                  "--dataset_root was not specified.")
            args.dataset_root = COCO_ROOT
        cfg = coco
        dataset = COCODetection(root=args.dataset_root,
                                transform=SSDAugmentation(
                                    cfg['min_dim'], MEANS))
    elif args.dataset == 'VOC':
        if args.dataset_root == COCO_ROOT:
            parser.error('Must specify dataset if specifying dataset_root')
        cfg = voc
        dataset = VOCDetection(root=args.dataset_root,
                               transform=SSDAugmentation(
                                   cfg['min_dim'], MEANS))
    elif args.dataset == 'Watermark':
        cfg = voc
        dataset = watermark.WatermarkDetection(
            root=args.dataset_root,
            target_transform=watermark.target_transform,
            transform=SSDAugmentation(cfg['min_dim'], MEANS))
    else:
        # Previously an unknown dataset fell through and failed later with an
        # unbound `cfg`.
        parser.error('Unsupported dataset: {}'.format(args.dataset))

    if args.visdom:
        import visdom
        # NOTE(review): `viz` is local to train(); confirm the plotting
        # helpers do not expect a module-level `viz`.
        viz = visdom.Visdom()

    # Only used for the periodic prediction visualisation below.
    softmax = nn.Softmax(dim=-1)
    detect = Detect(cfg['num_classes'], 0, 200, 0.01, 0.45)

    ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
    net = ssd_net

    if args.cuda:
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    # Disabled: loading the VGG base network when not resuming.
    # else:
    #     vgg_weights = torch.load(args.save_folder + args.basenet)
    #     print('Loading base network...')
    #     ssd_net.vgg.load_state_dict(vgg_weights)

    if args.cuda:
        net = net.cuda()

    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)

    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the dataset...')

    # NOTE(review): this is 0 when the dataset is smaller than one batch,
    # which makes the modulo below fail when visdom is enabled.
    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)

    step_index = 0

    if args.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)
    # create batch iterator
    batch_iterator = iter(data_loader)
    for iteration in range(args.start_iter, cfg['max_iter']):
        if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
            update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                            'append', epoch_size)
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        if iteration in cfg['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)

        # load train data
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            # The loader is exhausted: restart it for the next epoch.
            batch_iterator = iter(data_loader)
            print('Start data iteration over again.')
            images, targets = next(batch_iterator)

        if args.cuda:
            images = Variable(images.cuda())
            targets = [
                torch.tensor(ann, requires_grad=False).cuda()
                for ann in targets
            ]
        else:
            images = Variable(images)
            # `volatile` was removed in PyTorch 0.4 (warning only, no
            # effect); requires_grad=False states the intent explicitly.
            targets = [Variable(ann, requires_grad=False) for ann in targets]

        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()

        if iteration % 10 == 0:
            #print('timer: %.4f sec.' % (t1 - t0))
            #print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.item()), end=' ')
            writer.add_scalars('loss', {
                'class': loss_c.item(),
                'loc': loss_l.item()
            }, global_step=iteration)

        # plot the training image with bounding box
        if iteration % 100 == 0:
            img = images[0]
            img = watermark.tv_inv_trans(img)
            bnd_box_gt = torch.Tensor(
                [watermark.to_coord(targets[0][0, :-1], img.size()[-2:])])
            with torch.no_grad():
                detections = detect(out[0].detach(),
                                    softmax(out[1].detach()),
                                    out[2].detach())
            # Keep detections of the first image whose first column exceeds
            # 0.6.
            positive_idx = detections[0, :, :, 0] > 0.6
            bnd_box_preds = detections[0][positive_idx][:, 1:]
            bnd_box_pred = [
                torch.Tensor(watermark.to_coord(bnd_box_pred,
                                                img.size()[-2:])).unsqueeze(0)
                for bnd_box_pred in bnd_box_preds
            ]
            if len(bnd_box_pred):
                bnd_box_pred = torch.cat(bnd_box_pred, 0)
                writer.add_image_with_boxes('pred', img, bnd_box_pred,
                                            global_step=iteration)
            else:
                writer.add_image('pred', img, global_step=iteration)
            writer.add_image_with_boxes('gt', img, bnd_box_gt,
                                        global_step=iteration)

        # plot gradient of every layer
        if iteration % 500 == 0:
            writer.add_figure('grad_flow',
                              vis_grad.plot_grad_flow_v2(
                                  net.named_parameters()),
                              global_step=iteration)

        if args.visdom:
            update_vis_plot(iteration, loss_l.item(), loss_c.item(),
                            iter_plot, epoch_plot, 'append')

        if iteration != 0 and (iteration + 1) % 5000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(
                ssd_net.state_dict(),
                os.path.join(args.save_folder, args.dataset) +
                repr(iteration) + '.pth')

    # Join instead of concatenating so a save_folder without a trailing
    # separator still places the final checkpoint inside the folder.
    torch.save(ssd_net.state_dict(),
               os.path.join(args.save_folder, args.dataset + '.pth'))
def __init__(self):
    """Build the network from the global ``cfg``.

    Creates the backbone, the prototype mask net (lincomb mask type), the
    optional FPN, fusion module and coord-conv helper, one
    ``PredictionModule`` per selected layer, the optional auxiliary heads and
    the ``Detect`` post-processor. Also writes ``cfg.mask_dim`` back into the
    global config.
    """
    super().__init__()

    self.backbone = construct_backbone(cfg.backbone)

    if cfg.freeze_bn:
        self.freeze_bn()

    #Fusion FPN
    self.fusion_layers = cfg.fusion_layers
    self.fusion_dim = cfg.fusion_dim

    # Compute mask_dim here and add it back to the config.
    if cfg.mask_type == mask_type.direct:
        cfg.mask_dim = cfg.mask_size**2
    elif cfg.mask_type == mask_type.lincomb:
        if cfg.mask_proto_use_grid:
            self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
            self.num_grids = self.grid.size(0)
        else:
            self.num_grids = 0

        self.proto_src = cfg.mask_proto_src

        if self.proto_src is None:
            in_channels = 3
        elif cfg.fpn is not None:
            in_channels = cfg.fpn.num_features
        else:
            in_channels = self.backbone.channels[self.proto_src]
        in_channels += self.num_grids

        # The include_last_relu=false here is because we might want to change it to another function
        # NOTE(review): because of the elif, fusion_dim is ignored when both
        # proto_coordconv and fpn_fusion are set - confirm this is intended.
        if cfg.proto_coordconv:
            in_channels += 2
        elif cfg.fpn_fusion:
            in_channels = self.fusion_dim
        self.proto_net, cfg.mask_dim = make_net(in_channels, cfg.mask_proto_net, include_last_relu=False)

        if cfg.mask_proto_bias:
            cfg.mask_dim += 1

    self.selected_layers = cfg.backbone.selected_layers
    src_channels = self.backbone.channels

    if cfg.fpn is not None:
        # Some hacky rewiring to accomodate the FPN
        self.fpn = FPN([src_channels[i] for i in self.selected_layers])
        self.selected_layers = list(
            range(len(self.selected_layers) + cfg.fpn.num_downsample))
        src_channels = [cfg.fpn.num_features] * len(self.selected_layers)

    if cfg.fpn_fusion is True:
        self.fusion_module = FusionModule(src_channels[0], self.fusion_layers, out_dim=self.fusion_dim)

    if cfg.ins_coordconv or cfg.sem_coordconv or cfg.proto_coordconv:
        self.addcoords = AddCoords()

    self.prediction_layers = nn.ModuleList()

    for idx, layer_idx in enumerate(self.selected_layers):
        # If we're sharing prediction module weights, have every module's parent be the first one
        parent = None
        if cfg.share_prediction_module and idx > 0:
            parent = self.prediction_layers[0]

        # Two extra input channels when instance coord-conv is enabled.
        pred_in_ch = src_channels[
            layer_idx] + 2 if cfg.ins_coordconv else src_channels[layer_idx]
        pred = PredictionModule(
            pred_in_ch, src_channels[layer_idx],
            aspect_ratios=cfg.backbone.pred_aspect_ratios[idx],
            scales=cfg.backbone.pred_scales[idx],
            parent=parent)
        self.prediction_layers.append(pred)

    # Extra parameters for the extra losses
    if cfg.use_class_existence_loss:
        # This comes from the smallest layer selected
        # Also note that cfg.num_classes includes background
        self.class_existence_fc = nn.Linear(src_channels[-1], cfg.num_classes - 1)

    if cfg.cross_attention_fusion:
        self.CALayer = CAModule(src_channels[0], share_conv=False)

    if cfg.use_semantic_segmentation_loss:
        # Pick the input width of the semantic head from its feature source.
        sem_in_ch = None
        if cfg.sem_src_fusion is True:
            sem_in_ch = self.fusion_dim
        elif cfg.sem_lincomb is True:
            sem_in_ch = src_channels[0]
        else:
            # normal semantic segmentation head
            sem_in_ch = src_channels[-1]

        if cfg.sem_coordconv:
            sem_in_ch += 2

        # Panoptic FPN Fusion Version
        if cfg.sem_src_fusion is True:
            self.semantic_seg_conv = nn.Sequential(
                nn.Conv2d(sem_in_ch, cfg.stuff_num_classes, kernel_size=(1, 1)))
        elif cfg.sem_lincomb is True:
            # Emits stuff_num_classes * mask_dim channels, squashed by Tanh.
            self.semantic_seg_conv = nn.Sequential(
                nn.Conv2d(sem_in_ch, 256, kernel_size=3),
                # nn.BatchNorm2d(256),
                nn.GroupNorm(32, 256),
                nn.ReLU(True),
                nn.Conv2d(256, (cfg.stuff_num_classes) * cfg.mask_dim, kernel_size=1),
                nn.Tanh())
        else:
            self.semantic_seg_conv = nn.Sequential(
                nn.Conv2d(sem_in_ch, cfg.stuff_num_classes, kernel_size=(1, 1)))

    # For use in evaluation
    self.detect = Detect(cfg.num_classes, bkg_label=0, top_k=200, conf_thresh=0.05, nms_thresh=0.5)
def test_net(save_folder, net, cuda, dataset, transform, top_k, im_size=300, thresh=0.05): num_images = len(dataset) # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in range(num_images)] for _ in range(len(labelmap) + 1)] from layers import Detect num_images = len(dataset) parser = Detect(num_classes, 0, 200, 0.1, 0.45) softmax = nn.Softmax(dim=-1) # timers _t = {'im_detect': Timer(), 'misc': Timer()} output_dir = get_output_dir('ssd300_120000', set_type) det_file = os.path.join(output_dir, 'detections.pkl') for i in range(num_images): break im, gt, h, w = dataset.pull_item(i) x = im.unsqueeze(0) if args.cuda: x = x.cuda() _t['im_detect'].tic() # detections = net(x).data with torch.no_grad(): loc_pred, cls_pred, priors = net(x) detections = parser(loc_pred, softmax(cls_pred), priors.type(type(x))) detect_time = _t['im_detect'].toc(average=False) # skip j = 0, because it's the background class for j in range(1, detections.size(1)): dets = detections[0, j, :] mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t() dets = torch.masked_select(dets, mask).view(-1, 5) if dets.shape[0] == 0: continue boxes = dets[:, 1:] boxes[:, 0] *= w boxes[:, 2] *= w boxes[:, 1] *= h boxes[:, 3] *= h scores = dets[:, 0].cpu().numpy() cls_dets = np.hstack( (boxes.cpu().numpy(), scores[:, np.newaxis])).astype(np.float32, copy=False) all_boxes[j][i] = cls_dets print('im_detect: {:d}/{:d} {:.3f}s'.format( i + 1, num_images, detect_time)) # # import pdb # # pdb.set_trace() # from data import VOC_CLASSES as labels # top_k=10 # im = cv2.imread(dataset._imgpath % dataset.ids[i]) # plt.figure(figsize=(10,10)) # colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist() # plt.imshow(im) # plot the image for matplotlib # currentAxis = plt.gca() # detections = cls_dets.copy() # # scale each detection back up to the image # scale = torch.Tensor(im.shape[1::-1]).repeat(2) # for i in range(detections.size(1)): # j = 0 
# while detections[0,i,j,0] >= 0.6: # score = detections[0,i,j,0] # label_name = labels[i-1] # display_txt = '%s: %.2f'%(label_name, score) # pt = (detections[0,i,j,1:]*scale).cpu().numpy() # coords = (pt[0], pt[1]), pt[2]-pt[0]+1, pt[3]-pt[1]+1 # color = colors[i] # currentAxis.add_patch(plt.Rectangle(*coords, fill=False, edgecolor=color, linewidth=2)) # currentAxis.text(pt[0], pt[1], display_txt, bbox={'facecolor':color, 'alpha':0.5}) # j+=1 # print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, # num_images, detect_time)) #with open(det_file, 'wb') as f: # pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) with open(det_file, 'rb') as f: all_boxes = pickle.load(f) print('Evaluating detections') evaluate_detections(all_boxes, output_dir, dataset)
def __init__(self, phase, nms_thresh=0.3, nms_conf_thresh=0.01):
    """Build the ResNet-152 based two-class SSD with FPN and FEM blocks.

    Args:
        phase: Stored on the instance. Any value other than
            ``'onnx_export'`` creates ``self.detect``; ``'test'``
            additionally creates ``self.test_transform``.
        nms_thresh: Forwarded to ``Detect`` (fifth positional argument).
        nms_conf_thresh: Forwarded to ``Detect`` (fourth positional
            argument).
    """
    super(SSD, self).__init__()
    self.phase = phase
    self.num_classes = 2
    self.cfg = cfg

    backbone = torchvision.models.resnet152(pretrained=True)
    self.layer1 = nn.Sequential(backbone.conv1, backbone.bn1, backbone.relu,
                                backbone.maxpool, backbone.layer1)
    self.layer2 = nn.Sequential(backbone.layer2)
    self.layer3 = nn.Sequential(backbone.layer3)
    self.layer4 = nn.Sequential(backbone.layer4)

    # Two extra stages appended after the ResNet trunk.
    self.layer5 = nn.Sequential(
        nn.Conv2d(2048, 512, kernel_size=1),
        nn.BatchNorm2d(512),
        nn.ReLU(inplace=True),
        nn.Conv2d(512, 512, kernel_size=3, padding=1, stride=2),
        nn.BatchNorm2d(512),
        nn.ReLU(inplace=True),
    )
    self.layer6 = nn.Sequential(
        nn.Conv2d(512, 128, kernel_size=1),
        nn.BatchNorm2d(128),
        nn.ReLU(inplace=True),
        nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=2),
        nn.BatchNorm2d(256),
        nn.ReLU(inplace=True),
    )

    # Channel widths of the six feature levels.
    output_channels = [256, 512, 1024, 2048, 512, 256]
    ch0, ch1, ch2, ch3, ch4, ch5 = output_channels

    # FPN
    self.latlayer3 = nn.Conv2d(ch3, ch2, kernel_size=1, stride=1, padding=0)
    self.latlayer2 = nn.Conv2d(ch2, ch1, kernel_size=1, stride=1, padding=0)
    self.latlayer1 = nn.Conv2d(ch1, ch0, kernel_size=1, stride=1, padding=0)

    self.smooth3 = nn.Conv2d(ch2, ch2, kernel_size=1, stride=1, padding=0)
    self.smooth2 = nn.Conv2d(ch1, ch1, kernel_size=1, stride=1, padding=0)
    self.smooth1 = nn.Conv2d(ch0, ch0, kernel_size=1, stride=1, padding=0)

    # FEM
    self.cpm3_3 = FEM(ch0)
    self.cpm4_3 = FEM(ch1)
    self.cpm5_3 = FEM(ch2)
    self.cpm7 = FEM(ch3)
    self.cpm6_2 = FEM(ch4)
    self.cpm7_2 = FEM(ch5)

    # head
    loc_layers, conf_layers = pa_multibox(output_channels)[:2]
    self.loc = nn.ModuleList(loc_layers)
    self.conf = nn.ModuleList(conf_layers)

    self.softmax = nn.Softmax(dim=-1)

    if self.phase != 'onnx_export':
        self.detect = Detect(self.num_classes, 0, cfg['num_thresh'],
                             nms_conf_thresh, nms_thresh, cfg['variance'])

    # NOTE(review): the flattened source does not show whether these two
    # attributes were set only when phase != 'onnx_export'; they are set
    # unconditionally here - confirm against the original file.
    self.last_image_size = None
    self.last_feature_maps = None

    if self.phase == 'test':
        self.test_transform = TestBaseTransform((104, 117, 123))
def __init__(self):
    """Build the network from the global ``cfg``.

    Creates the backbone, the prototype mask net (lincomb mask type), the
    optional MaskIoU net and FPN, one ``PredictionModule`` per selected layer,
    the optional auxiliary heads and the ``Detect`` post-processor. Also
    writes ``cfg.mask_dim`` and ``cfg.num_heads`` back into the global config.

    The inline value annotations below are the original author's notes for a
    YOLACT++ configuration; they are not checked by the code.
    """
    super().__init__()

    # yolact++ cfg.backbone =
    # 'backbone': resnet101_dcn_inter3_backbone.copy({
    #     'selected_layers': list(range(1, 4)),
    #
    #     'pred_aspect_ratios': [[[1, 1 / 2, 2]]] * 5,
    #     'pred_scales': [[i * 2 ** (j / 3.0) for j in range(3)] for i in [24, 48, 96, 192, 384]],
    #     'use_pixel_scales': True,
    #     'preapply_sqrt': False,
    #     'use_square_anchors': False,
    # })
    self.backbone = construct_backbone(cfg.backbone)

    if cfg.freeze_bn:
        self.freeze_bn()

    # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early!
    if cfg.mask_type == mask_type.direct:
        # 16^2 = 256 ??
        cfg.mask_dim = cfg.mask_size**2
    elif cfg.mask_type == mask_type.lincomb:
        # mask_proto_use_grid ALWAYS false ??
        if cfg.mask_proto_use_grid:
            self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
            self.num_grids = self.grid.size(0)
        else:
            self.num_grids = 0

        # yolact use 0
        self.proto_src = cfg.mask_proto_src

        if self.proto_src is None:
            in_channels = 3
        elif cfg.fpn is not None:
            in_channels = cfg.fpn.num_features
        else:
            in_channels = self.backbone.channels[self.proto_src]
        in_channels += self.num_grids

        # The include_last_relu=false here is because we might want to change it to another function
        # yolact ++ proto net
        # 'mask_proto_net': [(256, 3, {'padding': 1})] * 3
        #                   + [(None, -2, {}), (256, 3, {'padding': 1})]
        #                   + [(32, 1, {})],
        self.proto_net, cfg.mask_dim = make_net(in_channels, cfg.mask_proto_net, include_last_relu=False)

        if cfg.mask_proto_bias:
            cfg.mask_dim += 1
    ## end of the mask-type if/else

    self.selected_layers = cfg.backbone.selected_layers
    src_channels = self.backbone.channels

    if cfg.use_maskiou:
        self.maskiou_net = FastMaskIoUNet()

    if cfg.fpn is not None:
        # Some hacky rewiring to accomodate the FPN
        self.fpn = FPN(
            # yolact++ 101 selected layers = 1,2,3
            # 2nd 128x4
            # 3rd 256x4
            # 4th 512x4
            [src_channels[i] for i in self.selected_layers]
        )
        self.selected_layers = list(
            # selected_layers = 0,1,2,3,4
            range(
                # yolact++: three selected layers + num_downsample of 2
                len(self.selected_layers) + cfg.fpn.num_downsample)
        )
        # num features = 256 x 5
        src_channels = [cfg.fpn.num_features] * len(self.selected_layers)

    self.prediction_layers = nn.ModuleList()
    cfg.num_heads = len(self.selected_layers)  # --> 5 num_heads ??

    # One prediction module is created per selected layer.
    for idx, layer_idx in enumerate(self.selected_layers):
        # If we're sharing prediction module weights, have every module's parent be the first one
        parent = None
        # yolact++ share_prediction_module always True
        if cfg.share_prediction_module and idx > 0:
            parent = self.prediction_layers[0]

        pred = PredictionModule(
            # in_channels=
            src_channels[layer_idx],
            # out_channels=
            src_channels[layer_idx],
            # 'pred_scales': [[1]] * 6
            # 'pred_aspect_ratios': [[[0.66685089, 1.7073535, 0.87508774, 1.16524493,
            #                          0.49059086]]] * 6
            aspect_ratios = cfg.backbone.pred_aspect_ratios[idx],
            scales = cfg.backbone.pred_scales[idx],
            parent = parent,
            index = idx)
        self.prediction_layers.append(pred)

    # Extra parameters for the extra losses
    # always False ??
    if cfg.use_class_existence_loss:
        # This comes from the smallest layer selected
        # Also note that cfg.num_classes includes background
        self.class_existence_fc = nn.Linear(src_channels[-1], cfg.num_classes - 1)

    # yolact always True ??
    if cfg.use_semantic_segmentation_loss:
        self.semantic_seg_conv = nn.Conv2d(src_channels[0], cfg.num_classes-1, kernel_size=1)

    # For use in evaluation
    self.detect = Detect(cfg.num_classes,
                         bkg_label=0,
                         top_k=cfg.nms_top_k,  # 'nms_top_k': 200,
                         conf_thresh=cfg.nms_conf_thresh,  # 'nms_conf_thresh': 0.05
                         nms_thresh=cfg.nms_thresh  # 'nms_thresh': 0.5
                         )
def __init__(self):
    """Build every sub-module of the network from the global ``cfg``.

    Creates the backbone, the prototype network (for ``mask_type.lincomb``),
    the optional mask-IoU net and FPN, one ``PredictionModule`` per selected
    layer, the optional auxiliary-loss layers and the ``Detect`` module.

    Writes ``cfg.mask_dim`` (for the ``direct`` / ``lincomb`` mask types) and
    ``cfg.num_heads`` back onto the config.
    """
    super().__init__()

    # NOTE(original author, unverified): resnet101_dcn_inter3_backbone.
    self.backbone = construct_backbone(cfg.backbone)

    if cfg.freeze_bn:
        self.freeze_bn()

    # mask_dim is derived here and written back to the config, so this
    # constructor must run before cfg.mask_dim is consumed elsewhere.
    if cfg.mask_type == mask_type.direct:
        cfg.mask_dim = cfg.mask_size ** 2
    elif cfg.mask_type == mask_type.lincomb:
        # NOTE(original author, unverified): mask_proto_use_grid is False and
        # num_grids is 0 in the YOLACT++ defaults.
        extra_grid_channels = 0
        if cfg.mask_proto_use_grid:
            self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
            extra_grid_channels = self.grid.size(0)
        self.num_grids = extra_grid_channels

        source_layer = cfg.mask_proto_src
        self.proto_src = source_layer

        if source_layer is None:
            # NOTE(original author): a source index of 0 is not None, so it
            # does not take this branch.
            base_channels = 3
        elif cfg.fpn is not None:
            # NOTE(original author, unverified): fpn.num_features defaults to 256.
            base_channels = cfg.fpn.num_features
        else:
            base_channels = self.backbone.channels[source_layer]

        # include_last_relu=False because the last activation may be replaced
        # by another function.
        # NOTE(original author, unverified): mask_proto_net is
        #   [(256, 3, {'padding': 1})] * 3
        #   + [(None, -2, {}), (256, 3, {'padding': 1})] + [(32, 1, {})]
        # i.e. conv layers plus a bilinear upsample, fed with 256 + 0 channels.
        # make_net returns the built network and a second value that is stored
        # as cfg.mask_dim (presumably the channel count of the final layer —
        # TODO confirm against make_net).
        built_net, out_channels = make_net(
            base_channels + self.num_grids,
            cfg.mask_proto_net,
            include_last_relu=False,
        )
        self.proto_net = built_net
        cfg.mask_dim = out_channels

        # NOTE(original author, unverified): mask_proto_bias is False, leaving
        # cfg.mask_dim at 32.
        if cfg.mask_proto_bias:
            cfg.mask_dim += 1

    # NOTE(original author, unverified): selected_layers is [1, 2, 3] and
    # backbone.channels is [256, 512, 1024, 2048] for YOLACT++.
    self.selected_layers = cfg.backbone.selected_layers
    channels_per_layer = self.backbone.channels

    if cfg.use_maskiou:
        self.maskiou_net = FastMaskIoUNet()

    if cfg.fpn is not None:
        # Rewire for the FPN: the heads read FPN outputs, indexed 0..n-1.
        # NOTE(original author, unverified): the FPN config is
        # fpn_base.copy({'use_conv_downsample': True, 'num_downsample': 2}) and
        # [512, 1024, 2048] is what gets passed in here.
        self.fpn = FPN([channels_per_layer[i] for i in self.selected_layers])

        # NOTE(original author, unverified): 3 + 2 levels, so selected_layers
        # becomes [0, 1, 2, 3, 4] and every level has 256 channels.
        level_count = len(self.selected_layers) + cfg.fpn.num_downsample
        self.selected_layers = list(range(level_count))
        channels_per_layer = [cfg.fpn.num_features] * level_count

    self.prediction_layers = nn.ModuleList()
    # NOTE(original author, unverified): this is 5 and is read by PredictionModule.
    cfg.num_heads = len(self.selected_layers)

    # One prediction head is built and appended per selected layer.
    for position, layer in enumerate(self.selected_layers):
        # With shared prediction weights, every head after the first takes the
        # first head as its parent.
        # NOTE(original author, unverified): share_prediction_module is True.
        use_parent = cfg.share_prediction_module and position > 0
        parent = self.prediction_layers[0] if use_parent else None

        n_channels = channels_per_layer[layer]  # same value for in and out channels
        self.prediction_layers.append(
            PredictionModule(
                n_channels,
                n_channels,
                aspect_ratios=cfg.backbone.pred_aspect_ratios[position],
                scales=cfg.backbone.pred_scales[position],
                parent=parent,
                index=position,
            )
        )

    # Extra parameters for the optional auxiliary losses.
    # NOTE(original author, unverified): use_class_existence_loss is False.
    if cfg.use_class_existence_loss:
        # Uses the last (smallest) selected layer; cfg.num_classes counts the
        # background class, hence the - 1.
        self.class_existence_fc = nn.Linear(channels_per_layer[-1], cfg.num_classes - 1)

    # NOTE(original author, unverified): use_semantic_segmentation_loss is True.
    if cfg.use_semantic_segmentation_loss:
        self.semantic_seg_conv = nn.Conv2d(
            channels_per_layer[0], cfg.num_classes - 1, kernel_size=1
        )

    # Used during evaluation.
    self.detect = Detect(
        cfg.num_classes,
        bkg_label=0,
        top_k=cfg.nms_top_k,
        conf_thresh=cfg.nms_conf_thresh,
        nms_thresh=cfg.nms_thresh,
    )
def __init__(self):
    """Create a fixed-architecture variant of the network with literal layer sizes.

    Registers, in this order: the ``resnet18`` backbone, three lateral
    ``fpn*`` stacks, three 3x3 ``conv_*`` stacks, two stride-2 downsample
    stacks, the box / confidence / mask / semantic heads, the two-part
    prototype network and the ``Detect`` post-processor.
    """
    super(Yolact, self).__init__()

    def conv_bn_relu(in_ch, out_ch, kernel, stride, padding=0):
        # Conv2d -> BatchNorm2d -> ReLU(inplace) as a flat list, so callers can
        # splice it into an nn.Sequential without nesting.
        return [
            nn.Conv2d(in_ch, out_ch, kernel, stride, padding),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        ]

    def lateral_stack(in_ch):
        # Two 1x1 conv blocks: in_ch -> 256 -> 256.
        return nn.Sequential(
            *(conv_bn_relu(in_ch, 256, 1, 1) + conv_bn_relu(256, 256, 1, 1))
        )

    def conv3x3_stack(stride):
        # A single 3x3, padding-1 conv block on 256 channels.
        return nn.Sequential(*conv_bn_relu(256, 256, 3, stride, 1))

    # ---- main network ----
    self.backbone = resnet18(pretrained=True)

    # NOTE(review): 512/1024/2048 input channels match ResNet-50/101 stage
    # widths rather than a torchvision resnet18 (128/256/512) — confirm which
    # `resnet18` is in scope and what forward() feeds these layers.
    self.fpn1 = lateral_stack(512)
    self.fpn2 = lateral_stack(1024)
    self.fpn3 = lateral_stack(2048)

    self.conv_b = conv3x3_stack(1)
    self.conv_c = conv3x3_stack(1)
    self.conv_m = conv3x3_stack(1)

    # Stride-2 convolutions.
    self.downsample_layers1 = conv3x3_stack(2)
    self.downsample_layers2 = conv3x3_stack(2)

    # Output heads.
    # NOTE(review): 12 / 243 / 96 output channels presumably correspond to
    # 3 anchors x (4 box coords / 81 classes / 32 mask coefficients), and 80
    # to the non-background classes — TODO confirm.
    self.bbox_layer = nn.Conv2d(256, 12, 3, 1, 1)
    self.conf_layer = nn.Conv2d(256, 243, 3, 1, 1)
    self.mask_layer = nn.Conv2d(256, 96, 3, 1, 1)
    self.semantic_set_conv = nn.Conv2d(256, 80, 1, 1)

    # ---- prototype network ----
    self.proto_net1 = nn.Sequential(
        *(
            conv_bn_relu(256, 256, 3, 1, 1)
            + conv_bn_relu(256, 256, 3, 1, 1)
            + conv_bn_relu(256, 256, 3, 1, 1)
        )
    )
    # The last 1x1 conv (256 -> 32) has no normalisation or activation.
    self.proto_net2 = nn.Sequential(
        *(conv_bn_relu(256, 256, 3, 1, 1) + [nn.Conv2d(256, 32, 1, 1)])
    )

    # ---- post-processing ----
    self.detect = Detect(
        cfg.num_classes,
        bkg_label=0,
        top_k=200,
        conf_thresh=0.05,
        nms_thresh=0.5,
    )
def __init__(self):
    """Construct the network's sub-modules according to the global ``cfg``.

    Builds the backbone, the prototype network (``mask_type.lincomb`` only),
    the optional FPN, one ``PredictionModule`` per selected layer, the optional
    auxiliary-loss layers and a ``Detect`` module with fixed thresholds.

    Side effect: ``cfg.mask_dim`` is written for the ``direct`` and
    ``lincomb`` mask types.
    """
    super().__init__()

    self.backbone = construct_backbone(cfg.backbone)

    if cfg.freeze_bn:
        self.freeze_bn()

    # mask_dim is computed here and pushed back into the config, so this
    # constructor must be called before cfg.mask_dim is needed.
    if cfg.mask_type == mask_type.direct:
        cfg.mask_dim = cfg.mask_size ** 2
    elif cfg.mask_type == mask_type.lincomb:
        # Optional grid channels loaded from a .npy file.
        n_grid = 0
        if cfg.mask_proto_use_grid:
            self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
            n_grid = self.grid.size(0)
        self.num_grids = n_grid

        # Pick the channel count of whatever feeds the prototype network.
        src = cfg.mask_proto_src
        self.proto_src = src
        if src is None:
            n_in = 3
        elif cfg.fpn is not None:
            n_in = cfg.fpn.num_features
        else:
            n_in = self.backbone.channels[src]

        # include_last_relu=False so the final activation can be changed to
        # another function.
        net, net_out_dim = make_net(
            n_in + self.num_grids,
            cfg.mask_proto_net,
            include_last_relu=False,
        )
        self.proto_net = net
        cfg.mask_dim = net_out_dim

        if cfg.mask_proto_bias:
            cfg.mask_dim += 1

    self.selected_layers = cfg.backbone.selected_layers
    layer_widths = self.backbone.channels

    if cfg.fpn is not None:
        # Rewire for the FPN: heads index its outputs (0..n-1), all of which
        # have cfg.fpn.num_features channels.
        self.fpn = FPN([layer_widths[i] for i in self.selected_layers])

        total_levels = len(self.selected_layers) + cfg.fpn.num_downsample
        self.selected_layers = list(range(total_levels))
        layer_widths = [cfg.fpn.num_features] * total_levels

    self.prediction_layers = nn.ModuleList()

    for k, selected in enumerate(self.selected_layers):
        # When prediction weights are shared, every module after the first
        # gets the first one as its parent.
        parent = None
        if k > 0 and cfg.share_prediction_module:
            parent = self.prediction_layers[0]

        width = layer_widths[selected]  # used for both in and out channels
        module = PredictionModule(
            width,
            width,
            aspect_ratios=cfg.backbone.pred_aspect_ratios[k],
            scales=cfg.backbone.pred_scales[k],
            parent=parent,
        )
        self.prediction_layers.append(module)

    # Extra parameters for the optional auxiliary losses.
    if cfg.use_class_existence_loss:
        # Taken from the last (smallest) selected layer; cfg.num_classes
        # includes background, hence the - 1.
        self.class_existence_fc = nn.Linear(layer_widths[-1], cfg.num_classes - 1)

    if cfg.use_semantic_segmentation_loss:
        self.semantic_seg_conv = nn.Conv2d(
            layer_widths[0], cfg.num_classes - 1, kernel_size=1
        )

    # Evaluation-time post-processing with fixed thresholds.
    self.detect = Detect(
        cfg.num_classes,
        bkg_label=0,
        top_k=200,
        conf_thresh=0.05,
        nms_thresh=0.5,
    )
def __init__(self):
    """Set up the network's sub-modules from the global ``cfg``.

    Prints a start-up message, then builds the backbone, the prototype network
    (``mask_type.lincomb`` only), the optional FPN, one ``PredictionModule``
    per selected layer, the optional auxiliary-loss layers and a ``Detect``
    module with fixed thresholds (``conf_thresh=0.2``).

    Side effect: ``cfg.mask_dim`` is written for the ``direct`` and
    ``lincomb`` mask types.
    """
    # Run the base class initialiser first.
    super().__init__()

    print('net initial...\n')
    self.backbone = construct_backbone(cfg.backbone)

    if cfg.freeze_bn:
        self.freeze_bn()

    # This section produces self.proto_net and cfg.mask_dim. mask_dim is
    # written back to the config, so this constructor must run early.
    if cfg.mask_type == mask_type.direct:
        cfg.mask_dim = cfg.mask_size ** 2
    elif cfg.mask_type == mask_type.lincomb:
        # cfg.mask_proto_grid_file is loaded with np.load.
        # NOTE(original author, unverified): it points at data/grid.npy and
        # num_grids ends up 0 in the author's configuration.
        grid_count = 0
        if cfg.mask_proto_use_grid:
            self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
            grid_count = self.grid.size(0)
        self.num_grids = grid_count

        proto_source = cfg.mask_proto_src
        self.proto_src = proto_source

        if proto_source is None:
            input_width = 3
        elif cfg.fpn is not None:  # cfg.fpn is a config object when enabled
            input_width = cfg.fpn.num_features
        else:
            input_width = self.backbone.channels[proto_source]

        # include_last_relu=False because the final activation might be
        # replaced by another function.
        proto, proto_dim = make_net(
            input_width + self.num_grids,
            cfg.mask_proto_net,
            include_last_relu=False,
        )
        self.proto_net = proto
        cfg.mask_dim = proto_dim

        if cfg.mask_proto_bias:
            cfg.mask_dim += 1

    # NOTE(original author, unverified): with the author's configuration,
    # printing self.proto_net showed
    #   3 x [Conv2d(256, 256, 3x3, stride 1, padding 1) + ReLU],
    #   InterpolateModule + ReLU,
    #   Conv2d(256, 256, 3x3, stride 1, padding 1) + ReLU,
    #   Conv2d(256, 32, 1x1)
    # and self.fpn showed 3 lat_layers, 3 pred_layers and 2 downsample_layers.

    self.selected_layers = cfg.backbone.selected_layers
    widths = self.backbone.channels

    if cfg.fpn is not None:
        # Rewire for the FPN: prediction heads consume its outputs, indexed
        # from 0, each with cfg.fpn.num_features channels.
        self.fpn = FPN([widths[i] for i in self.selected_layers])

        n_outputs = len(self.selected_layers) + cfg.fpn.num_downsample
        self.selected_layers = list(range(n_outputs))
        widths = [cfg.fpn.num_features] * n_outputs

    self.prediction_layers = nn.ModuleList()

    for i, layer_id in enumerate(self.selected_layers):
        # With shared prediction weights, every module after the first uses the
        # first one as its parent.
        reuse_first = cfg.share_prediction_module and i > 0
        parent = self.prediction_layers[0] if reuse_first else None

        channels = widths[layer_id]  # same value for in and out channels
        self.prediction_layers.append(
            PredictionModule(
                channels,
                channels,
                aspect_ratios=cfg.backbone.pred_aspect_ratios[i],
                scales=cfg.backbone.pred_scales[i],
                parent=parent,
            )
        )

    # Extra parameters for the optional auxiliary losses.
    # NOTE(original author, unverified): use_class_existence_loss is False.
    if cfg.use_class_existence_loss:
        # Based on the last (smallest) selected layer; cfg.num_classes
        # includes background, hence the - 1.
        self.class_existence_fc = nn.Linear(widths[-1], cfg.num_classes - 1)

    if cfg.use_semantic_segmentation_loss:
        self.semantic_seg_conv = nn.Conv2d(widths[0], cfg.num_classes - 1, kernel_size=1)

    # Evaluation-time post-processing with fixed thresholds.
    self.detect = Detect(
        cfg.num_classes,
        bkg_label=0,
        top_k=200,
        conf_thresh=0.2,
        nms_thresh=0.5,
    )

    self.tmp = 1