def __init__(self, config, phase, base, extras, head, num_classes, top_k=200):
    """Assemble an SSD network from pre-built layer lists.

    Args:
        config: Prior-box configuration; handed to ``PriorBox`` and, in
            test phase, read for its ``'variance'`` entry.
        phase: When equal to ``'test'`` the softmax and ``Detect``
            post-processing objects are created; otherwise they are not.
        base: Iterable of backbone layers (wrapped in ``nn.ModuleList``).
        extras: Iterable of extra layers (wrapped in ``nn.ModuleList``).
        head: Indexable pair; ``head[0]`` becomes ``self.loc`` and
            ``head[1]`` becomes ``self.conf``.
        num_classes: Stored on the instance and forwarded to ``Detect``.
        top_k: Forwarded to ``Detect`` as its third positional argument.
    """
    super(SSD, self).__init__()
    self.phase = phase
    self.num_classes = num_classes

    # TODO: implement __call__ in PriorBox
    self.priorbox = PriorBox(config)
    # NOTE(review): `volatile` is deprecated in PyTorch >= 0.4 and has no
    # effect there; kept as-is to avoid changing behavior on old versions.
    self.priors = Variable(self.priorbox.forward(), volatile=True)

    # SSD network
    self.vgg = nn.ModuleList(base)
    # Layer learns to scale the l2 normalized features from conv4_3
    self.L2Norm = L2Norm(512, 20)
    self.extras = nn.ModuleList(extras)

    self.loc = nn.ModuleList(head[0])
    self.conf = nn.ModuleList(head[1])

    if phase == 'test':
        # An explicit dim avoids the deprecated implicit-dim rule, which
        # would normalize over dim 0 for a 3-D input.
        # NOTE(review): assumes the class scores sit on the last axis.
        self.softmax = nn.Softmax(dim=-1)
        self.detect = Detect(num_classes, 0, top_k, 0.01, 0.45,
                             variance=config['variance'])
def im_detect(net, im, target_size):
    """Run single-scale detection on one image.

    The image is resized to ``target_size`` x ``target_size``, the
    module-level ``MEANS`` is subtracted, the channel axis is reversed and
    the result is passed through ``net``. Priors are rebuilt from the
    feature-map sizes the network returns, then everything is handed to the
    module-level ``detect`` object.

    Args:
        net: Network exposing ``arm_conf`` or ``odm_conf`` and ``cfg``.
        im: Image array whose ``shape`` unpacks as (height, width, _).
        target_size: Side length of the square network input.

    Returns:
        The value returned by ``detect.forward``.
    """
    # Take the device from whichever confidence head the network has.
    # Only "head missing / empty / None" triggers the fallback, so unrelated
    # errors are no longer swallowed by a bare except.
    try:
        device = net.arm_conf[0].weight.device
    except (AttributeError, IndexError, TypeError):
        device = net.odm_conf[0].weight.device

    h, w, _ = im.shape
    # Original image extent, passed to detect.forward as `scale`.
    scale = torch.Tensor([w, h, w, h])
    scale = scale.to(device)

    im_orig = im.astype(np.float32, copy=True)
    im = cv2.resize(im_orig, (target_size, target_size),
                    interpolation=cv2.INTER_LINEAR)
    x = (im - MEANS).astype(np.float32)
    x = x[:, :, (2, 1, 0)]  # to rgb
    x = x.transpose(2, 0, 1)  # channel axis first
    x = torch.from_numpy(x).unsqueeze(0)
    x = x.to(device)

    if args.wo_refined_anchor:
        adm_loc, adm_conf, feat_sizes = net(x)
    else:
        arm_loc, arm_conf, adm_loc, adm_conf, feat_sizes = net(x)

    priorbox = PriorBox(net.cfg, feat_sizes, (target_size, target_size),
                        phase='test')
    priors = priorbox.forward()
    priors = priors.to(device)

    if args.wo_refined_anchor:
        det = detect.forward(adm_loc, adm_conf, priors, scale)
    else:
        det = detect.forward(arm_loc, arm_conf, adm_loc, adm_conf, priors,
                             scale)
    return det
def im_detect_ratio(net, im, target_size1, target_size2):
    """Run detection on one image resized to a non-square target.

    ``target_size1`` is used for the height and ``target_size2`` for the
    width; the two are swapped when the image is wider than it is tall.

    Args:
        net: Network exposing ``arm_conf`` and ``cfg``.
        im: Image array whose ``shape`` unpacks as (height, width, _).
        target_size1: Target height (before the possible swap).
        target_size2: Target width (before the possible swap).

    Returns:
        The value returned by the module-level ``detect.forward``.
    """
    device = net.arm_conf[0].weight.device

    height, width, _ = im.shape
    scale = torch.Tensor([width, height, width, height]).to(device)

    as_float = im.astype(np.float32, copy=True)
    if height < width:
        # Landscape input: exchange the two target sizes.
        target_size1, target_size2 = target_size2, target_size1

    x_factor = float(target_size2) / float(width)
    y_factor = float(target_size1) / float(height)
    resized = cv2.resize(as_float, None, None, fx=x_factor, fy=y_factor,
                         interpolation=cv2.INTER_LINEAR)

    blob = (resized - MEANS).astype(np.float32)
    # to rgb, then move the channel axis to the front
    blob = blob[:, :, (2, 1, 0)].transpose(2, 0, 1)
    batch = torch.from_numpy(blob).unsqueeze(0).to(device)

    arm_loc, arm_conf, adm_loc, adm_conf, feat_sizes = net(batch)

    prior_gen = PriorBox(net.cfg, feat_sizes, (target_size1, target_size2),
                         phase='test')
    priors = prior_gen.forward().to(device)

    return detect.forward(arm_loc, arm_conf, adm_loc, adm_conf, priors,
                          scale)
def forward(self, x):
    """Run the network and return predictions with freshly built priors.

    Three intermediate feature maps are collected, priors are regenerated
    from their spatial sizes and the input size, and the multibox head
    produces location and confidence predictions.

    Returns:
        In ``'test'`` phase, the result of ``self.test_det``; otherwise the
        tuple ``(loc_preds, conf_preds, self.priors)``.
    """

    def _crelu_then_pool(t):
        # Concatenate relu(t) and relu(-t) on dim 1, then 3x3/stride-2 pool.
        t = F.relu(torch.cat((F.relu(t), F.relu(-t)), 1))
        return F.max_pool2d(t, kernel_size=3, stride=2, padding=1)

    img_size = x.size()[2:]

    x = _crelu_then_pool(self.bn1(self.conv1(x)))
    x = _crelu_then_pool(self.bn2(self.conv2(x)))

    x = self.inception3(self.inception2(self.inception1(x)))
    detection_sources = [x]

    x = self.conv3_2(self.conv3_1(x))
    detection_sources.append(x)

    x = self.conv4_2(self.conv4_1(x))
    detection_sources.append(x)

    # Spatial size (dims 2 and 3) of every source feature map.
    feature_maps = [[fm.size(2), fm.size(3)] for fm in detection_sources]

    self.priors = Variable(PriorBox(img_size, feature_maps, cfg).forward())
    loc_preds, conf_preds = self.multilbox(detection_sources)

    if self.phase == 'test':
        return self.test_det(loc_preds, self.softmax(conf_preds),
                             self.priors)
    return (loc_preds, conf_preds, self.priors)
def __init__(self, phase, size, base, extras, head, num_classes):
    """Build the SSD module from pre-built layer lists.

    Args:
        phase: ``'test'`` also creates the softmax and ``Detect`` objects.
        size: Stored as ``self.size``.
        base: Backbone layers, wrapped in ``nn.ModuleList``.
        extras: Extra layers, wrapped in ``nn.ModuleList``.
        head: Pair of layer lists: ``head[0]`` -> ``self.loc``,
            ``head[1]`` -> ``self.conf``.
        num_classes: 21 selects the ``voc`` config, anything else ``coco``.
    """
    super(SSD, self).__init__()
    self.phase = phase
    self.num_classes = num_classes

    # Pick the dataset configuration from the class count.
    self.cfg = voc if num_classes == 21 else coco
    self.priorbox = PriorBox(self.cfg)
    prior_tensor = self.priorbox.forward()
    self.priors = Variable(prior_tensor, volatile=True)
    self.size = size

    # Backbone, conv4_3 feature scaling layer and extra layers.
    self.vgg = nn.ModuleList(base)
    self.L2Norm = L2Norm(512, 20)
    self.extras = nn.ModuleList(extras)

    # Prediction heads.
    loc_layers, conf_layers = head[0], head[1]
    self.loc = nn.ModuleList(loc_layers)
    self.conf = nn.ModuleList(conf_layers)

    if phase != 'test':
        return
    self.softmax = nn.Softmax(dim=-1)
    self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
def __init__(self, num_classes, phase, pretrain=False, finetune=None):
    """Build SSD300 and optionally load or fine-tune weights.

    Args:
        num_classes: Stored on the instance and forwarded to ``Detect``.
        phase: ``'test'`` also creates the softmax and ``Detect`` objects.
        pretrain: When truthy, ``self._load_weight()`` runs after weight
            initialization.
        finetune: When not ``None``, passed to ``self._finetune``.
    """
    super(SSD300, self).__init__()
    self.num_classes = num_classes
    self.phase = phase

    self.base_net = self._base_net()
    self.extra_net = self._extra_net()
    self.loc_pred, self.cls_pred = self._predict_net()
    self.L2Norm = L2Norm(512, 20)

    self.priorbox = PriorBox(v2)
    # NOTE(review): `volatile` is deprecated in PyTorch >= 0.4 and has no
    # effect there; kept as-is to avoid changing behavior on old versions.
    self.priors = Variable(self.priorbox.forward(), volatile=True)

    if phase == 'test':
        # An explicit dim avoids the deprecated implicit-dim rule, which
        # would normalize over dim 0 for a 3-D input.
        # NOTE(review): assumes the class scores sit on the last axis.
        self.softmax = nn.Softmax(dim=-1)
        self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    # Initialize first, so loaded or fine-tuned weights are not overwritten.
    self._init_weight()
    if pretrain:
        self._load_weight()
    if finetune is not None:
        self._finetune(finetune)
def __init__(self, phase, size, base, extras, head, num_classes):
    """Build the TBPP module with its built-in prior-box configuration.

    Args:
        phase: ``'test'`` also creates the softmax and ``Detect`` objects.
        size: Stored as ``self.size``.
        base: Backbone layers, wrapped in ``nn.ModuleList``.
        extras: Extra layers, wrapped in ``nn.ModuleList``.
        head: Pair of layer lists: ``head[0]`` -> ``self.loc``,
            ``head[1]`` -> ``self.conf``.
        num_classes: Stored on the instance and forwarded to ``Detect``.
    """
    super(TBPP, self).__init__()
    self.phase = phase
    self.num_classes = num_classes

    # Fixed configuration consumed by PriorBox.
    tbpp_cfg = {
        'num_classes': 2,
        'lr_steps': (80000, 100000, 120000),
        'max_iter': 120000,
        'feature_maps': [64, 32, 16, 8, 4, 2, 1],
        'min_dim': 512,
        'steps': [8, 16, 32, 64, 128, 256, 512],
        'min_sizes': [20, 51, 133, 215, 296, 378, 460],
        'max_sizes': [51, 133, 215, 296, 378, 460, 542],
        # TODO
        'aspect_ratios': [[2, 3], [2, 3, 5], [2, 3, 5], [2, 3, 5],
                          [2, 3, 5], [2, 3], [2, 3]],
        'variance': [0.1, 0.2],
        'clip': True,
        'name': 'MINE',
    }
    self.cfg = tbpp_cfg

    # Prior (default) boxes are computed once, at construction time.
    self.priorbox = PriorBox(self.cfg)
    prior_tensor = self.priorbox.forward()
    self.priors = Variable(prior_tensor, volatile=True)
    self.size = size

    # TBPP network
    self.vgg = nn.ModuleList(base)
    self.L2Norm = L2Norm(512, 20)
    self.extras = nn.ModuleList(extras)

    loc_layers, conf_layers = head[0], head[1]
    self.loc = nn.ModuleList(loc_layers)
    self.conf = nn.ModuleList(conf_layers)

    if phase != 'test':
        return
    self.softmax = nn.Softmax(dim=-1)
    self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
def __init__(self, num_classes):
    """Build the DSOD detector with 1x1 conv + BatchNorm prediction heads.

    Args:
        num_classes: Number of classes scored per anchor by the
            classification heads.
    """
    super(DSOD_64_16_1x1, self).__init__()
    self.num_classes = num_classes

    self.extractor = DenseNet_64_16_DSSD_s_Pred_D()
    self.loc_layers = nn.ModuleList()
    self.cls_layers = nn.ModuleList()

    self.cfg = cfg_320_64_16
    self.priorbox = PriorBox(self.cfg)
    self.priors = self.priorbox.forward()

    # Channel widths for the "pred D" variant; the "pred C" variant used
    # (768, 768, 768, 256, 256, 256).
    in_channels = (256, 256, 256, 256, 256, 256)
    num_anchors = (4, 6, 6, 6, 4, 4)

    # One localization head and one classification head per source map.
    for width, anchors in zip(in_channels, num_anchors):
        loc_out = anchors * 4
        cls_out = anchors * num_classes
        self.loc_layers.append(
            nn.Sequential(
                nn.Conv2d(width, loc_out, kernel_size=1, padding=0,
                          bias=False),
                nn.BatchNorm2d(loc_out),
            )
        )
        self.cls_layers.append(
            nn.Sequential(
                nn.Conv2d(width, cls_out, kernel_size=1, padding=0,
                          bias=False),
                nn.BatchNorm2d(cls_out),
            )
        )

    norm_layers = [L2Norm(width, 20) for width in in_channels]
    self.normalize = nn.ModuleList(norm_layers)

    self.reset_parameters()
def __init__(self, num_classes):
    """Build the DSOD detector with 3x3 conv + GroupNorm prediction heads.

    Args:
        num_classes: Number of classes scored per anchor; also used as the
            group count of the classification heads' GroupNorm.
    """
    super(DSOD_64_16_GN, self).__init__()
    self.num_classes = num_classes

    self.extractor = DSSD_s_GN()
    self.loc_layers = nn.ModuleList()
    self.cls_layers = nn.ModuleList()

    self.cfg = cfg_320_64_16
    self.priorbox = PriorBox(self.cfg)
    self.priors = self.priorbox.forward()

    in_channels = channel_dict['DSSD']
    num_anchors = (4, 6, 6, 6, 4, 4)

    # One localization head and one classification head per source map.
    for width, anchors in zip(in_channels, num_anchors):
        loc_out = anchors * 4
        cls_out = anchors * num_classes
        self.loc_layers.append(
            nn.Sequential(
                nn.Conv2d(width, loc_out, kernel_size=3, padding=1,
                          bias=False),
                nn.GroupNorm(4, loc_out),
            )
        )
        self.cls_layers.append(
            nn.Sequential(
                nn.Conv2d(width, cls_out, kernel_size=3, padding=1,
                          bias=False),
                nn.GroupNorm(num_classes, cls_out),
            )
        )

    norm_layers = [L2Norm(width, 20) for width in in_channels]
    self.normalize = nn.ModuleList(norm_layers)

    self.reset_parameters()
def train():
    """Train RefineDet using the module-level ``args`` settings.

    Builds the dataset, network, optimizer, the two multibox losses and the
    priors, then iterates for ``args.max_epoch`` epochs' worth of batches.
    Intermediate checkpoints are written at epoch boundaries and a final
    checkpoint is written once the loop finishes.

    Relies on names defined outside this function, including ``args``,
    ``parser``, ``backbone_dict``, ``pretrained`` and ``negpos_ratio``.
    """
    if args.visdom:
        import visdom
        viz = visdom.Visdom()

    print('Loading the dataset...')
    if args.dataset == 'COCO':
        if args.dataset_root == VOC_ROOT:
            if not os.path.exists(COCOroot):
                parser.error('Must specify dataset_root if specifying dataset')
            print("WARNING: Using default COCO dataset_root because " +
                  "--dataset_root was not specified.")
            args.dataset_root = COCOroot
        cfg = coco_refinedet[args.input_size]
        # NOTE(review): ('train2017') is a plain string, not a 1-tuple.
        train_sets = [('train2017')]
        # train_sets = [('train2017', 'val2017')]
        # NOTE(review): the dataset is built from COCOroot, not from
        # args.dataset_root — confirm this is intended.
        dataset = COCODetection(COCOroot, train_sets,
                                SSDAugmentation(cfg['min_dim'], MEANS))
    elif args.dataset == 'VOC':
        '''if args.dataset_root == COCO_ROOT: parser.error('Must specify dataset if specifying dataset_root')'''
        cfg = voc_refinedet[args.input_size]
        dataset = VOCDetection(root=VOC_ROOT,
                               transform=SSDAugmentation(
                                   cfg['min_dim'], MEANS))
    # NOTE(review): any other args.dataset value leaves `cfg` and `dataset`
    # unbound.
    print('Training RefineDet on:', dataset.name)
    print('Using the specified args:')
    print(args)

    refinedet_net = build_refinedet('train', int(args.input_size),
                                    cfg['num_classes'], backbone_dict)
    # `net` may become a DataParallel wrapper; `refinedet_net` always refers
    # to the bare model.
    net = refinedet_net
    print(net)
    device = torch.device('cuda:0' if args.cuda else 'cpu')
    if args.ngpu > 1 and args.cuda:
        net = torch.nn.DataParallel(refinedet_net,
                                    device_ids=list(range(args.ngpu)))
    cudnn.benchmark = True
    net = net.to(device)

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        state_dict = torch.load(args.resume)
        # Build a new OrderedDict whose keys have no leading `module.`, so
        # the weights can be loaded into the bare model.
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        refinedet_net.load_state_dict(new_state_dict)
    else:
        print('Initializing weights...')
        refinedet_net.init_weights(pretrained=pretrained)

    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    # Two criteria over the same network output: the first is built for
    # 2 classes, the second for cfg['num_classes'] with use_ARM=True.
    arm_criterion = RefineDetMultiBoxLoss(2, 0.5, True, 0, True, negpos_ratio,
                                          0.5, False, args.cuda)
    odm_criterion = RefineDetMultiBoxLoss(cfg['num_classes'],
                                          0.5,
                                          True,
                                          0,
                                          True,
                                          negpos_ratio,
                                          0.5,
                                          False,
                                          args.cuda,
                                          use_ARM=True)
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.to(device)

    net.train()
    # loss counters
    arm_loc_loss = 0
    arm_conf_loss = 0
    odm_loc_loss = 0
    odm_conf_loss = 0
    epoch = 0 + args.resume_epoch

    epoch_size = math.ceil(len(dataset) / args.batch_size)
    max_iter = args.max_epoch * epoch_size

    # Iterations at which step_index is advanced (at 2/3, 8/9 and the end
    # of training; VOC uses 5/6 for the second step).
    stepvalues = (args.max_epoch * 2 // 3 * epoch_size,
                  args.max_epoch * 8 // 9 * epoch_size,
                  args.max_epoch * epoch_size)
    if args.dataset == 'VOC':
        stepvalues = (args.max_epoch * 2 // 3 * epoch_size,
                      args.max_epoch * 5 // 6 * epoch_size,
                      args.max_epoch * epoch_size)

    step_index = 0

    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
        # Count the steps that were already passed before the resume point.
        for step in stepvalues:
            if step < start_iter:
                step_index += 1
    else:
        start_iter = 0

    if args.visdom:
        vis_title = 'RefineDet.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot(viz, 'Iteration', 'Loss', vis_title,
                                    vis_legend)
        epoch_plot = create_vis_plot(viz, 'Epoch', 'Loss', vis_title,
                                     vis_legend)

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    for iteration in range(start_iter, max_iter):
        # Start of an epoch: plot, reset counters, restart the iterator and
        # possibly checkpoint.
        if iteration % epoch_size == 0:
            if args.visdom and iteration != 0:
                update_vis_plot(viz, epoch, arm_loc_loss, arm_conf_loss,
                                epoch_plot, None, 'append', epoch_size)
            # reset epoch loss counters
            arm_loc_loss = 0
            arm_conf_loss = 0
            odm_loc_loss = 0
            odm_conf_loss = 0
            # create batch iterator
            batch_iterator = iter(data_loader)
            # Checkpoint every 10 epochs, and every 5 once past 2/3 of
            # training.
            # NOTE(review): this path has no separator after save_folder
            # and saves `net`, while the final save below uses '/' and
            # `refinedet_net`.
            if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch >
                                                   (args.max_epoch * 2 // 3)):
                torch.save(
                    net.state_dict(), args.save_folder + 'RefineDet' +
                    args.input_size + '_' + args.dataset + '_epoches_' +
                    repr(epoch) + '.pth')
            epoch += 1

        t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)

        # load train data
        images, targets = next(batch_iterator)
        images = images.to(device)
        targets = [ann.to(device) for ann in targets]
        # for an in targets:
        #     for instance in an:
        #         for cor in instance[:-1]:
        #             if cor < 0 or cor > 1:
        #                 raise StopIteration
        # forward
        out = net(images)
        # backprop
        optimizer.zero_grad()
        arm_loss_l, arm_loss_c = arm_criterion(out, priors, targets)
        odm_loss_l, odm_loss_c = odm_criterion(out, priors, targets)
        arm_loss = arm_loss_l + arm_loss_c
        odm_loss = odm_loss_l + odm_loss_c
        loss = arm_loss + odm_loss
        loss.backward()
        optimizer.step()
        arm_loc_loss += arm_loss_l.item()
        arm_conf_loss += arm_loss_c.item()
        odm_loc_loss += odm_loss_l.item()
        odm_conf_loss += odm_loss_c.item()
        t1 = time.time()
        batch_time = t1 - t0
        # Remaining time estimated from the duration of this batch alone.
        eta = int(batch_time * (max_iter - iteration))
        print('Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || ARM_L Loss: {:.4f} ARM_C Loss: {:.4f} ODM_L Loss: {:.4f} ODM_C Loss: {:.4f} loss: {:.4f} || LR: {:.8f} || Batchtime: {:.4f} s || ETA: {}'.\
            format(epoch, args.max_epoch, (iteration % epoch_size) + 1,
                   epoch_size, iteration + 1, max_iter, arm_loss_l.item(),
                   arm_loss_c.item(), odm_loss_l.item(), odm_loss_c.item(),
                   loss.item(), lr, batch_time,
                   str(datetime.timedelta(seconds=eta))))

        if args.visdom:
            update_vis_plot(viz, iteration, arm_loss_l.item(),
                            arm_loss_c.item(), iter_plot, epoch_plot, 'append')

    # Final weights are taken from the bare model, not the wrapper.
    torch.save(
        refinedet_net.state_dict(), args.save_folder +
        '/RefineDet{}_{}_final.pth'.format(args.input_size, args.dataset))
def main():
    """Set up data, model, optimizer and priors, then train an RFB network.

    Reads the module-level ``args`` and updates the module-level
    ``minmum_loss``. After each epoch, the process with
    ``args.local_rank == 0`` writes a checkpoint through ``save_checkpoint``.
    """
    global args
    global minmum_loss
    args.gpu = 0
    args.world_size = 1

    if args.distributed:
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()
    args.total_batch_size = args.world_size * args.batch_size

    ## DATA loading code
    if args.dataset == 'COCO':
        train_sets = [('2014', 'train'), ('2014', 'valminusminival')]
        cfg = COCO_512 if args.size == '512' else COCO_300
    elif args.dataset == 'VOC':
        train_sets = [('2007', 'trainval'), ('2012', 'trainval')]
        cfg = VOC_512 if args.size == '512' else VOC_300

    # other important parameters
    is_mobile = args.version == 'RFB_mobile'
    img_dim = 512 if args.size == '512' else 300
    rgb_means = (103.94, 116.78, 123.68) if is_mobile else (104, 117, 123)
    p = 0.2 if is_mobile else 0.6
    num_classes = 81 if args.dataset == 'COCO' else 21

    if args.dataset == 'COCO':
        dataset = COCODetection(root=cfg['coco_root'],
                                image_sets=train_sets,
                                preproc=preproc(img_dim, rgb_means, p))
    elif args.dataset == 'VOC':
        dataset = VOCDetection(root=cfg['voc_root'],
                               image_sets=train_sets,
                               preproc=preproc(img_dim, rgb_means, p),
                               target_transform=AnnotationTransform())
    # NOTE(review): any other args.dataset value leaves `cfg` and `dataset`
    # unbound.
    print('Training SSD on:', dataset.name)
    print('Loading the dataset...')
    train_loader = data.DataLoader(dataset,
                                   args.batch_size,
                                   num_workers=args.num_workers,
                                   shuffle=True,
                                   collate_fn=detection_collate,
                                   pin_memory=True)

    print("Build RFB network")
    if args.version == 'RFB_vgg':
        model = RFB_Net_vgg('train', img_dim, num_classes)
    elif args.version == 'RFB_E_vgg':
        model = RFB_Net_E_vgg('train', img_dim, num_classes)
    elif args.version == 'RFB_mobile':
        model = RFB_Net_mobile('train', img_dim, num_classes)
    else:
        # NOTE(review): `model` stays unbound here, so the next use fails.
        print('Unkown version!')

    if args.pretrained:
        base_weights = torch.load(args.save_folder + args.basenet)
        print('Loading base network...')
        model.base.load_state_dict(base_weights)
    model = model.cuda()

    # optimizer and loss function
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False)

    ## get the priorbox of ssd
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(
                args.resume,
                map_location=lambda storage, loc: storage.cuda(args.gpu))
            args.start_epoch = checkpoint['epoch']
            # Checkpoints written by the original code store the best loss
            # under 'best_prec1' only, so accept either key rather than
            # failing with KeyError.
            minmum_loss = checkpoint.get(
                'minmum_loss', checkpoint.get('best_prec1', minmum_loss))
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        print('Initializing weights...')
        # initialize the newly added layers with weights_init
        model.extras.apply(weights_init)
        model.loc.apply(weights_init)
        model.conf.apply(weights_init)
        model.Norm.apply(weights_init)
        if args.version == 'RFB_E_vgg':
            model.reduce.apply(weights_init)
            model.up_reduce.apply(weights_init)

    print('Using the specified args:')
    print(args)
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        loss = train(train_loader, model, priors, criterion, optimizer, epoch)
        # track the lowest loss so far and save a checkpoint
        if args.local_rank == 0:
            is_best = loss < minmum_loss
            minmum_loss = min(loss, minmum_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    # Key read back on resume; 'best_prec1' is kept for
                    # existing consumers of the checkpoint.
                    'minmum_loss': minmum_loss,
                    'best_prec1': minmum_loss,
                    'optimizer': optimizer.state_dict(),
                }, is_best, epoch)
def SSD300(input_shape, num_classes=21):
    """SSD300 architecture.

    Builds a VGG-style backbone with extra blocks, attaches location,
    confidence and prior-box heads to six feature sources, and concatenates
    everything into one ``predictions`` tensor.

    # Arguments
        input_shape: Shape of the input image,
            expected to be either (300, 300, 3) or (3, 300, 300)(not tested).
        num_classes: Number of classes including background. When it is not
            21, the confidence layers get a ``_<num_classes>`` name suffix.

    # Returns
        A Keras ``Model`` mapping the input to ``net['predictions']``.

    # References
        https://arxiv.org/abs/1512.02325
    """
    print('begin building networks')
    kernel_size = (3, 3)
    net = {}
    # Block 1
    input_tensor = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])
    net['input'] = input_tensor
    net['conv1_1'] = Conv2D(64, kernel_size, activation='relu',
                            padding='same', name='conv1_1')(net['input'])
    net['conv1_2'] = Conv2D(64, kernel_size, activation='relu',
                            padding='same', name='conv1_2')(net['conv1_1'])
    net['pool1'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
                                name='pool1')(net['conv1_2'])
    # Block 2
    net['conv2_1'] = Conv2D(128, kernel_size, activation='relu',
                            padding='same', name='conv2_1')(net['pool1'])
    net['conv2_2'] = Conv2D(128, kernel_size, activation='relu',
                            padding='same', name='conv2_2')(net['conv2_1'])
    net['pool2'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
                                name='pool2')(net['conv2_2'])
    # Block 3
    net['conv3_1'] = Conv2D(256, kernel_size, activation='relu',
                            padding='same', name='conv3_1')(net['pool2'])
    net['conv3_2'] = Conv2D(256, kernel_size, activation='relu',
                            padding='same', name='conv3_2')(net['conv3_1'])
    net['conv3_3'] = Conv2D(256, kernel_size, activation='relu',
                            padding='same', name='conv3_3')(net['conv3_2'])
    net['pool3'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
                                name='pool3')(net['conv3_3'])
    # Block 4
    net['conv4_1'] = Conv2D(512, kernel_size, activation='relu',
                            padding='same', name='conv4_1')(net['pool3'])
    net['conv4_2'] = Conv2D(512, kernel_size, activation='relu',
                            padding='same', name='conv4_2')(net['conv4_1'])
    net['conv4_3'] = Conv2D(512, kernel_size, activation='relu',
                            padding='same', name='conv4_3')(net['conv4_2'])
    net['pool4'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
                                name='pool4')(net['conv4_3'])
    # Block 5
    net['conv5_1'] = Conv2D(512, kernel_size, activation='relu',
                            padding='same', name='conv5_1')(net['pool4'])
    net['conv5_2'] = Conv2D(512, kernel_size, activation='relu',
                            padding='same', name='conv5_2')(net['conv5_1'])
    net['conv5_3'] = Conv2D(512, kernel_size, activation='relu',
                            padding='same', name='conv5_3')(net['conv5_2'])
    net['pool5'] = MaxPooling2D((3, 3), strides=(1, 1), padding='same',
                                name='pool5')(net['conv5_3'])
    # FC6
    net['fc6'] = Conv2D(1024, kernel_size, dilation_rate=(6, 6),
                        activation='relu', padding='same',
                        name='fc6')(net['pool5'])
    # x = Dropout(0.5, name='drop6')(x)
    # FC7
    net['fc7'] = Conv2D(1024, (1, 1), activation='relu', padding='same',
                        name='fc7')(net['fc6'])
    # x = Dropout(0.5, name='drop7')(x)
    # Block 6
    net['conv6_1'] = Conv2D(256, (1, 1), activation='relu', padding='same',
                            name='conv6_1')(net['fc7'])
    net['conv6_2'] = Conv2D(512, kernel_size, strides=(2, 2),
                            activation='relu', padding='same',
                            name='conv6_2')(net['conv6_1'])
    # Block 7
    net['conv7_1'] = Conv2D(128, (1, 1), activation='relu', padding='same',
                            name='conv7_1')(net['conv6_2'])
    # Explicit zero padding followed by a 'valid' strided convolution.
    net['conv7_2'] = ZeroPadding2D()(net['conv7_1'])
    net['conv7_2'] = Conv2D(256, kernel_size, strides=(2, 2),
                            activation='relu', padding='valid',
                            name='conv7_2')(net['conv7_2'])
    # Block 8
    net['conv8_1'] = Conv2D(128, (1, 1), activation='relu', padding='same',
                            name='conv8_1')(net['conv7_2'])
    net['conv8_2'] = Conv2D(256, kernel_size, strides=(2, 2),
                            activation='relu', padding='same',
                            name='conv8_2')(net['conv8_1'])
    # Last Pool
    net['pool6'] = GlobalAveragePooling2D(name='pool6')(net['conv8_2'])
    print('base network built')

    # Prediction from conv4_3
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])
    num_priors = 3
    x = Conv2D(num_priors * 4, kernel_size, padding='same',
               name='conv4_3_norm_mbox_loc')(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_loc'] = x
    flatten = Flatten(name='conv4_3_norm_mbox_loc_flat')
    net['conv4_3_norm_mbox_loc_flat'] = flatten(net['conv4_3_norm_mbox_loc'])
    name = 'conv4_3_norm_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Conv2D(num_priors * num_classes, kernel_size, padding='same',
               name=name)(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_conf'] = x
    flatten = Flatten(name='conv4_3_norm_mbox_conf_flat')
    net['conv4_3_norm_mbox_conf_flat'] = flatten(net['conv4_3_norm_mbox_conf'])
    priorbox = PriorBox(img_size, 30.0, aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv4_3_norm_mbox_priorbox')
    net['conv4_3_norm_mbox_priorbox'] = priorbox(net['conv4_3_norm'])
    print('conv4_3_norm_mbox_priorbox built')

    # Prediction from fc7
    num_priors = 6
    net['fc7_mbox_loc'] = Conv2D(num_priors * 4, kernel_size, padding='same',
                                 name='fc7_mbox_loc')(net['fc7'])
    flatten = Flatten(name='fc7_mbox_loc_flat')
    net['fc7_mbox_loc_flat'] = flatten(net['fc7_mbox_loc'])
    name = 'fc7_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    net['fc7_mbox_conf'] = Conv2D(num_priors * num_classes, (3, 3),
                                  padding='same', name=name)(net['fc7'])
    flatten = Flatten(name='fc7_mbox_conf_flat')
    net['fc7_mbox_conf_flat'] = flatten(net['fc7_mbox_conf'])
    priorbox = PriorBox(img_size, 60.0, max_size=114.0, aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='fc7_mbox_priorbox')
    net['fc7_mbox_priorbox'] = priorbox(net['fc7'])
    print('fc7_mbox_priorbox built')

    # Prediction from conv6_2
    num_priors = 6
    x = Conv2D(num_priors * 4, kernel_size, padding='same',
               name='conv6_2_mbox_loc')(net['conv6_2'])
    net['conv6_2_mbox_loc'] = x
    flatten = Flatten(name='conv6_2_mbox_loc_flat')
    net['conv6_2_mbox_loc_flat'] = flatten(net['conv6_2_mbox_loc'])
    name = 'conv6_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Conv2D(num_priors * num_classes, kernel_size, padding='same',
               name=name)(net['conv6_2'])
    net['conv6_2_mbox_conf'] = x
    flatten = Flatten(name='conv6_2_mbox_conf_flat')
    net['conv6_2_mbox_conf_flat'] = flatten(net['conv6_2_mbox_conf'])
    priorbox = PriorBox(img_size, 114.0, max_size=168.0, aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv6_2_mbox_priorbox')
    net['conv6_2_mbox_priorbox'] = priorbox(net['conv6_2'])
    print('conv6_2_mbox_priorbox built')

    # Prediction from conv7_2
    num_priors = 6
    x = Conv2D(num_priors * 4, kernel_size, padding='same',
               name='conv7_2_mbox_loc')(net['conv7_2'])
    net['conv7_2_mbox_loc'] = x
    flatten = Flatten(name='conv7_2_mbox_loc_flat')
    net['conv7_2_mbox_loc_flat'] = flatten(net['conv7_2_mbox_loc'])
    name = 'conv7_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Conv2D(num_priors * num_classes, kernel_size, padding='same',
               name=name)(net['conv7_2'])
    net['conv7_2_mbox_conf'] = x
    flatten = Flatten(name='conv7_2_mbox_conf_flat')
    net['conv7_2_mbox_conf_flat'] = flatten(net['conv7_2_mbox_conf'])
    priorbox = PriorBox(img_size, 168.0, max_size=222.0, aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv7_2_mbox_priorbox')
    net['conv7_2_mbox_priorbox'] = priorbox(net['conv7_2'])
    print('conv7_2_mbox_priorbox built')

    # Prediction from conv8_2
    num_priors = 6
    x = Conv2D(num_priors * 4, kernel_size, padding='same',
               name='conv8_2_mbox_loc')(net['conv8_2'])
    net['conv8_2_mbox_loc'] = x
    flatten = Flatten(name='conv8_2_mbox_loc_flat')
    net['conv8_2_mbox_loc_flat'] = flatten(net['conv8_2_mbox_loc'])
    name = 'conv8_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Conv2D(num_priors * num_classes, kernel_size, padding='same',
               name=name)(net['conv8_2'])
    net['conv8_2_mbox_conf'] = x
    flatten = Flatten(name='conv8_2_mbox_conf_flat')
    net['conv8_2_mbox_conf_flat'] = flatten(net['conv8_2_mbox_conf'])
    priorbox = PriorBox(img_size, 222.0, max_size=276.0, aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv8_2_mbox_priorbox')
    net['conv8_2_mbox_priorbox'] = priorbox(net['conv8_2'])
    print('conv8_2_mbox_priorbox built')

    # Prediction from pool6 (already flat, so Dense layers are used)
    num_priors = 6
    x = Dense(num_priors * 4, name='pool6_mbox_loc_flat')(net['pool6'])
    net['pool6_mbox_loc_flat'] = x
    name = 'pool6_mbox_conf_flat'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Dense(num_priors * num_classes, name=name)(net['pool6'])
    net['pool6_mbox_conf_flat'] = x
    priorbox = PriorBox(img_size, 276.0, max_size=330.0, aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='pool6_mbox_priorbox')
    # The prior-box layer is applied to a reshaped 1x1x256 (or 256x1x1)
    # version of the pooled vector.
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    net['pool6_reshaped'] = Reshape(target_shape,
                                    name='pool6_reshaped')(net['pool6'])
    net['pool6_mbox_priorbox'] = priorbox(net['pool6_reshaped'])
    print('pool6_mbox_priorbox built')

    # Gather all predictions
    net['mbox_loc'] = concatenate([
        net['conv4_3_norm_mbox_loc_flat'],
        net['fc7_mbox_loc_flat'],
        net['conv6_2_mbox_loc_flat'],
        net['conv7_2_mbox_loc_flat'],
        net['conv8_2_mbox_loc_flat'],
        net['pool6_mbox_loc_flat'],
    ], axis=1, name='mbox_loc')
    net['mbox_conf'] = concatenate([
        net['conv4_3_norm_mbox_conf_flat'],
        net['fc7_mbox_conf_flat'],
        net['conv6_2_mbox_conf_flat'],
        net['conv7_2_mbox_conf_flat'],
        net['conv8_2_mbox_conf_flat'],
        net['pool6_mbox_conf_flat'],
    ], axis=1, name='mbox_conf')
    net['mbox_priorbox'] = concatenate([
        net['conv4_3_norm_mbox_priorbox'],
        net['fc7_mbox_priorbox'],
        net['conv6_2_mbox_priorbox'],
        net['conv7_2_mbox_priorbox'],
        net['conv8_2_mbox_priorbox'],
        net['pool6_mbox_priorbox'],
    ], axis=1, name='mbox_priorbox')
    print('gathering all prediction layers built')

    # divide 4 for [xmin, ymin, xmax, ymax]
    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    else:
        # `int_shape` is a backend function, not a tensor attribute, so the
        # former `elif hasattr(..., 'int_shape')` never matched and left
        # `num_boxes` unbound.
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4
    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])
    net['predictions'] = concatenate(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        axis=2, name='predictions')
    print('prediction layers built')
    model = Model(net['input'], net['predictions'])
    return model
def mini_SSD(num_classes=21):
    """Build a small SSD-style model on a frozen ImageNet VGG16.

    Three prediction branches are taken at successive depths after VGG16's
    ``block4_pool`` output. Each branch is reshaped to rows of
    ``4 + num_classes`` values and split into box offsets (first 4) and
    softmax class scores (the rest).

    Args:
        num_classes: Number of class scores predicted per box.

    Returns:
        A Keras ``Model`` whose output concatenates offsets and scores.
    """
    values_per_box = 4 + num_classes
    aspect_ratios = (1, 2, 1 / 2)
    head_filters = values_per_box * len(aspect_ratios)

    backbone = VGG16(weights='imagenet')
    backbone.layers[0].name = 'input_1'
    input_tensor = backbone.input
    # Freeze every backbone layer.
    for vgg_layer in backbone.layers:
        vgg_layer.trainable = False

    # Branch 1, directly on block4_pool.
    features = backbone.get_layer('block4_pool').output
    features = Convolution2D(head_filters, 3, 3,
                             border_mode='same')(features)
    branch_1 = PriorBox(aspect_ratios)(features)

    # Branch 2, after a conv / relu / pool / dropout stage.
    features = Convolution2D(32, 3, 3, border_mode='same')(branch_1)
    features = Activation('relu')(features)
    features = MaxPooling2D((2, 2))(features)
    features = Dropout(.5)(features)
    features = Convolution2D(head_filters, 3, 3,
                             border_mode='same')(features)
    branch_2 = PriorBox(aspect_ratios)(features)

    # Branch 3, after a second such stage.
    features = Convolution2D(64, 3, 3, border_mode='same')(branch_2)
    features = Activation('relu')(features)
    features = MaxPooling2D((3, 3))(features)
    features = Dropout(.5)(features)
    features = Convolution2D(head_filters, 3, 3,
                             border_mode='same')(features)
    branch_3 = PriorBox(aspect_ratios)(features)

    # Split each branch into box offsets and class probabilities.
    box_parts = []
    class_parts = []
    for branch in (branch_1, branch_2, branch_3):
        rows = Reshape((-1, 4 + num_classes))(branch)
        box_parts.append(Lambda(lambda x: x[:, :, :4])(rows))
        class_parts.append(Lambda(lambda x: K.softmax(x[:, :, 4:]))(rows))

    classification_tensor = merge(class_parts, mode='concat',
                                  concat_axis=1, name='classes')
    localization_tensor = merge(box_parts, mode='concat',
                                concat_axis=1, name='encoded_box')
    output_tensor = merge([localization_tensor, classification_tensor],
                          mode='concat', concat_axis=-1, name='predictions')
    return Model(input_tensor, output_tensor)
def simple_SSD(input_shape, num_classes, min_size, num_priors, max_size,
               aspect_ratios, variances):
    """Build a two-branch SSD-style model on a small convolutional trunk.

    Each branch produces flattened location and confidence predictions plus
    prior boxes; the branches are concatenated and joined into one
    predictions tensor.

    Args:
        input_shape: Shape passed to ``Input``; its first two entries are
            passed to ``PriorBox`` as the image size.
        num_classes: Number of class scores per prior.
        min_size: Second positional argument of ``PriorBox``.
        num_priors: Number of priors per location; sizes the head filters.
        max_size: Third positional argument of ``PriorBox``.
        aspect_ratios: Fourth positional argument of ``PriorBox``.
        variances: Currently unused.
            NOTE(review): probably meant to be forwarded to ``PriorBox``;
            confirm against its signature.

    Returns:
        A Keras ``Model`` from the input tensor to the predictions.
    """
    input_tensor = Input(shape=input_shape)
    body = Convolution2D(16, 7, 7)(input_tensor)
    body = Activation('relu')(body)
    body = MaxPooling2D(2, 2, border_mode='valid')(body)

    body = Convolution2D(32, 5, 5)(body)
    body = Activation('relu')(body)
    branch_1 = MaxPooling2D(2, 2, border_mode='valid')(body)

    body = Convolution2D(64, 3, 3)(branch_1)
    body = Activation('relu')(body)
    branch_2 = MaxPooling2D(2, 2, border_mode='valid')(body)

    # first branch
    norm_1 = Normalize(20)(branch_1)
    localization_1 = Convolution2D(num_priors * 4, 3, 3,
                                   border_mode='same')(norm_1)
    # Flatten is a layer: construct it, then call it on the tensor.
    localization_1 = Flatten()(localization_1)
    classification_1 = Convolution2D(num_priors * num_classes, 3, 3,
                                     border_mode='same')(norm_1)
    classification_1 = Flatten()(classification_1)
    # The prior-box layer must be applied to a feature map to yield a
    # tensor that can be merged below.
    prior_boxes_1 = PriorBox(input_shape[0:2], min_size, max_size,
                             aspect_ratios)(norm_1)

    # second branch
    norm_2 = Normalize(20)(branch_2)
    localization_2 = Convolution2D(num_priors * 4, 3, 3,
                                   border_mode='same')(norm_2)
    localization_2 = Flatten()(localization_2)
    classification_2 = Convolution2D(num_priors * num_classes, 3, 3,
                                     border_mode='same')(norm_2)
    classification_2 = Flatten()(classification_2)
    prior_boxes_2 = PriorBox(input_shape[0:2], min_size, max_size,
                             aspect_ratios)(norm_2)

    # Merge is built from its options and then called on the tensor list,
    # so each head is a tensor rather than a layer object.
    localization_head = Merge(mode='concat', concat_axis=1)(
        [localization_1, localization_2])
    classification_head = Merge(mode='concat', concat_axis=1)(
        [classification_1, classification_2])
    prior_boxes_head = Merge(mode='concat', concat_axis=1)(
        [prior_boxes_1, prior_boxes_2])

    # Four location values per box.
    if hasattr(localization_head, '_keras_shape'):
        num_boxes = localization_head._keras_shape[-1] // 4
    else:
        # `int_shape` is a backend function, not a tensor attribute, so the
        # former `elif hasattr(..., 'int_shape')` never matched.
        num_boxes = K.int_shape(localization_head)[-1] // 4

    localization_head = Reshape((num_boxes, 4))(localization_head)
    classification_head = Reshape(
        (num_boxes, num_classes))(classification_head)
    classification_head = Activation('softmax')(classification_head)
    predictions = Merge(mode='concat', concat_axis=2)(
        [localization_head, classification_head, prior_boxes_head])
    model = Model(input_tensor, predictions)
    return model
def main():
    """Train a RetinaFace model, optionally resuming from a checkpoint.

    Reads its configuration from the module-level ``args`` and ``cfg`` and
    updates the module-level ``minmum_loss`` with the lowest training loss
    seen so far. A checkpoint is written after every epoch by the process
    whose ``args.local_rank`` is 0.
    """
    global args
    global minmum_loss

    args.gpu = 0
    args.world_size = 1
    if args.distributed:
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()
    args.total_batch_size = args.world_size * args.batch_size

    # build the detection network
    print("Building net...")
    model = RetinaFace(cfg=cfg)
    print("Printing net...")

    # Move the parameters to the GPU *before* wrapping: the NCCL backend
    # can only synchronise CUDA tensors, so DistributedDataParallel must be
    # given a module that already lives on the device.
    model = model.cuda()
    # for multi gpu
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model)

    # optimizer and loss function
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.35, True, 0, True, 7,
                             0.35, False)

    # dataset
    print("loading dataset")
    train_dataset = WiderFaceDetection(
        args.training_dataset,
        preproc(cfg['image_size'], cfg['rgb_mean']))
    train_loader = data.DataLoader(train_dataset, args.batch_size,
                                   num_workers=args.num_workers,
                                   shuffle=True,
                                   collate_fn=detection_collate,
                                   pin_memory=True)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(
                args.resume,
                map_location=lambda storage, loc: storage.cuda(args.gpu))
            args.start_epoch = checkpoint['epoch']
            # Checkpoints written by earlier versions of this script store
            # the best loss under 'best_prec1' only; accept either key.
            minmum_loss = checkpoint.get(
                'minmum_loss', checkpoint.get('best_prec1', minmum_loss))
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    print('Using the specified args:')
    print(args)

    # load PriorBox
    print("Load priorbox")
    with torch.no_grad():
        priorbox = PriorBox(cfg=cfg,
                            image_size=(cfg['image_size'],
                                        cfg['image_size']))
        priors = priorbox.forward()
        priors = priors.cuda()

    print("start traing")
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        train_loss = train(train_loader, model, priors, criterion,
                           optimizer, epoch)
        if args.local_rank == 0:
            is_best = train_loss < minmum_loss
            minmum_loss = min(train_loss, minmum_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    # 'minmum_loss' is the key the resume path reads;
                    # 'best_prec1' is kept for existing consumers.
                    'minmum_loss': minmum_loss,
                    'best_prec1': minmum_loss,
                    'optimizer': optimizer.state_dict(),
                }, is_best, epoch)
def main():
    """Train the network returned by ``build_net`` on the WIDER dataset.

    Reads its configuration from the module-level ``args`` and ``cfg`` and
    updates the module-level ``minmum_loss`` with the lowest validation
    loss seen so far. After every epoch the model is validated and the
    process whose ``args.local_rank`` is 0 writes a checkpoint.
    """
    global args
    global minmum_loss

    args.gpu = 0
    args.world_size = 1
    if args.distributed:
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()
    args.total_batch_size = args.world_size * args.batch_size

    # build the detection network
    print("Building net...")
    pyramidbox = build_net('train', cfg.NUM_CLASSES)
    model = pyramidbox

    if args.pretrained:
        vgg_weights = torch.load(args.save_folder + args.basenet)
        print('Load base network....')
        model.vgg.load_state_dict(vgg_weights)

    # Move the parameters to the GPU *before* wrapping: the NCCL backend
    # can only synchronise CUDA tensors, so DistributedDataParallel must be
    # given a module that already lives on the device.
    model = model.cuda()
    # for multi gpu
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model)

    # optimizer and loss function
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion1 = MultiBoxLoss(cfg, True)
    criterion2 = MultiBoxLoss(cfg, True, use_head_loss=True)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(
                args.resume,
                map_location=lambda storage, loc: storage.cuda(args.gpu))
            args.start_epoch = checkpoint['epoch']
            # Checkpoints written by earlier versions of this script store
            # the best loss under 'best_prec1' only; accept either key.
            minmum_loss = checkpoint.get(
                'minmum_loss', checkpoint.get('best_prec1', minmum_loss))
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        # Fresh run: initialise every sub-module outside the VGG base.
        print('Initializing weights...')
        pyramidbox.extras.apply(pyramidbox.weights_init)
        pyramidbox.lfpn_topdown.apply(pyramidbox.weights_init)
        pyramidbox.lfpn_later.apply(pyramidbox.weights_init)
        pyramidbox.cpm.apply(pyramidbox.weights_init)
        pyramidbox.loc_layers.apply(pyramidbox.weights_init)
        pyramidbox.conf_layers.apply(pyramidbox.weights_init)

    print('Loading wider dataset...')
    train_dataset = WIDERDetection(cfg.FACE.TRAIN_FILE, mode='train')
    val_dataset = WIDERDetection(cfg.FACE.VAL_FILE, mode='val')

    train_loader = data.DataLoader(train_dataset, args.batch_size,
                                   num_workers=args.num_workers,
                                   shuffle=True,
                                   collate_fn=detection_collate,
                                   pin_memory=True)
    # Validation uses half the training batch size, but never less than
    # one sample (a batch size of 0 is rejected by DataLoader).
    val_batchsize = max(1, args.batch_size // 2)
    val_loader = data.DataLoader(val_dataset, val_batchsize,
                                 num_workers=args.num_workers,
                                 shuffle=False,
                                 collate_fn=detection_collate,
                                 pin_memory=True)

    print('Using the specified args:')
    print(args)

    # load PriorBox
    with torch.no_grad():
        priorbox = PriorBox(input_size=[640, 640], cfg=cfg)
        priors = priorbox.forward()
        priors = priors.cuda()

    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        end = time.time()
        train_loss = train(train_loader, model, priors, criterion1,
                           criterion2, optimizer, epoch)
        val_loss = val(val_loader, model, priors, criterion1, criterion2)
        if args.local_rank == 0:
            is_best = val_loss < minmum_loss
            minmum_loss = min(val_loss, minmum_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    # 'minmum_loss' is the key the resume path reads;
                    # 'best_prec1' is kept for existing consumers.
                    'minmum_loss': minmum_loss,
                    'best_prec1': minmum_loss,
                    'optimizer': optimizer.state_dict(),
                }, is_best, epoch)
        epoch_time = time.time() - end
        print('Epoch %s time cost %f' % (epoch, epoch_time))
cv2.rectangle(img, (p1[0] - 2 // 2, p1[1] - 2 - baseline), (p1[0] + text_size[0], p1[1] + text_size[1]), [255, 0, 0], -1) cv2.putText(img, conf, (p1[0], p1[1] + baseline), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 255, 255), 1, 8) t2 = time.time() print('detect:{} timer:{}'.format(img_path, t2 - t1)) cv2.imwrite(os.path.join(args.save_dir, os.path.basename(img_path)), img) if __name__ == '__main__': # load PriorBox with torch.no_grad(): priorbox = PriorBox(input_size=[640, 640], cfg=cfg) priors = priorbox.forward() priors = priors.cuda() net = build_net('test', cfg.NUM_CLASSES) net.load_state_dict(torch.load(args.model)) net.eval() if use_cuda: net.cuda() cudnn.benckmark = True img_path = './img' img_list = [ os.path.join(img_path, x) for x in os.listdir(img_path) if x.endswith('jpg')
def mini_SSD300(input_shape=(300, 300, 3), num_classes=21):
    """Reduced SSD300 architecture with two prediction sources.

    The VGG-style trunk (blocks 1-5, fc6, fc7) is kept, but predictions are
    only taken from the normalized ``conv4_3`` output and from ``fc7``.

    # Arguments
        input_shape: Shape of the input image,
            expected to be either (300, 300, 3) or (3, 300, 300)(not tested).
        num_classes: Number of classes including background.

    # Returns
        A ``Model`` whose output concatenates, along axis 2, the box
        offsets, the softmax class scores and the prior boxes.

    # References
        https://arxiv.org/abs/1512.02325
    """
    net = {}
    # Block 1
    input_tensor = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])
    net['input'] = input_tensor
    net['conv1_1'] = Convolution2D(64, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv1_1')(net['input'])
    net['conv1_2'] = Convolution2D(64, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv1_2')(net['conv1_1'])
    net['pool1'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool1')(net['conv1_2'])
    # Block 2
    net['conv2_1'] = Convolution2D(128, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv2_1')(net['pool1'])
    net['conv2_2'] = Convolution2D(128, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv2_2')(net['conv2_1'])
    net['pool2'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool2')(net['conv2_2'])
    # Block 3
    net['conv3_1'] = Convolution2D(256, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv3_1')(net['pool2'])
    net['conv3_2'] = Convolution2D(256, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv3_2')(net['conv3_1'])
    net['conv3_3'] = Convolution2D(256, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv3_3')(net['conv3_2'])
    net['pool3'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool3')(net['conv3_3'])
    # Block 4
    net['conv4_1'] = Convolution2D(512, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv4_1')(net['pool3'])
    net['conv4_2'] = Convolution2D(512, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv4_2')(net['conv4_1'])
    net['conv4_3'] = Convolution2D(512, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv4_3')(net['conv4_2'])
    net['pool4'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool4')(net['conv4_3'])
    # Block 5
    net['conv5_1'] = Convolution2D(512, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv5_1')(net['pool4'])
    net['conv5_2'] = Convolution2D(512, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv5_2')(net['conv5_1'])
    net['conv5_3'] = Convolution2D(512, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv5_3')(net['conv5_2'])
    net['pool5'] = MaxPooling2D((3, 3), strides=(1, 1), border_mode='same',
                                name='pool5')(net['conv5_3'])
    # FC6
    net['fc6'] = AtrousConvolution2D(1024, 3, 3, atrous_rate=(6, 6),
                                     activation='relu', border_mode='same',
                                     name='fc6')(net['pool5'])
    # FC7
    net['fc7'] = Convolution2D(1024, 1, 1, activation='relu',
                               border_mode='same', name='fc7')(net['fc6'])

    # Prediction from conv4_3 (L2-normalized with an initial scale of 20)
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])
    num_priors = 3
    x = Convolution2D(num_priors * 4, 3, 3, border_mode='same',
                      name='conv4_3_norm_mbox_loc')(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_loc'] = x
    flatten = Flatten(name='conv4_3_norm_mbox_loc_flat')
    net['conv4_3_norm_mbox_loc_flat'] = flatten(net['conv4_3_norm_mbox_loc'])
    # The confidence layer's name carries the class count whenever it is not
    # the default 21.
    name = 'conv4_3_norm_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Convolution2D(num_priors * num_classes, 3, 3, border_mode='same',
                      name=name)(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_conf'] = x
    flatten = Flatten(name='conv4_3_norm_mbox_conf_flat')
    net['conv4_3_norm_mbox_conf_flat'] = flatten(
        net['conv4_3_norm_mbox_conf'])
    priorbox = PriorBox(img_size, 30.0, aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv4_3_norm_mbox_priorbox')
    net['conv4_3_norm_mbox_priorbox'] = priorbox(net['conv4_3_norm'])

    # Prediction from fc7
    num_priors = 6
    net['fc7_mbox_loc'] = Convolution2D(num_priors * 4, 3, 3,
                                        border_mode='same',
                                        name='fc7_mbox_loc')(net['fc7'])
    flatten = Flatten(name='fc7_mbox_loc_flat')
    net['fc7_mbox_loc_flat'] = flatten(net['fc7_mbox_loc'])
    name = 'fc7_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    net['fc7_mbox_conf'] = Convolution2D(num_priors * num_classes, 3, 3,
                                         border_mode='same',
                                         name=name)(net['fc7'])
    flatten = Flatten(name='fc7_mbox_conf_flat')
    net['fc7_mbox_conf_flat'] = flatten(net['fc7_mbox_conf'])
    priorbox = PriorBox(img_size, 60.0, max_size=114.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='fc7_mbox_priorbox')
    net['fc7_mbox_priorbox'] = priorbox(net['fc7'])

    # Gather all predictions
    net['mbox_loc'] = merge([net['conv4_3_norm_mbox_loc_flat'],
                             net['fc7_mbox_loc_flat']],
                            mode='concat', concat_axis=1, name='mbox_loc')
    net['mbox_conf'] = merge([net['conv4_3_norm_mbox_conf_flat'],
                              net['fc7_mbox_conf_flat']],
                             mode='concat', concat_axis=1, name='mbox_conf')
    # Each box contributes four localization values.
    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    else:
        # Tensors have no ``int_shape`` attribute; ask the backend instead
        # so ``num_boxes`` is always bound.
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4
    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])
    net['mbox_priorbox'] = merge([net['conv4_3_norm_mbox_priorbox'],
                                  net['fc7_mbox_priorbox']],
                                 mode='concat', concat_axis=1,
                                 name='mbox_priorbox')
    net['predictions'] = merge([net['mbox_loc'],
                                net['mbox_conf'],
                                net['mbox_priorbox']],
                               mode='concat', concat_axis=2,
                               name='predictions')
    model = Model(net['input'], net['predictions'])
    return model
def mini_SSD300(input_shape=(300, 300, 3), num_classes=21):
    """Build a three-block SSD variant with a single prediction source.

    Only the first three VGG-style convolution blocks are built. The one
    prediction head is attached to the normalized ``conv3_3`` output; its
    layers nevertheless keep the ``conv4_3_norm*`` names
    (presumably so weights can be matched by name -- TODO confirm).

    # Arguments
        input_shape: Shape of the input image passed to ``Input``.
        num_classes: Number of classes predicted per box.

    # Returns
        A ``Model`` whose output concatenates, along axis 2, the box
        offsets, the softmax class scores and the prior boxes.
    """
    net = {}
    # Block 1
    input_tensor = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])
    net['input'] = input_tensor
    net['conv1_1'] = Convolution2D(64, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv1_1')(net['input'])
    net['conv1_2'] = Convolution2D(64, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv1_2')(net['conv1_1'])
    net['pool1'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool1')(net['conv1_2'])
    # Block 2
    net['conv2_1'] = Convolution2D(128, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv2_1')(net['pool1'])
    net['conv2_2'] = Convolution2D(128, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv2_2')(net['conv2_1'])
    net['pool2'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool2')(net['conv2_2'])
    # Block 3
    net['conv3_1'] = Convolution2D(256, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv3_1')(net['pool2'])
    net['conv3_2'] = Convolution2D(256, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv3_2')(net['conv3_1'])
    net['conv3_3'] = Convolution2D(256, 3, 3, activation='relu',
                                   border_mode='same',
                                   name='conv3_3')(net['conv3_2'])
    net['pool3'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool3')(net['conv3_3'])

    # Prediction head on the normalized conv3_3 feature map
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv3_3'])
    num_priors = 6
    x = Convolution2D(num_priors * 4, 3, 3, border_mode='same',
                      name='conv4_3_norm_mbox_loc')(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_loc'] = x
    flatten = Flatten(name='conv4_3_norm_mbox_loc_flat')
    net['conv4_3_norm_mbox_loc_flat'] = flatten(net['conv4_3_norm_mbox_loc'])
    # The confidence layer's name carries the class count whenever it is not
    # the default 21.
    name = 'conv4_3_norm_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Convolution2D(num_priors * num_classes, 3, 3, border_mode='same',
                      name=name)(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_conf'] = x
    flatten = Flatten(name='conv4_3_norm_mbox_conf_flat')
    net['conv4_3_norm_mbox_conf_flat'] = flatten(
        net['conv4_3_norm_mbox_conf'])
    priorbox = PriorBox(img_size, 30.0, max_size=60, aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv4_3_norm_mbox_priorbox')
    net['conv4_3_norm_mbox_priorbox'] = priorbox(net['conv4_3_norm'])

    # Gather predictions. Each box contributes four localization values.
    loc_flat = net['conv4_3_norm_mbox_loc_flat']
    if hasattr(loc_flat, '_keras_shape'):
        num_boxes = loc_flat._keras_shape[-1] // 4
    else:
        # net['mbox_loc'] does not exist yet at this point and tensors have
        # no ``int_shape`` attribute; query the backend for the static shape.
        num_boxes = K.int_shape(loc_flat)[-1] // 4
    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(loc_flat)
    net['mbox_conf'] = Reshape(
        (num_boxes, num_classes),
        name='mbox_conf_logits')(net['conv4_3_norm_mbox_conf_flat'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])
    net['predictions'] = merge([net['mbox_loc'],
                                net['mbox_conf'],
                                net['conv4_3_norm_mbox_priorbox']],
                               mode='concat', concat_axis=2,
                               name='predictions')
    model = Model(net['input'], net['predictions'])
    return model