Example #1
    def __init__(self,
                 config,
                 phase,
                 base,
                 extras,
                 head,
                 num_classes,
                 top_k=200):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        # TODO: implement __call__ in PriorBox
        self.priorbox = PriorBox(config)
        self.priors = Variable(self.priorbox.forward(), volatile=True)

        # SSD network
        self.vgg = nn.ModuleList(base)
        # Layer learns to scale the l2 normalized features from conv4_3
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes,
                                 0,
                                 top_k,
                                 0.01,
                                 0.45,
                                 variance=config['variance'])
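Note: Variable(self.priorbox.forward(), volatile=True) is the pre-0.4 PyTorch idiom for inference-only tensors. On PyTorch >= 0.4 the Variable wrapper is gone; a minimal equivalent sketch, reusing the same config:

    # priors are fixed, so build them without tracking gradients
    with torch.no_grad():
        self.priors = PriorBox(config).forward()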
Example #2
def im_detect(net, im, target_size):
    try:
        device = net.arm_conf[0].weight.device
    except AttributeError:  # net has no ARM head; fall back to the ODM head
        device = net.odm_conf[0].weight.device
    h, w, _ = im.shape
    scale = torch.Tensor([w, h, w, h])
    scale = scale.to(device)
    im_orig = im.astype(np.float32, copy=True)
    im = cv2.resize(im_orig, (target_size, target_size),
                    interpolation=cv2.INTER_LINEAR)
    x = (im - MEANS).astype(np.float32)
    x = x[:, :, (2, 1, 0)]  # to rgb
    x = x.transpose(2, 0, 1)
    x = torch.from_numpy(x).unsqueeze(0)
    x = x.to(device)

    if args.wo_refined_anchor:
        adm_loc, adm_conf, feat_sizes = net(x)
    else:
        arm_loc, arm_conf, adm_loc, adm_conf, feat_sizes = net(x)
    priorbox = PriorBox(net.cfg,
                        feat_sizes, (target_size, target_size),
                        phase='test')
    priors = priorbox.forward()
    priors = priors.to(device)
    if args.wo_refined_anchor:
        det = detect.forward(adm_loc, adm_conf, priors, scale)
    else:
        det = detect.forward(arm_loc, arm_conf, adm_loc, adm_conf, priors,
                             scale)
    return det
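A minimal call sketch for the function above (assumes OpenCV and the module-level net, detect, and args used in the snippet; 'demo.jpg' and the 320 input size are placeholders):

    im = cv2.imread('demo.jpg')  # HxWxC, BGR, uint8
    det = im_detect(net, im, target_size=320)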
Example #3
def im_detect_ratio(net, im, target_size1, target_size2):
    device = net.arm_conf[0].weight.device
    h, w, _ = im.shape
    scale = torch.Tensor([w, h, w, h])
    scale = scale.to(device)
    im_orig = im.astype(np.float32, copy=True)
    if im_orig.shape[0] < im_orig.shape[1]:
        target_size1, target_size2 = target_size2, target_size1
    im = cv2.resize(im_orig,
                    None,
                    None,
                    fx=float(target_size2) / float(w),
                    fy=float(target_size1) / float(h),
                    interpolation=cv2.INTER_LINEAR)
    x = (im - MEANS).astype(np.float32)
    x = x[:, :, (2, 1, 0)]  # to rgb
    x = x.transpose(2, 0, 1)
    x = torch.from_numpy(x).unsqueeze(0)
    x = x.to(device)

    arm_loc, arm_conf, adm_loc, adm_conf, feat_sizes = net(x)
    priorbox = PriorBox(net.cfg,
                        feat_sizes, (target_size1, target_size2),
                        phase='test')
    priors = priorbox.forward()
    priors = priors.to(device)
    det = detect.forward(arm_loc, arm_conf, adm_loc, adm_conf, priors, scale)
    return det
Example #4
    def forward(self, x):
        img_size = x.size()[2:]
        source = []

        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(torch.cat((F.relu(x), F.relu(-x)), 1))

        x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1)
        x = self.conv2(x)
        x = self.bn2(x)
        x = F.relu(torch.cat((F.relu(x), F.relu(-x)), 1))

        x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1)
        x = self.inception1(x)
        x = self.inception2(x)
        x = self.inception3(x)
        source.append(x)

        x = self.conv3_1(x)
        x = self.conv3_2(x)
        source.append(x)

        x = self.conv4_1(x)
        x = self.conv4_2(x)
        source.append(x)

        feature_maps = []
        for feat in source:
            feature_maps.append([feat.size(2), feat.size(3)])

        self.priors = Variable(PriorBox(img_size, feature_maps, cfg).forward())

        loc_preds, conf_preds = self.multilbox(source)

        if self.phase == 'test':
            output = self.test_det(loc_preds, self.softmax(conf_preds),
                                   self.priors)
        else:
            output = (loc_preds, conf_preds, self.priors)
        return output
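Unlike the constructor-built priors in the other examples, this forward() derives feature_maps from the live activations, so the priors track the actual input resolution. On PyTorch >= 0.4 the Variable wrapper is unnecessary and the prior build would sit under torch.no_grad(); a sketch:

    with torch.no_grad():
        self.priors = PriorBox(img_size, feature_maps, cfg).forward()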
Example #5
    def __init__(self, phase, size, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = (coco, voc)[num_classes == 21]
        self.priorbox = PriorBox(self.cfg)
        self.priors = Variable(self.priorbox.forward(), volatile=True)
        self.size = size

        # SSD network
        self.vgg = nn.ModuleList(base)
        # Layer learns to scale the l2 normalized features from conv4_3
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
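The (coco, voc)[num_classes == 21] line indexes a 2-tuple with a bool (False is 0, True is 1); an equivalent, more explicit spelling:

    self.cfg = voc if num_classes == 21 else coco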
Example #6
    def __init__(self, num_classes, phase, pretrain=False, finetune=None):
        super(SSD300, self).__init__()
        self.num_classes = num_classes
        self.phase = phase
        
        self.base_net = self._base_net()
        self.extra_net = self._extra_net()
        self.loc_pred, self.cls_pred = self._predict_net()

        self.L2Norm = L2Norm(512, 20)
        self.priorbox = PriorBox(v2)
        self.priors = Variable(self.priorbox.forward(), volatile=True)

        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

        self._init_weight()
        if pretrain:
            self._load_weight()
        if finetune is not None:
            self._finetune(finetune)
Example #7
    def __init__(self, phase, size, base, extras, head, num_classes):
        super(TBPP, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = {
            'num_classes': 2,
            'lr_steps': (80000, 100000, 120000),
            'max_iter': 120000,
            'feature_maps': [64, 32, 16, 8, 4, 2, 1],
            'min_dim': 512,
            'steps': [8, 16, 32, 64, 128, 256, 512],
            'min_sizes': [20, 51, 133, 215, 296, 378, 460],
            'max_sizes': [51, 133, 215, 296, 378, 460, 542],
            'aspect_ratios': [[2, 3], [2, 3, 5], [2, 3, 5], [2, 3, 5],
                              [2, 3, 5], [2, 3], [2, 3]],  # TODO
            'variance': [0.1, 0.2],
            'clip': True,
            'name': 'MINE'
        }
        # calculate the sizes of the prior boxes, i.e. default boxes
        self.priorbox = PriorBox(self.cfg)
        self.priors = Variable(self.priorbox.forward(), volatile=True)
        self.size = size

        # TBPP network
        self.vgg = nn.ModuleList(base)
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)
        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
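A rough sanity check on the box budget this cfg implies, assuming the common SSD rule of 2 + 2 * len(aspect_ratios) priors per cell (the actual PriorBox implementation may differ):

    feature_maps = [64, 32, 16, 8, 4, 2, 1]
    aspect_ratios = [[2, 3], [2, 3, 5], [2, 3, 5], [2, 3, 5],
                     [2, 3, 5], [2, 3], [2, 3]]
    num_priors = sum(f * f * (2 + 2 * len(ars))
                     for f, ars in zip(feature_maps, aspect_ratios))
    print(num_priors)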
Example #8
    def __init__(self, num_classes):
        super(DSOD_64_16_1x1, self).__init__()
        self.num_classes = num_classes
        self.extractor = DenseNet_64_16_DSSD_s_Pred_D()
        self.loc_layers = nn.ModuleList()
        self.cls_layers = nn.ModuleList()
        self.cfg = cfg_320_64_16
        self.priorbox = PriorBox(self.cfg)
        self.priors = self.priorbox.forward()

        # in_channels = (768, 768, 768, 256, 256, 256) #pred C
        in_channels = (256, 256, 256, 256, 256, 256)  # pred D
        num_anchors = (4, 6, 6, 6, 4, 4)
        for inC, num_anchor in zip(in_channels, num_anchors):
            # self.loc_layers += [nn.Conv2d(inC, num_anchor*4, kernel_size=3, padding=1)]
            # self.cls_layers += [nn.Conv2d(inC, num_anchor* num_classes, kernel_size=3, padding=1)
            #                                   ]
            self.loc_layers += [
                nn.Sequential(
                    nn.Conv2d(inC,
                              num_anchor * 4,
                              kernel_size=1,
                              padding=0,
                              bias=False), nn.BatchNorm2d(num_anchor * 4))
            ]
            self.cls_layers += [
                nn.Sequential(
                    nn.Conv2d(inC,
                              num_anchor * num_classes,
                              kernel_size=1,
                              padding=0,
                              bias=False),
                    nn.BatchNorm2d(num_anchor * num_classes))
            ]
        self.normalize = nn.ModuleList(
            [L2Norm(chan, 20) for chan in in_channels])

        self.reset_parameters()
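Each loc head above emits num_anchor * 4 channels per cell (and num_anchor * num_classes for the cls head). Downstream, SSD-style code typically permutes and flattens each head output y of shape (N, C, H, W); a sketch of the usual reshape:

    # (N, C, H, W) -> (N, H*W*num_anchor, 4) for the loc branch
    loc = y.permute(0, 2, 3, 1).contiguous().view(y.size(0), -1, 4)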
Example #9
    def __init__(self, num_classes):
        super(DSOD_64_16_GN, self).__init__()
        self.num_classes = num_classes
        self.extractor = DSSD_s_GN()
        self.loc_layers = nn.ModuleList()
        self.cls_layers = nn.ModuleList()
        self.cfg = cfg_320_64_16
        self.priorbox = PriorBox(self.cfg)
        self.priors = self.priorbox.forward()

        in_channels = channel_dict['DSSD']
        num_anchors = (4, 6, 6, 6, 4, 4)
        for inC, num_anchor in zip(in_channels, num_anchors):
            # self.loc_layers += [nn.Conv2d(inC, num_anchor*4, kernel_size=3, padding=1)]
            # self.cls_layers += [nn.Conv2d(inC, num_anchor* num_classes, kernel_size=3, padding=1)
            #                                   ]
            self.loc_layers += [
                nn.Sequential(
                    nn.Conv2d(inC,
                              num_anchor * 4,
                              kernel_size=3,
                              padding=1,
                              bias=False), nn.GroupNorm(4, num_anchor * 4))
            ]
            self.cls_layers += [
                nn.Sequential(
                    nn.Conv2d(inC,
                              num_anchor * num_classes,
                              kernel_size=3,
                              padding=1,
                              bias=False),
                    nn.GroupNorm(num_classes, num_anchor * num_classes))
            ]
        self.normalize = nn.ModuleList(
            [L2Norm(chan, 20) for chan in in_channels])
        self.reset_parameters()
Example #10
def train():
    if args.visdom:
        import visdom
        viz = visdom.Visdom()

    print('Loading the dataset...')
    if args.dataset == 'COCO':
        if args.dataset_root == VOC_ROOT:
            if not os.path.exists(COCOroot):
                parser.error('Must specify dataset_root if specifying dataset')
            print("WARNING: Using default COCO dataset_root because " +
                  "--dataset_root was not specified.")
            args.dataset_root = COCOroot
        cfg = coco_refinedet[args.input_size]
        train_sets = [('train2017')]
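        # NB: ('train2017') is just the string 'train2017', not a tuple;
        # a one-element tuple would be ('train2017',).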
        # train_sets = [('train2017', 'val2017')]
        dataset = COCODetection(COCOroot, train_sets,
                                SSDAugmentation(cfg['min_dim'], MEANS))
    elif args.dataset == 'VOC':
        '''if args.dataset_root == COCO_ROOT:
            parser.error('Must specify dataset if specifying dataset_root')'''
        cfg = voc_refinedet[args.input_size]
        dataset = VOCDetection(root=VOC_ROOT,
                               transform=SSDAugmentation(
                                   cfg['min_dim'], MEANS))
    print('Training RefineDet on:', dataset.name)
    print('Using the specified args:')
    print(args)

    refinedet_net = build_refinedet('train', int(args.input_size),
                                    cfg['num_classes'], backbone_dict)
    net = refinedet_net
    print(net)

    device = torch.device('cuda:0' if args.cuda else 'cpu')
    if args.ngpu > 1 and args.cuda:
        net = torch.nn.DataParallel(refinedet_net,
                                    device_ids=list(range(args.ngpu)))
    cudnn.benchmark = True
    net = net.to(device)

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        state_dict = torch.load(args.resume)
        # create new OrderedDict that does not contain `module.`
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        refinedet_net.load_state_dict(new_state_dict)
    else:
        print('Initializing weights...')
        refinedet_net.init_weights(pretrained=pretrained)

    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    arm_criterion = RefineDetMultiBoxLoss(2, 0.5, True, 0, True, negpos_ratio,
                                          0.5, False, args.cuda)
    odm_criterion = RefineDetMultiBoxLoss(cfg['num_classes'],
                                          0.5,
                                          True,
                                          0,
                                          True,
                                          negpos_ratio,
                                          0.5,
                                          False,
                                          args.cuda,
                                          use_ARM=True)
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.to(device)

    net.train()
    # loss counters
    arm_loc_loss = 0
    arm_conf_loss = 0
    odm_loc_loss = 0
    odm_conf_loss = 0
    epoch = 0 + args.resume_epoch

    epoch_size = math.ceil(len(dataset) / args.batch_size)
    max_iter = args.max_epoch * epoch_size

    stepvalues = (args.max_epoch * 2 // 3 * epoch_size,
                  args.max_epoch * 8 // 9 * epoch_size,
                  args.max_epoch * epoch_size)
    if args.dataset == 'VOC':
        stepvalues = (args.max_epoch * 2 // 3 * epoch_size,
                      args.max_epoch * 5 // 6 * epoch_size,
                      args.max_epoch * epoch_size)
    step_index = 0

    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
        for step in stepvalues:
            if step < start_iter:
                step_index += 1
    else:
        start_iter = 0

    if args.visdom:
        vis_title = 'RefineDet.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot(viz, 'Iteration', 'Loss', vis_title,
                                    vis_legend)
        epoch_plot = create_vis_plot(viz, 'Epoch', 'Loss', vis_title,
                                     vis_legend)

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            if args.visdom and iteration != 0:
                update_vis_plot(viz, epoch, arm_loc_loss, arm_conf_loss,
                                epoch_plot, None, 'append', epoch_size)
                # reset epoch loss counters
                arm_loc_loss = 0
                arm_conf_loss = 0
                odm_loc_loss = 0
                odm_conf_loss = 0
            # create batch iterator
            batch_iterator = iter(data_loader)
            if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch >
                                                   (args.max_epoch * 2 // 3)):
                torch.save(
                    net.state_dict(),
                    args.save_folder + 'RefineDet' + args.input_size + '_' +
                    args.dataset + '_epoches_' + repr(epoch) + '.pth')
            epoch += 1

        t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)

        # load train data
        images, targets = next(batch_iterator)
        images = images.to(device)
        targets = [ann.to(device) for ann in targets]
        # for an in targets:
        #     for instance in an:
        #         for cor in instance[:-1]:
        #             if cor < 0 or cor > 1:
        #                 raise StopIteration

        # forward
        out = net(images)

        # backprop
        optimizer.zero_grad()

        arm_loss_l, arm_loss_c = arm_criterion(out, priors, targets)
        odm_loss_l, odm_loss_c = odm_criterion(out, priors, targets)
        arm_loss = arm_loss_l + arm_loss_c
        odm_loss = odm_loss_l + odm_loss_c
        loss = arm_loss + odm_loss

        loss.backward()
        optimizer.step()

        arm_loc_loss += arm_loss_l.item()
        arm_conf_loss += arm_loss_c.item()
        odm_loc_loss += odm_loss_l.item()
        odm_conf_loss += odm_loss_c.item()
        t1 = time.time()
        batch_time = t1 - t0
        eta = int(batch_time * (max_iter - iteration))
        print('Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || ARM_L Loss: {:.4f} ARM_C Loss: {:.4f} ODM_L Loss: {:.4f} ODM_C Loss: {:.4f} loss: {:.4f} || LR: {:.8f} || Batchtime: {:.4f} s || ETA: {}'.\
            format(epoch, args.max_epoch, (iteration % epoch_size) + 1, epoch_size, iteration + 1, max_iter, arm_loss_l.item(), arm_loss_c.item(), odm_loss_l.item(), odm_loss_c.item(), loss.item(), lr, batch_time, str(datetime.timedelta(seconds=eta))))

        if args.visdom:
            update_vis_plot(viz, iteration, arm_loss_l.item(),
                            arm_loss_c.item(), iter_plot, epoch_plot, 'append')

    torch.save(
        refinedet_net.state_dict(), args.save_folder +
        '/RefineDet{}_{}_final.pth'.format(args.input_size, args.dataset))
Example #11
def main():
    global args
    global minmum_loss
    args.gpu = 0
    args.world_size = 1

    if args.distributed:
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()

    args.total_batch_size = args.world_size * args.batch_size

    ## DATA loading code
    if args.dataset == 'COCO':
        train_sets = [('2014', 'train'), ('2014', 'valminusminival')]
        cfg = (COCO_300, COCO_512)[args.size == '512']
    elif args.dataset == 'VOC':
        train_sets = [('2007', 'trainval'), ('2012', 'trainval')]
        cfg = (VOC_300, VOC_512)[args.size == '512']

    # other important parameters
    img_dim = (300, 512)[args.size == '512']
    rgb_means = ((104, 117, 123), (103.94, 116.78,
                                   123.68))[args.version == 'RFB_mobile']
    p = (0.6, 0.2)[args.version == 'RFB_mobile']
    num_classes = (21, 81)[args.dataset == 'COCO']

    if args.dataset == 'COCO':
        dataset = COCODetection(root=cfg['coco_root'],
                                image_sets=train_sets,
                                preproc=preproc(img_dim, rgb_means, p))
    elif args.dataset == 'VOC':
        dataset = VOCDetection(root=cfg['voc_root'],
                               image_sets=train_sets,
                               preproc=preproc(img_dim, rgb_means, p),
                               target_transform=AnnotationTransform())

    print('Training SSD on:', dataset.name)
    print('Loading the dataset...')
    train_loader = data.DataLoader(dataset,
                                   args.batch_size,
                                   num_workers=args.num_workers,
                                   shuffle=True,
                                   collate_fn=detection_collate,
                                   pin_memory=True)

    print("Build RFB network")
    if args.version == 'RFB_vgg':
        model = RFB_Net_vgg('train', img_dim, num_classes)
    elif args.version == 'RFB_E_vgg':
        model = RFB_Net_E_vgg('train', img_dim, num_classes)
    elif args.version == 'RFB_mobile':
        model = RFB_Net_mobile('train', img_dim, num_classes)
    else:
        print('Unknown version!')
        return

    if args.pretrained:
        base_weights = torch.load(args.save_folder + args.basenet)
        print('Loading base network...')
        model.base.load_state_dict(base_weights)

    model = model.cuda()
    # optimizer and loss function
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False)

    ## get the priorbox of ssd
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(
                args.resume,
                map_location=lambda storage, loc: storage.cuda(args.gpu))
            args.start_epoch = checkpoint['epoch']
            minmum_loss = checkpoint['minmum_loss']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        model.extras.apply(weights_init)
        model.loc.apply(weights_init)
        model.conf.apply(weights_init)
        model.Norm.apply(weights_init)
        if args.version == 'RFB_E_vgg':
            model.reduce.apply(weights_init)
            model.up_reduce.apply(weights_init)

    print('Using the specified args:')
    print(args)
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        loss = train(train_loader, model, priors, criterion, optimizer, epoch)
        # remember best prec@1 and save checkpoint
        if args.local_rank == 0:
            is_best = loss < minmum_loss
            minmum_loss = min(loss, minmum_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_prec1': minmum_loss,
                    'optimizer': optimizer.state_dict(),
                }, is_best, epoch)
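The args.local_rank / init_method='env://' combination above is the pattern expected by PyTorch's distributed launcher; a typical invocation (script name and flags are assumptions for this sketch):

    python -m torch.distributed.launch --nproc_per_node=4 train.py --distributed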
Example #12
def SSD300(input_shape, num_classes=21):
    """SSD300 architecture.

    # Arguments
        input_shape: Shape of the input image,
            expected to be either (300, 300, 3) or (3, 300, 300) (not tested).
        num_classes: Number of classes including background.

    # References
        https://arxiv.org/abs/1512.02325
    """
    print('begin building networks')
    kernel_size = (3, 3)

    net = {}
    # Block 1
    input_tensor = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])
    net['input'] = input_tensor
    net['conv1_1'] = Conv2D(64,
                            kernel_size,
                            activation='relu',
                            padding='same',
                            name='conv1_1')(net['input'])
    net['conv1_2'] = Conv2D(64,
                            kernel_size,
                            activation='relu',
                            padding='same',
                            name='conv1_2')(net['conv1_1'])
    net['pool1'] = MaxPooling2D((2, 2),
                                strides=(2, 2),
                                padding='same',
                                name='pool1')(net['conv1_2'])
    # Block 2
    net['conv2_1'] = Conv2D(128,
                            kernel_size,
                            activation='relu',
                            padding='same',
                            name='conv2_1')(net['pool1'])
    net['conv2_2'] = Conv2D(128,
                            kernel_size,
                            activation='relu',
                            padding='same',
                            name='conv2_2')(net['conv2_1'])
    net['pool2'] = MaxPooling2D((2, 2),
                                strides=(2, 2),
                                padding='same',
                                name='pool2')(net['conv2_2'])
    # Block 3
    net['conv3_1'] = Conv2D(256,
                            kernel_size,
                            activation='relu',
                            padding='same',
                            name='conv3_1')(net['pool2'])
    net['conv3_2'] = Conv2D(256,
                            kernel_size,
                            activation='relu',
                            padding='same',
                            name='conv3_2')(net['conv3_1'])
    net['conv3_3'] = Conv2D(256,
                            kernel_size,
                            activation='relu',
                            padding='same',
                            name='conv3_3')(net['conv3_2'])
    net['pool3'] = MaxPooling2D((2, 2),
                                strides=(2, 2),
                                padding='same',
                                name='pool3')(net['conv3_3'])
    # Block 4
    net['conv4_1'] = Conv2D(512,
                            kernel_size,
                            activation='relu',
                            padding='same',
                            name='conv4_1')(net['pool3'])
    net['conv4_2'] = Conv2D(512,
                            kernel_size,
                            activation='relu',
                            padding='same',
                            name='conv4_2')(net['conv4_1'])
    net['conv4_3'] = Conv2D(512,
                            kernel_size,
                            activation='relu',
                            padding='same',
                            name='conv4_3')(net['conv4_2'])
    net['pool4'] = MaxPooling2D((2, 2),
                                strides=(2, 2),
                                padding='same',
                                name='pool4')(net['conv4_3'])
    # Block 5
    net['conv5_1'] = Conv2D(512,
                            kernel_size,
                            activation='relu',
                            padding='same',
                            name='conv5_1')(net['pool4'])
    net['conv5_2'] = Conv2D(512,
                            kernel_size,
                            activation='relu',
                            padding='same',
                            name='conv5_2')(net['conv5_1'])
    net['conv5_3'] = Conv2D(512,
                            kernel_size,
                            activation='relu',
                            padding='same',
                            name='conv5_3')(net['conv5_2'])
    net['pool5'] = MaxPooling2D((3, 3),
                                strides=(1, 1),
                                padding='same',
                                name='pool5')(net['conv5_3'])
    # FC6
    net['fc6'] = Conv2D(1024,
                        kernel_size,
                        dilation_rate=(6, 6),
                        activation='relu',
                        padding='same',
                        name='fc6')(net['pool5'])
    # x = Dropout(0.5, name='drop6')(x)
    # FC7
    net['fc7'] = Conv2D(1024, (1, 1),
                        activation='relu',
                        padding='same',
                        name='fc7')(net['fc6'])
    # x = Dropout(0.5, name='drop7')(x)
    # Block 6
    net['conv6_1'] = Conv2D(256, (1, 1),
                            activation='relu',
                            padding='same',
                            name='conv6_1')(net['fc7'])
    net['conv6_2'] = Conv2D(512,
                            kernel_size,
                            strides=(2, 2),
                            activation='relu',
                            padding='same',
                            name='conv6_2')(net['conv6_1'])
    # Block 7
    net['conv7_1'] = Conv2D(128, (1, 1),
                            activation='relu',
                            padding='same',
                            name='conv7_1')(net['conv6_2'])
    net['conv7_2'] = ZeroPadding2D()(net['conv7_1'])
    net['conv7_2'] = Conv2D(256,
                            kernel_size,
                            strides=(2, 2),
                            activation='relu',
                            padding='valid',
                            name='conv7_2')(net['conv7_2'])
    # Block 8
    net['conv8_1'] = Conv2D(128, (1, 1),
                            activation='relu',
                            padding='same',
                            name='conv8_1')(net['conv7_2'])
    net['conv8_2'] = Conv2D(256,
                            kernel_size,
                            strides=(2, 2),
                            activation='relu',
                            padding='same',
                            name='conv8_2')(net['conv8_1'])
    # Last Pool
    net['pool6'] = GlobalAveragePooling2D(name='pool6')(net['conv8_2'])
    print('base network built')

    # Prediction from conv4_3
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])
    num_priors = 3
    x = Conv2D(num_priors * 4,
               kernel_size,
               padding='same',
               name='conv4_3_norm_mbox_loc')(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_loc'] = x
    flatten = Flatten(name='conv4_3_norm_mbox_loc_flat')
    net['conv4_3_norm_mbox_loc_flat'] = flatten(net['conv4_3_norm_mbox_loc'])
    name = 'conv4_3_norm_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Conv2D(num_priors * num_classes,
               kernel_size,
               padding='same',
               name=name)(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_conf'] = x
    flatten = Flatten(name='conv4_3_norm_mbox_conf_flat')
    net['conv4_3_norm_mbox_conf_flat'] = flatten(net['conv4_3_norm_mbox_conf'])
    priorbox = PriorBox(img_size,
                        30.0,
                        aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv4_3_norm_mbox_priorbox')
    net['conv4_3_norm_mbox_priorbox'] = priorbox(net['conv4_3_norm'])
    print('conv4_3_norm_mbox_priorbox built')
    # Prediction from fc7
    num_priors = 6
    net['fc7_mbox_loc'] = Conv2D(num_priors * 4,
                                 kernel_size,
                                 padding='same',
                                 name='fc7_mbox_loc')(net['fc7'])
    flatten = Flatten(name='fc7_mbox_loc_flat')
    net['fc7_mbox_loc_flat'] = flatten(net['fc7_mbox_loc'])
    name = 'fc7_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    net['fc7_mbox_conf'] = Conv2D(num_priors * num_classes, (3, 3),
                                  padding='same',
                                  name=name)(net['fc7'])
    flatten = Flatten(name='fc7_mbox_conf_flat')
    net['fc7_mbox_conf_flat'] = flatten(net['fc7_mbox_conf'])
    priorbox = PriorBox(img_size,
                        60.0,
                        max_size=114.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='fc7_mbox_priorbox')
    net['fc7_mbox_priorbox'] = priorbox(net['fc7'])
    print('fc7_mbox_priorbox built')
    # Prediction from conv6_2
    num_priors = 6
    x = Conv2D(num_priors * 4,
               kernel_size,
               padding='same',
               name='conv6_2_mbox_loc')(net['conv6_2'])
    net['conv6_2_mbox_loc'] = x
    flatten = Flatten(name='conv6_2_mbox_loc_flat')
    net['conv6_2_mbox_loc_flat'] = flatten(net['conv6_2_mbox_loc'])
    name = 'conv6_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Conv2D(num_priors * num_classes,
               kernel_size,
               padding='same',
               name=name)(net['conv6_2'])
    net['conv6_2_mbox_conf'] = x
    flatten = Flatten(name='conv6_2_mbox_conf_flat')
    net['conv6_2_mbox_conf_flat'] = flatten(net['conv6_2_mbox_conf'])
    priorbox = PriorBox(img_size,
                        114.0,
                        max_size=168.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv6_2_mbox_priorbox')
    net['conv6_2_mbox_priorbox'] = priorbox(net['conv6_2'])
    print('conv6_2_mbox_priorbox built')
    # Prediction from conv7_2
    num_priors = 6
    x = Conv2D(num_priors * 4,
               kernel_size,
               padding='same',
               name='conv7_2_mbox_loc')(net['conv7_2'])
    net['conv7_2_mbox_loc'] = x
    flatten = Flatten(name='conv7_2_mbox_loc_flat')
    net['conv7_2_mbox_loc_flat'] = flatten(net['conv7_2_mbox_loc'])
    name = 'conv7_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Conv2D(num_priors * num_classes,
               kernel_size,
               padding='same',
               name=name)(net['conv7_2'])
    net['conv7_2_mbox_conf'] = x
    flatten = Flatten(name='conv7_2_mbox_conf_flat')
    net['conv7_2_mbox_conf_flat'] = flatten(net['conv7_2_mbox_conf'])
    priorbox = PriorBox(img_size,
                        168.0,
                        max_size=222.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv7_2_mbox_priorbox')
    net['conv7_2_mbox_priorbox'] = priorbox(net['conv7_2'])
    print('conv7_2_mbox_priorbox built')
    # Prediction from conv8_2
    num_priors = 6
    x = Conv2D(num_priors * 4,
               kernel_size,
               padding='same',
               name='conv8_2_mbox_loc')(net['conv8_2'])
    net['conv8_2_mbox_loc'] = x
    flatten = Flatten(name='conv8_2_mbox_loc_flat')
    net['conv8_2_mbox_loc_flat'] = flatten(net['conv8_2_mbox_loc'])
    name = 'conv8_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Conv2D(num_priors * num_classes,
               kernel_size,
               padding='same',
               name=name)(net['conv8_2'])
    net['conv8_2_mbox_conf'] = x
    flatten = Flatten(name='conv8_2_mbox_conf_flat')
    net['conv8_2_mbox_conf_flat'] = flatten(net['conv8_2_mbox_conf'])
    priorbox = PriorBox(img_size,
                        222.0,
                        max_size=276.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv8_2_mbox_priorbox')
    net['conv8_2_mbox_priorbox'] = priorbox(net['conv8_2'])
    print('conv8_2_mbox_priorbox built')
    # Prediction from pool6
    num_priors = 6
    x = Dense(num_priors * 4, name='pool6_mbox_loc_flat')(net['pool6'])
    net['pool6_mbox_loc_flat'] = x
    name = 'pool6_mbox_conf_flat'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Dense(num_priors * num_classes, name=name)(net['pool6'])
    net['pool6_mbox_conf_flat'] = x
    priorbox = PriorBox(img_size,
                        276.0,
                        max_size=330.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='pool6_mbox_priorbox')
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    net['pool6_reshaped'] = Reshape(target_shape,
                                    name='pool6_reshaped')(net['pool6'])
    net['pool6_mbox_priorbox'] = priorbox(net['pool6_reshaped'])
    print('pool6_mbox_priorbox built')
    # Gather all predictions
    net['mbox_loc'] = concatenate([
        net['conv4_3_norm_mbox_loc_flat'], net['fc7_mbox_loc_flat'],
        net['conv6_2_mbox_loc_flat'], net['conv7_2_mbox_loc_flat'],
        net['conv8_2_mbox_loc_flat'], net['pool6_mbox_loc_flat']
    ],
                                  axis=1,
                                  name='mbox_loc')
    net['mbox_conf'] = concatenate([
        net['conv4_3_norm_mbox_conf_flat'], net['fc7_mbox_conf_flat'],
        net['conv6_2_mbox_conf_flat'], net['conv7_2_mbox_conf_flat'],
        net['conv8_2_mbox_conf_flat'], net['pool6_mbox_conf_flat']
    ],
                                   axis=1,
                                   name='mbox_conf')
    net['mbox_priorbox'] = concatenate([
        net['conv4_3_norm_mbox_priorbox'], net['fc7_mbox_priorbox'],
        net['conv6_2_mbox_priorbox'], net['conv7_2_mbox_priorbox'],
        net['conv8_2_mbox_priorbox'], net['pool6_mbox_priorbox']
    ],
                                       axis=1,
                                       name='mbox_priorbox')
    print('gathering all prediction layers built')
    if hasattr(net['mbox_loc'], '_keras_shape'):
        # divide 4 for [xmin, ymin, xmax, ymax]
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    elif hasattr(net['mbox_loc'], 'int_shape'):
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4
    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])
    net['predictions'] = concatenate(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        axis=2,
        name='predictions')
    print('prediction layers built')
    model = Model(net['input'], net['predictions'])
    return model
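A minimal build-and-inspect sketch for the function above (architecture only; training would additionally need the codebase's multibox loss, which is not part of this excerpt):

    model = SSD300((300, 300, 3), num_classes=21)
    model.summary()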
Example #13
def mini_SSD(num_classes=21):

    base_kernel_size = 4 + num_classes
    aspect_ratios = (1, 2, 1 / 2)
    num_aspect_ratios = len(aspect_ratios)

    base_model = VGG16(weights='imagenet')
    base_model.layers[0].name = 'input_1'
    input_tensor = base_model.input
    #input_tensor = base_model

    #input_tensor.name = 'image_array'

    for layer in base_model.layers:
        layer.trainable = False
    body = base_model.get_layer('block4_pool').output
    body = Convolution2D((base_kernel_size * num_aspect_ratios),
                         3,
                         3,
                         border_mode='same')(body)
    branch_1 = PriorBox(aspect_ratios)(body)

    body = Convolution2D(32, 3, 3, border_mode='same')(branch_1)
    body = Activation('relu')(body)
    body = MaxPooling2D((2, 2))(body)
    body = Dropout(.5)(body)
    body = Convolution2D((base_kernel_size * num_aspect_ratios),
                         3,
                         3,
                         border_mode='same')(body)
    branch_2 = PriorBox(aspect_ratios)(body)

    body = Convolution2D(64, 3, 3, border_mode='same')(branch_2)
    body = Activation('relu')(body)
    body = MaxPooling2D((3, 3))(body)
    body = Dropout(.5)(body)
    body = Convolution2D((base_kernel_size * num_aspect_ratios),
                         3,
                         3,
                         border_mode='same')(body)
    branch_3 = PriorBox(aspect_ratios)(body)

    branch_1 = Reshape((-1, 4 + num_classes))(branch_1)
    local_1 = Lambda(lambda x: x[:, :, :4])(branch_1)
    class_1 = Lambda(lambda x: K.softmax(x[:, :, 4:]))(branch_1)

    branch_2 = Reshape((-1, 4 + num_classes))(branch_2)
    local_2 = Lambda(lambda x: x[:, :, :4])(branch_2)
    class_2 = Lambda(lambda x: K.softmax(x[:, :, 4:]))(branch_2)

    branch_3 = Reshape((-1, 4 + num_classes))(branch_3)
    local_3 = Lambda(lambda x: x[:, :, :4])(branch_3)
    class_3 = Lambda(lambda x: K.softmax(x[:, :, 4:]))(branch_3)

    classification_tensor = merge([class_1, class_2, class_3],
                                  mode='concat',
                                  concat_axis=1,
                                  name='classes')

    localization_tensor = merge([local_1, local_2, local_3],
                                mode='concat',
                                concat_axis=1,
                                name='encoded_box')
    output_tensor = merge([localization_tensor, classification_tensor],
                          mode='concat',
                          concat_axis=-1,
                          name='predictions')
    model = Model(input_tensor, output_tensor)
    return model
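Note: mini_SSD above and simple_SSD below are written against the Keras 1 API. The Keras 2 equivalents (used in Example #12) are Conv2D(filters, (3, 3), padding='same') for Convolution2D(filters, 3, 3, border_mode='same'), and concatenate([...], axis=...) for merge([...], mode='concat', concat_axis=...).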
Example #14
def simple_SSD(input_shape, num_classes, min_size, num_priors, max_size,
               aspect_ratios, variances):
    input_tensor = Input(shape=input_shape)

    body = Convolution2D(16, 7, 7)(input_tensor)
    body = Activation('relu')(body)
    body = MaxPooling2D(2, 2, border_mode='valid')(body)

    body = Convolution2D(32, 5, 5)(body)
    body = Activation('relu')(body)
    branch_1 = MaxPooling2D(2, 2, border_mode='valid')(body)

    body = Convolution2D(64, 3, 3)(branch_1)
    body = Activation('relu')(body)
    branch_2 = MaxPooling2D(2, 2, border_mode='valid')(body)

    # first branch
    norm_1 = Normalize(20)(branch_1)
    localization_1 = Convolution2D(num_priors * 4, 3, 3,
                                   border_mode='same')(norm_1)
    localization_1 = Flatten()(localization_1)
    classification_1 = Convolution2D(num_priors * num_classes,
                                     3,
                                     3,
                                     border_mode='same')(norm_1)
    classification_1 = Flatten()(classification_1)
    # apply the PriorBox layer to the feature map so it yields a tensor
    prior_boxes_1 = PriorBox(input_shape[0:2], min_size, max_size,
                             aspect_ratios)(norm_1)

    # second branch
    norm_2 = Normalize(20)(branch_2)
    localization_2 = Convolution2D(num_priors * 4, 3, 3,
                                   border_mode='same')(norm_2)
    localization_2 = Flatten()(localization_2)
    classification_2 = Convolution2D(num_priors * num_classes,
                                     3,
                                     3,
                                     border_mode='same')(norm_2)
    classification_2 = Flatten()(classification_2)
    prior_boxes_2 = PriorBox(input_shape[0:2], min_size, max_size,
                             aspect_ratios)(norm_2)

    localization_head = merge([localization_1, localization_2],
                              mode='concat',
                              concat_axis=1)

    classification_head = merge([classification_1, classification_2],
                                mode='concat',
                                concat_axis=1)

    prior_boxes_head = merge([prior_boxes_1, prior_boxes_2],
                             mode='concat',
                             concat_axis=1)

    if hasattr(localization_head, '_keras_shape'):
        num_boxes = localization_head._keras_shape[-1] // 4
    elif hasattr(localization_head, 'int_shape'):
        num_boxes = K.int_shape(localization_head)[-1] // 4

    localization_head = Reshape((num_boxes, 4))(localization_head)
    classification_head = Reshape(
        (num_boxes, num_classes))(classification_head)
    classification_head = Activation('softmax')(classification_head)
    predictions = merge([localization_head,
                         classification_head,
                         prior_boxes_head],
                        mode='concat',
                        concat_axis=2)

    model = Model(input_tensor, predictions)

    return model
Example #15
def main():
    global args
    global minmum_loss
    args.gpu = 0
    args.world_size = 1

    if args.distributed:
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()

    args.total_batch_size = args.world_size * args.batch_size

    # build the RetinaFace network
    print("Building net...")
    model = RetinaFace(cfg=cfg)
    print("Printing net...")

    # for multi gpu
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model)

    model = model.cuda()
    # optimizer and loss function
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.35, True, 0, True, 7, 0.35,
                             False)

    ## dataset
    print("loading dataset")
    train_dataset = WiderFaceDetection(
        args.training_dataset, preproc(cfg['image_size'], cfg['rgb_mean']))

    train_loader = data.DataLoader(train_dataset,
                                   args.batch_size,
                                   num_workers=args.num_workers,
                                   shuffle=True,
                                   collate_fn=detection_collate,
                                   pin_memory=True)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(
                args.resume,
                map_location=lambda storage, loc: storage.cuda(args.gpu))
            args.start_epoch = checkpoint['epoch']
            minmum_loss = checkpoint['minmum_loss']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    print('Using the specified args:')
    print(args)
    # load PriorBox
    print("Load priorbox")
    with torch.no_grad():
        priorbox = PriorBox(cfg=cfg,
                            image_size=(cfg['image_size'], cfg['image_size']))
        priors = priorbox.forward()
        priors = priors.cuda()

    print("start traing")
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        train_loss = train(train_loader, model, priors, criterion, optimizer,
                           epoch)
        if args.local_rank == 0:
            is_best = train_loss < minmum_loss
            minmum_loss = min(train_loss, minmum_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_prec1': minmum_loss,
                    'optimizer': optimizer.state_dict(),
                }, is_best, epoch)
Example #16
def main():
    global args
    global minmum_loss
    args.gpu = 0
    args.world_size = 1

    if args.distributed:
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()

    args.total_batch_size = args.world_size * args.batch_size

    # build the PyramidBox network
    print("Building net...")
    pyramidbox = build_net('train', cfg.NUM_CLASSES)
    model = pyramidbox

    if args.pretrained:
        vgg_weights = torch.load(args.save_folder + args.basenet)
        print('Loading base network...')
        model.vgg.load_state_dict(vgg_weights)

    # for multi gpu
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model)

    model = model.cuda()
    # optimizer and loss function
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion1 = MultiBoxLoss(cfg, True)
    criterion2 = MultiBoxLoss(cfg, True, use_head_loss=True)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(
                args.resume,
                map_location=lambda storage, loc: storage.cuda(args.gpu))
            args.start_epoch = checkpoint['epoch']
            minmum_loss = checkpoint['minmum_loss']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        print('Initializing weights...')
        pyramidbox.extras.apply(pyramidbox.weights_init)
        pyramidbox.lfpn_topdown.apply(pyramidbox.weights_init)
        pyramidbox.lfpn_later.apply(pyramidbox.weights_init)
        pyramidbox.cpm.apply(pyramidbox.weights_init)
        pyramidbox.loc_layers.apply(pyramidbox.weights_init)
        pyramidbox.conf_layers.apply(pyramidbox.weights_init)

    print('Loading wider dataset...')
    train_dataset = WIDERDetection(cfg.FACE.TRAIN_FILE, mode='train')

    val_dataset = WIDERDetection(cfg.FACE.VAL_FILE, mode='val')

    train_loader = data.DataLoader(train_dataset,
                                   args.batch_size,
                                   num_workers=args.num_workers,
                                   shuffle=True,
                                   collate_fn=detection_collate,
                                   pin_memory=True)
    val_batchsize = args.batch_size // 2
    val_loader = data.DataLoader(val_dataset,
                                 val_batchsize,
                                 num_workers=args.num_workers,
                                 shuffle=False,
                                 collate_fn=detection_collate,
                                 pin_memory=True)

    print('Using the specified args:')
    print(args)

    # load PriorBox
    with torch.no_grad():
        priorbox = PriorBox(input_size=[640, 640], cfg=cfg)
        priors = priorbox.forward()
        priors = priors.cuda()

    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        end = time.time()
        train_loss = train(train_loader, model, priors, criterion1, criterion2,
                           optimizer, epoch)
        val_loss = val(val_loader, model, priors, criterion1, criterion2)
        if args.local_rank == 0:
            is_best = val_loss < minmum_loss
            minmum_loss = min(val_loss, minmum_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_prec1': minmum_loss,
                    'optimizer': optimizer.state_dict(),
                }, is_best, epoch)
        epoch_time = time.time() - end
        print('Epoch %s time cost %f' % (epoch, epoch_time))
Example #17
            cv2.rectangle(img, (p1[0] - 2 // 2, p1[1] - 2 - baseline),
                          (p1[0] + text_size[0], p1[1] + text_size[1]),
                          [255, 0, 0], -1)
            cv2.putText(img, conf, (p1[0], p1[1] + baseline),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 255, 255), 1, 8)

    t2 = time.time()
    print('detect:{} timer:{}'.format(img_path, t2 - t1))

    cv2.imwrite(os.path.join(args.save_dir, os.path.basename(img_path)), img)


if __name__ == '__main__':
    # load PriorBox
    with torch.no_grad():
        priorbox = PriorBox(input_size=[640, 640], cfg=cfg)
        priors = priorbox.forward()
        priors = priors.cuda()

    net = build_net('test', cfg.NUM_CLASSES)
    net.load_state_dict(torch.load(args.model))
    net.eval()

    if use_cuda:
        net.cuda()
        cudnn.benchmark = True

    img_path = './img'
    img_list = [
        os.path.join(img_path, x) for x in os.listdir(img_path)
        if x.endswith('jpg')
    ]
Example #18
def mini_SSD300(input_shape=(300, 300, 3), num_classes=21):
    """SSD300 architecture.

    # Arguments
        input_shape: Shape of the input image,
            expected to be either (300, 300, 3) or (3, 300, 300) (not tested).
        num_classes: Number of classes including background.

    # References
        https://arxiv.org/abs/1512.02325
    """
    net = {}
    # Block 1
    input_tensor = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])
    net['input'] = input_tensor
    net['conv1_1'] = Convolution2D(64, 3, 3,
                                   activation='relu',
                                   border_mode='same',
                                   name='conv1_1')(net['input'])
    net['conv1_2'] = Convolution2D(64, 3, 3,
                                   activation='relu',
                                   border_mode='same',
                                   name='conv1_2')(net['conv1_1'])
    net['pool1'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool1')(net['conv1_2'])
    # Block 2
    net['conv2_1'] = Convolution2D(128, 3, 3,
                                   activation='relu',
                                   border_mode='same',
                                   name='conv2_1')(net['pool1'])
    net['conv2_2'] = Convolution2D(128, 3, 3,
                                   activation='relu',
                                   border_mode='same',
                                   name='conv2_2')(net['conv2_1'])
    net['pool2'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool2')(net['conv2_2'])
    # Block 3
    net['conv3_1'] = Convolution2D(256, 3, 3,
                                   activation='relu',
                                   border_mode='same',
                                   name='conv3_1')(net['pool2'])
    net['conv3_2'] = Convolution2D(256, 3, 3,
                                   activation='relu',
                                   border_mode='same',
                                   name='conv3_2')(net['conv3_1'])
    net['conv3_3'] = Convolution2D(256, 3, 3,
                                   activation='relu',
                                   border_mode='same',
                                   name='conv3_3')(net['conv3_2'])
    net['pool3'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool3')(net['conv3_3'])
    # Block 4
    net['conv4_1'] = Convolution2D(512, 3, 3,
                                   activation='relu',
                                   border_mode='same',
                                   name='conv4_1')(net['pool3'])
    net['conv4_2'] = Convolution2D(512, 3, 3,
                                   activation='relu',
                                   border_mode='same',
                                   name='conv4_2')(net['conv4_1'])
    net['conv4_3'] = Convolution2D(512, 3, 3,
                                   activation='relu',
                                   border_mode='same',
                                   name='conv4_3')(net['conv4_2'])
    net['pool4'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool4')(net['conv4_3'])
    # Block 5
    net['conv5_1'] = Convolution2D(512, 3, 3,
                                   activation='relu',
                                   border_mode='same',
                                   name='conv5_1')(net['pool4'])
    net['conv5_2'] = Convolution2D(512, 3, 3,
                                   activation='relu',
                                   border_mode='same',
                                   name='conv5_2')(net['conv5_1'])
    net['conv5_3'] = Convolution2D(512, 3, 3,
                                   activation='relu',
                                   border_mode='same',
                                   name='conv5_3')(net['conv5_2'])
    net['pool5'] = MaxPooling2D((3, 3), strides=(1, 1), border_mode='same',
                                name='pool5')(net['conv5_3'])
    # FC6
    net['fc6'] = AtrousConvolution2D(1024, 3, 3, atrous_rate=(6, 6),
                                     activation='relu', border_mode='same',
                                     name='fc6')(net['pool5'])
    # x = Dropout(0.5, name='drop6')(x)
    # FC7
    net['fc7'] = Convolution2D(1024, 1, 1, activation='relu',
                               border_mode='same', name='fc7')(net['fc6'])
    # x = Dropout(0.5, name='drop7')(x)
    # Block 6 and the later extra feature layers of the full SSD300 are
    # omitted in this variant.

    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])
    num_priors = 3
    x = Convolution2D(num_priors * 4, 3, 3, border_mode='same',
                      name='conv4_3_norm_mbox_loc')(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_loc'] = x
    flatten = Flatten(name='conv4_3_norm_mbox_loc_flat')
    net['conv4_3_norm_mbox_loc_flat'] = flatten(net['conv4_3_norm_mbox_loc'])
    name = 'conv4_3_norm_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Convolution2D(num_priors * num_classes, 3, 3, border_mode='same',
                      name=name)(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_conf'] = x
    flatten = Flatten(name='conv4_3_norm_mbox_conf_flat')
    net['conv4_3_norm_mbox_conf_flat'] = flatten(net['conv4_3_norm_mbox_conf'])
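    # With flipped aspect_ratios=[2] plus the aspect-ratio-1 box, PriorBox
    # tiles num_priors = 3 default boxes per cell of the 38x38 conv4_3 map;
    # each box carries 4 coordinates plus the 4 variances, so the layer
    # output is (batch, 38*38*3, 8), assuming the ssd_keras PriorBox layer
    # and a 300x300 input.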
    priorbox = PriorBox(img_size, 30.0, aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv4_3_norm_mbox_priorbox')
    net['conv4_3_norm_mbox_priorbox'] = priorbox(net['conv4_3_norm'])
    # Prediction from fc7
    num_priors = 6
    net['fc7_mbox_loc'] = Convolution2D(num_priors * 4, 3, 3,
                                        border_mode='same',
                                        name='fc7_mbox_loc')(net['fc7'])
    flatten = Flatten(name='fc7_mbox_loc_flat')
    net['fc7_mbox_loc_flat'] = flatten(net['fc7_mbox_loc'])
    name = 'fc7_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    net['fc7_mbox_conf'] = Convolution2D(num_priors * num_classes, 3, 3,
                                         border_mode='same',
                                         name=name)(net['fc7'])
    flatten = Flatten(name='fc7_mbox_conf_flat')
    net['fc7_mbox_conf_flat'] = flatten(net['fc7_mbox_conf'])
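    # min_size=60 with max_size=114 and flipped aspect_ratios=[2, 3] yields
    # 1 + 1 + 2 + 2 = 6 priors per cell, matching num_priors above; fc7 stays
    # 19x19 for a 300x300 input because pool5 uses stride 1.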
    priorbox = PriorBox(img_size, 60.0, max_size=114.0, aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='fc7_mbox_priorbox')
    net['fc7_mbox_priorbox'] = priorbox(net['fc7'])
    # Gather all predictions
    net['mbox_loc'] = merge([net['conv4_3_norm_mbox_loc_flat'],
                             net['fc7_mbox_loc_flat']],
                            mode='concat', concat_axis=1, name='mbox_loc')
    net['mbox_conf'] = merge([net['conv4_3_norm_mbox_conf_flat'],
                              net['fc7_mbox_conf_flat']],
                             mode='concat', concat_axis=1, name='mbox_conf')
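    # Infer the total box count from the flattened loc tensor; for a 300x300
    # input this is 38*38*3 + 19*19*6 = 4332 + 2166 = 6498 boxes.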
    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4
    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])
    net['mbox_priorbox'] = merge([net['conv4_3_norm_mbox_priorbox'],
                                  net['fc7_mbox_priorbox']],
                                 mode='concat', concat_axis=1,
                                 name='mbox_priorbox')

    net['predictions'] = merge([net['mbox_loc'],
                                net['mbox_conf'],
                                net['mbox_priorbox']],
                               mode='concat', concat_axis=2,
                               name='predictions')
    model = Model(net['input'], net['predictions'])
    return model


def mini_SSD300(input_shape=(300, 300, 3), num_classes=21):
    """Truncated SSD300 that predicts from a single conv3_3 feature map.

    # Arguments
        input_shape: Shape of the input image, expected to be (300, 300, 3).
        num_classes: Number of classes including background.
    """
    net = {}
    # Block 1
    input_tensor = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])
    net['input'] = input_tensor
    net['conv1_1'] = Convolution2D(64, 3, 3,
                                   activation='relu',
                                   border_mode='same',
                                   name='conv1_1')(net['input'])
    net['conv1_2'] = Convolution2D(64, 3, 3,
                                   activation='relu',
                                   border_mode='same',
                                   name='conv1_2')(net['conv1_1'])
    net['pool1'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool1')(net['conv1_2'])
    # Block 2
    net['conv2_1'] = Convolution2D(128, 3, 3,
                                   activation='relu',
                                   border_mode='same',
                                   name='conv2_1')(net['pool1'])
    net['conv2_2'] = Convolution2D(128, 3, 3,
                                   activation='relu',
                                   border_mode='same',
                                   name='conv2_2')(net['conv2_1'])
    net['pool2'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool2')(net['conv2_2'])
    # Block 3
    net['conv3_1'] = Convolution2D(256, 3, 3,
                                   activation='relu',
                                   border_mode='same',
                                   name='conv3_1')(net['pool2'])
    net['conv3_2'] = Convolution2D(256, 3, 3,
                                   activation='relu',
                                   border_mode='same',
                                   name='conv3_2')(net['conv3_1'])
    net['conv3_3'] = Convolution2D(256, 3, 3,
                                   activation='relu',
                                   border_mode='same',
                                   name='conv3_3')(net['conv3_2'])
    net['pool3'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool3')(net['conv3_3'])

    # NOTE: the source here is conv3_3; the 'conv4_3_norm' layer name is
    # kept from the full model.
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv3_3'])
    num_priors = 6
    x = Convolution2D(num_priors * 4, 3, 3, border_mode='same',
                      name='conv4_3_norm_mbox_loc')(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_loc'] = x
    flatten = Flatten(name='conv4_3_norm_mbox_loc_flat')
    net['conv4_3_norm_mbox_loc_flat'] = flatten(net['conv4_3_norm_mbox_loc'])
    name = 'conv4_3_norm_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    x = Convolution2D(num_priors * num_classes, 3, 3, border_mode='same',
                      name=name)(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_conf'] = x
    flatten = Flatten(name='conv4_3_norm_mbox_conf_flat')
    net['conv4_3_norm_mbox_conf_flat'] = flatten(net['conv4_3_norm_mbox_conf'])
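    # min_size=30 with max_size=60 and flipped aspect_ratios=[2, 3] again
    # gives 6 priors per cell on the 75x75 conv3_3 map, matching num_priors.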
    priorbox = PriorBox(img_size, 30.0, max_size=60.0, aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv4_3_norm_mbox_priorbox')
    net['conv4_3_norm_mbox_priorbox'] = priorbox(net['conv4_3_norm'])

    # Gather predictions (single prediction source in this variant)
    if hasattr(net['conv4_3_norm_mbox_loc_flat'], '_keras_shape'):
        num_boxes = net['conv4_3_norm_mbox_loc_flat']._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(net['conv4_3_norm_mbox_loc_flat'])[-1] // 4
    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['conv4_3_norm_mbox_loc_flat'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['conv4_3_norm_mbox_conf_flat'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])

    net['predictions'] = merge([net['mbox_loc'],
                                net['mbox_conf'],
                                net['conv4_3_norm_mbox_priorbox']],
                               mode='concat', concat_axis=2,
                               name='predictions')
    model = Model(net['input'], net['predictions'])
    return model
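

if __name__ == '__main__':
    # Minimal smoke test (a sketch, assuming a Keras 1.x backend with
    # channels-last ordering and the custom Normalize/PriorBox layers
    # imported by this module).
    import numpy as np

    model = mini_SSD300(input_shape=(300, 300, 3), num_classes=21)
    model.summary()
    dummy = np.zeros((1, 300, 300, 3), dtype=np.float32)
    preds = model.predict(dummy)
    # Per box the last axis concatenates 4 loc offsets, num_classes softmax
    # scores and 8 prior values (4 coordinates + 4 variances).
    print(preds.shape)  # e.g. (1, num_boxes, 4 + 21 + 8)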