Esempio n. 1
0
    def __init__(self, phase, num_classes):
        """Build the BlazeFace detector: stem conv, BlazeBlock backbone,
        multibox heads and prior boxes.

        Args:
            phase: 'train' or 'test'; in 'test' mode a softmax and a
                Detect post-processor are additionally attached.
            num_classes: number of classes predicted by the conf head.
        """
        super(BlazeFace, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        # Stem: 3x3 stride-2 conv, 3 -> 24 channels.
        self.conv_1 = nn.Conv2d(3, 24, kernel_size=3, stride=2, padding=1, bias=True)
        self.bn_1 = nn.BatchNorm2d(24)
        self.relu = nn.ReLU(inplace=True)
        # Backbone: stride=2 blocks downsample; three-positional-arg blocks
        # presumably are "double" BlazeBlocks with an intermediate channel
        # count — TODO confirm against BlazeBlock's definition.
        self.blaze_1 = BlazeBlock(24, 24)
        self.blaze_2 = BlazeBlock(24, 24)
        self.blaze_3 = BlazeBlock(24, 48, stride=2)
        self.blaze_4 = BlazeBlock(48, 48)
        self.blaze_5 = BlazeBlock(48, 48)
        self.blaze_6 = BlazeBlock(48, 24, 96, stride=2)
        self.blaze_7 = BlazeBlock(96, 24, 96)
        self.blaze_8 = BlazeBlock(96, 24, 96)
        self.blaze_9 = BlazeBlock(96, 24, 96, stride=2)
        self.blaze_10 = BlazeBlock(96, 24, 96)
        self.blaze_11 = BlazeBlock(96, 24, 96)

        # Weight init is applied before the heads below are created, so it
        # only touches the modules defined above.
        self.apply(initialize)

        # NOTE(review): mbox receives module objects rather than channel
        # counts here — verify mbox() introspects them as intended.
        self.head = mbox([self.blaze_9, self.blaze_10], [2, 6], 2)

        self.loc = nn.ModuleList(self.head[0])
        self.conf = nn.ModuleList(self.head[1])

        self.cfg = (wider_face)
        self.priorbox = PriorBox(self.cfg)
        # volatile=True is the pre-0.4 PyTorch idiom for "no autograd".
        self.priors = Variable(self.priorbox.forward(), volatile=True)

        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
Esempio n. 2
0
    def get_model(cls):
        """Get the model object for this instance, loading it if it's not already loaded.

        Returns:
            Tuple of (net, priors, preprocess transform, detector).
        """
        # Model weights are expected at the SageMaker-style container path.
        trained_model = '/opt/ml/model/m2det512_vgg.pth'

        anchor_config = anchors(cfg)
        print_info('The Anchor info: \n{}'.format(anchor_config))
        priorbox = PriorBox(anchor_config)
        net = build_net('test',
                        size=cfg.model.input_size,
                        config=cfg.model.m2det_config)
        init_net(net, cfg, trained_model)
        print_info('===> Finished constructing and loading model',
                   ['yellow', 'bold'])
        net.eval()
        # Priors are constant, so build them without autograd tracking.
        with torch.no_grad():
            priors = priorbox.forward()
            if cfg.test_cfg.cuda:
                net = net.cuda()
                priors = priors.cuda()
                # Fixed input size -> cudnn autotuning pays off.
                cudnn.benchmark = True
            else:
                net = net.cpu()
        # (2, 0, 1) permutes HWC -> CHW after resize/mean subtraction.
        _preprocess = BaseTransform(cfg.model.input_size, cfg.model.rgb_means,
                                    (2, 0, 1))
        detector = Detect(cfg.model.m2det_config.num_classes,
                          cfg.loss.bkg_label, anchor_config)

        return net, priors, _preprocess, detector
Esempio n. 3
0
 def _create_optimization(self):
     """Create the SGD optimizer, the MultiBox loss and the prior boxes.

     NOTE(review): lr=4e-3 with momentum=0 and weight_decay=0 looks like
     placeholder hyperparameters — confirm intended values.
     """
     self.optimizer = optim.SGD(self.model.parameters(),
                                lr=4e-3,
                                weight_decay=0,
                                momentum=0)
     # Positional args presumably follow the common SSD MultiBoxLoss
     # signature (num_classes, overlap_thresh, prior_for_matching,
     # bkg_label, neg_mining, neg_pos, neg_overlap, encode_target) —
     # verify against the local definition.
     self.criterion = MultiBoxLoss(self.num_classes, 0.5, True, 0, True, 3,
                                   0.5, False).to(device)
     self.priorbox = PriorBox(self.cfg)
     # Priors are constant; generate without autograd history.
     with torch.no_grad():
         self.priors = self.priorbox.forward()
         if torch.cuda.is_available():
             self.priors = self.priors.cuda()
Esempio n. 4
0
    def __init__(self,
                num_class = 21,
                levels = 3,
                num_channels = 128,
                model_name = 'efficientnet-b0'):
        """Set up EfficientDet: pretrained EfficientNet backbone + BiFPN.

        Args:
            num_class: number of classes; 21 selects the VOC config.
            levels: number of BiFPN repetitions.
            num_channels: BiFPN feature channel width.
            model_name: EfficientNet variant to load pretrained weights for.
        """
        super(EfficientDet, self).__init__()
        self.num_class = num_class 
        self.levels = levels
        self.num_channels = num_channels
        self.efficientnet = EfficientNet.from_pretrained(model_name)
        print('efficientnet: ', self.efficientnet)
        self.bifpn = BiFPN(num_channels = self.num_channels)

        # Select dataset config by class count: 21 -> voc, otherwise coco.
        self.cfg = (coco, voc)[num_class == 21]
        self.priorbox = PriorBox(self.cfg)
        # volatile=True is the pre-0.4 PyTorch idiom for "no autograd".
        self.priors = Variable(self.priorbox.forward(), volatile=True)
Esempio n. 5
0
class Pelee_Det(object):
    """Pelee-based detector wrapper: loads the network once in __init__
    and exposes a single-image `detect` method returning drawing info
    and a visualization image.
    """

    def __init__(self):
        self.anchor_config = anchors(cfg.model)
        self.priorbox = PriorBox(self.anchor_config)
        self.net = build_net('test', cfg.model.input_size, cfg.model)
        init_net(self.net, cfg, args.trained_model)
        self.net.eval()

        self.num_classes = cfg.model.num_classes

        # Priors are constant; build them without autograd tracking.
        with torch.no_grad():
            self.priors = self.priorbox.forward()
            self.net = self.net.cuda()
            self.priors = self.priors.cuda()
            cudnn.benchmark = True
        # (2, 0, 1) permutes HWC -> CHW after mean subtraction.
        self._preprocess = BaseTransform(cfg.model.input_size,
                                         cfg.model.rgb_means, (2, 0, 1))
        # BUG FIX: was bare `num_classes`, which is undefined in this
        # scope (NameError at construction); use the attribute set above.
        self.detector = Detect(self.num_classes, cfg.loss.bkg_label,
                               self.anchor_config)

    def detect(self, image):
        """Run detection on a single BGR image (HWC ndarray).

        Returns:
            (infos, im2show) as produced by draw_detection.
        """
        loop_start = time.time()
        w, h = image.shape[1], image.shape[0]
        img = self._preprocess(image).unsqueeze(0)
        if cfg.test_cfg.cuda:
            img = img.cuda()
        # Scale factors mapping normalized boxes back to pixel coords.
        scale = torch.Tensor([w, h, w, h])
        out = self.net(img)
        boxes, scores = self.detector.forward(out, self.priors)
        boxes = (boxes[0] * scale).cpu().numpy()
        scores = scores[0].cpu().numpy()
        allboxes = []
        # Class 0 is background; iterate foreground classes only.
        for j in range(1, len(ch_labels)):
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            soft_nms = cfg.test_cfg.soft_nms
            keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            allboxes.extend([_.tolist() + [j] for _ in c_dets])

        loop_time = time.time() - loop_start
        # BUG FIX: reshape keeps the (n, 6) column structure even when no
        # detections survive, so the slicing below no longer raises
        # IndexError on an empty result.
        allboxes = np.array(allboxes).reshape(-1, 6)
        boxes = allboxes[:, :4]
        scores = allboxes[:, 4]
        cls_inds = allboxes[:, 5]
        infos, im2show = draw_detection(image, boxes, scores, cls_inds, -1,
                                        args.thresh)
        return infos, im2show
Esempio n. 6
0
class EfficientDet(nn.Module):
    """EfficientDet detector: EfficientNet backbone, BiFPN neck, and
    per-level classification/regression heads.

    NOTE(review): forward() and the static heads construct fresh Conv2d
    layers on every call, so those convolutions carry random, untrained
    weights each pass and are never registered with the module — this
    looks like a structural bug to confirm with the author.
    """
    def __init__(self,
                num_class = 21,
                levels = 3,
                num_channels = 128,
                model_name = 'efficientnet-b0'):
        # num_class == 21 selects the VOC config below; otherwise COCO.
        super(EfficientDet, self).__init__()
        self.num_class = num_class 
        self.levels = levels
        self.num_channels = num_channels
        self.efficientnet = EfficientNet.from_pretrained(model_name)
        print('efficientnet: ', self.efficientnet)
        self.bifpn = BiFPN(num_channels = self.num_channels)

        self.cfg = (coco, voc)[num_class == 21]
        self.priorbox = PriorBox(self.cfg)
        # volatile=True is the pre-0.4 PyTorch idiom for "no autograd".
        self.priors = Variable(self.priorbox.forward(), volatile=True)
        
    
    def forward(self, inputs):
        """Return (loc, conf, priors) for a batch of images."""
        P1, P2, P3, P4, P5, P6, P7 = self.efficientnet(inputs)
        # NOTE(review): these 1x1 projection convs are instantiated per
        # forward pass (untrained, re-randomized every call) — confirm.
        P3 = self.bifpn.Conv(in_channels=P3.size(1), out_channels=self.num_channels, kernel_size=1, stride=1, padding=0)(P3)
        P4 = self.bifpn.Conv(in_channels=P4.size(1), out_channels=self.num_channels, kernel_size=1, stride=1, padding=0)(P4)
        P5 = self.bifpn.Conv(in_channels=P5.size(1), out_channels=self.num_channels, kernel_size=1, stride=1, padding=0)(P5)
        P6 = self.bifpn.Conv(in_channels=P6.size(1), out_channels=self.num_channels, kernel_size=1, stride=1, padding=0)(P6)
        P7 = self.bifpn.Conv(in_channels=P7.size(1), out_channels=self.num_channels, kernel_size=1, stride=1, padding=0)(P7)
        # Repeatedly refine the pyramid through the BiFPN.
        for _ in range(self.levels):
            P3, P4, P5, P6, P7 = self.bifpn([P3, P4, P5, P6, P7])
        P = [P3, P4, P5, P6, P7]
        
        features_class = [self.class_net(p, self.num_class) for p in P]
        # NOTE(review): concatenating per-level features along axis 0 and
        # then view(batch, -1, ...) assumes a compatible layout — verify.
        features_class = torch.cat(features_class, axis=0)
        features_bbox = [self.regression_net(p) for p in P]
        features_bbox = torch.cat(features_bbox, axis=0)
        output = (
                features_bbox.view(inputs.size(0), -1, 4),
                features_class.view(inputs.size(0), -1, self.num_class),
                self.priors
            )
        return output
        
    @staticmethod
    def class_net(features, num_class, num_anchor=5):
        """Classification head; emits sigmoid scores shaped (-1, num_class).

        NOTE(review): the Sequential here is built (with random weights)
        on every invocation — see class docstring.
        """
        features = nn.Sequential(
            nn.Conv2d(in_channels=features.size(1), out_channels=features.size(2), kernel_size = 3, stride=1),
            nn.Conv2d(in_channels=features.size(2), out_channels=num_anchor*num_class, kernel_size = 3, stride=1)
        )(features)
        features = features.view(-1, num_class)
        features = nn.Sigmoid()(features)
        return features 
    @staticmethod
    def regression_net(features, num_anchor=5):
        """Box-regression head; emits sigmoid outputs shaped (-1, 4).

        NOTE(review): same per-call layer construction issue as class_net.
        """
        features = nn.Sequential(
            nn.Conv2d(in_channels=features.size(1), out_channels=features.size(2), kernel_size = 3, stride=1),
            nn.Conv2d(in_channels=features.size(2), out_channels=num_anchor*4, kernel_size = 3, stride=1)
        )(features)
        features = features.view(-1, 4)
        features = nn.Sigmoid()(features)
        return features 
Esempio n. 7
0
    def __init__(self):
        """Load the detection network, prior boxes and post-processing
        detector (CPU variant: the .cuda() transfers are commented out).
        """
        self.anchor_config = anchors(cfg.model)
        self.priorbox = PriorBox(self.anchor_config)
        self.net = build_net('test', cfg.model.input_size, cfg.model)
        init_net(self.net, cfg, args.trained_model)
        self.net.eval()

        self.num_classes = cfg.model.num_classes

        # Priors are constant; build them without autograd tracking.
        with torch.no_grad():
            self.priors = self.priorbox.forward()
            # self.net = self.net.cuda()
            # self.priors = self.priors.cuda()
            cudnn.benchmark = True
        # (2, 0, 1) permutes HWC -> CHW after mean subtraction.
        self._preprocess = BaseTransform(cfg.model.input_size,
                                         cfg.model.rgb_means, (2, 0, 1))
        # BUG FIX: was bare `num_classes`, which is undefined in this
        # scope (NameError at construction); use the attribute set above.
        self.detector = Detect(self.num_classes, cfg.loss.bkg_label,
                               self.anchor_config)
Esempio n. 8
0
    def __init__(self, img_size=300, thresh=0.56):
        """Build an SSD-style detector for 300 or 512 input size and load
        its weights from the module-level `trained_model` checkpoint.

        Args:
            img_size: network input size, must be 300 or 512.
            thresh: default score threshold kept for later use.
        """
        assert img_size == 300 or img_size == 512, 'net input image size must be 300 or 512'
        self.labels_name = LABELS_SET
        self.labels_numb = len(LABELS_SET)
        self.img_size = img_size
        self.cfg = VOC_300 if img_size == 300 else VOC_512
        self.thresh = thresh
        self.gpu_is_available = torch.cuda.is_available()
        self.gpu_numb = torch.cuda.device_count()
        self.net = build_net('test', self.img_size, self.labels_numb)
        self.detect = Detect(self.labels_numb, 0, self.cfg)
        self.transform = BaseTransform(self.img_size)

        # load net weights; strip the `module.` prefix DataParallel adds.
        state_dict = torch.load(trained_model, map_location='cpu')
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        self.net.load_state_dict(new_state_dict)
        self.net.eval()
        print('Finished loading model!')

        if self.gpu_numb > 1:
            self.net = torch.nn.DataParallel(self.net,
                                             device_ids=list(
                                                 range(self.gpu_numb)))

        # set net gpu or cpu model
        if self.gpu_is_available:
            self.net.cuda()
            cudnn.benchmark = True

        # define box generator; priors are constant, so no autograd.
        priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = priorbox.forward()
            if self.gpu_is_available:
                self.priors = self.priors.cuda()
    def __init__(self,
                 num_class=21,
                 levels=3,
                 num_channels=128,
                 model_name='efficientnet-b0'):
        """EfficientDet: EfficientNet backbone + BIFPN neck + one
        classification and one regression head per pyramid level.

        Args:
            num_class: number of classes; 21 selects the VOC config.
            levels: number of BiFPN repetitions (kept for compatibility).
            num_channels: feature channel width of the BIFPN outputs.
            model_name: EfficientNet variant for pretrained weights.
        """
        super(EfficientDet, self).__init__()
        self.num_class = num_class
        self.levels = levels
        self.num_channels = num_channels
        self.efficientnet = EfficientNet.from_pretrained(model_name)

        # Select dataset config by class count: 21 -> voc, otherwise coco.
        self.cfg = (coco, voc)[num_class == 21]
        self.priorbox = PriorBox(self.cfg)
        # volatile=True is the pre-0.4 PyTorch idiom for "no autograd".
        self.priors = Variable(self.priorbox.forward(), volatile=True)
        self.num_anchor = 9
        # BUG FIX: plain Python lists do not register submodules, so the
        # head parameters were invisible to .parameters()/.to()/state_dict.
        # nn.ModuleList keeps the same indexing/append API while
        # registering them properly.
        self.class_module = nn.ModuleList()
        self.regress_module = nn.ModuleList()
        # One head pair per pyramid level P3..P7.
        for _ in range(3, 8):
            self.class_module.append(
                nn.Sequential(
                    nn.Conv2d(in_channels=self.num_channels,
                              out_channels=64,
                              kernel_size=2,
                              stride=1),
                    nn.Conv2d(in_channels=64,
                              out_channels=self.num_anchor * num_class,
                              kernel_size=2,
                              stride=1)))
            self.regress_module.append(
                nn.Sequential(
                    nn.Conv2d(in_channels=self.num_channels,
                              out_channels=64,
                              kernel_size=2,
                              stride=1),
                    nn.Conv2d(in_channels=64,
                              out_channels=self.num_anchor * 4,
                              kernel_size=2,
                              stride=1)))
        # BUG FIX: the BIFPN and sigmoid were re-created on every loop
        # iteration above; they are loop-invariant, so build them once.
        self.BIFPN = BIFPN(in_channels=[40, 80, 112, 192, 320],
                           out_channels=self.num_channels,
                           num_outs=5)
        self.sigmoid = nn.Sigmoid()
    def __init__(self, phase, size, base, extras, head, num_classes):
        """Assemble the SSD network from pre-built layer lists.

        Args:
            phase: 'train' or 'test'; 'test' attaches softmax + Detect.
            size: input image size.
            base: VGG backbone layers (list of modules).
            extras: extra feature layers appended after the backbone.
            head: (loc_layers, conf_layers) tuple of multibox heads.
            num_classes: number of classes; 21 selects the VOC config.
        """
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = (coco, voc)[num_classes == 21]
        self.priorbox = PriorBox(self.cfg)
        # volatile=True is the pre-0.4 PyTorch idiom for "no autograd".
        self.priors = Variable(self.priorbox.forward(), volatile=True)
        self.size = size

        # SSD network
        self.vgg = nn.ModuleList(base)
        # Layer learns to scale the l2 normalized features from conv4_3
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
Esempio n. 11
0
    def _init_model(self):
        """Build priors, construct the RFB-VGG network and load weights.

        The 300 vs 512 configuration is chosen from the checkpoint
        filename in self.model_path.
        """
        # BUG FIX: `cuda` was only assigned inside an `if`, leaving it
        # unbound (NameError) on CPU-only machines.
        cuda = torch.cuda.is_available()
        if '300' in self.model_path:
            cfg = COCO_300
            self.img_dim = 300
            print('Model input size is 300')
        else:
            cfg = COCO_512
            self.img_dim = 512
            print('Model input size is 512')

        priorbox = PriorBox(cfg)
        with torch.no_grad():
            priors = priorbox.forward()
            # BUG FIX: self.priors was never set on the CPU path.
            self.priors = priors.cuda() if cuda else priors

        self.net = build_rfb_vgg_net('test', self.img_dim, self.num_classes)  # initialize detector
        state_dict = torch.load(self.model_path)['state_dict']
        # create new OrderedDict that does not contain `module.`
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        self.net.load_state_dict(new_state_dict)
        self.net.eval()
        if cuda:
            self.net = self.net.cuda()
            cudnn.benchmark = True
        else:
            self.net = self.net.cpu()
        print('Finished loading model!')
        self.detector = Detect(self.num_classes, 0, cfg)
def im_detect(net, im_org, target_size, transform, cuda, means):
    """Resize and normalize an image, run `net`, and decode boxes/scores.

    Args:
        net: detection network (already on the right device).
        im_org: original image as an (H, W, 3) ndarray.
        target_size: square input size fed to the network.
        transform: unused here — NOTE(review): presumably meant to replace
            the manual resize/mean-subtract below; confirm with callers.
        cuda: whether to run on GPU.
        means: per-channel means subtracted from the input.

    Returns:
        (boxes, scores) numpy arrays scaled to the original image size.
    """
    im = cv2.resize(np.array(im_org), (target_size, target_size),
                    interpolation=cv2.INTER_LINEAR).astype(np.float32)
    im -= means
    im = im.transpose((2, 0, 1))  # HWC -> CHW
    # Scale factors mapping normalized boxes back to original pixels.
    scale = torch.Tensor(
        [im_org.shape[1], im_org.shape[0], im_org.shape[1], im_org.shape[0]])

    x = Variable((torch.from_numpy(im)).unsqueeze(0), volatile=True)
    if cuda:
        x = x.cuda()
        scale = scale.cuda()

    out = net(x)

    # NOTE(review): the module-level `cfg` is mutated below while PriorBox
    # is built from VOC_512 (`cfg_temp`); the mutation looks vestigial —
    # confirm nothing downstream depends on it.
    cfg_temp = VOC_512
    cfg['min_dim'] = target_size
    size = math.ceil(target_size / 4)
    multi = target_size / 300
    for i in range(0, len(cfg['feature_maps'])):
        size = net.sizes[i]
        cfg['feature_maps'][i] = size
    priorbox_temp = PriorBox(cfg_temp)
    # BUG FIX: priors were moved to GPU unconditionally, crashing when
    # `cuda` is False; honor the flag like the input tensor above.
    priors_temp = priorbox_temp.forward()
    if cuda:
        priors_temp = priors_temp.cuda()
    priors_temp = Variable(priors_temp, volatile=True)

    boxes, scores = detector.forward(out, priors_temp)
    boxes = boxes[0]
    scores = scores[0]

    boxes *= scale
    boxes = boxes.cpu().numpy()
    scores = scores.cpu().numpy()

    return (boxes, scores)
Esempio n. 13
0
    def __init__(self):
        """Set up the Qt demo window and load the RFB detection model.

        Builds the menu action, the drag-and-drop label, and then loads
        the network weights from self.trained_model.
        """
        super(RFB_GUI, self).__init__()

        MyMessageBox(self)
        self.setWindowTitle("RFB-GUI Demo Program")
        self.resize(1280, 900)
        self.setFocus()

        # Ctrl+O menu action wired to the file-picker slot.
        self.file_item = QtWidgets.QAction('Open image', self)
        self.file_item.setShortcut('Ctrl+O')
        self.file_item.triggered.connect(self.select_file)

        self.label = DragLabel(
            "Please drag image here\nor\nPress Ctrl+O to select", self)
        self.label.addAction(self.file_item)
        self.setCentralWidget(self.label)

        # NOTE(review): self.cfg / self.input_size / self.trained_model are
        # not assigned in this method — presumably class attributes; verify.
        self.priorbox = PriorBox(self.cfg)
        self.cuda = True
        self.numclass = 21
        self.net = build_net('test', self.input_size,
                             self.numclass)  # initialize detector
        state_dict = torch.load(self.trained_model)

        # Strip the `module.` prefix that DataParallel adds to keys.
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]
            else:
                name = k
            new_state_dict[name] = v
        self.net.load_state_dict(new_state_dict)
        self.net.eval()
        if self.cuda:
            self.net = self.net.cuda()
            cudnn.benchmark = True
        else:
            self.net = self.net.cpu()
        print('Finished loading model!')
# Module-level training setup: build the network, optionally wrap it for
# multi-GPU, and precompute the (constant) prior boxes.
net = build_net(
    'train',
    size=cfg.model.input_size,  # Only 320, 512, 704 and 800 are supported
    config=cfg.model.m2det_config)
init_net(net, cfg, args.resume_net
         )  # init the network with pretrained weights or resumed weights

if args.ngpu > 1:
    net = torch.nn.DataParallel(net)
if cfg.train_cfg.cuda:
    net.cuda()
    # Fixed input size -> cudnn autotuning pays off.
    cudnn.benchmark = True

optimizer = set_optimizer(net, cfg)
criterion = set_criterion(cfg)
priorbox = PriorBox(anchors(cfg))

# Priors are constant; build them without autograd tracking.
with torch.no_grad():
    priors = priorbox.forward()
    if cfg.train_cfg.cuda:
        priors = priors.cuda()
if __name__ == '__main__':
    net.train()
    epoch = args.resume_epoch
    print_info('===> Loading Dataset...', ['yellow', 'bold'])
    dataset = get_dataloader(cfg, args.dataset, 'train_sets')
    epoch_size = len(dataset) // (cfg.train_cfg.per_batch_size * args.ngpu)
    max_iter = getattr(cfg.train_cfg.step_lr, args.dataset)[-1] * epoch_size
    stepvalues = [
        _ * epoch_size
Esempio n. 15
0
        new_state_dict[name] = v
    net.load_state_dict(new_state_dict)

# Module-level setup: multi-GPU wrap, optimizer/criterion, priors, datasets.
if args.ngpu > 1:
    net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))

if args.cuda:
    net.cuda()
    cudnn.benchmark = True

detector = Detect(num_classes, 0, cfg)
optimizer = optim.SGD(net.parameters(), lr=args.lr,
                      momentum=args.momentum, weight_decay=args.weight_decay)

# Positional args presumably follow the common SSD MultiBoxLoss signature
# (num_classes, overlap_thresh, prior_for_matching, bkg_label, neg_mining,
# neg_pos, neg_overlap, encode_target) — verify against the definition.
criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False)
priorbox = PriorBox(cfg)
# Pre-0.4 PyTorch Variable wrapper; priors are constant.
priors = Variable(priorbox.forward())
# dataset
print('Loading Dataset...')
if args.dataset == 'VOC':
    testset = VOCDetection(
        VOCroot, [('2007', 'test')], None, AnnotationTransform())
    train_dataset = VOCDetection(VOCroot, train_sets, preproc(
        img_dim, rgb_means, rgb_std, p), AnnotationTransform())
elif args.dataset == 'COCO':
    testset = COCODetection(
        COCOroot, [('2017', 'val')], None)
    #testset = COCODetection(COCOroot, [('2017', 'test-dev')], None)
    train_dataset = COCODetection(COCOroot, train_sets, preproc(
        img_dim, rgb_means, rgb_std, p))
else:
Esempio n. 16
0
# Module-level training setup: logger, config, network, optimizer, priors.
logger = set_logger(args.tensorboard)
# NOTE(review): `global` at module scope is a no-op; kept for fidelity.
global cfg
cfg = Config.fromfile(args.config)
net = get_network(build_net, cfg, args.dataset)
init_net(net, cfg, args.resume_net)  # init the network with pretrained
# weights or resumed weights

if args.ngpu > 1:
    net = torch.nn.DataParallel(net)
if cfg.train_cfg.cuda:
    net.cuda()
    # BUG FIX: was `cudnn.benckmark`, a typo that silently created a new
    # attribute and left the cudnn autotuner disabled.
    cudnn.benchmark = True

optimizer = set_optimizer(net, cfg)
criterion = set_criterion(cfg, args.dataset)
priorbox = PriorBox(anchors(cfg.model, args.dataset))

# Priors are constant; build them without autograd tracking.
with torch.no_grad():
    priors = priorbox.forward()
    if cfg.train_cfg.cuda:
        priors = priors.cuda()

if __name__ == '__main__':
    net.train()
    epoch = args.resume_epoch
    print_info('===> Loading Dataset...', ['yellow', 'bold'])
    dataset = get_dataloader(cfg, args.dataset, 'train_sets')
    epoch_size = len(dataset) // (cfg.train_cfg.per_batch_size * args.ngpu)
    max_iter = getattr(cfg.train_cfg.step_lr, args.dataset)[-1] * epoch_size
    stepvalues = [
        _ * epoch_size
Esempio n. 17
0
def handler(context):
    """Train an SSD model on datasets fetched from the platform API and
    save the final weights under ABEJA_TRAINING_RESULT_DIR.

    Args:
        context: platform-provided object exposing `datasets` aliases.
    """
    dataset_alias = context.datasets

    trainval_dataset_id = dataset_alias['trainval']
    test_dataset_id = dataset_alias['test']

    trainval_dataset = list(load_dataset_from_api(trainval_dataset_id))
    test_dataset = list(load_dataset_from_api(test_dataset_id))

    trainval = DetectionDatasetFromAPI(trainval_dataset,
                                       transform=SSDAugmentation(
                                           min_dim, MEANS))
    test = DetectionDatasetFromAPI(test_dataset,
                                   transform=SSDAugmentation(min_dim, MEANS))
    train_dataset = trainval
    test_dataset = test

    # Priors are constant; build them without autograd tracking.
    priorbox = PriorBox(min_dim, PARAMS)
    with torch.no_grad():
        priors = priorbox.forward().to(device)

    ssd_net = build_ssd('train', priors, min_dim, num_classes)
    ssd_net = ssd_net.to(device)

    # Download the reduced-FC VGG16 backbone weights for initialization.
    url = 'https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth'
    weight_file = os.path.join(ABEJA_TRAINING_RESULT_DIR,
                               'vgg16_reducedfc.pth')
    download(url, weight_file)

    vgg_weights = torch.load(weight_file)
    print('Loading base network...')
    ssd_net.vgg.load_state_dict(vgg_weights)

    optimizer = optim.SGD(ssd_net.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=5e-4)
    criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False,
                             PARAMS['variance'], device)

    # loss counters
    step_index = 0

    trainloader = data.DataLoader(train_dataset,
                                  batch_size,
                                  num_workers=0,
                                  shuffle=True,
                                  collate_fn=tools.detection_collate,
                                  pin_memory=True)

    testloader = data.DataLoader(test_dataset,
                                 batch_size,
                                 num_workers=0,
                                 shuffle=False,
                                 collate_fn=tools.detection_collate,
                                 pin_memory=True)

    # create batch iterator; loop over epochs until max_iter is reached.
    iteration = 1
    while iteration <= max_iter:
        ssd_net.train()
        for images, targets in trainloader:
            if iteration > max_iter:
                break

            # Step the LR schedule at the configured iteration milestones.
            if iteration in lr_steps:
                step_index += 1
                adjust_learning_rate(optimizer, 0.1, step_index)

            # load train data
            images = images.to(device)
            targets = [ann.to(device) for ann in targets]

            # forward
            out = ssd_net(images)

            # backprop
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()

            if iteration % 100 == 0:
                print('[Train] iter {}, loss: {:.4f}'.format(
                    iteration, loss.item()))
                statistics(iteration, loss.item(), None, None, None)
                writer.add_scalar('main/loss', loss.item(), iteration)
                writer.add_scalar('main/loc_loss', loss_l.item(), iteration)
                writer.add_scalar('main/conf_loss', loss_c.item(), iteration)

            # Periodic evaluation; `eval` here is a project function that
            # shadows the builtin — it puts the net in eval mode, so
            # restore train mode afterwards.
            if iteration % 10000 == 0:
                eval(testloader, ssd_net, criterion, iteration)
                ssd_net.train()

            iteration += 1
    torch.save(ssd_net.state_dict(),
               os.path.join(ABEJA_TRAINING_RESULT_DIR, 'model.pth'))
Esempio n. 18
0
def demo(v_f):
    """Run the M2Det detector over a video file, showing and writing an
    annotated copy to result.mp4.

    Args:
        v_f: path to the input video file.
    """
    cfg = Config.fromfile(config_f)
    anchor_config = anchors(cfg)
    priorbox = PriorBox(anchor_config)
    net = build_net('test',
                    size=cfg.model.input_size,
                    config=cfg.model.m2det_config)
    init_net(net, cfg, checkpoint_path)
    net.eval().to(device)
    # Priors are constant; build them without autograd tracking.
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    # (2, 0, 1) permutes HWC -> CHW after mean subtraction.
    _preprocess = BaseTransform(
        cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1))
    detector = Detect(cfg.model.m2det_config.num_classes,
                      cfg.loss.bkg_label, anchor_config)
    logging.info('detector initiated.')

    cap = cv2.VideoCapture(v_f)
    logging.info('detect on: {}'.format(v_f))
    logging.info('video width: {}, height: {}'.format(int(cap.get(3)), int(cap.get(4))))
    # NOTE(review): MJPG fourcc with a .mp4 container may fail on some
    # OpenCV builds — confirm the output plays back correctly.
    out_video = cv2.VideoWriter("result.mp4", cv2.VideoWriter_fourcc(*'MJPG'), 24, (int(cap.get(3)), int(cap.get(4))))

    while True:
        ret, image = cap.read()
        if not ret:
            # End of stream: release writer/capture and stop.
            out_video.release()
            cv2.destroyAllWindows()
            cap.release()
            break
        w, h = image.shape[1], image.shape[0]
        img = _preprocess(image).unsqueeze(0).to(device)
        # Scale factors mapping normalized boxes back to pixel coords.
        scale = torch.Tensor([w, h, w, h])
        out = net(img)
        boxes, scores = detector.forward(out, priors)
        boxes = (boxes[0]*scale).cpu().numpy()
        scores = scores[0].cpu().numpy()
        allboxes = []
        # Class 0 is background; iterate foreground classes only.
        for j in range(1, cfg.model.m2det_config.num_classes):
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
                np.float32, copy=False)
            soft_nms = cfg.test_cfg.soft_nms
            keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            allboxes.extend([_.tolist()+[j] for _ in c_dets])
        if len(allboxes) > 0:
            allboxes = np.array(allboxes)
            # [boxes, scores, label_id] -> [id, score, boxes] 0, 1, 2, 3, 4, 5
            allboxes = allboxes[:, [5, 4, 0, 1, 2, 3]]
            logging.info('allboxes shape: {}'.format(allboxes.shape))
            res = visualize_det_cv2(image, allboxes, classes=classes, thresh=0.2)
            cv2.imshow('rr', res)
            out_video.write(res)
            cv2.waitKey(1)
def train(cfg):
    """Train M2Det on the Helmet dataset, checkpointing periodically and
    on KeyboardInterrupt.

    Args:
        cfg: path to an mmcv-style config file (re-parsed inside).
    """
    cfg = Config.fromfile(cfg)
    net = build_net('train',
                    size=cfg.model.input_size,  # Only 320, 512, 704 and 800 are supported
                    config=cfg.model.m2det_config)
    init_net(net, cfg, False)
    net.to(device)
    # Resume from a checkpoint for start_epoch if one exists.
    if os.path.exists(checkpoint_path.format(start_epoch)):
        checkpoints = torch.load(checkpoint_path.format(start_epoch))
        net.load_state_dict(checkpoints)
        logging.info('checkpoint loaded.')

    optimizer = optim.SGD(net.parameters(),
                          lr=cfg.train_cfg.lr[0],
                          momentum=cfg.optimizer.momentum,
                          weight_decay=cfg.optimizer.weight_decay)
    criterion = MultiBoxLoss(cfg.model.m2det_config.num_classes,
                             overlap_thresh=cfg.loss.overlap_thresh,
                             prior_for_matching=cfg.loss.prior_for_matching,
                             bkg_label=cfg.loss.bkg_label,
                             neg_mining=cfg.loss.neg_mining,
                             neg_pos=cfg.loss.neg_pos,
                             neg_overlap=cfg.loss.neg_overlap,
                             encode_target=cfg.loss
                             .encode_target)
    # Priors are constant; build them without autograd tracking.
    priorbox = PriorBox(anchors(cfg))
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    net.train()

    # Detector is only needed for (currently commented-out) mid-training
    # snapshots, but is kept initialized.
    anchor_config = anchors(cfg)
    detector = Detect(cfg.model.m2det_config.num_classes,
                      cfg.loss.bkg_label, anchor_config)
    logging.info('detector initiated.')

    dataset = get_dataloader(cfg, 'Helmet', 'train_sets')
    train_ds = DataLoader(dataset, cfg.train_cfg.per_batch_size,
                          shuffle=True,
                          num_workers=0,
                          collate_fn=detection_collate)
    logging.info('dataset loaded, start to train...')

    for epoch in range(start_epoch, cfg.model.epochs):
        for i, data in enumerate(train_ds):
            # try/except lets Ctrl+C save a checkpoint before exiting.
            try:
                lr = adjust_learning_rate_helmet(optimizer, epoch, cfg)
                images, targets = data
                images = images.to(device)
                targets = [anno.to(device) for anno in targets]
                out = net(images)

                optimizer.zero_grad()
                loss_l, loss_c = criterion(out, priors, targets)
                loss = loss_l + loss_c
                loss.backward()
                optimizer.step()

                if i % 30 == 0:
                    logging.info('Epoch: {}, iter: {}, loc_loss: {}, conf_loss: {}, loss: {}, lr: {}'.format(
                        epoch, i, loss_l.item(), loss_c.item(), loss.item(), lr
                    ))

                # Periodic checkpoint within the epoch.
                if i % 2000 == 0:
                    torch.save(net.state_dict(), checkpoint_path.format(epoch))
                    logging.info('model saved.')
            except KeyboardInterrupt:
                torch.save(net.state_dict(), checkpoint_path.format(epoch))
                logging.info('model saved.')
                exit(0)
    torch.save(net.state_dict(), checkpoint_path.format(epoch))
Esempio n. 20
0
if args.gpu_id:
    net = torch.nn.DataParallel(net, device_ids=args.gpu_id)

if args.cuda:
    net.cuda()
    cudnn.benchmark = True

optimizer = optim.SGD(net.parameters(),
                      lr=args.lr,
                      momentum=args.momentum,
                      weight_decay=args.weight_decay)
#optimizer = optim.RMSprop(net.parameters(), lr=args.lr,alpha = 0.9, eps=1e-08,
#                     momentum=args.momentum, weight_decay=args.weight_decay)

criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False)
priorbox = PriorBox(cfg)
priors = Variable(priorbox.forward(), volatile=True)
# --- Dataset selection --------------------------------------------------
# Test sets get no preprocessing (transform=None); training sets are
# augmented via `preproc` with the module-level image size / channel means.
print('Loading Dataset...')
if args.dataset == 'VOC':
    testset = VOCDetection(VOCroot, [('2007', 'test')], None,
                           AnnotationTransform())
    train_dataset = VOCDetection(VOCroot, train_sets,
                                 preproc(img_dim, rgb_means, p, rgb_std),
                                 AnnotationTransform())
elif args.dataset == 'COCO':
    testset = COCODetection(COCOroot, [('2014', 'minival')], None)
    train_dataset = COCODetection(COCOroot, train_sets,
                                  preproc(img_dim, rgb_means, p, rgb_std))
else:
    # NOTE(review): execution falls through with testset/train_dataset
    # undefined -- a later NameError is likely. Consider exiting here.
    print('Only VOC and COCO are supported now!')
Esempio n. 21
0
    plt.show()

if __name__ == "__main__":
    from collections import OrderedDict

    # Build and load the network ONCE. The original re-created the net and
    # re-read the checkpoint from disk for every image in the folder, which
    # is pure wasted I/O and compute (per-image results are unchanged).
    model = 'fssd_voc_79_74.pth'
    net = build_net(300, 21)
    state_dict = torch.load(model)
    # Strip the optional 'module.' prefix left behind by DataParallel.
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:] if k[:7] == 'module.' else k
        new_state_dict[name] = v
    net.load_state_dict(new_state_dict)
    net.eval()
    net = net.cuda()
    cudnn.benchmark = True
    print("Finished loading model")

    transform = BaseTransform(300, (104, 117, 123), (2, 0, 1))
    detector = Detect(21, 0, VOC_300)
    priorbox = PriorBox(VOC_300)
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()

    # Run detection on every image in the folder.
    img_names = os.listdir('image/')
    for img_name in img_names:
        img = cv2.imread("image/" + img_name)
        test_net(net, img, img_name, detector, transform, priors,
                 top_k=200, thresh=0.4)
from layers.bbox_utils import match, match_ssd, decode

import matplotlib.pyplot as plt

# WIDER FACE split loaded for evaluation; shuffle is off so batches follow
# the file order of cfg.TRAIN_FILE.
dataset = WIDERDetection(
    cfg.TRAIN_FILE,
    transform=S3FDValTransform(cfg.INPUT_SIZE),
    train=False)

data_loader = data.DataLoader(
    dataset,
    64,
    num_workers=4,
    shuffle=False,
    collate_fn=detection_collate,
    pin_memory=True)

# The anchor grid is fixed for the whole run; precompute it once.
anchor_boxes = PriorBox(cfg).forward()
num_priors = anchor_boxes.size(0)
variance = cfg.VARIANCE

# Scratch directory for the cached anchor-matching statistics.
savepath = 'tmp'
os.makedirs(savepath, exist_ok=True)

filename = os.path.join(savepath, 'match_anchor.pkl')


def anchor_match_count():
    anchor_scale_map = {16: 0, 32: 0, 64: 0, 128: 0, 256: 0, 512: 0}
    thresh = cfg.OVERLAP_THRESH
    sfd_scales = []
    for idx, (_, target) in enumerate(data_loader):
Esempio n. 23
0
# Wrap for multi-GPU BEFORE the optimizer captures net.parameters().
if args.ngpu > 1:
    net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))

if args.cuda:
    net.cuda()
    # cuDNN autotuner: picks the fastest kernels for fixed-size inputs.
    cudnn.benchmark = True


optimizer = optim.SGD(net.parameters(), lr=args.lr,
                      momentum=args.momentum, weight_decay=args.weight_decay)
#optimizer = optim.RMSprop(net.parameters(), lr=args.lr,alpha = 0.9, eps=1e-08,
#                      momentum=args.momentum, weight_decay=args.weight_decay)

criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False)
priorbox = PriorBox(cfg)
# `Variable(..., volatile=True)` was removed in PyTorch 0.4; the priors are
# constants, so build them once under torch.no_grad() instead.
with torch.no_grad():
    priors = priorbox.forward()


def train():
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')

    if args.dataset == 'VOC':
        dataset = VOCDetection(VOCroot, train_sets, preproc(
            img_dim, rgb_means, p), AnnotationTransform())
    elif args.dataset == 'COCO':
Esempio n. 24
0
def main():
    """Run single-scale detection over every image listed in `img_set`,
    optionally drawing boxes on screen and writing results to `save_dir`.

    NOTE(review): relies on module-level globals configured elsewhere in
    this script (deform, multihead, bn, refine, ssd_dim, num_classes,
    trained_model, device, backbone, dataset, img_set, img_root, labelmap,
    class_name, save_dir, display, det_list, results_file, ...).
    """
    mean = (104, 117, 123)
    print('loading model!')
    # Two model variants: deformable-conv RefineDet vs. plain VGG RefineDet.
    if deform:
        from model.dualrefinedet_vggbn import build_net
        net = build_net('test',
                        size=ssd_dim,
                        num_classes=num_classes,
                        c7_channel=1024,
                        def_groups=deform,
                        multihead=multihead,
                        bn=bn)
    else:
        from model.refinedet_vgg import build_net
        net = build_net('test',
                        size=ssd_dim,
                        num_classes=num_classes,
                        use_refine=refine,
                        c7_channel=1024,
                        bn=bn)
    net.load_state_dict(torch.load(trained_model))
    net.eval()
    print('Finished loading model!', trained_model)
    net = net.to(device)
    detector = Detect(num_classes, 0, top_k, confidence_threshold,
                      nms_threshold)
    # Priors are constant for a fixed input size; compute once, no autograd.
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    for i, line in enumerate(open(img_set, 'r')):
        # if i==10:
        #     break
        # Each list line names one image; the id format depends on dataset.
        if 'COCO' in dataset:
            image_name = line[:-1]
            image_id = int(image_name.split('_')[-1])
        elif 'VOC' in dataset:
            image_name = line[:-1]
            image_id = -1
        else:
            image_name, image_id = line.split(' ')
            image_id = image_id[:-1]
        print(i, image_name, image_id)
        image_path = os.path.join(img_root, image_name + '.jpg')
        image = cv2.imread(image_path, 1)
        h, w, _ = image.shape
        # Fixed 640x480 canvas used only for on-screen visualisation.
        image_draw = cv2.resize(image.copy(), (640, 480))
        im_trans = base_transform(image, ssd_dim, mean)
        ######################## Detection ########################
        with torch.no_grad():
            x = torch.from_numpy(im_trans).unsqueeze(0).permute(0, 3, 1,
                                                                2).to(device)
            if 'RefineDet' in backbone and refine:
                arm_loc, _, loc, conf = net(x)
            else:
                loc, conf = net(x)
                arm_loc = None
            detections = detector.forward(loc,
                                          conf,
                                          priors,
                                          arm_loc_data=arm_loc)
        ############################################################
        out = list()
        # Class 0 is background; iterate foreground classes only.
        for j in range(1, detections.size(1)):
            dets = detections[0, j, :]
            if dets.sum() == 0:
                continue
            # Keep rows with a positive score (column 0 holds the score).
            mask = dets[:, 0].gt(0.).expand(dets.size(-1), dets.size(0)).t()
            dets = torch.masked_select(dets, mask).view(-1, dets.size(-1))
            boxes = dets[:, 1:-1] if dets.size(-1) == 6 else dets[:, 1:]
            # Boxes are normalised [0,1]; scale back to pixel coordinates.
            boxes[:, 0] *= w
            boxes[:, 2] *= w
            boxes[:, 1] *= h
            boxes[:, 3] *= h
            scores = dets[:, 0].cpu().numpy()
            boxes_np = boxes.cpu().numpy()

            for b, s in zip(boxes_np, scores):
                if save_dir:
                    out.append(
                        [int(b[0]),
                         int(b[1]),
                         int(b[2]),
                         int(b[3]), j - 1, s])
                    if 'COCO' in dataset:
                        # COCO json expects bbox as [x, y, width, height].
                        det_list.append({
                            'image_id':
                            image_id,
                            'category_id':
                            labelmap[j],
                            'bbox': [
                                float('{:.1f}'.format(b[0])),
                                float('{:.1f}'.format(b[1])),
                                float('{:.1f}'.format(b[2] - b[0] + 1)),
                                float('{:.1f}'.format(b[3] - b[1] + 1))
                            ],
                            'score':
                            float('{:.2f}'.format(s))
                        })
                    else:
                        results_file.write(
                            str(image_id) + ' ' + str(j) + ' ' + str(s) + ' ' +
                            str(np.around(b[0], 2)) + ' ' +
                            str(np.around(b[1], 2)) + ' ' +
                            str(np.around(b[2], 2)) + ' ' +
                            str(np.around(b[3], 2)) + '\n')
                if display:
                    # Rescale pixel coords onto the 640x480 preview canvas.
                    cv2.rectangle(image_draw,
                                  (int(b[0] / w * 640), int(b[1] / h * 480)),
                                  (int(b[2] / w * 640), int(b[3] / h * 480)),
                                  (0, 255, 0),
                                  thickness=1)

                    cls = class_name[j] if 'COCO' in dataset else str(
                        labelmap[j - 1])
                    put_str = cls + ':' + str(np.around(s, decimals=2))
                    cv2.putText(
                        image_draw,
                        put_str,
                        (int(b[0] / w * 640), int(b[1] / h * 480) - 10),
                        cv2.FONT_HERSHEY_DUPLEX,
                        0.5,
                        color=(0, 255, 0),
                        thickness=1)
        if display:
            cv2.imshow('frame', image_draw)
            ch = cv2.waitKey(0)
            if ch == 115:  # 115 == ord('s'): save this frame + detections
                if save_dir:
                    print('save: ', line)
                    torch.save(
                        out, os.path.join(save_dir, '%s.pkl' % str(line[:-1])))
                    cv2.imwrite(
                        os.path.join(save_dir, '%s.jpg' % str(line[:-1])),
                        image)
                    cv2.imwrite(
                        os.path.join(save_dir, '%s_box.jpg' % str(line[:-1])),
                        image_draw)

    cv2.destroyAllWindows()
    if save_dir:
        if dataset == 'COCO':
            json.dump(det_list, results_file)
        results_file.close()
def train():
    """Train the detector, saving periodic and final checkpoints.

    Uses the module-level net/optimizer/criterion and args. Fixes applied:
    `.data[0]` (IndexError on 0-dim tensors in PyTorch >= 0.4) replaced with
    `.item()`, and the removed `Variable(..., volatile=True)` idiom replaced
    with `torch.no_grad()` / plain tensors.
    """
    net.train()
    # loss counters
    loc_loss = 0  # accumulated per epoch
    conf_loss = 0
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')

    if args.dataset == 'VOC':
        dataset = VOCDetection(VOCroot, train_sets,
                               preproc(img_dim, rgb_means, p),
                               AnnotationTransform())
        # NOTE(review): the loop below always selects the 512 input size and
        # reads `dataset_512`, which is only defined in the CUSTOM branch --
        # this VOC path would raise NameError. Confirm intended behaviour.
    elif args.dataset == 'COCO':
        # dataset = COCODetection(COCOroot, train_sets, preproc(
        #     img_dim, rgb_means, p))
        print('COCO not supported now!')
        return
    elif args.dataset == 'CUSTOM':
        dataset = CustomDetection(CUSTOMroot, train_sets,
                                  preproc(img_dim, rgb_means, p),
                                  CustomAnnotationTransform())
        dataset_512 = CustomDetection(CUSTOMroot, train_sets,
                                      preproc(512, rgb_means, p),
                                      CustomAnnotationTransform())
    else:
        print('Only VOC and COCO are supported now!')
        return

    epoch_size = len(dataset) // args.batch_size
    max_iter = args.max_epoch * epoch_size

    # LR decay milestones, expressed in iterations.
    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size
                      )  # (80000,100000,120000)
    stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', dataset.name)
    step_index = 0

    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0

    lr = args.lr
    image_size = 0
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # New epoch: rebuild the (shuffled) batch iterator.
            image_size = ('300', '512')[1]  #[random.randint(0,1)]
            batch_iterator = iter(
                data.DataLoader((dataset, dataset_512)[image_size == '512'],
                                batch_size,
                                shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate))
            # Priors are constants: build them without autograd bookkeeping
            # (`Variable(..., volatile=True)` was removed in PyTorch 0.4).
            priorbox = PriorBox((VOC_300_2, VOC_512_3)[image_size == '512'])
            with torch.no_grad():
                priors = priorbox.forward()
            loc_loss = 0
            conf_loss = 0
            #if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > 200):
            #torch.save(net.state_dict(), args.save_folder + args.version + '_' + args.dataset + '_epoches_' +
            #repr(epoch) + '.pth')
            epoch += 1

        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)

        # load train data
        images, targets = next(batch_iterator)

        # print(np.sum([torch.sum(anno[:,-1] == 2) for anno in targets]))

        if args.cuda:
            # Plain tensor moves; the old Variable wrapper is a no-op now.
            images = images.cuda()
            targets = [anno.cuda() for anno in targets]

        # forward
        load_t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        load_t1 = time.time()
        # `.item()` works for 0-dim loss tensors; `.data[0]` no longer does.
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()

        if iteration % 100 == 0:
            print('Epoch:' + repr(epoch) + ' || image-size:' +
                  repr(image_size) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Totel iter ' + repr(iteration) +
                  ' || L: %.4f C: %.4f||' % (loss_l.item(), loss_c.item()) +
                  'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                  'LR: %.8f' % (lr))
        if iteration <= 110000 and (iteration == 0 or iteration % 10000 == 0):
            print('Saving state, iter:', iteration)
            torch.save(net.state_dict(),
                       'weights/ssd300_2_VOC_' + repr(iteration) + '.pth')
        elif (iteration > 110000) and iteration % 1000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(net.state_dict(),
                       'weights/ssd300_2_VOC_' + repr(iteration) + '.pth')

    torch.save(
        net.state_dict(), args.save_folder + 'Final_' + args.version + '_' +
        args.dataset + '.pth')
Esempio n. 26
0
# ---- CLI ---------------------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument("--prune_folder", default = "prunes/")
parser.add_argument("--trained_model", default = "prunes/refineDet_trained.pth")
parser.add_argument('--dataset_root', default=VOC_ROOT)
parser.add_argument("--cut_ratio", default=0.2, type=float)
parser.add_argument('--cuda', default=True, type=str2bool, help='Use cuda to train model')
#for test_net: 200 in SSD paper, 200 for COCO, 300 for VOC
parser.add_argument('--max_per_image', default=200, type=int,
                    help='Top number of detections kept per image, further restrict the number of predictions to parse')
args = parser.parse_args()

cfg = voc320

# different from normal ssd, where the PriorBox is stored inside SSD object
priorbox = PriorBox(cfg)
# `Variable(..., volatile=True)` was removed in PyTorch 0.4; the priors are
# constants, so build them once under torch.no_grad() and keep them on GPU.
with torch.no_grad():
    priors = priorbox.forward().cuda()
detector = RefineDetect(cfg['num_classes'], 0, cfg, object_score=0.01)

def test_net(save_folder, net, detector, priors, cuda,
             testset, transform, max_per_image=200, thresh=0.05): # max_per_image is same as top_k

    if not os.path.exists(save_folder):
        os.mkdir(save_folder)

    num_images = len(testset)
    num_classes = len(labelmap)                      # +1 for background
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
Esempio n. 27
0
                             size=ssd_dim,
                             num_classes=num_classes,
                             use_refine=args.refine,
                             c7_channel=args.c7_channel)
     else:
         net = None
     print('loading model!', args.model_dir, args.iteration)
     net.load_state_dict(torch.load(trained_model))
     print(net)
     net.eval()
     print('Finished loading model!', args.model_dir, args.iteration,
           'tub=' + str(args.tub), 'tub_thresh=' + str(args.tub_thresh),
           'tub_score=' + str(args.tub_generate_score))
     detector = Detect(num_classes, 0, args.top_k,
                       args.confidence_threshold, args.nms_threshold)
     priorbox = PriorBox(cfg)
     # priorbox=PriorBox(multi_cfg['2.2'])
     with torch.no_grad():
         priors = priorbox.forward().to(device)
     # load data
     net = net.to(device)
     # evaluation
     test_net(args.save_folder, net, dataset,
              BaseTransform(net.size, dataset_mean), args.top_k, detector,
              priors)
 else:
     out_dir = get_output_dir(
         pkl_dir, args.iteration + '_' + args.dataset_name + '_' +
         args.set_file_name)
     print('Without detection', out_dir)
     do_python_eval(out_dir)
Esempio n. 28
0
# ---- CLI ---------------------------------------------------------------
parser.add_argument('-c', '--config', default='configs/m2det320_vgg.py', type=str)
parser.add_argument('-d', '--dataset', default='COCO', help='VOC or COCO version')
parser.add_argument('-m', '--trained_model', default=None, type=str, help='Trained state_dict file path to open')
parser.add_argument('--test', action='store_true', help='to submit a test file')
args = parser.parse_args()

print_info('----------------------------------------------------------------------\n'
           '|                       M2Det Evaluation Program                     |\n'
           '----------------------------------------------------------------------', ['yellow','bold'])
# NOTE(review): `global` at module level is a no-op; kept as-is.
global cfg
cfg = Config.fromfile(args.config)
if not os.path.exists(cfg.test_cfg.save_folder):
    os.mkdir(cfg.test_cfg.save_folder)
# Anchors are constant for a given config; build the grid once (no autograd),
# moving it to the GPU when the test config asks for CUDA.
anchor_config = anchors(cfg)
print_info('The Anchor info: \n{}'.format(anchor_config))
priorbox = PriorBox(anchor_config)
with torch.no_grad():
    priors = priorbox.forward()
    if cfg.test_cfg.cuda:
        priors = priors.cuda()

def test_net(save_folder, net, detector, cuda, testset, transform, max_per_image=300, thresh=0.005):
    if not os.path.exists(save_folder):
        os.mkdir(save_folder)

    num_images = len(testset)
    print_info('=> Total {} images to test.'.format(num_images),['yellow','bold'])
    num_classes = cfg.model.m2det_config.num_classes
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]

    _t = {'im_detect': Timer(), 'misc': Timer()}
Esempio n. 29
0
def test_net(save_folder, net, dataset, transform, top_k, detector, priors):
    """Multi-scale, flip-augmented evaluation of a detector on `dataset`.

    Runs the net at every scale in `multi_scale[str(ssd_dim)]`, on both the
    original and the horizontally flipped image, gates each scale's boxes by
    size, merges survivors with `bbox_vote`, and pickles `all_boxes` to disk.

    NOTE(review): depends on module-level globals (multi_scale, multi_cfg,
    ssd_dim, device, args, dataset_mean, labelmap, pkl_dir, ...).
    """
    num_images = len(dataset)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(len(labelmap) + 1)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    all_time = 0.
    output_dir = get_output_dir(
        pkl_dir,
        args.iteration + '_' + args.dataset_name + '_' + args.set_file_name)
    det_file = os.path.join(output_dir, 'detections.pkl')
    output_dir = get_output_dir(output_dir, 'multi_test')
    ######################### Multiscale PriorBox #####################
    # One prior grid per evaluation scale, keyed by that scale's min_dim.
    priorboxes = {}
    for v1 in multi_scale[str(ssd_dim)]:
        if not multi_cfg[str(v1)]:
            # NOTE(review): returns a string instead of raising -- a caller
            # that ignores the return value silently skips evaluation.
            return ("not included this multi_scale")
        priorbox = PriorBox(multi_cfg[str(v1)])
        img_size = multi_cfg[str(v1)]['min_dim']
        with torch.no_grad():
            priorboxes[str(img_size)] = priorbox.forward().to(device)
    ########################## Detection ##############################
    for i in range(num_images):
        _t['im_detect'].tic()
        image = dataset.pull_image(i)
        h, w, _ = image.shape
        detections_multi = {}
        # Run every (scale, flip) combination; keys look like '320_192_0'
        # where the trailing 0/1 marks original vs. horizontally flipped.
        for v in multi_scale[str(ssd_dim)]:
            priors = priorboxes[str(v)]
            ssd_dim_temp = int(v)
            for loop in range(2):
                if (loop == 0):
                    im_trans = base_transform(image, ssd_dim_temp,
                                              dataset_mean)
                    im_trans = im_trans[:, :, (2, 1, 0)]
                else:
                    im_f = image.copy()
                    im_f = cv2.flip(im_f, 1)
                    im_trans = base_transform(im_f, ssd_dim_temp, dataset_mean)
                    im_trans = im_trans[:, :, (2, 1, 0)]
                with torch.no_grad():
                    x = torch.from_numpy(im_trans).unsqueeze(0).permute(
                        0, 3, 1, 2).to(device)
                    if 'RefineDet' in args.backbone and args.refine:
                        arm_loc, _, loc, conf = net(x)
                        detections = detector.forward(loc,
                                                      conf,
                                                      priors,
                                                      arm_loc_data=arm_loc)
                        detections_multi[str(ssd_dim) + '_' + str(v) + '_' +
                                         str(loop)] = detections.clone()
                    else:
                        loc, conf = net(x)
                        arm_loc = None
                        detections = detector.forward(loc,
                                                      conf,
                                                      priors,
                                                      arm_loc_data=arm_loc)
                        detections_multi[str(ssd_dim) + '_' + str(v) + '_' +
                                         str(loop)] = detections.clone()
        detect_time = _t['im_detect'].toc(average=False)
        # First 10 images are treated as warm-up and excluded from FPS.
        if i > 10:
            all_time += detect_time
    ###################################################################
        # Merge per-(scale, flip) detections class by class (0=background).
        for j in range(1, detections.size(1)):
            cls_dets = np.array([])
            for k, d in detections_multi.items():
                dets = d[0, j, :]
                if dets.sum() == 0:
                    continue
                # Keep rows with positive score (column 0 is the score).
                mask = dets[:, 0].gt(0.).expand(dets.size(-1),
                                                dets.size(0)).t()
                dets = torch.masked_select(dets, mask).view(-1, dets.size(-1))
                boxes = dets[:, 1:-1] if dets.size(-1) == 6 else dets[:, 1:]
                # Keys ending in '1' come from the flipped image: mirror the
                # x coordinates back and swap x1/x2 so that x1 <= x2 again.
                if (k[-1] == '1'):
                    boxes[:, 0] = 1 - boxes[:, 0]
                    boxes[:, 2] = 1 - boxes[:, 2]
                    temp_swap = boxes[:, 0].clone()
                    boxes[:, 0] = boxes[:, 2]
                    boxes[:, 2] = temp_swap
                # Scale normalised coordinates to pixels.
                boxes[:, 0] *= w
                boxes[:, 2] *= w
                boxes[:, 1] *= h
                boxes[:, 3] *= h
                # Per-scale size gating: small input scales keep only large
                # boxes, large input scales keep only small boxes.
                if k in ['320_192_0', '320_192_1', '512_320_0', '512_320_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.maximum(boxes_np[:, 2] - boxes_np[:, 0] +
                                   1, boxes_np[:, 3] - boxes_np[:, 1] +
                                   1) > 32)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in ['320_320_0', '320_320_1', '512_512_0', '512_512_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.maximum(boxes_np[:, 2] - boxes_np[:, 0] +
                                   1, boxes_np[:, 3] - boxes_np[:, 1] +
                                   1) > 0)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in ['320_384_0', '320_384_1', '512_640_0', '512_640_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.minimum(boxes_np[:, 2] - boxes_np[:, 0] +
                                   1, boxes_np[:, 3] - boxes_np[:, 1] +
                                   1) < 160)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in ['320_448_0', '320_448_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.minimum(boxes_np[:, 2] - boxes_np[:, 0] +
                                   1, boxes_np[:, 3] - boxes_np[:, 1] +
                                   1) < 128)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in ['320_512_0', '320_512_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.minimum(boxes_np[:, 2] - boxes_np[:, 0] +
                                   1, boxes_np[:, 3] - boxes_np[:, 1] +
                                   1) < 96)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in ['320_576_0', '320_576_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.minimum(boxes_np[:, 2] - boxes_np[:, 0] +
                                   1, boxes_np[:, 3] - boxes_np[:, 1] +
                                   1) < 64)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in [
                        '320_706_0', '320_706_1', '512_1216_0', '512_1216_1'
                ]:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.minimum(boxes_np[:, 2] - boxes_np[:, 0] +
                                   1, boxes_np[:, 3] - boxes_np[:, 1] +
                                   1) < 32)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                # NOTE(review): if `k` matched none of the branches above,
                # index_temp is unbound (NameError) or stale from a previous
                # key -- confirm multi_scale keys always hit a branch.
                if (index_temp.size == 0):
                    continue
                scores = dets[index_temp, 0].cpu().numpy()
                cls_dets_temp = np.hstack((boxes.cpu().numpy(), scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                if (cls_dets.size == 0):
                    cls_dets = cls_dets_temp.copy()
                else:
                    cls_dets = np.concatenate((cls_dets, cls_dets_temp),
                                              axis=0)
            # Vote/merge the accumulated multi-scale boxes for this class.
            if (cls_dets.size != 0):
                cls_dets = bbox_vote(cls_dets)
                if (len(cls_dets) != 0):
                    all_boxes[j][i] = cls_dets
        print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, num_images,
                                                    detect_time))
    FPS = (num_images - 10) / all_time
    print('FPS:', FPS)
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    evaluate_detections(all_boxes, output_dir, dataset, FPS=FPS)
Esempio n. 30
0
                    help='Dir to save results')
parser.add_argument('-m', '--trained_model', default='weights/epoches_260.pth',
                    type=str, help='Trained state_dict file path to open')
parser.add_argument('--cuda', default=True, type=bool,
                    help='Use cuda to train model')
parser.add_argument('--cpu', default=False, type=bool,
                    help='Use cpu nms')
args = parser.parse_args()


# Fixed evaluation configuration: VOC-style anchors, 300px input, two
# classes (background + one foreground class).
cfg = VOC_Config
img_dim = 300
num_classes = 2
rgb_means = (104, 117, 123)  # presumably per-channel means for preprocessing -- confirm with the transform

# Anchors are constant: build them once, without autograd, optionally on GPU.
priorbox = PriorBox(cfg)
with torch.no_grad():
    priors = priorbox.forward()
    if args.cuda:
        priors = priors.cuda()


class ObjectDetector:
    """Bundle a detection network with its post-processing collaborators.

    Pure configuration holder: nothing is computed at construction time.
    `net` produces raw predictions, `detection` decodes/NMS-filters them,
    and `transform` preprocesses input images. `thresh` is the minimum
    confidence kept, and `cuda` selects GPU execution.
    """

    def __init__(self, net, detection, transform, num_classes=2, thresh=0.1, cuda=True):
        # Runtime switches first, then the pipeline components.
        self.cuda = cuda
        self.thresh = thresh
        self.num_classes = num_classes
        self.transform = transform
        self.detection = detection
        self.net = net
Esempio n. 31
0
    def __init__(self):
        """Assemble the full training/evaluation harness from the global `cfg`.

        Wires together: phase-specific data loaders, the detection network,
        fixed prior boxes, the post-processing detector, optimizer, LR
        scheduler, multi-box loss, and a TensorBoard writer. Also creates
        the output and log directories on disk.
        """
        self.cfg = cfg

        # Load data: a loader is built only for the phases named in cfg.phase.
        print('===> Loading data')
        self.train_loader = load_data(
            cfg.dataset, 'train') if 'train' in cfg.phase else None
        self.eval_loader = load_data(cfg.dataset,
                                     'eval') if 'eval' in cfg.phase else None
        self.test_loader = load_data(cfg.dataset,
                                     'test') if 'test' in cfg.phase else None
        # self.visualize_loader = load_data(cfg.DATASET, 'visualize') if 'visualize' in cfg.PHASE else None

        # Build model
        print('===> Building model')
        # (2, 0, 1) presumably permutes HWC -> CHW after normalization — TODO confirm
        # against BaseTransform's implementation.
        self.base_trans = BaseTransform(cfg.image_size[0],
                                        cfg.network.rgb_means,
                                        cfg.network.rgb_std, (2, 0, 1))
        self.priors = PriorBox(cfg.anchor)
        # cfg.model names the network module; eval() resolves its build_net factory.
        # NOTE(review): eval() on config-supplied text is unsafe if cfg can be
        # influenced by untrusted input.
        self.model = eval(cfg.model + '.build_net')(cfg.image_size[0],
                                                    cfg.dataset.num_classes)
        with torch.no_grad():
            # Priors are fixed; replace the PriorBox object with its tensor output.
            self.priors = self.priors.forward()
        self.detector = Detect2(cfg.post_process)
        # Utilize GPUs for computation
        self.use_gpu = torch.cuda.is_available()
        # Empty train_scope means train everything; otherwise restrict to the
        # named parameter scope.
        if cfg.train.train_scope == '':
            trainable_param = self.model.parameters()
        else:
            trainable_param = self.trainable_param(cfg.train.train_scope)
        self.output_dir = os.path.join(cfg.output_dir, cfg.name, cfg.date)
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)
        self.log_dir = os.path.join(self.output_dir, 'logs')
        if not os.path.exists(self.log_dir):
            os.makedirs(self.log_dir)
        self.checkpoint = cfg.train.checkpoint

        previous = self.find_previous()
        # NOTE(review): the next line unconditionally discards find_previous(),
        # so checkpoint resuming is dead code and training always starts from
        # scratch. Looks like a debugging leftover — confirm before removing.
        previous = False
        if previous:
            self.start_epoch = previous[0][-1]
            self.resume_checkpoint(previous[1][-1])
        else:
            self.start_epoch = self.initialize()
        if self.use_gpu:
            print('Utilize GPUs for computation')
            print('Number of GPU available', torch.cuda.device_count())
            self.model.cuda()
            # NOTE(review): if self.priors is a Tensor, .cuda() is NOT in-place
            # and its return value is discarded here, so priors may remain on
            # CPU — verify against how self.priors is consumed downstream.
            self.priors.cuda()
            cudnn.benchmark = True
            if cfg.ngpu > 1:
                self.model = torch.nn.DataParallel(self.model,
                                                   device_ids=list(
                                                       range(cfg.ngpu)))
        # Print the model architecture and parameters
        #print('Model architectures:\n{}\n'.format(self.model))

        #print('Parameters and size:')
        #for name, param in self.model.named_parameters():
        #    print('{}: {}'.format(name, list(param.size())))
        # print trainable scope
        print('Trainable scope: {}'.format(cfg.train.train_scope))
        self.optimizer = self.configure_optimizer(trainable_param,
                                                  cfg.train.optimizer)
        self.exp_lr_scheduler = self.configure_lr_scheduler(
            self.optimizer, cfg.train.lr_scheduler)
        self.max_epochs = cfg.train.lr_scheduler.max_epochs
        # metric
        if cfg.network.multi_box_loss_type == 'origin':
            self.criterion = MultiBoxLoss2(cfg.matcher, self.priors,
                                           self.use_gpu)
        else:
            # NOTE(review): the condition reads cfg.network.multi_box_loss_type
            # but this message reads cfg.multi_box_loss_type — if that attribute
            # does not exist on cfg, this branch raises instead of printing.
            print('ERROR: ' + cfg.multi_box_loss_type + ' is not supported')
            sys.exit()
        # Set the logger
        self.writer = SummaryWriter(log_dir=self.log_dir)
        self.checkpoint_prefix = cfg.name + '_' + cfg.dataset.dataset