예제 #1
0
    def get_model(cls):
        """Get the model object for this instance, loading it if it's not already loaded."""
        trained_model = '/opt/ml/model/m2det512_vgg.pth'
        #trained_model = '../../m2det512_vgg.pth'

        anchor_config = anchors(cfg)
        print_info('The Anchor info: \n{}'.format(anchor_config))
        priorbox = PriorBox(anchor_config)
        net = build_net('test',
                        size=cfg.model.input_size,
                        config=cfg.model.m2det_config)
        init_net(net, cfg, trained_model)
        print_info('===> Finished constructing and loading model',
                   ['yellow', 'bold'])
        net.eval()
        with torch.no_grad():
            priors = priorbox.forward()
            if cfg.test_cfg.cuda:
                net = net.cuda()
                priors = priors.cuda()
                cudnn.benchmark = True
            else:
                net = net.cpu()
        _preprocess = BaseTransform(cfg.model.input_size, cfg.model.rgb_means,
                                    (2, 0, 1))
        detector = Detect(cfg.model.m2det_config.num_classes,
                          cfg.loss.bkg_label, anchor_config)

        return net, priors, _preprocess, detector
예제 #2
0
def get_model(device, cfg):
    net = build_net(
        'train',
        size=cfg.model.input_size,  # Only 320, 512, 704 and 800 are supported
        config=cfg.model.m2det_config)
    init_net(net, cfg)
    net.to(device)
    return net
예제 #3
0
        tot_detect_time += detect_time if i > 0 else 0
        tot_nms_time += nms_time if i > 0 else 0

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    print_info('===> Evaluating detections',['yellow','bold'])
    testset.evaluate_detections(all_boxes, save_folder)
    print_info('Detect time per image: {:.3f}s'.format(tot_detect_time / (num_images-1)))
    print_info('Nms time per image: {:.3f}s'.format(tot_nms_time / (num_images - 1)))
    print_info('Total time per image: {:.3f}s'.format((tot_detect_time + tot_nms_time) / (num_images - 1)))
    print_info('FPS: {:.3f} fps'.format((num_images - 1) / (tot_detect_time + tot_nms_time)))

if __name__ == '__main__':
    net = build_net('test',
                    size = cfg.model.input_size,
                    config = cfg.model.m2det_config)
    init_net(net, cfg, args.trained_model)
    print_info('===> Finished constructing and loading model',['yellow','bold'])
    net.eval()
    _set = 'eval_sets' if not args.test else 'test_sets'
    testset = get_dataloader(cfg, args.dataset, _set)
    if cfg.test_cfg.cuda:
        net = net.cuda()
        cudnn.benchmark = True
    else:
        net = net.cpu()
    detector = Detect(cfg.model.m2det_config.num_classes, cfg.loss.bkg_label, anchor_config)
    save_folder = os.path.join(cfg.test_cfg.save_folder, args.dataset)
    _preprocess = BaseTransform(cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1))
    test_net(save_folder,
                    type=bool,
                    default=False,
                    help='Use tensorborad to show the Loss Graph')
args = parser.parse_args()

print_info(
    '----------------------------------------------------------------------\n'
    '|                       M2Det Training Program                       |\n'
    '----------------------------------------------------------------------',
    ['yellow', 'bold'])

logger = set_logger(args.tensorboard)
global cfg
cfg = Config.fromfile(args.config)
net = build_net(
    'train',
    size=cfg.model.input_size,  # Only 320, 512, 704 and 800 are supported
    config=cfg.model.m2det_config)
init_net(net, cfg, args.resume_net
         )  # init the network with pretrained weights or resumed weights

if args.ngpu > 1:
    net = torch.nn.DataParallel(net)
if cfg.train_cfg.cuda:
    net.cuda()
    cudnn.benchmark = True

optimizer = set_optimizer(net, cfg)
criterion = set_criterion(cfg)
priorbox = PriorBox(anchors(cfg))

with torch.no_grad():
예제 #5
0
def demo(v_f):
    cfg = Config.fromfile(config_f)
    anchor_config = anchors(cfg)
    priorbox = PriorBox(anchor_config)
    net = build_net('test',
                    size=cfg.model.input_size,
                    config=cfg.model.m2det_config)
    init_net(net, cfg, checkpoint_path)
    net.eval().to(device)
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    _preprocess = BaseTransform(
        cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1))
    detector = Detect(cfg.model.m2det_config.num_classes,
                      cfg.loss.bkg_label, anchor_config)
    logging.info('detector initiated.')

    cap = cv2.VideoCapture(v_f)
    logging.info('detect on: {}'.format(v_f))
    logging.info('video width: {}, height: {}'.format(int(cap.get(3)), int(cap.get(4))))
    out_video = cv2.VideoWriter("result.mp4", cv2.VideoWriter_fourcc(*'MJPG'), 24, (int(cap.get(3)), int(cap.get(4))))

    while True:
        ret, image = cap.read()
        if not ret:
            out_video.release()
            cv2.destroyAllWindows()
            cap.release()
            break
        w, h = image.shape[1], image.shape[0]
        img = _preprocess(image).unsqueeze(0).to(device)
        scale = torch.Tensor([w, h, w, h])
        out = net(img)
        boxes, scores = detector.forward(out, priors)
        boxes = (boxes[0]*scale).cpu().numpy()
        scores = scores[0].cpu().numpy()
        allboxes = []
        for j in range(1, cfg.model.m2det_config.num_classes):
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
                np.float32, copy=False)
            soft_nms = cfg.test_cfg.soft_nms
            # min_thresh, device_id=0 if cfg.test_cfg.cuda else None)
            keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            allboxes.extend([_.tolist()+[j] for _ in c_dets])
        if len(allboxes) > 0:
            allboxes = np.array(allboxes)
            # [boxes, scores, label_id] -> [id, score, boxes] 0, 1, 2, 3, 4, 5
            allboxes = allboxes[:, [5, 4, 0, 1, 2, 3]]
            logging.info('allboxes shape: {}'.format(allboxes.shape))
            res = visualize_det_cv2(image, allboxes, classes=classes, thresh=0.2)
            # res = visualize_det_cv2_fancy(image, allboxes, classes=classes, thresh=0.2, r=4, d=6)
            cv2.imshow('rr', res)
            out_video.write(res)
            cv2.waitKey(1)
def train(cfg):
    cfg = Config.fromfile(cfg)
    net = build_net(
        'train',
        size=cfg.model.input_size,  # Only 320, 512, 704 and 800 are supported
        config=cfg.model.m2det_config)
    init_net(net, cfg, False)
    net.to(device)
    if os.path.exists(checkpoint_path.format(start_epoch)):
        checkpoints = torch.load(checkpoint_path.format(start_epoch))
        net.load_state_dict(checkpoints)
        logging.info('checkpoint loaded.')

    optimizer = optim.SGD(net.parameters(),
                          lr=cfg.train_cfg.lr[0],
                          momentum=cfg.optimizer.momentum,
                          weight_decay=cfg.optimizer.weight_decay)
    criterion = MultiBoxLoss(cfg.model.m2det_config.num_classes,
                             overlap_thresh=cfg.loss.overlap_thresh,
                             prior_for_matching=cfg.loss.prior_for_matching,
                             bkg_label=cfg.loss.bkg_label,
                             neg_mining=cfg.loss.neg_mining,
                             neg_pos=cfg.loss.neg_pos,
                             neg_overlap=cfg.loss.neg_overlap,
                             encode_target=cfg.loss.encode_target)
    priorbox = PriorBox(anchors(cfg))
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    net.train()

    anchor_config = anchors(cfg)
    detector = Detect(cfg.model.m2det_config.num_classes, cfg.loss.bkg_label,
                      anchor_config)
    logging.info('detector initiated.')

    dataset = get_dataloader(cfg, 'COCO', 'train_sets')
    train_ds = DataLoader(dataset,
                          cfg.train_cfg.per_batch_size,
                          shuffle=True,
                          num_workers=0,
                          collate_fn=detection_collate)
    logging.info('dataset loaded, start to train...')

    for epoch in range(start_epoch, cfg.model.epochs):
        for i, data in enumerate(train_ds):
            try:
                lr = adjust_learning_rate_v2(optimizer, epoch, cfg)
                images, targets = data
                images = images.to(device)
                targets = [anno.to(device) for anno in targets]
                out = net(images)

                optimizer.zero_grad()
                loss_l, loss_c = criterion(out, priors, targets)
                loss = loss_l + loss_c
                loss.backward()
                optimizer.step()

                if i % 30 == 0:
                    logging.info(
                        'Epoch: {}, iter: {}, loc_loss: {}, conf_loss: {}, loss: {}, lr: {}'
                        .format(epoch, i, loss_l.item(), loss_c.item(),
                                loss.item(), lr))

                if i % 2000 == 0:
                    # two_imgs = images[0:2, :]
                    # out = net(two_imgs)
                    # snap_middle_result(two_imgs[0], out[0], priors, detector, cfg, epoch)
                    torch.save(net.state_dict(), checkpoint_path.format(epoch))
                    logging.info('model saved.')
            except KeyboardInterrupt:
                torch.save(net.state_dict(), checkpoint_path.format(epoch))
                logging.info('model saved.')
                exit(0)
    torch.save(net.state_dict(), checkpoint_path.format(epoch))
예제 #7
0
def detect_parking_spaces(dir,
                          threshold=0.2,
                          save=False,
                          show=False,
                          cam=-1,
                          gpu=False,
                          config='training/m2det/configs/m2det512_vgg.py',
                          weights='training/m2det/weights/m2det512_vgg.pth'):
    print('Detect Parking Spaces Programe')
    cfg = Config.fromfile(config)
    anchor_config = anchors(cfg)

    priorbox = PriorBox(anchor_config)
    net = build_net('test',
                    size=cfg.model.input_size,
                    config=cfg.model.m2det_config)
    init_net(net, cfg, weights)
    net.eval()
    if not gpu:
        cfg.test_cfg.cuda = False

    with torch.no_grad():
        priors = priorbox.forward()
        if cfg.test_cfg.cuda:
            net = net.cuda()
            priors = priors.cuda()
            cudnn.benchmark = True
        else:
            net = net.cpu()
    print_info('===> Finished constructing and loading model')

    _preprocess = BaseTransform(cfg.model.input_size, cfg.model.rgb_means,
                                (2, 0, 1))
    detector = Detect(cfg.model.m2det_config.num_classes, cfg.loss.bkg_label,
                      anchor_config)

    base = int(np.ceil(pow(cfg.model.m2det_config.num_classes, 1. / 3)))
    colors = [
        _to_color(x, base) for x in range(cfg.model.m2det_config.num_classes)
    ]
    cats = [
        _.strip().split(',')[-1]
        for _ in open('training/m2det/data/coco_labels.txt', 'r').readlines()
    ]
    labels = tuple(['__background__'] + cats)

    im_path = dir + '/images'
    cam = cam
    if cam >= 0:
        capture = cv2.VideoCapture(cam)
    im_fnames = sorted((fname for fname in os.listdir(im_path)
                        if os.path.splitext(fname)[-1] == '.jpg'))
    im_fnames = (os.path.join(im_path, fname) for fname in im_fnames)
    im_iter = iter(im_fnames)

    save_dir = dir + '/detection_images'
    os.makedirs(save_dir, exist_ok=True)
    locs_list = {}
    while True:
        if cam < 0:
            try:
                fname = next(im_iter)
            except StopIteration:
                break
            image = cv2.imread(fname, cv2.IMREAD_COLOR)
        else:
            ret, image = capture.read()
            if not ret:
                cv2.destroyAllWindows()
                capture.release()
                break

        loop_start = time.time()
        w, h = image.shape[1], image.shape[0]
        img = _preprocess(image).unsqueeze(0)
        if cfg.test_cfg.cuda:
            img = img.cuda()
        scale = torch.Tensor([w, h, w, h])
        out = net(img)
        if not gpu:
            priors = priors.cpu()

        boxes, scores = detector.forward(out, priors)
        boxes = (boxes[0] * scale).cpu().numpy()
        scores = scores[0].cpu().numpy()
        allboxes = []

        for j in range(1, cfg.model.m2det_config.num_classes):
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            soft_nms = cfg.test_cfg.soft_nms
            keep = nms(
                c_dets, cfg.test_cfg.iou, force_cpu=soft_nms
            )  #min_thresh, device_id=0 if cfg.test_cfg.cuda else None)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            allboxes.extend([_.tolist() + [j] for _ in c_dets])

        loop_time = time.time() - loop_start
        allboxes = np.array(allboxes)
        boxes = allboxes[:, :4]
        scores = allboxes[:, 4]
        cls_inds = allboxes[:, 5]
        # print('\n'.join(['pos:{}, ids:{}, score:{:.3f}'.format('(%.1f,%.1f,%.1f,%.1f)' % (o[0],o[1],o[2],o[3]) \
        #         ,labels[int(oo)],ooo) for o,oo,ooo in zip(boxes,cls_inds,scores)]))
        fps = 1.0 / float(loop_time) if cam >= 0 else -1
        im2show, loc = draw_detection(image,
                                      boxes,
                                      scores,
                                      cls_inds,
                                      fps,
                                      threshold,
                                      colors=colors,
                                      labels=labels)
        locs_list[fname] = loc

        if im2show.shape[0] > 1100:
            im2show = cv2.resize(im2show, (int(
                1000. * float(im2show.shape[1]) / im2show.shape[0]), 1000))
        if show:
            cv2.imshow('test', im2show)
            if cam < 0:
                cv2.waitKey(1000)
            else:
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    cv2.destroyAllWindows()
                    capture.release()
                    break
        if save:
            name = fname.split('.')[0]
            name = name.split('/')[-1]
            cv2.imwrite(f"{save_dir}/{name}.jpg", im2show)

    save_name = dir + '/labels/split.txt'
    f = open(save_name, 'wb')
    pickle.dump(locs_list, f)
    f.close()