Example #1
    def __init__(self, data_dir, img_size, confthre, nmsthre, vis=False):
        """
        Args:
            data_dir (str): dataset root directory
            img_size (int): image size after preprocessing. Images are \
                resized to squares of shape (img_size, img_size).
            confthre (float):
                confidence threshold ranging from 0 to 1, \
                which is defined in the config file.
            nmsthre (float):
                IoU threshold of non-max suppression, ranging from 0 to 1.
            vis (bool): visualize detections during evaluation.
        """
        test_sets = [
            ('ENHANCE1', 'test'),
        ]
        self.dataset = SWIMDetection(
            root=data_dir,
            image_sets=test_sets,
            input_dim=img_size,
            # ImageNet channel statistics, as in the repo's other evaluators
            preproc=ValTransform(rgb_means=(0.485, 0.456, 0.406),
                                 std=(0.229, 0.224, 0.225)),
        )
        self.num_images = len(self.dataset)
        self.dataloader = torch.utils.data.DataLoader(self.dataset,
                                                      batch_size=1,
                                                      shuffle=False,
                                                      num_workers=0)
        self.img_size = img_size
        self.confthre = confthre
        self.nmsthre = nmsthre
        self.vis = vis
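
For reference, the (0.485, 0.456, 0.406) / (0.229, 0.224, 0.225) values passed to ValTransform are the standard ImageNet channel means and standard deviations. A minimal sketch of that normalization step, assuming RGB input in [0, 255] (the repo's actual ValTransform also handles resizing and channel ordering):

import numpy as np

def imagenet_normalize(img_rgb,
                       means=(0.485, 0.456, 0.406),
                       std=(0.229, 0.224, 0.225)):
    # Scale to [0, 1], then standardize each channel with the
    # ImageNet statistics used by ValTransform above.
    img = img_rgb.astype(np.float32) / 255.0
    return (img - np.array(means, np.float32)) / np.array(std, np.float32)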
Example #2
def demo():
    # build_model, data_f, target_size, device, num_classes and
    # coco_label_map_list are module-level names in the original script.
    model = build_model()
    if os.path.isdir(data_f):
        all_imgs = glob.glob(os.path.join(data_f, '*.jpg'))
        for img in all_imgs:
            print('~~~~~ predict on img: {}'.format(img))
            im = cv2.imread(img)
            ori_im = im.copy()
            height, width, _ = im.shape
            transform = ValTransform(rgb_means=(
                0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
            im_input, _ = transform(im, None, target_size)
            im_input = im_input.to(device).unsqueeze(0)

            with torch.no_grad():
                out = model(im_input)
                outputs = postprocess(out, num_classes, 0.01, 0.65)
                if outputs[0] is None:  # no detections on this image
                    continue
                outputs = outputs[0].cpu().data
                bboxes = outputs[:, 0:4]
                bboxes[:, 0::2] *= width / target_size[0]
                bboxes[:, 1::2] *= height / target_size[1]
                cls = outputs[:, 6]
                scores = outputs[:, 4] * outputs[:, 5]
                if isinstance(bboxes, torch.Tensor):
                    bboxes = bboxes.cpu().numpy()
                res = visualize_det_cv2_part(
                    im, scores, cls, bboxes, coco_label_map_list[1:], 0.1)
                cv2.imshow('rr', res)
                cv2.waitKey(0)
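
The two in-place multiplications above map box corners from network-input coordinates back to the original image. The same rescaling as a self-contained NumPy sketch (function name hypothetical):

import numpy as np

def rescale_boxes(bboxes, net_size, img_w, img_h):
    # bboxes: (N, 4) corners (x1, y1, x2, y2) in network-input coordinates.
    # Columns 0::2 are x, columns 1::2 are y - the same slicing as above.
    out = np.asarray(bboxes, dtype=np.float32).copy()
    out[:, 0::2] *= img_w / net_size[0]
    out[:, 1::2] *= img_h / net_size[1]
    return out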
Example #3
def demo():
    args = parse_args()
    print("Setting Arguments.. : ", args)
    cuda = torch.cuda.is_available() and args.use_cuda
    # Parse config settings
    with open(args.cfg, 'r') as f:
        cfg = yaml.safe_load(f)

    print("successfully loaded config file: ", cfg)

    backbone = cfg['MODEL']['BACKBONE']
    test_size = (args.test_size, args.test_size)

    if args.dataset == 'COCO':
        class_names = COCO_CLASSES
        num_class = 80
    elif args.dataset == 'VOC':
        class_names = VOC_CLASSES
        num_class = 20
    else:
        raise Exception("Only support COCO or VOC model now!")

    onnx_model = onnx.load('weights/yolov3_asff.onnx')
    onnx.checker.check_model(onnx_model)
    print('onnx model checked.')
    # ONNX Runtime session for actually running the checked model below
    # (assumes `import onnxruntime` at module level).
    session = onnxruntime.InferenceSession('weights/yolov3_asff.onnx')
    input_name = session.get_inputs()[0].name

    # load images
    if os.path.isdir(args.img):
        all_imgs = glob.glob(os.path.join(args.img, '*.jpg'))
        for img in all_imgs:
            print('~~~~~ predict on img: {}'.format(img))
            transform = ValTransform(rgb_means=(0.485, 0.456, 0.406),
                                     std=(0.229, 0.224, 0.225))
            im = cv2.imread(img)
            height, width, _ = im.shape
            ori_im = im.copy()
            im_input, _ = transform(im, None, test_size)
            tic = time.time()
            # Run the exported graph with ONNX Runtime and wrap the raw
            # output back into a tensor for the repo's postprocess().
            ort_outs = session.run(
                None, {input_name: im_input.unsqueeze(0).numpy()})
            print('cost: {}'.format(time.time() - tic))
            outputs = torch.from_numpy(ort_outs[0])
            outputs = postprocess(outputs, num_class, 0.01, 0.65)

            if outputs[0] is not None:  # skip images with no detections
                outputs = outputs[0].cpu().data
                bboxes = outputs[:, 0:4]
                bboxes[:, 0::2] *= width / test_size[0]
                bboxes[:, 1::2] *= height / test_size[1]
                cls = outputs[:, 6]
                scores = outputs[:, 4] * outputs[:, 5]

                pred_im = vis(ori_im, bboxes.numpy(), scores.numpy(),
                              cls.numpy(), conf=0.6, class_names=class_names)
                cv2.imshow('Detection', pred_im)
                cv2.waitKey(0)
    elif 'mp4' in args.img:
        cam = cv2.VideoCapture(args.img)
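
The single `tic = time.time()` measurement above is dominated by first-run overhead and noise. A hedged sketch of a steadier timing harness (pure Python; `run_once` is a hypothetical zero-argument callable wrapping one forward pass):

import time

def time_inference(run_once, warmup=3, iters=20):
    for _ in range(warmup):
        run_once()                       # exclude one-time setup costs
    tic = time.time()
    for _ in range(iters):
        run_once()
    return (time.time() - tic) / iters   # mean seconds per forward pass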
Example #4
    def __init__(self,
                 data_dir,
                 img_size,
                 confthre,
                 nmsthre,
                 testset=False,
                 voc=False,
                 vis=False,
                 classes=COCO_CLASSES):
        """
        Args:
            data_dir (str): dataset root directory
            img_size (int): image size after preprocessing. Images are \
                resized to squares of shape (img_size, img_size).
            confthre (float):
                confidence threshold ranging from 0 to 1, \
                which is defined in the config file.
            nmsthre (float):
                IoU threshold of non-max suppression, ranging from 0 to 1.
            testset (bool): evaluate on the COCO test-dev2017 annotations
                instead of val2017.
            voc (bool): evaluate on the PASCAL VOC 2007 test annotations.
            classes (tuple): class names, defaulting to COCO_CLASSES.
            vis (bool): visualize detections during evaluation.
        """
        json_f = 'instances_val2017.json'
        name = 'val2017'
        if testset:
            json_f = 'image_info_test-dev2017.json'
            name = 'test2017'
        if voc:
            json_f = 'pascal_test2007.json'

        self.testset = testset
        self.dataset = COCODataset(data_dir=data_dir,
                                   img_size=img_size,
                                   json_file=json_f,
                                   preproc=ValTransform(
                                       rgb_means=(0.485, 0.456, 0.406),
                                       std=(0.229, 0.224, 0.225)),
                                   name=name,
                                   voc=voc,
                                   classes=classes)
        self.num_classes = len(classes)
        self.num_images = len(self.dataset)
        self.dataloader = torch.utils.data.DataLoader(self.dataset,
                                                      batch_size=1,
                                                      shuffle=False,
                                                      num_workers=0)
        self.img_size = img_size
        self.confthre = confthre
        self.nmsthre = nmsthre
        self.voc = voc
        self.vis = vis
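
Note the precedence in this constructor: when both flags are set, `voc` wins for the annotation file while `name` still follows `testset`. A small self-contained mirror of that selection logic (function name hypothetical):

def pick_annotations(testset=False, voc=False):
    # Reproduces the flag handling above: voc overrides the json file,
    # testset alone switches both the file and the image-set name.
    json_f, name = 'instances_val2017.json', 'val2017'
    if testset:
        json_f, name = 'image_info_test-dev2017.json', 'test2017'
    if voc:
        json_f = 'pascal_test2007.json'
    return json_f, name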
Example #5
    def detect(self, img_pth, img):
        # load image
        transform = ValTransform(rgb_means=(0.485, 0.456, 0.406),
                                 std=(0.229, 0.224, 0.225))
        #im = cv2.imread(args.img)
        im = cv2.imread(os.path.join(img_pth, img))
        height, width, _ = im.shape
        ori_im = im.copy()
        im_input, _ = transform(im, None, self.test_size)
        if self.cfg['MODEL']['USE_CUDA']:
            im_input = im_input.to(self.device)

        # Variable is a no-op on modern PyTorch; a plain tensor would do.
        im_input = Variable(im_input.type(self.dtype).unsqueeze(0))
        outputs = self.model(im_input)  # raw predictions: xc, yc, w, h
        outputs = postprocess(outputs, self.num_class, 0.1, 0.65)
        outputs = outputs[0].cpu().data

        bboxes = outputs[:, 0:4]  # corner format: x1, y1, x2, y2
        bboxes[:, 0::2] *= width / self.test_size[0]   # rescale x to original
        bboxes[:, 1::2] *= height / self.test_size[1]  # rescale y to original
        #bboxes[:, 2] = bboxes[:,2] - bboxes[:,0] # w
        #bboxes[:, 3] = bboxes[:,3] - bboxes[:,1] # h
        cls = outputs[:, 6]  # predicted class index
        scores = outputs[:, 4] * outputs[:, 5]  # objectness * class confidence

        pred_im = vis(ori_im,
                      bboxes.numpy(),
                      scores.numpy(),
                      cls.numpy(),
                      conf=0.6,
                      class_names=self.class_names)
        cv2.imshow('Detection', pred_im)
        cv2.imwrite(os.path.join(self.cfg['TEST']['SAVED'], img), pred_im)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        return bboxes.numpy(), cls.numpy(), scores.numpy()
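
The score used for drawing is objectness (column 4) times class confidence (column 5). A self-contained sketch of thresholding detections on that combined score, as `vis(..., conf=0.6)` presumably does internally (NumPy; names hypothetical):

import numpy as np

def filter_detections(bboxes, cls, obj_conf, cls_conf, thresh=0.6):
    # Keep only detections whose combined score clears the threshold.
    scores = obj_conf * cls_conf
    keep = scores > thresh
    return bboxes[keep], cls[keep], scores[keep]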
Example #6
def demo():
    """
    YOLOv3 demo. See README for details.
    """
    args = parse_args()
    print("Setting Arguments.. : ", args)

    cuda = torch.cuda.is_available() and args.use_cuda

    # Parse config settings
    with open(args.cfg, 'r') as f:
        cfg = yaml.safe_load(f)

    print("successfully loaded config file: ", cfg)

    backbone = cfg['MODEL']['BACKBONE']
    test_size = (args.test_size, args.test_size)

    if args.dataset == 'COCO':
        class_names = COCO_CLASSES
        num_class = 80
    elif args.dataset == 'VOC':
        class_names = VOC_CLASSES
        num_class = 20
    else:
        raise Exception("Only support COCO or VOC model now!")

    # Initiate model
    if args.asff:
        if backbone == 'mobile':
            from models.yolov3_mobilev2 import YOLOv3
            print("For mobilenet, we currently don't support dropblock, rfb and FeatureAdaption")
        else:
            from models.yolov3_asff import YOLOv3
        print('Running YOLOv3 with ASFF!')
        model = YOLOv3(num_classes=num_class, rfb=args.rfb, asff=args.asff)
    else:
        if backbone == 'mobile':
            from models.yolov3_mobilev2 import YOLOv3
        else:
            from models.yolov3_baseline import YOLOv3
        print('Running YOLOv3 strong baseline!')
        model = YOLOv3(num_classes=num_class, rfb=args.rfb)


    if args.checkpoint:
        print("loading pytorch ckpt...", args.checkpoint)
        cpu_device = torch.device("cpu")
        ckpt = torch.load(args.checkpoint, map_location=cpu_device)
        model.load_state_dict(ckpt, strict=False)
        #model.load_state_dict(ckpt)
    if cuda:
        print("using cuda")
        torch.backends.cudnn.benchmark = True
        device = torch.device("cuda")
        model = model.to(device)

    if args.half:
        model = model.half()

    model = model.eval()
    dtype = torch.float16 if args.half else torch.float32

    # load image
    transform = ValTransform(rgb_means=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225))
    im = cv2.imread(args.img)
    height, width, _ = im.shape
    ori_im = im.copy()
    im_input, _ = transform(im, None, test_size)
    if cuda:
        im_input = im_input.to(device)

    im_input = Variable(im_input.type(dtype).unsqueeze(0))
    outputs = model(im_input)
    outputs = postprocess(outputs, num_class, 0.01, 0.65)

    outputs = outputs[0].cpu().data
    bboxes = outputs[:, 0:4]  # corner format: x1, y1, x2, y2
    bboxes[:, 0::2] *= width / test_size[0]
    bboxes[:, 1::2] *= height / test_size[1]
    # vis() appears to expect corner boxes; the corner-to-width/height
    # conversion stays commented out, as in the other demos in this listing.
    # bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0]  # w
    # bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1]  # h
    cls = outputs[:, 6]
    scores = outputs[:, 4] * outputs[:, 5]

    pred_im = vis(ori_im, bboxes.numpy(), scores.numpy(), cls.numpy(),
                  conf=0.6, class_names=class_names)
    cv2.imshow('Detection', pred_im)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    sys.exit(0)
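
With `args.half`, the input must be cast to the same dtype as the halved weights, which is what `im_input.type(dtype)` does above. A minimal sketch of that pairing (assumes a CUDA device; fp16 convolutions are generally unsupported on CPU):

import torch

conv = torch.nn.Conv2d(3, 8, 3).cuda().half().eval()  # stand-in for the model
x = torch.rand(1, 3, 32, 32, device='cuda', dtype=torch.float16)
with torch.no_grad():
    y = conv(x)       # input dtype must match the weights' dtype
print(y.dtype)        # torch.float16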
Example #7
def demo(args, video_pth, video_name):
    """
    YOLOv3 demo. See README for details.
    """
    # Parse config settings
    with open(args.cfg, 'r') as f:
        cfg = yaml.safe_load(f)

    #print("successfully loaded config file: ", cfg)

    backbone = cfg['MODEL']['BACKBONE']
    test_size = (args.test_size, args.test_size)

    if args.dataset == 'COCO':
        class_names = COCO_CLASSES
        num_class = 80
    elif args.dataset == 'VOC':
        class_names = VOC_CLASSES
        num_class = 20
    elif args.dataset == 'SWIM':
        class_names = SWIM_CLASSES
        num_class = 1
    else:
        raise Exception("Only support COCO, VOC and SWIM model now!")

    # Initiate model
    if args.asff:
        if backbone == 'mobile':
            from models.yolov3_mobilev2 import YOLOv3
            #print("For mobilenet, we currently don't support dropblock, rfb and FeatureAdaption")
        else:
            from models.yolov3_asff import YOLOv3
        #print('Training YOLOv3 with ASFF!')
        model = YOLOv3(num_classes=num_class, rfb=args.rfb, asff=args.asff)
    else:
        if backbone == 'mobile':
            from models.yolov3_mobilev2 import YOLOv3
        else:
            from models.yolov3_baseline import YOLOv3
        #print('Training YOLOv3 strong baseline!')
        model = YOLOv3(num_classes=num_class, rfb=args.rfb)

    if args.checkpoint:
        #print("loading pytorch ckpt...", args.checkpoint)
        cpu_device = torch.device("cpu")
        ckpt = torch.load(args.checkpoint, map_location=cpu_device)
        #model.load_state_dict(ckpt,strict=False)
        model.load_state_dict(ckpt)
    # `use_cuda` is assumed to be a flag on args, as in the other demos here.
    cuda = torch.cuda.is_available() and args.use_cuda
    if cuda:
        torch.backends.cudnn.benchmark = True
        device = torch.device("cuda")
        model = model.to(device)

    if args.half:
        model = model.half()

    model = model.eval()
    dtype = torch.float16 if args.half else torch.float32

    # load video
    transform = ValTransform(rgb_means=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225))
    cap = cv2.VideoCapture(video_pth)
    fps = cap.get(cv2.CAP_PROP_FPS)
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))  # property id 4
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))    # property id 3
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    vid_writer = cv2.VideoWriter(
        os.path.join(args.save_path, video_name + '.avi'), fourcc, fps,
        (width, height))
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            ori_frame = frame.copy()
            frame_input, _ = transform(frame, None, test_size)
            if cuda:
                frame_input = frame_input.to(device)

            frame_input = Variable(frame_input.type(dtype).unsqueeze(0))
            outputs = model(frame_input)
            outputs = postprocess(outputs, num_class, 0.01, 0.65)
            if outputs[0] is not None:  # no detections leaves a None entry
                outputs = outputs[0].cpu().data
                bboxes = outputs[:, 0:4]
                bboxes[:, 0::2] *= width / test_size[0]
                bboxes[:, 1::2] *= height / test_size[1]
                #bboxes[:, 2] = bboxes[:,2] - bboxes[:,0]
                #bboxes[:, 3] = bboxes[:,3] - bboxes[:,1]
                cls = outputs[:, 6]
                scores = outputs[:, 4] * outputs[:, 5]

                pred_frame = vis(ori_frame,
                                 bboxes.numpy(),
                                 scores.numpy(),
                                 cls.numpy(),
                                 conf=0.6,
                                 class_names=class_names)
            else:
                pred_frame = ori_frame
            cv2.namedWindow("Detection", cv2.WINDOW_NORMAL)
            cv2.resizeWindow("Detection", 720, 640)
            cv2.imshow('Detection', pred_frame)
            vid_writer.write(pred_frame)
        else:
            break
        key = cv2.waitKey(1)
        if key == ord("q"):
            break
    cap.release()
    cv2.destroyAllWindows()
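
For completeness, a stripped-down headless variant of the capture-and-write loop above, with no GUI calls, which is how such a demo would typically run on a server (`process` is a hypothetical stand-in for the detect-and-draw step):

import cv2

def run_headless(src, dst, process=lambda frame: frame):
    cap = cv2.VideoCapture(src)
    fps = cap.get(cv2.CAP_PROP_FPS)
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    writer = cv2.VideoWriter(dst, cv2.VideoWriter_fourcc(*'MJPG'), fps, size)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        writer.write(process(frame))  # written frames must match `size`
    cap.release()
    writer.release()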