Example #1
    def backbone(self, images, **kwargs):
        r"""Returns backbone features and transformed input image list.

        Args:
            images(tensor | List[tensor | str]): a batch tensor of images, a list of image tensors, or image filenames
        
        Returns:
            images(ImageList): a transformed image list with scaled/padded image batch and shape meta
            features(tensor): backbone features in a batch
        """

        # Switch to eval for deterministic preprocessing; the original mode is restored below.
        mode = self.training
        self.eval()
        model = self.module
        dev = next(model.parameters()).device

        # Normalize the input to a list of per-image tensors or filenames.
        if th.is_tensor(images):
            if images.dim() == 3:
                images = images.unsqueeze(0)
        elif not isinstance(images, list):
            images = [images]

        from ml import cv
        # Load filenames from disk and move every image tensor to the model device.
        images = [
            image.to(dev) if th.is_tensor(image) else cv.toTorch(
                cv.imread(image), device=dev) for image in images
        ]

        original_image_sizes = [img.shape[-2:] for img in images]
        with th.no_grad():
            # Resize/normalize/pad into a batched ImageList, then extract backbone features.
            images, _ = model.transform(images, targets=None)
            self.train(mode)
            return model.backbone(images.tensors), images, original_image_sizes
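
A minimal usage sketch, assuming `detector` is an instance of the wrapper class that defines backbone() above and `frame.jpg` is a hypothetical image path; whether the features come back as a single tensor or a dict of per-level maps depends on the wrapped backbone:

# Hypothetical `detector` instance and image path.
feats, image_list, sizes = detector.backbone(['frame.jpg'])
print(sizes)                            # original (H, W) of each input image
print(image_list.tensors.shape)         # padded/scaled batch actually fed to the backbone
if isinstance(feats, dict):             # FPN-style backbones return a dict of per-level maps
    for level, fmap in feats.items():
        print(level, tuple(fmap.shape))
else:
    print(tuple(feats.shape))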
Example #2
    def __getitem__(self, index):
        index = self.shuffled[index]
        with self.cv:
            # First worker to claim this index marks it as "loading" (False);
            # later workers block on the condition variable until the sample is cached.
            if index not in self.cache:
                self.cache[index] = False
            else:
                if self.cache[index] is False:
                    while self.cache[index] is False:
                        print(
                            f'[P{os.getpid()}] waiting for samples[{index}] to be loaded'
                        )
                        self.cv.wait()
                    print(
                        f'[P{os.getpid()}] done waiting for samples[{index}]')

        cls, subdir, fn = self.samples[index]
        if self.cache[index] is False:
            # This worker owns the load: read, transform, cache, then wake up any waiters.
            img = cv.imread(self.path / cls / subdir / fn)
            if self.input_trans:
                img = self.input_trans(img)
            self.cache[index] = img
            with self.cv:
                self.cv.notify_all()

        target = self.cls2idx[cls]
        if self.target_trans:
            target = self.target_trans(target)

        return self.cache[index], target
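
The wait/notify logic above is a double-checked caching pattern: the first worker to claim an index loads and caches the sample while later workers block on the condition variable until it is ready. A standalone sketch of the same idea with hypothetical names, assuming the cache and condition variable are shared between threads (the os.getpid() prints above imply worker processes, in which case both objects would have to come from a multiprocessing.Manager to actually be shared):

import threading

cache = {}                        # index -> False (loading) or the loaded sample
cv = threading.Condition()

def get(index, load):
    # First caller claims the slot and loads it; concurrent callers wait for notify_all.
    with cv:
        if index not in cache:
            cache[index] = False  # claim: this caller will perform the load
            owner = True
        else:
            owner = False
            while cache[index] is False:
                cv.wait()         # lock released while waiting; re-checked on wakeup
    if owner:
        value = load(index)
        with cv:
            cache[index] = value
            cv.notify_all()       # wake every waiter blocked on this slot
    return cache[index]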
Example #3
    def preprocess(self, frames=None, interpolation=cv.INTER_LINEAR):
        if frames is None:
            # Fake frames to warmup
            frames = [
                np.ones((480, 640, 3)) * 114
                for _ in range(self.config.TEST.BATCH_IMAGES)
            ]
        else:
            # Accept a single path or array as well as a list; load any file paths from disk.
            if isinstance(frames, (str, np.ndarray)):
                frames = [frames]
            if isinstance(frames[0], str):
                frames = [cv.imread(frame) for frame in frames]

        # resize to a predefined scale (800, 1200) for SoftNMS with aspect ratio preserved
        # transform from BGR HxWxC to RGB CxHxW with normalization
        data = []
        config = self.config
        from dcn.utils.image import resize, transform
        for im in frames:
            # config.SCALES = [(800, 1200)]
            # data.shape = (B, C, H, W)
            # im_info = [[800, 1067, 1.666]]
            target_size = config.SCALES[0][0]
            max_size = config.SCALES[0][1]
            im, im_scale = resize(im,
                                  target_size,
                                  max_size,
                                  stride=config.network.IMAGE_STRIDE,
                                  interpolation=interpolation)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            # im_info holds the resized (height, width, scale) expected by the detector
            im_info = np.array(
                [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                dtype=np.float32)
            data.append({'data': im_tensor, 'im_info': im_info})
        return data
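
A minimal usage sketch, assuming `predictor` is an instance of the class that defines preprocess() above and `frame.jpg` is a hypothetical image path:

warmup = predictor.preprocess()                  # synthetic 640x480 frames for warmup
batch = predictor.preprocess(['frame.jpg'])      # hypothetical path; a list of ndarrays also works
for item in batch:
    print(item['data'].shape, item['im_info'])   # (1, C, H, W) blob and [[H, W, scale]]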
Example #4
def test_rfcn(tile_img):
    from ml import cv
    path = Path(tile_img)
    img = cv.imread(path)
    img2 = cv.resize(img, scale=0.5)  # exercise resize; the detector below uses the full-size image
    model_dir = None  # "/tmp/ml/checkpoints"
    detector = rfcn(pooling=2, model_dir=model_dir, force_reload=True)
    assert detector.with_rpn
    rois, dets, pooled = detector.detect(img, return_rpn=True)
    print('dets:', [tuple(det.shape) for det in dets], dets)
    print('rois:', [tuple(roi.shape) for roi in rois])
    print('pooled:', [tuple(feats.shape) for feats in pooled])
    cv.render(img,
              dets[0],
              score_thr=0.01,
              classes=COCO80_CLASSES,
              path=f"export/{path.name[:-4]}-rfcn.jpg")
Example #5
def inference(detector,
              model,
              img,
              vis=False,
              bbox_thr=0.3,
              kpt_thr=0.3,
              dataset='TopDownCocoDataset',
              format='xyxy',
              return_heatmap=False,
              **kwargs):
    import torch as th
    from ml import cv
    from ml.vision.ops import dets_select
    # from xtcocotools.coco import COCO
    from mmpose.apis import (inference_top_down_pose_model, vis_pose_result)
    from mmpose.datasets import DatasetInfo

    model.to('cuda:0')
    model.eval()
    # result = model(return_loss=return_loss, **data)

    # Detect people first, optionally under autocast for fp16 inference.
    fp16 = kwargs.get('fp16', False)
    with th.cuda.amp.autocast(enabled=fp16):
        dets = detector.detect(img, size=640, conf_thres=0.4, iou_thres=0.5)
    # Keep only person detections (class 0) and move them to the CPU as numpy arrays.
    persons = dets_select(dets, [0])
    ppls = [
        dets_f[persons_f].cpu().numpy()
        for dets_f, persons_f in zip(dets, persons)
    ]
    """
    Args:
        person_results(List[Tensor(N, 5)]): bboxes per class in order with scores
    """
    # print(ppls)
    person_results = [dict(bbox=ppl[:-1]) for ppl in ppls[0]]
    # print(person_results)
    pose_results, returned_outputs = inference_top_down_pose_model(
        model,
        img,
        person_results,
        bbox_thr=bbox_thr,
        format=format,
        dataset=dataset,
        # dataset_info=DatasetInfo({'dataset_name': dataset, 'flip_pairs': []}),
        return_heatmap=return_heatmap,
        outputs=None)
    if vis:
        img = cv.imread(img)  # assumes img was passed in as a file path
        vis_img = vis_pose_result(model,
                                  img,
                                  pose_results,
                                  dataset=dataset,
                                  kpt_score_thr=kpt_thr,
                                  show=False)
        return pose_results, vis_img
    return pose_results
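
A minimal call sketch, assuming `person_detector` (a YOLO-style detector exposing detect()) and `pose_model` (an mmpose top-down model) were constructed elsewhere; both names and the image path are placeholders:

poses, vis_img = inference(person_detector, pose_model, 'frame.jpg',
                           vis=True, bbox_thr=0.3, kpt_thr=0.3, fp16=True)
print(f'{len(poses)} people with keypoints, rendered frame: {vis_img.shape}')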
Example #6
def test_render_yolo(images, labels, suffix, classes=None, output=None):
    if not isinstance(images, list):
        images = [images]
    if not isinstance(labels, list):
        labels = [labels]
    if output is None:
        output = '.'

    for img, label in zip(images, labels):
        with open(label) as f:
            # Each label line holds a class index followed by normalized box coordinates.
            cxyxy = th.Tensor([
                tuple(map(float, line.split()))
                for line in f.read().splitlines()
            ])
            xyxysc = th.cat(
                [cxyxy[:, 1:],
                 th.ones(len(cxyxy), 1), cxyxy[:, 0:1]],
                dim=1)
            path = Path(output, f"{Path(img).stem}-{suffix}.jpg")
            img = cv.imread(img)
            h, w = img.shape[:2]
            # Denormalize boxes to pixel coordinates before rendering.
            xyxysc[:, [0, 2]] *= w
            xyxysc[:, [1, 3]] *= h
            cv.render(img, xyxysc, classes=classes, path=path)
Example #7
def test_yolo_deep_sort(video):
    import numpy as np
    from ml.vision.models.tracking.dsort import DeepSort
    from ml import av
    # model, size = yolo4, 608
    model, size = yolo5x, 736
    detector = model(pretrained=True, fuse=True, pooling=True)
    pooler = MultiScaleFusionRoIAlign(3)
    tracker = DeepSort(
        max_feat_dist=0.2,
        nn_budget=100,
        max_iou_dist=0.7,  # 0.7
        max_age=15,  # 30 (FPS)
        n_init=3)  # 3

    video = Path(video)
    if video.suffix in ['.mp4', '.avi']:
        s = av.open(video)
        v = s.decode(video=0)
        print(f"Tracking video: {video}")
    else:
        s = None
        if video.is_file():
            files = [video]
        elif video.is_dir():
            files = sorted([f for f in video.iterdir() if f.is_file()])
        else:
            raise FileNotFoundError(video)
        v = [cv.imread(f) for f in files]
        print(f"Tracking {len(files)} frames in {video}")
    export = Path(f'export/{video.stem}-{model.__name__}')
    export.mkdir(parents=True, exist_ok=True)
    assert export.exists()

    print(f"Saving to {export / 'tracking.mp4'}")
    media = av.open(f"{export}/tracking.mp4", 'w')
    stream = media.add_stream('h264', 15)
    stream.bit_rate = 2000000
    for i, frame in enumerate(v):
        if not isinstance(frame, np.ndarray):
            # Decoded av.VideoFrame: convert to an RGB ndarray, then flip channels to BGR.
            frame = frame.to_rgb().to_ndarray()[:, :, ::-1]

        if i == 0:
            # Size the output stream from the first frame.
            stream.height = frame.shape[0]
            stream.width = frame.shape[1]
        dets, features = detector.detect([frame], size=size)

        # Track person only
        person = dets[0][:, -1] == 0
        persons = dets[0][person]
        features[0] = features[0][person]

        assert len(dets) == 1
        assert len(persons) == features[0].shape[0]
        assert dets[0].shape[1] == 4 + 1 + 1
        # assert features[0].shape[1] == 256+512+1024
        assert features[0].shape[1] == 320 + 640 + 1280

        if len(dets[0]) > 0:
            # Flatten the pooled multi-scale features to (N, D) for the tracker.
            tracker.update(persons, features[0].view(len(features[0]), -1))
            if i < 60:
                logging.info(
                    f"[{i}] dets[0]: {dets[0].shape}, feats: {[tuple(feats.shape) for feats in features]}"
                )
                cv.render(frame,
                          dets[0],
                          path=export / 'dets' / f"frame{i:03d}.jpg")
            else:
                break

        snapshot = tracker.snapshot()
        logging.info(
            f"[{i}] snapshot[0]: {snapshot and list(zip(*snapshot))[0] or len(snapshot)}"
        )
        frame = cv.render(
            frame,
            snapshot,
            path=export / 'tracking' / f"frame{i:03d}.jpg")
        if media is not None:
            shape = frame.shape
            frame = av.VideoFrame.from_ndarray(frame, format='bgr24')
            packets = stream.encode(frame)
            print('encoded:', packets, frame)
            media.mux(packets)
    if media is not None:
        # Flush any buffered packets and finalize the container.
        packets = stream.encode(None)
        media.mux(packets)
        media.close()