def backbone(self, images, **kwargs):
    r"""Returns backbone features and transformed input image list.

    The input is normalized to a list of tensors on the model device and
    passed through ``self.module.transform`` in eval mode without gradient
    tracking. The previous training mode is restored afterwards, also when
    loading or transforming the images raises.

    Args:
        images(tensor | List[tensor | str]): a batch tensor of images, a list
            of image tensors, or image filenames
        **kwargs: accepted for interface compatibility; not used here

    Returns:
        features: output of ``self.module.backbone`` on the transformed batch
        images(ImageList): a transformed image list with scaled/padded image
            batch and shape meta
        original_image_sizes(list): ``shape[-2:]`` of every input image taken
            before the transform
    """
    mode = self.training
    self.eval()
    try:
        model = self.module
        dev = next(model.parameters()).device
        if th.is_tensor(images):
            if images.dim() == 3:
                # A 3-dim tensor is treated as one image: add a batch dim.
                images = images.unsqueeze(0)
        elif not isinstance(images, list):
            images = [images]

        batch = []
        for image in images:
            if th.is_tensor(image):
                image = image.to(dev)
            else:
                # Imported lazily: only non-tensor inputs need the reader.
                from ml import cv
                image = cv.toTorch(cv.imread(image), device=dev)
            batch.append(image)

        original_image_sizes = [img.shape[-2:] for img in batch]
        with th.no_grad():
            transformed, _ = model.transform(batch, targets=None)
    finally:
        # Restore the caller's mode even if loading/transforming failed.
        self.train(mode)

    # NOTE(review): as in the original statement order, the backbone runs
    # after the mode has been restored, so it sees the caller's mode.
    with th.no_grad():
        features = model.backbone(transformed.tensors)
    return features, transformed, original_image_sizes
def __getitem__(self, index):
    """Return ``(image, target)`` for ``index``, loading each image once.

    ``self.cache`` maps a shuffled sample index to its loaded image. The
    value ``False`` marks a load in progress; other callers wait on the
    condition ``self.cv`` until the image is available. If loading fails,
    the in-progress marker is removed and waiters are woken so that one of
    them can retry instead of blocking forever.

    Args:
        index: position into ``self.shuffled``.

    Returns:
        tuple: the cached image (after ``self.input_trans`` if set) and the
        class index from ``self.cls2idx`` (after ``self.target_trans`` if
        set).

    Raises:
        Whatever reading or transforming the image raises; the cache slot is
        released before the exception propagates.
    """
    index = self.shuffled[index]
    cls, subdir, fn = self.samples[index]

    owner = False
    waited = False
    with self.cv:
        while True:
            if index not in self.cache:
                # Claim the slot; ``False`` marks a load in progress.
                self.cache[index] = False
                owner = True
                break
            if self.cache[index] is not False:
                break
            print(
                f'[P{os.getpid()}] waiting for samples[{index}] to be loaded'
            )
            self.cv.wait()
            waited = True
        if waited:
            print(f'[P{os.getpid()}] done waiting for samples[{index}]')

    if owner:
        loaded = False
        try:
            img = cv.imread(self.path / cls / subdir / fn)
            if self.input_trans:
                img = self.input_trans(img)
            self.cache[index] = img
            loaded = True
        finally:
            with self.cv:
                if not loaded:
                    # Drop the claim so a waiter can take over the load.
                    self.cache.pop(index, None)
                self.cv.notify_all()

    target = self.cls2idx[cls]
    if self.target_trans:
        target = self.target_trans(target)
    return self.cache[index], target
def preprocess(self, frames=None, interpolation=cv.INTER_LINEAR):
    """Resize and transform frames into per-image network input dicts.

    Args:
        frames: ``None`` to generate ``config.TEST.BATCH_IMAGES`` constant
            480x640x3 warm-up frames, a single filename or ``np.ndarray``, or
            a sequence mixing filenames (``str``/``os.PathLike``) and arrays.
            Filenames are read with ``cv.imread``.
        interpolation: interpolation flag forwarded to ``resize``.

    Returns:
        list[dict]: one ``{'data': im_tensor, 'im_info': im_info}`` per
        frame, where ``im_info`` is a 1x3 float32 array holding
        ``im_tensor.shape[2]``, ``im_tensor.shape[3]`` and the resize scale.
        An empty input sequence yields an empty list.
    """
    import os

    from dcn.utils.image import resize, transform

    config = self.config
    if frames is None:
        # Fake frames to warmup
        frames = [
            np.ones((480, 640, 3)) * 114
            for _ in range(config.TEST.BATCH_IMAGES)
        ]
    else:
        if isinstance(frames, (str, os.PathLike, np.ndarray)):
            frames = [frames]
        # Decide per frame so mixed and empty sequences are handled too.
        frames = [
            cv.imread(os.fspath(frame))
            if isinstance(frame, (str, os.PathLike)) else frame
            for frame in frames
        ]

    # resize to a predefined scale (800, 1200) for SoftNMS with aspect ratio preserved
    # transform from BGR HxWxC to RGB CxHxW with normalization
    # e.g. config.SCALES = [(800, 1200)]
    #      data.shape = (B, C, H, W)
    #      im_info = [[800, 1067, 1.666]]
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    stride = config.network.IMAGE_STRIDE
    pixel_means = config.network.PIXEL_MEANS

    data = []
    for im in frames:
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=stride,
                              interpolation=interpolation)
        im_tensor = transform(im, pixel_means)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})
    return data
def test_rfcn(tile_img):
    """Smoke-test the R-FCN detector on one image and export a rendering.

    Loads ``tile_img``, builds the detector with ``pooling=2`` and a forced
    reload, checks that it reports an RPN, runs detection with
    ``return_rpn=True``, prints the shapes of the results, and renders the
    detections of the first image to ``export/<image stem>-rfcn.jpg``.

    Args:
        tile_img: path of the image to run detection on.
    """
    from ml import cv

    path = Path(tile_img)
    img = cv.imread(path)
    model_dir = None  # e.g. "/tmp/ml/checkpoints"
    detector = rfcn(pooling=2, model_dir=model_dir, force_reload=True)
    assert detector.with_rpn
    rois, dets, pooled = detector.detect(img, return_rpn=True)
    print('dets:', [tuple(det.shape) for det in dets], dets)
    print('rois:', [tuple(roi.shape) for roi in rois])
    print('pooled:', [tuple(feats.shape) for feats in pooled])
    # ``stem`` drops the extension whatever its length (``.jpg``, ``.jpeg``).
    cv.render(img,
              dets[0],
              score_thr=0.01,
              classes=COCO80_CLASSES,
              path=f"export/{path.stem}-rfcn.jpg")
def inference(detector,
              model,
              img,
              vis=False,
              bbox_thr=0.3,
              kpt_thr=0.3,
              dataset='TopDownCocoDataset',
              format='xyxy',
              return_heatmap=False,
              **kwargs):
    """Detect boxes in ``img`` and run a top-down pose model on them.

    Args:
        detector: object whose ``detect(img, size=640, conf_thres=0.4,
            iou_thres=0.5)`` returns per-image detections.
        model: pose model handed to ``inference_top_down_pose_model``; it is
            moved to the target device and put in eval mode.
        img: image given to the detector and the pose model. For
            visualization it is read with ``cv.imread`` unless it already is
            an ``np.ndarray``.
        vis: when true, also return a rendered visualization.
        bbox_thr: box score threshold forwarded to the pose inference.
        kpt_thr: keypoint score threshold used for visualization.
        dataset: dataset name forwarded to mmpose.
        format: box format forwarded to mmpose.
        return_heatmap: forwarded to the pose inference.
        **kwargs: ``fp16`` (default ``False``) enables CUDA autocast;
            ``device`` (default ``'cuda:0'``) selects where the pose model is
            placed.

    Returns:
        ``pose_results``, or ``(pose_results, vis_img)`` when ``vis`` is set.
    """
    import numpy as np
    import torch as th
    from ml import cv
    from ml.vision.ops import dets_select
    from mmpose.apis import (inference_top_down_pose_model, vis_pose_result)

    model.to(kwargs.get('device', 'cuda:0'))
    model.eval()
    fp16 = kwargs.get('fp16', False)
    # NOTE(review): autocast is assumed to cover detection and pose
    # inference alike — confirm against the intended scope.
    with th.cuda.amp.autocast(enabled=fp16):
        dets = detector.detect(img, size=640, conf_thres=0.4, iou_thres=0.5)
        # Select class 0 (presumably "person" — confirm with the detector).
        persons = dets_select(dets, [0])
        ppls = [
            dets_f[persons_f].cpu().numpy()
            for dets_f, persons_f in zip(dets, persons)
        ]
        # person_results(List[Tensor(N, 5)]): bboxes per class in order with
        # scores. Only the first image is used; the last column of each
        # detection row is dropped.
        person_results = [dict(bbox=ppl[:-1]) for ppl in ppls[0]]
        pose_results, _ = inference_top_down_pose_model(
            model,
            img,
            person_results,
            bbox_thr=bbox_thr,
            format=format,
            dataset=dataset,
            return_heatmap=return_heatmap,
            outputs=None)

    if not vis:
        return pose_results

    if not isinstance(img, np.ndarray):
        # Only inputs that are not already decoded need to be read.
        img = cv.imread(img)
    vis_img = vis_pose_result(model,
                              img,
                              pose_results,
                              dataset=dataset,
                              kpt_score_thr=kpt_thr,
                              show=False)
    return pose_results, vis_img
def test_render_yolo(images, labels, suffix, classes=None, output=None):
    """Render label boxes onto their images and save the results.

    Every label file is parsed as whitespace-separated floats, one box per
    non-blank line. The first column is moved to the end behind an inserted
    score of 1, and the remaining columns 0/2 are multiplied by the image
    width and 1/3 by the image height (presumably ``class x1 y1 x2 y2`` with
    normalized coordinates — confirm against the label writer). The result
    is passed to ``cv.render`` and written to
    ``<output>/<image stem>-<suffix>.jpg``.

    Args:
        images: an image path or a list of image paths.
        labels: a label file path or a list of them, paired with ``images``.
        suffix: text appended to the image stem in the output filename.
        classes: class names forwarded to ``cv.render``.
        output: output directory; defaults to the current directory.
    """
    if not isinstance(images, list):
        images = [images]
    if not isinstance(labels, list):
        labels = [labels]
    if output is None:
        output = '.'
    for img, label in zip(images, labels):
        with open(label) as f:
            # Blank lines would otherwise produce ragged rows.
            rows = [
                tuple(map(float, line.split()))
                for line in f.read().splitlines() if line.strip()
            ]
        # Keep the tensor 2-D for an empty label file so the column slicing
        # below still works.
        cxyxy = th.tensor(rows, dtype=th.float32) if rows else th.zeros(0, 5)
        xyxysc = th.cat(
            [cxyxy[:, 1:],
             th.ones(len(cxyxy), 1), cxyxy[:, 0:1]], dim=1)
        path = Path(output, f"{Path(img).stem}-{suffix}.jpg")
        img = cv.imread(img)
        h, w = img.shape[:2]
        xyxysc[:, [0, 2]] *= w
        xyxysc[:, [1, 3]] *= h
        cv.render(img, xyxysc, classes=classes, path=path)
def test_yolo_deep_sort(video):
    """Run detection plus DeepSort tracking over a video or frame images.

    Frames come from a ``.mp4``/``.avi`` container, a single image file, or
    the sorted files of a directory. For each frame the detections whose
    last column equals 0 are tracked, and the tracker snapshot is rendered
    and encoded into ``export/<video stem>-<model name>/tracking.mp4``.
    Per-frame renderings are written below the same export directory.

    Args:
        video: path to a video file, an image file, or a directory of frames.

    Raises:
        FileNotFoundError: if ``video`` is neither a video, a file, nor a
            directory.
    """
    import numpy as np
    from ml.vision.models.tracking.dsort import DeepSort
    from ml import av

    # Alternative configuration: yolo4 at size 608.
    model, size = yolo5x, 736
    detector = model(pretrained=True, fuse=True, pooling=True)
    tracker = DeepSort(
        max_feat_dist=0.2,
        nn_budget=100,
        max_iou_dist=0.7,  # 0.7
        max_age=15,  # 30 (FPS)
        n_init=3)  # 3

    video = Path(video)
    source = None
    if video.suffix in ['.mp4', '.avi']:
        source = av.open(video)
        frames = source.decode(video=0)
        print(f"Tracking video: {video}")
    else:
        if video.is_file():
            files = [video]
        elif video.is_dir():
            files = sorted(f for f in video.iterdir() if f.is_file())
        else:
            raise FileNotFoundError(
                f"No such video file or frame directory: {video}")
        frames = [cv.imread(f) for f in files]
        print(f"Tracking {len(files)} frames in {video}")

    media = None
    try:
        export = Path(f'export/{video.stem}-{model.__name__}')
        export.mkdir(parents=True, exist_ok=True)
        assert export.exists()
        print(f"Saving to {export / 'tracking.mp4'}")
        media = av.open(f"{export}/tracking.mp4", 'w')
        stream = media.add_stream('h264', 15)
        stream.bit_rate = 2000000
        for i, frame in enumerate(frames):
            if not isinstance(frame, np.ndarray):
                # Decoded video frame: convert to an array and reverse the
                # channel order of the RGB data.
                frame = frame.to_rgb().to_ndarray()[:, :, ::-1]
            if i == 0:
                stream.height = frame.shape[0]
                stream.width = frame.shape[1]
            dets, features = detector.detect([frame], size=size)

            # Track person only
            person = dets[0][:, -1] == 0
            persons = dets[0][person]
            features[0] = features[0][person]
            assert len(dets) == 1
            assert len(persons) == features[0].shape[0]
            assert dets[0].shape[1] == 4 + 1 + 1
            # assert features[0].shape[1] == 256+512+1024
            assert features[0].shape[1] == 320 + 640 + 1280
            if len(dets[0]) > 0:
                # Flatten every feature to one vector per tracked detection.
                feat_dim = 1
                for extent in features[0].shape[1:]:
                    feat_dim *= extent
                tracker.update(persons,
                               features[0].view(len(features[0]), feat_dim))
                # NOTE(review): the 60-frame cap only triggers on frames
                # with detections; confirm whether it should apply to
                # every frame.
                if i < 60:
                    logging.info(
                        f"[{i}] dets[0]: {dets[0].shape}, feats: {[tuple(feats.shape) for feats in features]}"
                    )
                    cv.render(frame,
                              dets[0],
                              path=export / 'dets' / f"frame{i:03d}.jpg")
                else:
                    break

            snapshot = tracker.snapshot()
            logging.info(
                f"[{i}] snapshot[0]: {snapshot and list(zip(*snapshot))[0] or len(snapshot)}"
            )
            frame = cv.render(
                frame,
                snapshot,
                path=
                f"export/{video.stem}-{model.__name__}/tracking/frame{i:03d}.jpg")
            frame = av.VideoFrame.from_ndarray(frame, format='bgr24')
            packets = stream.encode(frame)
            print('encoded:', packets, frame)
            media.mux(packets)

        # Encoding ``None`` drains the packets still held by the encoder.
        media.mux(stream.encode(None))
    finally:
        # Close both containers even when a frame fails part-way through.
        if media is not None:
            media.close()
        if source is not None:
            source.close()