def get_detections(cfg, ckpt):
    """Run the detector on every ``tdt4265_test`` image and collect its boxes.

    Args:
        cfg: Project configuration node; ``OUTPUT_DIR`` and ``DATASET_DIR``
            are read here and it is forwarded to the model/transform builders.
        ckpt: Checkpoint path, or ``None`` to let the checkpointer pick the
            latest one.

    Returns:
        A list with one dict per image: ``{"image_id": <file stem>,
        "bounding_boxes": [<box dict>, ...]}``.
    """
    detector = torch_utils.to_cuda(SSDDetector(cfg))
    checkpointer = CheckPointer(detector, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(ckpt, use_latest=ckpt is None)
    weight_file = ckpt or checkpointer.get_checkpoint_file()
    print(f'Loaded weights from {weight_file}')

    relative_dir = DatasetCatalog.DATASETS["tdt4265_test"]["data_dir"]
    images_dir = pathlib.Path(cfg.DATASET_DIR, relative_dir, "images")
    jpg_files = list(images_dir.glob("*.jpg"))

    preprocess = build_transforms(cfg, is_train=False)
    detector.eval()
    detections = []

    # The label file lives in the sibling "train" directory two levels up.
    known_labels = read_labels(
        images_dir.parent.parent.joinpath("train", "labels.json"))
    check_all_images_exists(known_labels, jpg_files)

    for jpg_file in tqdm.tqdm(jpg_files, desc="Inference on images"):
        per_image = {"image_id": jpg_file.stem, "bounding_boxes": []}
        pixels = np.array(Image.open(jpg_file).convert("RGB"))
        img_h, img_w = pixels.shape[:2]
        batch = preprocess(pixels)[0].unsqueeze(0)
        output = detector(torch_utils.to_cuda(batch))[0]
        # Rescale the predictions back to the original image size.
        output = output.resize((img_w, img_h)).cpu().numpy()
        pred_boxes = output['boxes']
        pred_labels = output['labels']
        pred_scores = output['scores']

        for k, pred_box in enumerate(pred_boxes):
            class_id = pred_labels[k]
            class_label = TDT4265Dataset.class_names[class_id]
            assert class_label != "__background__"
            confidence = float(pred_scores[k])
            assert pred_box.shape == (4, )
            per_image["bounding_boxes"].append({
                "xmin": float(pred_box[0]),
                "ymin": float(pred_box[1]),
                "xmax": float(pred_box[2]),
                "ymax": float(pred_box[3]),
                "label": str(class_label),
                "label_id": int(class_id),
                "confidence": float(confidence),
            })
        detections.append(per_image)
    return detections
def make_data_loader(cfg, is_train=True, distributed=False, max_iter=None, start_iter=0):
    """Build the data loader(s) for the configured train or test datasets.

    Args:
        cfg: Project configuration node.
        is_train: Selects train vs. test datasets, transforms and batch size.
        distributed: Use the project's ``DistributedSampler``.
        max_iter: When not ``None``, wrap the batch sampler so it yields this
            many iterations.
        start_iter: Iteration to resume from (forwarded to the wrapper).

    Returns:
        A single loader when ``is_train`` is true, otherwise a list of loaders
        (one per dataset).
    """
    transform = build_transforms(cfg, is_train=is_train)
    target_transform = build_target_transform(cfg) if is_train else None
    names = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST
    datasets = build_dataset(names,
                             transform=transform,
                             target_transform=target_transform,
                             is_train=is_train)

    shuffle = is_train or distributed
    loaders = []
    for dataset in datasets:
        if distributed:
            sampler = samplers.DistributedSampler(dataset, shuffle=shuffle)
        elif not shuffle:
            sampler = torch.utils.data.sampler.SequentialSampler(dataset)
        else:
            sampler = torch.utils.data.RandomSampler(dataset)

        if is_train:
            batch_size = cfg.SOLVER.BATCH_SIZE
        else:
            batch_size = cfg.TEST.BATCH_SIZE
        batch_sampler = torch.utils.data.sampler.BatchSampler(
            sampler=sampler, batch_size=batch_size, drop_last=False)

        print('max_iter', max_iter)
        if max_iter is not None:
            batch_sampler = samplers.IterationBasedBatchSampler(
                batch_sampler, num_iterations=max_iter, start_iter=start_iter)

        # Fixed seed set right before the loader is created.
        torch.manual_seed(3)
        loaders.append(
            DataLoader(dataset,
                       num_workers=cfg.DATA_LOADER.NUM_WORKERS,
                       batch_sampler=batch_sampler,
                       pin_memory=cfg.DATA_LOADER.PIN_MEMORY,
                       collate_fn=BatchCollator(is_train),
                       worker_init_fn=_init_fn))

    if not is_train:
        return loaders
    # During training a single (possibly concatenated) loader is returned.
    assert len(loaders) == 1
    return loaders[0]
def run_demo(cfg, ckpt, score_threshold, images_dir: pathlib.Path, output_dir: pathlib.Path, dataset_type):
    """Predict on every PNG/JPG in ``images_dir`` and save annotated copies.

    Args:
        cfg: Project configuration node.
        ckpt: Checkpoint path, or ``None`` to use the latest checkpoint.
        score_threshold: Detections with a score at or below this are dropped.
        images_dir: Directory that is globbed for ``*.png`` and ``*.jpg``.
        output_dir: Directory (created if needed) receiving ``<stem>.png``.
        dataset_type: ``"voc"`` or ``"mnist"``; selects the class names.

    Returns:
        The list of annotated images as ``uint8`` arrays.

    Raises:
        NotImplementedError: For any other ``dataset_type``.
    """
    if dataset_type == "voc":
        class_names = VOCDataset.class_names
    elif dataset_type == "mnist":
        class_names = MNISTDetection.class_names
    else:
        raise NotImplementedError('Not implemented now.')

    detector = torch_utils.to_cuda(SSDDetector(cfg))
    checkpointer = CheckPointer(detector, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(ckpt, use_latest=ckpt is None)
    weight_file = ckpt or checkpointer.get_checkpoint_file()
    print(f'Loaded weights from {weight_file}')

    image_files = [*images_dir.glob("*.png"), *images_dir.glob("*.jpg")]
    output_dir.mkdir(exist_ok=True, parents=True)
    preprocess = build_transforms(cfg, is_train=False)
    detector.eval()

    drawn_images = []
    for image_file in tqdm.tqdm(image_files, desc="Predicting on images"):
        pixels = np.array(Image.open(image_file).convert("RGB"))
        img_h, img_w = pixels.shape[:2]
        batch = preprocess(pixels)[0].unsqueeze(0)
        output = detector(torch_utils.to_cuda(batch))[0]
        output = output.resize((img_w, img_h)).cpu().numpy()

        keep = output['scores'] > score_threshold
        annotated = draw_boxes(pixels,
                               output['boxes'][keep],
                               output['labels'][keep],
                               output['scores'][keep],
                               class_names).astype(np.uint8)
        drawn_images.append(annotated)
        Image.fromarray(annotated).save(output_dir / f"{image_file.stem}.png")
    return drawn_images
def get_detections(cfg, ckpt):
    """Run the detector on the ``tdt4265_test`` images, one dict per box.

    Args:
        cfg: Project configuration node; ``OUTPUT_DIR`` and ``DATASET_DIR``
            are read here.
        ckpt: Checkpoint path, or ``None`` to use the latest checkpoint.

    Returns:
        A flat list of dicts with keys ``image_id``, ``category_id``,
        ``score`` and ``bbox`` (``[xmin, ymin, width, height]``). All numeric
        box values are plain Python floats so the list can be serialised
        with ``json`` directly.
    """
    model = torch_utils.to_cuda(SSDDetector(cfg))
    checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(ckpt, use_latest=ckpt is None)
    weight_file = ckpt if ckpt else checkpointer.get_checkpoint_file()
    print('Loaded weights from {}'.format(weight_file))

    dataset_path = DatasetCatalog.DATASETS["tdt4265_test"]["data_dir"]
    image_dir = pathlib.Path(cfg.DATASET_DIR, dataset_path)
    image_paths = list(image_dir.glob("*.jpg"))

    transforms = build_transforms(cfg, is_train=False)
    model.eval()

    detections = []
    for image_path in tqdm.tqdm(image_paths, desc="Inference on images"):
        image = np.array(Image.open(image_path).convert("RGB"))
        height, width = image.shape[:2]
        images = transforms(image)[0].unsqueeze(0)
        result = model(torch_utils.to_cuda(images))[0]
        # Rescale the predictions back to the original image size.
        result = result.resize((width, height)).cpu().numpy()
        boxes, labels, scores = (result['boxes'], result['labels'],
                                 result['scores'])

        for idx, box in enumerate(boxes):
            label = TDT4265Dataset.class_names[labels[idx]]
            assert label != "__background__"
            score = float(scores[idx])
            assert box.shape == (4, )
            # Cast numpy scalars to float: they are not JSON-serialisable.
            xmin, ymin, xmax, ymax = (float(v) for v in box)
            # Separate names so the image width/height are not overwritten.
            box_width = xmax - xmin
            box_height = ymax - ymin
            detections.append({
                "image_id": image_path.stem,
                "category_id": LABEL_MAP[label],
                "score": score,
                "bbox": [xmin, ymin, box_width, box_height],
            })
    return detections
def __init__(self):
    """Load the MobileNetV2-SSD320 VOC model and wire up the ROS topics.

    The model directory is read from the private ROS parameter
    ``~model_path``; the config and weight files are expected at fixed
    relative paths below it.
    """
    self.threshold = 0.5
    self.device = torch.device('cpu')
    self.class_names = VOCDataset.class_names

    ssd_dir = os.path.expanduser(rospy.get_param('~model_path'))
    config = os.path.join(ssd_dir,
                          'configs/mobilenet_v2_ssd320_voc0712.yaml')
    weightfile = os.path.join(ssd_dir,
                              'weight/mobilenet_v2_ssd320_voc0712_v2.pth')
    cfg.merge_from_file(config)
    cfg.freeze()

    self.model = self.get_model(cfg, weightfile)
    self.transforms = build_transforms(cfg, is_train=False)
    self.model.eval()

    # Create the publisher before subscribing: the subscriber callback may
    # fire as soon as it is registered, and it presumably publishes through
    # self.pub (callback body is not visible here -- verify).
    self.pub = rospy.Publisher('object_detection_result',
                               ObjectDetectionResult,
                               queue_size=10)
    self.sub = rospy.Subscriber("preprocessed_image", Image,
                                self.object_detection)
def update_config(self, config_file, dataset_type, weight, score_threshold=0.5, targets=("person",)):
    """(Re)configure the detector from a config file and a weight file.

    Args:
        config_file: Path of the config file merged into the global ``cfg``.
        dataset_type: ``"voc"`` or ``"coco"``; selects the class names.
        weight: Checkpoint passed to ``CheckPointer.load``.
        score_threshold: Stored on the instance as ``score_threshold``.
        targets: Class names to keep; ``None`` keeps every class. The default
            is an immutable tuple so it cannot be shared-and-mutated across
            calls.

    Raises:
        NotImplementedError: For an unsupported ``dataset_type``.
    """
    if dataset_type == "voc":
        self.class_names = VOCDataset.class_names
    elif dataset_type == "coco":
        self.class_names = COCODataset.class_names
    else:
        raise NotImplementedError('Not implemented now.')

    # Indices of the classes the caller is interested in.
    if targets is None:
        self.target_labels = list(range(len(self.class_names)))
    else:
        self.target_labels = [
            idx for idx, name in enumerate(self.class_names)
            if name in targets
        ]

    self.cfg = cfg
    self.cfg.merge_from_file(config_file)
    self.cfg.freeze()
    print("Loaded configuration file {}".format(config_file))

    self.device = torch.device(self.cfg.MODEL.DEVICE)
    self.model = build_detection_model(self.cfg)
    self.model = self.model.to(self.device)
    self.checkpointer = CheckPointer(self.model)
    self.checkpointer.load(weight)
    self.cpu_device = torch.device("cpu")
    self.score_threshold = score_threshold
    self.transforms = build_transforms(self.cfg, is_train=False)
    self.model.eval()
def run_demo(cfg, ckpt, score_threshold, images_dir, dataset_type):
    """Detect objects in every JPEG of ``images_dir`` and save annotated copies.

    Results are written to ``demo/<dataset_type>/<backbone name>/<epoch>``,
    where ``<epoch>`` is parsed from the third ``/``-separated component of
    the weight file path.

    Args:
        cfg: Project configuration node.
        ckpt: Checkpoint path, or ``None`` to use the latest checkpoint.
        score_threshold: Detections with a score at or below this are dropped.
        images_dir: Directory searched for ``*.jpg`` and ``*.jpeg``.
        dataset_type: One of ``"voc"``, ``"pick"``, ``"cotb"``, ``"coco"``.

    Raises:
        NotImplementedError: For any other ``dataset_type``.
    """
    if dataset_type == "voc":
        class_names = VOCDataset.class_names
    elif dataset_type == "pick":
        class_names = PICKDataset.class_names
    elif dataset_type == "cotb":
        class_names = COTBDataset.class_names
    elif dataset_type == 'coco':
        class_names = COCODataset.class_names
    else:
        raise NotImplementedError('Not implemented now.')

    device = torch.device(cfg.MODEL.DEVICE)
    detector = build_detection_model(cfg).to(device)
    checkpointer = CheckPointer(detector, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(ckpt, use_latest=ckpt is None)
    weight_file = ckpt or checkpointer.get_checkpoint_file()
    print(f'Loaded weights from {weight_file}')

    # e.g. "<a>/<b>/model_<epoch>.pth" -> "<epoch>"
    weight_name = weight_file.split('/')[2]
    train_epoch = weight_name.split('.')[0].split('_')[1]
    save_path = os.path.join('demo', dataset_type, cfg.MODEL.BACKBONE.NAME,
                             train_epoch)

    image_paths = glob.glob(os.path.join(images_dir, '*.jpg'))
    image_paths += glob.glob(os.path.join(images_dir, '*.jpeg'))
    mkdir(save_path)

    cpu_device = torch.device("cpu")
    preprocess = build_transforms(cfg, is_train=False)
    detector.eval()

    total = len(image_paths)
    for position, image_path in enumerate(image_paths, start=1):
        tick = time.time()
        image_name = os.path.basename(image_path)
        pixels = np.array(Image.open(image_path).convert("RGB"))
        img_h, img_w = pixels.shape[:2]
        batch = preprocess(pixels)[0].unsqueeze(0)
        load_time = time.time() - tick

        tick = time.time()
        output = detector(batch.to(device))[0]
        inference_time = time.time() - tick

        output = output.resize((img_w, img_h)).to(cpu_device).numpy()
        keep = output['scores'] > score_threshold
        boxes = output['boxes'][keep]
        labels = output['labels'][keep]
        scores = output['scores'][keep]

        meters = ' | '.join([
            f'objects {len(boxes):02d}',
            f'load {round(load_time * 1000):03d}ms',
            f'inference {round(inference_time * 1000):03d}ms',
            f'FPS {round(1.0 / inference_time)}',
        ])
        print(f'({position:04d}/{total:04d}) {image_name}: {meters}')

        annotated = draw_boxes(pixels, boxes, labels, scores,
                               class_names).astype(np.uint8)
        Image.fromarray(annotated).save(os.path.join(save_path, image_name))
# Class names in label-id order. This must be an ordered sequence (not a set)
# so that a predicted label id can be used as an index.
class_name = ('__background__', 'lubang', 'retak aligator', 'retak melintang',
              'retak memanjang')

cfg.merge_from_file(config)
cfg.freeze()

ckpt = None
device = torch.device('cpu')
model = build_detection_model(cfg)
model.to(device)

# With ckpt left as None the checkpointer is asked for the latest checkpoint.
checkpoint = CheckPointer(model, save_dir=cfg.OUTPUT_DIR)
checkpoint.load(ckpt, use_latest=ckpt is None)
weight_file = ckpt if ckpt else checkpoint.get_checkpoint_file()

transforms = build_transforms(cfg, is_train=False)
model.eval()

# Collect the VGG sub-module(s) among the model's direct children.
conv_layers = []
model_children = list(model.children())
print(len(model_children))
print(type(model_children[0]))
print(type(model_children[1]))

counter = 0
for child in model_children:
    if isinstance(child, VGG):
        counter += 1
        conv_layers.append(child)
def active_train(cfg, args):
    """Run an active-learning training loop and return the trained model.

    The model is first fitted on the initially labeled pool; then, for
    ``args.query_step`` rounds, new samples are queried, added to the labeled
    pool and the model is re-fitted. After every fit one row of metrics is
    appended to ``args.filename`` via ``save_to_csv``.

    Args:
        cfg: Project configuration node.
        args: Run arguments; the attributes read here are ``num_gpus``,
            ``model_dir``, ``strategy``, ``init_size``, ``filename``,
            ``query_step`` and ``query_size``.

    Returns:
        ``model.model`` of the ``ALModel`` wrapper.
    """
    logger = logging.getLogger("SSD.trainer")

    raw_model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    raw_model.to(device)

    # Learning rate and schedule are scaled by the number of GPUs.
    lr = cfg.SOLVER.LR * args.num_gpus
    optimizer = make_optimizer(cfg, raw_model, lr)
    milestones = [s // args.num_gpus for s in cfg.SOLVER.LR_STEPS]
    scheduler = make_lr_scheduler(cfg, optimizer, milestones)

    arguments = {"iteration": 0}
    save_to_disk = dist_util.get_rank() == 0
    checkpointer = CheckPointer(raw_model, optimizer, scheduler,
                                args.model_dir, save_to_disk, logger)
    max_iter = cfg.SOLVER.MAX_ITER // args.num_gpus  # computed but not used below

    is_train = True
    train_transform = build_transforms(cfg, is_train=is_train)
    target_transform = build_target_transform(cfg) if is_train else None
    dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST
    datasets = build_dataset(dataset_list,
                             transform=train_transform,
                             target_transform=target_transform,
                             is_train=is_train)

    logger.info('Creating query loader...')
    query_loader = QueryLoader(datasets[0], args, cfg)

    logger.info('Creating al model...')
    strategy = get_strategy(args.strategy)
    model = ALModel(raw_model, strategy, optimizer, device, scheduler,
                    arguments, args, checkpointer, cfg)

    logger.info(f'Training on initial data with size {args.init_size}...')
    n_bbox = query_loader.len_annotations()
    fit_started = time.time()
    model.fit(query_loader.get_labeled_loader())
    init_time = time.time() - fit_started

    logger.info('Scoring after initial training...')
    score = model.score()
    logger.info(f'SCORE : {score:.4f}')
    save_to_csv(args.filename, [
        args.strategy, {}, 0, score, init_time, 0, init_time,
        len(query_loader), n_bbox
    ])

    for step in range(args.query_step):
        logger.info(f'STEP NUMBER {step}')
        logger.info('Querying assets to label')
        query_started = time.time()
        query_idx = model.query(
            unlabeled_loader=query_loader.get_unlabeled_loader(),
            cfg=cfg,
            args=args,
            step=step,
            n_instances=args.query_size,
            length_ds=len(datasets[0]))

        logger.info('Adding labeled samples to train dataset')
        query_loader.add_to_labeled(query_idx, step + 1)

        fit_started = time.time()
        logger.info('Fitting with new data...')
        model.fit(query_loader.get_labeled_loader())
        total_time = time.time() - query_started
        train_time = time.time() - fit_started
        # Whatever was not spent fitting is attributed to querying/labeling.
        active_time = total_time - train_time

        logger.info('Scoring model...')
        score = model.score()
        n_bbox = query_loader.len_annotations()
        save_to_csv(args.filename, [
            args.strategy, {}, step + 1, score, train_time, active_time,
            total_time, len(query_loader), n_bbox
        ])
        logger.info(f'SCORE : {score:.4f}')

    return model.model
def _read_image(self, image_id):
    """Load ``<data_dir>/JPEGImages/<image_id>.jpg`` as an RGB numpy array."""
    image_file = os.path.join(self.data_dir, "JPEGImages",
                              "{}.jpg".format(image_id))
    rgb = Image.open(image_file).convert("RGB")
    return np.array(rgb)


if __name__ == '__main__':
    # Smoke test: build the training dataset and print one sample's shapes.
    from ssd.config import cfg
    from ssd.data.transforms import build_transforms, build_target_transform
    from ssd.data.datasets import build_dataset

    is_train = True
    train_transform = build_transforms(cfg, is_train=is_train)
    target_transform = build_target_transform(cfg) if is_train else None
    dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST
    datasets = build_dataset(dataset_list,
                             transform=train_transform,
                             target_transform=target_transform,
                             is_train=is_train)

    image, targets, index = datasets[0][200]
    boxes, labels = targets['boxes'], targets['labels']
    for value in (image.shape, boxes.shape, labels.shape, index):
        print(value)
def run_demo(cfg, ckpt, score_threshold, images_dir, output_dir, dataset_type):
    """Detect objects in every ``*.jpg`` of ``images_dir`` and report misses.

    Each image is annotated with red class-name text and red rectangles and
    written to ``output_dir`` under its original file name. An image counts
    as a miss when no detection scores above ``score_threshold``; a summary
    line with the miss rate is printed at the end.

    Args:
        cfg: Project configuration node.
        ckpt: Checkpoint path, or ``None`` to use the latest checkpoint.
        score_threshold: Detections with a score at or below this are dropped.
        images_dir: Directory searched for ``*.jpg``.
        output_dir: Directory (created via ``mkdir``) for annotated images.
        dataset_type: Must be ``"voc"`` or ``"coco"``.

    Raises:
        NotImplementedError: For any other ``dataset_type``.
    """
    if dataset_type not in ("voc", "coco"):
        raise NotImplementedError('Not implemented now.')

    device = torch.device(cfg.MODEL.DEVICE)
    # Label id -> display name used for the drawn text.
    smoke_name_dic = ('__background__', '一次性快餐盒', '书籍纸张', '充电宝',
                      '剩饭剩菜', '包', '垃圾桶', '塑料器皿', '塑料玩具',
                      '塑料衣架', '大骨头', '干电池', '快递纸袋', '插头电线',
                      '旧衣服', '易拉罐', '枕头', '果皮果肉', '毛绒玩具',
                      '污损塑料', '污损用纸', '洗护用品', '烟蒂', '牙签',
                      '玻璃器皿', '砧板', '筷子', '纸盒纸箱', '花盆', '茶叶渣',
                      '菜帮菜叶', '蛋壳', '调料瓶', '软膏', '过期药物', '酒瓶',
                      '金属厨具', '金属器皿', '金属食品罐', '锅', '陶瓷器皿',
                      '鞋', '食用油桶', '饮料瓶', '鱼骨')

    model = build_detection_model(cfg)
    cpu_device = torch.device("cpu")
    model = model.to(device)
    checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(ckpt, use_latest=ckpt is None)
    weight_file = ckpt if ckpt else checkpointer.get_checkpoint_file()
    print('Loaded weights from {}'.format(weight_file))

    image_paths = glob.glob(os.path.join(images_dir, '*.jpg'))
    mkdir(output_dir)
    transforms = build_transforms(cfg, is_train=False)
    model.eval()

    font = None  # loaded once, on the first detection that needs it
    miss = 0
    for i, image_path in enumerate(image_paths):
        start = time.time()
        image_name = os.path.basename(image_path)
        image = np.array(Image.open(image_path).convert("RGB"))
        height, width = image.shape[:2]
        images = transforms(image)[0].unsqueeze(0)
        load_time = time.time() - start

        start = time.time()
        result = model(images.to(device))[0]
        inference_time = time.time() - start

        result = result.resize((width, height)).to(cpu_device).numpy()
        boxes, labels, scores = (result['boxes'], result['labels'],
                                 result['scores'])
        indices = scores > score_threshold
        boxes = boxes[indices]
        labels = labels[indices]
        scores = scores[indices]

        # One miss per image without detections; extra detections on another
        # image must not cancel it out.
        if len(boxes) == 0:
            miss += 1

        meters = ' | '.join([
            'objects {:02d}'.format(len(boxes)),
            'load {:03d}ms'.format(round(load_time * 1000)),
            'inference {:03d}ms'.format(round(inference_time * 1000)),
            'FPS {}'.format(round(1.0 / inference_time))
        ])
        print('({:04d}/{:04d}) {}: {}'.format(i + 1, len(image_paths),
                                              image_name, meters))

        # Draw the (non-ASCII) labels with PIL, on a guaranteed-RGB image so
        # the RGB->BGR conversion below is valid for any input mode.
        pil_image = Image.fromarray(image)
        draw_ = ImageDraw.Draw(pil_image)
        for box, label in zip(boxes, labels):
            if font is None:
                font = ImageFont.truetype(
                    '/usr/share/fonts/truetype/arphic/uming.ttc', 40)
            draw_.text((int(box[0]) + 2, int(box[1]) - 2),
                       smoke_name_dic[label], (255, 0, 0), font=font)

        cv_image = cv2.cvtColor(np.asarray(pil_image), cv2.COLOR_RGB2BGR)
        for box in boxes:
            cv2.rectangle(cv_image, (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])), (0, 0, 255), 4)
        cv2.imwrite(os.path.join(output_dir, image_name), cv_image)

    smoke_count = len(image_paths)
    miss_rate = miss / smoke_count if smoke_count else 0.0
    print("出现:%d 漏掉: %d 漏检率:%.2f" % (smoke_count, miss, miss_rate))
def run_demo(cfg, model, score_threshold, images_dir, output_dir):
    """Detect text fields in one image, OCR each crop and return JSON bytes.

    Args:
        cfg: Project configuration node.
        model: Already-loaded detection model.
        score_threshold: Detections with a score at or below this are dropped.
        images_dir: Path of the single image file to process (despite the
            name, it is opened directly as an image).
        output_dir: Directory created via ``mkdir``.

    Returns:
        UTF-8 encoded JSON mapping each detected class name to its OCR text.
    """
    device = torch.device(cfg.MODEL.DEVICE)
    class_names = VOCDataset.class_names
    mkdir(output_dir)
    cpu_device = torch.device("cpu")
    preprocess = build_transforms(cfg, is_train=False)
    model.eval()

    tick = time.time()
    image_name = os.path.basename(images_dir)
    pixels = np.array(Image.open(images_dir).convert("RGB"))
    img_h, img_w = pixels.shape[:2]
    batch = preprocess(pixels)[0].unsqueeze(0)
    load_time = time.time() - tick

    tick = time.time()
    output = model(batch.to(device))[0]
    inference_time = time.time() - tick

    output = output.resize((img_w, img_h)).to(cpu_device).numpy()
    keep = output['scores'] > score_threshold
    boxes = output['boxes'][keep]
    labels = output['labels'][keep]

    meters = ' | '.join([
        f'objects {len(boxes):02d}',
        f'load {round(load_time * 1000):03d}ms',
        f'inference {round(inference_time * 1000):03d}ms',
        f'FPS {round(1.0 / inference_time)}',
    ])
    print(f'({len(images_dir):04d}) {image_name}: {meters}')

    # Per-label horizontal trimming: (added to xmin, subtracted from xmax).
    x_trim = {1: (140, 130), 2: (130, 0), 4: (40, 0)}

    recognized = ['__background__']
    fields = {}
    for box, label in zip(boxes, labels):
        trim_left, trim_right = x_trim.get(int(label), (0, 0))
        left = int(box[0]) + trim_left
        top = int(box[1])
        right = int(box[2]) - trim_right
        bottom = int(box[3])

        crop = local_threshold(pixels[top:bottom, left:right])
        ocr_text = crnnOcr(Image.fromarray(crop))
        if label == 2:
            # Replace every character outside the Latin-1 range with '/'.
            ocr_text = re.sub('[^\x00-\xff]', '/', ocr_text)
        recognized.append(ocr_text)
        fields[class_names[label]] = ocr_text

    return json.dumps(fields, ensure_ascii=False).encode('utf-8')
def main():
    """Streamlit entry point of the pavement distress detector.

    Lets the user pick a video, a config file and a checkpoint folder. When
    all three are selected and the run button is pressed, the model is
    loaded, ``pavement_distress`` processes the video into a temporary
    ``.avi``, and the re-encoded ``.mp4`` is shown in the page. Otherwise a
    warning lists what is still missing.
    """
    st.title('Pavement Distress Detector')
    st.markdown(get_file_content_as_string('./introduction.md'))
    st.sidebar.markdown(get_file_content_as_string('./documentation.md'))
    caching.clear_cache()

    video = video_uploader('./input')
    config = config_uploader('./configs')
    output_dir = checkpoint_folder('./outputs')

    timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    config_stem = os.path.splitext(os.path.basename(config))[0]
    filename = f"{timestamp}_{config_stem}"
    output_file = './results'
    score_threshold = 0.5
    fps_threshold = 20
    video_filename = f'{output_file}/{filename}.mp4'
    labels_filename = f'{output_file}/{filename}.txt'

    if not st.button('Click here to run'):
        return

    # A selector that still points at a directory (or at the default
    # './outputs/' folder) means the user has not chosen anything yet.
    missing = []
    if os.path.isdir(video):
        missing.append('video file')
    if os.path.isdir(config):
        missing.append('config file')
    if output_dir == './outputs/':
        missing.append('checkpoint folder')

    if missing:
        if len(missing) == 3:
            wanted = 'video file, config file, and checkpoint folder'
        else:
            wanted = ' and '.join(missing)
        st.warning(f'Please select {wanted}')
        return

    class_name = ('__background__', 'lubang', 'retak aligator',
                  'retak melintang', 'retak memanjang')
    cfg.merge_from_file(config)
    cfg.freeze()

    ckpt = None
    device = torch.device(cfg.MODEL.DEVICE)
    model = build_detection_model(cfg)
    model.to(device)
    checkpoint = CheckPointer(model, save_dir=cfg.OUTPUT_DIR)
    checkpoint.load(ckpt, use_latest=ckpt is None)
    weight_file = ckpt if ckpt else checkpoint.get_checkpoint_file()
    st.write(f'Loading weight from {weight_file}')

    cpu_device = torch.device('cpu')
    transforms = build_transforms(cfg, is_train=False)
    model.eval()

    clip = VideoFileClip(video)
    # Temporary file because streamlit can't read the OpenCV video result.
    with tempfile.NamedTemporaryFile(suffix='.avi') as temp:
        temp_name = temp.name
        pavement_distress(video, clip, fps_threshold, score_threshold,
                          temp_name, labels_filename, transforms, model,
                          device, cpu_device, class_name)
        result_clip = VideoFileClip(temp_name)
        st.write('Please wait, prepraring result...')
        result_clip.write_videofile(video_filename)
        # Read through a context manager so the handle is always closed.
        with open(video_filename, 'rb') as video_file:
            video_bytes = video_file.read()
        st.video(video_bytes)
def preparingModel(self):
    """Build the inference transforms, switch the model to eval mode and mark the instance ready."""
    print(f'Loaded weights from {self.weight_file}')
    inference_transforms = build_transforms(self.cfg, is_train=False)
    self.transforms = inference_transforms
    self.model.eval()
    self.isReady = True
def run_demo(cfg, ckpt, score_threshold, images_dir, output_dir, dataset_type):
    """Benchmark detection speed over every ``*.jpg`` in ``images_dir``.

    Nothing is drawn or saved; per-image load and inference times are
    collected and a speed summary is printed.

    Args:
        cfg: Project configuration node.
        ckpt: Checkpoint path, or ``None`` to use the latest checkpoint.
        score_threshold: Detections with a score at or below this are dropped
            (the filtering is part of the timed section).
        images_dir: Directory searched for ``*.jpg``.
        output_dir: Directory created via ``mkdir``.
        dataset_type: Must be ``"voc"`` or ``"coco"``.

    Raises:
        NotImplementedError: For any other ``dataset_type``.
    """
    if dataset_type not in ("voc", "coco"):
        raise NotImplementedError('Not implemented now.')

    device = torch.device(cfg.MODEL.DEVICE)
    model = build_detection_model(cfg)
    model = model.to(device)
    checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(ckpt, use_latest=ckpt is None)
    weight_file = ckpt if ckpt else checkpointer.get_checkpoint_file()
    print('Loaded weights from {}'.format(weight_file))

    image_paths = glob.glob(os.path.join(images_dir, '*.jpg'))
    mkdir(output_dir)
    cpu_device = torch.device("cpu")
    transforms = build_transforms(cfg, is_train=False)
    model.eval()

    detect_timer = Timer()
    timer = Timer()
    timer.tic()
    inference_time_list = []  # milliseconds
    load_time_list = []  # milliseconds

    for image_path in image_paths:
        start = time.time()
        image = np.array(Image.open(image_path).convert("RGB"))
        height, width = image.shape[:2]
        images = transforms(image)[0].unsqueeze(0)
        load_time = time.time() - start
        load_time_list.append(1000 * load_time)

        detect_timer.tic()
        result = model(images.to(device))[0]
        result = result.resize((width, height)).to(cpu_device).numpy()
        boxes, labels, scores = (result['boxes'], result['labels'],
                                 result['scores'])
        indices = scores > score_threshold
        boxes = boxes[indices]
        labels = labels[indices]
        scores = scores[indices]
        inference_time = detect_timer.toc()
        inference_time_list.append(1000 * inference_time)
        # NOTE(review): reset per image so toc() reports this image only --
        # the original nesting of clear() is ambiguous; confirm against Timer.
        detect_timer.clear()

    # Read the overall timer once so both totals below agree.
    total_test_time = timer.toc()

    if not inference_time_list:
        print("No '*.jpg' images found in {}".format(images_dir))
        return

    # Average over the faster half of the samples (at least one sample).
    half = max(1, len(inference_time_list) // 2)
    total_time_list = np.array(inference_time_list) + np.array(load_time_list)
    total_time_list.sort()
    det_time = np.mean(total_time_list[:half])
    best_det_time = np.min(total_time_list)

    print("Total test time: %.2f s" % total_test_time)
    print("\nTotal detection speed: %.1f FPS" %
          (len(inference_time_list) / total_test_time))
    print("\nAvg detection speed: %.1f FPS" % (1000. / det_time))
    print("Best detection speed: %.1f FPS" % (1000. / best_det_time))
def run_demo(cfg, ckpt, score_threshold, images_dir, output_dir, dataset_type):
    """Detect on every ``*.jpg`` in ``images_dir`` (CPU only) and save the result.

    Boxes are drawn with OpenCV and the label text with PIL, using the
    module-level ``label_name``, ``color`` and ``fontStyle``. Inference is
    pinned to the CPU regardless of ``cfg.MODEL.DEVICE``.

    Args:
        cfg: Project configuration node.
        ckpt: Checkpoint path, or ``None`` to use the latest checkpoint.
        score_threshold: Detections with a score at or below this are dropped.
        images_dir: Directory searched for ``*.jpg``.
        output_dir: Directory (created via ``mkdir``) for annotated images.
        dataset_type: Must be ``"voc"`` or ``"coco"``.

    Raises:
        NotImplementedError: For any other ``dataset_type``.
    """
    if dataset_type not in ("voc", "coco"):
        raise NotImplementedError('Not implemented now.')

    cpu_device = torch.device("cpu")
    model = build_detection_model(cfg)
    model = model.to(cpu_device)
    checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(ckpt, use_latest=ckpt is None)
    weight_file = ckpt if ckpt else checkpointer.get_checkpoint_file()
    print('Loaded weights from {}'.format(weight_file))

    image_paths = glob.glob(os.path.join(images_dir, '*.jpg'))
    mkdir(output_dir)
    transforms = build_transforms(cfg, is_train=False)
    model.eval()

    for i, image_path in enumerate(image_paths):
        start = time.time()
        image_name = os.path.basename(image_path)
        image = np.array(Image.open(image_path).convert("RGB"))
        height, width = image.shape[:2]
        images = transforms(image)[0].unsqueeze(0)
        load_time = time.time() - start

        start = time.time()
        result = model(images.to(cpu_device))[0]
        inference_time = time.time() - start

        result = result.resize((width, height)).numpy()
        boxes, labels, scores = (result['boxes'], result['labels'],
                                 result['scores'])
        indices = scores > score_threshold
        boxes = boxes[indices]
        labels = labels[indices]
        scores = scores[indices]

        meters = ' | '.join([
            'objects {:02d}'.format(len(boxes)),
            'load {:03d}ms'.format(round(load_time * 1000)),
            'inference {:03d}ms'.format(round(inference_time * 1000)),
            'FPS {}'.format(round(1.0 / inference_time))
        ])
        print('({:04d}/{:04d}) {}: {}'.format(i + 1, len(image_paths),
                                              image_name, meters))

        for det in range(len(labels)):
            text = str(label_name[labels[det]]) + str(round(scores[det], 2))
            # OpenCV drawing functions require integer pixel coordinates.
            xmin, ymin, xmax, ymax = (int(v) for v in boxes[det])
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 3)
            pil_image = Image.fromarray(image)
            ImageDraw.Draw(pil_image).text((xmin, ymin - 40), text, color,
                                           font=fontStyle)
            # np.array (not asarray) gives a writable copy for the next box.
            image = np.array(pil_image)

        # imshow expects BGR and only refreshes when waitKey is called.
        cv2.imshow('drawn_image', cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
        cv2.waitKey(1)
        Image.fromarray(image).save(os.path.join(output_dir, image_name))
def run_demo(cfg, ckpt, score_threshold, images_dir, output_dir):
    """Detect text fields in every ``*.bmp`` of ``images_dir`` and OCR them.

    For each image the detected regions are trimmed horizontally according
    to their label, binarised with ``local_threshold``, recognised with
    ``crnnOcr`` and printed as a JSON object (class name -> text).

    Args:
        cfg: Project configuration node.
        ckpt: Checkpoint path, or ``None`` to use the latest checkpoint.
        score_threshold: Detections with a score at or below this are dropped.
        images_dir: Directory searched for ``*.bmp``.
        output_dir: Directory created via ``mkdir``.
    """
    class_names = VOCDataset.class_names
    device = torch.device(cfg.MODEL.DEVICE)
    detector = build_detection_model(cfg).to(device)
    checkpointer = CheckPointer(detector, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(ckpt, use_latest=ckpt is None)
    weight_file = ckpt or checkpointer.get_checkpoint_file()
    print(f'Loaded weights from {weight_file}')

    image_paths = glob.glob(os.path.join(images_dir, '*.bmp'))
    mkdir(output_dir)
    cpu_device = torch.device("cpu")
    preprocess = build_transforms(cfg, is_train=False)
    detector.eval()

    # Per-label horizontal trimming: (added to xmin, subtracted from xmax).
    x_trim = {1: (140, 130), 2: (130, 0), 4: (40, 0)}
    total = len(image_paths)

    for position, image_path in enumerate(image_paths, start=1):
        tick = time.time()
        image_name = os.path.basename(image_path)
        pixels = np.array(Image.open(image_path).convert("RGB"))
        img_h, img_w = pixels.shape[:2]
        batch = preprocess(pixels)[0].unsqueeze(0)
        load_time = time.time() - tick

        tick = time.time()
        output = detector(batch.to(device))[0]
        inference_time = time.time() - tick

        output = output.resize((img_w, img_h)).to(cpu_device).numpy()
        keep = output['scores'] > score_threshold
        boxes = output['boxes'][keep]
        labels = output['labels'][keep]

        meters = ' | '.join([
            f'objects {len(boxes):02d}',
            f'load {round(load_time * 1000):03d}ms',
            f'inference {round(inference_time * 1000):03d}ms',
            f'FPS {round(1.0 / inference_time)}',
        ])
        print(f'({position:04d}/{total:04d}) {image_name}: {meters}')

        recognized = ['__background__']
        fields = {}
        for j in range(len(boxes)):
            label = labels[j]
            trim_left, trim_right = x_trim.get(int(label), (0, 0))
            left = int(boxes[j, 0]) + trim_left
            top = int(boxes[j, 1])
            right = int(boxes[j, 2]) - trim_right
            bottom = int(boxes[j, 3])

            crop = local_threshold(pixels[top:bottom, left:right])
            # Store the trimmed integer coordinates back into the box array.
            boxes[j, 0] = left
            boxes[j, 1] = top
            boxes[j, 2] = right
            boxes[j, 3] = bottom

            ocr_text = crnnOcr(Image.fromarray(crop))
            if label == 2:
                # Replace every character outside the Latin-1 range with '/'.
                ocr_text = re.sub('[^\x00-\xff]', '/', ocr_text)
            recognized.append(ocr_text)
            fields[class_names[label]] = ocr_text

        print(json.dumps(fields, ensure_ascii=False))
def run_demo(cfg, ckpt, score_threshold, images_dir, output_dir, dataset_type, gen_heatmap):
    """Run live person detection, distance regression and tracking on camera 0.

    Each frame is passed through the detector; person boxes above
    ``score_threshold`` are fed to the distance regressor and the centroid
    tracker, and (with two or more people) clustered with ``DBSCAN``. Press
    ``x`` in the preview window to stop. On exit the tracked distance
    history is written to ``<output_dir>/dist_regr_results/<unix time>.txt``.

    Args:
        cfg: Project configuration node.
        ckpt: Checkpoint path, or ``None`` to use the latest checkpoint.
        score_threshold: Minimum score (exclusive) for a detection to be kept.
        images_dir: Unused here; kept for signature compatibility.
        output_dir: Directory for the results file.
        dataset_type: ``"voc"`` or ``"coco"``; selects the class names.
        gen_heatmap: Show the clustering heat map instead of drawn boxes.

    Raises:
        NotImplementedError: For any other ``dataset_type``.
    """
    if dataset_type == "voc":
        class_names = VOCDataset.class_names
    elif dataset_type == 'coco':
        class_names = COCODataset.class_names
    else:
        raise NotImplementedError('Not implemented now.')

    if torch.cuda.is_available():
        device = torch.device(cfg.MODEL.DEVICE)
    else:
        device = torch.device("cpu")

    model = build_detection_model(cfg)
    model = model.to(device)
    checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(ckpt, use_latest=ckpt is None)
    weight_file = ckpt if ckpt else checkpointer.get_checkpoint_file()
    print('Loaded weights from {}'.format(weight_file))

    mkdir(output_dir)
    cpu_device = torch.device("cpu")
    transforms = build_transforms(cfg, is_train=False)
    model.eval()

    dist_regr_model = DistanceRegrNet(2)
    dist_regr_model = load_model_weight(dist_regr_model, device)
    dist_regr_model.eval()
    X_scaler = load_standardizer(Standardizer())

    person_label_idx = class_names.index('person')
    centroid_tracker = CentroidTracker()

    def elapsed_ms(since):
        """Milliseconds since ``since``, rounded to 3 decimals."""
        return round((time.time() - since) * 1000, 3)

    capture = cv2.VideoCapture(0)
    try:
        while capture.isOpened():
            ret, frame = capture.read()
            if not ret:
                break

            single_frame_render_time = 0
            image = frame
            height, width = image.shape[:2]

            start_time = time.time()
            images = transforms(frame)[0].unsqueeze(0)
            result = model(images.to(device))[0]
            result = result.resize((width, height)).to(cpu_device).numpy()
            stage_ms = elapsed_ms(start_time)
            single_frame_render_time += stage_ms
            print(f"MobileNet SSD Inference time {stage_ms}ms")

            boxes, labels, scores = (result['boxes'], result['labels'],
                                     result['scores'])
            # Keep confident person detections only.
            indices = np.logical_and(scores > score_threshold,
                                     labels == person_label_idx)
            boxes = boxes[indices]
            labels = labels[indices]
            scores = scores[indices]

            distances = None
            if len(boxes) != 0:
                centers = np.apply_along_axis(get_mid_point, 1, boxes)
                image = draw_points(image, centers)

                # Distance regression. Boxes are (xmin, ymin, xmax, ymax);
                # the regressor input is (width, height) per box.
                start_time = time.time()
                box_widths = boxes[:, 2] - boxes[:, 0]
                box_heights = boxes[:, 3] - boxes[:, 1]
                X = np.column_stack((box_widths, box_heights))
                X_scaled = X_scaler.transform(X)
                distances = dist_regr_model(
                    torch.Tensor(X_scaled).to(device)).to(cpu_device).numpy()
                stage_ms = elapsed_ms(start_time)
                single_frame_render_time += stage_ms
                print(f"Distance Regression Inference time {stage_ms}ms")

                # Object tracking with centroids.
                start_time = time.time()
                centroid_tracker.update(centers, distances)
                stage_ms = elapsed_ms(start_time)
                single_frame_render_time += stage_ms
                print(f"Centroid Tracking Update time {stage_ms}ms")

                if len(centers) > 1:
                    # Rescale the centre coordinates to the range [0, 100].
                    _x = centers[:, 0]
                    _y = centers[:, 1]
                    centers[:, 0] = reset_range(max(_x), min(_x), 100, 0, _x)
                    centers[:, 1] = reset_range(max(_y), min(_y), 100, 0, _y)

                    start_time = time.time()
                    dbscan_center = DBSCAN(eps=18)
                    dbscan_center.fit(centers)
                    stage_ms = elapsed_ms(start_time)
                    single_frame_render_time += stage_ms
                    print(f"DBSCAN Clustering time {stage_ms}ms")

                    if gen_heatmap:
                        # NOTE(review): `_labels` is kept as in the original;
                        # confirm it matches the DBSCAN class in use.
                        cluster_labels = dbscan_center._labels
                        image = generate_cv2_heatmap(
                            centers, cluster_labels, None, None,
                            len(set(cluster_labels)),
                            covariance_type='diag')
                        cv2.imshow("frame", image)

            if not gen_heatmap:
                drawn_image = draw_boxes(image, boxes, labels, scores,
                                         distances,
                                         class_names).astype(np.uint8)
                cv2.imshow("frame", drawn_image)

            print(f"Total time to render one frame {single_frame_render_time}." +
                  f"FPS {round(1 / (single_frame_render_time / 1000))}")

            key = cv2.waitKey(1)
            if key & 0xFF == ord('x'):
                break
    finally:
        # Always free the camera and close the preview, even on errors.
        capture.release()
        cv2.destroyAllWindows()

    print("Distance counts for tracked objects")
    print(centroid_tracker.obj_distance_counts)

    write_file = f'{output_dir}/dist_regr_results/{round(time.time())}.txt'
    print(f"Writing the distance values to file {write_file}")
    os.makedirs(f'{output_dir}/dist_regr_results', exist_ok=True)
    with open(write_file, 'w') as fw:
        for key, arr in centroid_tracker.obj_distance_counts.items():
            fw.write(str(key) + ',' + ','.join(str(v) for v in arr))
            fw.write('\n')
def run_demo(cfg, ckpt, score_threshold, images_dir, output_dir, dataset_type, model_path=None):
    """Detect objects in every ``*.jpg`` under ``images_dir`` and save annotated copies.

    Args:
        cfg: Project config; ``MODEL.DEVICE``, ``OUTPUT_DIR`` and
            ``TEST.BN_FUSE`` are read here.
        ckpt: Checkpoint path, or ``None`` to let the ``CheckPointer`` pick the
            latest one. Only consulted when ``model_path`` is ``None``.
        score_threshold: Detections whose score is not above this are dropped.
        images_dir: Directory that is globbed for ``*.jpg`` inputs.
        output_dir: Directory receiving the annotated images (created via
            ``mkdir``); each output keeps its input's base name.
        dataset_type: ``"voc"``, ``"coco"``, or any other name, which is
            forwarded to ``TxtDataset`` to obtain the class names.
        model_path: Optional path to a state dict. When given, weights come
            from this file instead of from the checkpointer.
    """
    if dataset_type == "voc":
        class_names = VOCDataset.class_names
    elif dataset_type == 'coco':
        class_names = COCODataset.class_names
    else:
        class_names = TxtDataset(dataset_name=dataset_type).class_names

    device = torch.device(cfg.MODEL.DEVICE)
    model = build_detection_model(cfg)
    model = model.to(device)

    # Load weights exactly once, from whichever source was requested.
    if model_path is None:
        checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR)
        checkpointer.load(ckpt, use_latest=ckpt is None)
        weight_file = ckpt if ckpt else checkpointer.get_checkpoint_file()
        print('Loaded weights from {}'.format(weight_file))
    else:
        # map_location lets weights saved on another device load onto `device`.
        model.load_state_dict(torch.load(model_path, map_location=device))

    # BN layers are fused on request; per the original author's note the FPGA
    # target has no BN, so the demo must run the fused backbone.
    if cfg.TEST.BN_FUSE is True:
        print('BN_FUSE.')
        model.backbone.bn_fuse()
        model.to(device)

    image_paths = glob.glob(os.path.join(images_dir, '*.jpg'))  # .png
    mkdir(output_dir)

    cpu_device = torch.device("cpu")
    transforms = build_transforms(cfg, is_train=False)
    model.eval()

    for i, image_path in enumerate(image_paths):
        start = time.time()
        image_name = os.path.basename(image_path)

        image = np.array(Image.open(image_path).convert("RGB"))
        height, width = image.shape[:2]
        images = transforms(image)[0].unsqueeze(0)
        load_time = time.time() - start

        start = time.time()
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            result = model(images.to(device))[0]
        inference_time = time.time() - start

        # Scale predictions back to the original image size before drawing.
        result = result.resize((width, height)).to(cpu_device).numpy()
        boxes, labels, scores = result['boxes'], result['labels'], result['scores']

        indices = scores > score_threshold
        boxes = boxes[indices]
        labels = labels[indices]
        scores = scores[indices]

        meters = ' | '.join([
            'objects {:02d}'.format(len(boxes)),
            'load {:03d}ms'.format(round(load_time * 1000)),
            'inference {:03d}ms'.format(round(inference_time * 1000)),
            # Guard against a zero reading from a coarse clock.
            'FPS {}'.format(round(1.0 / max(inference_time, 1e-9))),
        ])
        print('({:04d}/{:04d}) {}: {}'.format(i + 1, len(image_paths), image_name, meters))

        drawn_image = draw_boxes(image, boxes, labels, scores, class_names).astype(np.uint8)
        Image.fromarray(drawn_image).save(os.path.join(output_dir, image_name))
def run_demo(cfg, ckpt, score_threshold, output_dir, dataset_type,
             video_path='parking_lot/13.mp4', output_video='ssd.avi', max_frames=800):
    """Run the detector on a video, draw class-3 detections and write an annotated video.

    Args:
        cfg: Project config; ``MODEL.DEVICE`` and ``OUTPUT_DIR`` are read here.
        ckpt: Checkpoint path, or ``None`` to use the latest checkpoint.
        score_threshold: Detections whose score is not above this are dropped.
        output_dir: Accepted for interface compatibility; not used by this variant.
        dataset_type: ``"voc"`` or ``"coco"``.
        video_path: Input video file (defaults to the previously hard-coded path).
        output_video: Output video file (defaults to the previously hard-coded path).
        max_frames: Stop after this many frames have been processed.

    Raises:
        NotImplementedError: If ``dataset_type`` is neither ``"voc"`` nor ``"coco"``.

    Note:
        ``color`` and ``fontStyle`` are not defined in this function; they are
        presumably module-level names (verify against the rest of the file).
    """
    if dataset_type == "voc":
        class_names = VOCDataset.class_names
    elif dataset_type == 'coco':
        class_names = COCODataset.class_names
    else:
        raise NotImplementedError('Not implemented now.')

    device = torch.device(cfg.MODEL.DEVICE)
    model = build_detection_model(cfg)
    model = model.to(device)
    checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(ckpt, use_latest=ckpt is None)
    weight_file = ckpt if ckpt else checkpointer.get_checkpoint_file()
    print('Loaded weights from {}'.format(weight_file))

    cpu_device = torch.device("cpu")
    transforms = build_transforms(cfg, is_train=False)
    model.eval()

    cap = cv2.VideoCapture(video_path)
    sz = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
          int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    fourcc = cv2.VideoWriter_fourcc(*'DIVX')
    fps = 50
    vout = cv2.VideoWriter(output_video, fourcc, fps, sz, True)

    count = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            count += 1

            start = time.time()
            image = frame
            height, width = image.shape[:2]
            images = transforms(image)[0].unsqueeze(0)
            load_time = time.time() - start

            start = time.time()
            # Inference only: no autograd graph is needed.
            with torch.no_grad():
                result = model(images.to(device))[0]
            inference_time = time.time() - start

            result = result.resize((width, height)).to(cpu_device).numpy()
            boxes, labels, scores = result['boxes'], result['labels'], result['scores']

            indices = scores > score_threshold
            boxes = boxes[indices]
            labels = labels[indices]
            scores = scores[indices]

            meters = ' | '.join([
                'objects {:02d}'.format(len(boxes)),
                'load {:03d}ms'.format(round(load_time * 1000)),
                'inference {:03d}ms'.format(round(inference_time * 1000)),
                # Guard against a zero reading from a coarse clock.
                'FPS {}'.format(round(1.0 / max(inference_time, 1e-9))),
            ])
            print(meters)

            for box, label, score in zip(boxes, labels, scores):
                # Only label id 3 is drawn, captioned 'car'.
                # NOTE(review): id 3 matching 'car' depends on the dataset's class list - confirm.
                if label != 3:
                    continue
                text = 'car:' + str(round(score, 2))
                # OpenCV drawing functions expect integer pixel coordinates.
                top_left = (int(box[0]), int(box[1]))
                bottom_right = (int(box[2]), int(box[3]))
                cv2.rectangle(image, top_left, bottom_right, color, 3)

                # Text is rendered through PIL so that `fontStyle` can be used.
                image = Image.fromarray(image)
                draw = ImageDraw.Draw(image)
                draw.text((box[0], box[1] - 40), text, color, font=fontStyle)
                # np.array (not np.asarray) gives a writable copy, so the next
                # cv2.rectangle call can still draw into the frame.
                image = np.array(image)

            cv2.imshow('drawn_image', image)
            vout.write(image)
            if count >= max_frames or cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always release the capture/writer so the output file is finalised
        # even when processing fails midway.
        cap.release()
        vout.release()
        cv2.destroyAllWindows()
def run_demo(cfg, ckpt, score_threshold, images_dir, output_dir, dataset_type):
    """Track 'person' detections over an image sequence and write MOT-style rows.

    Each written row has the form
    ``frame,object_id,x_min,y_min,width,height,-1,-1,-1``.

    Args:
        cfg: Project config; ``MODEL.DEVICE`` and ``OUTPUT_DIR`` are read here.
        ckpt: Checkpoint path, or ``None`` to use the latest checkpoint.
        score_threshold: Detections whose score is not above this are dropped.
        images_dir: NOTE(review): currently ignored - the function overrides it
            with a hard-coded MOT16-02 path. Kept as-is to preserve behaviour.
        output_dir: Created via ``mkdir``; nothing is written to it by this variant.
        dataset_type: ``"voc"`` or ``"coco"``.

    Raises:
        NotImplementedError: If ``dataset_type`` is neither ``"voc"`` nor ``"coco"``.
    """
    if dataset_type == "voc":
        class_names = VOCDataset.class_names
    elif dataset_type == 'coco':
        class_names = COCODataset.class_names
    else:
        raise NotImplementedError('Not implemented now.')

    if torch.cuda.is_available():
        device = torch.device(cfg.MODEL.DEVICE)
    else:
        device = torch.device("cpu")

    model = build_detection_model(cfg)
    model = model.to(device)
    checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(ckpt, use_latest=ckpt is None)
    weight_file = ckpt if ckpt else checkpointer.get_checkpoint_file()
    print('Loaded weights from {}'.format(weight_file))

    images_dir = 'datasets/MOT16/train/MOT16-02/img1'
    # Sorted so frames are processed in file-name order.
    image_paths = sorted(glob.glob(os.path.join(images_dir, '*.jpg')))
    mkdir(output_dir)

    cpu_device = torch.device("cpu")
    transforms = build_transforms(cfg, is_train=False)
    model.eval()

    person_label_idx = class_names.index('person')
    centroid_tracker = CentroidTracker()
    inference_times = []

    # The context manager closes the results file even if a frame fails.
    with open('py-motmetrics/motmetrics/data/MOT16/predicted/MOT16-02.txt', 'w') as wfile:
        for image_path in image_paths:
            image_name = os.path.basename(image_path)

            start_time = time.time()
            image = np.array(Image.open(image_path).convert("RGB"))
            height, width = image.shape[:2]
            images = transforms(image)[0].unsqueeze(0)

            # Inference only: no autograd graph is needed.
            with torch.no_grad():
                result = model(images.to(device))[0]
            result = result.resize((width, height)).to(cpu_device).numpy()
            boxes, labels, scores = result['boxes'], result['labels'], result['scores']

            # remove all non person class detections
            indices = np.logical_and(scores > score_threshold,
                                     labels == person_label_idx)
            boxes = boxes[indices]
            distances = None

            # Record and print the same measurement (load + inference time).
            elapsed = time.time() - start_time
            inference_times.append(elapsed)
            print(elapsed)

            if len(boxes) != 0:
                centers = np.apply_along_axis(get_mid_point, 1, boxes)

                # object tracking with centroids
                centroid_tracker.update(centers, distances, boxes)

                # The frame number is taken from the file name, e.g. '000001.jpg' -> 1.
                fnum = int(image_name.split('.')[0])
                # loop over the tracked objects
                for objID, bbox_ in centroid_tracker.obj_bbox.items():
                    xm, ym = bbox_[0], bbox_[1]
                    w, h = bbox_[2] - bbox_[0], bbox_[3] - bbox_[1]
                    wfile.write(f"{fnum},{objID},{xm},{ym},{w},{h},-1,-1,-1\n")

    if not inference_times:
        # Avoid dividing by zero when the image directory is empty.
        print("No frames were processed; average frame rate is unavailable")
        return

    framerates = [1 / tm for tm in inference_times]
    print(
        f"Avg frame rate is {sum(framerates) / len(framerates)} for {len(framerates)} frames"
    )
def run_demo(cfg, ckpt, score_threshold, images_dir, output_dir, onnx_dir, dataset_type):
    """Export the detector to ONNX (if not already exported) and compare it with PyTorch.

    For every ``*.jpg`` in ``images_dir`` two annotated images are written to
    ``output_dir``: ``pytorch_<name>`` from the PyTorch model and
    ``onnx_<name>`` from the ONNX Runtime session.

    Args:
        cfg: Project config; ``MODEL.DEVICE``, ``OUTPUT_DIR`` and
            ``INPUT.IMAGE_SIZE`` are read here.
        ckpt: Checkpoint path, or ``None`` to use the latest checkpoint.
        score_threshold: Detections whose score is not above this are dropped.
        images_dir: Directory that is globbed for ``*.jpg`` inputs.
        output_dir: Directory receiving the annotated images.
        onnx_dir: Directory holding the exported ``.onnx`` file.
        dataset_type: ``"voc"`` or ``"coco"``.

    Raises:
        NotImplementedError: If ``dataset_type`` is neither ``"voc"`` nor ``"coco"``.
    """
    if dataset_type == "voc":
        class_names = VOCDataset.class_names
    elif dataset_type == 'coco':
        class_names = COCODataset.class_names
    else:
        raise NotImplementedError('Not implemented now.')

    device = torch.device(cfg.MODEL.DEVICE)
    device = "cpu" if not torch.cuda.is_available() else device

    model = build_detection_model(cfg)
    model = model.to(device)
    checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(ckpt, use_latest=ckpt is None)
    weight_file = ckpt if ckpt else checkpointer.get_checkpoint_file()
    print('Loaded weights from {}'.format(weight_file))

    image_paths = glob.glob(os.path.join(images_dir, '*.jpg'))
    mkdir(output_dir)

    cpu_device = torch.device("cpu")
    transforms = build_transforms(cfg, is_train=False)
    model.eval()

    # get model ready for onnx export
    mkdir(onnx_dir)
    model_onnx = build_detection_model(cfg)
    model_onnx = model_onnx.to(device)
    checkpointer_onnx = CheckPointer(model_onnx, save_dir=cfg.OUTPUT_DIR)
    checkpointer_onnx.load(ckpt, use_latest=ckpt is None)
    # replace the SSD box head postprocessor with the onnx version for exporting
    model_onnx.box_head.post_processor = PostProcessorOnnx(cfg)
    model_onnx.eval()

    # The ONNX file is named after the weight file that was actually loaded.
    # Using `weight_file` rather than `ckpt` keeps this working when ckpt is
    # None and the latest checkpoint was picked automatically.
    model_onnx_name = os.path.basename(weight_file).split('.')[0] + ".onnx"
    model_onnx_path = os.path.join(onnx_dir, model_onnx_name)

    # An existing export is reused rather than overwritten.
    if not os.path.exists(model_onnx_path):
        dummy_input = torch.zeros(
            [1, 3, cfg.INPUT.IMAGE_SIZE, cfg.INPUT.IMAGE_SIZE]).to(device)
        torch.onnx.export(model_onnx,
                          dummy_input,
                          model_onnx_path,
                          export_params=True,
                          do_constant_folding=True,
                          opset_version=11,
                          input_names=['input'],
                          output_names=['boxes', 'scores', 'labels'],
                          dynamic_axes={
                              'input': {0: 'batch_size', 2: "height", 3: "width"}},
                          verbose=False)
        # Report success only after the export has actually completed.
        print(f'Model exported as onnx to {model_onnx_path}')

    # load exported onnx model for inference test
    print(
        f'Loading exported onnx model from {model_onnx_path} for inference comparison test')
    onnx_runtime_sess = onnxruntime.InferenceSession(model_onnx_path)

    for i, image_path in enumerate(image_paths):
        start = time.time()
        image_name = os.path.basename(image_path)

        image = np.array(Image.open(image_path).convert("RGB"))
        height, width = image.shape[:2]
        images = transforms(image)[0].unsqueeze(0)
        load_time = time.time() - start

        # ---- PyTorch prediction ----
        start = time.time()
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            result = model(images.to(device))[0]
        inference_time = time.time() - start

        result = result.resize((width, height)).to(cpu_device).numpy()
        boxes, labels, scores = result['boxes'], result['labels'], result['scores']

        indices = scores > score_threshold
        boxes, labels, scores = boxes[indices], labels[indices], scores[indices]
        meters = ' | '.join(
            [
                'objects {:02d}'.format(len(boxes)),
                'load {:03d}ms'.format(round(load_time * 1000)),
                'inference {:03d}ms'.format(round(inference_time * 1000)),
                # Guard against a zero reading from a coarse clock.
                'FPS {}'.format(round(1.0 / max(inference_time, 1e-9)))
            ]
        )
        print('Pytorch: ({:04d}/{:04d}) {}: {}'.format(i + 1, len(image_paths), image_name, meters))

        drawn_image = draw_boxes(image, boxes, labels, scores, class_names).astype(np.uint8)
        Image.fromarray(drawn_image).save(
            os.path.join(output_dir, "pytorch_" + image_name))

        # ---- ONNX Runtime prediction ----
        start = time.time()
        ort_inputs = {onnx_runtime_sess.get_inputs()[0].name: np.array(images)}
        boxes, scores, labels = onnx_runtime_sess.run(None, ort_inputs)
        inference_time = time.time() - start

        indices = scores > score_threshold
        boxes, labels, scores = boxes[indices], labels[indices], scores[indices]

        # resize bounding boxes to size of the original image
        # (x coordinates are scaled by width, y coordinates by height)
        boxes[:, 0::2] *= (width)
        boxes[:, 1::2] *= (height)

        meters = ' | '.join(
            [
                'objects {:02d}'.format(len(boxes)),
                'load {:03d}ms'.format(round(load_time * 1000)),
                'inference {:03d}ms'.format(round(inference_time * 1000)),
                'FPS {}'.format(round(1.0 / max(inference_time, 1e-9)))
            ]
        )
        print('Onnx: ({:04d}/{:04d}) {}: {}'.format(i + 1, len(image_paths), image_name, meters))

        drawn_image = draw_boxes(image, boxes, labels, scores, class_names).astype(np.uint8)
        Image.fromarray(drawn_image).save(
            os.path.join(output_dir, "onnx_" + image_name))
def run_demo(cfg, ckpt, score_threshold, images_dir, output_dir, dataset_type, gen_heatmap):
    """Detect people in each ``*.jpg`` of ``images_dir``, cluster them and estimate distances.

    For every image the 'person' detections above ``score_threshold`` are kept.
    When at least one remains, their centre points are drawn, rescaled to the
    0-100 range, clustered with DBSCAN, and a distance is regressed from each
    box's (width, height). With ``gen_heatmap`` set, the three heatmap
    generators are invoked as well. The annotated image is saved to
    ``output_dir`` under the input's base name.

    Args:
        cfg: Project config; ``MODEL.DEVICE`` and ``OUTPUT_DIR`` are read here.
        ckpt: Checkpoint path, or ``None`` to use the latest checkpoint.
        score_threshold: Detections whose score is not above this are dropped.
        images_dir: Directory that is globbed for ``*.jpg`` inputs.
        output_dir: Directory receiving the annotated images.
        dataset_type: ``"voc"`` or ``"coco"``.
        gen_heatmap: Truthy to also generate the heatmap outputs.

    Raises:
        NotImplementedError: If ``dataset_type`` is neither ``"voc"`` nor ``"coco"``.
    """
    if dataset_type == 'coco':
        class_names = COCODataset.class_names
    elif dataset_type == "voc":
        class_names = VOCDataset.class_names
    else:
        raise NotImplementedError('Not implemented now.')

    # Use the configured device only when CUDA is present; otherwise use CPU.
    device = torch.device(cfg.MODEL.DEVICE if torch.cuda.is_available() else "cpu")

    detector = build_detection_model(cfg).to(device)
    checkpointer = CheckPointer(detector, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(ckpt, use_latest=ckpt is None)
    weight_file = ckpt or checkpointer.get_checkpoint_file()
    print('Loaded weights from {}'.format(weight_file))

    image_paths = glob.glob(os.path.join(images_dir, '*.jpg'))
    mkdir(output_dir)

    cpu_device = torch.device("cpu")
    transforms = build_transforms(cfg, is_train=False)
    detector.eval()

    # Distance regressor (constructed with the argument 2) and its input scaler.
    dist_regr_model = load_model_weight(DistanceRegrNet(2), device)  # load weights
    dist_regr_model.eval()
    X_scaler = load_standardizer(Standardizer())

    person_label_idx = class_names.index('person')
    n_images = len(image_paths)

    for i, image_path in enumerate(image_paths):
        t_load = time.time()
        image_name = os.path.basename(image_path)
        image = np.array(Image.open(image_path).convert("RGB"))
        height, width = image.shape[:2]
        batch = transforms(image)[0].unsqueeze(0)
        load_time = time.time() - t_load

        t_infer = time.time()
        result = detector(batch.to(device))[0]
        inference_time = time.time() - t_infer

        result = result.resize((width, height)).to(cpu_device).numpy()
        boxes = result['boxes']
        labels = result['labels']
        scores = result['scores']

        # remove all non person class detections
        keep = (scores > score_threshold) & (labels == person_label_idx)
        boxes, labels, scores = boxes[keep], labels[keep], scores[keep]

        distances = None
        # create gaussian mixture models and kde plots only if centers detected
        if len(boxes) != 0:
            centers = np.apply_along_axis(get_mid_point, 1, boxes)
            image = draw_points(image, centers)  # draw center points on image

            # reset center point ranges to a min of 0 and max of 100
            xs = centers[:, 0]
            ys = centers[:, 1]
            centers[:, 0] = reset_range(max(xs), min(xs), 100, 0, xs)
            centers[:, 1] = reset_range(max(ys), min(ys), 100, 0, ys)

            # DBSCAN Clustering
            t_cluster = time.time()
            dbscan_center = DBSCAN(eps=18)
            dbscan_center.fit(centers)
            cluster_ms = round((time.time() - t_cluster) * 1000, 3)
            print(f"DBSCAN clustering time {cluster_ms}ms")

            # Distance Regression
            t_regr = time.time()
            # boxes are (xmin, ymin, xmax, ymax); the regressor input is (width, height)
            box_w = boxes[:, 2] - boxes[:, 0]
            box_h = boxes[:, 3] - boxes[:, 1]
            X_scaled = X_scaler.transform(np.column_stack((box_w, box_h)))
            distances = dist_regr_model(torch.Tensor(X_scaled).to(device))
            regr_ms = round(time.time() - t_regr, 4) * 1000
            print(f"Distance Regr Inference time {regr_ms}ms")

            if gen_heatmap:
                cluster_labels = dbscan_center._labels
                generate_sns_kde_heatmap(centers[:, 0], centers[:, 1], i, image_name)
                generate_sk_gaussian_mixture(centers, cluster_labels, i, image_name,
                                             len(set(cluster_labels)),
                                             covariance_type='diag')
                generate_cv2_heatmap(centers, cluster_labels, i, image_name,
                                     len(set(cluster_labels)),
                                     covariance_type='diag')

        stats = [
            'objects {:02d}'.format(len(boxes)),
            'load {:03d}ms'.format(round(load_time * 1000)),
            'inference {:03d}ms'.format(round(inference_time * 1000)),
            'FPS {}'.format(round(1.0 / inference_time)),
        ]
        meters = ' | '.join(stats)
        print('({:04d}/{:04d}) {}: {}'.format(i + 1, n_images, image_name, meters))

        drawn_image = draw_boxes(image, boxes, labels, scores, distances,
                                 class_names).astype(np.uint8)
        out_path = os.path.join(output_dir, image_name)
        Image.fromarray(drawn_image).save(out_path)