class Detect(object):
    """Run an EfficientDet model on a single image file.

    Args:
        weights: Path to a saved ``state_dict``, or ``None`` to keep the
            freshly initialised model parameters.
        num_class: Number of classes passed to ``EfficientDet``.
    """

    def __init__(self, weights, num_class=21):
        super(Detect, self).__init__()
        self.weights = weights
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else 'cpu')
        self.transform = transforms.Compose([Normalizer(), Resizer()])
        self.model = EfficientDet(num_classes=num_class, is_training=False)
        self.model = self.model.to(self.device)
        if self.weights is not None:
            print('Load pretrained Model')
            # map_location lets weights saved on a GPU load on a CPU-only
            # host; without it torch.load tries to restore the CUDA tensors.
            state_dict = torch.load(self.weights, map_location=self.device)
            self.model.load_state_dict(state_dict)
        self.model.eval()

    def process(self, file_name):
        """Detect objects in ``file_name``.

        Args:
            file_name: Path of the image to read with ``cv2.imread``.

        Returns:
            The tuple produced by ``np.where(scores > 0.1)``, i.e. the
            indices of the scores above 0.1.

        Raises:
            FileNotFoundError: If OpenCV cannot read ``file_name``.
        """
        img = cv2.imread(file_name)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(
                'Unable to read image: {}'.format(file_name))
        img = self.transform(img)
        img = img.to(self.device)
        # Add a batch axis and move the last axis to position 1
        # (assumes the transform yields an HWC tensor -- TODO confirm).
        img = img.unsqueeze(0).permute(0, 3, 1, 2)
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            scores, classification, transformed_anchors = self.model(img)
        scores = scores.detach().cpu().numpy()
        idxs = np.where(scores > 0.1)
        return idxs
class Detect(object):
    """Run an EfficientDet model on an image file and draw the detections.

    Args:
        weights: Path to a checkpoint holding ``'state_dict'`` and
            ``'num_class'`` entries, or ``None`` to skip loading.
        num_class: Number of classes used when no checkpoint is given; a
            checkpoint's own ``'num_class'`` takes precedence.
    """

    def __init__(self, weights, num_class=21):
        super(Detect, self).__init__()
        self.weights = weights
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else 'cpu')
        self.transform = get_augumentation(phase='test')
        self.show_transform = get_augumentation(phase='show')

        # Read the checkpoint *before* building the model so the stored class
        # count actually shapes the network it is loaded into.
        state_dict = None
        if self.weights is not None:
            print('Load pretrained Model')
            state = torch.load(self.weights,
                               map_location=lambda storage, loc: storage)
            state_dict = state['state_dict']
            num_class = state['num_class']

        self.model = EfficientDet(num_classes=num_class, is_training=False)
        if state_dict is not None:
            self.model.load_state_dict(state_dict)
        # Follow self.device rather than forcing CUDA, so the CPU fallback
        # selected above really works on machines without a GPU.
        self.model = self.model.to(self.device)
        self.model.eval()

    def process(self, file_name):
        """Detect objects in ``file_name`` and save an annotated copy.

        Detections scoring above 0.25 are drawn on the ``'show'``-phase
        image, which is then written to ``docs/output.png``.

        Args:
            file_name: Path of the image to read with ``cv2.imread``.

        Raises:
            FileNotFoundError: If OpenCV cannot read ``file_name``.
        """
        img = cv2.imread(file_name)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(
                'Unable to read image: {}'.format(file_name))
        show_image = self.show_transform(image=img)['image']
        img = self.transform(image=img)['image']
        img = img.to(self.device).unsqueeze(0)
        with torch.no_grad():
            scores, classification, transformed_anchors = self.model(img)

        keep = np.where(scores.detach().cpu().numpy() > 0.25)[0]
        for idx in keep:
            bbox = transformed_anchors[idx, :]
            x1, y1, x2, y2 = (int(bbox[k]) for k in range(4))
            label_name = VOC_CLASSES[int(classification[idx])]
            cv2.rectangle(show_image, (x1, y1), (x2, y2), (77, 255, 9), 3, 1)
            cv2.putText(show_image, label_name, (x1 - 10, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
        cv2.imwrite('docs/output.png', show_image)
class Detect(object):
    """EfficientDet inference on still images, video files or a camera.

    Drawing and score reporting are controlled by the module-level ``args``
    namespace (``args.cam``, ``args.score``, ``args.video_name``).

    Args:
        weights: Path to a checkpoint holding ``'parser'`` and
            ``'state_dict'`` entries, or ``None`` to skip loading.
        num_class: Class count used when no checkpoint is given.
        network: Key into ``EFFICIENTDET`` used when no checkpoint is given.
        size_image: Network input size; ``process`` uses index 0 to rescale
            y coordinates and index 1 to rescale x coordinates.
    """

    def __init__(self, weights, num_class=21, network='efficientdet-d0',
                 size_image=(512, 512)):
        super(Detect, self).__init__()
        self.weights = weights
        self.size_image = size_image
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else 'cpu')
        self.transform = get_augumentation(phase='test')

        # Kept local: a module-level global would pin the whole checkpoint in
        # memory for the lifetime of the process.
        checkpoint = None
        if self.weights is not None:
            print('Load pretrained Model')
            checkpoint = torch.load(self.weights,
                                    map_location=lambda storage, loc: storage)
            params = checkpoint['parser']
            num_class = params.num_class
            network = params.network

        self.model = EfficientDet(
            num_classes=num_class,
            network=network,
            W_bifpn=EFFICIENTDET[network]['W_bifpn'],
            D_bifpn=EFFICIENTDET[network]['D_bifpn'],
            D_class=EFFICIENTDET[network]['D_class'],
            is_training=False
        )
        if checkpoint is not None:
            self.model.load_state_dict(checkpoint['state_dict'])
        self.model = self.model.to(self.device)
        self.model.eval()

    def process(self, file_name=None, img=None, show=False):
        """Detect objects in one image.

        Args:
            file_name: Image path; when given it overrides ``img``.
            img: Image array as returned by ``cv2.imread`` / ``cap.read``.
            show: If true, plot the detections with ``vis_bbox``, save the
                figure to ``./docs/demo.png`` and show it.

        Returns:
            A copy of the input image (annotated when ``args.cam`` is set)
            if ``show`` is false, otherwise ``None``.

        Raises:
            ValueError: If no image is supplied or the file cannot be read.
        """
        if file_name is not None:
            img = cv2.imread(file_name)
        if img is None:
            raise ValueError(
                'No image to process (file_name={!r})'.format(file_name))
        origin_img = copy.deepcopy(img)
        img = self.transform(image=img)['image']
        img = img.to(self.device).unsqueeze(0)
        with torch.no_grad():
            scores, classification, transformed_anchors = self.model(img)

        bboxes = list()
        labels = list()
        bbox_scores = list()
        font = cv2.FONT_HERSHEY_SIMPLEX
        for j in range(scores.shape[0]):
            bbox = transformed_anchors[j].data.cpu().numpy()
            # Map box corners from network input size back to the source
            # image resolution.
            x1 = int(bbox[0] * origin_img.shape[1] / self.size_image[1])
            y1 = int(bbox[1] * origin_img.shape[0] / self.size_image[0])
            x2 = int(bbox[2] * origin_img.shape[1] / self.size_image[1])
            y2 = int(bbox[3] * origin_img.shape[0] / self.size_image[0])
            bboxes.append([x1, y1, x2, y2])
            label_name = VOC_CLASSES[int(classification[j])]
            labels.append(label_name)

            if args.score:
                # round() rather than int() truncation, so e.g. 0.29 is
                # reported as 29 regardless of float representation.
                score = int(round(scores[j].item() * 100))
                bbox_scores.append(score)
                caption = '{} {}'.format(label_name, score)
                fill = (223, 128, 255)
            else:
                # No score is computed in this mode, so the caption must not
                # reference one.
                caption = '{}'.format(label_name)
                fill = (0, 102, 255)

            if args.cam:
                cv2.rectangle(origin_img, (x1, y1), (x2, y2),
                              (179, 255, 179), 2, 1)
                labelSize, baseLine = cv2.getTextSize(caption, font, 0.8, 2)
                cv2.rectangle(
                    origin_img,
                    (x1, y1 - labelSize[1]),
                    (x1 + labelSize[0], y1 + baseLine),
                    fill, cv2.FILLED)
                cv2.putText(origin_img, caption, (x1, y1), font, 0.8,
                            (0, 0, 0), 2)

        if show:
            # NOTE(review): bbox_scores stays empty when args.score is off;
            # confirm vis_bbox accepts a score list shorter than bbox.
            fig, ax = vis_bbox(img=origin_img, bbox=bboxes,
                               label=labels, score=bbox_scores)
            fig.savefig('./docs/demo.png')
            plt.show()
        else:
            return origin_img

    def camera(self):
        """Run detection on ``args.video_name`` or, if unset, on camera 0.

        Prints the measured frames-per-second after every processed frame.
        The process exits with status -1 when the source cannot be opened or
        a frame cannot be read; the capture is released in either case.
        """
        if args.video_name:
            cap = cv2.VideoCapture(args.video_name)
        else:
            cap = cv2.VideoCapture(0)
        if not cap.isOpened():
            print("Unable to open camera")
            exit(-1)

        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()
        try:
            while True:
                res, img = cap.read()
                curr_time = timer()
                exec_time = curr_time - prev_time
                prev_time = curr_time
                accum_time = accum_time + exec_time
                curr_fps = curr_fps + 1
                if accum_time > 1:
                    # A full second elapsed: publish the frame count.
                    accum_time = accum_time - 1
                    fps = curr_fps
                    curr_fps = 0
                if res:
                    show_image = self.process(img=img)
                    cv2.putText(
                        show_image, "FPS: " + str(fps), (10, 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (204, 51, 51), 2
                    )
                    # On-screen display is disabled here; only the FPS is
                    # reported.
                    print(f'{fps = }')
                else:
                    print("Unable to read image")
                    exit(-1)
        finally:
            # Runs on SystemExit as well, so the device is always freed.
            cap.release()
            cv2.destroyAllWindows()
class Detect(object):
    """EfficientDet inference wrapper returning raw detections.

    Args:
        weights: Path to a checkpoint holding ``'num_class'``, ``'network'``
            and ``'state_dict'`` entries, or ``None`` to skip loading.
        num_class: Class count used when no checkpoint is given.
        network: Key into ``EFFICIENTDET`` used when no checkpoint is given.
        size_image: Network input size; ``process`` uses index 0 to rescale
            y coordinates and index 1 to rescale x coordinates.
    """

    def __init__(self, weights, num_class=21, network='efficientdet-d1',
                 size_image=(512, 512)):
        super(Detect, self).__init__()
        self.weights = weights
        self.size_image = size_image
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else 'cpu')
        # process() treats size_image[1] as the width and size_image[0] as
        # the height, so the resize must use the same convention.
        self.transform = get_augumentation(
            phase='test', width=size_image[1], height=size_image[0])

        checkpoint = None
        if self.weights is not None:
            print('Load pretrained Model')
            checkpoint = torch.load(self.weights,
                                    map_location=lambda storage, loc: storage)
            num_class = checkpoint['num_class']
            network = checkpoint['network']

        self.model = EfficientDet(
            num_classes=num_class,
            network=network,
            W_bifpn=EFFICIENTDET[network]['W_bifpn'],
            D_bifpn=EFFICIENTDET[network]['D_bifpn'],
            D_class=EFFICIENTDET[network]['D_class'],
            is_training=False,
            threshold=0.055)
        if checkpoint is not None:
            self.model.load_state_dict(checkpoint['state_dict'])
        # Follow self.device rather than forcing CUDA, so the CPU fallback
        # selected above really works on machines without a GPU.
        self.model = self.model.to(self.device)
        self.model.eval()

    def process(self, file_name=None, img=None):
        """Detect objects in one image.

        Args:
            file_name: Image path; when given it overrides ``img``.
            img: Image array as returned by ``cv2.imread``.

        Returns:
            ``(bboxes, labels, bbox_scores)``: integer ``[x1, y1, x2, y2]``
            boxes in source-image pixels, the label strings (``'face'`` for
            class 0, ``'not recognized'`` otherwise) and the scores rounded
            to three decimals.

        Raises:
            ValueError: If no image is supplied or the file cannot be read.
        """
        if file_name is not None:
            img = cv2.imread(file_name)
        if img is None:
            raise ValueError(
                'No image to process (file_name={!r})'.format(file_name))
        origin_img = copy.deepcopy(img)
        img = self.transform(image=img)['image']
        img = img.to(self.device).unsqueeze(0)
        with torch.no_grad():
            scores, classification, transformed_anchors = self.model(img)

        bboxes = list()
        labels = list()
        bbox_scores = list()
        for j in range(scores.shape[0]):
            bbox = transformed_anchors[j].data.cpu().numpy()
            # Map box corners from network input size back to the source
            # image resolution.
            x1 = int(bbox[0] * origin_img.shape[1] / self.size_image[1])
            y1 = int(bbox[1] * origin_img.shape[0] / self.size_image[0])
            x2 = int(bbox[2] * origin_img.shape[1] / self.size_image[1])
            y2 = int(bbox[3] * origin_img.shape[0] / self.size_image[0])
            bboxes.append([x1, y1, x2, y2])
            labels.append(
                'face' if int(classification[j]) == 0 else 'not recognized')
            # .item() yields a plain Python float; converting a 1-element
            # ndarray with float() is deprecated in recent NumPy.
            bbox_scores.append(round(scores[j].item(), 3))
        return bboxes, labels, bbox_scores
class Detect(object):
    """EfficientDet inference on still images or a live camera.

    Thresholds and score display are read from the module-level ``args``
    namespace (``args.threshold``, ``args.iou_threshold``, ``args.score``).

    Args:
        weights: Path to a checkpoint holding ``'num_class'``, ``'network'``
            and ``'state_dict'`` entries, or ``None`` to skip loading.
        num_class: Class count used when no checkpoint is given.
        network: Network name used when no checkpoint is given.
    """

    def __init__(self, weights, num_class=21, network='efficientdet-d0'):
        super(Detect, self).__init__()
        self.weights = weights
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else 'cpu')
        self.transform = get_augumentation(phase='test')
        self.show_transform = get_augumentation(phase='show')

        checkpoint = None
        if self.weights is not None:
            print('Load pretrained Model')
            checkpoint = torch.load(self.weights,
                                    map_location=lambda storage, loc: storage)
            num_class = checkpoint['num_class']
            network = checkpoint['network']

        self.model = EfficientDet(
            num_classes=num_class,
            network=network,
            is_training=False,
            threshold=args.threshold,
            iou_threshold=args.iou_threshold
        )
        if checkpoint is not None:
            self.model.load_state_dict(checkpoint['state_dict'])
        # Follow self.device rather than forcing CUDA, so the CPU fallback
        # selected above really works on machines without a GPU.
        self.model = self.model.to(self.device)
        self.model.eval()

    def process(self, file_name=None, img=None, show=False):
        """Detect objects in one image and draw them.

        Args:
            file_name: Image path; when given it overrides ``img``.
            img: Image array as returned by ``cv2.imread`` / ``cap.read``.
            show: If true, display the annotated image, wait for a key press
                and write it to ``docs/output.png``.

        Returns:
            The annotated ``'show'``-phase image if ``show`` is false,
            otherwise ``None``.

        Raises:
            ValueError: If no image is supplied or the file cannot be read.
        """
        if file_name is not None:
            img = cv2.imread(file_name)
        if img is None:
            raise ValueError(
                'No image to process (file_name={!r})'.format(file_name))
        show_image = self.show_transform(image=img)['image']
        img = self.transform(image=img)['image']
        img = img.to(self.device).unsqueeze(0)
        with torch.no_grad():
            scores, classification, transformed_anchors = self.model(img)

        for j in range(scores.shape[0]):
            bbox = transformed_anchors[j]
            x1, y1, x2, y2 = (int(bbox[k]) for k in range(4))
            label_name = VOC_CLASSES[int(classification[j])]
            cv2.rectangle(show_image, (x1, y1), (x2, y2), (77, 255, 9), 3, 1)
            if args.score:
                # round() rather than int() truncation, and .item() instead
                # of converting a 1-element ndarray (deprecated in NumPy).
                percent = int(round(scores[j].item() * 100))
                caption = '{} {}%'.format(label_name, percent)
            else:
                caption = label_name
            cv2.putText(
                show_image, caption, (x1 - 10, y1 - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2
            )

        if show:
            cv2.imshow("Detection", show_image)
            cv2.waitKey(0)
            cv2.imwrite('docs/output.png', show_image)
        else:
            return show_image

    def camera(self):
        """Run detection on camera 0 until ESC is pressed.

        Each frame is annotated with the measured frames-per-second and shown
        in a window named ``Detection``. The process exits with status -1
        when the camera cannot be opened or a frame cannot be read; the
        capture and windows are released in every case.
        """
        cap = cv2.VideoCapture(0)
        if not cap.isOpened():
            print("Unable to open camera")
            exit(-1)

        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()
        try:
            while True:
                res, img = cap.read()
                curr_time = timer()
                exec_time = curr_time - prev_time
                prev_time = curr_time
                accum_time = accum_time + exec_time
                curr_fps = curr_fps + 1
                if accum_time > 1:
                    # A full second elapsed: publish the frame count.
                    accum_time = accum_time - 1
                    fps = curr_fps
                    curr_fps = 0
                if res:
                    show_image = self.process(img=img)
                    cv2.putText(
                        show_image, "FPS: " + str(fps), (10, 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 250, 0), 2
                    )
                    cv2.imshow("Detection", show_image)
                    k = cv2.waitKey(1)
                    if k == 27:  # ESC
                        break
                else:
                    print("Unable to read image")
                    exit(-1)
        finally:
            # Runs on SystemExit as well, so the device is always freed.
            cap.release()
            cv2.destroyAllWindows()
with torch.no_grad(): img_norm = (img / 255 - mean) / std nimg, nw, nh, ow, oh, _, _ = aspectaware_resize_padding(img, 512, 512) x = torch.from_numpy(nimg).permute(2, 0, 1).unsqueeze(0) if cuda: model = model.cuda() x = x.cuda() regression, classification, anchors = model(x) preds = postprocess(x, anchors, regression, classification, BBoxTransform(), ClipBoxes(), 0.5, 0.7) if not preds: return None preds = invert_affine([[nw, nh, ow, oh]], preds) print(len(preds[0]['rois'])) return preds params = yaml.safe_load(open('configs/coco.yml')) if __name__ == '__main__': model = EfficientDet(compound_coef=0, num_classes=len(params['obj_list']), ratios=eval(params['anchors_ratios']), scales=eval(params['anchors_scales'])) model.load_state_dict(torch.load('./weights/efficientdet-d0.pth')) model.eval() img = cv2.imread('demo.jpg') out = detector(img, model)
def main():
    """Run EfficientDet inference on images and/or a video from the CLI.

    Reads the command-line arguments via ``parse_args``, builds the model
    described by the merged configuration, loads ``args.ckpt`` and then:

    * if ``args.img`` is set, runs ``inference`` on that file (or on every
      valid file in that directory) and writes each result into
      ``args.save`` under the input's base name;
    * if ``args.vid`` is set, runs ``inference`` on every frame of that
      video and writes an ``mp4v`` encoded copy into ``args.save``.

    Raises:
        OSError: If the video given by ``args.vid`` cannot be opened.
    """
    args = parse_args()
    cfg = get_default_cfg()
    if args.config_file:
        cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    dataset = COCODataset(cfg.data.test[0], cfg.data.test[1])
    num_classes = dataset.num_classes
    label_map = dataset.labels

    model = EfficientDet(num_classes=num_classes, model_name=cfg.model.name)
    device = torch.device(cfg.device)
    model.to(device)
    model.eval()

    inp_size = model.config['inp_size']
    test_transforms = build_transforms(False, inp_size=inp_size)

    output_dir = cfg.output_dir
    checkpointer = Checkpointer(model, None, None, output_dir, True)
    checkpointer.load(args.ckpt)

    images = []
    if args.img:
        if osp.isdir(args.img):
            images = [
                osp.join(args.img, filename)
                for filename in os.listdir(args.img)
                if is_valid_file(filename)
            ]
        else:
            images = [args.img]

    for img_path in images:
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None on failure; skip rather than hand an
            # invalid frame to the model.
            print('Unable to read image: {}'.format(img_path))
            continue
        img = inference(model, img, label_map,
                        score_thr=args.score_thr, transforms=test_transforms)
        save_path = osp.join(args.save, osp.basename(img_path))
        cv2.imwrite(save_path, img)

    if args.vid:
        # The branch is selected by args.vid, so the same attribute must be
        # used for the source path and for the output file name.
        vCap = cv2.VideoCapture(args.vid)
        if not vCap.isOpened():
            vCap.release()
            raise OSError('Unable to open video: {}'.format(args.vid))
        fps = int(vCap.get(cv2.CAP_PROP_FPS))
        height = int(vCap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        width = int(vCap.get(cv2.CAP_PROP_FRAME_WIDTH))
        size = (width, height)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        save_path = osp.join(args.save, osp.basename(args.vid))
        vWrt = cv2.VideoWriter(save_path, fourcc, fps, size)
        try:
            while True:
                flag, frame = vCap.read()
                if not flag:
                    break
                frame = inference(model, frame, label_map,
                                  score_thr=args.score_thr,
                                  transforms=test_transforms)
                vWrt.write(frame)
        finally:
            # Release both handles even if inference raises, so the output
            # file is finalised and the source is closed.
            vCap.release()
            vWrt.release()