def read_images():
    """Run EfficientDet-D7 detection on every file found in ``imgfile_path``.

    For each file the image is preprocessed, pushed through the model,
    post-processed and handed to ``display`` (saved, not shown).  A small
    10-iteration timing run is then printed for that image.

    The model and the box helpers do not depend on the image, so they are
    created once up front instead of being rebuilt (and the weights re-read
    from disk) for every file.

    Reads module-level configuration: ``imgfile_path``, ``input_size``,
    ``use_cuda``, ``use_float16``, ``obj_list``, ``anchor_ratios``,
    ``anchor_scales``, ``threshold`` and ``iou_threshold``.
    """
    model = EfficientDetBackbone(compound_coef=7, num_classes=len(obj_list),
                                 ratios=anchor_ratios, scales=anchor_scales)
    # place weight path here
    model.load_state_dict(
        torch.load('weights/efficientdet-d7/efficientdet-d7.pth'))
    model.requires_grad_(False)
    model.eval()
    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    for filename in os.listdir(imgfile_path):
        ori_imgs, framed_imgs, framed_metas = preprocess(
            os.path.join(imgfile_path, filename), max_size=input_size)

        if use_cuda:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
        # stacked frames are channel-last; permute to channel-first
        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
            0, 3, 1, 2)

        with torch.no_grad():
            features, regression, classification, anchors = model(x)
            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, threshold,
                              iou_threshold)
        out = invert_affine(framed_metas, out)
        display(filename, out, ori_imgs, imshow=False, imwrite=True)

        print('running speed test...')
        with torch.no_grad():
            print('test1: model inferring and postprocessing')
            print('inferring image for 10 times...')
            t1 = time.time()
            for _ in range(10):
                _, regression, classification, anchors = model(x)
                out = postprocess(x, anchors, regression, classification,
                                  regressBoxes, clipBoxes, threshold,
                                  iou_threshold)
                out = invert_affine(framed_metas, out)
            t2 = time.time()
            tact_time = (t2 - t1) / 10
            print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')
def predict(self, img_path, threshold=0.5):
    """Detect objects in the image at ``img_path``.

    Args:
        img_path: image location handed to ``preprocess``.
        threshold: score threshold; it is also stored in
            ``self.system_dict["params"]["threshold"]``.

    Returns:
        The ``(scores, labels, bboxes)`` triple produced by ``self.display``.
    """
    params = self.system_dict["params"]
    params["threshold"] = threshold

    ori_imgs, framed_imgs, framed_metas = preprocess(
        img_path, max_size=self.system_dict["local"]["input_size"])

    on_gpu = params["use_cuda"]
    batch = torch.stack(
        [torch.from_numpy(fi).cuda() if on_gpu else torch.from_numpy(fi)
         for fi in framed_imgs],
        0)
    dtype = torch.float16 if params["use_float16"] else torch.float32
    # channel-last -> channel-first
    batch = batch.to(dtype).permute(0, 3, 1, 2)

    with torch.no_grad():
        net = self.system_dict["local"]["model"]
        features, regression, classification, anchors = net(batch)
        box_decoder = BBoxTransform()
        box_clipper = ClipBoxes()
        detections = postprocess(batch, anchors, regression, classification,
                                 box_decoder, box_clipper,
                                 params["threshold"],
                                 params["iou_threshold"])

    detections = invert_affine(framed_metas, detections)
    scores, labels, bboxes = self.display(detections, ori_imgs,
                                          imshow=False, imwrite=True)
    return scores, labels, bboxes
def detect(image):
    """Run detection on one in-memory image and return the rendered frame.

    ``image`` is converted to a NumPy array; a copy with the last axis
    reversed is what goes through ``image_preprocess`` (the original comment
    calls this the "cv format"), while the un-reversed array is what
    ``display`` draws on.  Uses the module-level ``model``, ``input_size``,
    ``use_cuda``, ``use_float16``, ``threshold`` and ``iou_threshold``.
    """
    frame = np.array(image)
    # reverse the channel axis for the cv-style pipeline
    cv_frame = frame[:, :, ::-1]

    ori_imgs, framed_imgs, framed_metas = image_preprocess(
        cv_frame, max_size=input_size)

    tensors = []
    for fi in framed_imgs:
        tensor = torch.from_numpy(fi)
        tensors.append(tensor.cuda() if use_cuda else tensor)
    batch = torch.stack(tensors, 0)

    dtype = torch.float16 if use_float16 else torch.float32
    batch = batch.to(dtype).permute(0, 3, 1, 2)

    with torch.no_grad():
        features, regression, classification, anchors = model(batch)
        box_decoder = BBoxTransform()
        box_clipper = ClipBoxes()
        detections = postprocess(batch, anchors, regression, classification,
                                 box_decoder, box_clipper,
                                 threshold, iou_threshold)

    detections = invert_affine(framed_metas, detections)
    return display(detections, frame, imshow=True, imwrite=False)
def detect_image(self, image_path, use_cuda=False, use_float16=False,
                 threshold=0.2, iou_threshold=0.2):
    """Detect objects in ``image_path`` and save the annotated image.

    Args:
        image_path: image location handed to ``preprocess``.
        use_cuda: move the input batch to the GPU when true.
        use_float16: feed the network half-precision input when true.
        threshold: score threshold passed to ``postprocess``.
        iou_threshold: IoU threshold passed to ``postprocess``.

    The forward pass and post-processing run under ``torch.no_grad()`` so no
    autograd graph is recorded during inference.  The result is written via
    ``self.__save_image``; nothing is returned.
    """
    max_size = self.input_sizes[self.compound_coef]

    ori_imgs, framed_imgs, framed_metas = preprocess(image_path,
                                                     max_size=max_size)

    if use_cuda:
        x = torch.stack(
            [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
    # channel-last -> channel-first
    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
        0, 3, 1, 2)

    with torch.no_grad():
        features, regression, classification, anchors = self.forward(x)
        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        out = postprocess(x, anchors, regression, classification,
                          regressBoxes, clipBoxes, threshold, iou_threshold)

    out = invert_affine(framed_metas, out)
    self.__save_image(out, ori_imgs, imwrite=True)
def __call__(self, imgs):
    """Detect objects in ``imgs``.

    Returns:
        ``(rois, scores, class_ids)`` with one entry per element of the
        post-processed output, or ``(None, None, None)`` when that output is
        empty.  When ``self.is_xywh`` is set the boxes are passed through
        ``xyxy_to_xywh`` first.
    """
    # preprocessing
    _, framed_imgs, framed_metas = preprocess(imgs, max_size=self.input_size)

    tensors = [torch.from_numpy(fi) for fi in framed_imgs]
    if self.use_cuda:
        tensors = [t.cuda() for t in tensors]
    batch = torch.stack(tensors, 0)

    if self.use_float16:
        batch = batch.to(torch.float16)
    else:
        batch = batch.to(torch.float32)
    batch = batch.permute(0, 3, 1, 2)

    # inference
    with torch.no_grad():
        features, regression, classification, anchors = self.model(batch)
        detections = postprocess(batch, anchors, regression, classification,
                                 self.regressBoxes, self.clipBoxes,
                                 self.score_thresh, self.nms_thresh)

    # back to original image coordinates
    detections = invert_affine(framed_metas, detections)
    if len(detections) == 0:
        return None, None, None

    rois, scores, class_ids = [], [], []
    for det in detections:
        rois.append(det['rois'])
        scores.append(det['scores'])
        class_ids.append(det['class_ids'])

    if not self.is_xywh:
        return rois, scores, class_ids
    return xyxy_to_xywh(rois), scores, class_ids
def main(img_path, base_name, checkpoint_path):
    """Detect objects in one image using weights from ``checkpoint_path``.

    The annotated result is handed to ``display`` together with
    ``base_name`` (saved, not shown).  Uses module-level ``input_size``,
    ``use_cuda``, ``use_float16``, ``compound_coef``, ``obj_list``,
    ``anchor_ratios``, ``anchor_scales``, ``threshold`` and
    ``iou_threshold``.
    """
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path,
                                                     max_size=input_size)

    tensors = [torch.from_numpy(fi) for fi in framed_imgs]
    if use_cuda:
        tensors = [t.cuda() for t in tensors]
    dtype = torch.float16 if use_float16 else torch.float32
    batch = torch.stack(tensors, 0).to(dtype).permute(0, 3, 1, 2)

    net = EfficientDetBackbone(compound_coef=compound_coef,
                               num_classes=len(obj_list),
                               ratios=anchor_ratios, scales=anchor_scales)
    state = torch.load(checkpoint_path)
    net.load_state_dict(state)
    net.requires_grad_(False)
    net.eval()

    if use_cuda:
        net = net.cuda()
    if use_float16:
        net = net.half()

    with torch.no_grad():
        features, regression, classification, anchors = net(batch)
        box_decoder = BBoxTransform()
        box_clipper = ClipBoxes()
        detections = postprocess(batch, anchors, regression, classification,
                                 box_decoder, box_clipper,
                                 threshold, iou_threshold)

    detections = invert_affine(framed_metas, detections)
    display(detections, ori_imgs, base_name, imshow=False, imwrite=True)
def detect(img_path):
    """Detect objects in ``img_path`` on the GPU and report the latency.

    Returns:
        The two values returned by ``display``.

    Exactly one forward pass is timed, so the elapsed time is used as-is
    for the seconds/FPS figures (it was previously divided by 10, which
    over-reported FPS tenfold), and the millisecond line now really prints
    milliseconds.  Uses module-level ``model``, ``input_size``,
    ``use_float16``, ``threshold``, ``iou_threshold`` and ``timeutil``.
    """
    # ------------------ preprocessing ------------------
    ori_imgs, framed_imgs, framed_metas = preprocess(
        img_path, max_size=input_size)  # input_size: 512

    x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
        0, 3, 1, 2)

    with torch.no_grad():
        start = timeutil.get_epochtime_ms()
        t1 = time.time()

        features, regression, classification, anchors = model(x)
        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        out = postprocess(x, anchors, regression, classification,
                          regressBoxes, clipBoxes, threshold, iou_threshold)
        out = invert_affine(framed_metas, out)
        c1, c2 = display(out, ori_imgs, imshow=True, imwrite=False)

        t2 = time.time()
        tact_time = t2 - t1  # one pass only, no averaging
        print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')
        print('millisecond is ' + str((t2 - t1) * 1000))
        print("Latency: %fms" % (timeutil.get_epochtime_ms() - start))

    return c1, c2
def evaluate_coco(img_path, model, threshold=0.05):
    """Run the model over every image in ``img_path`` and write a CSV.

    Args:
        img_path: directory containing the images to evaluate.
        model: detector called as ``model(x)``.
        threshold: score threshold passed to ``postprocess``.

    Raises:
        Exception: if no image produced a row.

    Writes ``/kaggle/working/submission.csv`` with an
    ``image_id,PredictionString`` header and one row per image.  Uses
    module-level ``input_sizes``, ``compound_coef``, ``use_cuda``, ``gpu``,
    ``use_float16`` and ``nms_threshold``.
    """
    header = "image_id,PredictionString"
    kag_res = [header]

    included_extensions = ('jpg', 'jpeg', 'bmp', 'png', 'gif')
    imgs_files = [os.path.join(img_path, fn) for fn in os.listdir(img_path)
                  if fn.endswith(included_extensions)]

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # A distinct loop name keeps the ``img_path`` argument intact.
    for img_file in tqdm(imgs_files):
        ori_imgs, framed_imgs, framed_metas = preprocess(
            img_file, max_size=input_sizes[compound_coef])
        x = torch.from_numpy(framed_imgs[0])

        if use_cuda:
            x = x.cuda(gpu)
            x = x.half() if use_float16 else x.float()
        else:
            x = x.float()

        x = x.unsqueeze(0).permute(0, 3, 1, 2)

        # inference only: no autograd graph needed
        with torch.no_grad():
            features, regression, classification, anchors = model(x)
            preds = postprocess(x, anchors, regression, classification,
                                regressBoxes, clipBoxes,
                                threshold, nms_threshold)

        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]
        scores = preds['scores']
        rois = preds['rois']

        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

        # NOTE(review): a row is emitted for every processed image, including
        # ones with zero boxes - confirm this matches the submission format.
        # The id drops whatever extension the file has, not only '.jpg'.
        image_id = os.path.splitext(os.path.basename(img_file))[0]
        kag_res.append(
            f"{image_id},{format_prediction_string(rois, scores)}")

    # The header row is always present, so "empty" means nothing beyond it.
    if len(kag_res) <= 1:
        raise Exception('the model does not provide any valid output, check model architecture and the data input')

    # write output
    filepath = '/kaggle/working/submission.csv'
    if os.path.exists(filepath):
        os.remove(filepath)

    with open(filepath, "w") as f:
        for line in kag_res:
            f.write(line)
            f.write("\n")
def single_img_test(img_path, input_size, model, use_cuda=True, use_float16=False):
    """Detect objects in one image and return them as plain rows.

    Returns:
        A list with one ``[image_name, class_id, score, roi_tuple]`` entry
        per detection of the first (only) image; ``image_name`` is the last
        path component of ``img_path``.
    """
    score_thr = 0.05
    nms_thr = 0.5

    # last path component, accepting both slash styles
    image_name = img_path.replace('\\', '/').split('/')[-1]

    # tf bilinear interpolation is different from any other's, just make do
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path,
                                                     max_size=input_size)

    tensors = [torch.from_numpy(fi) for fi in framed_imgs]
    if use_cuda:
        tensors = [t.cuda() for t in tensors]
    dtype = torch.float16 if use_float16 else torch.float32
    batch = torch.stack(tensors, 0).to(dtype).permute(0, 3, 1, 2)

    with torch.no_grad():
        features, regression, classification, anchors = model(batch)
        box_decoder = BBoxTransform()
        box_clipper = ClipBoxes()
        out = postprocess(batch, anchors, regression, classification,
                          box_decoder, box_clipper, score_thr, nms_thr)

    out = invert_affine(framed_metas, out)

    first = out[0]
    return [
        [image_name, first['class_ids'][k], first['scores'][k],
         tuple(first['rois'][k])]
        for k in range(len(first['class_ids']))
    ]
def evaluate_mAP(imgs, imgs_ids, framed_metas, regressions,
                 classifications, anchors, threshold=0.05, nms_threshold=0.5):
    """Turn raw network outputs into a list of per-detection result dicts.

    Args:
        imgs: image batch; ``imgs.shape[0]`` is used as the batch size.
        imgs_ids: image ids, indexed in step with the batch.
        framed_metas: resizing stats consumed by ``invert_affine``.
        regressions: box regression output of the network.
        classifications: classification output of the network.
        anchors: anchors; ``anchors[0]`` is repeated for every image.
        threshold: score threshold.
        nms_threshold: IoU threshold passed to ``postprocess``.

    Returns:
        A list of dicts with keys ``image_id``, ``category_id`` (label + 1),
        ``score`` and ``bbox`` (x, y, w, h).  The list is empty when there
        are no predictions, so callers always receive a list (``None`` was
        returned in that case before).
    """
    results = []  # evaluation results

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    batch_anchors = torch.stack([anchors[0]] * imgs.shape[0], 0).detach()
    preds = postprocess(imgs, batch_anchors, regressions.detach(),
                        classifications.detach(), regressBoxes, clipBoxes,
                        threshold, nms_threshold)
    if not preds:
        return results

    preds = invert_affine(framed_metas, preds)
    for i, pred in enumerate(preds):
        scores = pred['scores']
        class_ids = pred['class_ids']
        rois = pred['rois']

        if rois.shape[0] == 0:
            continue

        # x1,y1,x2,y2 -> x1,y1,w,h (in place)
        rois[:, 2] -= rois[:, 0]
        rois[:, 3] -= rois[:, 1]

        for roi_id in range(rois.shape[0]):
            score = float(scores[roi_id])
            label = int(class_ids[roi_id])
            box = rois[roi_id, :]

            # stop at the first detection below the threshold
            if score < threshold:
                break

            results.append({
                'image_id': imgs_ids[i],
                'category_id': label + 1,
                'score': float(score),
                'bbox': box.tolist(),
            })

    return results
def predict(self, raw_img):
    """Detect objects in ``raw_img`` and return the predictions.

    Intermediate results are kept on the instance: ``ori_imgs``,
    ``framed_imgs``, ``framed_metas``, ``features``, ``regression``,
    ``classification``, ``anchors``, ``regressBoxes`` and ``clipBoxes``.
    """
    prepared = preprocess_raw(raw_img, max_size=self.input_size)
    self.ori_imgs, self.framed_imgs, self.framed_metas = prepared

    tensors = [torch.from_numpy(fi) for fi in self.framed_imgs]
    if self.use_cuda:
        tensors = [t.cuda() for t in tensors]
    batch = torch.stack(tensors, 0)

    dtype = torch.float16 if self.use_float16 else torch.float32
    batch = batch.to(dtype).permute(0, 3, 1, 2)

    with torch.no_grad():
        outputs = self.model(batch)
        (self.features, self.regression,
         self.classification, self.anchors) = outputs

        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()

        detections = postprocess(batch, self.anchors, self.regression,
                                 self.classification, self.regressBoxes,
                                 self.clipBoxes, self.threshold,
                                 self.iou_threshold)

    return invert_affine(self.framed_metas, detections)
def detect():
    """Run the detector on the module-level batch ``x`` and show the result.

    Returns:
        The two values returned by ``display``.

    Relies on module-level ``model``, ``x``, ``framed_metas``, ``ori_imgs``,
    ``threshold`` and ``iou_threshold``.  The unused timer variable and the
    commented-out timing code that read it have been dropped.
    """
    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x, anchors, regression, classification,
                          regressBoxes, clipBoxes, threshold, iou_threshold)
        out = invert_affine(framed_metas, out)
        c1, c2 = display(out, ori_imgs, imshow=True, imwrite=False)

    return c1, c2
def get_face_position(fn):
    """Return the detected boxes for image ``fn`` as lists of ints.

    Only the first frame returned by ``preprocess`` is used.  Uses
    module-level ``model``, ``args``, ``effdet_input_size``, ``effdet_thr``
    and ``effdet_iou_thr``.
    """
    _, framed, metas = preprocess(fn, max_size=effdet_input_size)

    batch = torch.from_numpy(framed[0]).float().unsqueeze(0)
    batch = batch.permute(0, 3, 1, 2)
    if args.cuda:
        batch = batch.cuda()

    with torch.no_grad():
        _, regression, classification, anchors = model(batch)
        box_decoder = BBoxTransform()
        box_clipper = ClipBoxes()
        detections = postprocess(batch, anchors, regression, classification,
                                 box_decoder, box_clipper,
                                 effdet_thr, effdet_iou_thr)

    detections = invert_affine(metas, detections)

    rois = detections[0]["rois"]
    return [[int(val) for val in rois[k]] for k in range(len(rois))]
def predict_fn(data, model):
    """Run the model on a prepared input tuple and return the detections.

    Mostly copied from
    https://github.com/zylo117/Yet-Another-EfficientDet-Pytorch/blob/master/efficientdet_test.py

    Args:
        data: tuple ``(ori_imgs, framed_imgs, framed_metas, threshold,
            iou_threshold)`` generated by the custom input_fn.
        model: PyTorch model loaded in memory by model_fn.

    Returns:
        The post-processed predictions after ``invert_affine``.
    """
    ori_imgs, framed_imgs, framed_metas, threshold, iou_threshold = data

    tensors = []
    for fi in framed_imgs:
        tensor = torch.from_numpy(fi)
        if USE_CUDA:
            tensor = tensor.cuda()
        tensors.append(tensor)

    dtype = torch.float16 if USE_FLOAT16 else torch.float32
    batch = torch.stack(tensors, 0).to(dtype).permute(0, 3, 1, 2)

    with torch.no_grad():
        features, regression, classification, anchors = model(batch)
        regress_boxes = BBoxTransform()
        clip_boxes = ClipBoxes()
        detections = postprocess(batch,
                                 anchors=anchors,
                                 regression=regression,
                                 classification=classification,
                                 regressBoxes=regress_boxes,
                                 clipBoxes=clip_boxes,
                                 threshold=threshold,
                                 iou_threshold=iou_threshold)

    return invert_affine(framed_metas, detections)
x = x.to(torch.float32 if not use_float16 else torch.float16 ).permute(0, 3, 1, 2) if use_cuda: model = model.cuda() if use_float16: model = model.half() with torch.no_grad(): features, regression, classification, anchors = model(x) regressBoxes = BBoxTransform() clipBoxes = ClipBoxes() out = postprocess(x, anchors, regression, classification, regressBoxes, clipBoxes, config.threshold, config.iou_threshold) out = invert_affine(framed_metas, out) if opt.debug: display(out, ori_imgs, config, label=os.path.basename(img_id), imshow=True, imwrite=False) mark = [] # None for Unknown, True for Pass, False for Reject for roi, class_id, score in zip(out[0]['rois'], out[0]['class_ids'], out[0]['scores']): iou = compute_overlaps(np.asarray([roi]), np.asarray([red_box]))
def detect(model, dataset, args):
    """Write one bounding box per image of ``dataset`` to a text file.

    For every image in ``<dataset>/<dataset>/images``:

    * if ``<dataset>/annotation_byhand/<img_id>.json`` exists, the box is
      taken from the first shape's two points in that file;
    * otherwise (skipped entirely when ``args.update`` is set) the model is
      run and the largest-area detection with ``class_id == 0`` is used,
      falling back to the full image when there is none.

    The box is drawn on the image, optionally saved under
    ``<dataset>/det_vis`` (``args.vis``), and written as space-separated
    values to ``annotations/bboxes/<img_id>.txt``.

    Args:
        model: detector called as ``model(x)``.
        dataset: dataset root directory / name.
        args: namespace providing ``cpu``, ``threshold``, ``iou_threshold``,
            ``compound_coef``, ``update`` and ``vis``.
    """
    use_cuda = not args.cpu
    threshold = args.threshold
    iou_threshold = args.iou_threshold
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[args.compound_coef]

    img_dir = os.path.join(dataset, dataset, 'images')
    bbox_dir = os.path.join(dataset, dataset, 'annotations', 'bboxes')
    vis_dir = os.path.join(dataset, 'det_vis')
    prepare_dirs(bbox_dir, vis_dir)

    img_paths = [os.path.join(img_dir, f) for f in os.listdir(img_dir)]
    for img_path in tqdm(img_paths):
        ori_imgs, framed_imgs, framed_metas = preprocess(img_path,
                                                         max_size=input_size)
        ori_img = ori_imgs[0]
        img_id = os.path.basename(img_path).split('.')[0]

        json_byhand = os.path.join(dataset, 'annotation_byhand',
                                   img_id + '.json')
        if os.path.exists(json_byhand):
            with open(json_byhand) as f:
                annotation_byhand = json.load(f)
            points = annotation_byhand['shapes'][0]['points']
            # two points concatenated into a single flat box
            max_box = points[0] + points[1]
        else:
            if args.update:  # only process annotations by hand
                continue

            if use_cuda:
                x = torch.stack(
                    [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
            else:
                # was ``torch.from_numpy(ft)``: an undefined name that made
                # the CPU path fail with NameError
                x = torch.stack(
                    [torch.from_numpy(fi) for fi in framed_imgs], 0)

            x = x.to(torch.float32).permute(0, 3, 1, 2)

            with torch.no_grad():
                features, regression, classification, anchors = model(x)
                regressBoxes = BBoxTransform()
                clipBoxes = ClipBoxes()
                preds = postprocess(x, anchors, regression, classification,
                                    regressBoxes, clipBoxes,
                                    threshold, iou_threshold)

            pred = invert_affine(framed_metas, preds)[0]

            # default: the whole image
            max_area = 0
            max_box = [0, 0, ori_img.shape[1], ori_img.shape[0]]
            for det, class_id in zip(pred['rois'], pred['class_ids']):
                if not class_id == 0:
                    continue
                # ``np.int`` was an alias of the builtin and no longer exists
                # in current NumPy releases
                x1, y1, x2, y2 = det.astype(int)
                w, h = x2 - x1, y2 - y1
                area = w * h
                if area > max_area:
                    max_area = area
                    max_box = [x1, y1, x2, y2]

        plot_one_box(ori_img, max_box, color=[255, 0, 255], line_thickness=2)

        if args.vis:
            cv2.imwrite(os.path.join(vis_dir, img_id + '.jpg'), ori_img)

        bbox_file = os.path.join(bbox_dir, img_id + '.txt')
        with open(bbox_file, 'w') as f:
            bbox_info = ' '.join(map(str, max_box))
            f.write(bbox_info)
# Batched evaluation: walk ``img_paths`` in chunks of ``batch_eval`` images,
# run the detector on each chunk and save the rendered results.
with torch.no_grad():
    for indx in range((len(img_paths) + batch_eval - 1) // batch_eval):
        print(indx * batch_eval)

        # slicing clamps at the end of the list, so the last chunk may be
        # shorter than ``batch_eval``
        batch_slice = slice(indx * batch_eval, (indx + 1) * batch_eval)

        ori_img_batch, framed_img_batch, metas_batch = preprocess(
            img_paths[batch_slice], max_size=input_size)

        img_names = img_paths[batch_slice]
        convert_coco(img_names)

        # flatten the path into a file-name-safe id and drop the last four
        # characters
        image_names = []
        for img_name in img_names:
            image_names.append(img_name.replace('/', '_')[:-4])

        x = torch.stack(
            [torch.from_numpy(fi).cuda() if use_cuda else torch.from_numpy(fi)
             for fi in framed_img_batch],
            0)
        x = x.to(torch.float16 if use_float16 else torch.float32).permute(
            0, 3, 1, 2)

        features, regression, classification, anchors = model(x)

        out = postprocess(x, anchors, regression, classification,
                          regressBoxes, clipBoxes, threshold, iou_threshold)
        out = invert_affine(metas_batch, out)

        display(image_names, out, ori_img_batch, imshow=False, imwrite=True)
def efficientDet_video_inference(video_src, compound_coef=0,
                                 force_input_size=None, frame_skipping=3,
                                 threshold=0.2, out_path=None, imshow=False,
                                 display_fps=False):
    """Detect and track objects in a video with EfficientDet + deep-sort.

    Args:
        video_src: source passed to ``cv2.VideoCapture``.
        compound_coef: EfficientDet variant; also selects the input size.
        force_input_size: overrides the default input size when not None.
        frame_skipping: detection runs when the frame counter is a multiple
            of this value (0 means every frame); other frames reuse the last
            detections.
        threshold: score threshold passed to ``postprocess``.
        out_path: when given, annotated frames are written to this video.
        imshow: show the annotated frames in a window; 'q' quits.
        display_fps: overlay the FPS of the last detection pass.

    Fixes relative to the previous version: the writer is only released when
    one was created (``out_path=None`` used to end in a NameError), and the
    progress print is skipped when the reported FPS truncates to 0 (it used
    to divide by zero).
    """
    # deep-sort parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    model_filename = '/home/shaheryar/Desktop/Projects/Football-Monitoring/deep_sort/model_weights/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric, n_init=5)

    # efficientDet-pytorch parameters
    iou_threshold = 0.4
    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    # load model
    model = EfficientDetBackbone(compound_coef=compound_coef,
                                 num_classes=len(obj_list))
    model.load_state_dict(
        torch.load(f'weights/efficientdet-d{compound_coef}.pth'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # video capture
    cap = cv2.VideoCapture(video_src)
    frame_width = int(cap.get(3))
    frame_height = int(cap.get(4))
    fourcc = cv2.VideoWriter_fourcc(*'MPEG')
    fps = cap.get(cv2.CAP_PROP_FPS)
    print("Video fps", fps)

    outp = None
    if out_path is not None:
        outp = cv2.VideoWriter(out_path, fourcc, fps,
                               (frame_width, frame_height))

    i = 0
    start = time.time()
    current_frame_fps = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        t1 = time.time()

        if frame_skipping == 0 or i % frame_skipping == 0:
            # frame preprocessing (running detections)
            ori_imgs, framed_imgs, framed_metas, t1 = preprocess_video(
                frame, width=input_size, height=input_size)

            if use_cuda:
                x = torch.stack([fi.cuda() for fi in framed_imgs], 0)
            else:
                x = torch.stack(
                    [torch.from_numpy(fi) for fi in framed_imgs], 0)

            # model predict
            t1 = time.time()
            with torch.no_grad():
                features, regression, classification, anchors = model(x)
                out = postprocess(x, anchors, regression, classification,
                                  regressBoxes, clipBoxes,
                                  threshold, iou_threshold)

            # post processing
            out = invert_affine(framed_metas, out)
            # decoding bbox, object name and scores
            boxes, classes, scores = decode_predictions(out[0])
            org_boxes = boxes.copy()
            t2 = time.time() - t1

            # feature extraction for deep sort
            boxes = [convert_bbox_to_deep_sort_format(frame.shape, b)
                     for b in boxes]
            features = encoder(frame, boxes)
            detections = [Detection(bbox, 1.0, feature)
                          for bbox, feature in zip(boxes, features)]
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, nms_max_overlap, scores)
            detections = [detections[idx] for idx in indices]

            tracker.predict()
            tracker.update(detections)

        i = i + 1
        img_show = frame.copy()

        # NOTE(review): pairs box j with tracker.tracks[j] by position -
        # confirm the two sequences are actually aligned.
        for j in range(len(org_boxes)):
            img_show = drawBoxes(img_show, org_boxes[j], (255, 255, 0),
                                 str(tracker.tracks[j].track_id))

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            x1 = int(bbox[0])
            y1 = int(bbox[1])
            x2 = int(bbox[2])
            y2 = int(bbox[3])
            roi = frame[y1:y2, x1:x2]
            cv2.rectangle(img_show, (x1, y1), (x2, y2),
                          update_color_association(roi, track.track_id), 2)
            cv2.putText(img_show, str(track.track_id), (x1, y1), 0,
                        5e-3 * 100, (255, 255, 0), 1)

        if display_fps:
            current_frame_fps = 1 / t2
        else:
            current_frame_fps = 0

        cv2.putText(img_show, 'FPS: {0:.2f}'.format(current_frame_fps),
                    (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2,
                    cv2.LINE_AA)

        # once per second of video; skipped when the FPS is unknown (0)
        whole_fps = int(fps)
        if whole_fps and i % whole_fps == 0:
            print("Processed ", str(int(i / fps)), "seconds")
            print("Time taken", time.time() - start)

        if imshow:
            img_show = cv2.resize(img_show, (0, 0), fx=0.75, fy=0.75)
            cv2.imshow('Frame', img_show)
            # Press Q on keyboard to exit
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        if outp is not None:
            outp.write(img_show)

    cap.release()
    if outp is not None:
        outp.release()
def inference():
    """Run joint detection + semantic segmentation on a fixed test image.

    Loads ``model_weight/model_1_epoch_80.pth`` into an
    ``EfficientDet_semanticBackbone``, runs it on
    ``test/original_img.jpg``, blends the box rendering (35%) with the
    colourised segmentation map (65%) and writes the result to
    ``test/semantic_img_1.jpg``.

    Returns:
        The value returned by ``cv2.imwrite``.

    The model is now put into eval mode before the forward pass; previously
    it was run in whatever mode the constructor left it in.
    """
    # NOTE(review): ``compound_coef`` is not read below; the input size uses
    # index 2 and the model is built with compound_coef=1 - confirm intended.
    compound_coef = 0
    force_input_size = None  # set None to use default size

    img_path = 'test/original_img.jpg'

    # replace this part with your project's anchor config
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
                'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
                'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack', 'umbrella', '', '', 'handbag', 'tie',
                'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
                'skateboard', 'surfboard', 'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
                'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
                'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '', 'tv',
                'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
                'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
                'toothbrush']

    color_list = standard_to_bgr(STANDARD_COLORS)

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[2] if force_input_size is None else force_input_size
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path,
                                                     max_size=input_size)

    if use_cuda:
        x = torch.stack(
            [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
        0, 3, 1, 2)

    model = EfficientDet_semanticBackbone(compound_coef=1,
                                          num_classes=len(obj_list),
                                          ratios=anchor_ratios,
                                          scales=anchor_scales)
    model.load_state_dict(torch.load('model_weight/model_1_epoch_80.pth'))
    # inference mode for layers that behave differently while training
    model.eval()

    if use_cuda:
        model = model.cuda()

    with torch.no_grad():
        features, regression, classification, anchors, sem_out = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x, anchors, regression, classification,
                          regressBoxes, clipBoxes, threshold, iou_threshold)

    out = invert_affine(framed_metas, out)
    out = box(out, ori_imgs, color_list, obj_list, imshow=False,
              imwrite=False)

    outputs = sem_out.data.cpu().numpy()  # (shape: (batch_size, num_classes, img_h, img_w))
    pred_label_imgs = np.argmax(outputs, axis=1)  # (shape: (batch_size, img_h, img_w))
    pred_label_imgs = pred_label_imgs.astype(np.uint8)
    # resize the label map of the first image to the original image size
    z = cv2.resize(pred_label_imgs[0],
                   (ori_imgs[0].shape[1], ori_imgs[0].shape[0]))

    from semantic_utils.utils import label_img_to_color
    pred_label_img_color = label_img_to_color(z)
    overlayed_img = 0.35*out + 0.65*pred_label_img_color

    flag = cv2.imwrite('test/semantic_img_1.jpg', overlayed_img)
    return flag
def getImageDetections(imagePath, weights, nms_threshold, confidenceParam, coefficient):
    """
    Runs the detections and returns all detection into a single structure.

    Parameters
    ----------
    imagePath : str
        Path to all images.
    weights : str
        path to the weights.
    nms_threshold : float
        non-maximum supression threshold.
    confidenceParam : float
        confidence score for the detections (everything above this
        threshold is considered a valid detection).
    coefficient : int
        coefficient of the current efficientdet model (from d1 to d7).

    Returns
    -------
    detectionsList : List
        return a list with all predicted bounding-boxes, each entry being
        ``(score, x1, y1, x2, y2)``. The list is empty when nothing was
        detected (this used to raise UnboundLocalError).
    """
    compound_coef = coefficient
    force_input_size = None  # set None to use default size
    img_path = imagePath

    threshold = confidenceParam
    iou_threshold = nms_threshold

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = ['class_name']

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path,
                                                     max_size=input_size)

    if use_cuda:
        x = torch.stack(
            [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
        0, 3, 1, 2)

    model = EfficientDetBackbone(compound_coef=compound_coef,
                                 num_classes=len(obj_list),
                                 # replace this part with your project's anchor config
                                 ratios=[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)],
                                 scales=[2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])

    model.load_state_dict(torch.load(rootDir+'logs/' + project + '/' + weights))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x, anchors, regression, classification,
                          regressBoxes, clipBoxes, threshold, iou_threshold)

    out = invert_affine(framed_metas, out)

    # Created before the loop so it exists even with zero detections and
    # accumulates across images instead of being reset per image.
    detectionsList = []
    for i in range(len(ori_imgs)):
        rois = out[i]['rois']
        scores = out[i]['scores']
        for j in range(len(rois)):
            # ``np.int`` no longer exists in current NumPy; the builtin is
            # what it aliased
            (x1, y1, x2, y2) = rois[j].astype(int)
            detectionsList.append((float(scores[j]), x1, y1, x2, y2))

    return detectionsList
def evaluate_voc(gt_dict, img_paths, model, max_size, config):
    """Evaluate ``model`` on ``img_paths`` and return per-class VOC metrics.

    Args:
        gt_dict: ground truth passed through to ``voc_eval``.
        img_paths: image paths; the position in this sequence is used as the
            image index in the result rows.
        model: detector called as ``model(x)``.
        max_size: input size handed to ``preprocess``.
        config: provides ``eval_use_cuda``, ``eval_gpu``,
            ``eval_use_float16``, ``eval_threshold``, ``eval_nms_threshold``,
            ``anchor_free_mode`` and ``obj_list``.

    Returns:
        A list with one ``[prec, rec, ap]`` entry per class in
        ``config.obj_list``.

    Raises:
        Exception: if no detection was produced for any image.
    """
    box_decoder = BBoxTransform()
    box_clipper = ClipBoxes()
    results = []

    for idx, image_path in enumerate(tqdm(img_paths)):
        ori_imgs, framed_imgs, framed_metas = preprocess(image_path,
                                                         max_size=max_size)
        batch = torch.from_numpy(framed_imgs[0])

        if config.eval_use_cuda:
            batch = batch.cuda(config.eval_gpu)
            batch = batch.half() if config.eval_use_float16 else batch.float()
        else:
            batch = batch.float()

        batch = batch.unsqueeze(0).permute(0, 3, 1, 2)
        features, regression, classification, anchors = model(batch)

        preds = postprocess(batch, anchors, regression, classification,
                            box_decoder, box_clipper,
                            config.eval_threshold, config.eval_nms_threshold,
                            anchor_free_mode=config.anchor_free_mode)
        if not preds:
            continue

        first = invert_affine(framed_metas, preds)[0]
        scores = first['scores']
        class_ids = first['class_ids']
        rois = first['rois']

        # boxes stay in x1,y1,x2,y2 form (no conversion to x,y,w,h here)
        for roi_id in range(rois.shape[0]):
            box = rois[roi_id, :]
            results.append([idx, box[0], box[1], box[2], box[3],
                            float(scores[roi_id]), int(class_ids[roi_id])])

    if len(results) == 0:
        raise Exception('the model does not provide any valid output, check model architecture and the data input')

    voc_certs = []
    for class_idx in range(len(config.obj_list)):
        _npos, _nd, rec, prec, ap = voc_eval(gt_dict, results, class_idx,
                                             iou_thres=0.5,
                                             use_07_metric=False)
        voc_certs.append([prec, rec, ap])

    return voc_certs
def excuteModel(videoname):
    """Run EfficientDet over a video and copy the annotated result to assets.

    Args:
        videoname: base name (without ``.mp4``) of an uploaded video, or
            ``None`` to process the bundled default video.

    Returns:
        Path of the copy placed in the client assets directory.

    Fixes relative to the previous version: the frame count is read from
    ``cap`` (it referenced an undefined ``vs``, so the bare ``except`` always
    reported failure), the ``except`` no longer swallows
    ``KeyboardInterrupt``/``SystemExit``, the writer is only released when
    one was created, and the invalid ``\\A`` escape in the default path is
    spelled out (same resulting string).
    """
    # Video's path
    if videoname is not None:
        video_src = os.path.join(r'D:\GitHub\Detection\server\uploads',
                                 f"{videoname}.mp4")
    else:
        video_src = 'D:\\GitHub\\Detection\\server\\AImodel\\videotest\\default.mp4'

    compound_coef = 2
    trained_weights = 'D:\\GitHub\\Detection\\server\\AImodel\\weights\\efficientdet-video.pth'
    force_input_size = None  # set None to use default size

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
                'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
                'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack', 'umbrella', '', '', 'handbag', 'tie',
                'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
                'skateboard', 'surfboard', 'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
                'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
                'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '', 'tv',
                'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
                'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
                'toothbrush']

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    # load model
    model = EfficientDetBackbone(
        compound_coef=compound_coef, num_classes=len(obj_list))
    model.load_state_dict(torch.load(trained_weights))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    # Box helpers
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # Video capture
    cap = cv2.VideoCapture(video_src)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    writer = None

    # try to determine the total number of frames in the video file
    try:
        prop = cv2.cv.CV_CAP_PROP_FRAME_COUNT if imutils.is_cv2() \
            else cv2.CAP_PROP_FRAME_COUNT
        total = int(cap.get(prop))
        print("[INFO] {} total frames in video".format(total))
    # an error occurred while trying to determine the total
    # number of frames in the video file
    except Exception:
        print("[INFO] could not determine # of frames in video")
        total = -1

    path_out = os.path.join(os.path.dirname(
        os.path.abspath(__file__)), 'outvideo')
    path_result = r"D:\GitHub\Detection\server\AImodel\videotest\default.mp4"
    path_asset = r"D:\GitHub\Detection\client\src\assets"

    for i in range(0, length):
        ret, frame = cap.read()
        if not ret:
            break

        # frame preprocessing
        ori_imgs, framed_imgs, framed_metas = preprocess_video(
            frame, max_size=input_size)

        if use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda()
                             for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
            0, 3, 1, 2)

        # model predict
        with torch.no_grad():
            features, regression, classification, anchors = model(x)
            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes,
                              threshold, iou_threshold)

        # result
        out = invert_affine(framed_metas, out)
        img_show = display(out, ori_imgs, obj_list)

        if writer is None:
            # initialize our video writer lazily, once the frame size is known
            fourcc = 0x00000021
            if videoname is not None:
                path_result = os.path.join(path_out, f"{videoname}.mp4")
            else:
                path_result = os.path.join(path_out, "default.mp4")
            writer = cv2.VideoWriter(path_result, fourcc, 30,
                                     (img_show.shape[1], img_show.shape[0]),
                                     True)

        # write the output frame to disk
        writer.write(img_show)
        print("Processing data... " + str(round((i+1)/length, 3)*100) + " %")

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    print("[INFO] cleaning up...")
    # no writer exists when not a single frame could be read
    if writer is not None:
        writer.release()
    cap.release()
    cv2.destroyAllWindows()

    if videoname is not None:
        path_asset = os.path.join(path_asset, f"{videoname}.mp4")
    else:
        path_asset = os.path.join(path_asset, "default.mp4")
    copyfile(path_result, path_asset)
    return path_asset
def main(i):
    """Run EfficientDet-D``i`` over a fixed frame range and dump detections to JSON.

    Builds an ``EfficientDetBackbone`` with compound coefficient ``i``, loads
    ``weights/efficientdet-d{i}.pth``, runs it on ``frame1000000.jpg`` through
    ``frame1099999.jpg`` under ``/data/jiashenc/jackson/`` and, once every
    frame has been processed, writes the accumulated ``out_dict`` to
    ``/data/jiashenc/jackson/10/res-{i}.json``.

    Args:
        i: EfficientDet compound coefficient. It selects the weight file and
            indexes ``input_sizes``, so it must be an int in ``0..8``.
    """
    compound_coef = i
    force_input_size = None  # set None to use default size

    # replace this part with your project's anchor config
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    # Empty strings are kept as placeholders so list positions stay aligned
    # with the class ids produced by the model.
    obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
                'traffic light', 'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
                'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack',
                'umbrella', '', '', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
                'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
                'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
                'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
                'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '',
                'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',
                'sink', 'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
                'toothbrush']

    out_dict = dict()

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                 ratios=anchor_ratios, scales=anchor_scales)
    model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth', map_location='cpu'))
    model.requires_grad_(False)
    model.eval()
    if use_cuda:
        model = model.cuda()
    if use_float16:
        # Keep the weights in the same dtype as the input tensor built below;
        # without this, flipping use_float16 feeds half inputs to a float model.
        model = model.half()

    # These helpers do not depend on the frame, so build them once instead of
    # once per iteration of the 100k-frame loop.
    regress_boxes = BBoxTransform()
    clip_boxes = ClipBoxes()
    input_dtype = torch.float16 if use_float16 else torch.float32

    base_dir = '/data/jiashenc/jackson/'
    print('Processing Det-' + str(i))
    for k in range(1000000, 1100000):
        if k % 1000 == 0:
            print(f' Finish {k + 1} frames')

        img_path = os.path.join(base_dir, f'frame{k}.jpg')
        _, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

        tensors = [torch.from_numpy(fi) for fi in framed_imgs]
        if use_cuda:
            tensors = [t.cuda() for t in tensors]
        # Stack to a batch, then reorder axes 0,3,1,2 (channels-last input is
        # presumably NHWC -> NCHW; confirm against preprocess()).
        x = torch.stack(tensors, 0).to(input_dtype).permute(0, 3, 1, 2)

        with torch.no_grad():
            _, regression, classification, anchors = model(x)
            out = postprocess(x, anchors, regression, classification,
                              regress_boxes, clip_boxes,
                              threshold, iou_threshold)

        out = invert_affine(framed_metas, out)
        to_json(out, out_dict)

    with open(os.path.join(base_dir, '10', 'res-{:d}.json'.format(i)), 'w') as f:
        json.dump(out_dict, f)
def main(compound_coef=0, model_dir=MODEL_DIR, nms_threshold=0.5, use_cuda=False, use_float16=False, image_batch_size=2):
    """Run batched EfficientDet inference over ``DATA/*.jpg`` and pickle the results.

    Prints timing statistics per batch and for the whole run, then writes a
    ``{image_path: detection}`` mapping to ``results.{compound_coef}.pkl`` in
    the current working directory.

    Args:
        compound_coef: EfficientDet compound coefficient; indexes
            ``input_sizes`` (valid values 0..8) and is passed to ``model_fn``.
        model_dir: Directory handed to ``model_fn`` to load the model from.
        nms_threshold: IoU threshold passed to ``postprocess``.
        use_cuda: Move input tensors to the GPU when true.
        use_float16: Cast input tensors to ``torch.float16`` instead of
            ``torch.float32`` when true.
        image_batch_size: Number of image paths requested per batch from
            ``image_path_batches``.
    """

    def _ratio(numerator, denominator):
        # Same "0.4f" formatting as before, but a zero denominator (no images,
        # or a timer delta of exactly 0) yields "n/a" instead of raising.
        if not denominator:
            return "n/a"
        return f"{numerator / denominator:0.4f}"

    threshold = 0.05
    cudnn.fastest = True
    cudnn.benchmark = True

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[compound_coef]

    model = model_fn(model_dir=model_dir, compound_coef=compound_coef,
                     use_cuda=use_cuda, use_float16=use_float16)

    image_paths = glob.glob(os.path.join(DATA, '*.jpg'))
    num_images = len(image_paths)
    print(f'processing {num_images} in batches of {image_batch_size}')

    # Loop-invariant helpers: build once rather than once per batch.
    regress_boxes = BBoxTransform()
    clip_boxes = ClipBoxes()
    input_dtype = torch.float16 if use_float16 else torch.float32

    results = {}
    loop_start = datetime.datetime.now()
    for image_batch in image_path_batches(image_paths, image_batch_size):
        batch_start = datetime.datetime.now()

        _, framed_images, framed_metas = preprocess(*image_batch, max_size=input_size)

        # build tensor from framed images
        x = torch.stack(
            [(torch.from_numpy(fi).cuda() if use_cuda else torch.from_numpy(fi))
             for fi in framed_images],
            0)
        x = x.to(input_dtype).permute(0, 3, 1, 2)

        with torch.no_grad():
            _, regression, classification, anchors = model(x)
            out = postprocess(x, anchors, regression, classification,
                              regress_boxes, clip_boxes,
                              threshold, nms_threshold)

        out = invert_affine(framed_metas, out)

        batch_time = (datetime.datetime.now() - batch_start).total_seconds()
        # Use the number of images actually stacked: the final batch may be
        # shorter than image_batch_size, which would skew FPS/SPF.
        images_in_batch = x.shape[0]
        print(f"batch_time = {batch_time} (s)")
        print(f"batch_size = {images_in_batch}")
        print(f"FPS = {_ratio(images_in_batch, batch_time)}")
        print(f"SPF = {_ratio(batch_time, images_in_batch)}")

        results.update(dict(zip(image_batch, out)))

    loop_time = (datetime.datetime.now() - loop_start).total_seconds()

    print('\nfinal summary:')
    print(f"total processing time: {loop_time} (s)")
    print(f"number of frames processed: {len(image_paths)}")
    print(f"batch_size = {image_batch_size}")
    print(f"FPS: {_ratio(num_images, loop_time)}")
    print(f"SPF: {_ratio(loop_time, num_images)}")

    with open(f'results.{compound_coef}.pkl', 'wb') as fp:
        pickle.dump(results, fp)
def img_detect(file, img_dir, model, input_size, regressBoxes, clipBoxes, prior_mask, threshold):
    """Run HOI detection on one image file and return the matched detections.

    The image id is parsed from the file name: the text after the last ``_``
    in the stem must be an integer (e.g. ``foo_000123.jpg`` -> ``123``).
    Reads the module-level ``use_cuda``, ``use_float16``, ``nms_threshold``
    and ``args.flip_test`` settings.

    Args:
        file: Image file name, relative to ``img_dir``.
        img_dir: Directory containing ``file``.
        model: Callable returning the 8-tuple ``(features, union_act_cls,
            union_sub_reg, union_obj_reg, inst_act_cls, inst_obj_cls,
            inst_bbox_reg, anchors)``.
        input_size: ``max_size`` passed to ``preprocess``.
        regressBoxes: Box-regression helper forwarded to the postprocessors.
        clipBoxes: Box-clipping helper forwarded to the postprocessors.
        prior_mask: Forwarded unchanged to ``hoi_match``.
        threshold: Score threshold for the instance postprocessing.

    Returns:
        Whatever ``hoi_match`` returns for this image.

    Raises:
        ValueError: If the file-name suffix after the last ``_`` is not an int.
    """
    stem, _ = os.path.splitext(file)
    image_id = int(stem.split("_")[-1])

    img_path = os.path.join(img_dir, file)
    _, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

    tensors = [torch.from_numpy(fi) for fi in framed_imgs]
    if use_cuda:
        tensors = [t.cuda() for t in tensors]
    x = torch.stack(tensors, 0)
    x = x.to(torch.float16 if use_float16 else torch.float32).permute(0, 3, 1, 2)

    with torch.no_grad():
        if args.flip_test:
            # Reverse the last axis on x's own device. The previous index
            # tensor was created with an unconditional .cuda(), which broke
            # flip testing whenever use_cuda was false.
            x_cat = torch.cat([x, torch.flip(x, dims=[-1])], 0)

            features, union_act_cls, union_sub_reg, union_obj_reg, \
                inst_act_cls, inst_obj_cls, inst_bbox_reg, anchors = model(x_cat)

            # Duplicate the anchors so both halves of the concatenated batch
            # (original and flipped) have a matching entry.
            anchors = torch.cat([anchors, anchors], 0)
            preds_union = postprocess_dense_union_flip(
                x_cat, anchors, union_act_cls, union_sub_reg, union_obj_reg,
                regressBoxes, clipBoxes, 0.5, 1)
            preds_inst = postprocess_hoi_flip(
                x_cat, anchors, inst_bbox_reg, inst_obj_cls, inst_act_cls,
                regressBoxes, clipBoxes, threshold, nms_threshold,
                mode="object", classwise=True)
        else:
            features, union_act_cls, union_sub_reg, union_obj_reg, \
                inst_act_cls, inst_obj_cls, inst_bbox_reg, anchors = model(x)

            preds_union = postprocess_dense_union(
                x, anchors, union_act_cls, union_sub_reg, union_obj_reg,
                regressBoxes, clipBoxes, 0.5, 1, classwise=True)
            preds_inst = postprocess_hoi(
                x, anchors, inst_bbox_reg, inst_obj_cls, inst_act_cls,
                regressBoxes, clipBoxes, threshold, nms_threshold,
                mode="object", classwise=True)

    # Only the first entry of each result is used.
    preds_inst = invert_affine(framed_metas, preds_inst)[0]
    preds_union = invert_affine(framed_metas, preds_union)[0]

    return hoi_match(image_id, preds_inst, preds_union, prior_mask)
def infer(self, image):
    """Detect objects in ``image`` with an EfficientDet-D0 model on the CPU.

    The model is rebuilt and its weights reloaded from ``self.path`` on every
    call.  NOTE(review): this is expensive for repeated inference; consider
    caching the model on the instance if the call pattern allows it.

    Args:
        image: Anything ``np.array`` can turn into an array with at least
            three axes; the third axis is reversed before preprocessing (the
            original comment describes this as RGB -> BGR).

    Returns:
        A list with one dict per detection, with keys ``"confidence"``
        (score as a string), ``"label"`` (``self.labels.get(str(class_id))``,
        so ``None`` for unknown ids), ``"points"``
        (``[x_top_left, y_top_left, x_bottom_right, y_bottom_right]`` as
        Python floats) and ``"type"`` (always ``"rectangle"``).
    """
    img = np.array(image)
    img = img[:, :, ::-1]  # rgb 2 bgr

    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]

    threshold = 0.25
    iou_threshold = 0.25

    use_cuda = False
    use_float16 = False
    cudnn.fastest = False
    cudnn.benchmark = False

    input_size = 512

    _, framed_imgs, framed_metas = preprocess(img, max_size=input_size)

    tensors = [torch.from_numpy(fi) for fi in framed_imgs]
    if use_cuda:
        tensors = [t.cuda() for t in tensors]
    x = torch.stack(tensors, 0)
    x = x.to(torch.float16 if use_float16 else torch.float32).permute(0, 3, 1, 2)

    model = EfficientDetBackbone(compound_coef=0,
                                 num_classes=len(self.labels),
                                 ratios=anchor_ratios,
                                 scales=anchor_scales)
    model.load_state_dict(torch.load(self.path, map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    with torch.no_grad():
        _, regression, classification, anchors = model(x)

        regress_boxes = BBoxTransform()
        clip_boxes = ClipBoxes()

        out = postprocess(x, anchors, regression, classification,
                          regress_boxes, clip_boxes, threshold, iou_threshold)

    pred = invert_affine(framed_metas, out)

    results = []
    for detections in pred:
        rois = detections['rois']
        if len(rois) == 0:
            continue

        for j in range(len(rois)):
            # .tolist() yields plain Python floats for all four coordinates
            # (the earlier code mistyped one assignment as `yb4`, leaving the
            # last coordinate as a NumPy scalar).
            xt1, yt1, xbr, ybr = rois[j].astype(np.float64).tolist()

            class_key = str(detections['class_ids'][j])
            results.append({
                "confidence": str(detections['scores'][j]),
                "label": self.labels.get(class_key),
                "points": [xt1, yt1, xbr, ybr],
                "type": "rectangle",
            })

    return results
def evaluate_coco_show_res_jss(img_path, set_name, image_ids, coco, model, threshold=0.05):
    """Run detection on the first 21 images, save annotated previews and dump results.

    For each processed image the detections are converted to COCO-style
    ``[x, y, w, h]`` boxes and appended to the result list.  Boxes scoring
    above 0.2 are drawn on the original image, which is then saved as
    ``./test_result/zhongchui_d3_epoch200_1124/tmp{count}.jpeg``.  All results
    are finally written as JSON to the module-level ``det_save_json`` path.

    Reads the module-level ``input_sizes``, ``compound_coef``, ``use_cuda``,
    ``gpu``, ``use_float16``, ``nms_threshold`` and ``det_save_json``.

    Args:
        img_path: Prefix that is string-concatenated with each image's
            ``file_name`` (so it must already end with a path separator).
        set_name: Unused; kept for signature compatibility.
        image_ids: Iterable of COCO image ids; only the first 21 are visited.
        coco: Object providing ``loadImgs(image_id)``.
        model: Detection model called on the preprocessed tensor.
        threshold: Score threshold passed to ``postprocess``.

    Raises:
        Exception: If no detection was produced for any visited image.
    """
    results = []

    regress_boxes = BBoxTransform()
    clip_boxes = ClipBoxes()

    preview_dir = './test_result/zhongchui_d3_epoch200_1124/'
    max_images = 21

    for count, image_id in enumerate(tqdm(image_ids), start=1):
        if count > max_images:
            break

        image_info = coco.loadImgs(image_id)[0]
        image_path = img_path + image_info['file_name']
        print('image path:', image_path)

        ori_imgs, framed_imgs, framed_metas = preprocess(
            image_path, max_size=input_sizes[compound_coef])
        x = torch.from_numpy(framed_imgs[0])

        if use_cuda:
            x = x.cuda(gpu)
            x = x.half() if use_float16 else x.float()
        else:
            x = x.float()

        x = x.unsqueeze(0).permute(0, 3, 1, 2)

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            _, regression, classification, anchors = model(x)
            preds = postprocess(x, anchors, regression, classification,
                                regress_boxes, clip_boxes,
                                threshold, nms_threshold)

        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]

        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            for roi_id in range(rois.shape[0]):
                score = float(scores[roi_id])
                category_id = int(class_ids[roi_id]) + 1
                box = rois[roi_id, :].tolist()

                results.append({
                    'image_id': image_id,
                    'category_id': category_id,
                    'score': score,
                    'bbox': box,
                })

                # Draw only reasonably confident boxes on the preview image.
                if score > 0.2:
                    xmin, ymin, w, h = (int(v) for v in box[:4])
                    cv2.rectangle(ori_imgs[0], (xmin, ymin),
                                  (xmin + w, ymin + h), (0, 255, 0), 6)
                    cv2.putText(ori_imgs[0],
                                '{}:{:.2f}'.format(category_id, score),
                                (xmin, ymin), cv2.FONT_HERSHEY_SIMPLEX, 4.0,
                                (0, 255, 0), 6)

            # Ensure the hard-coded preview directory exists before writing,
            # so a missing directory cannot silently drop the preview.
            os.makedirs(preview_dir, exist_ok=True)
            cv2.imwrite(preview_dir + 'tmp' + '{}'.format(count) + '.jpeg',
                        ori_imgs[0])

    if not results:
        raise Exception(
            'the model does not provide any valid output, check model architecture and the data input'
        )

    # write output
    filepath = det_save_json
    if os.path.exists(filepath):
        os.remove(filepath)
    # Context manager guarantees the file is flushed and closed.
    with open(filepath, 'w') as fp:
        json.dump(results, fp, indent=4)
def predict(images: List[Union[str, os.PathLike]],
            model: EfficientDetBackbone,
            compound_coef: float,
            resize: Optional[Union[int, Tuple[int, int]]] = None,
            confidence: Optional[float] = 0.5,
            nms_threshold: Optional[float] = 0.5,
            output_path: Union[str, os.PathLike] = "../",
            ) -> None:
    """Generate predictions for a list of test images and write them to JSON.

    Args:
        images (List[Union[str, os.PathLike]]): Paths of the test images.
        model (EfficientDetBackbone): EfficientDet model.
        compound_coef (float): Compound scaling coefficient. It is used to
            index ``INPUT_SIZES``, so it must be an integer value.
        resize (Optional[Union[int, Tuple[int, int]]], optional): Currently
            unused. Defaults to None.
        confidence (Optional[float], optional): Confidence score used to
            filter detections. Defaults to 0.5.
        nms_threshold (Optional[float], optional): IoU threshold used to
            filter duplicate detections. Defaults to 0.5.
        output_path (Union[str, os.PathLike], optional): Either a path ending
            in ``.json`` (its parent directory is created when missing) or an
            existing directory, in which case a timestamped
            ``yolov5_predictions_<epoch>.json`` is created inside it.
            Defaults to "../".

    Raises:
        IOError: If ``output_path`` neither ends with ``.json`` nor is an
            existing directory.
    """
    # Maps image file name -> list of detection dicts.
    results = {}

    regress_boxes = BBoxTransform()
    clip_boxes = ClipBoxes()

    for image_path in tqdm(images):
        # Accept os.PathLike as the signature promises, and take the base
        # name with os.path so platform path separators are handled.
        image_path = os.fspath(image_path)
        img_name = os.path.basename(image_path)
        img_result = []

        _, framed_imgs, framed_metas = preprocess(image_path, max_size=INPUT_SIZES[compound_coef])
        x = torch.from_numpy(framed_imgs[0])

        if USE_CUDA:
            x = x.cuda()
        x = x.float()

        # Add a batch axis and reorder axes 0,3,1,2 for the model.
        x = x.unsqueeze(0).permute(0, 3, 1, 2)

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            _, regression, classification, anchors = model(x)

            # Apply the confidence threshold and NMS to the raw predictions.
            preds = postprocess(x, anchors, regression, classification,
                                regress_boxes, clip_boxes,
                                confidence, nms_threshold)

        # No predictions for this image: record an empty list.
        if not preds:
            results[img_name] = img_result
            continue

        preds = invert_affine(framed_metas, preds)[0]

        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        # Boxes are emitted as returned by invert_affine (no x,y,w,h conversion).
        for roi_id in range(rois.shape[0]):
            img_result.append({
                'class_index': int(class_ids[roi_id]),
                'bbox': rois[roi_id, :].tolist(),
                'confidence': float(scores[roi_id]),
            })

        results[img_name] = img_result

    if not results:
        print('The model does not provide any valid output, check model architecture and the data input')

    # Resolve the output file.
    output_path = os.fspath(output_path)
    if output_path.endswith(".json"):
        parent_dir = os.path.dirname(output_path)
        # A bare file name has an empty dirname; os.makedirs('') would raise.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        output_file = output_path
    elif os.path.isdir(output_path):
        output_file = os.path.join(
            output_path,
            "yolov5_predictions_" + str(time.time()).split(".")[0] + ".json"
        )
    else:
        raise IOError(
            f"{Fore.RED} no such directory {os.path.dirname(output_path)} {Style.RESET_ALL}"
        )

    with open(output_file, "w") as f:
        json.dump(results, f, indent=2)
    print(f"Detections are written to {output_file}.")
def evaluate_coco(img_path, set_name, image_ids, coco, model, threshold=0.05):
    """Run the detector over COCO images and dump results in COCO bbox format.

    Each detection becomes a dict with ``image_id``, ``category_id``
    (class id + 1), ``score`` and ``bbox`` (``[x, y, w, h]``).  The list is
    written to ``{set_name}_bbox_results.json`` in the current directory,
    replacing any existing file.

    Reads the module-level ``input_sizes``, ``compound_coef``, ``use_cuda``,
    ``gpu``, ``use_float16`` and ``nms_threshold``.

    Args:
        img_path: Directory containing the images.
        set_name: Dataset split name; used as the output file prefix.
        image_ids: Iterable of COCO image ids to evaluate.
        coco: Object providing ``loadImgs(image_id)``.
        model: Detection model called on the preprocessed tensor.
        threshold: Score threshold passed to ``postprocess``.

    Raises:
        Exception: If no detection was produced for any image.
    """
    results = []

    regress_boxes = BBoxTransform()
    clip_boxes = ClipBoxes()

    for image_id in tqdm(image_ids):
        image_info = coco.loadImgs(image_id)[0]
        image_path = img_path + '/' + image_info['file_name']

        _, framed_imgs, framed_metas = preprocess(
            image_path, max_size=input_sizes[compound_coef])
        x = torch.from_numpy(framed_imgs[0])

        if use_cuda:
            x = x.cuda(gpu)
            x = x.half() if use_float16 else x.float()
        else:
            x = x.float()

        x = x.unsqueeze(0).permute(0, 3, 1, 2)

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            _, regression, classification, anchors = model(x)
            preds = postprocess(x, anchors, regression, classification,
                                regress_boxes, clip_boxes,
                                threshold, nms_threshold)

        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]

        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            for roi_id in range(rois.shape[0]):
                results.append({
                    'image_id': image_id,
                    'category_id': int(class_ids[roi_id]) + 1,
                    'score': float(scores[roi_id]),
                    'bbox': rois[roi_id, :].tolist(),
                })

    if not results:
        raise Exception(
            'the model does not provide any valid output, check model architecture and the data input'
        )

    # write output
    filepath = f'{set_name}_bbox_results.json'
    if os.path.exists(filepath):
        os.remove(filepath)
    # Context manager guarantees the file is flushed and closed.
    with open(filepath, 'w') as fp:
        json.dump(results, fp, indent=4)
with torch.no_grad(): features, regression, classification, anchors = model(x) regressBoxes = BBoxTransform() clipBoxes = ClipBoxes() out = postprocess(x, anchors, regression, classification, regressBoxes, clipBoxes, threshold, iou_threshold) preds = invert_affine(framed_metas, out) if(savefig): imgs = ori_imgs; i = 0; for j in range(len(preds[i]['rois'])): x1, y1, x2, y2 = preds[i]['rois'][j].astype(np.int) obj = obj_list[preds[i]['class_ids'][j]] score = float(preds[i]['scores'][j]) if(score >= threshold and obj in classes): plot_one_box(imgs[i], [x1, y1, x2, y2], label=obj,score=score,color=color_list[get_index_label(obj, obj_list)]) cv2.imwrite(output_name, imgs[i])