def filter_prediction(mc, boxes, probs, cls_idx):
    """Select candidate detections and apply per-class non-maximum suppression.

    If ``mc.TOP_N_DETECTION`` is positive and smaller than the number of
    candidates, only the ``TOP_N_DETECTION`` highest-probability candidates are
    kept (``mc.PROB_THRESH`` is not applied in that case).  Otherwise the
    candidates whose probability exceeds ``mc.PROB_THRESH`` are kept.  The
    survivors are then grouped by class and each group is passed to
    ``util.nms`` together with ``mc.NMS_THRESH``.

    Args:
      mc: config object providing TOP_N_DETECTION, PROB_THRESH, NMS_THRESH
        and CLASSES.
      boxes: numpy array of boxes, one row per candidate.
      probs: 1-D numpy array of probabilities, aligned with ``boxes``.
      cls_idx: numpy array of class indices, aligned with ``boxes``.

    Returns:
      final_boxes: list of kept boxes.
      final_probs: list of kept probabilities.
      final_cls_idx: list of class indices of the kept boxes.
    """
    if 0 < mc.TOP_N_DETECTION < len(probs):
        # Indices of the TOP_N_DETECTION largest probabilities, best first.
        selected = probs.argsort()[:-mc.TOP_N_DETECTION - 1:-1]
    else:
        selected = np.nonzero(probs > mc.PROB_THRESH)[0]

    probs = probs[selected]
    boxes = boxes[selected]
    cls_idx = cls_idx[selected]

    final_boxes = []
    final_probs = []
    final_cls_idx = []

    for c in range(mc.CLASSES):
        # Vectorised class lookup instead of a Python-level scan per class.
        idx_per_class = np.flatnonzero(cls_idx == c)
        if idx_per_class.size == 0:
            # Nothing to suppress for this class; do not call NMS on empty input.
            continue

        # keep[i] is truthy when the i-th candidate of this class survives.
        keep = util.nms(boxes[idx_per_class], probs[idx_per_class],
                        mc.NMS_THRESH)
        for i, kept in enumerate(keep):
            if kept:
                final_boxes.append(boxes[idx_per_class[i]])
                final_probs.append(probs[idx_per_class[i]])
                final_cls_idx.append(c)

    return final_boxes, final_probs, final_cls_idx
def filter_prediction(self, boxes, probs, cls_idx, backgroud_id=-1):
    """Filter bounding box predictions with probability threshold and
    non-maximum suppression.

    If ``mc.TOP_N_DETECTION`` is positive and smaller than the number of
    candidates, only the ``TOP_N_DETECTION`` highest-probability candidates are
    kept; otherwise candidates with probability above ``mc.PROB_THRESH`` are
    kept.  Per-class NMS (``util.nms`` with ``mc.NMS_THRESH``) is then applied.

    Args:
      boxes: array of [cx, cy, w, h].
      probs: array of probabilities
      cls_idx: array of class indices
      backgroud_id: class index that is excluded from the output. A negative
        value (the default) disables the exclusion.
    Returns:
      final_boxes: array of filtered bounding boxes.
      final_probs: array of filtered probabilities
      final_cls_idx: array of filtered class indices
    """
    mc = self.mc

    if 0 < mc.TOP_N_DETECTION < len(probs):
        # Indices of the TOP_N_DETECTION largest probabilities, best first.
        selected = probs.argsort()[:-mc.TOP_N_DETECTION - 1:-1]
    else:
        selected = np.nonzero(probs > mc.PROB_THRESH)[0]

    probs = probs[selected]
    boxes = boxes[selected]
    cls_idx = cls_idx[selected]

    final_boxes = []
    final_probs = []
    final_cls_idx = []

    for c in range(mc.CLASSES):
        # The background class is dropped here rather than before selection,
        # so it still competes for the top-N slots above.
        if backgroud_id >= 0 and c == backgroud_id:
            continue

        # Vectorised class lookup instead of a Python-level scan per class.
        idx_per_class = np.flatnonzero(cls_idx == c)
        if idx_per_class.size == 0:
            # Nothing to suppress for this class; do not call NMS on empty input.
            continue

        # keep[i] is truthy when the i-th candidate of this class survives.
        keep = util.nms(boxes[idx_per_class], probs[idx_per_class],
                        mc.NMS_THRESH)
        for i, kept in enumerate(keep):
            if kept:
                final_boxes.append(boxes[idx_per_class[i]])
                final_probs.append(probs[idx_per_class[i]])
                final_cls_idx.append(c)

    return final_boxes, final_probs, final_cls_idx
def detect_onet(onet, image, bboxes, device):
    """Run the O-Net stage on candidate boxes and return boxes plus landmarks.

    Each candidate is cropped from ``image`` (using the coordinates produced by
    ``correct_bboxes``), resized to 48x48, preprocessed and scored by ``onet``.
    Candidates whose face probability exceeds 0.98 are kept, their landmarks
    are converted from box-relative to image coordinates, the boxes are
    calibrated with the predicted offsets and finally NMS is applied.

    Args:
      onet: callable returning ``(landmark, offset, prob)`` tensors for a batch.
      image: array whose ``shape`` unpacks as (height, width, channel).
      bboxes: array of candidate boxes; columns 0-3 are used as x1, y1, x2, y2
        and column 4 is overwritten with the O-Net face probability.
      device: torch device the batch is moved to.

    Returns:
      bboxes: array of surviving boxes after calibration and NMS.
      landmarks: array of the corresponding landmarks; columns 0-4 are x
        coordinates and columns 5-9 are y coordinates.
      For empty input, empty arrays of shape (0, 5) and (0, 10) are returned.
    """
    size = 48
    thresholds = 0.98  # face detection threshold
    nms_thresholds = 0.7

    height, width, channel = image.shape
    num_boxes = len(bboxes)
    if num_boxes == 0:
        # Nothing to score; avoid running the network on an empty batch.
        return np.empty((0, 5)), np.empty((0, 10))

    # NOTE(review): presumably destination/source crop coordinates clipped to
    # the image bounds - confirm against correct_bboxes.
    [dy, edy, dx, edx, y, ey, x, ex, w, h] = correct_bboxes(bboxes, width, height)

    img_boxes = np.zeros((num_boxes, 3, size, size))
    for i in range(num_boxes):
        img_box = np.zeros((h[i], w[i], 3))
        img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] = \
            image[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :]
        # resize
        img_box = cv2.resize(img_box, (size, size),
                             interpolation=cv2.INTER_LINEAR)
        img_boxes[i, :, :, :] = preprocess(img_box)

    img_boxes = torch.FloatTensor(img_boxes).to(device)
    # Inference only: do not record an autograd graph.
    with torch.no_grad():
        landmark, offset, prob = onet(img_boxes)
    landmarks = landmark.detach().cpu().numpy()  # shape [n_boxes, 10]
    offsets = offset.detach().cpu().numpy()  # shape [n_boxes, 4]
    probs = prob.detach().cpu().numpy()  # shape [n_boxes, 2]

    keep = np.where(probs[:, 1] > thresholds)[0]
    bboxes = bboxes[keep]
    bboxes[:, 4] = probs[keep, 1].reshape((-1, ))  # assign score from O-Net
    offsets = offsets[keep]
    landmarks = landmarks[keep]

    # compute landmark points: box-relative -> image coordinates
    box_w = bboxes[:, 2] - bboxes[:, 0] + 1.0
    box_h = bboxes[:, 3] - bboxes[:, 1] + 1.0
    xmin, ymin = bboxes[:, 0], bboxes[:, 1]
    landmarks[:, 0:5] = (np.expand_dims(xmin, 1)
                         + np.expand_dims(box_w, 1) * landmarks[:, 0:5])
    landmarks[:, 5:10] = (np.expand_dims(ymin, 1)
                          + np.expand_dims(box_h, 1) * landmarks[:, 5:10])

    bboxes = calibrate_box(bboxes, offsets)
    keep = nms(bboxes, nms_thresholds, mode='min')
    bboxes = bboxes[keep]
    landmarks = landmarks[keep]
    return bboxes, landmarks
def run_det(self):
    """Run detection over ``self.loader`` and write ground truth and detections.

    For every batch the model output is converted to boxes (score threshold
    0.5), passed through ``nms`` and written with ``self.write`` under
    ``cal_map/det``; the batch's ground-truth boxes are written under
    ``cal_map/gt``.  Only the first element of each batch is read for the
    image id and ground truth.
    """
    for batch in tqdm(self.loader):
        inputs = batch['input'].to(device='cuda')
        predictions = self.model(inputs)

        # Resolve the file name of the image from its id.
        image_id = batch['img_id'].numpy()[0]
        image_info = self.loadImgs(ids=[image_id])[0]
        file_name = image_info['file_name']

        detections = self.model.convert_pred(predictions, scores_thresh=0.5)
        detections = nms(detections, 0.5, 0.5)

        ground_truth = batch['gt_boxes'].numpy()[0]
        self.write(ground_truth, file_name, det_dir='cal_map/gt')
        self.write(detections, file_name, det_dir='cal_map/det')
def predict(img, model, nms_iou=0.45, conf_thresh=0.45):
    """Run the detector on one image and return boxes and the inference time.

    Args:
      img: image array; ``img.shape[:2]`` is used as the frame size.
      model: object exposing ``predict_on_batch``.
      nms_iou: threshold forwarded to ``util.nms``.
      conf_thresh: threshold forwarded to ``util.postprocess_boxes``.

    Returns:
      bboxes: result of ``util.nms`` on the post-processed predictions.
      exec_time: seconds spent inside ``model.predict_on_batch`` only
        (pre- and post-processing are not included).
    """
    input_size = cfg.YOLO.INPUT_SIZE
    frame_size = img.shape[:2]

    img_data = util.image_preporcess(img.copy(), [input_size, input_size])
    img_data = img_data[np.newaxis, ...].astype(np.float32)

    # perf_counter is monotonic and high-resolution, which is what an elapsed
    # time measurement needs; time.time() can jump with wall-clock changes.
    start = time.perf_counter()
    # Every second output starting at index 1 (indices 1, 3, 5) is kept.
    pred_bbox = model.predict_on_batch(img_data)[1:6:2]
    exec_time = time.perf_counter() - start

    # Flatten every selected output to 2-D and stack them into one tensor.
    pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
    pred_bbox = tf.concat(pred_bbox, axis=0)

    bboxes = util.postprocess_boxes(pred_bbox, frame_size, input_size,
                                    conf_thresh)
    bboxes = util.nms(bboxes, nms_iou, method='nms')
    return bboxes, exec_time
def filter_prediction(self, boxes, probs, cls_idx):
    """Filter bounding box predictions with probability threshold and
    non-maximum suppression.

    Candidates are first thresholded on probability, then (if
    ``mc.TOP_N_DETECTION`` is positive and smaller than the number of
    remaining candidates) reduced to the ``TOP_N_DETECTION`` most probable
    ones, and finally passed through per-class NMS.

    Args:
      boxes: array of [cx, cy, w, h].
      probs: array of probabilities
      cls_idx: array of class indices
    Returns:
      final_boxes: array of filtered bounding boxes.
      final_probs: array of filtered probabilities
      final_cls_idx: array of filtered class indices
    """
    mc = self.mc

    # Pre-filter on probability.
    # NOTE(review): the cut-off used here is mc.NMS_THRESH, not
    # mc.PROB_THRESH. That looks unintended (the previous, disabled code
    # compared against PROB_THRESH) - confirm before changing, since it
    # alters which detections are returned.
    confident = np.nonzero(probs > mc.NMS_THRESH)[0]
    probs = probs[confident]
    boxes = boxes[confident]
    cls_idx = cls_idx[confident]

    if 0 < mc.TOP_N_DETECTION < len(probs):
        # Indices of the TOP_N_DETECTION largest probabilities, best first.
        order = probs.argsort()[:-mc.TOP_N_DETECTION - 1:-1]
        probs = probs[order]
        boxes = boxes[order]
        cls_idx = cls_idx[order]

    final_boxes = []
    final_probs = []
    final_cls_idx = []

    for c in range(mc.CLASSES):
        # Vectorised class lookup instead of a Python-level scan per class.
        idx_per_class = np.flatnonzero(cls_idx == c)
        if idx_per_class.size == 0:
            # Nothing to suppress for this class; do not call NMS on empty input.
            continue

        # keep[i] is truthy when the i-th candidate of this class survives.
        keep = util.nms(boxes[idx_per_class], probs[idx_per_class],
                        mc.NMS_THRESH)
        for i, kept in enumerate(keep):
            if kept:
                final_boxes.append(boxes[idx_per_class[i]])
                final_probs.append(probs[idx_per_class[i]])
                final_cls_idx.append(c)

    return final_boxes, final_probs, final_cls_idx
def detect_rnet(rnet, image, bboxes, device):
    """Run the R-Net stage on candidate boxes and return the refined boxes.

    Each candidate is cropped from ``image`` (using the coordinates produced by
    ``correct_bboxes``), resized to 24x24, preprocessed and scored by ``rnet``.
    Candidates whose face probability exceeds 0.8 are kept, NMS is applied,
    and the survivors are calibrated with the predicted offsets, converted to
    squares and rounded.

    Args:
      rnet: callable returning ``(offset, prob)`` tensors for a batch.
      image: array whose ``shape`` unpacks as (height, width, channel).
      bboxes: array of candidate boxes; column 4 is overwritten with the
        R-Net face probability.
      device: torch device the batch is moved to.

    Returns:
      Array of refined boxes whose first four columns are rounded.  For empty
      input an empty array of shape (0, 5) is returned.
    """
    size = 24
    thresholds = 0.8  # face detection threshold
    nms_thresholds = 0.7

    height, width, channel = image.shape
    num_boxes = len(bboxes)
    if num_boxes == 0:
        # Nothing to score; avoid running the network on an empty batch.
        return np.empty((0, 5))

    # NOTE(review): presumably destination/source crop coordinates clipped to
    # the image bounds - confirm against correct_bboxes.
    [dy, edy, dx, edx, y, ey, x, ex, w, h] = correct_bboxes(bboxes, width, height)

    img_boxes = np.zeros((num_boxes, 3, size, size))
    for i in range(num_boxes):
        img_box = np.zeros((h[i], w[i], 3))
        img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] = \
            image[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :]
        # resize
        img_box = cv2.resize(img_box, (size, size),
                             interpolation=cv2.INTER_LINEAR)
        img_boxes[i, :, :, :] = preprocess(img_box)

    img_boxes = torch.FloatTensor(img_boxes).to(device)
    # Inference only: do not record an autograd graph.
    with torch.no_grad():
        offset, prob = rnet(img_boxes)
    offsets = offset.detach().cpu().numpy()  # shape [n_boxes, 4]
    probs = prob.detach().cpu().numpy()  # shape [n_boxes, 2]

    keep = np.where(probs[:, 1] > thresholds)[0]
    bboxes = bboxes[keep]
    bboxes[:, 4] = probs[keep, 1].reshape((-1, ))  # assign score from stage 2
    offsets = offsets[keep]

    # Second selection: indices are relative to the already-thresholded boxes,
    # so the offsets must be indexed with the same NMS result.
    keep = nms(bboxes, nms_thresholds)
    bboxes = bboxes[keep]
    bboxes = calibrate_box(bboxes, offsets[keep])
    bboxes = convert_to_square(bboxes)
    bboxes[:, 0:4] = np.round(bboxes[:, 0:4])
    return bboxes
def detect_pnet(pnet, image, min_face_size, device):
    """Run the P-Net stage over an image pyramid and return candidate boxes.

    The image is rescaled so that a face of ``min_face_size`` maps to the
    12-pixel P-Net window, then repeatedly shrunk by a factor of 0.707 while
    its shorter side stays above 12 pixels.  At every scale, windows whose
    face probability exceeds 0.7 become candidate boxes, which are reduced
    with NMS per scale and again across all scales, calibrated with the
    predicted offsets, converted to squares and rounded.

    Args:
      pnet: callable returning ``(offset, prob)`` tensors for one image.
      image: array whose ``shape`` unpacks as (height, width, channel).
      min_face_size: smallest face size (in pixels) to detect.
      device: torch device the input is moved to.

    Returns:
      Array of shape [n_boxes, 5] with columns x1, y1, x2, y2, score.  An
      empty array of shape (0, 5) is returned when no scale yields a
      candidate.
    """
    thresholds = 0.7  # face detection threshold
    nms_thresholds = 0.7

    # BUILD AN IMAGE PYRAMID
    height, width, _ = image.shape
    min_length = min(height, width)
    min_detection_size = 12
    factor = 0.707  # sqrt(0.5)

    # Scale the image so that the minimum size we can detect equals the
    # minimum face size we want to detect.
    m = min_detection_size / min_face_size
    min_length *= m

    scales = []
    factor_count = 0
    while min_length > min_detection_size:
        scales.append(m * factor**factor_count)
        min_length *= factor
        factor_count += 1

    # Applying P-Net is equivalent, in some sense, to moving a 12x12 window
    # with stride 2 over the scaled image.
    stride, cell_size = 2, 12

    bounding_boxes = []
    with torch.no_grad():
        # run P-Net on different scales
        for scale in scales:
            sw, sh = math.ceil(width * scale), math.ceil(height * scale)
            img = cv2.resize(image, (sw, sh), interpolation=cv2.INTER_LINEAR)
            img = torch.FloatTensor(preprocess(img)).to(device)

            offset, prob = pnet(img)
            # probs: probability of a face at each sliding window
            probs = prob.cpu().data.numpy()[0, 1, :, :]
            # raw_offsets: transformations to true bounding boxes
            raw_offsets = offset.cpu().data.numpy()

            # Tuple of (row indices, column indices) of windows that probably
            # contain a face.
            inds = np.where(probs > thresholds)
            if inds[0].size == 0:
                continue

            # transformations of bounding boxes
            box_offsets = np.array(
                [raw_offsets[0, i, inds[0], inds[1]] for i in range(4)])
            score = probs[inds[0], inds[1]]

            # P-Net is applied to scaled images, so the bounding boxes are
            # rescaled back to the original image.
            bounding_box = np.vstack([
                np.round((stride * inds[1] + 1.0) / scale),
                np.round((stride * inds[0] + 1.0) / scale),
                np.round((stride * inds[1] + 1.0 + cell_size) / scale),
                np.round((stride * inds[0] + 1.0 + cell_size) / scale),
                score,
                box_offsets,
            ])
            boxes = bounding_box.T

            # Per-scale NMS. The selection must be assigned back; previously
            # the indexed result was discarded and all boxes were kept.
            keep = nms(boxes[:, 0:5], overlap_threshold=0.5)
            bounding_boxes.append(boxes[keep])

    if not bounding_boxes:
        # No scale produced a candidate (or the image is too small for any
        # scale); np.vstack would raise on an empty list.
        return np.empty((0, 5))

    # collect boxes (and offsets, and scores) from different scales
    bounding_boxes = np.vstack(bounding_boxes)
    keep = nms(bounding_boxes[:, 0:5], nms_thresholds)
    bounding_boxes = bounding_boxes[keep]

    # use offsets predicted by pnet to transform bounding boxes
    # shape [n_boxes, 5], x1, y1, x2, y2, score
    bboxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
    bboxes = convert_to_square(bboxes)
    bboxes[:, 0:4] = np.round(bboxes[:, 0:4])
    return bboxes
def filter_prediction(self, boxes, points, probs, poses, ages, cls_idx):
    """Filter bounding box predictions with probability threshold and
    non-maximum suppression.

    If ``mc.TOP_N_DETECTION`` is positive and smaller than the number of
    candidates, only the ``TOP_N_DETECTION`` highest-probability candidates are
    kept; otherwise candidates with probability above ``mc.PROB_THRESH`` are
    kept.  Per-class NMS (``util.nms`` with ``mc.NMS_THRESH``) is then applied
    on the boxes, and ``points``, ``poses`` and ``ages`` are filtered in
    lockstep with them.

    Args:
      boxes: array of [cx, cy, w, h].
      points: per-detection array, aligned with ``boxes``.
      probs: array of probabilities
      poses: per-detection array, aligned with ``boxes``.
      ages: per-detection array, aligned with ``boxes``.
      cls_idx: array of class indices
    Returns:
      final_boxes: array of filtered bounding boxes.
      final_points: filtered entries of ``points``.
      final_probs: array of filtered probabilities
      final_poses: filtered entries of ``poses``.
      final_ages: filtered entries of ``ages``.
      final_cls_idx: array of filtered class indices
    """
    mc = self.mc

    if 0 < mc.TOP_N_DETECTION < len(probs):
        # Indices of the TOP_N_DETECTION largest probabilities, best first.
        selected = probs.argsort()[:-mc.TOP_N_DETECTION - 1:-1]
    else:
        selected = np.nonzero(probs > mc.PROB_THRESH)[0]

    probs = probs[selected]
    boxes = boxes[selected]
    points = points[selected]
    cls_idx = cls_idx[selected]
    poses = poses[selected]
    ages = ages[selected]

    final_boxes = []
    final_points = []
    final_probs = []
    final_cls_idx = []
    final_poses = []
    final_ages = []

    for c in range(mc.CLASSES):
        # Vectorised class lookup instead of a Python-level scan per class.
        idx_per_class = np.flatnonzero(cls_idx == c)
        if idx_per_class.size == 0:
            # Nothing to suppress for this class; do not call NMS on empty input.
            continue

        # keep[i] is truthy when the i-th candidate of this class survives.
        keep = util.nms(boxes[idx_per_class], probs[idx_per_class],
                        mc.NMS_THRESH)
        for i, kept in enumerate(keep):
            if kept:
                j = idx_per_class[i]
                final_points.append(points[j])
                final_boxes.append(boxes[j])
                final_probs.append(probs[j])
                final_poses.append(poses[j])
                final_ages.append(ages[j])
                final_cls_idx.append(c)

    return (final_boxes, final_points, final_probs, final_poses, final_ages,
            final_cls_idx)
# 计算类别 pred_cates = torch.argmax(pred_probs, dim=1) # 计算分类概率 pred_confidences_idxs = torch.argmax(pred_confidences, dim=1) pred_cate_probs = pred_probs[range(S * S), pred_cates] \ * pred_confidences[range(S * S), pred_confidences_idxs] # 计算预测边界框 pred_cate_bboxs = torch.zeros(S * S, 4) pred_cate_bboxs[:, 0] = pred_bboxs[range(S * S), pred_confidences_idxs * 4] pred_cate_bboxs[:, 1] = pred_bboxs[range(S * S), pred_confidences_idxs * 4 + 1] pred_cate_bboxs[:, 2] = pred_bboxs[range(S * S), pred_confidences_idxs * 4 + 2] pred_cate_bboxs[:, 3] = pred_bboxs[range(S * S), pred_confidences_idxs * 4 + 3] # 预测边界框的缩放,回到原始图像 pred_bboxs = util.deform_bboxs(pred_cate_bboxs, data_dict, S) nms_rects, nms_scores, nms_cates = util.nms(pred_bboxs, pred_cate_probs, pred_cates) # 在原图绘制标注边界框和预测边界框 dst = draw.plot_bboxs(data_dict['src'], data_dict['bndboxs'], data_dict['name_list'], cate_list, pred_bboxs, pred_cates, pred_cate_probs) cv2.imwrite('./detect.png', dst) # BGR -> RGB dst = cv2.cvtColor(dst, cv2.COLOR_BGR2RGB) draw.show(dst)
from utils.checkpoint import load_checkpoint
from utils.util import nms
import os
import cv2
import numpy as np
import time

# Interactive demo: run the detector over the 'val' split one image at a time,
# draw the detections and show them in a window until ESC is pressed.
cfg = dark53_yolo
loader = DataLoader(yoloPascal(cfg, 'val'), batch_size=1, shuffle=False)
model = Yolodet(cfg, pretrained=False)
load_checkpoint(model, 'weights/dark_yolo/model_273.pth')
model.eval()
model.cuda()

for batch in loader:
    # The printed rate covers the forward call only.
    start = time.time()
    out = model(batch['input'].to(device='cuda'))
    print('fps: {}'.format(1 / (time.time() - start)))

    # Look up the image file that belongs to this batch (batch size is 1).
    file_name = loader.dataset.coco.loadImgs(
        ids=[batch['img_id'].numpy()[0]])[0]['file_name']
    img_path = os.path.join(loader.dataset.img_dir, file_name)
    image = cv2.imread(img_path)
    shape = image.shape

    bboxes = model.convert_pred(out, shape, 0.4)
    bboxes = nms(bboxes, 0.4, 0.4)
    for bb in bboxes:
        # np.int was removed in NumPy 1.24; the builtin int is the equivalent
        # dtype and works on every NumPy version.
        x, y, x1, y1 = bb.astype(int)[:4]
        cv2.rectangle(image, (x, y), (x1, y1), (255, 0, 0), 3)

    cv2.imshow('', image)
    # Wait for a key press; key code 27 (ESC) stops the loop.
    if cv2.waitKey(0) & 0xff == 27:
        break

cv2.destroyAllWindows()
print('the shape of scores is {}'.format(scores.shape)) boxes_total.append(boxes) scores_total.append(scores) boxes_total = np.concatenate(boxes_total, axis=0) scores_total = np.concatenate(scores_total, axis=0) # implement nms for each class for i in range(num_classes): score_per_class = scores_total[..., i:i + 1] rectangles = np.concatenate([boxes_total, score_per_class], axis=-1) have_object = np.where(rectangles[..., 4] > 0.6)[0] # print(have_object) rectangles = rectangles[have_object] pick = nms(rectangles, threshold=0.3) # pick = tf.image.non_max_suppression(rectangles[..., 0:4], rectangles[..., 4], 20, 0.3) # print(pick) # boxes_pick = tf.gather(rectangles, pick) if pick: boxes_pick = rectangles[pick] for box in boxes_pick: # cv2.rectangle(street_cv2, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 0, 255), 2) # correct box x1, y1, x2, y2, _ = box left = int(max(0, x1)) top = int(max(0, y1)) right = int(min(street.size[0], x2)) bottom = int(min(street.size[1], y2))
from config import hrnet_yolo, dark53_yolo
from utils.checkpoint import load_checkpoint
from utils.util import nms
import os
import cv2
import numpy as np
import time

# Interactive demo: run the detector over the 'val' split one image at a time,
# draw the detections and show them in a window until ESC is pressed.
cfg = dark53_yolo
loader = DataLoader(yoloPascal(cfg, 'val'), batch_size=1, shuffle=False)
model = Yolodet(cfg, pretrained=False)
load_checkpoint(model, 'weights/dark_yolo/model_13.pth')
model.eval()
model.cuda()

for batch in loader:
    # The printed rate covers the forward call only.
    start = time.time()
    out = model(batch['input'].to(device='cuda'))
    print('fps: {}'.format(1 / (time.time() - start)))

    # Look up the image file that belongs to this batch (batch size is 1).
    file_name = loader.dataset.coco.loadImgs(
        ids=[batch['img_id'].numpy()[0]])[0]['file_name']
    img_path = os.path.join(loader.dataset.img_dir, file_name)
    image = cv2.imread(img_path)
    shape = image.shape

    bboxes = model.convert_pred(out, shape, 0.5)
    bboxes = nms(bboxes, 0.5, 0.5)
    for bb in bboxes:
        # np.int was removed in NumPy 1.24; the builtin int is the equivalent
        # dtype and works on every NumPy version.
        x, y, x1, y1 = bb.astype(int)[:4]
        cv2.rectangle(image, (x, y), (x1, y1), (255, 0, 0), 3)

    cv2.imshow('', image)
    # Wait for a key press; key code 27 (ESC) stops the loop.
    if cv2.waitKey(0) & 0xff == 27:
        break

cv2.destroyAllWindows()