def draw_trackers(image, tracks, accumulation_gender=True, accumulation_age=True, show_both=False):
    '''
    show_both: True will show accumulate_current
    '''
    for track in tracks:
        if not track.is_confirmed() or track.time_since_update > 0:
            continue
        xmin, ymin, xmax, ymax = [int(x) for x in track.to_tlbr()]
        color = create_unique_color(track.track_id)

        # draw bbox
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 2)

        # track id
        label = str(track.track_id)
        text_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
        cv2.rectangle(image, (xmin, ymin),
                      (xmin + 10 + text_size[0][0], ymin + 10 + text_size[0][1]),
                      color, -1)
        cv2.putText(image, label, (xmin + 5, ymin + 5 + text_size[0][1]),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

        # draw info
        font_scale = 1
        font = cv2.FONT_HERSHEY_SIMPLEX
        thickness = 2

        # Gender (kept as a list in every branch so "_".join() below is safe;
        # joining a plain string would splice underscores between characters)
        gender = []
        if len(track.genders) > 0:
            if accumulation_gender:
                # accumulated gender over the track history
                gender.append("F" if np.mean(track.genders) <= 0.6 else "M")
                if show_both:
                    # also show the gender of the latest detection
                    gender.append("F" if track.genders[-1] < 0.5 else "M")
            else:
                gender.append("F" if track.genders[-1] < 0.5 else "M")
        else:
            gender.append("N/A")

        # Age
        age = []
        if len(track.ages) > 0:
            if accumulation_age:
                age.append(str(int(np.mean(track.ages))))
                if show_both:
                    age.append(str(int(track.ages[-1])))
            else:
                age.append(str(int(track.ages[-1])))
        else:
            age.append("N/A")

        # GENDER AGE
        gen_age = "_".join(gender) + "_".join(age)
        text_size = cv2.getTextSize(gen_age, font, font_scale, thickness)
        txt_loc = (xmin, ymax + 10 + text_size[0][1])
        cv2.putText(image, text=gen_age, org=txt_loc, fontFace=font,
                    fontScale=font_scale, color=color, thickness=thickness)

        # EXPR
        text_size = cv2.getTextSize(track.expr, font, font_scale, thickness)
        txt_loc = (xmin, ymax + 30 + 2 * text_size[0][1])
        cv2.putText(image, text=track.expr, org=txt_loc, fontFace=font,
                    fontScale=font_scale, color=color, thickness=thickness)
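# create_unique_color() is referenced above but not defined in this excerpt.
# A minimal sketch (an assumption, not necessarily the original implementation):
# derive a stable, well-separated BGR color from the track id by stepping
# around the HSV hue circle.
import colorsys

def create_unique_color(tag, hue_step=0.41):
    # spread hues so consecutive track ids get visually distinct colors
    h = (tag * hue_step) % 1.0
    v = 1.0 - (int(tag * hue_step) % 4) / 5.0
    r, g, b = colorsys.hsv_to_rgb(h, 1.0, v)
    return int(b * 255), int(g * 255), int(r * 255)  # OpenCV expects BGR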
def put_text(img, x, y, text, color):
    fontFace = cv2.FONT_HERSHEY_SIMPLEX
    fontScale = 1
    thickness = 1
    # getTextSize returns ((width, height), baseline); shifting the origin by
    # the text height makes (x, y) behave like the top-left corner of the text
    boxsize, baseline = cv2.getTextSize(text, fontFace, fontScale, thickness)
    cv2.putText(img, text, (x, y + boxsize[1]), fontFace, fontScale, color, thickness)
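# A minimal, self-contained sketch of the getTextSize pattern used throughout
# this section: measure the string, draw a filled background that also covers
# the baseline (descenders like "g"/"y"), then draw the text on top. All names
# here are illustrative.
import numpy as np
import cv2

canvas = np.zeros((120, 480, 3), dtype=np.uint8)
msg = "getTextSize demo: gyp"
font, scale, thick = cv2.FONT_HERSHEY_SIMPLEX, 1.0, 2
(tw, th), baseline = cv2.getTextSize(msg, font, scale, thick)
org = (10, 60)  # putText's origin is the text's bottom-left, on the baseline
cv2.rectangle(canvas, (org[0], org[1] - th), (org[0] + tw, org[1] + baseline),
              (50, 50, 50), -1)
cv2.putText(canvas, msg, org, font, scale, (255, 255, 255), thick, cv2.LINE_AA)
cv2.imwrite("text_demo.png", canvas)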
def criaTeclado(index, letra, selector):
    # Keys are laid out on a 10-column grid of 70x70 px cells; rows 0-2
    # correspond to indices 0-9, 10-19 and 20-26
    x = (index % 10) * 70
    y = (index // 10) * 70

    # Keys
    width = 70
    height = 70
    th = 2
    if selector is True:
        cv2.rectangle(teclado, (x + th, y + th),
                      (x + width - th, y + height - th), (255, 255, 255), -1)
    else:
        cv2.rectangle(teclado, (x + th, y + th),
                      (x + width - th, y + height - th), (255, 0, 0), th)

    # Text, centred inside the key cell
    font_scale = 5
    text_size = cv2.getTextSize(letra, cv2.FONT_HERSHEY_PLAIN, font_scale, 2)[0]
    width_text, height_text = text_size[0], text_size[1]
    text_x = int((width - width_text) / 2) + x
    text_y = int((height + height_text) / 2) + y
    cv2.putText(teclado, letra, (text_x, text_y), cv2.FONT_HERSHEY_PLAIN,
                font_scale, (255, 0, 0), 2)
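# A hedged usage sketch: criaTeclado() draws onto a module-level `teclado`
# canvas, so a plausible driver (an assumption; the code that defines
# `teclado` and the real letter layout is not shown here) allocates the
# canvas and renders one key per letter, highlighting the selected index.
import numpy as np
import cv2

teclado = np.zeros((210, 700, 3), dtype=np.uint8)  # 3 rows x 10 cols of 70 px keys
letters = "QWERTYUIOPASDFGHJKLZXCVBNM<"              # illustrative key set
selected = 4
for i, ch in enumerate(letters):
    criaTeclado(i, ch, i == selected)
cv2.imwrite("teclado.png", teclado)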
# pass the blob through the network and obtain the detections and
# predictions
net.setInput(blob)
detections = net.forward()

# put 'esc' text on frame
font_scale = 0.5
font = cv2.FONT_HERSHEY_COMPLEX
# set the rectangle background to black
rectangle_bgr = (0, 0, 0)
# set some text
text = "Press 'esc' to exit"
# get the width and height of the text box
(text_width, text_height) = cv2.getTextSize(text, font, fontScale=font_scale,
                                            thickness=1)[0]
# set the text start position
text_offset_x = 0
text_offset_y = frame.shape[0] - 4
# make the coords of the box with a small padding of two pixels
box_coords = ((text_offset_x, text_offset_y),
              (text_offset_x + text_width + 2, text_offset_y - text_height + 2))
cv2.rectangle(frame, box_coords[0], box_coords[1], rectangle_bgr, cv2.FILLED)
cv2.putText(frame, text, (text_offset_x, text_offset_y), font,
            fontScale=font_scale, color=(0, 255, 0), thickness=1)
def draw_bbox(image, bboxes, classes=read_class_names(cfg.YOLO.CLASSES),
              allowed_classes=list(read_class_names(cfg.YOLO.CLASSES).values()),
              show_label=True):
    detected_classes = []
    num_classes = len(classes)
    image_h, image_w, _ = image.shape
    hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))

    random.seed(0)
    random.shuffle(colors)
    random.seed(None)

    out_boxes, out_scores, out_classes, num_boxes = bboxes
    for i in range(num_boxes[0]):
        if int(out_classes[0][i]) < 0 or int(out_classes[0][i]) > num_classes:
            continue
        coor = out_boxes[0][i]
        coor[0] = int(coor[0] * image_h)
        coor[2] = int(coor[2] * image_h)
        coor[1] = int(coor[1] * image_w)
        coor[3] = int(coor[3] * image_w)

        fontScale = 0.5
        score = out_scores[0][i]
        class_ind = int(out_classes[0][i])
        class_name = classes[class_ind]

        # check if class is in allowed classes
        if class_name not in allowed_classes:
            continue
        else:
            detected_classes.append(class_name)

        bbox_color = colors[class_ind]
        bbox_thick = int(0.6 * (image_h + image_w) / 600)
        c1, c2 = (coor[1], coor[0]), (coor[3], coor[2])
        cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)

        if show_label:
            bbox_mess = '%s: %.2f' % (classes[class_ind], score)
            t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick // 2)[0]
            c3 = (c1[0] + t_size[0], c1[1] - t_size[1] - 3)
            cv2.rectangle(image, c1, (np.float32(c3[0]), np.float32(c3[1])),
                          bbox_color, -1)  # filled
            cv2.putText(image, bbox_mess, (c1[0], np.float32(c1[1] - 2)),
                        cv2.FONT_HERSHEY_SIMPLEX, fontScale, (0, 0, 0),
                        bbox_thick // 2, lineType=cv2.LINE_AA)
    return detected_classes, image
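# The hue-spaced palette above is a common trick for generating N visually
# distinct class colors. A self-contained sketch of the same idea (the helper
# name is illustrative):
import colorsys
import random

def class_palette(num_classes, seed=0):
    # evenly spaced hues at full saturation/value, scaled to 0-255
    hsv = [(i / num_classes, 1.0, 1.0) for i in range(num_classes)]
    rgb = [tuple(int(c * 255) for c in colorsys.hsv_to_rgb(*t)) for t in hsv]
    random.seed(seed)       # deterministic shuffle: colors stay stable across
    random.shuffle(rgb)     # runs, but neighbouring class ids no longer look alike
    random.seed(None)
    return rgb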
def test(self, sess):
    debug_dir = os.path.join(result_dir, "debug")
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)
    img_names = self.coco.get_all_img()
    num = len(img_names)
    top_bboxes = {}
    for img_name in tqdm(img_names):
        img = self.coco.read_img(img_name)
        image_id = img_name
        height, width = img.shape[0:2]
        detections = []
        for scale in test_scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])
            inp_height = new_height | 127
            inp_width = new_width | 127

            images = np.zeros((1, inp_height, inp_width, 3), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)

            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width

            resized_image = cv2.resize(img, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image, new_center,
                                                       [inp_height, inp_width])
            resized_image = resized_image / 255.
            # normalize_(resized_image, db.mean, db.std)

            images[0] = resized_image
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]

            # batch the image together with its horizontal flip
            images = np.concatenate((images, images[:, :, ::-1, :]), axis=0)
            images = tf.convert_to_tensor(images)
            is_training = tf.convert_to_tensor(False)
            outs = self.net.corner_net(images, is_training=is_training)
            dets_tensor = self.net.decode(*outs[-6:])
            dets = sess.run(dets_tensor)
            dets = dets.reshape(2, -1, 8)
            # un-flip the detections from the flipped copy
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            dets = dets.reshape(1, -1, 8)
            dets = rescale_dets(dets, ratios, borders, sizes)
            dets[:, :, 0:4] /= scale
            detections.append(dets)

        detections = np.concatenate(detections, axis=1)
        classes = detections[..., -1]
        classes = classes[0]
        detections = detections[0]

        # reject detections with negative scores
        keep_inds = (detections[:, 4] > -1)
        detections = detections[keep_inds]
        classes = classes[keep_inds]

        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
            if merge_bbox:
                top_bboxes[image_id][j + 1] = soft_nms_merge(
                    top_bboxes[image_id][j + 1], Nt=nms_threshold, method=2,
                    weight_exp=weight_exp)
            else:
                top_bboxes[image_id][j + 1] = soft_nms(
                    top_bboxes[image_id][j + 1], Nt=nms_threshold,
                    method=nms_algorithm)
            top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]

        scores = np.hstack([
            top_bboxes[image_id][j][:, -1] for j in range(1, categories + 1)
        ])
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

        if debug:
            image = self.coco.read_img(img_name)
            bboxes = {}
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] > 0.5)
                cat_name = self.coco.class_name(j)
                cat_size = cv2.getTextSize(cat_name, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
                color = np.random.random((3, )) * 0.6 + 0.4
                color = color * 255
                color = color.astype(np.int32).tolist()
                for bbox in top_bboxes[image_id][j][keep_inds]:
                    bbox = bbox[0:4].astype(np.int32)
                    if bbox[1] - cat_size[1] - 2 < 0:
                        cv2.rectangle(image, (bbox[0], bbox[1] + 2),
                                      (bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2),
                                      color, -1)
                        cv2.putText(image, cat_name,
                                    (bbox[0], bbox[1] + cat_size[1] + 2),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0),
                                    thickness=1)
                    else:
                        cv2.rectangle(image, (bbox[0], bbox[1] - cat_size[1] - 2),
                                      (bbox[0] + cat_size[0], bbox[1] - 2),
                                      color, -1)
                        cv2.putText(image, cat_name, (bbox[0], bbox[1] - 2),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0),
                                    thickness=1)
                    cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                                  color, 2)
            debug_file = os.path.join(debug_dir, "{}".format(img_name))
            cv2.imwrite(debug_file, image)

    # result_json = os.path.join(result_dir, "results.json")
    # detections = db.convert_to_coco(top_bboxes)
    # with open(result_json, "w") as f:
    #     json.dump(detections, f)
    # cls_ids = list(range(1, categories + 1))
    # image_ids = [db.image_ids(ind) for ind in db_inds]
    # db.evaluate(result_json, cls_ids, image_ids)
    return 0
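# soft_nms() above comes from the CornerNet codebase. As a reference, a
# minimal pure-NumPy sketch of linear/Gaussian Soft-NMS (Bodla et al., 2017)
# over boxes shaped (N, 5) as (x1, y1, x2, y2, score); an illustration, not
# the repo's Cython implementation.
import numpy as np

def soft_nms_sketch(dets, Nt=0.5, sigma=0.5, thresh=0.001, method=2):
    dets = dets.copy()
    keep = []
    while dets.shape[0] > 0:
        m = dets[:, 4].argmax()
        dets[[0, m]] = dets[[m, 0]]          # move the best box to the front
        keep.append(dets[0].copy())
        rest = dets[1:]
        # IoU of the best box against the remainder
        xx1 = np.maximum(dets[0, 0], rest[:, 0])
        yy1 = np.maximum(dets[0, 1], rest[:, 1])
        xx2 = np.minimum(dets[0, 2], rest[:, 2])
        yy2 = np.minimum(dets[0, 3], rest[:, 3])
        inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
        area = lambda b: (b[..., 2] - b[..., 0]) * (b[..., 3] - b[..., 1])
        iou = inter / (area(dets[0]) + area(rest) - inter)
        if method == 1:                       # linear decay
            weight = np.where(iou > Nt, 1 - iou, 1.0)
        elif method == 2:                     # Gaussian decay
            weight = np.exp(-(iou * iou) / sigma)
        else:                                 # hard NMS
            weight = np.where(iou > Nt, 0.0, 1.0)
        rest[:, 4] *= weight                  # down-weight overlapping boxes
        dets = rest[rest[:, 4] > thresh]      # drop boxes that fell below thresh
    return np.array(keep)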
for key in bboxes:
    # print(key)
    # print(len(bboxes[key]))
    bbox = np.array(bboxes[key])

    new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
        bbox, np.array(probs[key]), overlap_thresh=0.3)
    for jk in range(new_boxes.shape[0]):
        (x1, y1, x2, y2) = new_boxes[jk, :]
        (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

        cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2),
                      (int(class_to_color[key][0]), int(class_to_color[key][1]),
                       int(class_to_color[key][2])), 2)

        textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
        all_dets.append((key, 100 * new_probs[jk]))

        (retval, baseLine) = cv2.getTextSize(textLabel, cv2.FONT_HERSHEY_COMPLEX, 1, 1)
        textOrg = (real_x1, real_y1 - 0)

        cv2.rectangle(img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                      (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                      (0, 0, 0), 2)
        cv2.rectangle(img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                      (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                      (255, 255, 255), -1)
        cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)

print('Full inference = {}'.format(time.time() - st))
print(all_dets)
print(bboxes)

# write the annotated image to disk
if options.write:
    import os
    if not os.path.isdir("output"):
        os.mkdir("output")
    cv2.imwrite('./output/{}.png'.format(idx), img)
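# get_real_coordinates() above maps boxes from the resized network input back
# to the original image using the resize ratio. A plausible minimal version
# (an assumption based on how `ratio` is used, not necessarily the repo's):
def get_real_coordinates(ratio, x1, y1, x2, y2):
    # undo the resize that was applied before inference
    real_x1 = int(round(x1 // ratio))
    real_y1 = int(round(y1 // ratio))
    real_x2 = int(round(x2 // ratio))
    real_y2 = int(round(y2 // ratio))
    return (real_x1, real_y1, real_x2, real_y2)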
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False,
                 mask_alpha=1.0, fps_str=''):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out, w, h, visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BGR, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                            for j in range(num_dets_to_consider)], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # I did the math for this on pen and paper. This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]

        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale,
                            text_color, font_thickness, cv2.LINE_AA)

    return img_numpy
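# A small NumPy sketch checking the cumprod trick above against the
# sequential alpha-over loop it replaces (shapes reduced to 1-D "pixels",
# purely illustrative). Note that the closed form layers mask 0 on top, i.e.
# it matches the loop applied from the last mask to the first:
import numpy as np

rng = np.random.default_rng(0)
n, pixels, alpha = 4, 6, 0.45
img = rng.random(pixels)
masks = (rng.random((n, pixels)) > 0.5).astype(float)
colors = rng.random((n, pixels))

masks_color = masks * colors * alpha
inv_alph = masks * (-alpha) + 1

# naive: paint the masks one at a time, last mask first
out_loop = img.copy()
for j in reversed(range(n)):
    out_loop = out_loop * inv_alph[j] + masks_color[j]

# vectorised: one product over the stack plus a cumulative-product summand
summand = masks_color[0] + (masks_color[1:] * np.cumprod(inv_alph[:-1], axis=0)).sum(axis=0)
out_vec = img * np.prod(inv_alph, axis=0) + summand

assert np.allclose(out_loop, out_vec)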
def prep_display(dets_out, img, gt, gt_masks, h, w, undo_transform=True, class_color=False):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    gt and gt_masks are also allowed to be none (until I reimplement that functionality).
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h, visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop, score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            masks = t[3][:args.top_k]  # We'll need this later
        classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]]

    if classes.shape[0] == 0:
        return (img_gpu * 255).byte().cpu().numpy()

    def get_color(j):
        color = COLORS[(classes[j] * 5 if class_color else j * 5) % len(COLORS)]
        if not undo_transform:
            color = (color[2], color[1], color[0])
        return color

    # Draw masks first on the gpu
    if args.display_masks and cfg.eval_mask_branch:
        for j in reversed(range(min(args.top_k, classes.shape[0]))):
            if scores[j] >= args.score_threshold:
                color = get_color(j)
                mask = masks[j, :, :, None]
                mask_color = mask @ (torch.Tensor(color).view(1, 3) / 255.0)
                mask_alpha = 0.45

                # Alpha only the region of the image that contains the mask
                img_gpu = img_gpu * (1 - mask) \
                    + img_gpu * mask * (1 - mask_alpha) + mask_color * mask_alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_text or args.display_bboxes:
        for j in reversed(range(min(args.top_k, classes.shape[0]))):
            score = scores[j]
            if scores[j] >= args.score_threshold:
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)

                if args.display_bboxes:
                    cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                if args.display_text:
                    _class = COCO_CLASSES[classes[j]]
                    text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class

                    font_face = cv2.FONT_HERSHEY_DUPLEX
                    font_scale = 0.6
                    font_thickness = 1

                    text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

                    text_pt = (x1, y1 - 3)
                    text_color = [255, 255, 255]

                    cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                    cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale,
                                text_color, font_thickness, cv2.LINE_AA)

    return img_numpy
heightFactor = frame.shape[0] / 300.0
widthFactor = frame.shape[1] / 300.0

# Scale object detection from the 300x300 network input back to the frame
xLeftBottom = int(widthFactor * xLeftBottom)
yLeftBottom = int(heightFactor * yLeftBottom)
xRightTop = int(widthFactor * xRightTop)
yRightTop = int(heightFactor * yRightTop)

# Draw location of object
cv2.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop),
              (0, 255, 0))

# Draw label and confidence of prediction in frame resized
if class_id in classNames:
    label = classNames[class_id] + ": " + str(confidence)
    labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

    yLeftBottom = max(yLeftBottom, labelSize[1])
    cv2.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]),
                  (xLeftBottom + labelSize[0], yLeftBottom + baseLine),
                  (255, 255, 255), cv2.FILLED)
    cv2.putText(frame, label, (xLeftBottom, yLeftBottom),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

    print(label)  # print class and confidence

cv2.namedWindow("frame", cv2.WINDOW_NORMAL)
cv2.imshow("frame", frame)
if cv2.waitKey(1) >= 0:  # break on any key press
    break
def yolo_detect(pathIn='/home/haidong/Desktop/1.png',
                pathOut='/home/haidong/Desktop/test.jpg',
                # pathOut=None,
                label_path='/home/haidong/darknet/data/coco.names',
                config_path='/home/haidong/darknet/cfg/yolov3-tiny.cfg',
                weights_path='/home/haidong/darknet/yolov3-tiny_20000.weights',
                confidence_thre=0.5,
                nms_thre=0.3,
                jpg_quality=80):
    '''
    pathIn: path of the input image
    pathOut: path of the result image
    label_path: path of the class label file
    config_path: path of the model config file
    weights_path: path of the model weights file
    confidence_thre: 0-1, confidence (probability/score) threshold; boxes with a
        probability above this value are kept, default 0.5
    nms_thre: non-maximum suppression threshold, default 0.3
    jpg_quality: output JPEG quality, 0-100, default 80; higher is better
    '''

    # Load the class label file
    LABELS = open(label_path).read().strip().split("\n")
    nclass = len(LABELS)

    # Assign a random color to each class's bounding boxes
    np.random.seed(42)
    COLORS = np.random.randint(0, 255, size=(nclass, 3), dtype='uint8')

    # Load the image and get its dimensions
    base_path = os.path.basename(pathIn)
    img = cv2.imread(pathIn)
    (H, W) = img.shape[:2]
    print(1)

    # Load the model config and weights
    print('Loading YOLO from disk......')
    net = cv2.dnn.readNetFromDarknet(config_path, weights_path)

    # Get the names of the YOLO output layers
    ln = net.getLayerNames()
    ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]

    # Build a blob from the image, set the input size, then run one
    # YOLO forward pass to obtain bounding boxes and probabilities
    blob = cv2.dnn.blobFromImage(img, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    start = time.time()
    layerOutputs = net.forward(ln)
    end = time.time()

    # Report the prediction time
    print('YOLO took {:.2f} seconds to predict one image'.format(end - start))

    # Initialize bounding boxes, confidences (probabilities) and classes
    boxes = []
    confidences = []
    classIDs = []

    # Iterate over each output layer (three in total)
    for output in layerOutputs:
        # Iterate over each detection
        for detection in output:
            # Extract the class ID and confidence
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]

            # Keep only boxes whose confidence exceeds the threshold
            if confidence > confidence_thre:
                # Rescale the box coordinates to the original image; remember
                # YOLO returns the box center plus the box width and height
                box = detection[0:4] * np.array([W, H, W, H])
                (centerX, centerY, width, height) = box.astype("int")

                # Compute the top-left corner of the box
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))

                # Record the box, confidence (probability) and class
                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(confidence))
                classIDs.append(classID)

    # Suppress weak, overlapping boxes with non-maximum suppression
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, confidence_thre, nms_thre)

    # Make sure at least one box remains
    if len(idxs) > 0:
        # Iterate over each box
        for i in idxs.flatten():
            # Extract the box coordinates
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])

            # Draw the box and add the class label and confidence at its top-left
            color = [int(c) for c in COLORS[classIDs[i]]]
            cv2.rectangle(img, (x, y), (x + w, y + h), color, 1)
            text = '{}: {:.3f}'.format(LABELS[classIDs[i]], confidences[i])
            (text_w, text_h), baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
            cv2.rectangle(img, (x, y - text_h - baseline), (x + text_w, y), color, -1)
            cv2.putText(img, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)

    # Write out the result image
    if pathOut is None:
        cv2.imwrite('with_box_' + base_path, img,
                    [int(cv2.IMWRITE_JPEG_QUALITY), jpg_quality])
    else:
        cv2.imwrite(pathOut, img,
                    [int(cv2.IMWRITE_JPEG_QUALITY), jpg_quality])
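# Note on `ln = [ln[i[0] - 1] ...]` above: OpenCV 4.5.4 and later return a
# flat array from getUnconnectedOutLayers(), so the i[0] indexing raises an
# error there. A version-tolerant helper (a defensive sketch, not part of the
# original script):
def get_output_layer_names(net):
    layer_names = net.getLayerNames()
    try:
        return [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
    except (IndexError, TypeError):
        # newer OpenCV: indices come back as a flat array of ints
        return [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]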
def display_instances(image, boxes, masks, class_ids, class_names,
                      scores=None, title="", figsize=(16, 16), ax=None,
                      show_mask=True, show_bbox=True, colors=None, captions=None):
    """
    boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates.
    masks: [height, width, num_instances]
    class_ids: [num_instances]
    class_names: list of class names of the dataset
    scores: (optional) confidence scores for each box
    title: (optional) Figure title
    show_mask, show_bbox: To show masks and bounding boxes or not
    figsize: (optional) the size of the image
    colors: (optional) An array or colors to use with each object
    captions: (optional) A list of strings to use as captions for each object
    """
    # Number of instances
    N = boxes.shape[0]
    if not N:
        print("\n*** No instances to display *** \n")
    else:
        assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]

    # If no axis is passed, create one and automatically call show()
    auto_show = False
    if not ax:
        _, ax = plt.subplots(1, figsize=figsize)
        auto_show = True

    # Generate random colors
    colors = colors or random_colors(N)

    # Show area outside image boundaries.
    height, width = image.shape[:2]
    ax.set_ylim(height + 10, -10)
    ax.set_xlim(-10, width + 10)
    ax.axis('off')
    ax.set_title(title)

    masked_image = image.astype(np.uint32).copy()
    for i in range(N):
        color = colors[i]

        # Bounding box
        if not np.any(boxes[i]):
            # Skip this instance. Has no bbox. Likely lost in image cropping.
            continue
        y1, x1, y2, x2 = boxes[i]
        if show_bbox:
            p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=3,
                                  alpha=0.7, linestyle="solid",
                                  edgecolor=color, facecolor='none')
            ax.add_patch(p)

        # Label
        if not captions:
            class_id = class_ids[i]
            score = scores[i] if scores is not None else None
            label = class_names[class_id]
            x = random.randint(x1, (x1 + x2) // 2)
            caption = "{} {:.3f}".format(label, score) if score else label
            font = cv2.FONT_HERSHEY_COMPLEX_SMALL
            masked_image = cv2.putText(masked_image.astype(np.uint8), label,
                                       (x1, y1 - 10), font, 0.7,
                                       (255, 255, 255), 1, cv2.LINE_AA)
            size = cv2.getTextSize(label, font, 0.7, 1)
            width = size[0][0]
            score = round(score, 3)
            masked_image = cv2.putText(masked_image.astype(np.uint8), str(score),
                                       (x1 + width + 5, y1 - 10), font, 0.7,
                                       (255, 255, 255), 1, cv2.LINE_AA)
        else:
            caption = captions[i]
            ax.text(x1, y1 + 8, caption, color='w', size=11, backgroundcolor="none")

        # Mask
        mask = masks[:, :, i]
        if show_mask:
            masked_image = apply_mask(masked_image, mask, color)
        coloro = tuple(c * 255 for c in color)
        masked_image = cv2.rectangle(masked_image.astype(np.uint8),
                                     (x1, y2), (x2, y1), coloro, 3)
        masked_image = masked_image.astype(np.uint32)

        # Mask Polygon
        # Pad to ensure proper polygons for masks that touch image edges.
        padded_mask = np.zeros((mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
        padded_mask[1:-1, 1:-1] = mask
        contours = find_contours(padded_mask, 0.5)
        for verts in contours:
            # Subtract the padding and flip (y, x) to (x, y)
            verts = np.fliplr(verts) - 1
            p = Polygon(verts, facecolor="none", edgecolor=color)
            ax.add_patch(p)

    print('savingimage')
    plt.imsave('savedimg.jpg', masked_image.astype(np.uint8))
    if auto_show:
        plt.show()
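# apply_mask() above is not defined in this excerpt. A minimal sketch in the
# spirit of the Mask R-CNN visualization utilities (an assumption about the
# exact implementation): alpha-blend a color into the pixels a binary mask covers.
import numpy as np

def apply_mask(image, mask, color, alpha=0.5):
    for c in range(3):
        image[:, :, c] = np.where(mask == 1,
                                  image[:, :, c] * (1 - alpha) + alpha * color[c] * 255,
                                  image[:, :, c])
    return image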
def detect_image(self, image):
    if self.model_image_size != (None, None):
        assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
        assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'
        boxed_image = image_preporcess(np.copy(image),
                                       tuple(reversed(self.model_image_size)))
        image_data = boxed_image

    out_boxes, out_scores, out_classes = self.sess.run(
        [self.boxes, self.scores, self.classes],
        feed_dict={
            self.yolo_model.input: image_data,
            self.input_image_shape: [image.shape[0], image.shape[1]],  # [image.size[1], image.size[0]]
            K.learning_phase(): 0
        })

    # print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

    thickness = (image.shape[0] + image.shape[1]) // 600
    fontScale = 1
    ObjectsList = []

    for i, c in reversed(list(enumerate(out_classes))):
        predicted_class = self.class_names[c]
        box = out_boxes[i]
        score = out_scores[i]

        label = '{} {:.2f}'.format(predicted_class, score)
        # label = '{}'.format(predicted_class)
        scores = '{:.2f}'.format(score)

        top, left, bottom, right = box
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.shape[0], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.shape[1], np.floor(right + 0.5).astype('int32'))

        mid_h = (bottom - top) / 2 + top
        mid_v = (right - left) / 2 + left

        # put object rectangle
        cv2.rectangle(image, (left, top), (right, bottom), self.colors[c], thickness)

        # get text size
        (text_width, text_height), baseline = cv2.getTextSize(
            label, cv2.FONT_HERSHEY_SIMPLEX, thickness / self.text_size, 1)

        # put text rectangle
        cv2.rectangle(image, (left, top),
                      (left + text_width, top - text_height - baseline),
                      self.colors[c], thickness=cv2.FILLED)

        # put text above rectangle
        cv2.putText(image, label, (left, top - 2), cv2.FONT_HERSHEY_SIMPLEX,
                    thickness / self.text_size, (0, 0, 0), 1)

        # add everything to list
        ObjectsList.append([top, left, bottom, right, mid_v, mid_h, label, scores])

    return image, ObjectsList
def draw_trackers_info(image, tracks, list_expr, accumulation_gender=True, accumulation_age=True):
    info = np.zeros(8, dtype=int)
    for track in tracks:
        if not track.is_confirmed() or track.time_since_update > 0:
            continue
        xmin, ymin, xmax, ymax = [int(x) for x in track.to_tlbr()]
        color = create_unique_color(track.track_id)

        # draw bbox
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 2)

        # track id
        label = str(track.track_id)
        text_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
        cv2.rectangle(image, (xmin, ymin),
                      (xmin + 10 + text_size[0][0], ymin + 10 + text_size[0][1]),
                      color, -1)
        cv2.putText(image, label, (xmin + 5, ymin + 5 + text_size[0][1]),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

        # draw info
        font_scale = 2
        font = cv2.FONT_HERSHEY_SIMPLEX
        thickness = 3

        # Gender (info[6] counts males, info[7] counts females)
        if len(track.genders) > 0:
            if accumulation_gender:
                # accumulated gender over the track history
                if np.mean(track.genders) >= 0.6:
                    gender = "M"
                    info[6] += 1
                else:
                    gender = "F"
                    info[7] += 1
            else:
                if track.genders[-1] >= 0.6:
                    gender = "M"
                    info[6] += 1
                else:
                    gender = "F"
                    info[7] += 1
        else:
            gender = "N/A"

        # Expr
        if track.expr in list_expr:
            idx = np.where(list_expr == str(track.expr))
            info[idx] += 1

        # Age
        if len(track.ages) > 0:
            if accumulation_age:
                age = str(int(np.mean(track.ages)))
            else:
                age = str(int(track.ages[-1]))
        else:
            age = "N/A"

        # GENDER AGE
        gen_age = gender + age
        text_size = cv2.getTextSize(gen_age, font, font_scale, thickness)
        txt_loc = (xmin, ymax + 10 + text_size[0][1])
        cv2.putText(image, text=gen_age, org=txt_loc, fontFace=font,
                    fontScale=font_scale, color=color, thickness=thickness)

        # EXPR
        text_size = cv2.getTextSize(track.expr, font, font_scale, thickness)
        txt_loc = (xmin, ymax + 30 + 2 * text_size[0][1])
        cv2.putText(image, text=track.expr, org=txt_loc, fontFace=font,
                    fontScale=font_scale, color=color, thickness=thickness)
    return info
def plot_images(images, targets, paths=None, fname='images.jpg', names=None,
                max_size=640, max_subplots=16):
    tl = 3  # line thickness
    tf = max(tl - 1, 1)  # font thickness

    if os.path.isfile(fname):  # do not overwrite
        return None

    if isinstance(images, torch.Tensor):
        images = images.cpu().numpy()
    if isinstance(targets, torch.Tensor):
        targets = targets.cpu().numpy()

    # un-normalise
    if np.max(images[0]) <= 1:
        images *= 255

    bs, _, h, w = images.shape  # batch size, _, height, width
    bs = min(bs, max_subplots)  # limit plot images
    ns = np.ceil(bs ** 0.5)  # number of subplots (square)

    # Check if we should resize
    scale_factor = max_size / max(h, w)
    if scale_factor < 1:
        h = math.ceil(scale_factor * h)
        w = math.ceil(scale_factor * w)

    # Empty array for output
    mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)

    # Fix class - colour map
    prop_cycle = plt.rcParams['axes.prop_cycle']
    # https://stackoverflow.com/questions/51350872/python-from-color-name-to-rgb
    hex2rgb = lambda h: tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))
    color_lut = [hex2rgb(h) for h in prop_cycle.by_key()['color']]

    for i, img in enumerate(images):
        if i == max_subplots:  # if last batch has fewer images than we expect
            break

        block_x = int(w * (i // ns))
        block_y = int(h * (i % ns))

        img = img.transpose(1, 2, 0)
        if scale_factor < 1:
            img = cv2.resize(img, (w, h))

        mosaic[block_y:block_y + h, block_x:block_x + w, :] = img
        if len(targets) > 0:
            image_targets = targets[targets[:, 0] == i]
            boxes = xywh2xyxy(image_targets[:, 2:6]).T
            classes = image_targets[:, 1].astype('int')
            gt = image_targets.shape[1] == 6  # ground truth if no conf column
            conf = None if gt else image_targets[:, 6]  # check for confidence presence (gt vs pred)

            boxes[[0, 2]] *= w
            boxes[[0, 2]] += block_x
            boxes[[1, 3]] *= h
            boxes[[1, 3]] += block_y
            for j, box in enumerate(boxes.T):
                cls = int(classes[j])
                color = color_lut[cls % len(color_lut)]
                cls = names[cls] if names else cls
                if gt or conf[j] > 0.3:  # 0.3 conf thresh
                    label = '%s' % cls if gt else '%s %.1f' % (cls, conf[j])
                    plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl)

        # Draw image filename labels
        if paths is not None:
            label = os.path.basename(paths[i])[:40]  # trim to 40 char
            t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
            cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0,
                        tl / 3, [220, 220, 220], thickness=tf, lineType=cv2.LINE_AA)

        # Image border
        cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h),
                      (255, 255, 255), thickness=3)

    if fname is not None:
        mosaic = cv2.resize(mosaic, (int(ns * w * 0.5), int(ns * h * 0.5)),
                            interpolation=cv2.INTER_AREA)
        cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB))

    return mosaic
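# xywh2xyxy() above converts normalised (center-x, center-y, width, height)
# labels to corner format. The YOLOv5-style conversion, for reference (NumPy
# variant):
import numpy as np

def xywh2xyxy(x):
    y = np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top-left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top-left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom-right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom-right y
    return y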
# Is the front of the object outside the monitored boundary? Then write the
# date, time and speed on the image and save it
if ((x <= 2) and (direction == RIGHT_TO_LEFT)) \
        or ((x + w >= monitored_width - 2)
            and (direction == LEFT_TO_RIGHT)):
    if (last_mph > MIN_SPEED):
        # save the image
        # timestamp the image
        cv2.putText(image,
                    datetime.datetime.now().strftime("%A %d %B %Y %I:%M:%S%p"),
                    (10, image.shape[0] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), 1)
        # write the speed: first get the size of the text
        size, base = cv2.getTextSize("%.0f mph" % last_mph,
                                     cv2.FONT_HERSHEY_SIMPLEX, 2, 3)
        # then center it horizontally on the image
        cntr_x = int((IMAGEWIDTH - size[0]) / 2)
        cv2.putText(image, "%.0f mph" % last_mph,
                    (cntr_x, int(IMAGEHEIGHT * 0.2)),
                    cv2.FONT_HERSHEY_SIMPLEX, 2.00, (0, 255, 0), 3)
        # and save the image to disk
        imageFilename = "car_at_" + datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + ".jpg"
        # use the following image file name if you want to be able to sort the images by speed
        # imageFilename = "car_at_%02.0f" % last_mph + "_" + datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + ".jpg"
        cv2.imwrite(imageFilename, image)
    if SAVE_CSV:
def test_debug(self, image, detections, debug_boxes, boxes, ratio, coco, step):
    detections = detections.reshape(-1, 8)
    detections[:, 0:4:2] /= ratio[0]
    detections[:, 1:4:2] /= ratio[1]
    debug_boxes = debug_boxes.reshape(-1, 4)
    debug_boxes[:, 0:4:2] /= ratio[0]
    debug_boxes[:, 1:4:2] /= ratio[1]
    classes = detections[..., -1].astype(np.int64)

    # reject detections with negative scores
    keep_inds = (detections[:, 4] > -1)
    detections = detections[keep_inds]
    classes = classes[keep_inds]

    top_bboxes = {}
    for j in range(self.categories):
        keep_inds = (classes == j)
        top_bboxes[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
        if self.merge_bbox:
            top_bboxes[j + 1] = soft_nms_merge(top_bboxes[j + 1], Nt=0.5,
                                               method=2, weight_exp=8)
        else:
            top_bboxes[j + 1] = soft_nms(top_bboxes[j + 1], Nt=0.5, method=2)
        top_bboxes[j + 1] = top_bboxes[j + 1][:, 0:5]

    scores = np.hstack([top_bboxes[j][:, -1] for j in range(1, self.categories + 1)])
    if len(scores) > self.max_per_image:
        kth = len(scores) - self.max_per_image
        thresh = np.partition(scores, kth)[kth]
        for j in range(1, self.categories + 1):
            keep_inds = (top_bboxes[j][:, -1] >= thresh)
            top_bboxes[j] = top_bboxes[j][keep_inds]
            # if len(top_bboxes[j]) != 0:
            #     print(top_bboxes[j].shape)

    image = (image * 255).astype(np.uint8)
    bboxes = {}
    for j in range(1, self.categories + 1):
        # if step > 10000:
        keep_inds = (top_bboxes[j][:, -1] > 0.5)
        top_bboxes[j] = top_bboxes[j][keep_inds]
        cat_name = coco.class_name(j)
        cat_size = cv2.getTextSize(cat_name, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
        color = np.random.random((3, )) * 0.6 + 0.4
        color = color * 255
        color = color.astype(np.int32).tolist()
        for bbox in top_bboxes[j]:
            bbox = bbox[0:4].astype(np.int32)
            if bbox[1] - cat_size[1] - 2 < 0:
                cv2.rectangle(image, (bbox[0], bbox[1] + 2),
                              (bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2),
                              color, -1)
                cv2.putText(image, cat_name, (bbox[0], bbox[1] + cat_size[1] + 2),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1)
            else:
                cv2.rectangle(image, (bbox[0], bbox[1] - cat_size[1] - 2),
                              (bbox[0] + cat_size[0], bbox[1] - 2), color, -1)
                cv2.putText(image, cat_name, (bbox[0], bbox[1] - 2),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1)
            cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)

    for b in boxes:
        cv2.rectangle(image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 1)
    for i in range(len(debug_boxes)):
        color = np.random.random((3, )) * 0.6 + 0.4
        color = color * 255
        color = color.astype(np.int32).tolist()
        cv2.circle(image, (debug_boxes[i][0], debug_boxes[i][1]), 2, color, 2)
        cv2.circle(image, (debug_boxes[i][2], debug_boxes[i][3]), 2, color, 2)
    cv2.imwrite(os.path.join(self.debug_dir, str(step) + '.jpg'), image)
def genImage(self, imageName):
    layerGap = 80
    nodeGap = 60
    nodeRadius = 20
    padding = 40
    bgColor = (230, 230, 230)
    hiddenColor = (50, 150, 30)
    biasColor = (201, 50, 50)
    inputColor = (27, 226, 226)
    outputColor = (12, 120, 220)
    fontColor = (5, 5, 5)
    fontScale = 0.6
    fontThickness = 2
    connectionColorEnabled = (0, 0, 150)
    connectionColorDisabled = (0, 0, 100)
    connectionThickness = 2
    connectionArrowSize = 0.08

    # Make a connection dictionary for the algorithm
    connectionDict = {}
    for node in self.nodeGenes:
        connectionDict[node.index] = []
    for conn in self.connectionGenes:
        connectionDict[conn.input].append(conn.output)

    # Make an enabled-connection lookup for drawing
    def getEnabled(connInput, connOutput):
        for i in self.connectionGenes:
            if i.input == connInput and i.output == connOutput:
                return i.enabled
        return None

    # The algorithm to sort all nodes into layers (SUCH A PAIN)
    layers = list()
    currentLayer = list(i.index for i in self.getNodes("Input") + self.getNodes("Bias"))
    nextLayer = []
    outputLayer = list(i.index for i in self.getNodes("Output"))
    while len(currentLayer) > 0:
        for node in currentLayer:
            for conn in connectionDict[node]:
                if not (conn in nextLayer) and not (conn in outputLayer):
                    nextLayer.append(conn)
        for prevLayer in layers:
            for conn in currentLayer:
                if conn in prevLayer:
                    prevLayer.remove(conn)
        layers.append(currentLayer.copy())
        currentLayer = nextLayer.copy()
        nextLayer = []
    layers.append(outputLayer)

    # Find the widest part in the neural net
    widestLayerLength = len(layers[0])
    for layer in layers:
        if (len(layer) > widestLayerLength):
            widestLayerLength = len(layer)

    # Calculate the image height and width that will be needed to fit the neural net
    width = padding * 2 + (widestLayerLength - 1) * nodeGap
    height = padding * 2 + (len(layers) - 1) * layerGap

    # Create a blank canvas filled with the background color
    img = np.full((height, width, 3), bgColor, np.uint8)

    # Calculate point positions
    nodePoints = {}
    for i in range(len(layers)):
        for j in range(len(layers[i])):
            x = int((width - (len(layers[i]) - 1) * nodeGap) / 2 + nodeGap * j)
            y = height - padding + -i * layerGap
            nodePoints[layers[i][j]] = (x, y)

    # Draw all of the connection arrows
    for key, value in connectionDict.items():
        for i in value:
            connectionColor = connectionColorEnabled
            if not getEnabled(key, i):
                connectionColor = connectionColorDisabled
            direction = [nodePoints[i][0] - nodePoints[key][0],
                         nodePoints[i][1] - nodePoints[key][1]]
            # shorten the arrow so it starts and ends on the node circles' edges
            angle = np.arctan2(direction[0], direction[1])
            xOffset = int(np.sin(angle) * nodeRadius)
            yOffset = int(np.cos(angle) * nodeRadius)
            pt1 = (nodePoints[key][0] + xOffset, nodePoints[key][1] + yOffset)
            pt2 = (nodePoints[i][0] - xOffset, nodePoints[i][1] - yOffset)
            cv2.arrowedLine(img, pt1, pt2, connectionColor, connectionThickness,
                            8, 0, connectionArrowSize)

    # Plot the nodes
    for i in range(len(layers)):
        for j in range(len(layers[i])):
            nodeType = self.getNode(layers[i][j]).type
            nodeColor = hiddenColor
            if nodeType == "Input":
                nodeColor = inputColor
            elif nodeType == "Output":
                nodeColor = outputColor
            elif nodeType == "Bias":
                nodeColor = biasColor
            cv2.circle(img, nodePoints[layers[i][j]], nodeRadius, nodeColor, -1)
            # center the node index label on the circle
            size, _ = cv2.getTextSize(str(layers[i][j]), cv2.FONT_HERSHEY_SIMPLEX,
                                      fontScale, fontThickness)
            cv2.putText(img, str(layers[i][j]),
                        (nodePoints[layers[i][j]][0] - size[0] // 2,
                         nodePoints[layers[i][j]][1] + size[1] // 2),
                        cv2.FONT_HERSHEY_SIMPLEX, fontScale, fontColor, fontThickness)

    cv2.imwrite(imageName + ".png", img)
    # Calculate the frame rate
    end = time.time()
    fps = 1 / (end - start)
else:
    fps = cap.get(cv2.CAP_PROP_FPS)

# Get window size
x, y, w, h = cv2.getWindowImageRect(title_window)

# Display the title on the window
title = title_window
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 1
(text_width, text_height) = cv2.getTextSize(title, font, fontScale=font_scale,
                                            thickness=1)[0]
img = frame.copy()
img = cv2.rectangle(img,
                    (int(w / 2 - text_width / 2 - 10), text_height + 45),
                    (int(w / 2 + text_width / 2 + 5), 20),
                    (255, 255, 255), cv2.FILLED)
frame = cv2.addWeighted(img, .3, frame, .7, 0)
frame = cv2.putText(frame, title, (int(w / 2 - text_width / 2), text_height + 30),
                    font, font_scale, (20, 30, 0), 2, cv2.LINE_AA)

# Display the frame rate on the window
text_fps = 'FPS : ' + str(int(fps))
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = .8
def evaluate(_):
    win_name = 'Detector'
    cv2.namedWindow(win_name)

    video = FLAGS.video
    if is_url(video):
        videoPafy = pafy.new(video)
        video = videoPafy.getbest(preftype="mp4").url

    cam = cv2.VideoCapture(video)
    if not cam.isOpened():
        raise IOError('Can\'t open "{}"'.format(FLAGS.video))

    source_h = cam.get(cv2.CAP_PROP_FRAME_HEIGHT)
    source_w = cam.get(cv2.CAP_PROP_FRAME_WIDTH)
    # print("image size = (%d, %d)" % (source_h, source_w))

    model_cls = find_class_by_name(FLAGS.model_name, [yolo])
    model = model_cls(input_shape=(source_h, source_w, 3))
    model.init()

    frame_num = 0
    start_time = time.time()
    fps = 0
    try:
        while True:
            ret, frame = cam.read()
            # cv2.imwrite("1.png", frame)

            if not ret:
                logger.info('Can\'t read video data. Potential end of stream')
                return

            predictions = model.evaluate(frame)

            for o in predictions:
                x1 = o['box']['left']
                x2 = o['box']['right']
                y1 = o['box']['top']
                y2 = o['box']['bottom']
                color = o['color']
                class_name = o['class_name']
                # print("[%s] l = %d, r = %d, t = %d, b = %d" % (class_name, x1, x2, y1, y2))

                # Draw box
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)

                # Draw label
                (text_width, text_height), baseline = cv2.getTextSize(
                    class_name, cv2.FONT_HERSHEY_SIMPLEX, 0.75, 1)
                cv2.rectangle(frame, (x1, y1),
                              (x1 + text_width, y1 - text_height - baseline),
                              color, thickness=cv2.FILLED)
                cv2.putText(frame, class_name, (x1, y1 - baseline),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 0), 1)

            # cv2.imwrite("2.png", frame)
            # return

            end_time = time.time()
            fps = fps * 0.9 + 1 / (end_time - start_time) * 0.1
            start_time = end_time

            # Draw additional info
            frame_info = 'Frame: {0}, FPS: {1:.2f}'.format(frame_num, fps)
            cv2.putText(frame, frame_info, (10, frame.shape[0] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
            logger.info(frame_info)

            cv2.imshow(win_name, frame)

            if predictions:
                logger.info('Predictions: {}'.format(format_predictions(predictions)))

            key = cv2.waitKey(1) & 0xFF

            # Exit
            if key == ord('q'):
                break

            # Take screenshot
            if key == ord('s'):
                cv2.imwrite('frame_{}.jpg'.format(time.time()), frame)

            frame_num += 1
    finally:
        cv2.destroyAllWindows()
        cam.release()
        model.close()
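# The line `fps = fps * 0.9 + 1 / (end_time - start_time) * 0.1` above is an
# exponential moving average over the instantaneous frame rate. A tiny
# self-contained sketch of the same smoothing (timings here are simulated,
# purely illustrative):
import random

fps = 0.0
for _ in range(100):
    dt = random.uniform(0.02, 0.05)        # pretend a frame took 20-50 ms
    fps = fps * 0.9 + (1.0 / dt) * 0.1     # 10% weight on the newest sample
print('smoothed FPS: {:.1f}'.format(fps))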
def lane_detection(cv_bgr, x_meter, y_meter, cols, rows, fontFace, fontScale, fontThickness):
    is_pass = False
    tilt1_deg = None
    tilt2_deg = None
    angle1_deg = None
    angle2_deg = None
    curve1_r = None
    curve2_r = None
    meters_from_center = None

    ########################################
    # Region Of Interest Coordinates
    ########################################
    roi_vertices = calc_roi_vertices(cv_bgr,
                                     # robocar camera demo_lane
                                     top_width_rate=0.28,
                                     top_height_position=0.45,
                                     bottom_width_rate=2.0,
                                     bottom_height_position=1)

    ########################################
    # Inverse Perspective Mapping Coordinates
    ########################################
    ipm_vertices = calc_ipm_vertices(cv_bgr,
                                     # robocar camera demo_lane
                                     top_width_rate=0.28,
                                     top_height_position=0.45,
                                     bottom_width_rate=2.0,
                                     bottom_height_position=1)

    ########################################
    # Region Of Interest
    ########################################
    cv_bgr_roi = to_roi(cv_bgr, roi_vertices)

    ########################################
    # Inverse Perspective Mapping
    ########################################
    cv_bgr_ipm = to_ipm(cv_bgr_roi, ipm_vertices)

    ########################################
    # WHITE DETECTION
    ########################################
    cv_bgr_ipm_white = to_yellow(cv_bgr_ipm)
    cv_bgr_white = to_yellow(cv_bgr)

    ########################################
    # BINARY
    ########################################
    cv_bin = to_bin(cv_bgr_ipm_white)
    cv_rgb_bin = bin_to_rgb(cv_bin)

    cv_rgb_road = None
    cv_rgb_sliding_windows = None
    cv_rgb_ellipse = None
    cv_rgb_tilt = None
    histogram = None
    meters_from_center = None
    is_sliding_window_success = False
    is_pixel_pts_success = False
    is_pixel_ellipse_success = False
    is_meter_pts_success = False
    is_meter_ellipse_success = False
    tilt_deg = 0

    ########################################
    # Detect the lane
    ########################################
    try:
        # Run sliding windows to get the pixel coordinates that make up the line
        cv_rgb_sliding_windows, histogram, line_x, line_y = sliding_windows(cv_bin)
        is_sliding_window_success = True

        '''
        Drawing values: calculations in the pixel coordinate system
        '''
        # Generate evenly spaced y coordinates
        plot_y = np.linspace(0, rows - 1, rows)
        # Fit the line's quadratic polynomial and compute its coordinates
        line_polyfit_const, pts_line = calc_line_curve(line_x, line_y, plot_y)
        is_pixel_pts_success = True

        # Draw the arc and the tilt
        cv_rgb_ellipse, cv_rgb_tilt \
            = draw_ellipse_and_tilt(cols, rows, plot_y, pts_line, line_polyfit_const)

        # Draw the lane on the white-line image
        cv2.polylines(cv_rgb_bin, [pts_line], False, (255, 0, 0),
                      thickness=fontThickness * 20)
        # Reverse the IPM on the white-line road region
        cv_rgb_bin = reverse_ipm(cv_rgb_bin, ipm_vertices)

        # Draw the line on the road image
        cv_rgb_road = new_rgb(rows, cols)
        cv2.polylines(cv_rgb_road, [pts_line], False, (255, 0, 0),
                      thickness=fontThickness * 20)
        # Reverse the IPM on the road image
        cv_rgb_road = reverse_ipm(cv_rgb_road, ipm_vertices)

        '''
        Measured values: calculations in the meter coordinate system
        '''
        # Convert pixels to meters
        ym_per_pix = 1.0 * y_meter / rows
        xm_per_pix = 1.0 * x_meter / cols
        # Generate evenly spaced y coordinates
        plot_ym = np.linspace(0, rows - 1, rows) * ym_per_pix
        # Fit the line's quadratic polynomial and compute its coordinates
        line_polyfit_const, \
            _pts_line = calc_line_curve(line_x * xm_per_pix, line_y * ym_per_pix, plot_ym)
        is_meter_pts_success = True

        ########################################
        # Compute the arc coordinates and angles
        # Split the center line into an upper and a lower half, then compute the
        # curvature radius, the arc center, and the tilt angle from the y axis
        ########################################
        quarter_y = (np.max(plot_ym) - np.min(plot_ym)) / 4
        # Compute the lower half
        y0 = np.max(plot_ym) - 2 * quarter_y
        y1 = np.max(plot_ym)
        x0, x1, \
            curve1_x, curve1_y, curve1_r, \
            rotate1_deg, angle1_deg, \
            tilt1_deg = calc_curve(y0, y1, line_polyfit_const)
        # Compute the upper half
        quarter_y = (np.max(plot_ym) - np.min(plot_ym)) / 4
        y2 = np.min(plot_ym)
        y3 = np.max(plot_ym) - 2 * quarter_y
        x2, x3, \
            curve2_x, curve2_y, curve2_r, \
            rotate2_deg, angle2_deg, \
            tilt2_deg = calc_curve(y2, y3, line_polyfit_const)
        is_meter_ellipse_success = True

        # Compute the tilt between the bottom center of the frame and the top of
        # the line as a real-world angle; since it is a real-world angle, the
        # x, y coordinates are converted to metric units before computing
        tilt_rad = math.atan((cols * xm_per_pix / 2 - x0) / (rows * ym_per_pix - y3))
        tilt_deg = math.degrees(tilt_rad)

        # Compute the distance to the center line, measured at the bottom of the frame
        bottom_y = np.max(plot_ym)
        bottom_x = line_polyfit_const[0] * bottom_y ** 2 \
            + line_polyfit_const[1] * bottom_y + line_polyfit_const[2]
        meters_from_center = bottom_x - (cols / 2) * xm_per_pix
        is_pass = True
    except:
        # import traceback
        # traceback.print_exc()
        pass
    finally:
        '''
        When the lane could not be detected, substitute empty images for the
        detection views
        '''
        # On error, or when the drawing steps were skipped
        if cv_rgb_sliding_windows is None:
            cv_rgb_sliding_windows = new_rgb(rows, cols)
        if histogram is None:
            histogram = np.sum(cv_bin[int(rows / 2):, :], axis=0)
        if cv_rgb_bin is None:
            cv_rgb_bin = bin_to_rgb(cv_bin)
        if cv_rgb_ellipse is None:
            cv_rgb_ellipse = new_rgb(rows, cols)
        if cv_rgb_tilt is None:
            cv_rgb_tilt = new_rgb(rows, cols)
        pass

    frame_end_time = time.time()

    ########################################
    # Create the histogram image
    ########################################
    cv_rgb_histogram = draw_histogram(cols, rows, histogram, lineType)

    ########################################
    # Create the display image
    ########################################
    # Create the row 1 panel
    panel_left_row1 = new_rgb(int(rows / 3), int(cols / 3))
    cv_rgb = to_rgb(cv_bgr)
    # Shrink the images for the panels
    cv_bgr_ipm_white = cv2.resize(cv_bgr_ipm_white, (int(cols / 3), int(rows / 3)))
    cv_rgb_histogram = cv2.resize(cv_rgb_histogram, (int(cols / 3), int(rows / 3)))
    cv_rgb_sliding_windows = cv2.resize(cv_rgb_sliding_windows,
                                        (int(cols / 3), int(rows / 3)))
    cv_rgb_bin = cv2.resize(cv_rgb_bin, (int(cols / 3), int(rows / 3)))
    cv_rgb_tilt = cv2.resize(cv_rgb_tilt, (int(cols / 3), int(rows / 3)))
    cv_rgb_ellipse = cv2.resize(cv_rgb_ellipse, (int(cols / 3), int(rows / 3)))

    if is_pass:
        '''
        Sign conventions for left/right:
        tiltx_deg: - is right, + is left
        anglex_deg: + is right, - is left
        meters_from_center: - means the car is to the right, + to the left
        handle_angle: + is right, - is left
        '''
        """ DRAW TEXT """
        sample_str = 'Sample strings'
        [(text_width, text_height), baseLine] = cv2.getTextSize(
            text=sample_str, fontFace=fontFace, fontScale=fontScale,
            thickness=fontThickness)
        x_left = int(baseLine)
        y_top = int(baseLine)

        ########################################
        # Write text on row1 left
        ########################################
        if is_meter_ellipse_success:
            display_str = []
            color = (0, 255, 255)
            display_str.append("Far")
            if tilt2_deg < 0:
                display_str.append("tilt2:" + str(round(tilt2_deg, 2)) + "deg right")
            else:
                display_str.append("tilt2:" + str(round(tilt2_deg, 2)) + "deg left")
            if angle2_deg < 0:
                display_str.append("angle2:" + str(round(angle2_deg, 2)) + "deg left")
            else:
                display_str.append("angle2:" + str(round(angle2_deg, 2)) + "deg right")
            display_str.append("r2:" + str(round(curve2_r, 2)) + "m")
            end_x, end_y = draw_text(panel_left_row1, display_str, color,
                                     start_x=x_left, start_y=y_top,
                                     fontFace=fontFace, fontScale=fontScale,
                                     fontThickness=fontThickness)
            display_str = []
            display_str.append("Near")
            color = (255, 0, 0)
            if tilt1_deg < 0:
                display_str.append("tilt1:" + str(round(tilt1_deg, 2)) + "deg right")
            else:
                display_str.append("tilt1:" + str(round(tilt1_deg, 2)) + "deg left")
            if angle1_deg < 0:
                display_str.append("angle1:" + str(round(angle1_deg, 2)) + "deg left")
            else:
                display_str.append("angle1:" + str(round(angle1_deg, 2)) + "deg right")
            end_x, end_y = draw_text(panel_left_row1, display_str, color,
                                     start_x=x_left, start_y=end_y,
                                     fontFace=fontFace, fontScale=fontScale,
                                     fontThickness=fontThickness)
            display_str = []
            display_str.append("r1:" + str(round(curve1_r, 2)) + "m")
            color = (255, 0, 0)
            end_x, end_y = draw_text(panel_left_row1, display_str, color,
                                     start_x=x_left, start_y=end_y,
                                     fontFace=fontFace, fontScale=fontScale,
                                     fontThickness=fontThickness)
        """
        ####################
        # Draw an arrow on cv_rgb
        ####################
        arrow_x = int(cv_rgb.shape[1]/2-35)
        arrow_y = int(cv_rgb.shape[0]/2-35)
        handle_angle = -1*tilt1_deg
        display_str = []
        display_str.append(str(round(handle_angle,2))+"deg")
        if meters_from_center >= 0:
            # the car is to the left of center
            if np.abs(meters_from_center)*100 > 20:
                # very far to the left:
                if tilt2_deg > 0:
                    # the road ahead curves left: steer slightly left
                    handle_angle = -1*MAX_HANDLE_ANGLE/2
                else:
                    # the road ahead curves right: steer right at full lock
                    handle_angle = 1*MAX_HANDLE_ANGLE
            elif np.abs(meters_from_center)*100 > 10:
                if tilt2_deg > 0:
                    # off to the left, the far curve is left: steer slightly right
                    handle_angle = MAX_HANDLE_ANGLE/2
                else:
                    # off to the left, the far curve is right: steer right at full lock
                    handle_angle = MAX_HANDLE_ANGLE
        else:
            # the car is to the right of center
            if np.abs(meters_from_center)*100 > 20:
                # very far to the right
                if tilt2_deg < 0:
                    # the road ahead curves right: steer slightly right
                    handle_angle = 1*MAX_HANDLE_ANGLE/2
                else:
                    # the road ahead curves left: steer left at full lock
                    handle_angle = -1*MAX_HANDLE_ANGLE
            elif np.abs(meters_from_center)*100 > 10:
                if tilt2_deg < 0:
                    # off to the right, the far curve is right: steer slightly left
                    handle_angle = -1*MAX_HANDLE_ANGLE/2
                else:
                    # off to the right, the far curve is left: steer left at full lock
                    handle_angle = -1*MAX_HANDLE_ANGLE

        # Clamp to the operational steering range
        if handle_angle > MAX_HANDLE_ANGLE:
            handle_angle = MAX_HANDLE_ANGLE
        elif handle_angle < -1*MAX_HANDLE_ANGLE:
            handle_angle = -1*MAX_HANDLE_ANGLE

        ratio = 10*np.abs(handle_angle)/100
        if np.abs(handle_angle) <= 5:
            arrow_type = 2
            arrow_color = (0, 255-(255*ratio), 0)
            arrow_text_color = (0, 255, 0)
        elif handle_angle > 5:
            arrow_type = 1
            arrow_color = (255-(255*ratio), 255-(255*ratio), 255)
            arrow_text_color = (0, 0, 255)
        else:
            arrow_type = 3
            arrow_color = (255, 255-(255*ratio), 255-(255*ratio))
            arrow_text_color = (255, 0, 0)
        draw_arrow(cv_rgb, arrow_x, arrow_y, arrow_color, size=2, arrow_type=arrow_type, lineType=lineType)
        end_x, end_y = draw_text(cv_rgb, display_str, arrow_color, start_x=arrow_x, start_y=arrow_y-10,
                                 fontFace=fontFace, fontScale=fontScale, fontThickness=fontThickness)
        """
        """
        ####################
        # When the far curve angle is large, display "slow down"
        ####################
        if np.abs(tilt2_deg) > np.abs(tilt1_deg) and np.abs(tilt2_deg) >= 15.0:
            display_str = ["slow down"]
            color = (0, 0, 255)
            end_x, end_y = draw_text(cv_rgb, display_str, color, start_x=arrow_x, start_y=arrow_y-30,
                                     fontFace=fontFace, fontScale=fontScale, fontThickness=fontThickness)
        """

        ########################################
        # Draw text on cv_bgr_white
        ########################################
        display_str = ["white filter"]
        color = (255, 255, 255)
        end_x, end_y = draw_text(cv_bgr_ipm_white, display_str, color,
                                 start_x=x_left, start_y=y_top,
                                 fontFace=fontFace, fontScale=fontScale,
                                 fontThickness=fontThickness)

        ########################################
        # Draw text on the histogram
        ########################################
        display_str = ["histogram"]
        color = (255, 255, 255)
        end_x, end_y = draw_text(cv_rgb_histogram, display_str, color,
                                 start_x=x_left, start_y=y_top,
                                 fontFace=fontFace, fontScale=fontScale,
                                 fontThickness=fontThickness)

        ########################################
        # Draw text on sliding windows
        ########################################
        display_str = ["sliding windows"]
        color = (255, 255, 255)
        end_x, end_y = draw_text(cv_rgb_sliding_windows, display_str, color,
                                 start_x=x_left, start_y=y_top,
                                 fontFace=fontFace, fontScale=fontScale,
                                 fontThickness=fontThickness)

        ########################################
        # Draw text on cv_rgb_bin
        ########################################
        display_str = ["road"]
        color = (255, 255, 255)
        end_x, end_y = draw_text(cv_rgb_bin, display_str, color,
                                 start_x=x_left, start_y=y_top,
                                 fontFace=fontFace, fontScale=fontScale,
                                 fontThickness=fontThickness)

        ########################################
        # Draw text on the tilt image
        ########################################
        display_str = ["tilts"]
        color = (255, 255, 255)
        end_x, end_y = draw_text(cv_rgb_tilt, display_str, color,
                                 start_x=x_left, start_y=y_top,
                                 fontFace=fontFace, fontScale=fontScale,
                                 fontThickness=fontThickness)
        if is_meter_ellipse_success:
            display_str = ["Far"]
            color = (0, 255, 255)
            if tilt2_deg < 0:
                display_str.append("tilt2:" + str(round(tilt2_deg, 2)) + "deg right")
            else:
                display_str.append("tilt2:" + str(round(tilt2_deg, 2)) + "deg left")
            end_x, end_y = draw_text(cv_rgb_tilt, display_str, color,
                                     start_x=x_left, start_y=end_y,
                                     fontFace=fontFace, fontScale=fontScale,
                                     fontThickness=fontThickness)
            display_str = ["Near"]
            color = (255, 0, 0)
            if tilt1_deg < 0:
                display_str.append("tilt1:" + str(round(tilt1_deg, 2)) + "deg right")
            else:
                display_str.append("tilt1:" + str(round(tilt1_deg, 2)) + "deg left")
            end_x, end_y = draw_text(cv_rgb_tilt, display_str, color,
                                     start_x=x_left, start_y=end_y,
                                     fontFace=fontFace, fontScale=fontScale,
                                     fontThickness=fontThickness)

        ########################################
        # Draw text on the curve image
        ########################################
        display_str = ["curve"]
        color = (255, 255, 255)
        end_x, end_y = draw_text(cv_rgb_ellipse, display_str, color,
                                 start_x=x_left, start_y=y_top,
                                 fontFace=fontFace, fontScale=fontScale,
                                 fontThickness=fontThickness)
        if is_meter_ellipse_success:
            display_str = []
            # Far
            if angle2_deg < 0:
                display_str.append("angle2:" + str(round(angle2_deg, 2)) + "deg left")
            else:
                display_str.append("angle2:" + str(round(angle2_deg, 2)) + "deg right")
            display_str.append("r2:" + str(round(curve2_r, 2)) + "m")
            color = (0, 200, 200)
            end_x, end_y = draw_text(cv_rgb_ellipse, display_str, color,
                                     start_x=x_left, start_y=end_y,
                                     fontFace=fontFace, fontScale=fontScale,
                                     fontThickness=fontThickness)
            display_str = []
            # Near
            if angle1_deg < 0:
                display_str.append("angle1:" + str(round(angle1_deg, 2)) + "deg left")
            else:
                display_str.append("angle1:" + str(round(angle1_deg, 2)) + "deg right")
            display_str.append("r1:" + str(round(curve1_r, 2)) + "m")
            color = (200, 0, 0)
            end_x, end_y = draw_text(cv_rgb_ellipse, display_str, color,
                                     start_x=x_left, start_y=end_y,
                                     fontFace=fontFace, fontScale=fontScale,
                                     fontThickness=fontThickness)

    # Concatenate the images
    panel_rgb_row2 = to_rgb(cv_bgr_ipm_white)
    panel_rgb_row2 = cv2.hconcat([panel_rgb_row2, cv_rgb_sliding_windows])
    panel_rgb_row2 = cv2.hconcat([panel_rgb_row2, cv_rgb_tilt])
    panel_rgb_row3 = cv_rgb_histogram
    panel_rgb_row3 = cv2.hconcat([panel_rgb_row3, cv_rgb_bin])
    panel_rgb_row3 = cv2.hconcat([panel_rgb_row3, cv_rgb_ellipse])
    panel_rgb_rows = cv2.vconcat([panel_rgb_row2, panel_rgb_row3])

    return is_pass, \
        to_bgr(panel_rgb_rows), to_bgr(panel_left_row1), to_bgr(cv_rgb), \
        tilt1_deg, tilt2_deg, angle1_deg, angle2_deg, curve1_r, curve2_r, \
        meters_from_center, \
        tilt_deg
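# calc_curve() above is not shown in this excerpt; its curvature radius
# presumably follows the standard lane-detection formula for a quadratic fit
# x = A*y^2 + B*y + C evaluated in meter coordinates. A minimal sketch of that
# formula (an assumption about calc_curve's internals, shown for reference):
import numpy as np

def curvature_radius(polyfit_const, y_eval):
    A, B, _ = polyfit_const
    # R = (1 + (2Ay + B)^2)^(3/2) / |2A|
    return (1 + (2 * A * y_eval + B) ** 2) ** 1.5 / np.abs(2 * A)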
def overlay_on_image(frames, object_infos, LABELS):
    global map_flag  ##

    try:
        color_image = frames

        if isinstance(object_infos, type(None)):
            return color_image

        # Show images
        height = color_image.shape[0]
        width = color_image.shape[1]
        entire_pixel = height * width
        img_cp = color_image.copy()

        # show inspection result
        if map_flag == "measure_finish":  ##
            map_flag = "wait"  ##
            heat_map = cv2.applyColorMap(np.uint8(255 * map_ref), cv2.COLORMAP_JET)  ##
            heat_map = cv2.addWeighted(heat_map, 0.5, img_cp, 0.5, 2.2)  ##
            cv2.imshow("Reference", heat_map)  ##
            cv2.imwrite("Reference.jpg", heat_map)  ##
        elif map_flag == "inspection_finish":  ##
            map_flag = "wait"  ##
            heat_map = cv2.applyColorMap(np.uint8(255 * np.abs(map_result - map_ref)),
                                         cv2.COLORMAP_JET)  ##
            heat_map = cv2.addWeighted(heat_map, 0.5, img_cp, 0.5, 2.2)  ##
            cv2.imshow("Result", heat_map)  ##
            cv2.imwrite("Result.jpg", heat_map)  ##

        for (object_info, LABEL) in zip(object_infos, LABELS):
            drawing_initial_flag = True
            for box_index in range(100):
                if object_info[box_index + 1] == 0.0:
                    break
                base_index = box_index * 7
                if (not np.isfinite(object_info[base_index]) or
                        not np.isfinite(object_info[base_index + 1]) or
                        not np.isfinite(object_info[base_index + 2]) or
                        not np.isfinite(object_info[base_index + 3]) or
                        not np.isfinite(object_info[base_index + 4]) or
                        not np.isfinite(object_info[base_index + 5]) or
                        not np.isfinite(object_info[base_index + 6])):
                    continue

                object_info_overlay = object_info[base_index:base_index + 7]

                min_score_percent = 30  ##
                source_image_width = width
                source_image_height = height

                base_index = 0
                class_id = object_info_overlay[base_index + 1]
                percentage = int(object_info_overlay[base_index + 2] * 100)
                if (percentage <= min_score_percent):
                    continue

                box_left = int(object_info_overlay[base_index + 3] * source_image_width)
                box_top = int(object_info_overlay[base_index + 4] * source_image_height)
                box_right = int(object_info_overlay[base_index + 5] * source_image_width)
                box_bottom = int(object_info_overlay[base_index + 6] * source_image_height)

                label_text = LABEL[int(class_id)] + " (" + str(percentage) + "%)"
                box_color = (255, 128, 0)
                box_thickness = 1
                cv2.rectangle(img_cp, (box_left, box_top), (box_right, box_bottom),
                              box_color, box_thickness)

                if "person" in label_text:  ##
                    label_background_color = (0, 0, 255)  ##
                    heatmap(box_left, box_top, box_right, box_bottom)  ##
                else:  ##
                    label_background_color = (125, 175, 75)  ##

                label_text_color = (0, 0, 0)  ##
                label_size = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]
                label_left = box_left
                label_top = box_top - label_size[1]
                if (label_top < 1):
                    label_top = 1
                label_right = label_left + label_size[0]
                label_bottom = label_top + label_size[1]
                cv2.rectangle(img_cp, (label_left - 1, label_top - 1),
                              (label_right + 1, label_bottom + 1),
                              label_background_color, -1)
                cv2.putText(img_cp, label_text, (label_left, label_bottom),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, label_text_color, 1)

        cv2.putText(img_cp, fps, (width - 170, 15),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 0, 255), 1, cv2.LINE_AA)
        cv2.putText(img_cp, detectfps, (width - 170, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 0, 255), 1, cv2.LINE_AA)
        cv2.putText(img_cp, message1, (width - 280, 45),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 38), 1, cv2.LINE_AA)  ##
        cv2.putText(img_cp, message2, (width - 280, 60),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 38), 1, cv2.LINE_AA)  ##

        return img_cp

    except:
        import traceback
        traceback.print_exc()
    heightFactor = frame.shape[0] / 300.0
    widthFactor = frame.shape[1] / 300.0

    # Scale object detection to frame
    xLeftBottom = int(widthFactor * xLeftBottom)
    yLeftBottom = int(heightFactor * yLeftBottom)
    xRightTop = int(widthFactor * xRightTop)
    yRightTop = int(heightFactor * yRightTop)

    # Draw location of object
    cv2.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop),
                  (200, 0, 0), 2)

    # Draw label and confidence of prediction in the resized frame
    if class_id in classNames:
        label = classNames[class_id] + ": " + str(confidence)
        labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        yLeftBottom = max(yLeftBottom, labelSize[1])
        cv2.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]),
                      (xLeftBottom + labelSize[0], yLeftBottom + baseLine),
                      (200, 0, 0), cv2.FILLED)
        cv2.putText(frame, label, (xLeftBottom, yLeftBottom),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

    curr_time = float(frame_no) / frameps
    minutes = int(curr_time / 60)
    seconds = curr_time % 60
    ## Uncomment below lines to get a warning if zero people are in the frame.
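# ----------------------------------------------------------------------------
# The lines the comment above refers to were cut off in this excerpt. Purely
# as a hypothetical sketch of what such a warning overlay could look like,
# assuming a people_count tally kept inside the detection loop (people_count
# is not a name from the original code):
# ----------------------------------------------------------------------------
#     if people_count == 0:
#         warning = "WARNING: no people in frame ({:02d}:{:04.1f})".format(minutes, seconds)
#         cv2.putText(frame, warning, (10, 30),
#                     cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)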
def main():
    print("")
    print("##### YOLO OBJECT DETECTION FOR VIDEOS #####")
    print("")
    print("Loading the model")
    print("...")
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    device = torch.device('cuda')
    model = YOLOv1(int(args.split_size), int(args.num_boxes),
                   int(args.num_classes)).to(device)
    num_param = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("Amount of YOLO parameters: " + str(num_param))
    print("...")
    print("Loading model weights")
    print("...")
    weights = torch.load(args.weights)
    model.load_state_dict(weights["state_dict"])
    model.eval()

    # Transform applied to the input frames: resizes the image and
    # converts it into a tensor
    transform = transforms.Compose([
        transforms.Resize((448, 448), Image.NEAREST),
        transforms.ToTensor(),
    ])

    print("Loading input video file")
    print("...")
    vs = cv2.VideoCapture(args.input)
    frame_width = int(vs.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(vs.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Defining the output video file
    out = cv2.VideoWriter(args.output, cv2.VideoWriter_fourcc(*"mp4v"), 30,
                          (frame_width, frame_height))

    # Used to scale the bounding box predictions back to the original frame
    # (448 is the input dimension of the model)
    ratio_x = frame_width / 448
    ratio_y = frame_height / 448

    idx = 1      # Tracks how many frames have already been processed
    sum_fps = 0  # Used to report the average FPS at the end
    amount_frames = int(vs.get(cv2.CAP_PROP_FRAME_COUNT))  # Total number of frames

    while True:
        grabbed, frame = vs.read()
        if not grabbed:
            break

        # Logging the number of processed frames
        print("Loading frame " + str(idx) + " out of " + str(amount_frames))
        print("Percentage done: {0:.0%}".format(idx / amount_frames))
        print("")
        idx += 1  # Frame index

        img = Image.fromarray(frame)
        img_tensor = transform(img).unsqueeze(0).to(device)
        img = cv2.UMat(frame)

        with torch.no_grad():
            start_time = time.time()
            output = model(img_tensor)  # Makes a prediction on the input frame
            curr_fps = int(1.0 / (time.time() - start_time))  # Prediction FPS
            sum_fps += curr_fps
            print("FPS for YOLO prediction: " + str(curr_fps))
            print("")

        # Extracts the class index with the highest confidence score
        corr_class = torch.argmax(output[0, :, :, 10:23], dim=2)

        for cell_h in range(output.shape[1]):
            for cell_w in range(output.shape[2]):
                # Determines the best bounding box prediction
                best_box = 0
                max_conf = 0
                for box in range(int(args.num_boxes)):
                    if output[0, cell_h, cell_w, box * 5] > max_conf:
                        best_box = box
                        max_conf = output[0, cell_h, cell_w, box * 5]

                # Checks whether the confidence score is above the specified threshold
                if output[0, cell_h, cell_w, best_box * 5] >= float(args.threshold):
                    # Extracts the box confidence score, the box coordinates and the class
                    confidence_score = output[0, cell_h, cell_w, best_box * 5]
                    center_box = output[0, cell_h, cell_w,
                                        best_box * 5 + 1:best_box * 5 + 5]
                    best_class = corr_class[cell_h, cell_w]

                    # Transforms the box coordinates into pixel coordinates
                    centre_x = center_box[0] * 32 + 32 * cell_w
                    centre_y = center_box[1] * 32 + 32 * cell_h
                    width = center_box[2] * 448
                    height = center_box[3] * 448

                    # Calculates the corner values of the bounding box
                    x1 = int((centre_x - width / 2) * ratio_x)
                    y1 = int((centre_y - height / 2) * ratio_y)
                    x2 = int((centre_x + width / 2) * ratio_x)
                    y2 = int((centre_y + height / 2) * ratio_y)

                    # Draws the bounding box around the object in the
                    # corresponding class color
                    cv2.rectangle(img, (x1, y1), (x2, y2),
                                  category_color[best_class], 1)

                    # Generates the background for the text, painted in the
                    # corresponding class color, and the text with the class
                    # label including the confidence score
                    labelsize = cv2.getTextSize(category_list[best_class],
                                                cv2.FONT_HERSHEY_DUPLEX, 0.5, 1)
                    cv2.rectangle(img, (x1, y1 - 20),
                                  (x1 + labelsize[0][0] + 45, y1),
                                  category_color[best_class], -1)
                    cv2.putText(img, category_list[best_class] + " "
                                + str(int(confidence_score.item() * 100)) + "%",
                                (x1, y1 - 5), cv2.FONT_HERSHEY_DUPLEX, 0.5,
                                (0, 0, 0), 1, cv2.LINE_AA)

        # Generates a small overlay in the top left corner which displays
        # the current FPS for the prediction
        cv2.putText(img, str(curr_fps) + "FPS", (25, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)

        out.write(img)  # Stores the frame with the predictions in the new mp4 file

    # Release the streams so the output mp4 file is finalized properly
    # (missing in the original; without out.release() the file may be unplayable)
    vs.release()
    out.release()

    print("Average FPS was: " + str(int(sum_fps / amount_frames)))
    print("")
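# ----------------------------------------------------------------------------
# A plausible argument parser for the script above, inferred from the args.*
# attributes it reads. The flag names are assumptions, but the defaults below
# are implied by the code itself: 448 / 32 = 14 grid cells, and the class
# slice 10:23 corresponds to 13 classes after 2 boxes * 5 values per box.
# ----------------------------------------------------------------------------
import argparse

parser = argparse.ArgumentParser(description="YOLOv1 video inference")
parser.add_argument("--weights", required=True, help="path to the model checkpoint")
parser.add_argument("--input", required=True, help="input video file")
parser.add_argument("--output", required=True, help="output mp4 file")
parser.add_argument("--split_size", default=14, help="grid size S of the YOLO output")
parser.add_argument("--num_boxes", default=2, help="bounding boxes predicted per cell")
parser.add_argument("--num_classes", default=13, help="number of object classes")
parser.add_argument("--threshold", default=0.5, help="confidence threshold")
args = parser.parse_args()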
    def get_text_size_wh(self, txt):
        ((txt_w, txt_h), _) = cv2.getTextSize(
            txt, self.font_face, self.font_scale, self.font_line_thickness
        )
        return txt_w, txt_h
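# ----------------------------------------------------------------------------
# Example use of get_text_size_wh (a hypothetical caller, not from the
# original code): centre a label horizontally over a box spanning x0..x1.
# `drawer`, `img`, `x0`, `x1` and `y0` are placeholder names; the font
# attributes are the same ones the method reads.
# ----------------------------------------------------------------------------
import cv2

txt_w, txt_h = drawer.get_text_size_wh("person 97%")
text_x = x0 + ((x1 - x0) - txt_w) // 2  # centre the text over the box
cv2.putText(img, "person 97%", (text_x, y0 - 4), drawer.font_face,
            drawer.font_scale, (255, 255, 255), drawer.font_line_thickness)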
def calc_visualize(func_image, objects):
    if len(objects) == 0:
        map2d = np.ones([max_y, max_x, 3], dtype=np.int8)
        return func_image, map2d

    # Create colormap
    cmap = LinearSegmentedColormap.from_list("", ["red", "yellow", "green"])

    # Create empty map and combined image
    map2d = np.ones([max_y, max_x, 3], dtype=np.int8)
    offset = 20
    combined = np.zeros([1080, 1920 + max_x + offset, 3], np.uint8)

    # Transform x,y coordinates (adapt to the camera angle based on the
    # calculated homography)
    objects_x_y_transformed = np.apply_along_axis(toworld, 1, objects[:, 0:2])
    objects = np.column_stack((objects, objects_x_y_transformed))  # x,y,x1,y1,x2,y2,map_x,map_y

    # Get distances between the transformed coordinates of all objects using
    # a KD-tree; set the distance to 255 if it exceeds the threshold
    tree = cKDTree(objects_x_y_transformed)
    t_dst = tree.sparse_distance_matrix(tree, max_distance_detection)
    t_dst = t_dst.toarray()
    t_dst = np.array(t_dst, dtype=np.int32)
    t_dst2 = t_dst.copy()
    t_dst2[np.where(t_dst2 == 0)] = 255
    # x,y,x1,y1,x2,y2,map_x,map_y,distance -> minimum distance to another
    # object, per object (used to color bounding boxes and points)
    objects = np.column_stack((objects, np.min(t_dst2, 1)))

    # Create distance lines
    near_pairs = np.column_stack((np.argwhere(t_dst > 0), t_dst[np.nonzero(t_dst)]))
    # Get coordinates for drawing lines
    if len(near_pairs) > 0:
        near_pairs = np.apply_along_axis(get_line_coordinates, 1, near_pairs, objects)

    # Draw object bounding boxes, colored by minimum distance to another person
    for object_ in objects:
        norm = matplotlib.colors.Normalize(vmin=0, vmax=max_distance_detection, clip=True)
        color = np.array(cmap(norm(object_[8]))[0:3]) * 255
        color = (color[2], color[1], color[0])
        if int(object_[8]) < 255:
            # int() casts added: column_stack promotes the array to float and
            # cv2 expects integer pixel coordinates
            cv2.putText(func_image, str(int(object_[8])),
                        (int(object_[0]), int(object_[1])),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2, cv2.LINE_AA)
        cv2.rectangle(func_image, (int(object_[2]), int(object_[3])),
                      (int(object_[4]), int(object_[5])), color, 2)  # x1,y1,x2,y2,color,linestrength
        cv2.circle(map2d, (int(object_[6]), int(object_[7])), 10, color, -1)

    # Draw lines between objects, colored based on distance
    for line_ in near_pairs:
        norm = matplotlib.colors.Normalize(vmin=0, vmax=max_distance_detection, clip=True)
        color = np.array(cmap(norm(line_[8]))[0:3]) * 255
        color = (color[2], color[1], color[0])
        text_pt_x = int((int(line_[0]) + int(line_[4])) / 2)
        text_pt_y = int((int(line_[1]) + int(line_[5])) / 2)
        cv2.putText(func_image, str(int(line_[8])), (text_pt_x, text_pt_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2, cv2.LINE_AA)
        cv2.line(func_image, (int(line_[0]), int(line_[1])),
                 (int(line_[4]), int(line_[5])), color, 2)
        text_pt_x_map = int((int(line_[2]) + int(line_[6])) / 2)
        text_pt_y_map = int((int(line_[3]) + int(line_[7])) / 2)
        cv2.putText(map2d, str(int(line_[8])), (text_pt_x_map, text_pt_y_map),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2, cv2.LINE_AA)
        cv2.line(map2d, (int(line_[2]), int(line_[3])),
                 (int(line_[6]), int(line_[7])), color, 2)

    # Detect and draw crowds on the image (based on transformed coordinates)
    crowd_data = crowd_detection(objects, max_distance_detection_crowd, 'image', min_crowd_size)
    crowd_data = crowd_suppression(crowd_data)
    for crowd in crowd_data:
        border_offset = 3
        (label_width, label_height), baseline = cv2.getTextSize(
            'Crowdsize: X', cv2.FONT_HERSHEY_DUPLEX, 0.6, 1)
        cv2.rectangle(func_image, (crowd[0], crowd[1]),
                      (crowd[0] + label_width + 10,
                       crowd[1] - label_height - border_offset - 10), (255, 0, 0), -1)
        cv2.putText(func_image, 'Crowdsize: {}'.format(crowd[4]),
                    (crowd[0] + 5, crowd[1] - border_offset - 5),
                    cv2.FONT_HERSHEY_DUPLEX, 0.6, (255, 255, 255), 1, cv2.LINE_AA)
        cv2.rectangle(func_image, (int(crowd[0]), int(crowd[1])),
                      (int(crowd[2]), int(crowd[3])), (255, 0, 0), 2)

    # Detect and draw crowds on the map (based on transformed coordinates)
    crowd_data = crowd_detection(objects, max_distance_detection_crowd, 'map', min_crowd_size)
    crowd_data = crowd_suppression(crowd_data)
    for crowd in crowd_data:
        border_offset = 3
        (label_width, label_height), baseline = cv2.getTextSize(
            'Crowdsize: X', cv2.FONT_HERSHEY_DUPLEX, 0.6, 1)
        cv2.rectangle(map2d, (crowd[0], crowd[1]),
                      (crowd[0] + label_width + 10,
                       crowd[1] - label_height - border_offset - 10), (255, 0, 0), -1)
        cv2.putText(map2d, 'Crowdsize: {}'.format(crowd[4]),
                    (crowd[0] + 5, crowd[1] - border_offset - 5),
                    cv2.FONT_HERSHEY_DUPLEX, 0.6, (255, 255, 255), 1, cv2.LINE_AA)
        cv2.rectangle(map2d, (int(crowd[0]), int(crowd[1])),
                      (int(crowd[2]), int(crowd[3])), (255, 255, 255), 2)

    return func_image, map2d
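# ----------------------------------------------------------------------------
# Why the zeros are remapped to 255 above: sparse_distance_matrix stores
# nothing for pairs farther apart than max_distance, and toarray() renders
# both those missing pairs and the self-distances as 0, which would otherwise
# look like "distance zero". A tiny standalone check with hypothetical points
# and a threshold of 100 (not part of the original code):
# ----------------------------------------------------------------------------
import numpy as np
from scipy.spatial import cKDTree

pts = np.array([[0, 0], [30, 40], [500, 500]])
tree = cKDTree(pts)
dst = tree.sparse_distance_matrix(tree, 100).toarray().astype(np.int32)
dst[dst == 0] = 255      # self-pairs and out-of-range pairs become "far"
print(dst.min(axis=1))   # nearest-neighbour distance per point: [50, 50, 255]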
def analyze_with_annotation(xml, csv, img, out_dir, det=False):
    # xml = annotation file
    item_idx = 0
    fname = os.path.basename(os.path.splitext(xml)[0])
    analyze = parse_xml.parcingXml(xml)
    analyze = np.array(analyze)
    csv_arr = parse_xml.parcingCsv(csv)

    for i in range(len(analyze[0])):
        file_data = OrderedDict()
        file_data["facilities"] = []
        xmin = int(analyze[0][i])
        xmax = int(analyze[1][i])
        ymin = int(analyze[2][i])
        ymax = int(analyze[3][i])
        object_class = analyze[4][i]

        csv_copy = copy.deepcopy(csv_arr)
        csv_crop = csv_copy[ymin:ymax, xmin:xmax]
        csv_flat = csv_crop.flatten()
        csv_flat = np.round_(csv_flat, 1)
        temp_min = csv_flat.min()
        temp_max = csv_flat.max()
        temp_average = np.average(csv_flat)
        temp_average = np.round_(temp_average, 1)

        # Find heating points
        csv_copy = copy.deepcopy(csv_arr)
        csv_crop = csv_copy[ymin:ymax, xmin:xmax]
        thresh = np.percentile(csv_crop, 75)
        thresh_arr = np.zeros((len(csv_crop), len(csv_crop[0])), dtype=np.uint8)
        thresh_arr = np.where(csv_crop[:, :] < thresh, 0, 255)
        thresh_arr = np.array(thresh_arr, dtype=np.uint8)
        hp_contour, _ = cv2.findContours(thresh_arr, cv2.RETR_EXTERNAL,
                                         cv2.CHAIN_APPROX_NONE)

        # Find reflection points
        csv_copy = copy.deepcopy(csv_arr)
        csv_crop = csv_copy[ymin:ymax, xmin:xmax]
        CRITICAL_GRAD = 0.4
        thresh = np.percentile(csv_crop, 75)
        thresh_arr = np.zeros((len(csv_crop), len(csv_crop[0])), dtype=np.uint8)
        thresh_arr = np.where(csv_crop[:, :] < thresh, 0, 255)
        thresh_arr = np.array(thresh_arr, dtype=np.uint8)
        height, width = thresh_arr.shape
        suspected_points = []
        # Loop indices renamed from i, j: the original reused i, shadowing
        # the outer loop variable
        for row in range(height):
            for col in range(width):
                if thresh_arr[row][col] != 0:
                    temp = calculateMaxSubmission(row, col, csv_crop)
                    if temp > CRITICAL_GRAD:
                        suspected_points.append([col, row])
        masking_img = np.zeros((height, width, 3), dtype=np.uint8)
        for pts in suspected_points:
            xy = np.array(pts)
            cv2.circle(masking_img, (xy[0], xy[1]), 3, (255, 255, 255), -1)
        masking_img = masking_img[:, :, 0]
        masking_img = masking_img.astype(np.uint8)
        rp_contour, hierarchy = cv2.findContours(masking_img, cv2.RETR_EXTERNAL,
                                                 cv2.CHAIN_APPROX_NONE)

        # object_data = writeJason(xmin, ymin, xmax, ymax, temp_min, temp_max,
        #                          temp_average, object_class, hp_contour, rp_contour)
        json_data = {}
        json_data["xmin"] = xmin
        json_data["ymin"] = ymin
        json_data["xmax"] = xmax
        json_data["ymax"] = ymax
        json_data["tmin"] = temp_min
        json_data["tmax"] = temp_max
        json_data["tmean"] = temp_average
        json_data["class"] = object_class
        json_data["hp_counter"] = hp_contour
        json_data["rp_counter"] = rp_contour

        # Rule-based analysis
        rule = DiagnosisRule("./data/diagnosis_rule.json")
        diag_result = rule.diagnose(object_class, temp_max)
        json_data["DiagnosisCode"] = diag_result["code"]
        json_data["Cause of Failure"] = diag_result["cause"]
        json_data["Diagnosis"] = diag_result["action"]
        json_data["Over temperature"] = diag_result["Over Temperature"]
        json_data["FacilityName"] = diag_result["name"]
        json_data["Limit Temperature"] = diag_result["Limit Temperature"]
        json_data["FileName"] = fname + '.jpg'
        json_data["PointTemperature"] = json_data["tmax"]
        if diag_result["Over Temperature"] > 0:
            json_data["deltaT"] = json_data["tmax"] / json_data["Limit Temperature"]
            json_data["deltaT"] = round(json_data["deltaT"], 2)
        file_data["facilities"].append(writeJson2(json_data))

        with open(os.path.join(out_dir, (fname + '_{0}'.format(item_idx) + '.json')),
                  'w', encoding='utf-8') as make_file:
            json.dump(file_data, make_file, indent="\t", ensure_ascii=False)

        # Create image
        img_original = cv2.imread(img)
        cv2.rectangle(img_original, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
        textLabel = object_class
        (retval, baseLine) = cv2.getTextSize(textLabel, cv2.FONT_HERSHEY_COMPLEX, 1, 1)
        textOrg = (xmin, ymin)
        cv2.rectangle(img_original, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                      (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                      (0, 0, 0), 2)
        cv2.rectangle(img_original, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                      (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                      (255, 255, 255), -1)
        cv2.putText(img_original, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX,
                    1, (0, 0, 0), 1)
        png_name = fname + '_{0}'.format(item_idx) + '.png'
        cv2.imwrite(os.path.join(out_dir, png_name), img_original)
        item_idx += 1
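# ----------------------------------------------------------------------------
# The filled-background-plus-text pattern above (getTextSize, background
# rectangle, putText) recurs throughout these snippets. A small helper that
# factors it out; this is a sketch, not part of any of the original sources:
# ----------------------------------------------------------------------------
import cv2

def draw_label(img, text, org, font=cv2.FONT_HERSHEY_DUPLEX, scale=1.0,
               bg=(255, 255, 255), fg=(0, 0, 0), thickness=1, pad=5):
    (w, h), baseline = cv2.getTextSize(text, font, scale, thickness)
    x, y = org  # putText origin: baseline-left corner of the text
    cv2.rectangle(img, (x - pad, y + baseline + pad),
                  (x + w + pad, y - h - pad), bg, -1)
    cv2.putText(img, text, (x, y), font, scale, fg, thickness)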
    result_lm = model_lm(torch.from_numpy(img))
    result_lm = np.array(result_lm)
    result_lm = result_lm * (0.19 * h)
    result_lm = result_lm.reshape(68, 2)
    result_lm[:, 0] += x + (0.28 * h)
    result_lm[:, 1] += y + (0.49 * w)

    _, maximum = torch.max(result.data, 1)
    pred = maximum.item()

    # Displaying results based on the classification
    if pred == 0:
        cv2.circle(frm, (keypoints['left_eye']), 2, yellow, 2)
        cv2.circle(frm, (keypoints['right_eye']), 2, yellow, 2)
        cv2.circle(frm, (keypoints['nose']), 2, yellow, 2)
        cv2.circle(frm, (keypoints['mouth_left']), 2, yellow, 2)
        cv2.circle(frm, (keypoints['mouth_right']), 2, yellow, 2)
        (lw, lh), bl = cv2.getTextSize("Correctly Masked", f, s, t)
        cv2.putText(frm, "Correctly Masked",
                    ((int(((w + x) - x - lw) / 2) + x), y - 10), f, s, green, t)
        cv2.rectangle(frm, (x, y), (x + w, y + h), green, 2)
        # green rectangle if the mask is worn correctly
    elif pred == 1:
        cv2.circle(frm, (keypoints['left_eye']), 2, yellow, 2)
        cv2.circle(frm, (keypoints['right_eye']), 2, yellow, 2)
        cv2.circle(frm, (keypoints['nose']), 2, yellow, 2)
        cv2.circle(frm, (keypoints['mouth_left']), 2, yellow, 2)
        cv2.circle(frm, (keypoints['mouth_right']), 2, yellow, 2)
        (lw, lh), bl = cv2.getTextSize("Unmasked", f, s, t)
        cv2.putText(frm, "Unmasked",
                    ((int(((w + x) - x - lw) / 2) + x), y - 10), f, s, red, t)
        # The original excerpt was cut off mid-call here; completed to mirror
        # the pred == 0 branch, with red for the unmasked case
        cv2.rectangle(frm, (x, y), (x + w, y + h), red, 2)
def main():
    # Load configs and set random seed
    configs = json.load(open('./configs/fer2013_config.json'))
    image_size = (configs['image_size'], configs['image_size'])

    # model = densenet121(in_channels=3, num_classes=7)
    # model = alexnet(in_channels=3, num_classes=7)
    model = resmasking_dropout1(in_channels=3, num_classes=7)
    model.cuda()

    # state = torch.load('./saved/checkpoints/densenet121_rot30_2019Nov11_14.23')
    state = torch.load('./saved/checkpoints/resmasking_dropout1__demo_part')
    # state = torch.load('./saved/checkpoints/resmasking_dropout1__demo_whole')
    # state = torch.load('./saved/checkpoints/Z_resmasking_dropout1_rot30_2019Nov30_13.32')
    model.load_state_dict(state['net'])
    model.eval()

    # vid = cv2.VideoCapture(0)
    vid = cv2.VideoCapture('video/test.mp4')

    # cv2.namedWindow('disp')
    # cv2.resizeWindow('disp', width=800)

    with torch.no_grad():
        while True:
            ret, frame = vid.read()
            if frame is None or ret is not True:
                # End of the video file; the original used `continue` here,
                # which spins forever once a file stream is exhausted
                break

            try:
                frame = np.fliplr(frame).astype(np.uint8)
                # frame += 50
                h, w = frame.shape[:2]
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                # gray = frame

                blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 1.0,
                                             (300, 300), (104.0, 177.0, 123.0))
                net.setInput(blob)
                faces = net.forward()

                for i in range(0, faces.shape[2]):
                    confidence = faces[0, 0, i, 2]
                    if confidence < 0.5:
                        continue
                    box = faces[0, 0, i, 3:7] * np.array([w, h, w, h])
                    start_x, start_y, end_x, end_y = box.astype("int")

                    # Convert to square images
                    center_x, center_y = (start_x + end_x) // 2, (start_y + end_y) // 2
                    square_length = ((end_x - start_x) + (end_y - start_y)) // 2 // 2
                    square_length *= 1.1
                    start_x = int(center_x - square_length)
                    start_y = int(center_y - square_length)
                    end_x = int(center_x + square_length)
                    end_y = int(center_y + square_length)

                    cv2.rectangle(frame, (start_x, start_y), (end_x, end_y),
                                  (0, 255, 255), 4)
                    # cv2.rectangle(frame, (x, y), (x + w, y + h), (179, 255, 179), 2)
                    # face = gray[y:y + h, x:x + w]

                    face = gray[start_y:end_y, start_x:end_x]
                    face = ensure_color(face)
                    face = cv2.resize(face, image_size)
                    face = transform(face).cuda()
                    face = torch.unsqueeze(face, dim=0)

                    output = torch.squeeze(model(face), 0)
                    proba = torch.softmax(output, 0)
                    # emo_idx = torch.argmax(proba, dim=0).item()
                    emo_proba, emo_idx = torch.max(proba, dim=0)
                    emo_idx = emo_idx.item()
                    emo_proba = emo_proba.item()
                    emo_label = FER_2013_EMO_DICT[emo_idx]

                    label_size, base_line = cv2.getTextSize(
                        '{}: 000'.format(emo_label), cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)
                    cv2.rectangle(frame, (end_x, start_y + 1 - label_size[1]),
                                  (end_x + label_size[0], start_y + 1 + base_line),
                                  (0, 255, 255), cv2.FILLED)
                    cv2.putText(frame,
                                '{} {}'.format(emo_label, int(emo_proba * 100)),
                                (end_x, start_y + 1), cv2.FONT_HERSHEY_SIMPLEX,
                                0.8, (150, 10, 10), 2)

                cv2.imshow('disp', frame)
                # cv2.imshow('disp', np.concatenate((gray ), axis=1))
                if cv2.waitKey(1) == ord('q'):
                    break
            except:
                # Skips frames where face extraction or inference fails;
                # note that this also silently hides real errors
                continue

    cv2.destroyAllWindows()
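# ----------------------------------------------------------------------------
# The blobFromImage parameters above (300x300 input, mean subtraction of
# 104/177/123) match the widely used res10 SSD face detector for cv2.dnn.
# A sketch of how such a `net` is typically constructed; the file names are
# assumptions, and the original code does not show how its net was loaded:
# ----------------------------------------------------------------------------
import cv2

net = cv2.dnn.readNetFromCaffe(
    "deploy.prototxt",                             # network definition (assumed path)
    "res10_300x300_ssd_iter_140000.caffemodel")    # pretrained weights (assumed path)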
    # Load the saved prediction model and predict the digits
    pred = model.predict(ab.reshape(1, 28, 28, 1), batch_size=1)
    ans = pred.argmax()
    if ans == 0:
        # Sudoku has no zeros, so fall back to the next most likely digit.
        # (The original used ind = np.argsort(pred); ans = ind[1], which
        # raises an IndexError on a (1, 10) prediction and, even on a flat
        # array, would pick the second-LEAST-likely class.)
        ind = np.argsort(pred[0])
        ans = ind[-2]
    if ans == 7:
        if (ww / 22) < 0.5:  # aspect-ratio heuristic; ww comes from earlier, omitted code
            ans = 1
    if ans == 1:
        if (ww / 22) > 0.55:
            ans = 7
    put[(i // 9)][(i % 9)] = ans

    print(put)
    pfix = np.array(put)
    anss = get_ans(put)
    print(anss)

    # Draw the solved digits centred in their grid cells
    ww = imw.shape[0] // 9  # cell height
    hh = imw.shape[1] // 9  # cell width
    for i in range(9):
        for j in range(9):
            if pfix[i][j] != 0:
                continue  # keep the given digits as they are
            asize = cv2.getTextSize(str(anss[i][j]),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, 2)[0]
            xx = (hh - asize[0]) // 2
            yy = (ww + asize[1]) // 2
            imw = cv2.putText(imw, str(anss[i][j]), (hh * j + xx, ww * i + yy),
                              cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 198, 0), 2)
    cv2.imshow("read", imw)
    cv2.waitKey(0)
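# ----------------------------------------------------------------------------
# Note on the centring above: (cell + text_height) // 2 ignores the baseline
# returned by getTextSize, so glyphs with descenders sit slightly low. A
# baseline-aware variant as a sketch; `img` and `cell` are hypothetical:
# ----------------------------------------------------------------------------
import cv2

(tw, th), baseline = cv2.getTextSize("5", cv2.FONT_HERSHEY_SIMPLEX, 1, 2)
cell = 70                                 # hypothetical square cell size in pixels
top = (cell - (th + baseline)) // 2       # top edge of the full text box
org = ((cell - tw) // 2, top + th)        # putText origin is the baseline-left corner
cv2.putText(img, "5", org, cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 198, 0), 2)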