def predict(model, dataloader=None, image=None):
    """Run the detector on one image, or preview the first detection of a loader.

    Args:
        model: Detection network; it is switched to eval mode.
        dataloader: Iterable of ``(image, targets)`` pairs, used only when
            ``image`` is ``None``. ``targets["name"][0]`` labels the output.
        image: Optional input; ``image[0]`` is what gets fed to the model.

    Returns:
        With ``image``: ``(pil_image, pred_x, pred_y)`` where the two lists
        hold the centre coordinates of every box returned by
        ``resize_boxes(boxes, (416, 416), (256, 256))``. Both lists are empty
        when nothing was detected (previously this case fell through to
        iterating ``dataloader``, which crashed when it was ``None``).
        Without ``image``: ``None``; the first sample that has detections is
        drawn and shown with matplotlib.
    """
    model.eval()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    if image is not None:
        image = image[0].to(device=device)
        with torch.no_grad():
            outputs = model(image)
            outputs = non_max_suppression(outputs, conf_thres=0.5, nms_thres=0.2)

        pred_x = []
        pred_y = []
        if outputs[0] is not None:
            boxes = resize_boxes(outputs[0][:, 0:4], (416, 416), (256, 256))
            for x0, y0, x1, y1 in boxes:
                pred_x.append(((x0 + x1) / 2).tolist())
                pred_y.append(((y0 + y1) / 2).tolist())

        image = Image.fromarray(
            image.cpu().numpy()[0, 0, :, :]).convert("RGB").resize((256, 256))
        return image, pred_x, pred_y

    if dataloader is None:
        # Nothing to run on; avoid iterating None.
        return None

    for image, targets in dataloader:
        image = image[0].to(device=device)
        name = targets["name"][0]

        start_time = time.time()
        with torch.no_grad():
            outputs = model(image)
            outputs = non_max_suppression(outputs, conf_thres=0.5, nms_thres=0.2)
        elapsed_time = time.time() - start_time

        if outputs[0] is None:
            # No detections for this sample: try the next one.
            continue
        boxes = resize_boxes(outputs[0][:, 0:4], (416, 416), (256, 256))

        image_copy = Image.fromarray(image.cpu().numpy()[0, 0, :, :]).resize(
            (256, 256))
        if image_copy.mode != "RGB":
            image_copy = image_copy.convert("RGB")
        draw = ImageDraw.Draw(image_copy)
        for x0, y0, x1, y1 in boxes:
            draw.rectangle([(x0, y0), (x1, y1)], outline=(255, 0, 255))

        print(f"{name}, time: {elapsed_time}")
        plt.imshow(image_copy)
        plt.show()
        # Only the first sample with detections is previewed.
        break
    return None
def _detect(self, image) -> List[Tuple[int]]:
    """Detect vehicles in ``image`` and return their integer bounding boxes.

    The image is transformed, moved to ``self.device`` and batched, run
    through ``self.model_detect`` and filtered by ``non_max_suppression``
    using the thresholds in ``self.args``. Detections are rescaled to the
    original image size; only classes 2 (car) and 7 (truck) are kept.

    Returns:
        A list of ``(x1, y1, x2, y2)`` tuples of ints.
    """
    original_height, original_width = np.array(image).shape[:2]

    batch = self._detection_image_transform(image).to(self.device).unsqueeze(0)
    with torch.no_grad():
        raw_outputs = self.model_detect(batch)
        per_image = non_max_suppression(raw_outputs,
                                        conf_thres=self.args.conf_thres,
                                        nms_thres=self.args.nms_thres)

    vehicle_classes = (2, 7)  # 2 == car, 7 == truck
    found = []
    for dets in per_image:
        if dets is None:
            continue
        # Rescale boxes to the original image
        dets = rescale_boxes(dets, self.args.image_size,
                             (original_height, original_width))
        for x1, y1, x2, y2, _conf, _cls_conf, cls_pred in dets:
            if cls_pred.item() not in vehicle_classes:
                continue
            found.append(tuple(int(v.item()) for v in (x1, y1, x2, y2)))
    return found
def detect(self, image):
    """Slide a HOG + classifier window over an image pyramid and draw the hits.

    Every window whose ``self.clf`` prediction equals 1 is recorded, with its
    coordinates scaled back by ``self.downscale ** level``. The raw hits are
    drawn on one copy of the input and the hits surviving
    ``non_max_suppression`` on another.

    Returns:
        ``(image_before_nms, image_after_nms)``.
    """
    clone = image.copy()
    gray = rgb2gray(image)
    win_w, win_h = self.window_size[0], self.window_size[1]

    hits = []
    level = 0  # index of the current pyramid level
    for scaled in pyramid(gray, downscale=self.downscale,
                          min_size=self.window_size):
        # Stop once the scaled image can no longer hold a full window.
        if scaled.shape[0] < win_h or scaled.shape[1] < win_w:
            break

        factor = self.downscale**level
        for (x, y, window) in sliding_window(scaled, self.window_step_size,
                                             self.window_size):
            # Skip partial windows.
            if window.shape[0] != win_h or window.shape[1] != win_w:
                continue
            features = np.array([hog(window)])
            prediction = self.clf.predict(features)
            if prediction == 1:
                left = int(x * factor)
                top = int(y * factor)
                hits.append((left, top,
                             left + int(win_w * factor),
                             top + int(win_h * factor)))
        # Move to the next scale
        level += 1

    green = (0, 255, 0)

    # Results before non-maximum suppression
    clone_before_nms = clone.copy()
    for (x1, y1, x2, y2) in hits:
        cv2.rectangle(clone_before_nms, (x1, y1), (x2, y2), green, thickness=2)

    # Results after non-maximum suppression (drawn on ``clone`` itself)
    kept = non_max_suppression(np.array(hits), self.threshold)
    clone_after_nms = clone
    for (x1, y1, x2, y2) in kept:
        cv2.rectangle(clone_after_nms, (x1, y1), (x2, y2), green, thickness=2)

    return clone_before_nms, clone_after_nms
def main(input_path, DEBUG):
    """Run the frozen detection graph over every path in ``input_path``.

    Args:
        input_path: Iterable of image paths.
        DEBUG: When truthy, draw the boxes and print per-image diagnostics.

    Returns:
        ``(boxes_list, classes, FLAGS.size)`` where ``boxes_list`` holds the
        ``non_max_suppression`` result of each image, in input order.
    """
    session_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction),
        log_device_placement=False,
    )
    classes = load_coco_names(FLAGS.class_names)
    frozenGraph = load_graph(FLAGS.frozen_model)
    boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

    boxes_list = []
    with tf.Session(graph=frozenGraph, config=session_config) as sess:
        for item in input_path:
            start = clock()
            FLAGS.input_img = item
            img = Image.open(FLAGS.input_img)
            letterboxed = letter_box_image(img, FLAGS.size, FLAGS.size, 128)
            letterboxed = letterboxed.astype(np.float32)
            detected_boxes = sess.run(boxes, feed_dict={inputs: [letterboxed]})
            filtered_boxes = non_max_suppression(
                detected_boxes,
                confidence_threshold=FLAGS.conf_threshold,
                iou_threshold=FLAGS.iou_threshold)
            boxes_list.append(filtered_boxes)
            # NOTE(review): the flattened source does not show whether the two
            # prints were guarded by DEBUG as well — confirm against history.
            if DEBUG:
                draw_boxes(filtered_boxes, img, classes,
                           (FLAGS.size, FLAGS.size), True)
                print(filtered_boxes)
                print("Execution Time : {} / #Symbols : {} / Path : {}".format(
                    clock() - start, len(filtered_boxes), item))
    sess.close()
    tf.reset_default_graph()
    return boxes_list, classes, FLAGS.size
def detect(self, image, output_img='out'):
    """Detect objects in an image file and save an annotated JPEG.

    Args:
        image: Path (or file object) accepted by ``Image.open``.
        output_img: Output file name without extension. ``'-tiny'`` is
            appended when ``self.tiny`` is truthy, then ``'.jpg'``.

    NOTE: the original appended ``'-tiny'`` when ``self.tiny`` was *false*;
    the condition was inverted and is corrected here.
    """
    target_size = (self.size[0], self.size[1])
    # The context manager closes the underlying file once the result is saved.
    with Image.open(image) as img:
        img_resized = img.resize(size=target_size)

        timer = Timer()
        timer.tic()
        detected_boxes = self.sess.run(
            self.boxes,
            feed_dict={self.inputs: [np.array(img_resized, dtype=np.float32)]})
        filtered_boxes = non_max_suppression(
            detected_boxes,
            confidence_threshold=self.conf_threshold,
            iou_threshold=self.iou_threshold)
        timer.toc()
        print('Detection took {:.3f}s'.format(timer.total_time))

        draw_boxes(filtered_boxes, img, self.classes, target_size)
        suffix = '-tiny' if self.tiny else ''
        img.save(output_img + suffix + '.jpg')
def draw_and_show(self, detected_boxes, pil_im):
    """Filter detections, draw them on ``pil_im``, show and record the frame.

    The frame is shown in the 'CSI Camera' window and written to
    ``self.writeVideo`` whenever ``self.count`` is a multiple of 5. When
    ``self.personBox`` is non-empty and all four of its first entries are
    positive, they are printed (the blurring that used them is disabled).
    """
    kept = non_max_suppression(detected_boxes,
                               confidence_threshold=FLAGS.conf_threshold,
                               iou_threshold=FLAGS.iou_threshold)
    self.draw_boxes_and_objects(kept, pil_im, classes,
                                (FLAGS.size, FLAGS.size), True)

    frame = cv2.cvtColor(np.array(pil_im), cv2.COLOR_RGB2BGR)

    if len(self.personBox) > 0:
        # personBox is read in the order y1, y2, x1, x2.
        y1, y2, x1, x2 = (int(self.personBox[k]) for k in range(4))
        if min(y1, y2, x1, x2) > 0:
            print(x1, x2, y1, y2)

    cv2.imshow('CSI Camera', frame)
    if self.count % 5 == 0:
        self.writeVideo.write(frame)
def detection(path):
    """Detect objects in the image at ``path`` and print an estimated height.

    The image is letter-boxed to ``input_size``, run through the frozen graph
    in the module-level ``sess`` and filtered with NMS. When
    ``utils.draw_boxes`` reports a score above 0.90, the pixel height of the
    first box is converted to centimetres and printed.
    """
    image = Image.open(path)
    net_input = utils.letter_box_image(image, input_size, input_size, 128)
    net_input = net_input.astype(np.float32)
    boxes, inputs = utils.get_boxes_and_inputs_pb(frozenGraph)

    started = time.time()
    detected_boxes = sess.run(boxes, feed_dict={inputs: [net_input]})
    filtered_boxes = utils.non_max_suppression(
        detected_boxes,
        confidence_threshold=conf_threshold,
        iou_threshold=iou_threshold)
    print("Predictions found in {:.2f}s".format(time.time() - started))

    if not filtered_boxes:
        return

    img, region, score, box = utils.draw_boxes(filtered_boxes, image, classes,
                                               (input_size, input_size), True)
    if score <= 0.90:
        return

    person_image_height = box[0][3] - box[0][1]
    print(person_image_height)
    # Estimate the person's real height from a reference object: here a chair
    # that is 96 cm tall and measures 230 px in the image at a fixed distance.
    # The constants must be re-tuned for each environment. The method has a
    # large error, so the result is for fun only; supply exact values when
    # accuracy matters.
    person_height = (person_image_height * 96) / 230
    print("person_height: %.2fcm \n" % (person_height))
def main(argv=None):
    """Detect objects in ``FLAGS.input_img`` and save the annotated image.

    Uses the frozen graph when ``FLAGS.frozen_model`` is set; otherwise builds
    the tiny, SPP or standard YOLOv3 model (per ``FLAGS.tiny`` /
    ``FLAGS.spp``) and restores it from ``FLAGS.ckpt_file``. The result is
    written to ``FLAGS.output_img``.
    """
    config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction),
        log_device_placement=False,
    )

    img = Image.open(FLAGS.input_img)
    net_input = letter_box_image(img, FLAGS.size, FLAGS.size, 128)
    net_input = net_input.astype(np.float32)
    classes = load_coco_names(FLAGS.class_names)

    if FLAGS.frozen_model:
        t0 = time.time()
        frozenGraph = load_graph(FLAGS.frozen_model)
        print("Loaded graph in {:.2f}s".format(time.time() - t0))

        boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)
        with tf.Session(graph=frozenGraph, config=config) as sess:
            t0 = time.time()
            detected_boxes = sess.run(boxes, feed_dict={inputs: [net_input]})
    else:
        if FLAGS.tiny:
            model = yolo_v3_tiny.yolo_v3_tiny
        elif FLAGS.spp:
            model = yolo_v3.yolo_v3_spp
        else:
            model = yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(model, len(classes), FLAGS.size,
                                             FLAGS.data_format)
        saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))
        with tf.Session(config=config) as sess:
            t0 = time.time()
            saver.restore(sess, FLAGS.ckpt_file)
            print('Model restored in {:.2f}s'.format(time.time() - t0))

            t0 = time.time()
            detected_boxes = sess.run(boxes, feed_dict={inputs: [net_input]})

    # t0 marks the start of inference in both branches.
    filtered_boxes = non_max_suppression(
        detected_boxes,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold)
    print("Predictions found in {:.2f}s".format(time.time() - t0))

    draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size), True)
    img.save(FLAGS.output_img)
def __call__(self, sample):
    """Turn raw predictions into normalised boxes, scores and class labels.

    Args:
        sample: ``(preds, labels)`` pair; ``preds`` is converted to an
            ndarray when it is not one already.

    Returns:
        ``([boxes, scores, classes], labels)``. Each array has a leading
        batch axis of size 1; boxes are ``[y_min, x_min, y_max, x_max]``
        divided by 416.
    """
    preds, labels = sample
    if not isinstance(preds, np.ndarray):
        preds = np.array(preds)

    per_class = non_max_suppression(preds, self.conf_threshold,
                                    self.iou_threshold)

    height, width = 416, 416
    det_boxes, det_scores, det_classes = [], [], []
    for cls, bboxs in per_class.items():
        det_classes.extend([LABEL_MAP[cls + 1]] * len(bboxs))
        for box, score in bboxs:
            coords = box.tolist()
            x_min, y_min = coords[0], coords[1]
            x_max, y_max = coords[2], coords[3]
            det_boxes.append([y_min / height, x_min / width,
                              y_max / height, x_max / width])
            det_scores.append(score)

    if len(det_boxes) == 0:
        # Keep well-formed (empty) shapes when nothing was detected.
        det_boxes = np.zeros((0, 4))
        det_scores = np.zeros((0, ))
        det_classes = np.zeros((0, ))

    packed = [np.array([det_boxes]), np.array([det_scores]),
              np.array([det_classes])]
    return packed, labels
def main():
    """Train YOLOv1 on the VOC CSV splits and report train mAP every epoch.

    After epoch 99, each epoch also plots predictions for up to 16 images of
    every test batch. The forward pass for a batch is computed once and
    reused for all plotted images (the original recomputed it per image), and
    the plot count is clamped to the batch size.
    """
    model = Yolov1(split_size=7, num_boxes=2, num_classes=20).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE,
                           weight_decay=WEIGHT_DECAY)
    loss_fn = YoloLoss()

    train_dataset = VOCDataset("data/train.csv", transform=transform,
                               img_dir=IMG_DIR, label_dir=LABEL_DIR)
    test_dataset = VOCDataset("data/test.csv", transform=transform,
                              img_dir=IMG_DIR, label_dir=LABEL_DIR)

    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE,
                              num_workers=1, pin_memory=PIN_MEMORY,
                              shuffle=True, drop_last=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE,
                             num_workers=1, pin_memory=PIN_MEMORY,
                             shuffle=True, drop_last=True)

    max_plots = 16
    for epoch in range(EPOCHS):
        pred_boxes, target_boxes = get_bboxes(train_loader, model,
                                              iou_threshold=0.5, threshold=0.4)
        mAP = mean_average_precision(pred_boxes, target_boxes,
                                     iou_threshold=0.5)
        print(f"Train mAP:{mAP}")

        train_fn(train_loader, model, optimizer, loss_fn)

        if epoch > 99:
            for x, y in test_loader:
                x = x.to(DEVICE)
                # One forward pass per batch; it does not depend on idx.
                batch_bboxes = cellboxes_to_boxes(model(x))
                for idx in range(min(max_plots, len(batch_bboxes))):
                    bboxes = non_max_suppression(batch_bboxes[idx],
                                                 iou_threshold=0.5,
                                                 threshold=0.4)
                    plot_image(x[idx].permute(1, 2, 0).to("cpu"), bboxes)


if __name__ == "__main__":
    main()
def post_process(self, outputs):
    """Decode raw head outputs into boxes and apply NMS.

    Each entry of ``outputs`` is moved to ``self.device`` (the list is updated
    in place), passed through a sigmoid, and its xy / wh channels are decoded
    with the cached grid, stride and anchor grid of that level. The decoded
    levels are concatenated and filtered by ``non_max_suppression``.

    Parameters:
        outputs: list of raw output tensors, one per detection level

    Returns:
        The value returned by ``non_max_suppression`` for the merged
        predictions.
    """
    decoded = []
    for level in range(len(outputs)):
        outputs[level] = outputs[level].to(self.device)
        raw = outputs[level]

        # Rebuild the cached grid when the feature-map size changed.
        if self.grid[level].shape[2:4] != raw.shape[2:4]:
            _, _, height, width, _ = raw.shape
            self.grid[level] = self._make_grid(width, height).to(raw.device)

        y = raw.sigmoid()
        grid = self.grid[level].to(raw.device)
        y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + grid) * self.strides[level]  # xy
        y[..., 2:4] = (y[..., 2:4] * 2)**2 * self.anchor_grid[level]  # wh
        decoded.append(y.view(1, -1, self.no))

    merged = torch.cat(decoded, 1)
    return non_max_suppression(merged,
                               conf_thres=self.conf_thresh,
                               iou_thres=self.iou_thresh)
def main(argv=None):
    """Stream a video through a TensorFlow Serving YOLOv3 model and show it.

    Each frame is letter-boxed, JPEG-encoded and sent to the
    ``yolov3_2`` / ``predict_images`` signature on ``args.server``. The
    returned ``scores`` tensor is filtered with NMS, boxes are drawn, and the
    round-trip time is overlaid on the displayed frame. Press ``q`` to stop.

    Changes from the original: a failed frame read ends the loop instead of
    crashing in ``cvtColor``; the gRPC channel and stub are created once
    rather than per frame; the capture is always released; the deprecated
    ``ndarray.tostring()`` is replaced by ``tobytes()``.
    """
    # Classes and video input
    classes = load_coco_names(args.class_names)
    vid = cv2.VideoCapture(args.input_video)
    # Property 7: number of frames in the video file (RTSP streams do not
    # report a total frame count).
    video_frame_cnt = int(vid.get(7))
    timeF = 10  # frame-skip rate
    # Property 1: 0-based index of the frame to be captured/decoded next.
    fpsnum = int(vid.get(1))

    try:
        if fpsnum % timeF == 0:
            # Server communication setup, shared by all frames.
            channel = grpc.insecure_channel(args.server)
            stub = prediction_service_pb2.PredictionServiceStub(channel)

            for _ in range(video_frame_cnt):
                ret, img_ori = vid.read()
                if not ret:
                    # End of stream or read error.
                    break

                # Letter-box padding
                img_ori = cv2.cvtColor(img_ori, cv2.COLOR_BGR2RGB)
                img_ori = Image.fromarray(img_ori)  # OpenCV image -> PIL
                img_resized = letter_box_image(img_ori, img_ori.size[1],
                                               img_ori.size[0], args.size,
                                               args.size, 128)
                img_resized = img_resized.astype(np.float32)

                # JPEG-encode the frame in memory
                img_encode = cv2.imencode('.jpg', img_resized)[1]
                jpeg_bytes = np.array(img_encode).tobytes()

                start_time = time.time()
                request = predict_pb2.PredictRequest()
                request.model_spec.name = 'yolov3_2'
                request.model_spec.signature_name = 'predict_images'
                request.inputs['images'].CopyFrom(
                    tf.contrib.util.make_tensor_proto(jpeg_bytes, shape=[1]))
                # Wait for the server's reply (10 s timeout)
                response = stub.Predict(request, 10.0)

                # Convert every returned tensor to an ndarray
                results = {
                    key: tf.contrib.util.make_ndarray(response.outputs[key])
                    for key in response.outputs
                }
                detected_boxes = results['scores']

                # NMS
                filtered_boxes = non_max_suppression(
                    detected_boxes,
                    confidence_threshold=args.conf_threshold,
                    iou_threshold=args.iou_threshold)
                difference_time = time.time() - start_time  # round-trip time

                # Draw
                draw_boxes(filtered_boxes, img_ori, classes,
                           (args.size, args.size), True)

                # Output image (PIL -> OpenCV)
                cv2charimg = cv2.cvtColor(np.array(img_ori), cv2.COLOR_RGB2BGR)
                cv2.putText(cv2charimg,
                            '{:.2f}ms'.format((difference_time) * 1000),
                            (40, 40), 0, fontScale=1, color=(0, 255, 0),
                            thickness=2)
                cv2.imshow('image', cv2charimg)
                if cv2.waitKey(1) & 0xFF == ord('q'):  # quit
                    break
    finally:
        vid.release()
def show_camera(sess, boxes, inputs):
    """Run live detection on the CSI camera and show the annotated frames.

    Args:
        sess: Session used to run the graph.
        boxes: Output tensor fetched from the graph.
        inputs: Input placeholder fed with the letter-boxed frame.

    The loop ends when the window is closed, ESC is pressed, or a frame
    cannot be read (previously a failed read crashed in ``cvtColor``). The
    camera and windows are released even if an error occurs.
    """
    # To flip the image, modify the flip_method parameter (0 and 2 are the
    # most common)
    classes = load_coco_names(FLAGS.class_names)
    print(gstreamer_pipeline(flip_method=0))
    cap = cv2.VideoCapture(gstreamer_pipeline(flip_method=0),
                           cv2.CAP_GSTREAMER)
    if not cap.isOpened():
        print('Unable to open camera')
        return

    try:
        cv2.namedWindow('CSI Camera', cv2.WINDOW_AUTOSIZE)
        while cv2.getWindowProperty('CSI Camera', 0) >= 0:
            ret_val, img = cap.read()
            if not ret_val:
                break

            pil_im = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            img_resized = letter_box_image(pil_im, FLAGS.size, FLAGS.size, 128)
            img_resized = img_resized.astype(np.float32)

            detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
            filtered_boxes = non_max_suppression(
                detected_boxes,
                confidence_threshold=FLAGS.conf_threshold,
                iou_threshold=FLAGS.iou_threshold)
            draw_boxes(filtered_boxes, pil_im, classes,
                       (FLAGS.size, FLAGS.size), True)

            img = cv2.cvtColor(np.array(pil_im), cv2.COLOR_RGB2BGR)
            cv2.imshow('CSI Camera', img)

            keyCode = cv2.waitKey(30) & 0xff
            if keyCode == 27:  # ESC
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()
def run_detection(img):
    """Run the module-level TFLite ``interpreter`` on ``img`` and apply NMS.

    Args:
        img: Input tensor written to the interpreter's ``inputs`` slot.

    Returns:
        ``[boxes, scores, classes, valid_detections]`` where
        ``valid_detections`` is ``boxes.shape[0]``. If reading the output
        tensors fails, the sentinel ``[0, 0, 0, 0]`` is returned instead.
    """
    interpreter.set_tensor(inputs, img)
    interpreter.invoke()
    try:
        pred = [
            interpreter.get_tensor(detail['index']) for detail in output_details
        ]
    except Exception:
        # Best-effort: callers rely on the sentinel. A bare ``except`` would
        # also swallow KeyboardInterrupt/SystemExit, so catch Exception only.
        return [0, 0, 0, 0]

    # Output 1 is read as the boxes, output 0 as the confidences.
    boxes, pred_conf = pred[1], pred[0]
    # The project's own NMS is used here; an earlier variant based on
    # tf.image.non_max_suppression_with_scores is no longer in use.
    boxes, scores, classes = non_max_suppression(boxes[0], pred_conf[0])
    valid_detections = boxes.shape[0]
    return [boxes, scores, classes, valid_detections]
def random_image(self, height, width):
    """Create random specifications of an image with multiple shapes.

    Returns the background colour of the image and a list of shape
    specifications ``(shape, color, dims)`` that can be used to draw it.
    """
    # Pick a random background colour
    bg_color = np.array([random.randint(0, 255) for _ in range(3)])

    # Generate between one and four random shapes and record their
    # bounding boxes
    N = random.randint(1, 4)
    shapes = []
    for _ in range(N):
        shape, color, dims = self.random_shape(height, width)
        shapes.append((shape, color, dims))
    boxes = [[y - s, x - s, y + s, x + s] for _, _, (x, y, s) in shapes]

    # Apply non-max suppression with a 0.3 threshold to avoid shapes
    # covering each other
    keep_ixs = utils.non_max_suppression(np.array(boxes), np.arange(N), 0.3)
    shapes = [spec for idx, spec in enumerate(shapes) if idx in keep_ixs]
    return bg_color, shapes
def do_test(model, images_path, labels_path, batch_size=32,
            progress_callback=None):
    """Evaluate ``model`` and return the mean per-class hit rate over 80 classes.

    A ground-truth target counts as a hit when a detection of the same class
    overlaps it with IoU >= 0.5. The score is the mean of
    ``correct / count`` over the 80 classes, or ``-1.0`` while any class has
    not been seen yet.

    Args:
        model: Network to evaluate.
        images_path: Image directory passed to ``YoloDataset``.
        labels_path: Label directory passed to ``YoloDataset``.
        batch_size: Loader batch size.
        progress_callback: Optional callable invoked after every batch with
            ``model``, ``batch_number``, ``batch_count`` and ``map_score``.

    Returns:
        The score after the last batch; ``-1.0`` when the loader yields no
        batches (the original raised an unbound-variable error there).
    """
    size = 416
    t = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    data = DataLoader(YoloDataset(images_path, labels_path, t, size),
                      batch_size=batch_size,
                      shuffle=True,
                      num_workers=mp.cpu_count())

    count = [0] * 80
    correct = [0] * 80
    # Same value the loop assigns while some class is still unseen.
    psum = -80
    with torch.no_grad():
        for i, (local_batch, local_labels) in enumerate(data):
            local_batch = local_batch.to(device)
            outputs = model(local_batch)
            batch_detections = yolo.YoloV3.get_detections(outputs)

            for j, image_detections in enumerate(
                    utils.parse_detections(batch_detections)):
                kept = utils.non_max_suppression(image_detections,
                                                 confidence_threshold=0.2)
                for target in utils.parse_labels(local_labels[0][j], size):
                    idx = target['coco_idx']
                    count[idx] += 1
                    # At most one hit per target.
                    if any(det.coco_idx == idx
                           and utils.iou(target['bb'], det.bb) >= 0.5
                           for det in kept):
                        correct[idx] += 1

            if 0 in count:
                psum = -80
            else:
                psum = sum(hit / seen for hit, seen in zip(correct, count))

            if progress_callback:
                progress_callback(model,
                                  batch_number=(i + 1),
                                  batch_count=len(data),
                                  map_score=psum / 80)
    return psum / 80
def ensemble_prediction(model, config, image):
    """Test-time augmentation merged with non-maximum suppression.

    The model is run on the original image, a left-right flip, an up-down
    flip and a random 90-degree rotation. Each mask stack is mapped back to
    the original orientation, all results are concatenated, and NMS
    (threshold 0.1) selects which instances to keep.

    Returns:
        Dict with the kept ``'masks'`` and ``'scores'``.
    """
    masks = []
    scores = []
    boxes = []

    def _collect(view, restore):
        # Detect on one augmented view and store results in original orientation.
        result = model.detect(
            [view], verbose=0,
            mask_threshold=config.DETECTION_MASK_THRESHOLD)[0]
        mask = restore(result['masks'])
        masks.append(mask)
        scores.append(result['scores'])
        boxes.append(utils.extract_bboxes(mask))

    _collect(image, lambda m: m)
    _collect(np.fliplr(image), np.fliplr)
    _collect(np.flipud(image), np.flipud)

    # The rotation direction is drawn only after the first three detections.
    angle = np.random.choice([1, -1])
    _collect(np.rot90(image, k=angle, axes=(0, 1)),
             lambda m: np.rot90(m, k=-angle, axes=(0, 1)))

    masks = np.concatenate(masks, axis=-1)
    scores = np.concatenate(scores, axis=-1)
    boxes = np.concatenate(boxes, axis=0)

    keep_ind = utils.non_max_suppression(boxes, scores, 0.1)
    return {'masks': masks[:, :, keep_ind], 'scores': scores[keep_ind]}
def detect_image(image_np):
    """Detect objects in ``image_np`` and return the image with boxes drawn.

    The image is preprocessed to the model's input size, run through the
    module-level ``model``, thresholded and filtered with NMS. Each surviving
    box is mapped from padded-input coordinates back to the original image,
    clamped to its bounds and drawn. When face detection is enabled, a
    "person" label is replaced by the recognised face label, if any.
    """
    target_dimension = int(model.meta["height"])
    processed_img = utils.process_image(image_np, target_dimension)
    # (width, height) of the original image
    image_dimension = torch.FloatTensor([image_np.shape[1], image_np.shape[0]])
    scaling_factor = torch.min(target_dimension / image_dimension)

    if CUDA:
        processed_img = processed_img.cuda()
    image_var = Variable(processed_img)

    start = time.time()
    with torch.no_grad():
        output = model(image_var, CUDA)
    end = time.time()
    print("Total time: {}".format(end - start))

    thresholded_output = utils.object_thresholding(output[0])
    true_output = utils.non_max_suppression(thresholded_output)
    original_image_np = np.copy(image_np)

    if not true_output.size(0) > 0:
        return image_np

    # Padding added on each side of the resized image. The first offset is
    # derived from the width and applied to coordinates 0 and 2, the second
    # from the height and applied to coordinates 1 and 3.
    pad_from_width = (target_dimension -
                      scaling_factor * image_dimension[0].item()) / 2
    pad_from_height = (target_dimension -
                       scaling_factor * image_dimension[1].item()) / 2

    for output_box in true_output:
        rect_coords = torch.FloatTensor(
            utils.center_coord_to_diagonals(output_box[:4]))

        # Transform the detection w.r.t. the boundaries of the padded image
        rect_coords[[0, 2]] -= pad_from_width
        rect_coords[[1, 3]] -= pad_from_height
        rect_coords /= scaling_factor

        # Clamp to the actual image's boundaries
        rect_coords[[0, 2]] = torch.clamp(rect_coords[[0, 2]], 0.0,
                                          image_dimension[0])
        rect_coords[[1, 3]] = torch.clamp(rect_coords[[1, 3]], 0.0,
                                          image_dimension[1])

        class_label = coco_classes[output_box[5].int()]
        print("Output Box:", output_box, "Class Label:", class_label)
        print("Rect coords:", rect_coords)

        if constants.PERFORM_FACE_DETECTION and class_label == "person":
            rc = rect_coords.int()
            person_img_np = original_image_np[rc[1]:rc[3], rc[0]:rc[2]]
            face_label = face_recognition_utils.recognize_face_in_patch(
                person_img_np)
            if face_label is not None:
                class_label = face_label

        image_np = utils.draw_box(rect_coords, image_np, class_label)

    return image_np
def nms(pre_nms_box, scores):
    """Select proposals with NMS and pad the result to ``proposal_count`` rows.

    ``proposal_count`` and ``nms_threshold`` are taken from the enclosing
    scope. Rows missing after suppression are zero-padded.
    """
    keep = utils.non_max_suppression(pre_nms_box,
                                     scores,
                                     proposal_count,
                                     nms_threshold,
                                     name="rpn_non_max_suppression")
    kept = tf.gather(pre_nms_box, keep)
    missing = tf.maximum(proposal_count - tf.shape(kept)[0], 0)
    padded = tf.pad(kept, [(0, missing), (0, 0)])
    return tf.gather(padded, tf.range(proposal_count))
def draw_and_show(self, detected_boxes, pil_im):
    """Filter detections, draw them on ``pil_im``, then show and record the frame.

    The frame is displayed in the 'CSI Camera' window and appended to
    ``self.writeVideo``.
    """
    kept = non_max_suppression(detected_boxes,
                               confidence_threshold=FLAGS.conf_threshold,
                               iou_threshold=FLAGS.iou_threshold)
    self.draw_boxes_and_objects(kept, pil_im, classes,
                                (FLAGS.size, FLAGS.size), True)

    # PIL (RGB) -> OpenCV (BGR)
    frame = cv2.cvtColor(np.array(pil_im), cv2.COLOR_RGB2BGR)
    cv2.imshow('CSI Camera', frame)
    self.writeVideo.write(frame)
def main(argv=None):
    """Run the frozen detector on a video and write the result to output.mp4.

    Frames from ``FLAGS.input_img`` are flipped vertically, letter-boxed, run
    through the frozen graph and filtered with NMS. The annotated frames are
    shown and written to ``output.mp4``. Press ``q`` to stop early.

    The per-frame timer is now reset for every frame (the original reported
    time since the session started), and the capture and writer are released
    even when an error occurs.
    """
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )
    classes = load_coco_names(FLAGS.class_names)

    t0 = time.time()
    frozenGraph = load_graph(FLAGS.frozen_model)
    print("Loaded graph in {:.2f}s".format(time.time() - t0))

    boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)
    with tf.Session(graph=frozenGraph, config=config) as sess:
        print(FLAGS.input_img)
        cap = cv2.VideoCapture(FLAGS.input_img)
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        videoWriter = cv2.VideoWriter(
            "output.mp4", cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps,
            (int(width), int(height)))
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                t0 = time.time()  # time this frame only
                frame = cv2.flip(frame, 0)
                img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                img_resized = letter_box_image(img, FLAGS.size, FLAGS.size,
                                               128)
                img_resized = img_resized.astype(np.float32)

                detected_boxes = sess.run(boxes,
                                          feed_dict={inputs: [img_resized]})
                filtered_boxes = non_max_suppression(
                    detected_boxes,
                    confidence_threshold=FLAGS.conf_threshold,
                    iou_threshold=FLAGS.iou_threshold)
                print("Predictions found in {:.2f}s".format(time.time() - t0))

                draw_boxes(filtered_boxes, img, classes,
                           (FLAGS.size, FLAGS.size), True)
                fimg = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
                cv2.imshow("show", fimg)
                videoWriter.write(fimg)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            cap.release()
            videoWriter.release()
def plot_one_image(img_path, class_names, weights_file, iou_thre, con_thre):
    """Load a checkpoint, detect on one image and display the first box.

    The image is read with OpenCV, passed through ``test_transforms`` and the
    YOLO model. A zero "scale" channel is appended to each of the three
    outputs before NMS. The first returned box is converted from
    centre/size to corner form, scaled by the stride selected by its
    element 5, and drawn with its class label.
    """
    raw_image = cv2.imread(img_path)
    augmentations = test_transforms(image=raw_image, bboxes=[])
    img = augmentations["image"]
    # Add the batch dimension.
    image = img.reshape(1, img.shape[0], img.shape[1], img.shape[2])

    model = YOLO(len(class_names))
    optimizer = optim.Adam(model.parameters(), lr=1e-5, weight_decay=1e-4)
    load_checkpoint(weights_file, model, optimizer, 1e-5)

    pred_bboxes = []
    with torch.no_grad():
        out = model(image)
        for level in range(3):
            level_out = out[level]
            # Extra channel used to cache the scale the box belongs to.
            scale = torch.zeros((*level_out.shape[:4], 1))
            pred_bboxes.append(torch.cat((level_out, scale), -1))

    boxes = non_max_suppression(pred_bboxes, scaled_anchors, con_thre,
                                iou_thre)
    first = boxes[0]

    # Centre/size -> corner coordinates.
    x = first[0:4]
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    half_w = x[2] / 2
    half_h = x[3] / 2
    y[0] = x[0] - half_w  # top left x
    y[1] = x[1] - half_h  # top left y
    y[2] = x[0] + half_w  # bottom right x
    y[3] = x[1] + half_h  # bottom right y

    S = [32, 16, 8]
    image = img.permute(1, 2, 0)
    image = image.cpu().float().numpy()

    stride = S[int(first[5].item())]
    top_left = (int(y[0].item() * stride), int(y[1].item() * stride))
    bottom_right = (int(y[2].item() * stride), int(y[3].item() * stride))
    cv2.rectangle(image, top_left, bottom_right, (0, 0, 255), 2)

    label = class_names[torch.argmax(torch.sigmoid(first[..., 5:]))]
    line_thickness = 3
    font_thickness = max(line_thickness - 1, 1)
    cv2.getTextSize(label, 0, fontScale=line_thickness / 3,
                    thickness=font_thickness)[0]
    cv2.putText(image,
                label,
                (int(y[2].item() * stride + 5), int(y[3].item() * stride + 5)),
                0,
                line_thickness / 3, [220, 220, 220],
                thickness=font_thickness,
                lineType=cv2.LINE_AA)
    cv2.imshow("fff", image)
    cv2.waitKey(0)
def nms(dets, thresh):
    """Apply non-maximum suppression to a tensor of detections.

    :param dets: PyTorch tensor; columns 0-3 are read as the rectangles and
        column 4 as the scores
    :param thresh: overlap threshold passed to ``non_max_suppression``
    :return: whatever ``non_max_suppression`` returns for the converted arrays
    """

    def _to_numpy(tensor):
        # Detach from the graph and move to host memory before converting.
        return tensor.detach().cpu().numpy()

    scores = _to_numpy(dets[:, 4])
    boxes = _to_numpy(dets[:, 0:4])
    return non_max_suppression(boxes, confidences=scores,
                               overlap_thresh=thresh)
def detect_single_frame(faster_rcnn_net, frame, row, column, tiles_dict,
                        **kwargs):
    """Detect people in ``frame`` with an OpenCV DNN network.

    Keyword Args:
        confidence: Minimum score for a class-0 (person) detection.
        threshold: Read from ``kwargs`` but not used in this function.

    Returns:
        ``(final_boxes, final_confidences, total_time, frame_with_boxes)``.
        ``total_time`` covers the forward pass only; ``frame_with_boxes`` is
        a copy of ``frame`` with the kept boxes drawn.
    """
    confidence = kwargs.get('confidence')
    threshold = kwargs.get('threshold')

    started = time.time()
    blob = cv2.dnn.blobFromImage(frame, size=(300, 300), swapRB=True,
                                 crop=False)
    faster_rcnn_net.setInput(blob)
    detections = faster_rcnn_net.forward()
    total_time = time.time() - started

    frame_with_boxes = frame.copy()
    height, width = frame.shape[0], frame.shape[1]

    boxes = []
    confidences = []
    for detection in detections[0, 0, :, :]:
        if int(detection[1]) != 0:  # 0 is person
            continue
        score = float(detection[2])
        if not score > confidence:
            continue
        # Entries 3..6 are multiplied by the frame size to get pixel corners.
        confidences.append(score)
        boxes.append([
            int(detection[3] * width),
            int(detection[4] * height),
            int(detection[5] * width),
            int(detection[6] * height),
        ])

    filtered_boxes, probs = non_max_suppression(np.array(boxes),
                                                probs=confidences,
                                                overlapThresh=0.65)

    final_boxes = []
    final_confidences = []
    for index, raw_box in enumerate(filtered_boxes):
        box = [int(coord) for coord in raw_box]
        if tiles_dict is not None:
            startX, startY, endX, endY = box_new_coords(box, row, column,
                                                        tiles_dict)
        else:
            startX, startY, endX, endY = box
        # NOTE(review): the un-remapped box is returned while the remapped
        # coordinates are drawn; the cascade-based detect_single_frame in this
        # file returns the remapped coordinates. Confirm which is intended.
        final_boxes.append(box)
        final_confidences.append(probs[index])
        cv2.rectangle(frame_with_boxes, (startX, startY), (endX, endY),
                      (0, 255, 0), 2)

    return final_boxes, final_confidences, total_time, frame_with_boxes
def get_classification(self, cv_image):
    """Determine the colour of the traffic light in the image.

    Args:
        cv_image (cv::Mat): image containing the traffic light

    Returns:
        int: ID of traffic light colour (specified in styx_msgs/TrafficLight);
        ``TrafficLight.UNKNOWN`` when no class-9 detection is found or the
        predicted colour is not YELLOW, RED or GREEN.
    """
    image = Image.fromarray(cv_image)
    img_resized = letter_box_image(image, options['image_size'],
                                   options['image_size'], 128)
    img_resized = img_resized.astype(np.float32)
    boxes, inputs = get_boxes_and_inputs_pb(self.frozenGraph)

    t0 = time.time()
    detected_boxes = self.sess.run(boxes, feed_dict={inputs: [img_resized]})
    filtered_boxes = non_max_suppression(detected_boxes,
                                         confidence_threshold=options['thresh'],
                                         iou_threshold=options['iou'])
    print("Predictions found in {:.2f}s".format(time.time() - t0))

    # Only detections stored under key 9 are used.
    lights = filtered_boxes.get(9)
    # Identity test instead of ``!= None``; empty results are rejected too.
    if lights is None or len(lights) == 0:
        return TrafficLight.UNKNOWN
    inp_new = {9: lights}

    for box, _score in lights:
        # The return value is unused, as in the original; the call is kept in
        # case convert_to_original_size updates ``box`` in place — TODO confirm.
        convert_to_original_size(
            box, (options['image_size'], options['image_size']),
            np.array(image.size), True)

    light_color = state_predict(analyze_color(inp_new, cv_image))
    print("the light color is {}".format(light_color))

    if light_color == 'YELLOW':
        return TrafficLight.YELLOW
    if light_color == 'RED':
        return TrafficLight.RED
    if light_color == 'GREEN':
        return TrafficLight.GREEN
    return TrafficLight.UNKNOWN
def test(self):
    """Run the network over ``self.data_loader`` and save every prediction.

    Each batch is converted with ``self.to_var``, passed through
    ``self.net``, filtered by NMS on the CPU copy of the output, and handed to
    ``self.save_predict_result`` together with the image paths.
    """
    print("iter per epochde: ", len(self.data_loader))
    num_classes = len(self.classes_tag)
    for images, images_path in self.data_loader:
        raw = self.net(self.to_var(images))
        kept = utils.non_max_suppression(raw.cpu().data, num_classes,
                                         self.conf_thres, self.nms_thres)
        self.save_predict_result(kept, images_path)
def detect_from_image(self, img):
    """Run the detector on a PIL image and return the detections for it.

    The image is scaled so that its longer side fits ``self.imgsize``
    (aspect ratio preserved), transformed to a tensor and batched. The input
    is placed on the GPU when CUDA is available and on the CPU otherwise
    (the original required CUDA unconditionally).

    Returns:
        The first element of the ``non_max_suppression`` result.
    """
    img_size = self.imgsize
    ratio = min(img_size / img.size[0], img_size / img.size[1])
    imw = round(img.size[0] * ratio)
    imh = round(img.size[1] * ratio)

    image_tensor = self.get_transforms(imw, imh)(img).float()
    image_tensor = image_tensor.unsqueeze_(0)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_img = image_tensor.to(device)

    with torch.no_grad():
        detections = self.model(input_img)
        # 80 is the class count passed to NMS.
        detections = non_max_suppression(detections, 80, self.confthres,
                                         self.nmsthres)
    return detections[0]
def main(argv=None):
    """Run the frozen detector on ``FLAGS.video_path`` and display each frame.

    Every frame is converted from BGR to RGB, resized, run through the frozen
    graph and filtered with NMS; boxes are drawn on the original frame, which
    is then shown. Timing for the colour conversion and the detection is
    printed. The loop ends at the end of the video or when ``q`` is pressed.
    """
    config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction),
        log_device_placement=False,
    )

    cap = cv2.VideoCapture(FLAGS.video_path)
    classes = utils.load_names(FLAGS.class_names)
    frozenGraph = utils.load_graph(FLAGS.frozen_model)
    boxes, inputs = utils.get_boxes_and_inputs_pb(frozenGraph)
    target_size = (FLAGS.size, FLAGS.size)

    with tf.Session(graph=frozenGraph, config=config) as sess:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            t1 = time.time()
            rgb_frame = frame[:, :, ::-1]  # from BGR to RGB
            print('\'BGR2RGB\' time consumption:', time.time() - t1)

            net_input = utils.resize_cv2(
                rgb_frame, target_size,
                keep_aspect_ratio=FLAGS.keep_aspect_ratio)
            net_input = net_input[np.newaxis, :]

            t0 = time.time()
            # Boxes whose confidence exceeds the graph's own cut-off
            detected_boxes = sess.run(boxes, feed_dict={inputs: net_input})
            # Filter by NMS; only the first element of the result is used
            filtered_boxes = utils.non_max_suppression(
                detected_boxes,
                confidence_threshold=FLAGS.conf_threshold,
                iou_threshold=FLAGS.iou_threshold)[0]
            print('\'detection\' time consumption:', time.time() - t0)

            utils.draw_boxes_cv2(filtered_boxes, frame, classes, target_size,
                                 FLAGS.keep_aspect_ratio)
            print('\n\n\n')
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    cap.release()
    cv2.destroyAllWindows()
def detect(model, source, out, imgsz, conf_thres, iou_thres, names,
           colors=((255, 30, 0), (50, 0, 255)), device=torch.device('cpu')):
    """Detect objects in one image, draw them, save the result and return it.

    Args:
        model: Callable network; ``model(img)[0]`` is passed to NMS.
        source: Image source handed to ``LoadImage``.
        out: Path the annotated image is written to with ``cv2.imwrite``.
        imgsz: Inference size forwarded to ``LoadImage`` as ``img_size``.
        conf_thres: Confidence threshold for ``non_max_suppression``.
        iou_thres: IoU threshold for ``non_max_suppression``.
        names: Sequence of class names indexed by integer class id.
        colors: Sequence of colours indexed by integer class id.
        device: Torch device the input tensor is moved to.

    Returns:
        The original-size image ``im0`` with detections drawn on it (it is
        returned and saved even when nothing was detected).
    """
    # Load once: `img` is the network input, `im0` the image that is drawn on.
    img, im0 = LoadImage(source, img_size=imgsz)

    img = torch.from_numpy(img).to(device)
    img = img.float()
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)  # add the batch dimension

    with torch.no_grad():
        pred = model(img)[0]

    pred = non_max_suppression(pred, conf_thres, iou_thres)

    det = pred[0]  # detections for the single input image
    if det is not None and len(det):
        # Rescale boxes from the network input size to the im0 size.
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

        # Each row unpacks as box coordinates, confidence, class id.
        for *xyxy, conf, cls in det:
            label = '%s %.2f' % (names[int(cls)], conf)
            plot_fire(xyxy, im0, clas=cls, label=label,
                      color=colors[int(cls)], line_thickness=2)

    # Save results (image with detections)
    cv2.imwrite(out, im0)
    return im0
def detect_single_frame(full_body_cascade, frame, row, column, tiles_dict, **kwargs):
    """Detect bodies in one frame with a cascade classifier and draw them.

    Args:
        full_body_cascade: Object providing ``detectMultiScale3``.
        frame: BGR image (converted to grayscale for detection).
        row: Tile row, forwarded to ``box_new_coords`` when tiling is used.
        column: Tile column, forwarded to ``box_new_coords`` when tiling is used.
        tiles_dict: Tile description for ``box_new_coords``; when ``None`` the
            box coordinates are used as detected.
        **kwargs: ``scaleFactor`` and ``minNeighbors`` are read and passed to
            the cascade (``None`` when not supplied).

    Returns:
        Tuple ``(boxes, confidences, total_time, frame_with_boxes)`` where
        ``boxes`` is a list of ``[startX, startY, endX, endY]`` ints,
        ``confidences`` the matching scores, ``total_time`` the seconds spent
        on detection plus NMS, and ``frame_with_boxes`` a copy of ``frame``
        with the rectangles drawn.

    Raises:
        ValueError: If ``non_max_suppression`` fails; the original exception
            is attached as ``__cause__``.
    """
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    scaleFactor = kwargs.get('scaleFactor')
    minNeighbors = kwargs.get('minNeighbors')

    start_time = time.time()
    detections = full_body_cascade.detectMultiScale3(
        gray, scaleFactor, minNeighbors,
        minSize=(30, 30),
        flags=cv2.CASCADE_SCALE_IMAGE,
        outputRejectLevels=True)
    bodies = detections[0]
    # The third output holds one score entry per detection; keep its first item.
    probs = [x[0] for x in detections[2]]

    try:
        bodies, probs = non_max_suppression(bodies, probs=probs, overlapThresh=0.65)
    except Exception as err:
        # Dump the offending inputs, then surface the failure as ValueError
        # while keeping the root cause (KeyboardInterrupt is no longer caught).
        print(bodies)
        print(probs)
        raise ValueError("non_max_suppression failed for the detected boxes") from err
    total_time = time.time() - start_time

    boxes = []
    confidences = []
    frame_with_boxes = frame.copy()
    # Draw a rectangle around every detected body.
    for idx, (x, y, w, h) in enumerate(bodies):
        if tiles_dict is not None:
            startX, startY, endX, endY = box_new_coords(
                [x, y, (x + w), (y + h)], row, column, tiles_dict)
        else:
            startX, startY, endX, endY = x, y, (x + w), (y + h)
        cv2.rectangle(frame_with_boxes, (startX, startY), (endX, endY),
                      (0, 255, 0), 2)
        boxes.append([int(startX), int(startY), int(endX), int(endY)])
        confidences.append(probs[idx])
    return boxes, confidences, total_time, frame_with_boxes
def random_image(self, height, width):
    """Build a random image specification made of a few shapes.

    Returns a ``(bg_color, shapes)`` pair: the background colour as a
    3-element array and the list of ``(shape, color, dims)`` tuples that
    survived non-max suppression, ready to be drawn.
    """
    # Background colour: three independent channel values in [0, 255].
    bg_color = np.array([random.randint(0, 255) for _ in range(3)])

    # Draw between one and four shapes.
    N = random.randint(1, 4)
    candidates = []
    for _ in range(N):
        shape, color, dims = self.random_shape(height, width)
        candidates.append((shape, color, dims))

    # Each shape's box spans `s` on either side of its (x, y) centre.
    boxes = []
    for _, _, (x, y, s) in candidates:
        boxes.append([y - s, x - s, y + s, x + s])

    # Suppress shapes that overlap too much (threshold 0.3); the shape's
    # position in the list is used as its score.
    keep_ixs = utils.non_max_suppression(np.array(boxes), np.arange(N), 0.3)
    shapes = [
        spec for index, spec in enumerate(candidates) if index in keep_ixs
    ]
    return bg_color, shapes
# Discard detections scoring below the configured minimum confidence.
confident_ixs = np.where(roi_scores >= config.DETECTION_MIN_CONFIDENCE)[0]
keep = np.intersect1d(keep, confident_ixs)
print("Remove boxes below {} confidence. Keep {}:\n{}".format(
    config.DETECTION_MIN_CONFIDENCE, keep.shape[0], keep))

# Gather the surviving candidates for per-class non-max suppression.
pre_nms_boxes = refined_proposals[keep]
pre_nms_scores = roi_scores[keep]
pre_nms_class_ids = roi_class_ids[keep]

nms_keep = []
for class_id in np.unique(pre_nms_class_ids):
    # Positions (within the pre-NMS arrays) that belong to this class.
    ixs = np.where(pre_nms_class_ids == class_id)[0]
    class_keep = utils.non_max_suppression(
        pre_nms_boxes[ixs],
        pre_nms_scores[ixs],
        config.DETECTION_NMS_THRESHOLD,
    )
    # Translate the class-local winners back to indices into the full arrays.
    class_keep = keep[ixs[class_keep]]
    nms_keep = np.union1d(nms_keep, class_keep)
    print("{:22}: {} -> {}".format(
        class_names[class_id][:20], keep[ixs], class_keep))

# union1d with the initial empty list may change the dtype, so cast back to int.
keep = np.intersect1d(keep, nms_keep).astype(np.int32)
print("\nKept after per-class NMS: {}\n{}".format(keep.shape[0], keep))

# Caption every final detection; class id 0 gets an empty caption.
ixs = np.arange(len(keep))
shown_class_ids = roi_class_ids[keep][ixs]
shown_scores = roi_scores[keep][ixs]
captions = [
    "{} {:.3f}".format(class_names[c], s) if c > 0 else ""
    for c, s in zip(shown_class_ids, shown_scores)
]