Example #1
def predict(model, dataloader=None, image=None):
    model.eval()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if image is not None:
        image = image[0].to(device=device)
        start_time = time.time()
        with torch.no_grad():
            outputs = model(image)
        outputs = non_max_suppression(outputs, conf_thres=0.5, nms_thres=0.2)
        elapsed_time = time.time() - start_time
        if outputs[0] is not None:
            boxes = outputs[0][:, 0:4]
            boxes = resize_boxes(boxes, (416, 416), (256, 256))
        else:
            boxes = []  # no detections; keep `boxes` defined for the loop below
        pred_x = []
        pred_y = []
        for box in boxes:
            x0, y0, x1, y1 = box
            x = ((x0 + x1) / 2).tolist()
            y = ((y0 + y1) / 2).tolist()
            pred_x.append(x)
            pred_y.append(y)
        image = Image.fromarray(
            image.cpu().numpy()[0, 0, :, :]).convert("RGB").resize((256, 256))
        return image, pred_x, pred_y

    for i, (image, targets) in enumerate(dataloader):
        image = image[0].to(device=device)
        name = targets["name"][0]
        start_time = time.time()
        with torch.no_grad():
            outputs = model(image)
        outputs = non_max_suppression(outputs, conf_thres=0.5, nms_thres=0.2)
        elapsed_time = time.time() - start_time
        if outputs[0] is not None:
            boxes = outputs[0][:, 0:4]
            boxes = resize_boxes(boxes, (416, 416), (256, 256))
        else:
            continue
        image_copy = Image.fromarray(image.cpu().numpy()[0, 0, :, :]).resize(
            (256, 256))
        if image_copy.mode != "RGB":
            image_copy = image_copy.convert("RGB")
        draw = ImageDraw.Draw(image_copy)
        for box in boxes:
            x0, y0, x1, y1 = box
            draw.rectangle([(x0, y0), (x1, y1)], outline=(255, 0, 255))
        # image_copy.show()
        # image_copy.save(os.path.join(images_path, f"yolo_v3/{attempt}/images/{name}.png"))
        print(f"{name}, time: {elapsed_time}")
        plt.imshow(image_copy)
        plt.show()
        break
Example #2
    def _detect(self, image) -> List[Tuple[int, int, int, int]]:

        ori_image_height, ori_image_width = np.array(image).shape[:2]

        image = self._detection_image_transform(image)
        image = image.to(self.device)
        image = image.unsqueeze(0)

        with torch.no_grad():
            outputs = self.model_detect(image)
            outputs = non_max_suppression(outputs, conf_thres=self.args.conf_thres, nms_thres=self.args.nms_thres)

        bounding_boxes = []

        for detections in outputs:
            if detections is not None:
                # Rescale boxes to original image
                detections = rescale_boxes(detections, self.args.image_size, (ori_image_height, ori_image_width))
                for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
                    if cls_pred.item() in (2, 7): # 2 == car, 7 == truck
                        bounding_box = [x1, y1, x2, y2]
                        bounding_box = tuple(int(v.item()) for v in bounding_box)
                        bounding_boxes.append(bounding_box)

        return bounding_boxes
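
A quick usage sketch for the method above; the wrapper class name and image path are assumptions for illustration, not part of the original:

# Hypothetical usage of _detect; VehicleDetector and street.jpg are assumed names.
from PIL import Image

detector = VehicleDetector(args)  # wraps self.model_detect and the transform above
for x1, y1, x2, y2 in detector._detect(Image.open("street.jpg")):
    print("vehicle box:", (x1, y1), (x2, y2))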
Example #3
    def detect(self, image):
        clone = image.copy()

        image = rgb2gray(image)

        # list to store the detections
        detections = []
        # current scale of the image
        downscale_power = 0

        # downscale the image and iterate
        for im_scaled in pyramid(image,
                                 downscale=self.downscale,
                                 min_size=self.window_size):
            # if the width or height of the scaled image is less than
            # the width or height of the window, then end the iterations
            if (im_scaled.shape[0] < self.window_size[1]
                    or im_scaled.shape[1] < self.window_size[0]):
                break
            for (x, y, im_window) in sliding_window(im_scaled,
                                                    self.window_step_size,
                                                    self.window_size):
                if (im_window.shape[0] != self.window_size[1]
                        or im_window.shape[1] != self.window_size[0]):
                    continue

                # calculate the HOG features
                feature_vector = hog(im_window)
                X = np.array([feature_vector])
                prediction = self.clf.predict(X)
                if prediction == 1:
                    x1 = int(x * (self.downscale**downscale_power))
                    y1 = int(y * (self.downscale**downscale_power))
                    detections.append(
                        (x1, y1, x1 + int(self.window_size[0] *
                                          (self.downscale**downscale_power)),
                         y1 + int(self.window_size[1] *
                                  (self.downscale**downscale_power))))

            # Move to the next scale
            downscale_power += 1

        # Display the results before performing NMS
        clone_before_nms = clone.copy()
        for (x1, y1, x2, y2) in detections:
            # Draw the detections
            cv2.rectangle(clone_before_nms, (x1, y1), (x2, y2), (0, 255, 0),
                          thickness=2)

        # Perform Non Maxima Suppression
        detections = non_max_suppression(np.array(detections), self.threshold)

        clone_after_nms = clone
        # Display the results after performing NMS
        for (x1, y1, x2, y2) in detections:
            # Draw the detections
            cv2.rectangle(clone_after_nms, (x1, y1), (x2, y2), (0, 255, 0),
                          thickness=2)

        return clone_before_nms, clone_after_nms
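
The method above relies on pyramid and sliding_window helpers that are not shown. A minimal sketch of what they plausibly look like, assuming scikit-image is available (the project's originals may differ):

# Hypothetical helper sketches; window_size is (width, height) as in detect().
import numpy as np
from skimage.transform import rescale

def pyramid(image, downscale=1.5, min_size=(64, 128)):
    # Yield the image, then progressively smaller copies, stopping once
    # the next level would be smaller than the sliding window.
    yield image
    while True:
        image = rescale(image, 1.0 / downscale)
        if image.shape[0] < min_size[1] or image.shape[1] < min_size[0]:
            break
        yield image

def sliding_window(image, step_size, window_size):
    # Slide a (width, height) window across the image in raster order,
    # yielding the top-left corner and the window contents.
    for y in range(0, image.shape[0], step_size):
        for x in range(0, image.shape[1], step_size):
            yield (x, y, image[y:y + window_size[1], x:x + window_size[0]])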
Example #4
def main(input_path, DEBUG):
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )
    classes = load_coco_names(FLAGS.class_names)
    frozenGraph = load_graph(FLAGS.frozen_model)
    boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)
    boxes_list = []
    with tf.Session(graph=frozenGraph, config=config) as sess:
        for item in input_path:
            start = clock()
            FLAGS.input_img = item
            img = Image.open(FLAGS.input_img)
            img_resized = letter_box_image(img, FLAGS.size, FLAGS.size, 128)
            img_resized = img_resized.astype(np.float32)
            detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
            filtered_boxes = non_max_suppression(
                detected_boxes,
                confidence_threshold=FLAGS.conf_threshold,
                iou_threshold=FLAGS.iou_threshold)
            boxes_list.append(filtered_boxes)
            if DEBUG:
                draw_boxes(filtered_boxes, img, classes,
                           (FLAGS.size, FLAGS.size), True)
            print(filtered_boxes)
            print("Execution Time : {} / #Symbols : {}  / Path : {}".format(
                clock() - start, len(filtered_boxes), item))
    tf.reset_default_graph()
    return boxes_list, classes, FLAGS.size
Example #5
    def detect(self, image, output_img='out'):
        img = Image.open(image)
        img_resized = img.resize(size=(self.size[0], self.size[1]))

        timer = Timer()
        timer.tic()

        detected_boxes = self.sess.run(
            self.boxes,
            feed_dict={self.inputs: [np.array(img_resized, dtype=np.float32)]})

        filtered_boxes = non_max_suppression(
            detected_boxes,
            confidence_threshold=self.conf_threshold,
            iou_threshold=self.iou_threshold)

        timer.toc()
        total_t = timer.total_time

        print('Detection took {:.3f}s'.format(total_t))

        draw_boxes(filtered_boxes, img, self.classes,
                   (self.size[0], self.size[1]))

        img.save((output_img + '-tiny' if self.tiny else output_img) + '.jpg')
Example #6
 def draw_and_show(self, detected_boxes, pil_im):
     filtered_boxes = non_max_suppression(
         detected_boxes,
         confidence_threshold=FLAGS.conf_threshold,
         iou_threshold=FLAGS.iou_threshold)
     self.draw_boxes_and_objects(filtered_boxes, pil_im, classes,
                                 (FLAGS.size, FLAGS.size), True)
     img = np.array(pil_im)
     img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
     #self.writeVideo2.write(img)
     if len(self.personBox) > 0:
         y1 = int(self.personBox[0])
         y2 = int(self.personBox[1])
         x1 = int(self.personBox[2])
         x2 = int(self.personBox[3])
         if y1 > 0 and y2 > 0 and x1 > 0 and x2 > 0:
             print(x1, x2, y1, y2)
             #img[y2:x2, y1:x1] = cv2.blur(img[y2:x2, y1:x1], (23, 23))
             y = 460
             x = 1020
             h = 50
             w = 50
             #img[y:y + h, x:x + w] = cv2.blur(img[y:y + h, x:x + w], (23, 23))
     cv2.imshow('CSI Camera', img)
     if self.count % 5 == 0:
         self.writeVideo.write(img)
Example #7
def detection(path):
    image = Image.open(path)
    img_resized = utils.letter_box_image(image, input_size, input_size, 128)
    img_resized = img_resized.astype(np.float32)
    boxes, inputs = utils.get_boxes_and_inputs_pb(frozenGraph)
    t0 = time.time()
    detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
    filtered_boxes = utils.non_max_suppression(detected_boxes,
                                               confidence_threshold=conf_threshold,
                                               iou_threshold=iou_threshold)
    print("Predictions found in {:.2f}s".format(time.time() - t0))
    if filtered_boxes:
        # if len(filtered_boxes[0][:]) == 1:
        img, region, score, box = utils.draw_boxes(filtered_boxes, image, classes, (input_size, input_size), True)
        # box = np.array(box)
        # print(box)
        if score > 0.90:
            person_image_height = box[0][3] - box[0][1]
            # region.save(out_image)
            print(person_image_height)
            # Estimate the person's real height from a reference object:
            # here a chair with a known height of 96 cm that spans 230 pixels
            # in the image at a fixed camera distance. These constants must be
            # re-tuned for any other setup; the error is large, so the result
            # is for fun only, and accurate measurement needs accurate inputs.
            person_height = (person_image_height * 96) / 230
            print("person_height: %.2fcm \n" % (person_height))
Example #8
def main(argv=None):

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)

    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    img = Image.open(FLAGS.input_img)
    img_resized = letter_box_image(img, FLAGS.size, FLAGS.size, 128)
    img_resized = img_resized.astype(np.float32)
    classes = load_coco_names(FLAGS.class_names)

    if FLAGS.frozen_model:

        t0 = time.time()
        frozenGraph = load_graph(FLAGS.frozen_model)
        print("Loaded graph in {:.2f}s".format(time.time() - t0))

        #print(frozenGraph.inputs)
        #print(frozenGraph.outputs)

        boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

        with tf.Session(graph=frozenGraph, config=config) as sess:
            t0 = time.time()
            detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})

    else:
        if FLAGS.tiny:
            model = yolo_v3_tiny.yolo_v3_tiny
        elif FLAGS.spp:
            model = yolo_v3.yolo_v3_spp
        else:
            model = yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(model, len(classes), FLAGS.size,
                                             FLAGS.data_format)

        saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))

        with tf.Session(config=config) as sess:
            t0 = time.time()
            saver.restore(sess, FLAGS.ckpt_file)
            print('Model restored in {:.2f}s'.format(time.time() - t0))

            t0 = time.time()
            detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})

    filtered_boxes = non_max_suppression(
        detected_boxes,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold)
    print("Predictions found in {:.2f}s".format(time.time() - t0))

    draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size), True)

    img.save(FLAGS.output_img)
Example #9
    def __call__(self, sample):
        preds, labels = sample
        if not isinstance(preds, np.ndarray):
            preds = np.array(preds)
        filtered_boxes = non_max_suppression(preds,
                                             self.conf_threshold,
                                             self.iou_threshold)

        det_boxes = []
        det_scores = []
        det_classes = []
        for cls, bboxs in filtered_boxes.items():
            det_classes.extend([LABEL_MAP[cls + 1]] * len(bboxs))
            for box, score in bboxs:
                rect_pos = box.tolist()
                y_min, x_min = rect_pos[1], rect_pos[0]
                y_max, x_max = rect_pos[3], rect_pos[2]
                height, width = 416, 416
                det_boxes.append(
                    [y_min / height, x_min / width, y_max / height, x_max / width])
                det_scores.append(score)

        if len(det_boxes) == 0:
            det_boxes = np.zeros((0, 4))
            det_scores = np.zeros((0, ))
            det_classes = np.zeros((0, ))

        return [np.array([det_boxes]), np.array([det_scores]), np.array([det_classes])], labels
Example #10
def main():
    model = Yolov1(split_size=7, num_boxes=2, num_classes=20).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    loss_fn = YoloLoss()
    train_dataset = VOCDataset("data/train.csv", transform=transform, img_dir=IMG_DIR, label_dir=LABEL_DIR)
    test_dataset = VOCDataset("data/test.csv", transform=transform, img_dir=IMG_DIR, label_dir=LABEL_DIR)
    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, num_workers=1, pin_memory=PIN_MEMORY, shuffle=True, drop_last=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, num_workers=1, pin_memory=PIN_MEMORY, shuffle=True, drop_last=True)
    for epoch in range(EPOCHS):
        pred_boxes, target_boxes = get_bboxes(train_loader, model, iou_threshold=0.5, threshold=0.4)
        mAP = mean_average_precision(pred_boxes, target_boxes, iou_threshold=0.5)
        print(f"Train mAP:{mAP}")
        train_fn(train_loader, model, optimizer, loss_fn)
    if epoch > 99:
        for x, y in test_loader:
            x = x.to(DEVICE)
            for idx in range(16):
                bboxes = cellboxes_to_boxes(model(x))
                bboxes = non_max_suppression(bboxes[idx], iou_threshold=0.5, threshold=0.4)
                plot_image(x[idx].permute(1, 2, 0).to("cpu"), bboxes)


if __name__ == "__main__":
    main()
Example #11
    def post_process(self, outputs):
        """
        Transforms raw output into boxes, confs, classes
        Applies NMS thresholding on bounding boxes and confs
        Parameters:
            output: raw output tensor
        Returns:
            boxes: x1,y1,x2,y2 tensor (dets, 4)
            confs: class * obj prob tensor (dets, 1)
            classes: class type tensor (dets, 1)
        """
        z = []
        for i in range(len(outputs)):
            outputs[i] = outputs[i].to(self.device)
            if self.grid[i].shape[2:4] != outputs[i].shape[2:4]:
                _, _, height, width, _ = outputs[i].shape
                self.grid[i] = self._make_grid(width,
                                               height).to(outputs[i].device)
            y = outputs[i].sigmoid()

            y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(
                outputs[i].device)) * self.strides[i]  # xy
            y[..., 2:4] = (y[..., 2:4] * 2)**2 * self.anchor_grid[i]  # wh
            z.append(y.view(1, -1, self.no))

        pred = torch.cat(z, 1)
        pred = non_max_suppression(pred,
                                   conf_thres=self.conf_thresh,
                                   iou_thres=self.iou_thresh)

        return pred
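
post_process assumes a _make_grid helper in the YOLOv5 style; a plausible sketch, under that assumption (needs torch >= 1.10 for the indexing argument):

    @staticmethod
    def _make_grid(nx, ny):
        # Hypothetical sketch: a (1, 1, ny, nx, 2) tensor of per-cell
        # (x, y) offsets, matching the YOLOv5 decoding convention.
        yv, xv = torch.meshgrid(torch.arange(ny), torch.arange(nx), indexing="ij")
        return torch.stack((xv, yv), 2).view(1, 1, ny, nx, 2).float()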
Example #12
def main(argv=None):
    # GPU configuration
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
    # config = tf.ConfigProto(gpu_options=gpu_options,log_device_placement=False,)
    # Classes and the video/image input
    classes = load_coco_names(args.class_names)

    vid = cv2.VideoCapture(args.input_video)
    video_frame_cnt = int(vid.get(7))  # total frame count of the video file (AVI: 10148); RTSP streams have no such property
    timeF = 10  # frame-sampling interval (130 ms, paired with 2)
    fpsnum = int(vid.get(1))  # 0-based index of the next frame to be captured or decoded
    if (fpsnum % timeF == 0):
        for i in range(video_frame_cnt):
            ret, img_ori = vid.read()
            # Letterbox the image
            img_ori = cv2.cvtColor(img_ori, cv2.COLOR_BGR2RGB)
            img_ori = Image.fromarray(img_ori)  # convert the OpenCV image to PIL
            img_resized = letter_box_image(img_ori,img_ori.size[1], img_ori.size[0], args.size, args.size, 128)
            img_resized = img_resized.astype(np.float32)
            # Alternative: plain interpolated resize
            # img = cv2.resize(img_ori, (args.size, args.size))
            # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # cv2 defaults to BGR order
            # img_resized = np.asarray(img, np.float32)
            # Encoding option 1
            # scipy.misc.imsave(args.temp_img, img_resized)
            # _, jpeg_bytes = base64_encode_img(args.temp_img)
            # Encoding option 2
            img_encode = cv2.imencode('.jpg', img_resized)[1]
            data_encode = np.array(img_encode)
            jpeg_bytes = data_encode.tostring()
            start_time = time.time()
            # Server connection setup
            channel = grpc.insecure_channel(args.server)
            stub = prediction_service_pb2.PredictionServiceStub(channel)
            request = predict_pb2.PredictRequest()
            request.model_spec.name = 'yolov3_2'
            request.model_spec.signature_name = 'predict_images'
            # Send the request and wait for the server's reply
            request.inputs['images'].CopyFrom(tf.contrib.util.make_tensor_proto(jpeg_bytes, shape=[1]))
            response = stub.Predict(request, 10.0)
            # Unpack the returned tensors
            results = {}
            for key in response.outputs:
                tensor_proto = response.outputs[key]
                nd_array = tf.contrib.util.make_ndarray(tensor_proto)
                results[key] = nd_array
            detected_boxes = results['scores']
            # NMS
            filtered_boxes = non_max_suppression(detected_boxes,
                                                 confidence_threshold=args.conf_threshold,
                                                 iou_threshold=args.iou_threshold)
            end_time = time.time()
            difference_time = end_time - start_time  # network round-trip time
            # Draw the detections
            draw_boxes(filtered_boxes, img_ori, classes, (args.size, args.size), True)
            # Show the annotated frame
            cv2charimg = cv2.cvtColor(np.array(img_ori), cv2.COLOR_RGB2BGR)  # convert PIL back to OpenCV
            cv2.putText(cv2charimg, '{:.2f}ms'.format((difference_time) * 1000), (40, 40), 0,
                        fontScale=1, color=(0, 255, 0), thickness=2)
            cv2.imshow('image', cv2charimg)
            if cv2.waitKey(1) & 0xFF == ord('q'):  # quit on 'q'
                break
Example #13
def show_camera(sess, boxes, inputs):
    # To flip the image, modify the flip_method parameter (0 and 2 are the most common)
    classes = load_coco_names(FLAGS.class_names)
    print(gstreamer_pipeline(flip_method=0))
    cap = cv2.VideoCapture(gstreamer_pipeline(flip_method=0),
                           cv2.CAP_GSTREAMER)
    if cap.isOpened():
        window_handle = cv2.namedWindow('CSI Camera', cv2.WINDOW_AUTOSIZE)
        while cv2.getWindowProperty('CSI Camera', 0) >= 0:
            ret_val, img = cap.read()
            cv2_im = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            pil_im = Image.fromarray(cv2_im)
            img_resized = letter_box_image(pil_im, FLAGS.size, FLAGS.size, 128)
            img_resized = img_resized.astype(np.float32)
            detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
            filtered_boxes = non_max_suppression(
                detected_boxes,
                confidence_threshold=FLAGS.conf_threshold,
                iou_threshold=FLAGS.iou_threshold)
            draw_boxes(filtered_boxes, pil_im, classes,
                       (FLAGS.size, FLAGS.size), True)
            img = np.array(pil_im)
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            cv2.imshow('CSI Camera', img)
            keyCode = cv2.waitKey(30) & 0xff
            if keyCode == 27:
                break
        cap.release()
        cv2.destroyAllWindows()
    else:
        print('Unable to open camera')
Example #14
def run_detection(img):
    global interpreter, inputs, output_details
    input_size = 416
    interpreter.set_tensor(inputs, img)
    interpreter.invoke()
    try:
        pred = [
            interpreter.get_tensor(output_details[i]['index'])
            for i in range(len(output_details))
        ]
    except Exception:
        return [0, 0, 0, 0]
    # boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
    boxes, pred_conf = pred[1], pred[0]
    # scores_max = tf.math.reduce_max(pred_conf[0], axis=-1)
    # valid_indices,selected_scores = tf.image.non_max_suppression_with_scores(
    #     boxes=boxes[0],
    #     scores=scores_max,
    #     max_output_size=100,
    #     iou_threshold=0.45,
    #     score_threshold=0.25,
    #     soft_nms_sigma=0.0
    # )
    # boxes = tf.gather(boxes[0],valid_indices)
    # scores = tf.gather(pred_conf[0],valid_indices)
    # classes = tf.math.argmax(scores,1)
    #scores = tf.gather(scores_max,valid_indices)

    boxes, scores, classes = non_max_suppression(boxes[0], pred_conf[0])
    valid_detections = boxes.shape[0]
    pred_bbox = [boxes, scores, classes, valid_detections]
    return pred_bbox
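
run_detection reads module-level globals that are set up elsewhere; a sketch of how they might be initialized with the standard TFLite API (the model filename is an assumption):

# Hypothetical initialization; the .tflite path is assumed.
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path="yolov4-416.tflite")
interpreter.allocate_tensors()
inputs = interpreter.get_input_details()[0]['index']
output_details = interpreter.get_output_details()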
Example #15
 def random_image(self, height, width):
     """Creates random specifications of an image with multiple shapes.
     Returns the background color of the image and a list of shape
     specifications that can be used to draw the image.
     创建具有多种形状的图像的随机规格。
      返回图像的背景颜色和形状列表
      可用于绘制图像的规格。
     """
     # Pick random background color
     bg_color = np.array([random.randint(0, 255) for _ in range(3)])
     # Generate a few random shapes and record their
     # bounding boxes
     shapes = []
     boxes = []
     N = random.randint(1, 4)
     for _ in range(N):
         shape, color, dims = self.random_shape(height, width)
         shapes.append((shape, color, dims))
         x, y, s = dims
         boxes.append([y - s, x - s, y + s, x + s])
     # Apply non-max suppression with a 0.3 threshold to avoid
     # shapes covering each other
     keep_ixs = utils.non_max_suppression(np.array(boxes), np.arange(N),
                                          0.3)
     shapes = [s for i, s in enumerate(shapes) if i in keep_ixs]
     return bg_color, shapes
Example #16
def do_test(model,
            images_path,
            labels_path,
            batch_size=32,
            progress_callback=None):
    size = 416

    t = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    data = DataLoader(YoloDataset(images_path, labels_path, t, size),
                      batch_size=batch_size,
                      shuffle=True,
                      num_workers=mp.cpu_count())

    count = [0] * 80
    correct = [0] * 80

    with torch.no_grad():
        for i, (local_batch, local_labels) in enumerate(data):

            local_batch = local_batch.to(device)
            outputs = model(local_batch)
            detections = yolo.YoloV3.get_detections(outputs)

            for j, detections in enumerate(utils.parse_detections(detections)):
                detections = utils.non_max_suppression(
                    detections, confidence_threshold=0.2)

                for target in utils.parse_labels(local_labels[0][j], size):
                    count[target['coco_idx']] += 1
                    for det in [
                            det for det in detections
                            if det.coco_idx == target['coco_idx']
                    ]:
                        if utils.iou(target['bb'], det.bb) >= 0.5:
                            correct[target['coco_idx']] += 1
                            break

            psum = 0
            for j in range(80):
                if count[j] == 0:
                    psum = -80
                    break

                p = correct[j] / count[j]
                psum += p

            if progress_callback:
                progress_callback(model,
                                  batch_number=(i + 1),
                                  batch_count=len(data),
                                  map_score=psum / 80)

    return psum / 80
Example #17
def ensemble_prediction(model, config, image):
    """ Test time augmentation method using non-maximum supression"""

    masks = []
    scores = []
    boxes = []

    results = {}

    result = model.detect([image],
                          verbose=0,
                          mask_threshold=config.DETECTION_MASK_THRESHOLD)[0]
    masks.append(result['masks'])
    scores.append(result['scores'])
    boxes.append(utils.extract_bboxes(result['masks']))

    temp_img = np.fliplr(image)
    result = model.detect([temp_img],
                          verbose=0,
                          mask_threshold=config.DETECTION_MASK_THRESHOLD)[0]
    mask = np.fliplr(result['masks'])
    masks.append(mask)
    scores.append(result['scores'])
    boxes.append(utils.extract_bboxes(mask))

    temp_img = np.flipud(image)
    result = model.detect([temp_img],
                          verbose=0,
                          mask_threshold=config.DETECTION_MASK_THRESHOLD)[0]
    mask = np.flipud(result['masks'])
    masks.append(mask)
    scores.append(result['scores'])
    boxes.append(utils.extract_bboxes(mask))

    angle = np.random.choice([1, -1])
    temp_img = np.rot90(image, k=angle, axes=(0, 1))
    result = model.detect([temp_img],
                          verbose=0,
                          mask_threshold=config.DETECTION_MASK_THRESHOLD)[0]
    mask = np.rot90(result['masks'], k=-angle, axes=(0, 1))
    masks.append(mask)
    scores.append(result['scores'])
    boxes.append(utils.extract_bboxes(mask))

    masks = np.concatenate(masks, axis=-1)
    scores = np.concatenate(scores, axis=-1)
    boxes = np.concatenate(boxes, axis=0)

    # config.DETECTION_NMS_THRESHOLD)
    keep_ind = utils.non_max_suppression(boxes, scores, 0.1)
    masks = masks[:, :, keep_ind]
    scores = scores[keep_ind]

    results['masks'] = masks
    results['scores'] = scores

    return results
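
A hedged usage sketch for the test-time-augmentation routine above; model, config, and image come from the surrounding Mask R-CNN project and are assumed names here:

# Hypothetical call; `model` is a trained Mask R-CNN, `image` an HxWx3 array.
results = ensemble_prediction(model, config, image)
print(results['scores'].shape)  # (num_kept,)
print(results['masks'].shape)   # (H, W, num_kept)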
Example #18
def detect_image(image_np):
    target_dimension = int(model.meta["height"])
    processed_img = utils.process_image(image_np, target_dimension)
    image_dimension = torch.FloatTensor([image_np.shape[1], image_np.shape[0]])
    scaling_factor = torch.min(target_dimension / image_dimension)
    if CUDA:
        processed_img = processed_img.cuda()
    image_var = Variable(processed_img)
    # 416 * 416 * (1/(8*8) + 1/(16*16) + 1/(32*32) )*3
    start = time.time()
    with torch.no_grad():
        output = model(image_var, CUDA)
    end = time.time()
    print("Total time: {}".format(end - start))

    # print("output", output.shape)
    thresholded_output = utils.object_thresholding(output[0])
    # print("Thresholded", thresholded_output.shape)
    # print(output[0])
    true_output = utils.non_max_suppression(thresholded_output)
    # print("True output", true_output.shape)
    original_image_np = np.copy(image_np)
    if true_output.size(0) > 0:
        # Offset for padded image
        vertical_offset = (target_dimension -
                           scaling_factor * image_dimension[0].item()) / 2
        horizontal_offset = (target_dimension -
                             scaling_factor * image_dimension[1].item()) / 2
        for output_box in true_output:
            rect_coords = utils.center_coord_to_diagonals(output_box[:4])
            rect_coords = torch.FloatTensor(rect_coords)
            # transform box detection w.r.t. boundaries of the padded image
            rect_coords[[0, 2]] -= vertical_offset
            rect_coords[[1, 3]] -= horizontal_offset
            rect_coords /= scaling_factor
            # Clamp to actual image's boundaries
            rect_coords[[0, 2]] = torch.clamp(rect_coords[[0, 2]], 0.0,
                                              image_dimension[0])
            rect_coords[[1, 3]] = torch.clamp(rect_coords[[1, 3]], 0.0,
                                              image_dimension[1])

            # print(image_np.shape)
            class_label = coco_classes[output_box[5].int()]
            print("Output Box:", output_box, "Class Label:", class_label)
            print("Rect coords:", rect_coords)
            if constants.PERFORM_FACE_DETECTION and class_label == "person":
                rc = rect_coords.int()
                person_img_np = original_image_np[rc[1]:rc[3], rc[0]:rc[2]]
                # print("person_img_np: ", person_img_np, person_img_np.shape)
                # cv2.imshow("bounded_box_img", person_img_np)
                # cv2.waitKey(0)
                face_label = face_recognition_utils.recognize_face_in_patch(
                    person_img_np)
                if face_label is not None:
                    class_label = face_label
            image_np = utils.draw_box(rect_coords, image_np, class_label)
    return image_np
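
utils.center_coord_to_diagonals is not shown; a minimal sketch of the conversion it presumably performs (an assumption based on its name and usage above):

def center_coord_to_diagonals(box):
    # Hypothetical sketch: (cx, cy, w, h) -> [x1, y1, x2, y2] corners.
    cx, cy, w, h = box
    return [cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2]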
Example #19
 def nms(pre_nms_box, scores):
     #indices = utils.non_maximum_suppression(pre_nms_box, scores, proposal_count, iou_min=0.5, sorted=True)
     #indices = tf.image.non_max_suppression(pre_nms_box, scores, proposal_count, nms_threshold, name="rpn_non_max_suppression")
     indices = utils.non_max_suppression(pre_nms_box, scores, proposal_count, nms_threshold, name="rpn_non_max_suppression")
     proposals = tf.gather(pre_nms_box, indices)
     num_pad = tf.maximum(proposal_count - tf.shape(proposals)[0], 0)
     proposals = tf.pad(proposals, [(0, num_pad), (0, 0)])
     proposals = tf.gather(proposals, tf.range(proposal_count))
     return proposals
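
The tf.pad/tf.gather pair at the end guarantees a fixed (proposal_count, 4) output even when NMS keeps fewer boxes, which keeps downstream graph shapes static. A minimal standalone illustration of the padding step, assuming proposal_count = 5:

# Illustration only; the box values are made up.
import tensorflow as tf

proposal_count = 5
proposals = tf.constant([[0.0, 0.0, 1.0, 1.0],
                         [0.2, 0.2, 0.8, 0.8]])  # only 2 boxes survived NMS
num_pad = tf.maximum(proposal_count - tf.shape(proposals)[0], 0)
padded = tf.pad(proposals, [(0, num_pad), (0, 0)])  # zero rows appended -> (5, 4)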
Example #20
 def draw_and_show(self, detected_boxes, pil_im):
     filtered_boxes = non_max_suppression(detected_boxes,
                                          confidence_threshold=FLAGS.conf_threshold,
                                          iou_threshold=FLAGS.iou_threshold)
     self.draw_boxes_and_objects(filtered_boxes, pil_im, classes, (FLAGS.size, FLAGS.size), True)
     img = np.array(pil_im)
     img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
     cv2.imshow('CSI Camera', img)
     self.writeVideo.write(img)
Example #21
def main(argv=None):

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)

    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    classes = load_coco_names(FLAGS.class_names)

    t0 = time.time()
    frozenGraph = load_graph(FLAGS.frozen_model)
    print("Loaded graph in {:.2f}s".format(time.time() - t0))

    boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

    with tf.Session(graph=frozenGraph, config=config) as sess:
        t0 = time.time()
        print(FLAGS.input_img)
        cap = cv2.VideoCapture(FLAGS.input_img)
        # cap = cv2.VideoCapture(0)
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        videoWriter = cv2.VideoWriter(
            "output.mp4", cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps,
            (int(width), int(height)))
        while (cap.isOpened()):
            ret, frame = cap.read()
            if ret == True:
                frame = cv2.flip(frame, 0)
                img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                img_resized = letter_box_image(img, FLAGS.size, FLAGS.size,
                                               128)
                img_resized = img_resized.astype(np.float32)
                detected_boxes = sess.run(boxes,
                                          feed_dict={inputs: [img_resized]})
                filtered_boxes = non_max_suppression(
                    detected_boxes,
                    confidence_threshold=FLAGS.conf_threshold,
                    iou_threshold=FLAGS.iou_threshold)
                print("Predictions found in {:.2f}s".format(time.time() - t0))

                draw_boxes(filtered_boxes, img, classes,
                           (FLAGS.size, FLAGS.size), True)

                fimg = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
                cv2.imshow("show", fimg)
                videoWriter.write(fimg)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            else:
                break
        cap.release()
        videoWriter.release()
Example #22
def plot_one_image(img_path, class_names, weights_file, iou_thre, con_thre):
    image = cv2.imread(img_path)
    bboxes = []
    augmentations = test_transforms(image=image, bboxes=bboxes)
    image = augmentations["image"]
    img = image
    image = image.reshape(1, image.shape[0], image.shape[1], image.shape[2])
    model = YOLO(len(class_names))
    optimizer = optim.Adam(model.parameters(), lr=1e-5, weight_decay=1e-4)
    load_checkpoint(weights_file, model, optimizer, 1e-5)

    pred_bboxes = []
    with torch.no_grad():
        out = model(image)
        for i in range(3):
            scale = torch.zeros((out[i].shape[0], out[i].shape[1],
                                 out[i].shape[2], out[i].shape[3], 1))
            # here scale used to cache the scale where the box is in
            pred_bboxes.append(torch.cat((out[i], scale), -1))

    boxes = non_max_suppression(pred_bboxes, scaled_anchors, con_thre,
                                iou_thre)

    # print(names[torch.argmax(torch.sigmoid(boxes[0][..., 5:]))])
    x = boxes[0][0:4]

    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[0] = x[0] - x[2] / 2  # top left x
    y[1] = x[1] - x[3] / 2  # top left y
    y[2] = x[0] + x[2] / 2  # bottom right x
    y[3] = x[1] + x[3] / 2  # bottom right y

    S = [32, 16, 8]

    image = img.permute(1, 2, 0)

    image = image.cpu().float().numpy()
    i = int(boxes[0][5].item())
    cv2.rectangle(image, (int(y[0].item() * S[i]), int(y[1].item() * S[i])),
                  (int(y[2].item() * S[i]), int(y[3].item() * S[i])),
                  (0, 0, 255), 2)

    label = class_names[torch.argmax(torch.sigmoid(boxes[0][..., 5:]))]
    tl = 3  # line thickness
    tf = max(tl - 1, 1)  # font thickness
    t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
    cv2.putText(image,
                label,
                (int(y[2].item() * S[i] + 5), int(y[3].item() * S[i] + 5)),
                0,
                tl / 3, [220, 220, 220],
                thickness=tf,
                lineType=cv2.LINE_AA)
    cv2.imshow("fff", image)
    cv2.waitKey(0)
Example #23
def nms(dets, thresh):
    """
    Calculate non maximum suppression
    :param dets: pytorch tensor containing rects and scores
    :param thresh: overlapping thresh used for nms
    :return: indices corresponding to the found rectangles
    """
    scores = dets[:, 4].detach().cpu().numpy()
    boxes = dets[:, 0:4].detach().cpu().numpy()

    return non_max_suppression(boxes, confidences=scores, overlap_thresh=thresh)
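
The wrapper above delegates to a NumPy non_max_suppression(boxes, confidences, overlap_thresh) that returns the indices of the kept rectangles; a classic greedy sketch with that signature (an approximation, not necessarily the project's exact code):

# Hypothetical greedy NMS sketch matching the call above.
import numpy as np

def non_max_suppression(boxes, confidences, overlap_thresh):
    # Repeatedly keep the highest-scoring box and discard every box
    # whose IoU with it exceeds overlap_thresh.
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)
    order = confidences.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= overlap_thresh]
    return np.array(keep, dtype=np.int64)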
Example #24
def detect_single_frame(faster_rcnn_net, frame, row, column, tiles_dict, **kwargs):
    confidence = kwargs.get('confidence')
    threshold = kwargs.get('threshold')

    start_time = time.time()
    faster_rcnn_net.setInput(cv2.dnn.blobFromImage(frame, size=(300, 300), swapRB=True, crop=False))
    detections = faster_rcnn_net.forward()
    total_time = time.time() - start_time

    boxes = []
    confidences = []
    frame_with_boxes = frame.copy()

    height = frame.shape[0]
    width = frame.shape[1]

    for detection in detections[0, 0, :, :]:
        class_id = int(detection[1])
        if class_id == 0:  # 0 is person
            score = float(detection[2])
            if score > confidence:
                left = detection[3] * width
                top = detection[4] * height
                right = detection[5] * width
                bottom = detection[6] * height
                box = [int(left), int(top), int(right), int(bottom)]

                confidences.append(score)
                boxes.append(box)

    filtered_boxes, probs = non_max_suppression(np.array(boxes), probs=confidences, overlapThresh=0.65)

    final_boxes = []
    final_confidences = []

    for index, box in enumerate(filtered_boxes):
        startX, startY, endX, endY = box
        box = [int(startX), int(startY), int(endX), int(endY)]

        if tiles_dict is not None:
            startX, startY, endX, endY = box_new_coords(box,
                                                        row,
                                                        column,
                                                        tiles_dict)
        else:
            (startX, startY, endX, endY) = box

        final_boxes.append([int(startX), int(startY), int(endX), int(endY)])
        final_confidences.append(probs[index])
        cv2.rectangle(frame_with_boxes, (startX, startY), (endX, endY), (0, 255, 0), 2)

    return final_boxes, final_confidences, total_time, frame_with_boxes
Example #25
    def get_classification(self, cv_image):
        """Determines the color of the traffic light in the image

        Args:
            image (cv::Mat): image containing the traffic light

        Returns:
            int: ID of traffic light color (specified in styx_msgs/TrafficLight)

        """
        #TODO implement light color prediction

        image = Image.fromarray(cv_image)
        img_resized = letter_box_image(image, options['image_size'],
                                       options['image_size'], 128)
        img_resized = img_resized.astype(np.float32)

        boxes, inputs = get_boxes_and_inputs_pb(self.frozenGraph)

        # with tf.Session(graph=self.frozenGraph, config=self.config) as sess:
        t0 = time.time()
        detected_boxes = self.sess.run(boxes,
                                       feed_dict={inputs: [img_resized]})
        filtered_boxes = non_max_suppression(
            detected_boxes,
            confidence_threshold=options['thresh'],
            iou_threshold=options['iou'])
        print("Predictions found in {:.2f}s".format(time.time() - t0))
        inp = filtered_boxes.get(9)
        inp_new = dict()
        inp_new[9] = inp

        if inp_new[9] is not None:
            if len(inp_new[9]) > 0:
                for cls, bboxs in inp_new.items():
                    for box, score in bboxs:
                        box = convert_to_original_size(
                            box,
                            (options['image_size'], options['image_size']),
                            np.array(image.size), True)
                # print(inp_new)
                a = analyze_color(inp_new, cv_image)
                # print(a)
                light_color = state_predict(a)
                print("the light color is {}".format(light_color))
                if light_color:
                    if light_color == 'YELLOW':
                        return TrafficLight.YELLOW
                    elif light_color == 'RED':
                        return TrafficLight.RED
                    elif light_color == 'GREEN':
                        return TrafficLight.GREEN

        return TrafficLight.UNKNOWN
Example #26
 def test(self):
     print("iter per epochde: ", len(self.data_loader))
     for i, (images, images_path) in enumerate(self.data_loader):
         # print(images.shape)
         images = self.to_var(images)
         predict = self.net(images)
         # print(predict.shape)
         predict = utils.non_max_suppression(predict.cpu().data,
                                             len(self.classes_tag),
                                             self.conf_thres,
                                             self.nms_thres)
         self.save_predict_result(predict, images_path)
Example #27
 def detect_from_image(self, img):
     img_size = self.imgsize
     ratio = min(img_size / img.size[0], img_size / img.size[1])
     imw = round(img.size[0] * ratio)
     imh = round(img.size[1] * ratio)
     image_tensor = self.get_transforms(imw, imh)(img).float()
     image_tensor = image_tensor.unsqueeze_(0)
     input_img = Variable(image_tensor.type(torch.cuda.FloatTensor))
     with torch.no_grad():
         detections = self.model(input_img)
         detections = non_max_suppression(detections, 80, self.confthres,
                                          self.nmsthres)
     return detections[0]
Example #28
def main(argv=None):
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)

    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
        # inter_op_parallelism_threads=0,
        # intra_op_parallelism_threads=0,
        # device_count={"CPU": 6}
    )
    cap = cv2.VideoCapture(FLAGS.video_path)
    classes = utils.load_names(FLAGS.class_names)
    frozenGraph = utils.load_graph(FLAGS.frozen_model)
    boxes, inputs = utils.get_boxes_and_inputs_pb(frozenGraph)

    with tf.Session(graph=frozenGraph, config=config) as sess:
        while True:
            ret, frame = cap.read()
            if ret:
                t1 = time.time()
                frame1 = frame[:, :, ::-1]  # from BGR to RGB
                # frame1 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                print('\'BGR2RGB\' time consumption:', time.time() - t1)
                img_resized = utils.resize_cv2(
                    frame1, (FLAGS.size, FLAGS.size),
                    keep_aspect_ratio=FLAGS.keep_aspect_ratio)
                img_resized = img_resized[np.newaxis, :]
                t0 = time.time()
                detected_boxes = sess.run(
                    boxes,
                    feed_dict={inputs: img_resized
                               })  # get the boxes whose confidence > 0.005
                filtered_boxes = utils.non_max_suppression(
                    detected_boxes,
                    confidence_threshold=FLAGS.conf_threshold,
                    iou_threshold=FLAGS.iou_threshold)[
                        0]  # boxes' filter by NMS
                print('\'detection\' time consumption:', time.time() - t0)
                utils.draw_boxes_cv2(filtered_boxes, frame, classes,
                                     (FLAGS.size, FLAGS.size),
                                     FLAGS.keep_aspect_ratio)
                print('\n\n\n')
                cv2.imshow('frame', frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            else:
                break

    cap.release()
    cv2.destroyAllWindows()
Example #29
def detect(model,
           source,
           out,
           imgsz,
           conf_thres,
           iou_thres,
           names,
           colors=[(255, 30, 0), (50, 0, 255)],
           device=torch.device('cpu')):
    img, im0 = LoadImage(source, img_size=imgsz)

    # Run inference
    img = torch.from_numpy(img).to(device)
    img = img.float()
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Inference
    with torch.no_grad():
        pred = model(img)[0]

    # Apply NMS
    pred = non_max_suppression(pred, conf_thres, iou_thres)

    # Process detections
    det = pred[0]  # detections
    if det is not None and len(det):
        # Rescale boxes from img_size to im0 size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

        # Print results
        for c in det[:, -1].unique():
            n = (det[:, -1] == c).sum()  # detections per class

        # Write results
        for *xyxy, conf, cls in det:
            label = '%s %.2f' % (names[int(cls)], conf)
            # if cls == 0:
            plot_fire(xyxy,
                      im0,
                      clas=cls,
                      label=label,
                      color=colors[int(cls)],
                      line_thickness=2)

    # Save results (image with detections)

    cv2.imwrite(out, im0)
    return im0
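
A hedged invocation sketch for detect above; the model loading, file names, and class list are assumptions:

# Hypothetical usage; `model` is a loaded YOLOv5-style detector.
names = ['fire', 'smoke']
annotated = detect(model, 'frame.jpg', 'out.jpg', imgsz=640,
                   conf_thres=0.4, iou_thres=0.5, names=names)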
Example #30
def detect_single_frame(full_body_cascade, frame, row, column, tiles_dict,
                        **kwargs):
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    scaleFactor = kwargs.get('scaleFactor')
    minNeighbors = kwargs.get('minNeighbors')

    start_time = time.time()
    detections = full_body_cascade.detectMultiScale3(
        gray,
        scaleFactor,
        minNeighbors,
        minSize=(30, 30),
        flags=cv2.CASCADE_SCALE_IMAGE,
        outputRejectLevels=True)

    bodies = detections[0]
    probs = detections[2]

    probs = [x[0] for x in probs]

    try:
        bodies, probs = non_max_suppression(bodies,
                                            probs=probs,
                                            overlapThresh=0.65)
    except Exception:
        print(bodies)
        print(probs)
        raise ValueError

    total_time = time.time() - start_time

    boxes = []
    confidences = []
    frame_with_boxes = frame.copy()

    # Draw rectangle around the faces
    for idx, (x, y, w, h) in enumerate(bodies):
        if tiles_dict is not None:
            startX, startY, endX, endY = box_new_coords(
                [x, y, (x + w), (y + h)], row, column, tiles_dict)
        else:
            (startX, startY, endX, endY) = (x, y, (x + w), (y + h))

        cv2.rectangle(frame_with_boxes, (startX, startY), (endX, endY),
                      (0, 255, 0), 2)

        boxes.append([int(startX), int(startY), int(endX), int(endY)])
        confidences.append(probs[idx])

    return boxes, confidences, total_time, frame_with_boxes
Example #31
 def random_image(self, height, width):
     """Creates random specifications of an image with multiple shapes.
     Returns the background color of the image and a list of shape
     specifications that can be used to draw the image.
     """
     # Pick random background color
     bg_color = np.array([random.randint(0, 255) for _ in range(3)])
     # Generate a few random shapes and record their
     # bounding boxes
     shapes = []
     boxes = []
     N = random.randint(1, 4)
     for _ in range(N):
         shape, color, dims = self.random_shape(height, width)
         shapes.append((shape, color, dims))
         x, y, s = dims
         boxes.append([y-s, x-s, y+s, x+s])
     # Apply non-max suppression with a 0.3 threshold to avoid
     # shapes covering each other
     keep_ixs = utils.non_max_suppression(np.array(boxes), np.arange(N), 0.3)
     shapes = [s for i, s in enumerate(shapes) if i in keep_ixs]
     return bg_color, shapes
Example #32
keep = np.intersect1d(keep, np.where(roi_scores >= config.DETECTION_MIN_CONFIDENCE)[0])
print("Remove boxes below {} confidence. Keep {}:\n{}".format(
    config.DETECTION_MIN_CONFIDENCE, keep.shape[0], keep))

# Apply per-class non-max suppression
pre_nms_boxes = refined_proposals[keep]
pre_nms_scores = roi_scores[keep]
pre_nms_class_ids = roi_class_ids[keep]

nms_keep = []
for class_id in np.unique(pre_nms_class_ids):
    # Pick detections of this class
    ixs = np.where(pre_nms_class_ids == class_id)[0]
    # Apply NMS
    class_keep = utils.non_max_suppression(pre_nms_boxes[ixs],
                                            pre_nms_scores[ixs],
                                            config.DETECTION_NMS_THRESHOLD)
    # Map indices back to the original keep array
    class_keep = keep[ixs[class_keep]]
    nms_keep = np.union1d(nms_keep, class_keep)
    print("{:22}: {} -> {}".format(class_names[class_id][:20],
                                   keep[ixs], class_keep))

keep = np.intersect1d(keep, nms_keep).astype(np.int32)
print("\nKept after per-class NMS: {}\n{}".format(keep.shape[0], keep))

# Show final detections
ixs = np.arange(len(keep))  # Display all
# ixs = np.random.randint(0, len(keep), 10)  # Display random sample
captions = ["{} {:.3f}".format(class_names[c], s) if c > 0 else ""
            for c, s in zip(roi_class_ids[keep][ixs], roi_scores[keep][ixs])]