Example 1
    def get_frame(self):
        self.curr_time = time.time()

        ret, frame = self.cap.read()
        if not ret:
            return None

        # Run detection and overlay labelled boxes when anything is found.
        boxes = self.yolo.predict(frame)
        if len(boxes) > 0:
            frame = draw_boxes(frame, boxes,
                               self.config['model']['labels'])

            # Report at most once per interval, and only for a
            # sufficiently confident top detection.
            if (self.curr_time - self.last_recorded_time >= self.report_interval
                    and boxes[0].get_score() >= self.conf_threshold):
                print(boxes[0].get_score())
                submit_form()
                self.last_recorded_time = self.curr_time

        # Encode once, after any drawing, instead of encoding the raw
        # frame and then re-encoding the annotated one.
        ret, jpeg = cv2.imencode('.jpg', frame)
        return jpeg.tobytes()
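
A wrapper like the one above is typically consumed frame by frame by an MJPEG streaming endpoint. A minimal sketch of such a consumer, assuming Flask; VideoCamera is a hypothetical name standing in for whatever class owns the get_frame() method above:

from flask import Flask, Response

app = Flask(__name__)
camera = VideoCamera()  # hypothetical constructor for the class above

def mjpeg_stream(cam):
    # Emit one JPEG per multipart chunk until the camera stops delivering.
    while True:
        frame = cam.get_frame()
        if frame is None:
            break
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')

@app.route('/video_feed')
def video_feed():
    return Response(mjpeg_stream(camera),
                    mimetype='multipart/x-mixed-replace; boundary=frame')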
Example 2
def detect(img, data, ctpn, sess):
    # Build network inputs, run the CTPN text detector, then refine the
    # raw proposals into text-line boxes.
    blobs, im_scales, img, scale = data.get_blobs(img, None)
    boxes, scores = ctpn.predict(blobs, im_scales, img, sess)
    boxes = ctpn.detect(boxes, scores[:, np.newaxis], img.shape[:2])
    text_recs, detected_img = draw_boxes(img, boxes, caption='im_name',
                                         wait=True, is_display=True)
    return text_recs, detected_img, img
Example 3
def toad_game_custom():
    x1 = x2 = y1 = y2 = 0

    cap = cv2.VideoCapture(CAMERA)
    td = ToadGenerator()
    i = 0
    boxes = []

    while True:
        flag, img = cap.read()
        if not flag:
            break

        # Hand detection is expensive, so only refresh it every 25 frames.
        if i % 25 == 0:
            boxes = get_hand_prediction(img)
            boxes = non_max_suppression_fast(boxes)
        if len(boxes) > 0:
            x1, y1, x2, y2 = boxes[0]

        img = td.step(img, (x1, y1, x2, y2))

        try:
            img = draw_boxes(img, boxes)
            cv2.imshow('Toad collection', img)
        except Exception:
            # Release the camera before propagating the error.
            cap.release()
            raise
        i += 1

        # Esc quits.
        ch = cv2.waitKey(1)
        if ch == 27:
            break

    cap.release()
    cv2.destroyAllWindows()
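
Example 3 depends on a non_max_suppression_fast helper that the snippet does not show. A minimal sketch of the classic greedy, overlap-based NMS such a helper usually implements; the signature and the (x1, y1, x2, y2) box layout are assumptions taken from how Example 3 unpacks boxes[0], not the snippet's actual code:

import numpy as np

def non_max_suppression_fast(boxes, overlap_thresh=0.3):
    # Greedy NMS over (x1, y1, x2, y2) boxes: repeatedly pick a box and
    # discard everything that overlaps it by more than overlap_thresh.
    if len(boxes) == 0:
        return []
    boxes = np.asarray(boxes, dtype='float')
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)
    pick = []
    while len(idxs) > 0:
        last = idxs[-1]
        pick.append(last)
        # Intersection of the remaining boxes with the picked one.
        xx1 = np.maximum(x1[last], x1[idxs[:-1]])
        yy1 = np.maximum(y1[last], y1[idxs[:-1]])
        xx2 = np.minimum(x2[last], x2[idxs[:-1]])
        yy2 = np.minimum(y2[last], y2[idxs[:-1]])
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        overlap = (w * h) / area[idxs[:-1]]
        # Keep only the boxes that do not overlap the picked one too much.
        idxs = idxs[:-1][overlap <= overlap_thresh]
    return boxes[pick].astype('int')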
Example 4
    def detect_image(self, image):
        if self.model_image_size != (None, None):
            assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
            assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'

        image_data = preprocess_image(image, self.model_image_size)

        # Original (height, width) of the PIL image, batched for the model.
        image_shape = np.array([image.size[1], image.size[0]])
        image_shape = np.expand_dims(image_shape, 0)

        start = time.time()
        out_boxes, out_classes, out_scores = self.predict(
            image_data, image_shape)
        end = time.time()

        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
        print("Inference time: {:.8f}s".format(end - start))

        # Draw the result on the input image.
        image_array = np.array(image, dtype='uint8')
        image_array = draw_boxes(image_array, out_boxes, out_classes,
                                 out_scores, self.class_names, self.colors)

        out_classnames = [self.class_names[c] for c in out_classes]
        return (Image.fromarray(image_array), out_boxes, out_classnames,
                out_scores)
Example 5
def predict_image(sess, image_in_file, image_out_file):
    image, image_data = preprocess_image(image_in_file,
                                         model_image_size=(608, 608))
    # `scores`, `boxes`, `classes`, `yolo_model` and `class_names` are
    # module-level names defined when the graph was built.
    out_scores, out_boxes, out_classes = sess.run(
        [scores, boxes, classes],
        feed_dict={yolo_model.input: image_data,
                   K.learning_phase(): 0})
    print('Found {} boxes for {}'.format(len(out_boxes), image_in_file))
    # Generate colors for drawing bounding boxes.
    colors = generate_colors(class_names)
    # Draw the bounding boxes on the image.
    draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors)
    # Save the image with the predicted bounding boxes.
    image.save(image_out_file, quality=90)
    # Display the result in the notebook.
    output_image = scipy.misc.imread(image_out_file)
    imshow(output_image)
    pylab.show()
    return out_scores, out_boxes, out_classes
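
Examples 5 and 6 call a generate_colors helper to assign one drawing color per class name. Its source is not shown here; a plausible sketch, assuming the common approach of spreading hues evenly in HSV:

import colorsys

def generate_colors(class_names):
    # One visually distinct color per class: spread hues evenly around
    # the HSV wheel, then convert to 0-255 RGB tuples. A sketch of the
    # helper assumed by Examples 5 and 6, not its actual source.
    n = len(class_names)
    hsv = [(i / n, 1.0, 1.0) for i in range(n)]
    rgb = [colorsys.hsv_to_rgb(*c) for c in hsv]
    return [tuple(int(255 * v) for v in c) for c in rgb]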
Example 6
def predict_video(sess, video_file, video_out_file):
    video_in = imageio.get_reader(video_file)
    # One fixed color per class for the whole video.
    colors = generate_colors(class_names)
    frames = []
    for i, image in enumerate(tqdm(video_in)):
        # Round-trip each frame through a cache file so the image-based
        # preprocessing pipeline can be reused unchanged.
        imageio.imwrite("data/cache.jpg", image)
        image_in, image_data = preprocess_image("data/cache.jpg",
                                                model_image_size=(608, 608),
                                                image_shape=(720, 1280),
                                                type="video")
        out_scores, out_boxes, out_classes = sess.run(
            [scores, boxes, classes],
            feed_dict={yolo_model.input: image_data,
                       K.learning_phase(): 0})
        draw_boxes(image_in, out_scores, out_boxes, out_classes, class_names,
                   colors)
        image_in.save(os.path.join("data", "cache_out.jpg"), quality=90)
        frames.append(imageio.imread("data/cache_out.jpg"))
    imageio.mimsave(video_out_file, frames)
Example 7
def main():
    try:
        FLAG = process_config()
    except Exception:
        print("missing or invalid arguments")
        exit(1)

    if FLAG.GPU_options:
        session_config = tf.ConfigProto()
        session_config.gpu_options.per_process_gpu_memory_fraction = 0.9
        session_config.gpu_options.allow_growth = True
        sess = tf.Session(config=session_config)
    else:
        sess = tf.Session()

    model = yolov3(FLAG)
    model.build()
    model.init_saver()
    model.load(sess)

    image_test = Image.open('images/timg.jpg')
    resized_image = image_test.resize(size=(416, 416))
    image_data = np.array(resized_image, dtype='float32') / 255.0
    img_hw = tf.placeholder(dtype=tf.float32, shape=[2])
    boxes, scores, classes = model.pedict(img_hw,
                                          iou_threshold=0.5,
                                          score_threshold=0.5)

    begin_time = time.time()
    boxes_, scores_, classes_, conv0 = sess.run(
        [boxes, scores, classes, model.feature_extractor.conv0],
        feed_dict={
            img_hw: [image_test.size[1], image_test.size[0]],
            model.x: [image_data]
        })
    end_time = time.time()
    # Inference time in seconds.
    print(end_time - begin_time)

    image_draw = draw_boxes(np.array(image_test, dtype=np.float32) / 255,
                            boxes_,
                            classes_,
                            FLAG.names,
                            scores=scores_)
    # Save the drawing without axes or margins.
    fig = plt.figure(frameon=False)
    ax = plt.Axes(fig, [0, 0, 1, 1])
    ax.set_axis_off()
    fig.add_axes(ax)
    plt.imshow(image_draw)
    fig.savefig('prediction.jpg')
    plt.show()
    sess.close()
Example 8
def main():
    ctpn = CTPN(cfg)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    ctpn.load_ckpt(sess)

    if cfg.ADJUST_ANGLE:
        angle_detector = VGG(cfg)
        angle_detector.load_weights()

    data = DataLoader(cfg)
    img = data.load_data('images/xuanye.jpg')

    t = time.time()

    if cfg.ADJUST_ANGLE:
        # Detect the text orientation and rotate the image upright.
        angle = angle_detector.predict(img=np.copy(img))
        print('The angle of this image is:', angle)
        im = Image.fromarray(img)
        if angle == 90:
            im = im.transpose(Image.ROTATE_270)
        elif angle == 180:
            im = im.transpose(Image.ROTATE_180)
        elif angle == 270:
            im = im.transpose(Image.ROTATE_90)
        img = np.array(im)

    blobs, im_scales, resized_img, scale = data.get_blobs(img, None)
    boxes, scores = ctpn.predict(blobs, im_scales, resized_img, sess)
    boxes = ctpn.detect(boxes, scores[:, np.newaxis], resized_img.shape[:2])
    text_recs, im = draw_boxes(resized_img,
                               boxes,
                               caption='im_name',
                               wait=True,
                               is_display=True)
    print("It takes time:{}s".format(time.time() - t))
    cv2.imwrite('images/result.jpg', im)
Example 9
    boxes = decode_netout(
        netout[0],
        obj_threshold=0.25,   # predicted box score must exceed this threshold
        nms_threshold=0.28,   # limits the overlap allowed between predicted boxes
        anchors=ANCHORS,
        nb_class=CLASS)
    # Boxes hold x, y, w, h.
    boxes = scale_boxes(image, boxes)

    print('Got ', len(boxes), ' cells!')

    # If ground-truth boxes should be shown in the image, pass gt=gt below.
    image_box, final_boxes = draw_boxes(image,
                                        boxes,
                                        labels=LABELS,
                                        h_threshold=0.2,
                                        w_threshold=0.2,
                                        h_min=0.05,
                                        w_min=0.05,
                                        gt=[])

    # Save the image with the drawn boxes.
    cv2.imwrite(fileOut + f, image_box)

    with open(annoOut + annot_fname, 'w') as pred_file:
        for eachpred in final_boxes:
            pred_file.write(
                '%.2f %.2f %.2f %.2f\n' %
                (eachpred[0], eachpred[1], eachpred[2], eachpred[3]))
Example 10
def _main():

    # parse command line arguments
    parser = argparse.ArgumentParser()
    required_named = parser.add_argument_group('required named arguments')
    required_named.add_argument(
        '--path_to_input_image',
        type=str,
        required=True,
        help='The path to the input image on which object detection will '
             'be performed. This argument is required.')
    parser.add_argument(
        '--path_to_trained_model',
        default='model_weights/coco_pretrained_weights.ckpt',
        type=str,
        help='The path to the pretrained model weights, which will be '
             'loaded into the model and used for object detection. The '
             'default path is "model_weights/coco_pretrained_weights.ckpt", '
             'which contains weights trained on the COCO dataset.')
    parser.add_argument(
        '--save_as',
        type=str,
        default=None,
        help='The filename for the image with detections drawn on it. If '
             'no filename is provided, the image is saved as '
             '"[original_name]_yolo_v3.jpg".')
    parser.add_argument(
        '--tensorboard_save_path',
        default='tensorboard/tensorboard_detect/',
        help='Where to write the TensorBoard graph summary.')
    parser.add_argument(
        '--class_path',
        default='utils/coco_classes.txt',
        type=str,
        help='The path to the file holding the class names for the dataset. '
             'The default is "utils/coco_classes.txt".')
    parser.add_argument(
        '--anchors_path',
        default='utils/anchors.txt',
        type=str,
        help='The path to the file holding the anchor values for the model. '
             'The default is "utils/anchors.txt", which contains anchors '
             'trained on the COCO dataset.')
    parser.add_argument(
        '--input_height',
        default=416,
        type=int,
        help='The input height of the yolov3 model; must be a multiple of '
             '32. The default is 416.')
    parser.add_argument(
        '--input_width',
        default=416,
        type=int,
        help='The input width of the yolov3 model; must be a multiple of '
             '32. The default is 416.')
    args = vars(parser.parse_args())

    h = args['input_height']
    w = args['input_width']
    anchors = get_anchors(args['anchors_path'])
    classes = get_classes(args['class_path'])
    save_as = args['save_as']
    if save_as is None:
        filename_w_ext = os.path.basename(args['path_to_input_image'])
        filename, file_extension = os.path.splitext(filename_w_ext)
        save_as = filename + '_yolo_v3' + file_extension

    image, original_im = process_image(args['path_to_input_image'], h, w)

    tf.reset_default_graph()

    # build graph
    with tf.variable_scope('x_input'):
        X = tf.placeholder(dtype=tf.float32, shape=[None, h, w, 3])

    yolo_outputs = yolo_v3(inputs=X,
                           num_classes=len(classes),
                           anchors=anchors,
                           h=h,
                           w=w,
                           training=False)  # output

    with tf.variable_scope('obj_detections'):
        raw_outputs = tf.concat(yolo_outputs, axis=1)

    # pass image through model
    with tf.Session() as sess:

        writer = tf.summary.FileWriter(args['tensorboard_save_path'],
                                       sess.graph)
        writer.close()

        saver = tf.train.Saver()
        print('restoring model weights...')
        saver.restore(sess, save_path=args['path_to_trained_model'])
        print('feeding image found at filepath: ', args['path_to_input_image'])
        start = time.time()
        ro = sess.run(raw_outputs,
                      feed_dict={X: [np.array(image, dtype=np.float32)]})
        end = time.time()
        total_time = end - start
        print("total inference time was: " + str(round(total_time, 2)) +
              " seconds (that's " + str(round(60.0 / total_time, 2)) +
              " fps!)")

    # convert box coordinates, apply nms, and draw boxes
    boxes = convert_box_coordinates(ro)
    filtered_boxes = non_max_suppression(boxes,
                                         confidence_threshold=0.5,
                                         iou_threshold=0.4)
    draw_boxes(save_as, args['class_path'], filtered_boxes, original_im, image)

    print('image with detections saved as: ', save_as)
Example 11
        # Excerpt from a frame-capture loop: `sess`, `boxes`, `inputs`,
        # `img`, `img_processed`, `classes`, `t0` and the threshold
        # constants are defined earlier in the surrounding function.
        detected_boxes = sess.run(boxes, feed_dict={inputs: [img_processed]})
        t1 = time.time()

        print("Seconds to predict:", t1 - t0)

        # non-max suppression
        filtered_boxes = non_max_suppression(
            detected_boxes,
            confidence_threshold=_CONF_THRESHOLD,
            iou_threshold=_IOU_THRESHOLD
        )

        draw_boxes(
            filtered_boxes,
            img,
            classes,
            _INPUT_SIZE,
            True,
            width=2
        )

        # PIL image -> OpenCV BGR array.
        open_cv_image = np.array(img)
        open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR)

        # show frame to user
        cv2.imshow('frame', open_cv_image)

        # close windows when pressing 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            retrieving_frames = False
            cv2.destroyAllWindows()
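
Examples 10 and 11 both prune detections with non_max_suppression(..., confidence_threshold=..., iou_threshold=...). The quantity behind iou_threshold is intersection-over-union; a minimal, self-contained helper for it (hypothetical name, boxes assumed to be (x1, y1, x2, y2) corners):

def iou(box_a, box_b):
    # Intersection-over-union of two (x1, y1, x2, y2) boxes: the
    # building block behind the iou_threshold in Examples 10 and 11.
    ax1, ay1, ax2, ay2 = box_a
    bx1, by1, bx2, by2 = box_b
    inter_w = max(0.0, min(ax2, bx2) - max(ax1, bx1))
    inter_h = max(0.0, min(ay2, by2) - max(ay1, by1))
    inter = inter_w * inter_h
    union = ((ax2 - ax1) * (ay2 - ay1)
             + (bx2 - bx1) * (by2 - by1) - inter)
    return inter / union if union > 0 else 0.0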
Example 12
            # Rasterize the detected row/column lines, then label the
            # 8-connected white regions between them as table cells.
            tmp = np.zeros(self.img.shape[:2], dtype='uint8')
            tmp = draw_lines(tmp, rowboxes + colboxes, color=255, lineW=2)
            labels = measure.label(tmp < 255, connectivity=2)  # label 8-connected regions
            regions = measure.regionprops(labels)
            ceilboxes = minAreaRectbox(regions, False, tmp.shape[1],
                                       tmp.shape[0], True, True)
            ceilboxes = np.array(ceilboxes)
            # Shift cell coordinates back into the full-image frame.
            ceilboxes[:, [0, 2, 4, 6]] += xmin
            ceilboxes[:, [1, 3, 5, 7]] += ymin
            self.tableCeilBoxes.extend(ceilboxes)
            self.childImgs.append(childImg)

    def table_ocr(self):
        pass


if __name__ == '__main__':
    import time
    from utils.utils import draw_boxes
    p = 'img/table-detect.jpg'
    img = cv2.imread(p)
    t = time.time()

    tableDetect = table(img)
    tableCeilBoxes = tableDetect.tableCeilBoxes
    img = tableDetect.img
    # Draw the detected cell boxes on a blank canvas the size of the image.
    tmp = np.zeros_like(img)
    img = draw_boxes(tmp, tableDetect.tableCeilBoxes, color=(255, 255, 255))
    print(time.time() - t)
    cv2.imwrite('img/table-ceil.png', img)
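
Example 12 recovers table cells by labelling the connected regions left between the drawn grid lines. A compact, standalone illustration of that measure.label / regionprops step, assuming only numpy and scikit-image:

import numpy as np
from skimage import measure

# Draw a 2x2 grid of "lines" (value 255), then label everything that is
# NOT a line; connectivity=2 means 8-connectivity in 2D, matching the
# comment in Example 12. Each labelled region is one table cell.
grid = np.zeros((100, 100), dtype='uint8')
grid[:, 49:51] = 255   # vertical line
grid[49:51, :] = 255   # horizontal line
labels = measure.label(grid < 255, connectivity=2)
for region in measure.regionprops(labels):
    print(region.label, region.bbox)  # bbox = (min_row, min_col, max_row, max_col)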