def get_frame(self):
    self.curr_time = time.time()
    ret, frame = self.cap.read()
    if ret:
        ret, jpeg = cv2.imencode('.jpg', frame)
        boxes = self.yolo.predict(frame)
        if len(boxes) > 0:
            # overlay detections before encoding the frame
            frame2 = draw_boxes(frame, boxes, self.config['model']['labels'])
            ret, jpeg = cv2.imencode('.jpg', frame2)
            # report at most once per interval, and only for confident detections
            if (self.curr_time - self.last_recorded_time >= self.report_interval
                    and boxes[0].get_score() >= self.conf_threshold):
                print(boxes[0].get_score())
                submit_form()
                self.last_recorded_time = self.curr_time
            return jpeg.tobytes()
        return jpeg.tobytes()
    else:
        return None
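# draw_boxes above is imported from elsewhere and not shown. A minimal sketch,
# assuming a keras-yolo2-style BoundBox interface (normalized
# xmin/ymin/xmax/ymax plus get_label()/get_score()); the project's real helper
# may differ, so treat this as illustrative only.
import cv2

def draw_boxes_sketch(image, boxes, labels):
    h, w = image.shape[:2]
    for box in boxes:
        # scale normalized coordinates back to pixel space
        x1, y1 = int(box.xmin * w), int(box.ymin * h)
        x2, y2 = int(box.xmax * w), int(box.ymax * h)
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 3)
        caption = '{} {:.2f}'.format(labels[box.get_label()], box.get_score())
        cv2.putText(image, caption, (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1e-3 * h, (0, 255, 0), 2)
    return image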
def detect(img, data, ctpn, sess):
    blobs, im_scales, img, scale = data.get_blobs(img, None)
    boxes, scores = ctpn.predict(blobs, im_scales, img, sess)
    boxes = ctpn.detect(boxes, scores[:, np.newaxis], img.shape[:2])
    # text_recs, detected_img = draw_boxes_(img, boxes, scale)
    text_recs, detected_img = draw_boxes(img, boxes, caption='im_name',
                                         wait=True, is_display=True)
    return text_recs, detected_img, img
def toad_game_custom():
    x1 = x2 = y1 = y2 = 0
    cap = cv2.VideoCapture(CAMERA)
    td = ToadGenerator()
    i = 0
    boxes = []
    while True:
        flag, img = cap.read()
        # run hand detection only on every 25th frame to save compute
        if i % 25 == 0:
            boxes = get_hand_prediction(img)
            boxes = non_max_suppression_fast(boxes)
        if len(boxes) > 0:
            x1, y1, x2, y2 = boxes[0]
        img = td.step(img, (x1, y1, x2, y2))
        try:
            img = draw_boxes(img, boxes)
            cv2.imshow('Toad collection', img)
        except:
            cap.release()
            raise
        i += 1
        ch = cv2.waitKey(1)
        if ch == 27:  # ESC quits
            break
    cap.release()
    cv2.destroyAllWindows()
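# non_max_suppression_fast is not shown; the name suggests the well-known
# Malisiewicz vectorized NMS. A sketch under that assumption, for boxes given
# as (x1, y1, x2, y2) rows:
import numpy as np

def non_max_suppression_fast_sketch(boxes, overlap_thresh=0.3):
    if len(boxes) == 0:
        return []
    boxes = np.asarray(boxes, dtype='float')
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)
    pick = []
    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        # intersection of the remaining boxes with the picked box
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        overlap = (w * h) / area[idxs[:last]]
        # drop boxes overlapping the picked one beyond the threshold
        idxs = np.delete(idxs, np.concatenate(
            ([last], np.where(overlap > overlap_thresh)[0])))
    return boxes[pick].astype('int')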
def detect_image(self, image):
    if self.model_image_size != (None, None):
        assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
        assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'
    image_data = preprocess_image(image, self.model_image_size)
    # (height, width) of the source image; needed to rescale boxes
    image_shape = np.array([image.size[1], image.size[0]])
    image_shape = np.expand_dims(image_shape, 0)
    start = time.time()
    out_boxes, out_classes, out_scores = self.predict(image_data, image_shape)
    end = time.time()
    print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
    print("Inference time: {:.8f}s".format(end - start))
    # draw result on input image
    image_array = np.array(image, dtype='uint8')
    image_array = draw_boxes(image_array, out_boxes, out_classes, out_scores,
                             self.class_names, self.colors)
    out_classnames = [self.class_names[c] for c in out_classes]
    return (Image.fromarray(image_array), out_boxes, out_classnames,
            out_scores)
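# preprocess_image is external; a plausible sketch of the usual YOLO-style
# preparation (resize to the model input size, scale to [0, 1], add a batch
# axis). The real helper may letterbox-pad instead of plain resizing.
import numpy as np
from PIL import Image

def preprocess_image_sketch(image, model_image_size):
    # model_image_size is (h, w); PIL's resize wants (w, h)
    resized = image.resize(model_image_size[::-1], Image.BICUBIC)
    image_data = np.asarray(resized, dtype='float32') / 255.0
    return np.expand_dims(image_data, 0)  # add batch dimension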
def predict_image(sess, image_in_file, image_out_file):
    image, image_data = preprocess_image(image_in_file,
                                         model_image_size=(608, 608))
    out_scores, out_boxes, out_classes = sess.run(
        [scores, boxes, classes],
        feed_dict={yolo_model.input: image_data,
                   K.learning_phase(): 0})
    print('Found {} boxes for {}'.format(len(out_boxes), image_in_file))
    # Generate colors for drawing bounding boxes.
    colors = generate_colors(class_names)
    # Draw bounding boxes on the image file
    draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors)
    # Save the predicted bounding box on the image
    image.save(image_out_file, quality=90)
    # Display the results in the notebook
    # (scipy.misc.imread was removed in SciPy >= 1.2; on newer installs,
    # imageio.imread is a drop-in replacement)
    output_image = scipy.misc.imread(image_out_file)
    imshow(output_image)
    pylab.show()
    return out_scores, out_boxes, out_classes
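# generate_colors is imported elsewhere; a common implementation spaces hues
# evenly around the HSV wheel, one color per class. A sketch under that
# assumption (the real helper may also shuffle the palette):
import colorsys

def generate_colors_sketch(class_names):
    hsv = [(i / len(class_names), 1.0, 1.0) for i in range(len(class_names))]
    rgb = [colorsys.hsv_to_rgb(*c) for c in hsv]
    return [(int(r * 255), int(g * 255), int(b * 255)) for r, g, b in rgb]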
def predict_video(sess, video_file, video_out_file):
    video_in = imageio.get_reader(video_file)
    frames = []
    # colors depend only on the class list, so compute them once
    colors = generate_colors(class_names)
    for i, image in enumerate(tqdm(video_in)):
        imageio.imwrite("data/cache.jpg", image)
        image_in, image_data = preprocess_image("data/cache.jpg",
                                                model_image_size=(608, 608),
                                                image_shape=(720, 1280),
                                                type="video")
        out_scores, out_boxes, out_classes = sess.run(
            [scores, boxes, classes],
            feed_dict={yolo_model.input: image_data,
                       K.learning_phase(): 0})
        draw_boxes(image_in, out_scores, out_boxes, out_classes, class_names,
                   colors)
        image_in.save(os.path.join("data", "cache_out.jpg"), quality=90)
        frames.append(imageio.imread("data/cache_out.jpg"))
    imageio.mimsave(video_out_file, frames)
def main():
    try:
        FLAG = process_config()
    except Exception:
        print("missing or invalid arguments")
        exit(0)
    if FLAG.GPU_options:
        session_config = tf.ConfigProto()
        session_config.gpu_options.per_process_gpu_memory_fraction = 0.9
        session_config.gpu_options.allow_growth = True
        sess = tf.Session(config=session_config)
    else:
        sess = tf.Session()
    model = yolov3(FLAG)
    model.build()
    model.init_saver()
    model.load(sess)
    image_test = Image.open('images/timg.jpg')
    resized_image = image_test.resize(size=(416, 416))
    image_data = np.array(resized_image, dtype='float32') / 255.0
    img_hw = tf.placeholder(dtype=tf.float32, shape=[2])
    boxes, scores, classes = model.predict(img_hw, iou_threshold=0.5,
                                           score_threshold=0.5)
    begin_time = time.time()
    boxes_, scores_, classes_, conv0 = sess.run(
        [boxes, scores, classes, model.feature_extractor.conv0],
        feed_dict={img_hw: [image_test.size[1], image_test.size[0]],
                   model.x: [image_data]})
    end_time = time.time()
    print(end_time - begin_time)
    # print conv0
    image_draw = draw_boxes(np.array(image_test, dtype=np.float32) / 255,
                            boxes_, classes_, FLAG.names, scores=scores_)
    # save the prediction without axes or borders
    fig = plt.figure(frameon=False)
    ax = plt.Axes(fig, [0, 0, 1, 1])
    ax.set_axis_off()
    fig.add_axes(ax)
    plt.imshow(image_draw)
    fig.savefig('prediction.jpg')
    plt.show()
    sess.close()
def main():
    ctpn = CTPN(cfg)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    ctpn.load_ckpt(sess)
    if cfg.ADJUST_ANGLE:
        angle_detector = VGG(cfg)
        angle_detector.load_weights()
    data = DataLoader(cfg)
    img = data.load_data('images/xuanye.jpg')
    t = time.time()
    if cfg.ADJUST_ANGLE:
        # estimate the text orientation and rotate the image upright
        angle = angle_detector.predict(img=np.copy(img))
        print('The angle of this image is:', angle)
        im = Image.fromarray(img)
        print('Rotate the array of this img!')
        if angle == 90:
            im = im.transpose(Image.ROTATE_270)
        elif angle == 180:
            im = im.transpose(Image.ROTATE_180)
        elif angle == 270:
            im = im.transpose(Image.ROTATE_90)
        img = np.array(im)
    # img = cv2.resize(img, (2000, 3000), interpolation=cv2.INTER_CUBIC)
    blobs, im_scales, resized_img, scale = data.get_blobs(img, None)
    boxes, scores = ctpn.predict(blobs, im_scales, resized_img, sess)
    boxes = ctpn.detect(boxes, scores[:, np.newaxis], resized_img.shape[:2])
    text_recs, im = draw_boxes(resized_img, boxes, caption='im_name',
                               wait=True, is_display=True)
    # text_recs = sort_box(text_recs)
    print("It takes time:{}s".format(time.time() - t))
    # cv2.imshow('img', im)
    # cv2.waitKey(0)
    cv2.imwrite('images/result.jpg', im)
boxes = decode_netout(
    netout[0],
    obj_threshold=0.25,   # predicted box score must exceed this threshold
    nms_threshold=0.28,   # maximum allowed overlap between predicted boxes
    anchors=ANCHORS,
    nb_class=CLASS)
# boxes hold x, y, w, h
boxes = scale_boxes(image, boxes)
print('Got', len(boxes), 'cells!')
# to show ground-truth boxes in the image, pass gt=gt below
image_box, final_boxes = draw_boxes(image, boxes, labels=LABELS,
                                    h_threshold=0.2, w_threshold=0.2,
                                    h_min=0.05, w_min=0.05, gt=[])
cv2.imwrite(fileOut + f, image)
with open(annoOut + annot_fname, 'w') as pred_file:
    for eachpred in final_boxes:
        pred_file.write('%.2f %.2f %.2f %.2f\n' %
                        (eachpred[0], eachpred[1], eachpred[2], eachpred[3]))
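# scale_boxes is project-specific; its job here is to map the decoder's
# normalized (x, y, w, h) outputs onto the image's pixel grid. A hypothetical
# sketch for plain tuples; the real helper may mutate box objects in place.
def scale_boxes_sketch(image, boxes):
    h, w = image.shape[:2]
    return [(bx * w, by * h, bw * w, bh * h) for (bx, by, bw, bh) in boxes]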
def _main():
    # parse command line arguments
    parser = argparse.ArgumentParser()
    requiredNamed = parser.add_argument_group('required named arguments')
    requiredNamed.add_argument(
        '--path_to_input_image', type=str, required=True,
        help='The path to the input image on which object detection will be '
             'performed. This argument is required.')
    parser.add_argument(
        '--path_to_trained_model',
        default='model_weights/coco_pretrained_weights.ckpt', type=str,
        help='The path to the location of pretrained model weights, which '
             'will be loaded into the model and then used for object '
             'detection. The default pretrained weights path is '
             "'model_weights/coco_pretrained_weights.ckpt', which contains "
             'weights trained on the coco dataset.')
    parser.add_argument(
        '--save_as', type=str, default=None,
        help='The filename for the image on which object detection was '
             'performed. If no filename is provided, the image will be saved '
             'as "[original_name] + _yolo_v3.jpg".')
    parser.add_argument('--tensorboard_save_path',
                        default='tensorboard/tensorboard_detect/', help="")
    parser.add_argument(
        '--class_path', default='utils/coco_classes.txt', type=str,
        help='The path that points towards where the class names for the '
             'dataset are stored. The default path is '
             '"utils/coco_classes.txt".')
    parser.add_argument(
        '--anchors_path', default='utils/anchors.txt', type=str,
        help='The path that points towards where the anchor values for the '
             'model are stored. The default path is "utils/anchors.txt", '
             'which contains anchors trained on the coco dataset.')
    parser.add_argument(
        '--input_height', default=416, type=int,
        help='The input height of the yolov3 model. The height must be a '
             'multiple of 32. The default height is 416.')
    parser.add_argument(
        '--input_width', default=416, type=int,
        help='The input width of the yolov3 model. The width must be a '
             'multiple of 32. The default width is 416.')
    args = vars(parser.parse_args())

    h = args['input_height']
    w = args['input_width']
    anchors = get_anchors(args['anchors_path'])
    classes = get_classes(args['class_path'])
    save_as = args['save_as']
    if save_as is None:
        filename_w_ext = os.path.basename(args['path_to_input_image'])
        filename, file_extension = os.path.splitext(filename_w_ext)
        save_as = filename + '_yolo_v3' + file_extension
    image, original_im = process_image(args['path_to_input_image'], h, w)

    tf.reset_default_graph()

    # build graph
    with tf.variable_scope('x_input'):
        X = tf.placeholder(dtype=tf.float32, shape=[None, h, w, 3])
    yolo_outputs = yolo_v3(inputs=X, num_classes=len(classes),
                           anchors=anchors, h=h, w=w, training=False)

    # output
    with tf.variable_scope('obj_detections'):
        raw_outputs = tf.concat(yolo_outputs, axis=1)

    # pass image through model
    with tf.Session() as sess:
        writer = tf.summary.FileWriter(args['tensorboard_save_path'],
                                       sess.graph)
        writer.close()
        saver = tf.train.Saver()
        print('restoring model weights...')
        saver.restore(sess, save_path=args['path_to_trained_model'])
        print('feeding image found at filepath: ',
              args['path_to_input_image'])
        start = time.time()
        ro = sess.run(raw_outputs,
                      feed_dict={X: [np.array(image, dtype=np.float32)]})
        end = time.time()
        total_time = end - start
        # fps is the reciprocal of seconds per frame
        print("total inference time was: " + str(round(total_time, 2)) +
              " seconds (that's " + str(round(1.0 / total_time, 2)) +
              " fps!)")

    # convert box coordinates, apply nms, and draw boxes
    boxes = convert_box_coordinates(ro)
    filtered_boxes = non_max_suppression(boxes, confidence_threshold=0.5,
                                         iou_threshold=0.4)
    draw_boxes(save_as, args['class_path'], filtered_boxes, original_im,
               image)
    print('image with detections saved as: ', save_as)
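# convert_box_coordinates is not shown; YOLO heads emit boxes as
# (center_x, center_y, width, height), while the NMS step above expects
# corner format. A sketch of that conversion, assuming the last axis holds
# the box fields first:
import numpy as np

def convert_box_coordinates_sketch(raw_outputs):
    cx, cy = raw_outputs[..., 0], raw_outputs[..., 1]
    w, h = raw_outputs[..., 2], raw_outputs[..., 3]
    converted = np.copy(raw_outputs)
    converted[..., 0] = cx - w / 2  # x_min
    converted[..., 1] = cy - h / 2  # y_min
    converted[..., 2] = cx + w / 2  # x_max
    converted[..., 3] = cy + h / 2  # y_max
    return converted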
detected_boxes = sess.run(boxes, feed_dict={inputs: [img_processed]})
t1 = time.time()
print("Amount of seconds to predict:", t1 - t0)
# non max suppression
filtered_boxes = non_max_suppression(detected_boxes,
                                     confidence_threshold=_CONF_THRESHOLD,
                                     iou_threshold=_IOU_THRESHOLD)
draw_boxes(filtered_boxes, img, classes, _INPUT_SIZE, True, width=2)
# PIL image -> OpenCV BGR array
open_cv_image = np.array(img)
open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR)
# show frame to user
cv2.imshow('frame', open_cv_image)
# close windows when pressing 'q'
if cv2.waitKey(1) & 0xFF == ord('q'):
    retrieving_frames = False
    cv2.destroyAllWindows()
        tmp = np.zeros(self.img.shape[:2], dtype='uint8')
        tmp = draw_lines(tmp, rowboxes + colboxes, color=255, lineW=2)
        labels = measure.label(tmp < 255, connectivity=2)  # label 8-connected regions
        regions = measure.regionprops(labels)
        ceilboxes = minAreaRectbox(regions, False, tmp.shape[1], tmp.shape[0],
                                   True, True)
        ceilboxes = np.array(ceilboxes)
        # shift cell boxes back into full-image coordinates
        ceilboxes[:, [0, 2, 4, 6]] += xmin
        ceilboxes[:, [1, 3, 5, 7]] += ymin
        self.tableCeilBoxes.extend(ceilboxes)
        self.childImgs.append(childImg)

    def table_ocr(self):
        pass


if __name__ == '__main__':
    import time
    from utils.utils import draw_boxes
    p = 'img/table-detect.jpg'
    img = cv2.imread(p)
    t = time.time()
    tableDetect = table(img)
    tableCeilBoxes = tableDetect.tableCeilBoxes
    img = tableDetect.img
    tmp = np.zeros_like(img)
    img = draw_boxes(tmp, tableDetect.tableCeilBoxes, color=(255, 255, 255))
    print(time.time() - t)
    cv2.imwrite('img/table-ceil.png', img)
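# minAreaRectbox is project-specific; the idea is to turn each labeled region
# into a rotated minimum-area rectangle expressed as 8 flat coordinates. A
# sketch that ignores the extra filtering flags the real helper takes:
import cv2
import numpy as np

def min_area_rect_boxes_sketch(regions):
    boxes = []
    for region in regions:
        # regionprops coords are (row, col); OpenCV wants (x, y)
        coords = region.coords[:, ::-1].astype(np.float32)
        rect = cv2.minAreaRect(coords)          # rotated bounding rectangle
        pts = cv2.boxPoints(rect).reshape(-1)   # 4 corners -> 8 flat values
        boxes.append(pts)
    return boxes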