def main(input_path, DEBUG):
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    classes = load_coco_names(FLAGS.class_names)
    frozenGraph = load_graph(FLAGS.frozen_model)
    boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

    boxes_list = []
    with tf.Session(graph=frozenGraph, config=config) as sess:
        for item in input_path:
            start = clock()
            FLAGS.input_img = item
            img = Image.open(FLAGS.input_img)
            img_resized = letter_box_image(img, FLAGS.size, FLAGS.size, 128)
            img_resized = img_resized.astype(np.float32)

            detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
            filtered_boxes = non_max_suppression(
                detected_boxes,
                confidence_threshold=FLAGS.conf_threshold,
                iou_threshold=FLAGS.iou_threshold)
            boxes_list.append(filtered_boxes)

            if DEBUG:
                draw_boxes(filtered_boxes, img, classes,
                           (FLAGS.size, FLAGS.size), True)
                print(filtered_boxes)
            print("Execution Time : {} / #Symbols : {} / Path : {}".format(
                clock() - start, len(filtered_boxes), item))

    sess.close()
    tf.reset_default_graph()
    return boxes_list, classes, FLAGS.size
def detection(path):
    image = Image.open(path)
    img_resized = utils.letter_box_image(image, input_size, input_size, 128)
    img_resized = img_resized.astype(np.float32)

    boxes, inputs = utils.get_boxes_and_inputs_pb(frozenGraph)

    t0 = time.time()
    detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
    filtered_boxes = utils.non_max_suppression(detected_boxes,
                                               confidence_threshold=conf_threshold,
                                               iou_threshold=iou_threshold)
    print("Predictions found in {:.2f}s".format(time.time() - t0))

    if filtered_boxes:
        # if len(filtered_boxes[0][:]) == 1:
        img, region, score, box = utils.draw_boxes(filtered_boxes, image, classes,
                                                   (input_size, input_size), True)
        # box = np.array(box)
        # print(box)
        if score > 0.90:
            person_image_height = box[0][3] - box[0][1]
            # region.save(out_image)
            print(person_image_height)
            # Estimate the person's real height from a reference object of known size.
            # Here a chair is the reference: its real height is 96 cm and, at the fixed
            # camera distance, it spans 230 pixels in the image, giving a cm-per-pixel
            # ratio that is applied to the person's pixel height. These constants must
            # be re-calibrated for each setup; the error is large, so the result is for
            # fun only -- supply measured values if accuracy matters.
            # (See the proportional-scaling sketch after this function.)
            person_height = (person_image_height * 96) / 230
            print("person_height: %.2fcm \n" % (person_height))
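A minimal, standalone sketch of the reference-object scaling used above; the 96 cm / 230 px chair constants are taken from the comment and are assumptions that must be calibrated per camera setup.

def estimate_height_cm(person_px, ref_real_cm=96.0, ref_px=230.0):
    # Simple proportional model: at a fixed camera distance, real height scales
    # linearly with pixel height, so ref_real_cm / ref_px is the cm-per-pixel factor.
    return person_px * ref_real_cm / ref_px

# Example: a person spanning 400 px would be estimated at roughly 167 cm.
print("%.1f cm" % estimate_height_cm(400))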
def main(argv=None):
    # GPU configuration
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
    # config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False,)

    # Class names and video/image input
    classes = load_coco_names(args.class_names)
    vid = cv2.VideoCapture(args.input_video)
    video_frame_cnt = int(vid.get(7))  # total frame count (AVI: 10148; RTSP streams expose no frame-count property)
    timeF = 10  # frame-sampling interval (about 130 ms, paired with 2 fps)
    fpsnum = int(vid.get(1))  # 0-based index of the frame to be captured/decoded next

    if (fpsnum % timeF == 0):
        for i in range(video_frame_cnt):
            ret, img_ori = vid.read()

            # Letterbox padding
            img_ori = cv2.cvtColor(img_ori, cv2.COLOR_BGR2RGB)
            img_ori = Image.fromarray(img_ori)  # convert cv2 image to PIL
            img_resized = letter_box_image(img_ori, img_ori.size[1], img_ori.size[0],
                                           args.size, args.size, 128)
            img_resized = img_resized.astype(np.float32)

            # Interpolated resize (alternative)
            # img = cv2.resize(img_ori, (args.size, args.size))
            # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # cv2 uses BGR order by default
            # img_resized = np.asarray(img, np.float32)

            # Encoding option 1
            # scipy.misc.imsave(args.temp_img, img_resized)
            # _, jpeg_bytes = base64_encode_img(args.temp_img)

            # Encoding option 2
            img_encode = cv2.imencode('.jpg', img_resized)[1]
            data_encode = np.array(img_encode)
            jpeg_bytes = data_encode.tostring()

            start_time = time.time()

            # TensorFlow Serving (gRPC) connection
            channel = grpc.insecure_channel(args.server)
            stub = prediction_service_pb2.PredictionServiceStub(channel)
            request = predict_pb2.PredictRequest()
            request.model_spec.name = 'yolov3_2'
            request.model_spec.signature_name = 'predict_images'

            # Send the request and wait for the server's reply
            request.inputs['images'].CopyFrom(
                tf.contrib.util.make_tensor_proto(jpeg_bytes, shape=[1]))
            response = stub.Predict(request, 10.0)

            # Unpack the returned tensors
            results = {}
            for key in response.outputs:
                tensor_proto = response.outputs[key]
                nd_array = tf.contrib.util.make_ndarray(tensor_proto)
                results[key] = nd_array
            detected_boxes = results['scores']

            # NMS
            filtered_boxes = non_max_suppression(
                detected_boxes,
                confidence_threshold=args.conf_threshold,
                iou_threshold=args.iou_threshold)
            end_time = time.time()
            difference_time = end_time - start_time  # network round-trip time

            # Draw detections
            draw_boxes(filtered_boxes, img_ori, classes, (args.size, args.size), True)

            # Display the frame
            cv2charimg = cv2.cvtColor(np.array(img_ori), cv2.COLOR_RGB2BGR)  # PIL image back to cv2
            cv2.putText(cv2charimg, '{:.2f}ms'.format((difference_time) * 1000),
                        (40, 40), 0, fontScale=1, color=(0, 255, 0), thickness=2)
            cv2.imshow('image', cv2charimg)
            if cv2.waitKey(1) & 0xFF == ord('q'):  # quit playback
                break
def show_camera(sess, boxes, inputs):
    # To flip the image, modify the flip_method parameter (0 and 2 are the most common)
    classes = load_coco_names(FLAGS.class_names)
    print(gstreamer_pipeline(flip_method=0))
    cap = cv2.VideoCapture(gstreamer_pipeline(flip_method=0), cv2.CAP_GSTREAMER)
    if cap.isOpened():
        window_handle = cv2.namedWindow('CSI Camera', cv2.WINDOW_AUTOSIZE)
        while cv2.getWindowProperty('CSI Camera', 0) >= 0:
            ret_val, img = cap.read()
            cv2_im = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            pil_im = Image.fromarray(cv2_im)
            img_resized = letter_box_image(pil_im, FLAGS.size, FLAGS.size, 128)
            img_resized = img_resized.astype(np.float32)

            detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
            filtered_boxes = non_max_suppression(
                detected_boxes,
                confidence_threshold=FLAGS.conf_threshold,
                iou_threshold=FLAGS.iou_threshold)
            draw_boxes(filtered_boxes, pil_im, classes,
                       (FLAGS.size, FLAGS.size), True)

            img = np.array(pil_im)
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            cv2.imshow('CSI Camera', img)

            keyCode = cv2.waitKey(30) & 0xff
            if keyCode == 27:  # ESC closes the window
                break
        cap.release()
        cv2.destroyAllWindows()
    else:
        print('Unable to open camera')
def main(argv=None):
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    img = Image.open(FLAGS.input_img)
    img_resized = letter_box_image(img, FLAGS.size, FLAGS.size, 128)
    img_resized = img_resized.astype(np.float32)

    classes = load_coco_names(FLAGS.class_names)

    if FLAGS.frozen_model:
        t0 = time.time()
        frozenGraph = load_graph(FLAGS.frozen_model)
        print("Loaded graph in {:.2f}s".format(time.time() - t0))
        # print(frozenGraph.inputs)
        # print(frozenGraph.outputs)

        boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

        with tf.Session(graph=frozenGraph, config=config) as sess:
            t0 = time.time()
            detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
    else:
        if FLAGS.tiny:
            model = yolo_v3_tiny.yolo_v3_tiny
        elif FLAGS.spp:
            model = yolo_v3.yolo_v3_spp
        else:
            model = yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(model, len(classes), FLAGS.size,
                                             FLAGS.data_format)

        saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))

        with tf.Session(config=config) as sess:
            t0 = time.time()
            saver.restore(sess, FLAGS.ckpt_file)
            print('Model restored in {:.2f}s'.format(time.time() - t0))

            t0 = time.time()
            detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})

    filtered_boxes = non_max_suppression(
        detected_boxes,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold)
    print("Predictions found in {:.2f}s".format(time.time() - t0))

    draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size), True)
    img.save(FLAGS.output_img)
def main(argv=None):
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    classes = load_coco_names(FLAGS.class_names)

    t0 = time.time()
    frozenGraph = load_graph(FLAGS.frozen_model)
    print("Loaded graph in {:.2f}s".format(time.time() - t0))

    boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

    with tf.Session(graph=frozenGraph, config=config) as sess:
        t0 = time.time()
        print(FLAGS.input_img)
        cap = cv2.VideoCapture(FLAGS.input_img)
        # cap = cv2.VideoCapture(0)
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        videoWriter = cv2.VideoWriter(
            "output.mp4", cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps,
            (int(width), int(height)))

        while (cap.isOpened()):
            ret, frame = cap.read()
            if ret == True:
                frame = cv2.flip(frame, 0)
                img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                img_resized = letter_box_image(img, FLAGS.size, FLAGS.size, 128)
                img_resized = img_resized.astype(np.float32)

                detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
                filtered_boxes = non_max_suppression(
                    detected_boxes,
                    confidence_threshold=FLAGS.conf_threshold,
                    iou_threshold=FLAGS.iou_threshold)
                print("Predictions found in {:.2f}s".format(time.time() - t0))

                draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size), True)
                fimg = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
                cv2.imshow("show", fimg)
                videoWriter.write(fimg)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            else:
                break

        cap.release()
        videoWriter.release()
def get_classification(self, cv_image):
    """Determines the color of the traffic light in the image

    Args:
        image (cv::Mat): image containing the traffic light

    Returns:
        int: ID of traffic light color (specified in styx_msgs/TrafficLight)
    """
    # TODO implement light color prediction
    image = Image.fromarray(cv_image)
    img_resized = letter_box_image(image, options['image_size'],
                                   options['image_size'], 128)
    img_resized = img_resized.astype(np.float32)

    boxes, inputs = get_boxes_and_inputs_pb(self.frozenGraph)
    # with tf.Session(graph=self.frozenGraph, config=self.config) as sess:
    t0 = time.time()
    detected_boxes = self.sess.run(boxes, feed_dict={inputs: [img_resized]})
    filtered_boxes = non_max_suppression(
        detected_boxes,
        confidence_threshold=options['thresh'],
        iou_threshold=options['iou'])
    print("Predictions found in {:.2f}s".format(time.time() - t0))

    # Keep only COCO class 9 (traffic light) detections
    inp = filtered_boxes.get(9)
    inp_new = dict()
    inp_new[9] = inp

    if inp_new[9] is not None:
        if len(inp_new[9]) > 0:
            for cls, bboxs in inp_new.items():
                for box, score in bboxs:
                    box = convert_to_original_size(
                        box, (options['image_size'], options['image_size']),
                        np.array(image.size), True)
            # print(inp_new)
            a = analyze_color(inp_new, cv_image)
            # print(a)
            light_color = state_predict(a)
            print("the light color is {}".format(light_color))
            if light_color:
                if light_color == 'YELLOW':
                    return TrafficLight.YELLOW
                elif light_color == 'RED':
                    return TrafficLight.RED
                elif light_color == 'GREEN':
                    return TrafficLight.GREEN

    return TrafficLight.UNKNOWN
def main(argv=None):
    img = Image.open('out/images/19.png')
    # img = Image.open('city.png')
    img_resized = letter_box_image(img, size, size, 128)
    img_resized = img_resized.astype(np.float32)

    classes = load_coco_names('coco.names')

    if frozen_model:
        t0 = time.time()
        frozenGraph = load_graph(frozen_model)
        print("Loaded graph in {:.2f}s".format(time.time() - t0))

        boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

        with tf.Session(graph=frozenGraph) as sess:
            t0 = time.time()
            detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
    else:
        if tiny:
            model = yolo_v3_tiny.yolo_v3_tiny
        else:
            model = yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(model, len(classes), size, data_format)

        saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))

        with tf.Session() as sess:
            t0 = time.time()
            saver.restore(sess, ckpt_file)
            print('Model restored in {:.2f}s'.format(time.time() - t0))

            t0 = time.time()
            detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})

    filtered_boxes = non_max_suppression(detected_boxes,
                                         confidence_threshold=conf_threshold,
                                         iou_threshold=iou_threshold)
    print("Predictions found in {:.2f}s".format(time.time() - t0))

    draw_boxes(filtered_boxes, img, classes, (size, size), True)
    img.save('out_check.png')
def detect(self, img):
    img_resized = letter_box_image(img, self.size[0], self.size[1], 128)
    img_resized = img_resized.astype(np.float32)

    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    # import pdb
    # pdb.set_trace()
    detected_boxes = self.sess.run(self.boxes,
                                   feed_dict={self.inputs: [img_resized]},
                                   options=run_options,
                                   run_metadata=run_metadata)
    filtered_boxes = non_max_suppression(
        detected_boxes,
        confidence_threshold=self.conf_threshold,
        iou_threshold=self.iou_threshold)
    return filtered_boxes
def infer(self, input_image):
    # img = Image.open('test_images/car2.png')
    img = input_image.copy()
    img_resized = letter_box_image(img, 416, 416, 128)
    img_resized = img_resized.astype(np.float32)

    t0 = time.time()
    detected_boxes = self.sess.run(self.boxes, feed_dict={self.inputs: [img_resized]})
    filtered_boxes = non_max_suppression(detected_boxes,
                                         confidence_threshold=0.8,
                                         iou_threshold=0.5)
    # print(filtered_boxes)
    print("Predictions found in {:.3f}s".format(time.time() - t0))

    draw_boxes(filtered_boxes, img, self.classes, (416, 416), True)
    # img.save('out.png')
    return img, filtered_boxes
def main(argv=None):
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
        # inter_op_parallelism_threads=0,
        # intra_op_parallelism_threads=0,
        # device_count={"CPU": 6}
    )

    img = Image.open(FLAGS.input_img)
    if FLAGS.keep_aspect_ratio:
        img_resized = utils.letter_box_image(img, FLAGS.size, FLAGS.size, 128)
        img_resized = img_resized.astype(np.float32)
    else:
        img_resized = img.resize((FLAGS.size, FLAGS.size), Image.BILINEAR)
        img_resized = np.asarray(img_resized, dtype=np.float32)

    classes = utils.load_names(FLAGS.class_names)

    frozenGraph = utils.load_graph(FLAGS.frozen_model)

    boxes, inputs = utils.get_boxes_and_inputs_pb(frozenGraph)

    with tf.Session(graph=frozenGraph, config=config) as sess:
        t0 = time.time()
        detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
    print("Predictions found in {:.2f}s".format(time.time() - t0))

    filtered_boxes = utils.non_max_suppression(
        detected_boxes,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold)[0]

    utils.draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size),
                     FLAGS.keep_aspect_ratio)

    img.save(FLAGS.output_img)
def get_score_from_image(img_fp, gpu_options, config, model):
    img = Image.open(img_fp)
    img_resized = letter_box_image(img, FLAGS.size, FLAGS.size, 128)
    img_resized = img_resized.astype(np.float32)

    classes = load_coco_names(FLAGS.class_names)

    inference_start_time = time.time()
    if FLAGS.frozen_model:
        boxes, inputs = get_boxes_and_inputs_pb(model)
        with tf.Session(graph=model, config=config) as sess:
            detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
    else:
        if FLAGS.tiny:
            model = yolo_v3_tiny.yolo_v3_tiny
        elif FLAGS.spp:
            model = yolo_v3.yolo_v3_spp
        else:
            model = yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(model, len(classes), FLAGS.size,
                                             FLAGS.data_format)
        saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))
        with tf.Session(config=config) as sess:
            saver.restore(sess, FLAGS.ckpt_file)
            detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
    total_inference_time = time.time() - inference_start_time

    filtered_boxes = non_max_suppression(
        detected_boxes,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold)

    return get_person_scores(filtered_boxes, classes), round(total_inference_time * 1000, 3)
def detect(self, frame):
    t0 = time()
    frame = letter_box_image(
        frame, (self.params["input_w"], self.params["input_h"]), 128)
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # img = np.expand_dims(img, axis=0).astype(np.float32)
    img = img[np.newaxis, ...].astype(np.float32)
    # Quantize the input into the integer range expected by the TFLite model
    img = np.uint8(img / self.scale + self.zero_point)

    self.interpreter.set_tensor(self.input_details[0]['index'], img)
    self.interpreter.invoke()

    # The function `get_tensor()` returns a copy of the tensor data.
    # Use `tensor()` in order to get a pointer to the tensor.
    y1 = self.interpreter.get_tensor(self.output_details[0]['index'])
    y2 = self.interpreter.get_tensor(self.output_details[1]['index'])
    # Dequantize the two output heads back to float
    y1 = self.scale1 * (np.float32(y1) - self.zero_point1)
    y2 = self.scale2 * (np.float32(y2) - self.zero_point2)
    detected_boxes = [y1, y2]

    filtered_boxes = non_max_suppression(
        detected_boxes,
        self.params,
        confidence_threshold=self.prob_threshold,
        iou_threshold=self.iou_threshold)
    draw_boxes(filtered_boxes, frame, self.classes,
               (self.params["input_w"], self.params["input_h"]), self.colors, True)

    inf_time = time() - t0
    fps = 1. / inf_time
    cv2.putText(frame, "FPS: {:.1f}".format(fps), (10, 20),
                cv2.FONT_HERSHEY_DUPLEX, 0.40, (0, 0, 255), 1, cv2.LINE_AA)
    return frame
def main(argv=None):
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    # img = Image.open(FLAGS.input_img)
    # img_resized = letter_box_image(img, FLAGS.size, FLAGS.size, 128)
    # img_resized = img_resized.astype(np.float32)

    classes = load_coco_names(FLAGS.class_names)

    # if FLAGS.frozen_model:
    t0 = time.time()
    frozenGraph = load_graph(FLAGS.frozen_model)
    print("Loaded graph in {:.2f}s".format(time.time() - t0))

    boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

    ### Start inference on Video
    cap = cv2.VideoCapture(FLAGS.input_video)
    cap.open(FLAGS.input_video)

    # Grab the shape of the input
    width = int(cap.get(3))
    height = int(cap.get(4))

    with tf.Session(graph=frozenGraph, config=config) as sess:
        while cap.isOpened():
            flag, img = cap.read()
            if not flag:
                break
            key_pressed = cv2.waitKey(27)

            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Image.open(FLAGS.input_video)
            # convert from cv2 image to PIL image
            img = Image.fromarray(img)
            img_resized = letter_box_image(img, FLAGS.size, FLAGS.size, 128)
            img_resized = img_resized.astype(np.float32)
            classes = load_coco_names(FLAGS.class_names)

            t0 = time.time()
            detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
            infer_time = time.time() - t0

            filtered_boxes = non_max_suppression(
                detected_boxes,
                confidence_threshold=FLAGS.conf_threshold,
                iou_threshold=FLAGS.iou_threshold)

            draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size), True)

            img = np.asarray(img)
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            cv2.putText(
                img, "infer time= " + str('{:.1f}'.format(infer_time * 1000)) + " ms",
                (80, 40), 0, 0.5, (250, 0, 0), 1)

            ### Send the frame to the FFMPEG server ###
            sys.stdout.buffer.write(img)
            sys.stdout.flush()

            # Break if escape key pressed
            if key_pressed == 27:
                break

    # Release the out capture, and destroy any OpenCV windows
    cap.release()
    cv2.destroyAllWindows()
def main(argv=None):
    # GPU configuration
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
    # config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False,)

    # Class names and video/image input
    classes = load_coco_names(args.class_names)

    # Letterbox padding
    # img = Image.open(args.input_img)
    # img_resized = letter_box_image(img, img.size[1], img.size[0], args.size, args.size, 128)
    # img_resized = img_resized.astype(np.float32)

    # Interpolated resize (alternative)
    # img_ori = cv2.imread(args.input_img)
    # height_ori, width_ori = img_ori.shape[:2]
    # img = cv2.resize(img_ori, (args.size, args.size))
    # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # cv2 uses BGR order by default
    # img_resized = np.asarray(img, np.float32)

    img_ori = cv2.imread(args.input_img)
    img_ori = cv2.cvtColor(img_ori, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(img_ori)  # convert cv2 image to PIL
    img_resized = letter_box_image(img, img.size[1], img.size[0],
                                   args.size, args.size, 128)
    img_resized = img_resized.astype(np.float32)

    scipy.misc.imsave(args.temp_img, img_resized)
    _, jpeg_bytes = base64_encode_img(args.temp_img)

    # TensorFlow Serving (gRPC) connection
    channel = grpc.insecure_channel(args.server)
    stub = prediction_service_pb2.PredictionServiceStub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'yolov3_2'
    request.model_spec.signature_name = 'predict_images'

    # Send the request and wait for the server's reply
    t0 = time.time()
    request.inputs['images'].CopyFrom(
        tf.contrib.util.make_tensor_proto(jpeg_bytes, shape=[1]))
    response = stub.Predict(request, 10.0)

    # Unpack the returned tensors
    results = {}
    for key in response.outputs:
        tensor_proto = response.outputs[key]
        nd_array = tf.contrib.util.make_ndarray(tensor_proto)
        results[key] = nd_array
    detected_boxes = results['scores']

    # NMS
    filtered_boxes = non_max_suppression(
        detected_boxes,
        confidence_threshold=args.conf_threshold,
        iou_threshold=args.iou_threshold)

    # Draw detections
    draw_boxes(filtered_boxes, img, classes, (args.size, args.size), True)

    # Show the result
    plt.figure('Detection result')
    font = {
        'family': 'simhei',
        'weight': 'normal',
        'size': 18,
    }
    plt.title('Inference time: {:.2f}ms'.format((time.time() - t0) * 1000), font)
    plt.imshow(img)
    plt.show()
    print('done!')
def main(argv=None):
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=cfg.GPU_MEMORY_FRACTION)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    classes = load_coco_names(cfg.CLASS_NAME)

    if cfg.FROZEN_MODEL:
        pass
        # t0 = time.time()
        # frozenGraph = load_graph(cfg.FROZEN_MODEL)
        # print("Loaded graph in {:.2f}s".format(time.time()-t0))
        #
        # boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)
        #
        # with tf.Session(graph=frozenGraph, config=config) as sess:
        #     t0 = time.time()
        #     detected_boxes = sess.run(
        #         boxes, feed_dict={inputs: [img_resized]})
    else:
        if cfg.TINY:
            model = yolo_v3_tiny.yolo_v3_tiny
        else:
            model = yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(model, len(classes), cfg.IMAGE_SIZE,
                                             cfg.DATA_FORMAT)
        # boxes : coordinates of top left and bottom right points.
        saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))

        # for specific object recognition
        vgg16_image_size = vgg_16.default_image_size

        s_class_names = cfg.S_CLASS_PATH
        s_classes = [l.split(" ") for l in open(s_class_names, "r")]
        if len(s_classes[0]) > 1:  # class file format is "id classname"
            s_labels = {int(l[0]): l[1].replace("\n", "") for l in s_classes}
        else:  # class file format is "classname" only
            s_labels = {i: l[0].replace("\n", "") for i, l in enumerate(s_classes)}
        num_classes_s = len(s_labels.keys())

        num_classes_extractor = cfg.S_EXTRACTOR_NUM_OF_CLASSES
        s_model = cfg.S_CKPT_FILE
        extractor_name = cfg.S_EXTRACTOR_NAME

        # specific_pred, [cropped_images_placeholder, original_images_placeholder, keep_prob, is_training] = specific_object_recognition(vgg16_image_size, num_classes_s, num_classes_extractor, extractor_name)
        specific_pred, [cropped_images_placeholder, keep_prob,
                        is_training] = specific_object_recognition(
                            vgg16_image_size, num_classes_s)

        variables_to_restore = slim.get_variables_to_restore(include=["vgg_16"])
        restorer = tf.train.Saver(variables_to_restore)

        with tf.Session(config=config) as sess:
            t0 = time.time()
            saver.restore(sess, cfg.CKPT_FILE)
            print('YOLO v3 Model restored in {:.2f}s'.format(time.time() - t0),
                  "from:", cfg.CKPT_FILE)
            t0 = time.time()
            restorer.restore(sess, s_model)
            print('Specific object recognition Model restored in {:.2f}s'.format(
                time.time() - t0), "from:", s_model)

            # prepare test set
            with open(cfg.TEST_FILE_PATH, 'r') as f:
                f_ = [line.rstrip().split() for line in f]
            data = [[l, get_annotation(l[0], txtname=cfg.GT_INFO_FILE_NAME)]
                    for l in f_]
            # data: [[(path_str, label), [frame, center_x, center_y, size_x, size_y]],...]
            data = [l for l in data if l[1] is not None]  # skip images whose annotation could not be loaded

            def is_cropped_file_Exist(orig_filepath):
                d, file = os.path.split(orig_filepath)
                cropped_d = d + "_cropped"
                cropped_file = os.path.join(cropped_d, file)
                return os.path.exists(cropped_file)

            data = [l for l in data if is_cropped_file_Exist(l[0][0])]  # skip images without a matching cropped image

            # log
            f = open(cfg.OUTPUT_LOG_PATH, 'w')
            writer = csv.writer(f, lineterminator='\n')
            writer.writerow([
                'image path', 'movie_name', 'IoU', 'Average Precision', 'Recall',
                'is RoI detected?', 'is label correct?', 'gt label', 'pred label',
                'detect time', 'recog time'
            ])

            iou_list = []  # per-image IoU
            ap_list = []   # per-image average precision

            # iterative run
            for count, gt in enumerate(data):
                # gt: [(path_str, label), [frame, center_x, center_y, size_x, size_y]]
                # for evaluation
                gt_box = [float(i) for i in gt[1][1:]]
                gt_box = [
                    gt_box[0] - (gt_box[2] / 2), gt_box[1] - (gt_box[3] / 2),
                    gt_box[0] + (gt_box[2] / 2), gt_box[1] + (gt_box[3] / 2)
                ]
                gt_label = int(gt[0][1])

                ious = []
                precisions = []

                print(count, ":", gt[0][0])
                img = Image.open(gt[0][0])
                img_resized = letter_box_image(img, cfg.IMAGE_SIZE, cfg.IMAGE_SIZE, 128)
                img_resized = img_resized.astype(np.float32)

                t0 = time.time()
                detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
                filtered_boxes = non_max_suppression(
                    detected_boxes,
                    confidence_threshold=cfg.CONF_THRESHOLD,
                    iou_threshold=cfg.IOU_THRESHOLD)
                detect_time = time.time() - t0
                print("detected boxes in :{:.2f}s ".format(detect_time), filtered_boxes)

                # specific object recognition!
                np_img = np.array(img) / 255
                target_label = 0  # for the seesaa dataset (change per dataset class id)

                if len(filtered_boxes.keys()) != 0:  # something was detected
                    is_detected = True
                    for cls, bboxs in filtered_boxes.items():
                        if cls == target_label:  # the detection is the target class
                            print("target class detected!")
                            bounding_boxes = []
                            bboxs_ = copy.deepcopy(bboxs)  # convert_to_original_size() mutates the bbox
                            for box, score in bboxs:
                                orig_size_box = convert_to_original_size(
                                    box,
                                    np.array((cfg.IMAGE_SIZE, cfg.IMAGE_SIZE)),
                                    np.array(img.size), True)
                                # print(orig_size_box)
                                cropped_image = np_img[
                                    int(orig_size_box[1]):int(orig_size_box[3]),
                                    int(orig_size_box[0]):int(orig_size_box[2])]
                                bounding_boxes.append(cropped_image)

                            # input_original = cv2.resize(padding(np_img), (vgg16_image_size, vgg16_image_size))
                            # input_original = np.tile(input_original, (len(bounding_boxes), 1, 1, 1))  # stack one copy per cropped image

                            cropped_images = []
                            for bbox in bounding_boxes:
                                cropped_images.append(
                                    cv2.resize(padding(bbox),
                                               (vgg16_image_size, vgg16_image_size)))
                            input_cropped = np.asarray(cropped_images)

                            t0 = time.time()
                            pred = sess.run(specific_pred,
                                            feed_dict={
                                                cropped_images_placeholder: input_cropped,
                                                keep_prob: 1.0,
                                                is_training: False
                                            })
                            recog_time = time.time() - t0
                            print("Predictions found in {:.2f}s".format(recog_time))

                            pred_label = [s_labels[i] for i in pred.tolist()]  # map class ids to names
                            classes = [s_labels[i] for i in range(num_classes_s)]

                            filtered_boxes = {}
                            for i, n in enumerate(pred.tolist()):
                                if n in filtered_boxes.keys():
                                    filtered_boxes[n].extend([bboxs_[i]])
                                else:
                                    filtered_boxes[n] = [bboxs_[i]]

                            # calc IoU, mAP
                            # gt: [(path_str, label), [frame, center_x, center_y, size_x, size_y]]
                            # print(filtered_boxes)
                            iou = 0.0
                            for key in filtered_boxes.keys():
                                for pred_box in filtered_boxes[key]:
                                    p_box = copy.deepcopy(pred_box[0])
                                    orig_scale_p_box = convert_to_original_size(
                                        p_box,
                                        np.array((cfg.IMAGE_SIZE, cfg.IMAGE_SIZE)),
                                        np.array(img.size), True)
                                    conf = pred_box[1]
                                    # print(gt_label, key)
                                    if key == gt_label:  # the predicted class matches the GT
                                        print(orig_scale_p_box, gt_box)
                                        iou = _iou(orig_scale_p_box, gt_box)
                                        # :param box1: array of 4 values (top left and bottom right coords): [x0, y0, x1, x2]
                                        precision = calc_precision(orig_scale_p_box, gt_box)
                                        is_label_correct = True
                                    else:
                                        iou = 0.0
                                        precision = 0.0
                                        is_label_correct = False

                                    # print("IoU:", iou)
                                    ious.append(iou)
                                    print("Precision:", precision)
                                    precisions.append(precision)
                        else:  # not the target class
                            pass
                else:  # nothing was detected
                    is_detected = False
                    is_label_correct = "None"
                    pred_label = ["None"]

                average_iou = sum(ious) / (len(ious) + 1e-05)  # IoU for this image
                print("average IoU:", average_iou)
                iou_list.append(average_iou)
                print("mean average IoU:", sum(iou_list) / (len(iou_list) + 1e-05))
                ap = sum(precisions) / (len(precisions) + 1e-05)
                ap_list.append(ap)
                print("Average Precision:", ap)
                print("mean Average Precision:", sum(ap_list) / (len(ap_list) + 1e-05))

                draw_boxes(filtered_boxes, img, classes,
                           (cfg.IMAGE_SIZE, cfg.IMAGE_SIZE), True)

                # draw GT
                draw = ImageDraw.Draw(img)
                color = (0, 0, 0)
                draw.rectangle(gt_box, outline=color)
                draw.text(gt_box[:2], 'GT_' + s_labels[gt_label], fill=color)

                img.save(
                    os.path.join(
                        cfg.OUTPUT_IMAGE_DIR,
                        '{0:04d}_'.format(count) + os.path.basename(gt[0][0])))

                writer.writerow([
                    gt[0][0],
                    os.path.basename(os.path.dirname(gt[0][0])), average_iou, ap,
                    'Recall', is_detected, is_label_correct, s_labels[gt_label],
                    pred_label[0], detect_time, recog_time
                ])

            f.close()
            print("proc finished.")
def main(argv=None):
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=cfg.GPU_MEMORY_FRACTION)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    classes = load_coco_names(cfg.CLASS_NAME)

    if cfg.FROZEN_MODEL:
        pass
    else:
        if cfg.TINY:
            model = yolo_v3_tiny.yolo_v3_tiny
        else:
            model = yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(model, len(classes), cfg.IMAGE_SIZE,
                                             cfg.DATA_FORMAT)
        # boxes : coordinates of top left and bottom right points.
        saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))

        with tf.Session(config=config) as sess:
            t0 = time.time()
            saver.restore(sess, cfg.CKPT_FILE)
            print('YOLO v3 Model restored in {:.2f}s'.format(time.time() - t0),
                  "from:", cfg.CKPT_FILE)

            # prepare test set
            with open(cfg.TEST_FILE_PATH, 'r') as f:
                f_ = [line.rstrip().split() for line in f]
            data = [[l, get_annotation(l[0], txtname=cfg.GT_INFO_FILE_NAME)]
                    for l in f_]
            # data: [[(path_str, label), [frame, center_x, center_y, size_x, size_y]],...]
            data = [l for l in data if l[1] is not None]  # skip images whose annotation could not be loaded

            def is_cropped_file_Exist(orig_filepath):
                d, file = os.path.split(orig_filepath)
                cropped_d = d + "_cropped"
                cropped_file = os.path.join(cropped_d, file)
                return os.path.exists(cropped_file)

            data = [l for l in data if is_cropped_file_Exist(l[0][0])]  # skip images without a matching cropped image

            # log
            f = open(cfg.OUTPUT_LOG_PATH, 'w')
            writer = csv.writer(f, lineterminator='\n')
            writer.writerow([
                'image path', 'class/movie_name', 'IoU', 'TP', 'FP', 'FN',
                'Average Precision', 'gt label', ' highest_conf_label', 'detect time'
            ])

            total_iou = []  # per-image IoU
            total_tp = 0  # TP : positives with IoU > 0.5 and GT == Pred_class
            total_fp = 0  # FP : all other positives
            total_fn = 0  # FN : ground truths that were not detected
            total_ap = []  # per-image average precision. AP : TP / total positives

            # iterative run
            for count, gt in enumerate(data):
                # gt: [(path_str, label), [frame, center_x, center_y, size_x, size_y]]
                iou = 0.0
                tp = 0
                fp = 0
                fn = 0
                ap = 0.0

                # for evaluation
                gt_box = [float(i) for i in gt[1][1:]]
                gt_box = [
                    gt_box[0] - (gt_box[2] / 2), gt_box[1] - (gt_box[3] / 2),
                    gt_box[0] + (gt_box[2] / 2), gt_box[1] + (gt_box[3] / 2)
                ]
                gt_label = int(gt[0][1])  # GT class
                gt_anno = {gt_label: gt_box}

                print(count, ":", gt[0][0])
                img = Image.open(gt[0][0])
                img_resized = letter_box_image(img, cfg.IMAGE_SIZE, cfg.IMAGE_SIZE, 128)
                img_resized = img_resized.astype(np.float32)

                t0 = time.time()
                detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
                filtered_boxes = non_max_suppression(
                    detected_boxes,
                    confidence_threshold=cfg.CONF_THRESHOLD,
                    iou_threshold=cfg.IOU_THRESHOLD)
                detect_time = time.time() - t0
                print("detected boxes in :{:.2f}s ".format(detect_time), filtered_boxes)

                print(filtered_boxes)
                if len(filtered_boxes.keys()) != 0:  # something was detected
                    [tp, fp, fn], iou, precision, highest_conf_label = evaluate(
                        filtered_boxes, gt_anno, img, thresh=0.5)  # evaluate this single image
                else:  # nothing was detected
                    iou = 0.0
                    precision = 0.0
                    tp = 0
                    fp = 0
                    fn = len(gt_anno.values())
                    highest_conf_label = -1

                # # print(filtered_boxes)
                # if len(filtered_boxes.keys()) != 0:  # something was detected
                #     is_detected = True
                #     [tp, fp, fn], iou, ap, highest_conf_label = evaluate(filtered_boxes, gt_anno, img, thresh=0.1)  # evaluate this single image
                # else:  # nothing was detected
                #     is_detected = False
                #     iou = 0.0
                #     ap = 0.0
                #     tp = 0
                #     fp = 0
                #     fn = len(gt_anno.values())
                #     highest_conf_label = -1

                total_iou.append(iou)
                total_ap.append(precision)
                total_tp += tp
                total_fp += fp
                total_fn += fn

                print("IoU:", iou)
                print("mean average IoU:", sum(total_iou) / (len(total_iou) + 1e-05))
                print("AP:", precision)
                print("mAP:", sum(total_ap) / (len(total_ap) + 1e-05))

                # # draw pred_bbox
                # draw_boxes(filtered_boxes, img, classes, (cfg.IMAGE_SIZE, cfg.IMAGE_SIZE), True)
                # # draw GT
                # draw = ImageDraw.Draw(img)
                # color = (0, 0, 0)
                # draw.rectangle(gt_box, outline=color)
                # draw.text(gt_box[:2], 'GT_'+classes[gt_label], fill=color)
                # img.save(os.path.join(cfg.OUTPUT_DIR, '{0:04d}_'.format(count)+os.path.basename(gt[0][0])))

                movie_name = os.path.basename(os.path.dirname(gt[0][0]))
                movie_parant_dir = os.path.basename(
                    os.path.dirname(os.path.dirname(gt[0][0])))
                pred_label = classes[highest_conf_label] if highest_conf_label != -1 else "None"
                save_messe = [
                    gt[0][0],
                    os.path.join(movie_name, movie_parant_dir), iou, tp, fp, fn,
                    precision, classes[gt_label], pred_label, detect_time
                ]
                writer.writerow(save_messe)
                print(save_messe)

            print("total tp :", total_tp)
            print("total fp :", total_fp)
            print("total fn :", total_fn)

            f.close()
            print("proc finished.")
output_img = args.output
class_names = args.labels
params_ = args.params

gpu_options = tf.GPUOptions(
    per_process_gpu_memory_fraction=gpu_memory_fraction)
config = tf.ConfigProto(
    gpu_options=gpu_options,
    log_device_placement=False,
)

with open(params_, "r") as readFile:
    params = json.load(readFile)

origin_img = cv2.imread(input_img)
resized_img = letter_box_image(origin_img,
                               (params["input_w"], params["input_h"]), 128)
img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)
img = img.astype(np.float32)

classes = load_coco_names(class_names)
colors = [(random.randint(0, 255), random.randint(0, 255),
           random.randint(0, 255)) for _ in range(len(classes))]

frozenGraph = load_graph(frozen_model_path)

boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

outputs = {}
with tf.Session(graph=frozenGraph, config=config) as sess:
    for i in range(len(boxes)):
        outputs[boxes[i].name] = sess.run(boxes[i], feed_dict={inputs: [img]})

detected_boxes = list(outputs.values())
def main(argv=None):
    img = Image.open('city.png')
    img_resized = letter_box_image(img, size, size, 128)
    img_resized = img_resized.astype(np.float32)

    classes = load_coco_names('coco.names')

    fake_boxes = {2: [(np.array([300, 200, 370, 250]), 1.)]}
    generated_boxes, g_indices = generate_ground_truth(fake_boxes, size, 0.4)
    draw_boxes(copy.deepcopy(generated_boxes), img, classes, (size, size), True)
    draw_boxes(copy.deepcopy(fake_boxes), img, classes, (size, size), True)
    # draw_boxes(filtered_boxes, img, classes, (size, size), True)
    img.save('out_fakeboxes.jpg')

    mask = np.zeros([1, 10647])
    for cls, indices in g_indices.items():
        mask[0, indices] = 1

    gt_tensor = np.zeros([1, 10647, 4 + 1 + len(classes)])
    for cls, boxes in generated_boxes.items():
        for i, box in enumerate(boxes):
            class_mask = np.zeros([len(classes)])
            class_mask[cls] = 1
            gt_row = [*np.asarray(box[0]), 1., *class_mask]
            gt_tensor[0, g_indices[cls][i]] = gt_row

    if frozen_model:
        t0 = time.time()
        frozenGraph = load_graph(frozen_model)
        print("Loaded graph in {:.2f}s".format(time.time() - t0))

        boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

        with frozenGraph.as_default():
            fake_gt = tf.constant(gt_tensor, dtype=tf.float32)
            mask_tensor = tf.constant(mask, dtype=tf.float32)
            fake_loss = mse(fake_gt, boxes) * mask_tensor
            fake_loss = tf.reduce_mean(fake_loss, axis=-1)
            grad_op = tf.gradients(fake_loss, inputs)

        with tf.Session(graph=frozenGraph) as sess:
            t0 = time.time()
            for iters in range(num_iterations):
                grads = sess.run(grad_op, feed_dict={inputs: [img_resized]})
                grad = grads[0][0]
                sigma = (iters * 4.0) / num_iterations + 0.5
                grad_smooth1 = gaussian_filter(grad, sigma=sigma)
                grad_smooth2 = gaussian_filter(grad, sigma=sigma * 2)
                grad_smooth3 = gaussian_filter(grad, sigma=sigma * 0.5)
                grad = (grad_smooth1 + grad_smooth2 + grad_smooth3)
                step_size_scaled = step_size / (np.std(grad) + 1e-8)

                # Update the image by following the gradient.
                mod = grad * step_size_scaled
                grad_img = Image.fromarray(np.uint8(mod + 128))
                grad_img.save('out/grads/{}.png'.format(iters))
                img_resized = np.clip(img_resized - mod, 0, 255)
                new_img = Image.fromarray(np.uint8(img_resized))
                new_img.save('out/images/{}.png'.format(iters))
    else:
        if tiny:
            model = yolo_v3_tiny.yolo_v3_tiny
        else:
            model = yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(model, len(classes), size, data_format)

        saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))

        with tf.Session() as sess:
            t0 = time.time()
            saver.restore(sess, ckpt_file)
            print('Model restored in {:.2f}s'.format(time.time() - t0))

            t0 = time.time()
            detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
def prepare_image(self, img):
    cv2_im = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    pil_im = Image.fromarray(cv2_im)
    img_resized = letter_box_image(pil_im, FLAGS.size, FLAGS.size, 128)
    img_resized = img_resized.astype(np.float32)
    return img_resized, pil_im
def main():
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=gpu_memory_fraction)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    # ----------- Initialization --------------
    # Settings data + following initializations
    classes = load_coco_names(class_names)
    cap = cv2.VideoCapture('video.avi')
    ret, _ = cap.read()
    plt.ion()
    frame_index = 0

    # defining model
    if frozen_model:
        # The protobuf file contains the graph definition as well as the weights of the model.
        t0 = time.time()
        # loading model and related weights
        frozenGraph = load_graph(frozen_model)
        print("Loaded graph in {:.2f}s".format(time.time() - t0))

        boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

        with tf.device("/GPU:0"):
            with tf.Session(graph=frozenGraph, config=config) as sess:
                # Is there any frame to read?
                while ret:
                    frame_index += 1
                    ret, frame = cap.read()
                    # applying transformation and appropriate changes to frame to feed the loaded model
                    img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                    img_resized = letter_box_image(img, size, size, 128)
                    img_resized = img_resized.astype(np.float32)

                    t0 = time.time()
                    # feeding tensor to loaded model
                    detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
                    # obtaining the bounding boxes of detected objects
                    filtered_boxes = non_max_suppression(
                        detected_boxes,
                        confidence_threshold=conf_threshold,
                        iou_threshold=iou_threshold)
                    print("Predictions found in {:.2f}s".format(time.time() - t0))

                    # cropping and extracting bounding boxes of detected objects in frame
                    rois = draw_boxes(filtered_boxes, img, classes, (size, size), True)
                    if len(rois) > 0:
                        for i in range(len(rois)):
                            # saving the cropped images in the './extracted_regions/' directory
                            rois[i].save('./extracted_regions/frame' + str(frame_index) +
                                         '_ExtObj_' + str(i) + '.jpg')

                    plt.imshow(np.array(img))
                    plt.pause(0.02)
                    plt.show()
    else:
        # using ckpt file for loading the model weights
        # ----------- Initialization --------------
        cap = cv2.VideoCapture('video.avi')
        ret, _ = cap.read()
        plt.ion()
        frame_index = 0

        # loading model and related weights
        if tiny:
            model = yolo_v3_tiny.yolo_v3_tiny
        else:
            model = yolo_v3.yolo_v3

        # the detector graph must be built before the Saver can collect its variables
        boxes, inputs = get_boxes_and_inputs(model, len(classes), size, data_format)
        saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))

        with tf.Session(config=config) as sess:
            t0 = time.time()
            # restoring happens inside the session the weights are loaded into
            saver.restore(sess, ckpt_file)
            print('Model restored in {:.2f}s'.format(time.time() - t0))

            # is there any frame to read?
            while ret:
                frame_index += 1
                ret, frame = cap.read()
                # applying transformation and appropriate changes to frame to feed the loaded model
                img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                img_resized = letter_box_image(img, size, size, 128)
                img_resized = img_resized.astype(np.float32)

                t0 = time.time()
                # feeding tensor to loaded model
                detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
                # obtaining the bounding boxes of detected objects
                filtered_boxes = non_max_suppression(
                    detected_boxes,
                    confidence_threshold=conf_threshold,
                    iou_threshold=iou_threshold)
                print("Predictions found in {:.2f}s".format(time.time() - t0))

                # cropping and extracting bounding boxes of detected objects
                rois = draw_boxes(filtered_boxes, img, classes, (size, size), True)
                if len(rois) > 0:
                    for i in range(len(rois)):
                        # saving the cropped images in the './extracted_regions/' directory
                        rois[i].save('./extracted_regions/frame' + str(frame_index) +
                                     '_ExtObj_' + str(i) + '.jpg')

                plt.imshow(np.array(img))
                plt.pause(0.02)
                plt.show()
conf_threshold = 0.5
iou_threshold = 0.4
classes = utils.load_coco_names(class_names)
out_image = './person.jpg'

t0 = time.time()
frozenGraph = utils.load_graph(frozen_model)
print("Loaded graph in {:.2f}s".format(time.time() - t0))

sess = tf.Session(graph=frozenGraph)

# image = cv2.imread(input_image)
# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# image = Image.fromarray(image.astype('uint8')).convert('RGB')
# The three steps above are equivalent to the Image.open() call below
image = Image.open(input_image)
img_resized = utils.letter_box_image(image, input_size, input_size, 128)
img_resized = img_resized.astype(np.float32)

boxes, inputs = utils.get_boxes_and_inputs_pb(frozenGraph)

t0 = time.time()
detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
filtered_boxes = utils.non_max_suppression(detected_boxes,
                                           confidence_threshold=conf_threshold,
                                           iou_threshold=iou_threshold)
print("Predictions found in {:.2f}s".format(time.time() - t0))

if filtered_boxes:
    # if len(filtered_boxes[0][:]) == 1:
    img, region, score, box = utils.draw_boxes(filtered_boxes, image, classes,
                                               (input_size, input_size), True)
    # box = np.array(box)
    # print(box)
def main(argv=None):
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    # import sys
    # result = []
    # with open("VOC2007/ImageSets/Main/test.txt", 'r') as f:
    #     for line in f:
    #         result.append(f)
    # print(result)
    # with open("VOC2007/ImageSets/Main/test.txt", 'r') as f:
    #     line = []
    #     while True:
    #         line = f.readline()  # read line by line
    #         if not line:
    #             break
    #         print(line)  # a trailing ',' would stop print from adding a newline

    results = []
    f = open("VOC2007/ImageSets/Main/test.txt", "r")
    lines = f.readlines()  # read the whole file and return it as a list of lines
    for line in lines:
        results.append(line.strip('\n').split(',')[0])

    # if FLAGS.frozen_model:
    #     t0 = time.time()
    #     frozenGraph = load_graph(FLAGS.frozen_model)
    #     print("Loaded graph in {:.2f}s".format(time.time()-t0))
    #     boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)
    #     with tf.Session(graph=frozenGraph, config=config) as sess:
    #         t0 = time.time()
    #         detected_boxes = sess.run(
    #             boxes, feed_dict={inputs: [img_resized]})
    # else:
    #     if FLAGS.tiny:
    #         model = yolo_v3_tiny.yolo_v3_tiny
    #     elif FLAGS.spp:
    #         model = yolo_v3.yolo_v3_spp
    #     else:
    model = yolo_v3.yolo_v3

    classes = load_coco_names(FLAGS.class_names)

    boxes, inputs = get_boxes_and_inputs(model, len(classes), FLAGS.size,
                                         FLAGS.data_format)

    saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))

    with tf.Session(config=config) as sess:
        t0 = time.time()
        saver.restore(sess, FLAGS.ckpt_file)
        print('Model restored in {:.2f}s'.format(time.time() - t0))

        t0 = time.time()
        # file_list = os.listdir('input/')
        for file in results:
            try:
                print('VOC2007/JPEGImages/' + str(file) + '.jpg')
                image = cv2.imread('VOC2007/JPEGImages/' + str(file) + '.jpg')
                print(image.shape)
                img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
                # img = Image.open('VOC2007/JPEGImages/'+str(file)+'.jpg')
                img_resized = letter_box_image(img, FLAGS.size, FLAGS.size, 128)
                img_resized = img_resized.astype(np.float32)

                detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
                filtered_boxes = non_max_suppression(
                    detected_boxes,
                    confidence_threshold=FLAGS.conf_threshold,
                    iou_threshold=FLAGS.iou_threshold)
                print("Predictions found in {:.2f}s".format(time.time() - t0))

                draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size), True)
                img.save('output/' + file + '.jpg')
            except ValueError:
                pass