def obj_detect(image):
    input_size = cfg['input_size']
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    original_image = image
    original_image_size = original_image.shape[:2]
    image_data = utils.image_preporcess(np.copy(original_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...]
    # model inference
    pred_sbbox, pred_mbbox, pred_lbbox = obj_detect_infer.infer(image_data)
    # post-process to get the final bboxes
    num_classes = cfg['num_classes']
    pred_bbox = np.concatenate([np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                                np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                                np.reshape(pred_lbbox, (-1, 5 + num_classes))],
                               axis=0)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.5)
    bboxes = utils.nms(bboxes, 0.45, method='nms')
    res = {}
    objs = []
    for bbox in bboxes:
        objs.append({'x1': bbox[0], 'y1': bbox[1], 'x2': bbox[2], 'y2': bbox[3],
                     'confidence': bbox[4], 'class': bbox[5]})
    res['objs'] = objs
    return res
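# Every snippet in this collection leans on utils.image_preporcess (sic). A
# minimal sketch of what it likely does: a letterbox resize that preserves
# aspect ratio, gray padding, and scaling pixels to [0, 1]. The padding value
# and normalization are assumptions, not the verified implementation. cv2 and
# numpy (np) are assumed imported as in the surrounding snippets.
def image_preporcess_sketch(image, target_size):
    ih, iw = target_size
    h, w, _ = image.shape
    scale = min(iw / w, ih / h)                 # keep aspect ratio
    nw, nh = int(scale * w), int(scale * h)
    image_resized = cv2.resize(image, (nw, nh))
    image_padded = np.full((ih, iw, 3), 128.0)  # gray letterbox padding (assumed)
    dw, dh = (iw - nw) // 2, (ih - nh) // 2
    image_padded[dh:dh + nh, dw:dw + nw, :] = image_resized
    return image_padded / 255.0                 # normalize to [0, 1]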
def predict(self, image):
    org_image = np.copy(image)
    org_h, org_w, _ = org_image.shape
    # Convert the image to the required format and size.
    image_data = utils.image_preporcess(image, [self.input_size, self.input_size])
    # Add a batch dimension at the front.
    image_data = image_data[np.newaxis, ...]
    pred_sbbox, pred_mbbox, pred_lbbox = self.sess.run(
        [self.pred_sbbox, self.pred_mbbox, self.pred_lbbox],
        feed_dict={
            self.input_data: image_data,
            self.trainable: False
        })
    # np.concatenate() joins the three scales; np.reshape(data, (-1, cols))
    # infers the row count from the fixed column count.
    pred_bbox = np.concatenate([
        np.reshape(pred_sbbox, (-1, 5 + self.num_classes)),
        np.reshape(pred_mbbox, (-1, 5 + self.num_classes)),
        np.reshape(pred_lbbox, (-1, 5 + self.num_classes))
    ], axis=0)
    # Keep only valid boxes inside the image and above the score threshold.
    bboxes = utils.postprocess_boxes(pred_bbox, (org_h, org_w), self.input_size,
                                     self.score_threshold)
    # Suppress overlapping boxes, keeping the best one per object.
    bboxes = utils.nms(bboxes, self.iou_threshold)
    return bboxes
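# utils.postprocess_boxes is the other shared helper. A sketch of its assumed
# behavior: convert (cx, cy, w, h) predictions to corner coordinates, undo the
# letterbox transform, clip to the original image, and keep boxes whose
# objectness * class probability clears the score threshold. An illustration,
# not the verified implementation.
def postprocess_boxes_sketch(pred_bbox, org_img_shape, input_size, score_threshold):
    pred_bbox = np.array(pred_bbox)
    pred_xywh = pred_bbox[:, 0:4]
    pred_conf = pred_bbox[:, 4]
    pred_prob = pred_bbox[:, 5:]
    # (center x, center y, w, h) -> (xmin, ymin, xmax, ymax)
    pred_coor = np.concatenate([pred_xywh[:, :2] - pred_xywh[:, 2:] * 0.5,
                                pred_xywh[:, :2] + pred_xywh[:, 2:] * 0.5], axis=-1)
    # Undo the letterbox: remove the padding, then rescale to the original image.
    org_h, org_w = org_img_shape
    resize_ratio = min(input_size / org_w, input_size / org_h)
    dw = (input_size - resize_ratio * org_w) / 2
    dh = (input_size - resize_ratio * org_h) / 2
    pred_coor[:, 0::2] = (pred_coor[:, 0::2] - dw) / resize_ratio
    pred_coor[:, 1::2] = (pred_coor[:, 1::2] - dh) / resize_ratio
    # Clip to the image and drop degenerate boxes.
    pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]),
                                np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])],
                               axis=-1)
    invalid = np.logical_or(pred_coor[:, 0] > pred_coor[:, 2],
                            pred_coor[:, 1] > pred_coor[:, 3])
    pred_coor[invalid] = 0
    # Score = objectness * class probability; keep boxes above the threshold.
    classes = np.argmax(pred_prob, axis=-1)
    scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes]
    mask = np.logical_and(scores > score_threshold, np.logical_not(invalid))
    return np.concatenate([pred_coor[mask], scores[mask, None],
                           classes[mask, None]], axis=-1)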
def find_bbox(path, confidence_score=0.7, iou=0.5):
    bbox_list = []
    images = []
    model = tf.keras.models.load_model('SavedModel/YOLOv3_model', compile=False)
    INPUT_SIZE = cfg.TEST.INPUT_SIZE
    SCORE_THRESHOLD = confidence_score
    IOU_THRESHOLD = iou
    for filename in os.listdir(path):
        full_path_image = os.path.join(path, filename)
        images.append(full_path_image)
    for path_to_image in images:
        image = cv2.imread(path_to_image)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image_size = image.shape[:2]
        image_data = utils.image_preporcess(np.copy(image),
                                            [INPUT_SIZE, INPUT_SIZE])
        image_data = image_data[np.newaxis, ...].astype(
            np.float32)  # (1, height, width, 3)
        pred_bbox = model.predict(image_data)
        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)
        bboxes = utils.postprocess_boxes(pred_bbox, image_size, INPUT_SIZE,
                                         SCORE_THRESHOLD)
        bboxes = utils.nms(bboxes, IOU_THRESHOLD, method='nms')
        bboxes.append(path_to_image)  # tag the box list with its source image path
        bbox_list.append(bboxes)
    return bbox_list
def predict(self, image):
    org_image = np.copy(image)
    org_h, org_w, _ = org_image.shape
    image_data = utils.image_preporcess(image, [self.input_size, self.input_size])
    image_data = image_data[np.newaxis, ...]
    pred_sbbox, pred_mbbox, pred_lbbox = self.sess.run(
        [self.pred_sbbox, self.pred_mbbox, self.pred_lbbox],
        feed_dict={
            self.input_data: image_data,
            self.trainable: False
        })
    pred_bbox = np.concatenate([
        np.reshape(pred_sbbox, (-1, 5 + self.num_classes)),
        np.reshape(pred_mbbox, (-1, 5 + self.num_classes)),
        np.reshape(pred_lbbox, (-1, 5 + self.num_classes))
    ], axis=0)
    bboxes = utils.postprocess_boxes(pred_bbox, (org_h, org_w), self.input_size,
                                     self.score_threshold)
    bboxes = utils.nms(bboxes, self.iou_threshold)
    return bboxes
def test_image(image_path, model_path):
    input_size = 416
    original_image = cv2.imread(image_path)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]
    image_data = utils.image_preporcess(np.copy(original_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    model = yolov3.build_for_test()
    # To load a TF checkpoint: model.load_weights(model_path);
    # to load darknet weights: utils.load_weights(model, model_path).
    utils.load_weights(model, model_path)
    model.summary()
    start_time = time.time()
    pred_bbox = model.predict(image_data)
    print('pred_bbox>>>>>>>>>>>>>>>>>', pred_bbox)
    end_time = time.time()
    print("time: %.2f ms" % (1000 * (end_time - start_time)))
    pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
    pred_bbox = tf.concat(pred_bbox, axis=0)
    # Map bbox coordinates from the 416x416 input back to the original image
    # and drop invalid boxes.
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.3)
    bboxes = utils.nms(bboxes, 0.45, method='nms')
    # Draw the bounding boxes on the original image.
    image = utils.draw_bbox(original_image, bboxes)
    image = Image.fromarray(image)
    image.show()
def predict(self, image):
    org_image = np.copy(image)
    org_h, org_w, _ = org_image.shape
    image_data = utils.image_preporcess(image, [self.input_size, self.input_size])
    image_data = image_data[np.newaxis, ...]
    # Full tracing so the run metadata can be inspected with a profiler.
    options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    pred_sbbox, pred_mbbox, pred_lbbox, conv_sbbox, conv_mbbox, conv_lbbox = self.sess.run(
        [
            self.pred_sbbox, self.pred_mbbox, self.pred_lbbox,
            self.conv_sbbox, self.conv_mbbox, self.conv_lbbox
        ],
        feed_dict={
            self.input_data: image_data,
            self.trainable: False
        },
        options=options,
        run_metadata=run_metadata)
    # This variant packs 9 fields per box ahead of the class scores.
    pred_bbox = np.concatenate([
        np.reshape(pred_sbbox, (-1, 9 + self.num_classes)),
        np.reshape(pred_mbbox, (-1, 9 + self.num_classes)),
        np.reshape(pred_lbbox, (-1, 9 + self.num_classes))
    ], axis=0)
    bboxes1 = utils.postprocess_boxes(pred_bbox, (org_h, org_w), self.input_size,
                                      self.score_threshold)
    bboxes = utils.nms(bboxes1, self.iou_threshold)
    return bboxes
def predict(self, image_path):
    original_image = cv2.imread(image_path)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]
    image_data = utils.image_preporcess(np.copy(original_image),
                                        [self.input_size, self.input_size])
    image_data = image_data[np.newaxis, ...]
    with tf.Session(graph=self.graph) as sess:
        # Use the session opened above (the original called self.sess here,
        # bypassing the session it had just created).
        pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
            [
                self.return_tensors[1], self.return_tensors[2],
                self.return_tensors[3]
            ],
            feed_dict={self.return_tensors[0]: image_data})
    pred_bbox = np.concatenate([
        np.reshape(pred_sbbox, (-1, 5 + self.num_classes)),
        np.reshape(pred_mbbox, (-1, 5 + self.num_classes)),
        np.reshape(pred_lbbox, (-1, 5 + self.num_classes))
    ], axis=0)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                     self.input_size, 0.3)
    bboxes = utils.nms(bboxes, 0.45, method='nms')
    image, label = utils.draw_bbox(original_image, bboxes)
    cv2.imwrite('static/images/test.jpg', image)
    return label
def batch_bboxes(model, frames):
    all_image_data = [None] * len(frames)
    sizes = [None] * len(frames)
    for i, frame in enumerate(frames):
        # Move the frame to the GPU.
        all_image_data[i] = frame_to_gpu(frame)
        sizes[i] = frame.shape[:2]
    stacked = tf.stack(all_image_data, axis=1)
    trimmed = tf.squeeze(stacked)
    pred_bbox = model.predict(trimmed)
    pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
    all_bboxes, probs, classes, image_nums = utils.postprocess_boxes(
        pred_bbox, sizes[0], INPUT_SIZE, 0.25)
    bboxes = utils.filter_people(all_bboxes, probs, classes)
    bboxes = utils.nms(bboxes, 0.213, method='nms')
    bboxes = np.array(bboxes)
    # The original ended without returning; returning the boxes matches the
    # function's name and how the other snippets are used.
    return bboxes
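# frame_to_gpu is not defined anywhere in this collection; a plausible sketch,
# assuming it letterboxes the frame and materializes it as a tensor on the GPU.
# The function name matches the call above, but the body is an assumption:
def frame_to_gpu(frame):
    image_data = utils.image_preporcess(np.copy(frame), [INPUT_SIZE, INPUT_SIZE])
    with tf.device('/GPU:0'):
        return tf.convert_to_tensor(image_data, dtype=tf.float32)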
def predicate(original_image, pb_file):
    if pb_file is None:
        pb_file = DEFAULT_PB
    return_elements = ["input/input_data:0", "pred_sbbox/concat_2:0",
                       "pred_mbbox/concat_2:0", "pred_lbbox/concat_2:0"]
    num_classes = 66
    input_size = 608
    graph = tf.Graph()
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]
    image_data = utils.image_preporcess(np.copy(original_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...]
    return_tensors = utils.read_pb_return_tensors(graph, pb_file, return_elements)
    with tf.compat.v1.Session(graph=graph) as sess:
        pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
            [return_tensors[1], return_tensors[2], return_tensors[3]],
            feed_dict={return_tensors[0]: image_data})
    pred_bbox = np.concatenate([np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                                np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                                np.reshape(pred_lbbox, (-1, 5 + num_classes))],
                               axis=0)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.3)
    bboxes = utils.nms(bboxes, 0.45, method='nms')
    return bboxes, original_image
def predict(self, input_rgb, input_lwir):
    org_image_rgb = np.copy(input_rgb)
    org_image_lwir = np.copy(input_lwir)
    org_h, org_w, _ = org_image_rgb.shape
    img_rgb, img_lwir = utils.image_preporcess(org_image_rgb, org_image_lwir,
                                               [self.input_size, self.input_size])
    img_rgb = img_rgb[np.newaxis, ...]
    img_lwir = img_lwir[np.newaxis, ...]
    pred_sbbox, pred_mbbox, pred_lbbox = self.sess.run(
        [self.pred_sbbox, self.pred_mbbox, self.pred_lbbox],
        feed_dict={
            self.input_rgb: img_rgb,
            self.input_lwir: img_lwir,
            self.trainable: False
        })
    pred_bbox = np.concatenate([np.reshape(pred_sbbox, (-1, 5 + self.num_classes)),
                                np.reshape(pred_mbbox, (-1, 5 + self.num_classes)),
                                np.reshape(pred_lbbox, (-1, 5 + self.num_classes))],
                               axis=0)
    bboxes = utils.postprocess_boxes(pred_bbox, (org_h, org_w), self.input_size,
                                     self.score_threshold)
    bboxes = utils.nms(bboxes, self.iou_threshold)
    return bboxes
def calculate(self, frame):
    start = int(round(time.time() * 1000))
    # Image to be processed.
    original_image = frame
    # Read class names.
    class_names = {}
    with open(cfg.YOLO.CLASSES, 'r') as data:
        for ID, name in enumerate(data):
            class_names[ID] = name.strip('\n')
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]
    image_data = utils.image_preporcess(np.copy(original_image),
                                        [self.input_size, self.input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    pred_bbox = self.model.predict(image_data)
    pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
    pred_bbox = tf.concat(pred_bbox, axis=0)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                     self.input_size, 0.3)
    bboxes = utils.nms(bboxes, 0.45, method='nms')
    # With the objects detected and boxed, collect the class names into a list.
    objects_detected = []
    for x0, y0, x1, y1, prob, class_id in bboxes:
        objects_detected.append(class_names[class_id])
    exec_time = int(round(time.time() * 1000)) - start
    print(f"Objects Detected: {objects_detected}")
    return objects_detected, exec_time
def detect_img(fim, start_h, start_w, fpredict_bboxes):
    img_size = fim.shape[:2]
    image_data = utils.image_preporcess(np.copy(fim), [input_size, input_size])
    image_data = image_data[np.newaxis, ...]
    pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
        [return_tensors[1], return_tensors[2], return_tensors[3]],
        feed_dict={return_tensors[0]: image_data})
    pred_bbox = np.concatenate([
        np.reshape(pred_sbbox, (-1, 5 + num_classes)),
        np.reshape(pred_mbbox, (-1, 5 + num_classes)),
        np.reshape(pred_lbbox, (-1, 5 + num_classes))
    ], axis=0)
    bboxes = utils.postprocess_boxes(pred_bbox, img_size, input_size, score_thresh)
    bboxes = utils.nms(bboxes, iou_type, iou_thresh, method='nms')
    # bboxes: [x_min, y_min, x_max, y_max, probability, cls_id]; shift each box
    # from tile-local coordinates to full-image coordinates.
    for i, bbox in enumerate(bboxes):
        coor = np.array(bbox[:4], dtype=np.int32)
        bbox[0] = int(start_w + coor[0])
        bbox[1] = int(start_h + coor[1])
        bbox[2] = int(start_w + coor[2])
        bbox[3] = int(start_h + coor[3])
        fpredict_bboxes.append(bbox)
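# detect_img shifts tile-local boxes into full-image coordinates via start_h
# and start_w, which suggests sliding-window inference over a large image. A
# usage sketch under that assumption (non-overlapping tiles of input_size
# pixels; detect_tiled is a hypothetical driver, not from the original code):
def detect_tiled(full_image):
    fpredict_bboxes = []
    h, w = full_image.shape[:2]
    for start_h in range(0, h, input_size):
        for start_w in range(0, w, input_size):
            tile = full_image[start_h:start_h + input_size,
                              start_w:start_w + input_size]
            detect_img(tile, start_h, start_w, fpredict_bboxes)
    return fpredict_bboxes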
def alone_bbox(image_path, save_path):
    class_list = ["bench", "roadblock", "babycar", "wheelchair"]
    return_elements = [
        "input/input_data:0", "pred_sbbox/concat_2:0",
        "pred_mbbox/concat_2:0", "pred_lbbox/concat_2:0"
    ]
    input_size = cfg.TEST.INPUT_SIZE
    graph = tf.Graph()
    save_file_name = osp.basename(image_path).split('.')[0] + '.txt'
    original_image = cv2.imread(image_path)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]
    image_data = utils.image_preporcess(np.copy(original_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...]
    with open(osp.join(save_path, save_file_name), 'a') as fp:
        name_dict_list = []
        bboxes_list = []
        # Run one single-class detector per label and collect the results.
        for newlabel in class_list:
            pb_file = "./yolov3_{}.pb".format(newlabel)
            name_dict = utils.read_class_names(
                "./data/classes/{}.names".format(newlabel))
            name_dict_list.append(name_dict)
            num_classes = len(name_dict)
            return_tensors = utils.read_pb_return_tensors(graph, pb_file,
                                                          return_elements)
            with tf.Session(graph=graph) as sess:
                pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
                    [return_tensors[1], return_tensors[2], return_tensors[3]],
                    feed_dict={return_tensors[0]: image_data})
            pred_bbox = np.concatenate([
                np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                np.reshape(pred_lbbox, (-1, 5 + num_classes))
            ], axis=0)
            bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                             input_size, 0.9)
            bboxes = utils.nms(bboxes, 0.90, method='nms')
            bboxes_list.append(bboxes)
            o = str(' ' + str(0))
            for bbox in bboxes:
                min_x, min_y, max_x, max_y = [
                    str(int(bbox[i])) for i in range(4)
                ]
                label_name = name_dict[int(bbox[5])]
                fp.write(label_name + o + o + o + ' ' + min_x + ' ' + min_y +
                         ' ' + max_x + ' ' + max_y + o + o + o + o + o + o + o)
                fp.write('\n')
    return bboxes_list, name_dict_list
def predict(image_path):
    original_image = cv2.imread(image_path)  # read the image
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]
    image_data = utils.image_preporcess(np.copy(original_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...]
    return_tensors = utils.read_pb_return_tensors(graph, pb_file, return_elements)
    with tf.Session(graph=graph) as sess:
        pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
            [return_tensors[1], return_tensors[2], return_tensors[3]],
            feed_dict={return_tensors[0]: image_data})
    pred_bbox = np.concatenate([
        np.reshape(pred_sbbox, (-1, 5 + num_classes)),
        np.reshape(pred_mbbox, (-1, 5 + num_classes)),
        np.reshape(pred_lbbox, (-1, 5 + num_classes))
    ], axis=0)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.35)
    bboxes = utils.nms(bboxes, 0.45, method='nms')
    image = utils.draw_bbox(original_image, bboxes)
    image = Image.fromarray(image)
    image.show()
    image.save(output_path)
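# utils.draw_bbox is used throughout this collection; a minimal sketch of its
# likely behavior, assuming boxes in [xmin, ymin, xmax, ymax, score, class_id]
# format and a classes dict mapping id -> name. An illustration, not the
# verified implementation.
def draw_bbox_sketch(image, bboxes, classes):
    for bbox in bboxes:
        x1, y1, x2, y2 = (int(v) for v in bbox[:4])
        score, class_id = bbox[4], int(bbox[5])
        label = "%s: %.2f" % (classes[class_id], score)
        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(image, label, (x1, y1 - 4), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, (255, 0, 0), 1)
    return image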
def coco_bbox(image_path, save_path, return_tensors, graph):
    input_size = cfg.TEST.INPUT_SIZE
    save_file_name = osp.basename(image_path).split('.jp')[0] + '.txt'
    original_image = cv2.imread(image_path)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]
    image_data = utils.image_preporcess(np.copy(original_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...]
    name_dict = utils.read_class_names(cfg.TEST.CLASSES)
    num_classes = len(name_dict)
    choose_classes = cfg.TEST.CHOOSE_CLASSES
    with open(osp.join(save_path, save_file_name), 'w') as fp:
        with tf.compat.v1.Session(graph=graph) as sess:
            pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
                [return_tensors[1], return_tensors[2], return_tensors[3]],
                feed_dict={return_tensors[0]: image_data})
        pred_bbox = np.concatenate([
            np.reshape(pred_sbbox, (-1, 5 + num_classes)),
            np.reshape(pred_mbbox, (-1, 5 + num_classes)),
            np.reshape(pred_lbbox, (-1, 5 + num_classes))
        ], axis=0)
        bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                         input_size, 0.45)
        bboxes = utils.nms(bboxes, 0.35, method='nms')
        o = str(' ' + str(0))
        for bbox in bboxes:
            min_x, min_y, max_x, max_y = [str(int(bbox[i])) for i in range(4)]
            score = str(round(bbox[4], 2))
            label_name = name_dict[int(bbox[5])]
            # Map COCO labels onto KITTI-style categories.
            if label_name == 'person':
                label_name_choose = 'Pedestrian'
            elif label_name in ('bicycle', 'motorbike'):
                label_name_choose = 'Cyclist'
            elif label_name in ('car', 'bus', 'truck'):
                label_name_choose = 'Car'
            else:
                label_name_choose = 'DontCare'
            if label_name in choose_classes:
                # Write the mapped label (the original wrote label_name,
                # leaving label_name_choose unused).
                fp.write(label_name_choose + o + o + o + ' ' + min_x + ' ' +
                         min_y + ' ' + max_x + ' ' + max_y + o + o + o + o +
                         o + o + o + " " + score)
                fp.write('\n')
    return original_image, bboxes, name_dict
def predict_logo_boxes(image_path, input_size, num_classes, original_image,
                       return_tensors, sess):
    original_image_size = original_image.shape[:2]
    image_data = utils.image_preporcess(np.copy(original_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...]
    pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
        [return_tensors[1], return_tensors[2], return_tensors[3]],
        feed_dict={return_tensors[0]: image_data})
    pred_yolo_bbox = np.concatenate([np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                                     np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                                     np.reshape(pred_lbbox, (-1, 5 + num_classes))],
                                    axis=0)
    post_yolo_boxes = utils.postprocess_boxes(pred_yolo_bbox, original_image_size,
                                              input_size, 0.1)
    post_yolo_boxes = utils.nms(post_yolo_boxes, 0.2, method='nms')
    post_yolo_boxes = ipm.logo_filter(post_yolo_boxes)
    ocr_boxes = ipm.text_detection(image_path)
    # cost_match_matrix holds the pairwise distances between YOLO boxes and
    # OCR boxes.
    cost_match_matrix = ipm.cost_calculating(post_yolo_boxes, ocr_boxes)
    row_ind, col_ind = linear_sum_assignment(cost_match_matrix)
    ocr_boxes = [ocr_boxes[t] for t in col_ind]
    yolo_boxes = [post_yolo_boxes[t] for t in row_ind]
    distance = cost_match_matrix[row_ind, col_ind]
    # Keep only matched pairs closer than 100 pixels.
    mask = [d < 100 for d in distance]
    yolo_boxes = np.array(yolo_boxes)[mask].tolist()
    ocr_boxes = np.array(ocr_boxes)[mask].tolist()
    return ocr_boxes, post_yolo_boxes
def on_run(image):
    image_data = utils.image_preporcess(np.copy(image), [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    pred_bbox = model.predict(image_data)
    if model_name == "yolov3":
        pred_bbox = utils.postprocess_bbbox(pred_bbox, anchors, STRIDES)
    else:
        pred_bbox = utils.postprocess_bbbox(pred_bbox, anchors, STRIDES, XYSCALE)
    bboxes = utils.postprocess_boxes(pred_bbox, image.shape[:-1], input_size,
                                     conf_threshold)
    # bboxes: [[xmin, ymin, xmax, ymax, score, class], ...]
    bboxes = utils.nms(bboxes, iou_threshold, method='nms')
    return {'bboxes': np.array(bboxes)}
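# on_run relies on module-level anchors, STRIDES and XYSCALE. Typical values
# from the common YOLOv3/YOLOv4 TensorFlow ports are shown below; they are
# assumptions and should be checked against your own config:
STRIDES = np.array([8, 16, 32])
ANCHORS = np.array([(10, 13), (16, 30), (33, 23),       # small-object scale
                    (30, 61), (62, 45), (59, 119),      # medium-object scale
                    (116, 90), (156, 198), (373, 326)   # large-object scale
                    ]).reshape(3, 3, 2)
XYSCALE = [1.2, 1.1, 1.05]  # used by YOLOv4 only; YOLOv3 omits it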
def process(image_path, targetFolder, sess, return_tensors):
    head, tail = os.path.split(image_path)
    localFileName = tail
    original_image = cv2.imread(image_path)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]
    image_data = utils.image_preporcess(np.copy(original_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...]
    pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
        [return_tensors[1], return_tensors[2], return_tensors[3]],
        feed_dict={return_tensors[0]: image_data})
    pred_bbox = np.concatenate([
        np.reshape(pred_sbbox, (-1, 5 + num_classes)),
        np.reshape(pred_mbbox, (-1, 5 + num_classes)),
        np.reshape(pred_lbbox, (-1, 5 + num_classes))
    ], axis=0)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size,
                                     score_threshold)
    bboxes = utils.nms(bboxes, 0.45, method='nms')
    image = utils.draw_bbox(original_image, bboxes)
    image = Image.fromarray(image)
    exportName = "out_" + localFileName
    filepath = targetFolder + "/" + exportName
    image.save(filepath)
def predict(self, images):
    org_h = [0] * self.batch_size
    org_w = [0] * self.batch_size
    for i in range(self.batch_size):
        org_h[i], org_w[i], _ = images[i].shape
    image_data = utils.images_preporcess(images,
                                         [self.input_size, self.input_size])
    start = time.time()
    pred_sbbox, pred_mbbox, pred_lbbox = self.sess.run(
        [self.pred_sbbox, self.pred_mbbox, self.pred_lbbox],
        feed_dict={self.input_data: image_data})
    end = time.time()
    print("inference time excluding postprocess: %.2f ms" % ((end - start) * 1000))
    batch_bboxes = []
    for idx in range(self.batch_size):
        pred_bbox = np.concatenate([
            np.reshape(pred_sbbox[idx], (-1, 5 + self.num_classes)),
            np.reshape(pred_mbbox[idx], (-1, 5 + self.num_classes)),
            np.reshape(pred_lbbox[idx], (-1, 5 + self.num_classes))
        ], axis=0)
        bboxes = utils.postprocess_boxes(pred_bbox, (org_h[idx], org_w[idx]),
                                         self.input_size, self.score_threshold)
        batch_bboxes.append(utils.nms(bboxes, self.iou_threshold))
    end_ = time.time()
    print("inference time including postprocess: %.2f ms" % ((end_ - start) * 1000))
    return batch_bboxes, (end - start)
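# utils.images_preporcess (the batch variant used above) is not shown in this
# collection; a sketch, assuming it letterboxes each image exactly like
# image_preporcess_sketch earlier and stacks the results into one
# (batch, size, size, 3) float array:
def images_preporcess_sketch(images, target_size):
    return np.stack(
        [image_preporcess_sketch(img, target_size) for img in images],
        axis=0).astype(np.float32)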
def detect_images(model, image_path, box=None, output_path="", id=0,
                  write_file=True, show=False):
    """Object detection on the given image.

    Run the YOLO model on the given image, with post-processing including NMS.
    Save the output image to file, or show the image if specified.

    Args:
        model: The YOLO model to be used.
        image_path: Path to the image.
        box: Bounding box coordinates to crop to, as a list [x1, y1, x2, y2].
        output_path: Path to write the output image.
        id: Index of the bounding box for a given frame.
        write_file: Whether to write the output image to output_path.
        show: Whether to display the image.
    """
    original_image = cv2.imread(image_path)
    if box:
        original_image = original_image[box[1]:box[3], box[0]:box[2]]
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]
    image_data, old_image_size, new_image_size = utils.image_preprocess(
        np.copy(original_image))
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    pred_bbox = model.predict_on_batch(image_data)
    pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
    pred_bbox = tf.concat(pred_bbox, axis=0)
    bboxes = utils.postprocess_boxes(pred_bbox, old_image_size, new_image_size, 0.3)
    bboxes = utils.nms(bboxes, 0.45, method='nms')
    image = utils.draw_bbox(original_image, bboxes)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    if id:
        i = output_path.rfind('.')
        output_path = output_path[:i] + '_' + str(id) + output_path[i:]
    if output_path != '' and write_file:
        i = output_path.rfind('/')
        output_directory = output_path[:i]
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)
        cv2.imwrite(output_path, image)
    if show:
        # Show the image and hold until a key is pressed.
        cv2.imshow("predicted image", image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
def mul_image(watch_dir="./docs/images", output_path='./output'):
    imageDir = os.path.abspath(watch_dir)
    imageList = glob.glob(os.path.join(imageDir, '*.jpg'))
    graph = tf.Graph()
    pb_file = "./yolov3_coco_v3.pb"
    return_elements = [
        "input/input_data:0", "pred_sbbox/concat_2:0",
        "pred_mbbox/concat_2:0", "pred_lbbox/concat_2:0"
    ]
    return_tensors = utils.read_pb_return_tensors(graph, pb_file, return_elements)
    with tf.Session(graph=graph) as sess:
        for item in imageList:
            image_path = item
            end = "/"
            name = item[item.rfind(end):]
            num_classes = 80
            input_size = 608
            out = output_path + name
            # The BGR->RGB conversion is intentionally skipped here so that
            # cv2.imwrite below receives a BGR image.
            original_image = cv2.imread(image_path)
            original_image_size = original_image.shape[:2]
            image_data = utils.image_preporcess(np.copy(original_image),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...]
            pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
                [return_tensors[1], return_tensors[2], return_tensors[3]],
                feed_dict={return_tensors[0]: image_data})
            pred_bbox = np.concatenate([
                np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                np.reshape(pred_lbbox, (-1, 5 + num_classes))
            ], axis=0)
            # bboxes: [[xmin, ymin, xmax, ymax, score, class], ...]
            bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                             input_size, 0.45)
            bboxes = utils.nms(bboxes, 0.45, method='nms')
            image = utils.draw_bbox(original_image, bboxes)
            cv2.imwrite(out, image)
def vehicle_detection(image):
    protocol = ProtocolType.from_str('http')
    input_name = 'input/input_data'
    sboxes = 'pred_sbbox/concat_2'
    mboxes = 'pred_mbbox/concat_2'
    lboxes = 'pred_lbbox/concat_2'
    model_name = 'vehicle-detector'
    ctx = InferContext('localhost:8000', protocol, model_name, -1, False)
    ori_size = image.shape[:2]
    image_preprocess = utils.image_preporcess(image, [416, 416])
    image_data = [image_preprocess]
    image_idx = 0
    request_ids = []
    batch_size = 4
    last_request = False
    # Send the data in batches of up to batch_size, wrapping around the input
    # list; the final (possibly smaller) batch sets last_request.
    while not last_request:
        input_batch = []
        for idx in range(batch_size):
            input_batch.append(image_data[image_idx])
            image_idx = (image_idx + 1) % len(image_data)
            if image_idx == 0:
                last_request = True
                batch_size = len(input_batch)
                break
        request_ids.append(
            ctx.async_run({input_name: input_batch}, {
                sboxes: (InferContext.ResultFormat.RAW),
                mboxes: (InferContext.ResultFormat.RAW),
                lboxes: (InferContext.ResultFormat.RAW)
            }, batch_size))
    raw_results = []
    num_classes = 80
    # For async, retrieve results in the order the requests were sent.
    for request_id in request_ids:
        raw_results.append(ctx.get_async_run_results(request_id, True))
    for ix in range(len(raw_results)):
        pred_sbbox = raw_results[ix]['pred_sbbox/concat_2']
        pred_mbbox = raw_results[ix]['pred_mbbox/concat_2']
        pred_lbbox = raw_results[ix]['pred_lbbox/concat_2']
        pred_bbox = np.concatenate([
            np.reshape(pred_sbbox, (-1, 5 + num_classes)),
            np.reshape(pred_mbbox, (-1, 5 + num_classes)),
            np.reshape(pred_lbbox, (-1, 5 + num_classes))
        ], axis=0)
        bboxes = utils.postprocess_boxes(pred_bbox, ori_size, 416, 0.3)
        bboxes = utils.nms(bboxes, 0.45, method='nms')
        return bboxes
def video_without_saving():
    classes = utils.read_class_names(cfg.YOLO.CLASSES)
    num_classes = len(classes)
    return_elements = [
        "input/input_data:0", "pred_sbbox/concat_2:0",
        "pred_mbbox/concat_2:0", "pred_lbbox/concat_2:0"
    ]
    pb_file = "./yolov3_coco.pb"
    video_path = "docs/images/racoon.mp4"
    video_path = 0  # override: read from the webcam instead of the file
    input_size = 416
    graph = tf.Graph()
    return_tensors = utils.read_pb_return_tensors(graph, pb_file, return_elements)
    with tf.Session(graph=graph) as sess:
        vid = cv2.VideoCapture(video_path)
        while True:
            return_value, frame = vid.read()
            if return_value:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image = Image.fromarray(frame)
            else:
                raise ValueError("No image!")
            frame_size = frame.shape[:2]
            image_data = utils.image_preporcess(np.copy(frame),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...]
            prev_time = time.time()
            pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
                [return_tensors[1], return_tensors[2], return_tensors[3]],
                feed_dict={return_tensors[0]: image_data})
            pred_bbox = np.concatenate([
                np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                np.reshape(pred_lbbox, (-1, 5 + num_classes))
            ], axis=0)
            bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.3)
            bboxes = utils.nms(bboxes, 0.45, method='nms')
            image = utils.draw_bbox(frame, bboxes)
            curr_time = time.time()
            exec_time = curr_time - prev_time
            info = "time: %.2f ms" % (1000 * exec_time)
            cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
            result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            cv2.imshow("result", result)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
def gen():
    for i in range(5):
        count = 0
        cap = cv2.VideoCapture("full_video.mp4")
        start_frame_number = 1000
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame_number)
        while True:
            cap.set(cv2.CAP_PROP_POS_MSEC, (count * 125))
            ret, frame = cap.read()
            count += 1
            if not ret:
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
            frame_size = frame.shape[:2]
            image_data = utils.image_preporcess(np.copy(frame),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)
            prev_time = time.time()
            request.inputs['input'].CopyFrom(
                tf.contrib.util.make_tensor_proto(
                    image_data, shape=[1, input_size, input_size, 3]))
            result_future = stub.Predict.future(request, 10.25)
            # Fetch the result once instead of blocking on .result() three times.
            result = result_future.result()
            pred_sbbox = np.asarray(result.outputs['pred_sbbox'].float_val)
            pred_mbbox = np.asarray(result.outputs['pred_mbbox'].float_val)
            pred_lbbox = np.asarray(result.outputs['pred_lbbox'].float_val)
            pred_bbox = np.concatenate([
                np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                np.reshape(pred_lbbox, (-1, 5 + num_classes))
            ], axis=0)
            bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.3)
            bboxes = utils.nms(bboxes, 0.45, method='nms')
            image = utils.draw_bbox(frame, bboxes)
            curr_time = time.time()
            exec_time = curr_time - prev_time
            print(exec_time)
            result_img = np.asarray(image)
            # The MJPEG output should be BGR.
            frame = cv2.cvtColor(result_img, cv2.COLOR_RGB2BGR)
            ret, jpeg = cv2.imencode('.jpg', frame)
            frame = jpeg.tobytes()
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n\r\n')
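# gen() assumes module-level `stub` and `request` objects for TensorFlow
# Serving. A setup sketch; the host, port, model name and signature name are
# assumptions for illustration:
import grpc
from tensorflow_serving.apis import predict_pb2, prediction_service_pb2_grpc

channel = grpc.insecure_channel('localhost:8500')
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
request = predict_pb2.PredictRequest()
request.model_spec.name = 'yolov3'                      # assumed model name
request.model_spec.signature_name = 'serving_default'   # assumed signature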
def show(pred_bbox):
    pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
    pred_bbox = tf.concat(pred_bbox, axis=0)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.3)
    bboxes = utils.nms(bboxes, 0.45, method='nms')
    image = utils.draw_bbox(original_image, bboxes, show_label=True)
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    cv2.imshow('sample', image)
    cv2.waitKey(0)
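# show() depends on module-level original_image, original_image_size and
# input_size; a usage sketch under those assumptions (the image path and the
# `model` object are placeholders, not from the original code):
original_image = cv2.imread('docs/images/sample.jpg')  # hypothetical path
original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
original_image_size = original_image.shape[:2]
input_size = 416
image_data = utils.image_preporcess(np.copy(original_image),
                                    [input_size, input_size])
show(model.predict(image_data[np.newaxis, ...].astype(np.float32)))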
def detection(vid):
    with tf.Session(graph=graph) as sess:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            raise ValueError("No image!")
        frame_size = frame.shape[:2]
        image_data = utils.image_preporcess(np.copy(frame),
                                            [input_size, input_size])
        image_data = image_data[np.newaxis, ...]
        prev_time = time.time()
        pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
            [return_tensors[1], return_tensors[2], return_tensors[3]],
            feed_dict={return_tensors[0]: image_data})
        pred_bbox = np.concatenate([
            np.reshape(pred_sbbox, (-1, 5 + num_classes)),
            np.reshape(pred_mbbox, (-1, 5 + num_classes)),
            np.reshape(pred_lbbox, (-1, 5 + num_classes))
        ], axis=0)
        bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.3)
        bboxes = utils.nms(bboxes, 0.45, method='nms')
        image, detected = utils.draw_bbox(frame, bboxes)
        detected = np.asarray(detected)
        # Count how many detections fall into each of the classes.
        class_count = []
        for i in range(len(obj_classes)):
            obj_count = 0
            for j in range(len(detected)):
                if int(detected[j][5]) == i:
                    obj_count += 1
            class_count = np.append(class_count, obj_count)
        curr_time = time.time()
        exec_time = curr_time - prev_time
        info = "time: %.2f ms" % (1000 * exec_time)
        result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        return result, class_count
def get_object(video_url, threshold=0.45):
    input_layer = tf.keras.layers.Input([input_size, input_size, 3])
    feature_maps = YOLOv3(input_layer)
    bbox_tensors = []
    for i, fm in enumerate(feature_maps):
        bbox_tensor = decode(fm, i)
        bbox_tensors.append(bbox_tensor)
    model = tf.keras.Model(input_layer, bbox_tensors)
    utils.load_weights(model, "yolov3_union_10000.weights")
    model.summary()
    vid = cv2.VideoCapture(video_url)
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        else:
            return "No image"
        frame_size = frame.shape[:2]
        image_data = utils.image_preporcess(np.copy(frame),
                                            [input_size, input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        prev_time = time.time()
        pred_bbox = model.predict_on_batch(image_data)
        curr_time = time.time()
        exec_time = curr_time - prev_time
        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)
        bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.3)
        bboxes = utils.nms(bboxes, threshold, method='nms')
        image = utils.draw_bbox(frame, bboxes)
        result = np.asarray(image)
        info = "time: %.2f ms" % (1000 * exec_time)
        cv2.putText(result, text=info, org=(50, 70),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1,
                    color=(255, 0, 0), thickness=2)
        result = cv2.cvtColor(result, cv2.COLOR_RGB2BGR)
        # Encode the BGR result (the original encoded the RGB image, which
        # swaps colors in the JPEG output).
        retval, buffer = cv2.imencode(".jpeg", result)
        yield ((b'--frame\r\n'
                b'Content-Type: image/jpeg\r\n\r\n' + buffer.tobytes() +
                b'\r\n'))
def export_yolo_video(video_path, output_path):
    vid = cv2.VideoCapture(video_path)
    out = cv2.VideoWriter(
        str(Path(output_path, Path(video_path).stem + '_output.mp4')),
        cv2.VideoWriter_fourcc(*'mp4v'),
        float(vid.get(cv2.CAP_PROP_FPS)),
        (round(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
         round(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))),
    )
    outputs = []
    print(Path(output_path, Path(video_path).stem + '_output.mp4'))
    print(Path(output_path, Path(video_path).stem + '_output.npz'))
    max_frames = vid.get(cv2.CAP_PROP_FRAME_COUNT)
    with tf.Session(graph=graph) as sess:
        for _ in trange(int(max_frames)):
            return_value, frame = vid.read()
            if return_value:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            else:
                raise ValueError("No image!")
            frame_size = frame.shape[:2]
            image_data = utils.image_preporcess(np.copy(frame),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...]
            pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
                [return_tensors[1], return_tensors[2], return_tensors[3]],
                feed_dict={return_tensors[0]: image_data})
            pred_bbox = np.concatenate([
                np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                np.reshape(pred_lbbox, (-1, 5 + num_classes))
            ], axis=0)
            bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.3)
            bboxes = utils.nms(bboxes, 0.45, method='nms')
            outputs.append(bboxes)
            image = utils.draw_bbox(frame, bboxes)
            result = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
            out.write(result)
    vid.release()
    out.release()
    np.savez(str(Path(output_path, Path(video_path).stem + '_output.npz')), outputs)
def do_video(self):
    vid = cv2.VideoCapture(self.video_path)
    while True:
        # frame is in RGB color space here.
        return_value, frame = vid.read()
        if return_value:
            # utils.image_preporcess applies cv2.COLOR_BGR2RGB internally;
            # if you trained your own model, you may be able to drop this
            # conversion.
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        else:
            raise ValueError("No image!")
        frame_size = frame.shape[:2]
        # The color space was already converted once during training.
        image_data = utils.image_preporcess(np.copy(frame),
                                            [self.input_size, self.input_size])
        image_data = image_data[np.newaxis, ...]
        pred_start_time = datetime.now()
        pred_sbbox, pred_mbbox, pred_lbbox = self.sess.run(
            [
                self.return_tensors[1], self.return_tensors[2],
                self.return_tensors[3]
            ],
            feed_dict={self.return_tensors[0]: image_data})
        pred_bbox = np.concatenate([
            np.reshape(pred_sbbox, (-1, 5 + self.class_name_len)),
            np.reshape(pred_mbbox, (-1, 5 + self.class_name_len)),
            np.reshape(pred_lbbox, (-1, 5 + self.class_name_len))
        ], axis=0)
        bboxes = utils.postprocess_boxes(pred_bbox, frame_size,
                                         self.input_size, 0.3)
        bboxes = utils.nms(bboxes, 0.45, method='nms')
        image = utils.draw_bbox(frame, bboxes)
        pred_end_time = datetime.now()
        print("Time per frame: {}".format(pred_end_time - pred_start_time))
        cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        cv2.imshow("result", result)
        # Exit key.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
def detect(image_path):
    # You can and should replace this line to receive the image directly
    # rather than from a file.
    original_image = cv2.imread(image_path)
    # Read class names.
    class_names = {}
    with open(cfg.YOLO.CLASSES, 'r') as data:
        for ID, name in enumerate(data):
            class_names[ID] = name.strip('\n')
    # Set up TensorFlow, Keras and YOLOv3.
    input_size = 416
    input_layer = tf.keras.layers.Input([input_size, input_size, 3])
    feature_maps = YOLOv3(input_layer)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]
    image_data = utils.image_preporcess(np.copy(original_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    bbox_tensors = []
    for i, fm in enumerate(feature_maps):
        bbox_tensor = decode(fm, i)
        bbox_tensors.append(bbox_tensor)
    model = tf.keras.Model(input_layer, bbox_tensors)
    load_weights(model, "./yolov3.weights")
    pred_bbox = model.predict(image_data)
    pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
    pred_bbox = tf.concat(pred_bbox, axis=0)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.3)
    bboxes = utils.nms(bboxes, 0.45, method='nms')
    # With the objects detected and boxed, collect the class names into a list.
    objects_detected = []
    for x0, y0, x1, y1, prob, class_id in bboxes:
        objects_detected.append(class_names[class_id])
    return objects_detected
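# utils.nms appears in every snippet above; a minimal greedy per-class NMS
# sketch of its assumed behavior (boxes in [xmin, ymin, xmax, ymax, score,
# class_id] format). An illustration, not the verified implementation.
def iou_sketch(a, b):
    # Intersection-over-union of two corner-format boxes.
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter + 1e-9)

def nms_sketch(bboxes, iou_threshold):
    best = []
    for cls in set(int(b[5]) for b in bboxes):
        cls_boxes = np.array([b for b in bboxes if int(b[5]) == cls])
        while len(cls_boxes):
            i = np.argmax(cls_boxes[:, 4])  # take the highest-scoring box
            keep = cls_boxes[i]
            best.append(keep)
            cls_boxes = np.delete(cls_boxes, i, axis=0)
            # Drop remaining boxes that overlap the kept one too much.
            cls_boxes = np.array([b for b in cls_boxes
                                  if iou_sketch(keep[:4], b[:4]) < iou_threshold])
    return best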