def detect_image(self, image):
    """Run the detector on one PIL image and return the raw outputs.

    The image is letterboxed to ``self.model_image_size`` when that size
    is set; otherwise its own width/height are rounded down to multiples
    of 32 and used as the target size.

    Args:
        image: PIL image (``width``, ``height`` and ``size`` are read).

    Returns:
        The ``(out_boxes, out_scores, out_classes)`` triple fetched from
        ``self.sess``.
    """
    if self.model_image_size != (None, None):
        for axis in (0, 1):
            assert self.model_image_size[axis] % 32 == 0, \
                'Multiples of 32 required'
        target_size = tuple(reversed(self.model_image_size))
    else:
        target_size = (image.width - (image.width % 32),
                       image.height - (image.height % 32))
    boxed_image = letterbox_image(image, target_size)

    pixels = np.array(boxed_image, dtype='float32')
    pixels /= 255.
    batch = np.expand_dims(pixels, 0)  # Add batch dimension.

    fetches = [self.boxes, self.scores, self.classes]
    feed = {
        self.yolo_model.input: batch,
        self.input_image_shape: [image.size[1], image.size[0]],
    }
    out_boxes, out_scores, out_classes = self.sess.run(fetches,
                                                       feed_dict=feed)
    return out_boxes, out_scores, out_classes
def process_image(img_path, input_shape):
    """Load an image file and turn it into a normalised model input batch.

    Args:
        img_path: Path of the image file to open.
        input_shape: Target model size. When it is not ``(None, None)``
            both entries must be multiples of 32 and the image is
            letterboxed to ``tuple(reversed(input_shape))``; otherwise the
            image's own size rounded down to multiples of 32 is used.

    Returns:
        tuple: ``(image_data, image_shape)`` where ``image_data`` is a
        float32 array scaled to [0, 1] with a leading batch axis and
        ``image_shape`` is the original ``(width, height)``.
    """
    # Use the image as a context manager so the underlying file handle is
    # released once the pixel data has been copied into a NumPy array.
    with Image.open(img_path) as image:
        image_shape = (image.width, image.height)
        if input_shape != (None, None):
            assert input_shape[0] % 32 == 0, 'Multiples of 32 required'
            assert input_shape[1] % 32 == 0, 'Multiples of 32 required'
            boxed_image = letterbox_image(image,
                                          tuple(reversed(input_shape)))
        else:
            new_image_size = (image.width - (image.width % 32),
                              image.height - (image.height % 32))
            boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')

    # Normalise to [0, 1].
    image_data /= 255.
    # Add batch dimension.
    image_data = np.expand_dims(image_data, 0)
    print(image_data.shape)
    return image_data, image_shape
def get_classification(self, im):
    """Classify the traffic-light state visible in a camera frame.

    Args:
        im: Image array; its last axis is reversed before the PIL image
            is built (presumably BGR -> RGB -- confirm against caller).

    Returns:
        tuple: ``(state, score)``. ``state`` is ``TrafficLight.RED``,
        ``YELLOW`` or ``GREEN`` for detected class 0, 1 or 2 and
        ``TrafficLight.UNKNOWN`` otherwise (including "nothing detected").
        ``score`` is the highest detection score, or ``0.0`` when there
        are no detections.

    Side effects:
        Stores the state in ``self.last_pred`` and increments
        ``self.image_count``.
    """
    image = Image.fromarray(im[..., ::-1])
    if self.model_image_size != (None, None):
        assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
        assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'
        boxed_image = letterbox_image(
            image, tuple(reversed(self.model_image_size)))
    else:
        new_image_size = (image.width - (image.width % 32),
                          image.height - (image.height % 32))
        boxed_image = letterbox_image(image, new_image_size)

    image_data = np.array(boxed_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

    _, out_scores, out_classes = self.sess.run(
        [self.boxes, self.scores, self.classes],
        feed_dict={
            self.yolo_model.input: image_data,
            self.input_image_shape: [image.size[1], image.size[0]],
            K.learning_phase(): 0
        })

    # Keep only the single most confident detection.
    max_class = None
    max_score = 0.0
    if out_scores.size > 0:
        best_idx = out_scores.argmax()
        max_class = out_classes[best_idx]
        max_score = out_scores[best_idx]

    # Class index -> light state; anything else (or no detection) is UNKNOWN.
    state_by_class = {
        0: TrafficLight.RED,
        1: TrafficLight.YELLOW,
        2: TrafficLight.GREEN,
    }
    rtn = state_by_class.get(max_class, TrafficLight.UNKNOWN)

    self.last_pred = rtn
    self.image_count += 1
    return rtn, max_score
def prepare_infer_input(img_path, input_size, transform):
    """Read an image and build the batched model input for inference.

    Args:
        img_path: Path of the image file, read with ``cv2.imread``.
        input_size: Target size forwarded to ``letterbox_image``.
        transform: Callable applied to the letterboxed RGB image; its
            result must support ``.unsqueeze(0)``.

    Returns:
        tuple: ``(img, model_input, scale, shift)`` -- the image as read
        by OpenCV, the transformed input with a leading batch axis, and
        the scale/shift values returned by ``letterbox_image``.

    Raises:
        OSError: If OpenCV could not read ``img_path``.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None; fail here with a
        # clear message instead of an opaque error inside cvtColor.
        raise OSError('failed to read image: {}'.format(img_path))

    # Keep-ratio resize to the input size, done on the RGB version.
    rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    new_img, scale, shift = letterbox_image(rgb_img, input_size)

    model_input = transform(new_img).unsqueeze(0)
    return img, model_input, scale, shift
def table_line(img, size=(512, 512), hprob=0.5, vprob=0.5, row=50, col=30,
               alph=15):
    """Predict the row and column lines of a table image.

    Args:
        img: Image array; the last axis is reversed before letterboxing.
        size: ``(width, height)`` passed to ``letterbox_image``.
        hprob: Threshold applied to channel 0 of the prediction
            (horizontal lines).
        vprob: Threshold applied to channel 1 of the prediction
            (vertical lines).
        row: ``lineW`` used when extracting row lines.
        col: ``lineW`` used when extracting column lines.
        alph: Forwarded to ``adjust_lines``.

    Returns:
        tuple: ``(rowboxes, colboxes)`` lists of line boxes, rescaled by
        the letterbox factors and post-processed by ``adjust_lines`` and
        ``line_to_line``.
    """
    sizew, sizeh = size
    inputBlob, fx, fy = letterbox_image(img[..., ::-1], (sizew, sizeh))
    with Timer('predict table lines'):
        batch = np.array([np.array(inputBlob) / 255.0])
        pred = model.predict(batch)
    pred = pred[0]

    vpred = (pred[..., 1] > vprob).astype(int)  # vertical-line mask
    hpred = (pred[..., 0] > hprob).astype(int)  # horizontal-line mask
    colboxes = get_table_line(vpred, axis=1, lineW=col)
    rowboxes = get_table_line(hpred, axis=0, lineW=row)

    def rescale(boxes):
        """Undo the letterbox scaling; return the boxes and their extent."""
        arr = np.array(boxes)
        arr[:, [0, 2]] = arr[:, [0, 2]] / fx
        arr[:, [1, 3]] = arr[:, [1, 3]] / fy
        xs = arr[:, [0, 2]]
        ys = arr[:, [1, 3]]
        extent = (xs.min(), xs.max(), ys.min(), ys.max())
        return arr.tolist(), extent

    ccolbox = []
    crowlbox = []
    if len(rowboxes) > 0:
        rowboxes, (xmin, xmax, ymin, ymax) = rescale(rowboxes)
        # Left and right border columns spanned by the row lines.
        ccolbox = [[xmin, ymin, xmin, ymax], [xmax, ymin, xmax, ymax]]
    if len(colboxes) > 0:
        colboxes, (xmin, xmax, ymin, ymax) = rescale(colboxes)
        # Top and bottom border rows spanned by the column lines.
        crowlbox = [[xmin, ymin, xmax, ymin], [xmin, ymax, xmax, ymax]]

    rowboxes += crowlbox
    colboxes += ccolbox

    rboxes_row_, rboxes_col_ = adjust_lines(rowboxes, colboxes, alph=alph)
    rowboxes += rboxes_row_
    colboxes += rboxes_col_

    row_count = len(rowboxes)
    col_count = len(colboxes)
    for r in range(row_count):
        for c in range(col_count):
            rowboxes[r] = line_to_line(rowboxes[r], colboxes[c], 10)
            colboxes[c] = line_to_line(colboxes[c], rowboxes[r], 10)
    return rowboxes, colboxes
def detect_image(self, image):
    """Detect objects in a PIL image and return them as plain lists.

    Args:
        image: PIL image (``width``, ``height`` and ``size`` are read).

    Returns:
        list: One ``[class_name, score, box]`` entry per detection, in
        reverse detection order. ``box`` is the row of ``out_boxes``
        rounded and clamped in place to the image bounds (presumably
        laid out as top, left, bottom, right -- confirm).
    """
    print('get detect_image--------------------')
    if self.model_image_size != (None, None):
        for axis in (0, 1):
            assert self.model_image_size[axis] % 32 == 0, \
                'Multiples of 32 required'
        target_size = tuple(reversed(self.model_image_size))
    else:
        target_size = (image.width - (image.width % 32),
                       image.height - (image.height % 32))
    boxed_image = letterbox_image(image, target_size)

    image_data = np.array(boxed_image, dtype='float32')
    print(image_data.shape)
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

    model_input = self.graph.get_tensor_by_name("input_1:0")
    fetches = [self.boxes, self.scores, self.classes]
    feed = {
        model_input: image_data,
        self.input_image_shape: [image.size[1], image.size[0]],
    }
    out_boxes, out_scores, out_classes = self.sess.run(fetches,
                                                       feed_dict=feed)

    results = []
    for idx in range(len(out_classes) - 1, -1, -1):
        class_name = self.class_names[out_classes[idx]]
        box = out_boxes[idx]
        score = out_scores[idx]
        # Round to the nearest pixel and clamp to the image.
        box[1] = max(0, np.floor(box[1] + 0.5).astype('int32'))
        box[0] = max(0, np.floor(box[0] + 0.5).astype('int32'))
        box[3] = min(image.size[0], np.floor(box[3] + 0.5).astype('int32'))
        box[2] = min(image.size[1], np.floor(box[2] + 0.5).astype('int32'))
        results.append([class_name, score, box])
    return results
def detect_image(weights, image_url, img_size, conf_thres, iou_thres):
    """Run a YOLOv5 TFLite model on one image and save an annotated copy.

    Args:
        weights: Model weights, forwarded to ``yolov5_tflite``.
        image_url: Path of the image file to open.
        img_size: Side length of the square model input.
        conf_thres: Confidence threshold, forwarded to ``yolov5_tflite``.
        iou_thres: IoU threshold, forwarded to ``yolov5_tflite``.

    Side effects:
        When at least one box is detected, writes
        ``<image stem>yolov5_output.jpg`` to the current directory and
        prints the FPS and total time.
    """
    start_time = time.time()

    image = Image.open(image_url)
    original_size = image.size[:2]
    size = (img_size, img_size)
    image_resized = letterbox_image(image, size)

    # np.asarray() on a PIL image may return a read-only array, which the
    # OpenCV drawing calls below cannot write into; take a writable copy.
    img = np.array(image)

    image_array = np.asarray(image_resized)
    normalized_image_array = image_array.astype(np.float32) / 255.0

    yolov5_tflite_obj = yolov5_tflite(weights, img_size, conf_thres,
                                      iou_thres)
    result_boxes, result_scores, result_class_names = \
        yolov5_tflite_obj.detect(normalized_image_array)

    if len(result_boxes) > 0:
        result_boxes = scale_coords(size, np.array(result_boxes),
                                    (original_size[1], original_size[0]))
        font = cv2.FONT_HERSHEY_SIMPLEX
        fontScale = 0.5
        # Label colour: green (the same in RGB and BGR channel order).
        color = (0, 255, 0)
        # Line thickness of 1 px.
        thickness = 1

        for i, r in enumerate(result_boxes):
            org = (int(r[0]), int(r[1]))
            cv2.rectangle(img, org, (int(r[2]), int(r[3])), (255, 0, 0), 1)
            cv2.putText(
                img,
                str(int(100 * result_scores[i])) + '% ' +
                str(result_class_names[i]), org, font, fontScale, color,
                thickness, cv2.LINE_AA)

        save_result_filepath = image_url.split('/')[-1].split(
            '.')[0] + 'yolov5_output.jpg'
        # Reverse the channel axis before handing the array to OpenCV.
        cv2.imwrite(save_result_filepath, img[:, :, ::-1])

        end_time = time.time()
        print('FPS:', 1 / (end_time - start_time))
        print('Total Time Taken:', end_time - start_time)
def detect(image_path, model_path, yolo_weights=None):
    """Load the model, run prediction, and export a frozen graph.

    Parameters
    ----------
    image_path: path of the image to run detection on
    model_path: checkpoint path restored into the session
    yolo_weights: not used by this function
    """
    image = Image.open(image_path)
    resized = letterbox_image(image, (416, 416))
    image_data = np.array(resized, dtype=np.float32)
    image_data /= 255.
    image_data = np.expand_dims(image_data, axis=0)

    pb_graph = tf.Graph()
    with pb_graph.as_default():
        input_image_shape = tf.placeholder(dtype=tf.int32, shape=(2, ),
                                           name="pred_im_shape")
        input_image = tf.placeholder(shape=[None, 416, 416, 3],
                                     dtype=tf.float32,
                                     name='pred_input_img')
        predictor = yolo_predictor(config.obj_threshold,
                                   config.nms_iou_threshold,
                                   config.classes_path,
                                   config.anchors_path)
        boxes, scores, classes = predictor.predict(input_image,
                                                   input_image_shape)
        for tensor in (input_image_shape, input_image, boxes, scores,
                       classes):
            print(tensor)

        with tf.Session(graph=pb_graph) as sess:
            saver = tf.train.Saver()
            saver.restore(sess, model_path)
            feed = {
                input_image: image_data,
                input_image_shape: [image.size[1], image.size[0]],
            }
            out_boxes, out_scores, out_classes = sess.run(
                [boxes, scores, classes], feed_dict=feed)

            # Freeze the variables into constants and serialise the graph.
            graph_def = tf.get_default_graph().as_graph_def()
            output_names = [
                'predict/pred_boxes', 'predict/pred_scores',
                'predict/pred_classes'
            ]
            frozen_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, graph_def, output_names)
            pb_file_path = 'F:\\github_working\\version_2_190114\\alsochen-tensorflow-yolo3-threeoutput\\tensorflow-yolo3\\pb_file\\model.pb'
            with tf.gfile.GFile(pb_file_path, 'wb') as f:
                f.write(frozen_graph_def.SerializeToString())
            print("pb save done")
    print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
def table_detect(img, sc=(416, 416), thresh=0.5, NMSthresh=0.3):
    """Detect tables in an image.

    Args:
        img: Image array (the original note says "GBR"; presumably BGR
            channel order -- confirm).
        sc: Network input size; only ``sc[0]`` is used, for both sides.
        thresh: Minimum class confidence for a detection to be kept.
        NMSthresh: NMS threshold forwarded to ``nms_box``.

    Returns:
        tuple: ``(boxes, adBoxes, confidences)`` as returned by
        ``fix_table_box_for_table_line`` together with the confidences
        that survived NMS.
    """
    scale = sc[0]
    img_height, img_width = img.shape[:2]
    letterboxed, fx, fy = letterbox_image(img[..., ::-1], (scale, scale))
    blob = cv2.dnn.blobFromImage(letterboxed,
                                 scalefactor=1.0,
                                 size=(scale, scale),
                                 swapRB=True,
                                 crop=False)
    tableDetectNet.setInput(blob / 255.0)
    output_names = tableDetectNet.getUnconnectedOutLayersNames()
    outputs = tableDetectNet.forward(output_names)

    confidences = []
    boxes = []
    for output in outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if not (confidence > thresh):
                continue
            # Map the normalised detection back to source-image pixels.
            center_x = int(detection[0] * scale / fx)
            center_y = int(detection[1] * scale / fy)
            width = int(detection[2] * scale / fx)
            height = int(detection[3] * scale / fy)
            left = int(center_x - width / 2)
            top = int(center_y - height / 2)
            if class_id != 1:
                continue
            confidences.append(float(confidence))
            # Clamp the box so it stays strictly inside the image.
            boxes.append([
                max(left, 1),
                max(top, 1),
                min(left + width, img_width - 1),
                min(top + height, img_height - 1),
            ])

    boxes = np.array(boxes)
    confidences = np.array(confidences)
    if len(boxes) > 0:
        boxes, confidences = nms_box(boxes,
                                     confidences,
                                     score_threshold=thresh,
                                     nms_threshold=NMSthresh)
    boxes, adBoxes = fix_table_box_for_table_line(boxes, confidences, img)
    return boxes, adBoxes, confidences
def forward(self, bottom, top):
    """Feed the next image listed in ``self.fp`` into the first top blob.

    Reads one line from ``self.fp`` (rewinding to the start when the end
    of the file is reached), loads that image with OpenCV, letterboxes it
    and copies it into ``top[0]``.

    Args:
        bottom: Not used.
        top: Sequence of output blobs; only ``top[0]`` is written.
    """
    line = self.fp.readline()
    if line == "":
        # End of the list file: wrap around to the first entry.
        self.fp.seek(0)
        line = self.fp.readline()
    pic_name = line.strip('\n')
    print(pic_name)

    img = cv2.imread(pic_name)
    transformed_image = letterbox_image(img, self.input_hw, self.input_hw)

    # Reshape the net's input blob, then copy the data into it.
    top[0].reshape(1, 3, self.input_hw, self.input_hw)
    top[0].data[...] = transformed_image
def eval_neg(model_path, neg_path, yolo_weights=None):
    """Evaluate the model on a directory of negative (object-free) images.

    Every file in ``neg_path`` is run through the detector; an image
    counts as correct when the model reports no detections for it. The
    fraction of correct images is printed at the end.

    Args:
        model_path: Checkpoint directory, used when ``yolo_weights`` is
            ``None``.
        neg_path: Directory containing the negative images.
        yolo_weights: Optional weights file; when given, the weights are
            loaded with ``load_weights`` instead of restoring a checkpoint.
    """
    input_image_shape = tf.placeholder(dtype=tf.int32, shape=(2, ))
    input_image = tf.placeholder(shape=[None, 416, 416, 3],
                                 dtype=tf.float32)
    predictor = yolo_predictor(config.obj_threshold, config.nms_threshold,
                               config.classes_path, config.anchors_path)
    boxes, scores, classes = predictor.predict(input_image,
                                               input_image_shape)
    image_files = os.listdir(neg_path)
    tp_nums = 0
    all_nums = len(image_files)
    with tf.Session() as sess:
        if yolo_weights is not None:
            with tf.variable_scope('predict'):
                boxes, scores, classes = predictor.predict(
                    input_image, input_image_shape)
            load_op = load_weights(tf.global_variables(scope='predict'),
                                   weights_file=yolo_weights)
            sess.run(load_op)
        else:
            saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state(model_path)
            saver.restore(sess, ckpt.model_checkpoint_path)

        for image_file in image_files:
            # os.path.join works whether or not neg_path ends with a
            # separator; the context manager releases the file handle.
            with Image.open(os.path.join(neg_path, image_file)) as image:
                resize_image = letterbox_image(image, (416, 416))
                image_data = np.array(resize_image, dtype=np.float32)
                image_hw = [image.size[1], image.size[0]]
            image_data /= 255.
            image_data = np.expand_dims(image_data, axis=0)
            out_boxes, out_scores, out_classes = sess.run(
                [boxes, scores, classes],
                feed_dict={
                    input_image: image_data,
                    input_image_shape: image_hw
                })
            print(image_file)
            print(out_classes)
            print(out_scores)
            if len(out_classes) == 0:
                tp_nums += 1

    if all_nums:
        print(tp_nums / all_nums)
    else:
        # Avoid a ZeroDivisionError when the directory is empty.
        print('No images found in {}'.format(neg_path))
def __getitem__(self, index):
    """Return the image tensor and detection targets for one sample.

    The image is read with OpenCV, converted to RGB and letterboxed to
    ``self.size``; the annotated boxes are moved into the letterboxed
    coordinate frame with the returned scale and shift. In the
    ``'train'`` phase ``self.aug`` is applied. Boxes and class ids are
    zero-padded up to ``MAX_OBJ_NUM`` entries.

    Args:
        index: Position in ``self.data_list``.

    Returns:
        tuple: ``(img, targets)`` where ``img`` is the output of
        ``self.to_tensor`` and ``targets`` is a dict with the keys
        ``'bboxes'``, ``'cls'``, ``'obj_num'``, ``'transform'`` and
        ``'img_path'``.
    """
    data_anno = self.data_list[index]
    img_path = os.path.join(self.root_dir, data_anno['img_path'])
    bboxes = data_anno['bboxes']
    cls_ids = data_anno['cls_ids']

    img = cv2.imread(img_path)
    if img is None:
        print("Error: read %s fail" % img_path)
        exit()
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Keep-ratio resize to the input size.
    img, scale, shift = letterbox_image(img, self.size)

    # The ``np.float`` alias was removed in NumPy 1.24; the builtin
    # ``float`` selects the same float64 dtype.
    bboxes = np.array(bboxes, dtype=float)
    bboxes[:, :4] = bboxes[:, :4] * scale
    bboxes[:, [0, 2]] = bboxes[:, [0, 2]] + shift[0]
    bboxes[:, [1, 3]] = bboxes[:, [1, 3]] + shift[1]

    # Augmentation (training only).
    if self.phase == 'train':
        transformed = self.aug(image=img, bboxes=bboxes, cls_ids=cls_ids)
        img = Image.fromarray(transformed['image'])
        bboxes = np.array(transformed['bboxes'])
        cls_ids = np.array(transformed['cls_ids'])

    # Pad the targets to a fixed length.
    obj_num = len(bboxes)
    if obj_num == 0:
        print('obj_num == 0')
        bboxes = np.zeros((MAX_OBJ_NUM, 4), dtype=float)
        cls_ids = np.zeros(MAX_OBJ_NUM, dtype=int)
    elif obj_num < MAX_OBJ_NUM:
        bboxes = np.pad(bboxes, ((0, MAX_OBJ_NUM - obj_num), (0, 0)))
        cls_ids = np.pad(cls_ids, (0, MAX_OBJ_NUM - obj_num))

    scale_shift = torch.Tensor([scale] + shift)

    # To tensor and normalize.
    img = self.to_tensor(img)
    targets = {
        'bboxes': bboxes,
        'cls': cls_ids,
        'obj_num': obj_num,
        'transform': scale_shift,
        'img_path': img_path
    }
    return img, targets
def predict(self, image_path):
    """Run the network on one image file.

    Args:
        image_path: Path of the image, read with ``cv2.imread``.

    Returns:
        dict: ``{0: output}`` where ``output`` is the result of
        ``self.yolo_out`` on the three output feature maps, or an empty
        list when that result is falsy.
    """
    bgr = cv2.imread(image_path)
    img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    new_img = letterbox_image(img, self.nw, self.nh)

    self.yolo.blobs['data'].reshape(1, 3, self.nw, self.nh)
    self.yolo.blobs['data'].data[...] = new_img
    out_feats = self.yolo.forward()

    layer_names = ('layer82-conv', 'layer94-conv', 'layer106-conv')
    feat = [out_feats[name][0] for name in layer_names]
    output = self.yolo_out(feat, img.shape)

    return {0: output if output else []}
def ret_frame(cls, image):
    """Detect objects in a frame and draw the large Car/Pedestrian boxes.

    Args:
        image: PIL image; it is drawn on in place.

    Returns:
        The same ``image`` object, annotated with a box and a
        ``<class>_<score>_<cls.IDvalue>`` label for every 'Car' or
        'Pedestrian' detection whose area is at least 1024 px^2.
    """
    # Generate colors for drawing bounding boxes.
    hsv_tuples = [(x / 10, 1., 1.) for x in range(10)]
    colors = [
        tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv))
        for hsv in hsv_tuples
    ]
    np.random.seed(10101)  # Fixed seed for consistent colors across runs.
    np.random.shuffle(colors)  # Decorrelate adjacent classes.
    np.random.seed(None)  # Reset seed to default.

    start = timer()
    class_names = cls._get_class()
    new_image_size = (image.width - (image.width % 32),
                      image.height - (image.height % 32))
    boxed_image = letterbox_image(image, new_image_size)
    image_data = np.array(boxed_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
    image_shape = [image.size[1], image.size[0]]
    out_boxes, out_scores, out_classes = cls.compute_output(
        image_data, image_shape)

    font = ImageFont.truetype(font='../box_font/FiraMono-Medium.otf',
                              size=np.floor(3e-2 * image.size[1] +
                                            0.5).astype('int32'))
    thickness = (image.size[0] + image.size[1]) // 300
    draw = ImageDraw.Draw(image)

    def label_extent(text):
        # ImageDraw.textsize was removed in Pillow 10. The right/bottom
        # edge of textbbox at the origin gives the equivalent extent.
        if hasattr(draw, 'textbbox'):
            return tuple(draw.textbbox((0, 0), text, font=font)[2:])
        return draw.textsize(text, font)

    for i, c in reversed(list(enumerate(out_classes))):
        predicted_class = class_names[c]
        box = out_boxes[i]
        score = out_scores[i]
        # Put the ID for each object into the label.
        label = '{}_{:.2f}_{}'.format(predicted_class, score,
                                      str(cls.IDvalue))
        label_size = label_extent(label)

        top, left, bottom, right = box
        # Round to the nearest pixel, clamp to the image, and convert to
        # plain int (numpy int32 is not JSON serialisable).
        top = int(max(0, np.floor(top + 0.5).astype('int32')))
        left = int(max(0, np.floor(left + 0.5).astype('int32')))
        bottom = int(min(image.size[1],
                         np.floor(bottom + 0.5).astype('int32')))
        right = int(min(image.size[0],
                        np.floor(right + 0.5).astype('int32')))

        # Put the label above the box when there is room, else inside it.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # Only draw boxes of at least 1024 px^2.
        sq_bdbox = (bottom - top) * (right - left)
        if sq_bdbox >= 1024 and predicted_class in ('Car', 'Pedestrian'):
            # Draw nested 1 px rectangles to get a thick outline.
            for offset in range(thickness):
                draw.rectangle([left + offset, top + offset,
                                right - offset, bottom - offset],
                               outline=colors[c])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
    del draw

    end = timer()
    print("1フレームの処理時間 = ", end - start)
    return image
def detect(image, yolo_weights=config.yolo3_weights_path,
           image_size=(416, 416)):
    """Load the model, run detection on one image, and save the result.

    Parameters
    ----------
    image: path of the image file to open
    yolo_weights: weights file loaded with ``load_weights``; when ``None``
        a checkpoint is restored from ``config.yolo3_weights_path``
    image_size: letterbox target; entries must be multiples of 32, or pass
        ``(None, None)`` to round the image's own size down to
        multiples of 32

    Side effects
    ------------
    Draws the detections on the image and writes it to ``./output.png``.
    """
    image = Image.open(image)
    if image_size != (None, None):
        assert image_size[0] % 32 == 0, 'Multiples of 32 required'
        assert image_size[1] % 32 == 0, 'Multiples of 32 required'
        resize_image = letterbox_image(image, tuple(reversed(image_size)))
    else:
        new_image_size = (image.width - (image.width % 32),
                          image.height - (image.height % 32))
        resize_image = letterbox_image(image, new_image_size)
    image_data = np.array(resize_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, axis=0)
    print(image_data.shape)

    input_image_shape = tf.placeholder(dtype=tf.int32, shape=(2,))
    input_image = tf.placeholder(shape=[None, 416, 416, 3],
                                 dtype=tf.float32)
    predictor = yolo_predictor(config.obj_threshold, config.nms_threshold,
                               config.classes_path, config.anchors_path)
    boxes, scores, classes = predictor.predict(input_image,
                                               input_image_shape)
    with tf.Session() as sess:
        if yolo_weights is not None:
            print("yes")
            with tf.variable_scope('predict'):
                boxes, scores, classes = predictor.predict(
                    input_image, input_image_shape)
            load_op = load_weights(tf.global_variables(scope='predict'),
                                   weights_file=yolo_weights)
            sess.run(load_op)
        else:
            saver = tf.train.Saver()
            saver.restore(sess, config.yolo3_weights_path)
        out_boxes, out_scores, out_classes = sess.run(
            [boxes, scores, classes],
            feed_dict={
                input_image: image_data,
                input_image_shape: [image.size[1], image.size[0]]
            })
    print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

    font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
                              size=np.floor(3e-2 * image.size[1] +
                                            0.5).astype('int32'))
    thickness = (image.size[0] + image.size[1]) // 300
    draw = ImageDraw.Draw(image)

    def label_extent(text):
        # ImageDraw.textsize was removed in Pillow 10. The right/bottom
        # edge of textbbox at the origin gives the equivalent extent.
        if hasattr(draw, 'textbbox'):
            return tuple(draw.textbbox((0, 0), text, font=font)[2:])
        return draw.textsize(text, font)

    for i, c in reversed(list(enumerate(out_classes))):
        predicted_class = predictor.class_names[c]
        box = out_boxes[i]
        score = out_scores[i]
        label = '{} {:.2f}'.format(predicted_class, score)
        label_size = label_extent(label)

        top, left, bottom, right = box
        # Round to the nearest pixel and clamp to the image.
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
        print(label, (left, top), (right, bottom))

        # Put the label above the box when there is room, else inside it.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # Draw nested 1 px rectangles to get a thick outline.
        for offset in range(thickness):
            draw.rectangle([left + offset, top + offset,
                            right - offset, bottom - offset],
                           outline=predictor.colors[c])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=predictor.colors[c])
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
    del draw

    result = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
    cv2.imwrite("./output.png", result)
def detect_image(cls, image, frame_num, all_posinf, old_posinf):
    """Detect cars and pedestrians in a frame and assign object IDs.

    Cars keep the ID of a previous-frame box that contains the centre of
    the new box; otherwise they get a fresh ID from ``cls.IDvalue_car``.
    Every pedestrian gets a fresh ID from ``cls.IDvalue_ped``. Boxes
    smaller than 1024 px^2 are ignored.

    Args:
        image: PIL image of the current frame.
        frame_num: Frame number; frame 1 registers every car as new.
        all_posinf: List updated in place with the car positions of this
            frame. For ``frame_num > 1`` its previous content is used as
            the reference positions and then cleared.
        old_posinf: List that is cleared in place for ``frame_num > 1``.

    Returns:
        dict: ``{'Car': [...], 'Pedestrian': [...]}`` where each entry is
        ``{'id': ..., 'box2d': [left, top, right, bottom]}``.
    """
    start = timer()
    class_names = cls._get_class()
    new_image_size = (image.width - (image.width % 32),
                      image.height - (image.height % 32))
    boxed_image = letterbox_image(image, new_image_size)
    image_data = np.array(boxed_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
    image_shape = [image.size[1], image.size[0]]
    out_boxes, out_scores, out_classes = cls.compute_output(image_data,
                                                            image_shape)
    Car_result_ALL = []
    Pedestrian_result_ALL = []

    # Per-frame bookkeeping: last frame's positions become the reference.
    if frame_num > 1:
        old_posinf.clear()
        old_posinf = copy.copy(all_posinf)
        all_posinf.clear()

    tracked_keys = ('id', 'left', 'top', 'right', 'bottom')

    # Per-object processing.
    for i, c in reversed(list(enumerate(out_classes))):
        predicted_class = class_names[c]
        top, left, bottom, right = out_boxes[i]
        # Round to the nearest pixel, clamp to the image, and convert to
        # plain int (numpy int32 is not JSON serialisable).
        top = int(max(0, np.floor(top + 0.5).astype('int32')))
        left = int(max(0, np.floor(left + 0.5).astype('int32')))
        bottom = int(min(image.size[1],
                         np.floor(bottom + 0.5).astype('int32')))
        right = int(min(image.size[0],
                        np.floor(right + 0.5).astype('int32')))

        # Area of the detected box; the threshold is 1024 px^2.
        sq_bdbox = (bottom - top) * (right - left)
        # Centre of the detected box. X is derived from left/right and Y
        # from top/bottom so they can be compared with the matching
        # bounds below (the axes used to be swapped).
        center_bdboxX = int((right - left) / 2) + left
        center_bdboxY = int((bottom - top) / 2) + top

        if sq_bdbox < 1024:
            continue

        if predicted_class == 'Car':
            if frame_num == 1:
                # The first frame registers every car as a new object.
                cls.IDvalue_car = cls.IDvalue_car + 1
                ObjID_set = cls.IDvalue_car
            else:
                ObjID_set = 0
                cls.matches_cnt = 0
                for tmp_old_pos in old_posinf:
                    prev = dict.fromkeys(tracked_keys, 0)
                    for k, v in tmp_old_pos.items():
                        if k in prev:
                            print("Key = ", k)
                            print("Value = ", v)
                            prev[k] = v
                    inside_x = prev['left'] <= center_bdboxX <= prev['right']
                    inside_y = prev['top'] <= center_bdboxY <= prev['bottom']
                    if inside_x and inside_y:
                        ObjID_set = prev['id']
                        cls.matches_cnt = cls.matches_cnt + 1
                # No previous box contains the centre: new object.
                if cls.matches_cnt == 0:
                    cls.IDvalue_car = cls.IDvalue_car + 1
                    ObjID_set = cls.IDvalue_car

            # Remember the position for matching in the next frame.
            all_posinf.append({'frame': frame_num, 'id': ObjID_set,
                               'left': left, 'top': top,
                               'right': right, 'bottom': bottom})
            Car_result_ALL.append({'id': ObjID_set,
                                   'box2d': [left, top, right, bottom]})
        elif predicted_class == 'Pedestrian':
            cls.IDvalue_ped = cls.IDvalue_ped + 1
            ped_id = int(cls.IDvalue_ped)
            cls.all_ObjectID_pos.append({'frame': frame_num, 'id': ped_id,
                                         'left': left, 'top': top,
                                         'right': right, 'bottom': bottom})
            Pedestrian_result_ALL.append(
                {'id': ped_id, 'box2d': [left, top, right, bottom]})

    all_result = {'Car': Car_result_ALL,
                  'Pedestrian': Pedestrian_result_ALL}
    end = timer()
    print("1フレームの処理時間 = ", end - start)
    return all_result
def detect_image(self, image):
    """Detect objects in a PIL image and draw labelled boxes on it.

    Args:
        image: PIL image; it is drawn on in place.

    Returns:
        The same ``image`` object with one box and ``<class> <score>``
        label per detection.
    """
    start = timer()

    # Letterbox to a size whose sides are multiples of 32.
    if self.model_image_size != (None, None):
        assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
        assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'
        boxed_image = letterbox_image(
            image, tuple(reversed(self.model_image_size)))
    else:
        new_image_size = (image.width - (image.width % 32),
                          image.height - (image.height % 32))
        boxed_image = letterbox_image(image, new_image_size)
    image_data = np.array(boxed_image, dtype='float32')
    print(image_data.shape)
    # Normalise to [0, 1].
    image_data /= 255.
    # Add batch dimension.
    image_data = np.expand_dims(image_data, 0)

    out_boxes, out_scores, out_classes = self.sess.run(
        [self.boxes, self.scores, self.classes],
        feed_dict={
            self.yolo_model.input: image_data,
            self.input_image_shape: [image.size[1], image.size[0]],
            K.learning_phase(): 0
        })
    print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

    font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
                              size=np.floor(3e-2 * image.size[1] +
                                            0.5).astype('int32'))
    thickness = (image.size[0] + image.size[1]) // 300
    draw = ImageDraw.Draw(image)

    def label_extent(text):
        # ImageDraw.textsize was removed in Pillow 10. The right/bottom
        # edge of textbbox at the origin gives the equivalent extent.
        if hasattr(draw, 'textbbox'):
            return tuple(draw.textbbox((0, 0), text, font=font)[2:])
        return draw.textsize(text, font)

    for i, c in reversed(list(enumerate(out_classes))):
        predicted_class = self.class_names[c]
        box = out_boxes[i]
        score = out_scores[i]
        label = '{} {:.2f}'.format(predicted_class, score)
        label_size = label_extent(label)

        top, left, bottom, right = box
        # Round to the nearest pixel and clamp to the image.
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
        print(label, (left, top), (right, bottom))

        # Put the label above the box when there is room, else inside it.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # Draw nested 1 px rectangles to get a thick outline.
        for offset in range(thickness):
            draw.rectangle([left + offset, top + offset,
                            right - offset, bottom - offset],
                           outline=self.colors[c])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[c])
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
    del draw

    end = timer()
    print(end - start)
    return image
def ret_frame(cls, image, cv2image, frame_num):
    """Detect objects in a frame and draw ID-labelled boxes on it.

    Every 'Car' or 'Pedestrian' detection with an area of at least
    1024 px^2 increments ``cls.IDvalue`` and is drawn with a
    ``<class>_<score>_<id>`` label.

    Args:
        image: PIL image; it is drawn on in place.
        cv2image: Not used by this function.
        frame_num: Not used by this function.

    Returns:
        The same ``image`` object, annotated.
    """
    # Generate colors for drawing bounding boxes.
    hsv_tuples = [(x / 10, 1., 1.) for x in range(10)]
    colors = [
        tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv))
        for hsv in hsv_tuples
    ]
    np.random.seed(10101)  # Fixed seed for consistent colors across runs.
    np.random.shuffle(colors)  # Decorrelate adjacent classes.
    np.random.seed(None)  # Reset seed to default.

    start = timer()
    class_names = cls._get_class()
    new_image_size = (image.width - (image.width % 32),
                      image.height - (image.height % 32))
    boxed_image = letterbox_image(image, new_image_size)
    image_data = np.array(boxed_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
    image_shape = [image.size[1], image.size[0]]
    out_boxes, out_scores, out_classes = cls.compute_output(image_data,
                                                            image_shape)

    font = ImageFont.truetype(font='../box_font/FiraMono-Medium.otf',
                              size=np.floor(3e-2 * image.size[1] +
                                            0.5).astype('int32'))
    thickness = (image.size[0] + image.size[1]) // 300
    draw = ImageDraw.Draw(image)

    def label_extent(text):
        # ImageDraw.textsize was removed in Pillow 10. The right/bottom
        # edge of textbbox at the origin gives the equivalent extent.
        if hasattr(draw, 'textbbox'):
            return tuple(draw.textbbox((0, 0), text, font=font)[2:])
        return draw.textsize(text, font)

    for i, c in reversed(list(enumerate(out_classes))):
        predicted_class = class_names[c]
        box = out_boxes[i]
        score = out_scores[i]

        top, left, bottom, right = box
        # Round to the nearest pixel, clamp to the image, and convert to
        # plain int (numpy int32 is not JSON serialisable).
        top = int(max(0, np.floor(top + 0.5).astype('int32')))
        left = int(max(0, np.floor(left + 0.5).astype('int32')))
        bottom = int(min(image.size[1],
                         np.floor(bottom + 0.5).astype('int32')))
        right = int(min(image.size[0],
                        np.floor(right + 0.5).astype('int32')))

        # Only handle boxes of at least 1024 px^2.
        sq_bdbox = (bottom - top) * (right - left)
        if sq_bdbox < 1024:
            continue
        if predicted_class != 'Car' and predicted_class != 'Pedestrian':
            continue

        cls.IDvalue = cls.IDvalue + 1
        # Put the ID for each object into the label.
        label = '{}_{:.2f}_{}'.format(predicted_class, score,
                                      str(cls.IDvalue))
        label_size = label_extent(label)

        # Put the label above the box when there is room, else inside it.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # Draw nested 1 px rectangles to get a thick outline.
        for offset in range(thickness):
            draw.rectangle([left + offset, top + offset,
                            right - offset, bottom - offset],
                           outline=colors[c])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=colors[c])
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
    del draw

    end = timer()
    print("1フレームの処理時間 = ", end - start)
    return image
def ret_frame(cls, image, frame_num):
    """Detect cars in one frame, assign persistent IDs, and draw them.

    A car keeps the ID of a previous-frame box when the centre of its new
    box lies inside that old box expanded by a 15 px band; otherwise it gets
    a fresh ID from ``cls.IDvalue_car``.  On frame 1 every car gets a fresh
    ID.  The current box is drawn with ``colors[0]`` and the box stored in
    ``cls.old_left/top/right/bottom`` with ``colors[1]``.

    Args:
        image: PIL image of the frame; it is drawn on in place.
        frame_num: 1-based frame index.

    Returns:
        The same ``image`` object with boxes and labels drawn on it.
    """
    hsv_tuples = [(x / 10, 1., 1.) for x in range(10)]
    colors = [colorsys.hsv_to_rgb(*hsv) for hsv in hsv_tuples]
    colors = [(int(r * 255), int(g * 255), int(b * 255)) for r, g, b in colors]
    np.random.seed(10101)  # Fixed seed for consistent colors across runs.
    np.random.shuffle(colors)  # Shuffle colors to decorrelate adjacent classes.
    np.random.seed(None)  # Reset seed to default.

    start = timer()
    class_names = cls._get_class()

    # The network input must be a multiple of 32 in both dimensions.
    new_image_size = (image.width - (image.width % 32),
                      image.height - (image.height % 32))
    boxed_image = letterbox_image(image, new_image_size)
    image_data = np.array(boxed_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
    image_shape = [image.size[1], image.size[0]]
    out_boxes, out_scores, out_classes = cls.compute_output(image_data, image_shape)

    font = ImageFont.truetype(
        font='../box_font/FiraMono-Medium.otf',
        size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
    thickness = (image.size[0] + image.size[1]) // 300

    # Per-frame bookkeeping: last frame's positions become the "old" set.
    if frame_num > 1:
        cls.all_ObjectID_oldpos = copy.copy(cls.all_ObjectID_pos)
        cls.all_ObjectID_pos = []

    band_value = 15  # Tolerance (px) added around each old box when matching.

    # Per-object processing.
    for i, c in reversed(list(enumerate(out_classes))):
        predicted_class = class_names[c]
        score = out_scores[i]

        # Convert to plain int because numpy int32 is not JSON serialisable.
        top, left, bottom, right = out_boxes[i]
        top = int(max(0, np.floor(top + 0.5).astype('int32')))
        left = int(max(0, np.floor(left + 0.5).astype('int32')))
        bottom = int(min(image.size[1], np.floor(bottom + 0.5).astype('int32')))
        right = int(min(image.size[0], np.floor(right + 0.5).astype('int32')))

        # Area of the detected box, compared against 1024 px**2 below.
        sq_bdbox = (bottom - top) * (right - left)
        cls.ObjID_setimg = 0
        cls.matches_cnt = 0
        print("len(cls.all_ObjectID_oldpos) = ", len(cls.all_ObjectID_oldpos))

        if sq_bdbox < 1024 or predicted_class != 'Car':
            continue

        # Centre of the detected box: X from left/right, Y from top/bottom.
        center_x = left + (right - left) // 2
        center_y = top + (bottom - top) // 2

        if frame_num == 1:
            # Register every object on the first frame.
            cls.IDvalue_car = cls.IDvalue_car + 1
            cls.ObjID_setimg = cls.IDvalue_car
        else:
            for old_pos in cls.all_ObjectID_oldpos:
                old_left = cls.getValue('left', old_pos)
                old_top = cls.getValue('top', old_pos)
                old_right = cls.getValue('right', old_pos)
                old_bottom = cls.getValue('bottom', old_pos)

                # Both bounds must hold on each axis for the centre to be
                # inside the expanded old box.
                inside_x = (int(old_left - band_value) <= center_x
                            <= int(old_right + band_value))
                inside_y = (int(old_top - band_value) <= center_y
                            <= int(old_bottom + band_value))
                if inside_x and inside_y:
                    cls.matches_cnt = cls.matches_cnt + 1
                    if cls.matches_cnt == 1:
                        # Inherit the ID and box of the first matching object.
                        cls.ObjID_setimg = cls.getValue('id', old_pos)
                        cls.old_left = old_left
                        cls.old_top = old_top
                        cls.old_right = old_right
                        cls.old_bottom = old_bottom

            # Result of checking every object from the previous frame.
            print("cls.matches_cnt = ", cls.matches_cnt)
            # No previous object matched: treat this as a new object.
            if cls.matches_cnt == 0:
                cls.old_top = 0
                cls.old_left = 0
                cls.old_bottom = 0
                cls.old_right = 0
                cls.IDvalue_car = cls.IDvalue_car + 1
                cls.ObjID_setimg = cls.IDvalue_car

        # Record the (possibly updated) object ID for the next frame.
        tmp_car = [{'Key': 'frame', 'Value': frame_num},
                   {'Key': 'id', 'Value': cls.ObjID_setimg},
                   {'Key': 'left', 'Value': left},
                   {'Key': 'top', 'Value': top},
                   {'Key': 'right', 'Value': right},
                   {'Key': 'bottom', 'Value': bottom}]
        cls.all_ObjectID_pos.append(tmp_car)

        # Put the ID into the label of each object.
        label = '{}_{:.2f}_{}'.format(predicted_class, score, str(cls.ObjID_setimg))
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)

        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        if cls.old_top - label_size[1] >= 0:
            text_origin2 = np.array([cls.old_left, cls.old_top - label_size[1]])
        else:
            text_origin2 = np.array([cls.old_left, cls.old_top + 1])

        # Current box: nested 1-px rectangles give the line thickness.
        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=colors[0])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=colors[0])
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)

        # Previous-frame box held in cls.old_*.
        for inset in range(thickness):
            draw.rectangle(
                [cls.old_left + inset, cls.old_top + inset,
                 cls.old_right - inset, cls.old_bottom - inset],
                outline=colors[1])
        draw.rectangle(
            [tuple(text_origin2), tuple(text_origin2 + label_size)],
            fill=colors[1])
        draw.text(text_origin2, label, fill=(0, 0, 0), font=font)
        del draw

    end = timer()
    print("1フレームの処理時間 = ", end - start)
    return image
def detect_image(cls, image, frame_num):
    """Detect cars and pedestrians in one frame and return them as a dict.

    Every 'Car' or 'Pedestrian' detection whose clamped box covers at least
    1024 px**2 increments ``cls.IDvalue``, is appended to
    ``cls.all_ObjectID_pos`` (for use by later frames), and is added to the
    result.

    Args:
        image: PIL image of the frame.
        frame_num: Frame index stored with each logged position.

    Returns:
        ``{'Car': [...], 'Pedestrian': [...]}`` where each entry is
        ``{'id': int, 'box2d': [left, top, right, bottom]}``.  A class with
        no detections maps to an empty list.
    """
    start = timer()
    class_names = cls._get_class()

    # The network input must be a multiple of 32 in both dimensions.
    new_image_size = (image.width - (image.width % 32),
                      image.height - (image.height % 32))
    boxed_image = letterbox_image(image, new_image_size)
    image_data = np.array(boxed_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
    image_shape = [image.size[1], image.size[0]]
    out_boxes, out_scores, out_classes = cls.compute_output(image_data, image_shape)

    all_result = {'Car': [], 'Pedestrian': []}

    for i, c in reversed(list(enumerate(out_classes))):
        predicted_class = class_names[c]

        # Convert to plain int because numpy int32 is not JSON serialisable.
        top, left, bottom, right = out_boxes[i]
        top = int(max(0, np.floor(top + 0.5).astype('int32')))
        left = int(max(0, np.floor(left + 0.5).astype('int32')))
        bottom = int(min(image.size[1], np.floor(bottom + 0.5).astype('int32')))
        right = int(min(image.size[0], np.floor(right + 0.5).astype('int32')))

        # Skip boxes below the 1024 px**2 area threshold and other classes.
        if (bottom - top) * (right - left) < 1024:
            continue
        if predicted_class != 'Car' and predicted_class != 'Pedestrian':
            continue

        cls.IDvalue = cls.IDvalue + 1
        object_id = int(cls.IDvalue)

        # Position record used to relate this object to the next frame.
        cls.all_ObjectID_pos.append({
            'frame': frame_num, 'id': object_id,
            'left': left, 'top': top, 'right': right, 'bottom': bottom})

        # Prediction result for this object.
        all_result[predicted_class].append(
            {'id': object_id, 'box2d': [left, top, right, bottom]})

    end = timer()
    print("1フレームの処理時間 = ", end - start)
    return all_result
def ret_frame(cls, image, cv2image, frame_num):
    """Detect objects, flag new ones against the previous frame, and draw
    the boxes reported by ``cls.trackers``.

    A detection counts as "new" on frame 1, or when the centre of its box
    lies inside none of the previous frame's boxes.  New detections get an
    ID from ``cls.IDvalue`` and are logged to ``cls.all_ObjectID_pos``.
    When at least one new detection exists, a KCF tracker is added for each
    current detection and the tracked boxes (>= 1024 px**2) are drawn.

    Args:
        image: PIL image of the frame; it is drawn on in place.
        cv2image: The same frame as an OpenCV array, fed to the trackers.
        frame_num: 1-based frame index.

    Returns:
        The same ``image`` object with the tracked boxes drawn on it.
    """
    # Generate colors for drawing bounding boxes.
    hsv_tuples = [(x / 10, 1., 1.) for x in range(10)]
    colors = [colorsys.hsv_to_rgb(*hsv) for hsv in hsv_tuples]
    colors = [(int(r * 255), int(g * 255), int(b * 255)) for r, g, b in colors]
    np.random.seed(10101)  # Fixed seed for consistent colors across runs.
    np.random.shuffle(colors)  # Shuffle colors to decorrelate adjacent classes.
    np.random.seed(None)  # Reset seed to default.

    def clamp_box(box):
        """Round a (top, left, bottom, right) box and clamp it to the image."""
        top, left, bottom, right = box
        top = int(max(0, np.floor(top + 0.5).astype('int32')))
        left = int(max(0, np.floor(left + 0.5).astype('int32')))
        bottom = int(min(image.size[1], np.floor(bottom + 0.5).astype('int32')))
        right = int(min(image.size[0], np.floor(right + 0.5).astype('int32')))
        return top, left, bottom, right

    start = timer()
    class_names = cls._get_class()

    # The network input must be a multiple of 32 in both dimensions.
    new_image_size = (image.width - (image.width % 32),
                      image.height - (image.height % 32))
    boxed_image = letterbox_image(image, new_image_size)
    image_data = np.array(boxed_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
    image_shape = [image.size[1], image.size[0]]
    out_boxes, out_scores, out_classes = cls.compute_output(image_data, image_shape)

    # Capture the previous frame's boxes BEFORE overwriting the stored ones;
    # on frame 1 there is no previous frame, so the current result stands in.
    if frame_num == 1:
        backward_out_boxes = out_boxes
    else:
        backward_out_boxes = cls.old_out_boxes
    # Store the new detection result for the next call.
    cls.old_out_boxes = out_boxes
    cls.old_out_scores = out_scores
    cls.old_out_classes = out_classes

    current_pos = []
    thickness = (image.size[0] + image.size[1]) // 300

    # Check whether each detection is a new object.
    for it, ct in reversed(list(enumerate(out_classes))):
        box = out_boxes[it]
        print("box = ", box)
        top, left, bottom, right = clamp_box(box)

        # Centre of the box in image coordinates.
        boxcent_xpos = int(left + (right - left) / 2)
        boxcent_ypos = int(top + (bottom - top) / 2)

        if frame_num == 1:
            # Every detection on the first frame gets an object ID.
            is_new = True
        else:
            # New only when no previous-frame box contains the centre.
            is_new = True
            for box_old in backward_out_boxes:
                top_old, left_old, bottom_old, right_old = clamp_box(box_old)
                if (top_old < boxcent_ypos < bottom_old
                        and left_old < boxcent_xpos < right_old):
                    is_new = False
                    break

        if not is_new:
            continue

        cls.IDvalue = cls.IDvalue + 1
        current_pos.append({'ID': cls.IDvalue, 'box_ord': it,
                            'x': boxcent_xpos, 'y': boxcent_ypos})
        # LOGGING
        cls.all_ObjectID_pos.append({'FRNUM': frame_num, 'ID': cls.IDvalue,
                                     'box_ord': it,
                                     'x': boxcent_xpos, 'y': boxcent_ypos})
        if frame_num == 1:
            print("No.1 frame box center = ", current_pos)
        else:
            print("New object in frame ::box center = ", current_pos)

    # Dump the IDs and detection indices collected for this frame.
    print("current_pos = ", len(current_pos))
    for entry in current_pos:
        for k, v in entry.items():
            if k == "ID" or k == "box_ord":
                print("Key = ", k)
                print("Value = ", v)

    for i, c in reversed(list(enumerate(out_classes))):
        predicted_class = class_names[c]
        top, left, bottom, right = clamp_box(out_boxes[i])

        # Track only when this frame produced at least one new object.
        # NOTE(review): a tracker is added for every detection on every such
        # frame, so cls.trackers grows without bound; confirm this is intended.
        if len(current_pos) > 0:
            bbox = (left, top, int(right - left), int(bottom - top))  # x, y, w, h
            tracker = cv2.TrackerKCF_create()
            cls.trackers.add(tracker, cv2image, bbox)
            track, boxes = cls.trackers.update(cv2image)

            if track:  # Tracking succeeded.
                for tracked in boxes:
                    top_track = int(tracked[1])
                    left_track = int(tracked[0])
                    bottom_track = int(tracked[1] + tracked[3])
                    right_track = int(tracked[0] + tracked[2])

                    chksq_bdbox = (bottom_track - top_track) * (right_track - left_track)
                    if chksq_bdbox < 1024:  # Box area threshold.
                        continue
                    if predicted_class != 'Car' and predicted_class != 'Pedestrian':
                        continue

                    draw = ImageDraw.Draw(image)
                    # Nested 1-px rectangles give the line thickness.
                    for inset in range(thickness):
                        draw.rectangle(
                            [left_track + inset, top_track + inset,
                             right_track - inset, bottom_track - inset],
                            outline=colors[9])
                    del draw

    end = timer()
    print("1フレームの処理時間 = ", end - start)
    return image
def main(video_path, model_path, track_target=0, visualize=True):
    """run video prediction

    Reads the video frame by frame, runs the detector, keeps the boxes whose
    class equals ``track_target`` and feeds them to the Kalman-filter based
    matching cascade.  Trackers are created from the first frame that
    contains at least one target.

    Args:
        video_path: video path
        model_path: model path
        track_target: 0-person; 1-bicycle; 2-car; 7-truck
        visualize: whether visualize tracking list
    """
    detector = Detector(model_path=model_path)
    kalman_filter = KalmanFilter()
    capture = cv2.VideoCapture(video_path)

    # try/finally guarantees the capture is released even if a frame fails.
    try:
        height = capture.get(cv2.CAP_PROP_FRAME_HEIGHT)
        width = capture.get(cv2.CAP_PROP_FRAME_WIDTH)

        # The original frame size never changes, so build its tensor once.
        image_shape = tf.constant(
            np.expand_dims(np.array([height, width], dtype='float32'), axis=0),
            dtype=tf.float32)

        # tracking list
        tracking_list = []
        label_count = 0
        is_first_frame = True

        while True:
            success, frame = capture.read()
            if not success:
                break

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # convert to Image object
            frame_pil = Image.fromarray(np.uint8(frame))
            new_frame = letterbox_image(frame_pil, INPUT_SIZE)
            image_array = np.expand_dims(
                np.array(new_frame, dtype='float32') / 255.0, axis=0)
            image_constant = tf.constant(image_array, dtype=tf.float32)

            # detect image
            results = detector.detect(image_constant, image_shape)
            pred_results = list(results.values())
            boxes = pred_results[0].numpy()
            classes = pred_results[2].numpy()

            # find tracking targets
            track_id = np.where(classes == track_target)[0]
            track_boxes = boxes[track_id]
            num_tracks = len(track_boxes)
            if num_tracks > 0:
                track_boxes = box2xyah(track_boxes)
                track_boxes = list(track_boxes)

            if not is_first_frame:
                # start tracking
                tracking_list, label_count = matching_cascade(
                    tracking_list, track_boxes, kalman_filter, label_count)

            if is_first_frame and (num_tracks > 0):
                is_first_frame = False
                for i in range(num_tracks):
                    # initialize first frame
                    mean_init, cov_init = kalman_filter.initiate(
                        measurement=track_boxes[i])
                    # create tracker
                    new_tracker = create_tracker(
                        mean=mean_init, cov=cov_init, detection=track_boxes[i])
                    tracking_list.append(new_tracker)

            if visualize:
                # visualize results
                img = visualize_results(tracking_list, height, frame)
                img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                cv2.imshow('avoid invasion', img)
                key = cv2.waitKey(30) & 0xff
                if key == 27:  # ESC stops playback.
                    break
    finally:
        capture.release()
def eval(model_path, min_Iou=0.5, yolo_weights=None):
    """
    Introduction
    ------------
        Compute the model's mAP on the validation set and print mAP,
        precision, recall and F1.

    Parameters
    ----------
        model_path: checkpoint directory, used when ``yolo_weights`` is None
        min_Iou: IoU a prediction must exceed to count as a true positive
        yolo_weights: optional weights file loaded instead of a checkpoint
    """
    ground_truth = {}
    class_pred = defaultdict(list)
    gt_counter_per_class = defaultdict(int)

    input_image_shape = tf.placeholder(dtype=tf.int32, shape=(2, ))
    input_image = tf.placeholder(shape=[None, 416, 416, 3], dtype=tf.float32)
    predictor = yolo_predictor(config.obj_threshold, config.nms_threshold,
                               config.classes_path, config.anchors_path)
    boxes, scores, classes = predictor.predict(input_image, input_image_shape)

    val_Reader = Reader("val", config.data_dir, config.anchors_path,
                        config.num_classes, input_shape=config.input_shape,
                        max_boxes=config.max_boxes)
    image_files, bboxes_data = val_Reader.read_annotations()
    allBBox = 0

    with tf.Session() as sess:
        if yolo_weights is not None:
            with tf.variable_scope('predict'):
                boxes, scores, classes = predictor.predict(
                    input_image, input_image_shape)
            load_op = load_weights(tf.global_variables(scope='predict'),
                                   weights_file=yolo_weights)
            sess.run(load_op)
        else:
            saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state(model_path)
            saver.restore(sess, ckpt.model_checkpoint_path)

        for index, image_file in enumerate(image_files):
            file_id = os.path.split(image_file)[-1].split('.')[0]

            # Collect the ground-truth boxes of this image.
            val_bboxes = []
            for bbox in bboxes_data[index]:
                left, top, right, bottom, class_id = (
                    bbox[0], bbox[1], bbox[2], bbox[3], bbox[4])
                class_name = val_Reader.class_names[int(class_id)]
                val_bboxes.append({
                    "class_name": class_name,
                    "bbox": [float(left), float(top), float(right), float(bottom)],
                    "used": False
                })
                gt_counter_per_class[class_name] += 1
            ground_truth[file_id] = val_bboxes

            image = Image.open(image_file)
            resize_image = letterbox_image(image, (416, 416))
            image_data = np.array(resize_image, dtype=np.float32)
            image_data /= 255.
            image_data = np.expand_dims(image_data, axis=0)

            out_boxes, out_scores, out_classes = sess.run(
                [boxes, scores, classes],
                feed_dict={
                    input_image: image_data,
                    input_image_shape: [image.size[1], image.size[0]]
                })
            allBBox += len(out_boxes)
            print("detect {}/{} found boxes: {},allBBox:{}".format(
                index, len(image_files), len(out_boxes), allBBox))

            for o, c in enumerate(out_classes):
                predicted_class = val_Reader.class_names[c]
                score = out_scores[o]
                top, left, bottom, right = out_boxes[o]
                top = max(0, np.floor(top + 0.5).astype('int32'))
                left = max(0, np.floor(left + 0.5).astype('int32'))
                bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
                right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
                class_pred[predicted_class].append({
                    "confidence": str(score),
                    "file_id": file_id,
                    "bbox": [left, top, right, bottom]
                })

    # Compute the AP of every class.
    sum_AP = 0.0
    sum_rec = 0.0
    sum_prec = 0.0
    count_true_positives = {}
    for class_name in sorted(gt_counter_per_class.keys()):
        count_true_positives[class_name] = 0

        # AP is defined over detections ranked by confidence, so sort them
        # (highest first) before accumulating TP/FP.
        predictions_data = sorted(class_pred[class_name],
                                  key=lambda p: float(p['confidence']),
                                  reverse=True)
        # Total number of predicted boxes for this class.
        nd = len(predictions_data)
        tp = [0] * nd  # true positive
        fp = [0] * nd  # false positive

        for idx, prediction in enumerate(predictions_data):
            bbox_pred = prediction['bbox']
            Iou_max = -1
            gt_match = None

            # Find the same-class ground-truth box with the highest IoU.
            for obj in ground_truth[prediction['file_id']]:
                if obj['class_name'] != class_name:
                    continue
                bbox_gt = obj['bbox']
                inter_w = (min(bbox_gt[2], bbox_pred[2])
                           - max(bbox_pred[0], bbox_gt[0]) + 1)
                inter_h = (min(bbox_gt[3], bbox_pred[3])
                           - max(bbox_gt[1], bbox_pred[1]) + 1)
                if inter_h <= 0 or inter_w <= 0:
                    continue
                area_pred = ((bbox_pred[2] - bbox_pred[0] + 1)
                             * (bbox_pred[3] - bbox_pred[1] + 1))
                area_gt = ((bbox_gt[2] - bbox_gt[0] + 1)
                           * (bbox_gt[3] - bbox_gt[1] + 1))
                union_area = area_pred + area_gt - inter_w * inter_h
                Iou = inter_h * inter_w / union_area
                if Iou > Iou_max:
                    Iou_max = Iou
                    gt_match = obj

            # A ground-truth box may be claimed by only one prediction.
            # Test for None before subscripting.
            if (Iou_max > min_Iou and gt_match is not None
                    and not gt_match['used']):
                tp[idx] = 1
                gt_match['used'] = True
            else:
                fp[idx] = 1

        # Turn the per-prediction flags into running totals.
        running = 0
        for idx, val in enumerate(fp):
            running += val
            fp[idx] = running
        running = 0
        for idx, val in enumerate(tp):
            running += val
            tp[idx] = running

        # Precision and recall at every rank.
        rec = [tp_n / gt_counter_per_class[class_name] for tp_n in tp]
        prec = [tp_n / (fp_n + tp_n) for tp_n, fp_n in zip(tp, fp)]

        ap, mrec, mprec = voc_ap(rec, prec)
        sum_AP += ap
        sum_rec += (mrec[-2])
        sum_prec += sum(mprec) / (allBBox + 2)

    # Guard the divisions so an empty validation set or an all-miss run
    # reports zeros instead of raising ZeroDivisionError.
    denom = sum_rec + sum_prec
    f1 = 2 * sum_rec * sum_prec / denom if denom else 0.0
    num_classes = len(gt_counter_per_class)
    MAP = sum_AP / num_classes * 100 if num_classes else 0.0
    print("The Model Eval MAP: {},prec:{},rec:{},f1:{}".format(
        MAP, sum_prec, sum_rec, f1))
def detect_video(weights, video_filepath, img_size, conf_thres, iou_thres):
    """Run the YOLOv5 TFLite detector on a video and write an annotated copy.

    The output is written to the current directory as
    ``<input stem>yolov5_output.mp4`` with the same frame size as the input.
    The average processing rate is printed when the video ends.

    Args:
        weights: Model weights passed to ``yolov5_tflite``.
        video_filepath: Path of the input video.
        img_size: Side length of the square network input.
        conf_thres: Confidence threshold passed to ``yolov5_tflite``.
        iou_thres: IoU threshold passed to ``yolov5_tflite``.
    """
    start_time = time.time()
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoCapture(video_filepath)
    fps = video.get(cv2.CAP_PROP_FPS)
    # Named properties instead of the magic ids 3 (width) and 4 (height).
    frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print(frame_height, frame_width)

    result_video_filepath = video_filepath.split('/')[-1].split(
        '.')[0] + 'yolov5_output.mp4'
    # VideoWriter expects the frame size as (width, height).
    out = cv2.VideoWriter(result_video_filepath, fourcc, int(fps),
                          (frame_width, frame_height))

    yolov5_tflite_obj = yolov5_tflite(weights, img_size, conf_thres, iou_thres)
    size = (img_size, img_size)

    # Drawing parameters do not change between frames.
    font = cv2.FONT_HERSHEY_SIMPLEX
    fontScale = 0.5
    color = (0, 255, 0)  # Label text colour.
    thickness = 1  # Label text line thickness in px.

    no_of_frames = 0
    # try/finally releases both the reader and the writer even on error.
    try:
        while True:
            check, frame = video.read()
            if not check:
                break
            no_of_frames += 1

            # NOTE(review): `frame` is BGR as read by OpenCV and is handed to
            # PIL unchanged; confirm the model expects this channel order.
            image_resized = letterbox_image(Image.fromarray(frame), size)
            image_array = np.asarray(image_resized)
            normalized_image_array = image_array.astype(np.float32) / 255.0
            result_boxes, result_scores, result_class_names = yolov5_tflite_obj.detect(
                normalized_image_array)

            if len(result_boxes) > 0:
                # Map boxes from network-input to frame coordinates; the
                # target shape is given as (height, width).
                result_boxes = scale_coords(size, np.array(result_boxes),
                                            (frame_height, frame_width))
                for i, r in enumerate(result_boxes):
                    org = (int(r[0]), int(r[1]))
                    cv2.rectangle(frame, (int(r[0]), int(r[1])),
                                  (int(r[2]), int(r[3])), (255, 0, 0), 1)
                    cv2.putText(
                        frame,
                        str(int(100 * result_scores[i])) + '% ' +
                        str(result_class_names[i]), org, font, fontScale,
                        color, thickness, cv2.LINE_AA)

            out.write(frame)

        print('FPS:', no_of_frames / (time.time() - start_time))
    finally:
        video.release()
        out.release()
# print(data) # print(lr_img) model = yolo(config.norm_epsilon, config.norm_decay, '../model_data/yolo_anchors.txt', '../model_data/coco_classes.txt', config.pre_train) g1 = model.GAN_g1(lr_img) g2 = model.GAN_g2(g1) out = model.yolo_inference(g2.outputs, config.num_anchors / 3, config.num_classes, training=True) # tf.summary.scalar('out', out) # tf.summary.scalar('g1', g1.outputs) # merged_summary_op = tf.summary.merge_all() data1 = Image.open("../dog.jpg") data1 = utils.letterbox_image(data1, (104, 104)) data1 = np.array(data1, dtype=np.float32) data1 /= 255. data1 = np.expand_dims(data1, axis=0) # print(data1.shape) # loss = model.yolo_loss(output, bbox_true, model.anchors, config.num_classes, config.ignore_thresh) with tf.Session() as sess: # data1 = cv2.imread('../dog.jpg') # data1 = cv2.cvtColor(data1, cv2.COLOR_BGR2RGB) # data1 = cv2.resize(data1, (416, 416)) # lr_img1 = cv2.resize(data1, (104, 104), interpolation=cv2.INTER_CUBIC) # data1 = tf.cast(tf.expand_dims(tf.constant(data1), 0), tf.float32) # lr_img1 = tf.cast(tf.expand_dims(tf.constant(lr_img1), 0), tf.float32)
def table_detect(img, sc=(416, 416), thresh=0.5, NMSthresh=0.3):
    """
    Table detection.
    :param img: image to run detection on (the original doc describes it as "GBR")
    :param sc: target size after preprocessing; only sc[0] is read and is
               used for both sides
    :param thresh: confidence threshold; only detections above it are kept
    :param NMSthresh: non-maximum suppression threshold
    :return: (boxes, adBoxes, confidences)
    """
    side = sc[0]
    # Height and width of the input image.
    img_height, img_width = img.shape[:2]

    # Letterboxed image plus the new/old width ratio and new/old height ratio.
    resized, fx, fy = letterbox_image(img[..., ::-1], (side, side))
    # Build the network input blob.
    blob = cv2.dnn.blobFromImage(resized,
                                 scalefactor=1.0,
                                 size=(side, side),
                                 swapRB=True,
                                 crop=False)
    tableDetectNet.setInput(blob / 255.0)

    # Forward pass through the unconnected output layers.
    layer_names = tableDetectNet.getUnconnectedOutLayersNames()
    outputs = tableDetectNet.forward(layer_names)

    confidences = []
    boxes = []
    for output in outputs:
        # Each detection is [centerx, centery, w, h, x, class confidences...].
        for detection in output:
            scores = detection[5:]
            # The index of the highest score is the class id.
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if not confidence > thresh:
                continue

            # Map centre and size back to the original image.
            center_x = int(detection[0] * side / fx)
            center_y = int(detection[1] * side / fy)
            width = int(detection[2] * side / fx)
            height = int(detection[3] * side / fy)
            # Top-left corner: left is x, top is y.
            left = int(center_x - width / 2)
            top = int(center_y - height / 2)

            # Only class id 1 is kept.
            if class_id != 1:
                continue

            confidences.append(float(confidence))
            # Corner coordinates clamped to stay inside the image.
            xmin = max(left, 1)
            ymin = max(top, 1)
            xmax = min(left + width, img_width - 1)
            ymax = min(top + height, img_height - 1)
            boxes.append([xmin, ymin, xmax, ymax])

    boxes = np.array(boxes)
    confidences = np.array(confidences)

    # Non-maximum suppression over the collected boxes.
    if len(boxes) > 0:
        boxes, confidences = nms_box(boxes,
                                     confidences,
                                     score_threshold=thresh,
                                     nms_threshold=NMSthresh)

    boxes, adBoxes = fix_table_box_for_table_line(boxes, confidences, img)
    return boxes, adBoxes, confidences
def detect(model_path, yolo_weights=None, image_path=None):
    """
    Introduction
    ------------
        Load the model and run prediction, either on a single image or on
        the default webcam when no image path is given.

    Parameters
    ----------
        model_path: model path
        yolo_weights: optional weights file loaded instead of a checkpoint
        image_path: image path; None selects webcam capture (device 0)
    """
    # NOTE(review): model_path is never read; the checkpoint path below is
    # hard-coded. Confirm whether it should use model_path instead.
    cap = None
    if image_path is None:
        cap = cv2.VideoCapture(0)

    # try/finally releases the camera on every exit path.
    try:
        input_image_shape = tf.placeholder(dtype=tf.int32, shape=(2, ))
        input_image = tf.placeholder(shape=[None, 416, 416, 3], dtype=tf.float32)
        predictor = yolo_predictor(config.obj_threshold, config.nms_threshold,
                                   config.classes_path, config.anchors_path)
        boxes, scores, classes = predictor.predict(input_image, input_image_shape)

        with tf.Session() as sess:
            if yolo_weights is not None:
                with tf.variable_scope('predict'):
                    boxes, scores, classes = predictor.predict(
                        input_image, input_image_shape)
                load_op = load_weights(tf.global_variables(scope='predict'),
                                       weights_file=yolo_weights)
                sess.run(load_op)
            else:
                saver = tf.train.Saver()
                saver.restore(
                    sess,
                    "./test_model/model.ckpt-192192/model.ckpt-44865")  # emotion

            while True:
                start_time = time.time()
                if image_path is None:
                    ret, image = cap.read()
                    if not ret:
                        break
                    [h, w] = image.shape[:2]
                    print(h, w)
                    # Mirror the webcam frame and convert BGR -> RGB for PIL.
                    image = cv2.flip(image, 1)
                    image_np = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    image = Image.fromarray(image_np)
                else:
                    image = Image.open(image_path)

                resize_image = letterbox_image(image, (416, 416))
                image_data = np.array(resize_image, dtype=np.float32)
                image_data /= 255.
                image_data = np.expand_dims(image_data, axis=0)

                out_boxes, out_scores, out_classes = sess.run(
                    [boxes, scores, classes],
                    feed_dict={
                        input_image: image_data,
                        input_image_shape: [image.size[1], image.size[0]]
                    })
                print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

                font = ImageFont.truetype(
                    font='font/FiraMono-Medium.otf',
                    size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
                thickness = (image.size[0] + image.size[1]) // 300

                for i, c in reversed(list(enumerate(out_classes))):
                    c = int(c[0])
                    print("i:{}, c:{}, type:{}".format(i, c, type(c)))
                    # Only class ids 0..2 are drawn.
                    if c > 2:
                        continue

                    predicted_class = predictor.class_names[c]
                    box = out_boxes[i]
                    score = out_scores[i]
                    label = '{} {:.2f}'.format(predicted_class, score)

                    draw = ImageDraw.Draw(image)
                    label_size = draw.textsize(label, font)

                    top, left, bottom, right = box
                    top = max(0, np.floor(top + 0.5).astype('int32'))
                    left = max(0, np.floor(left + 0.5).astype('int32'))
                    bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
                    right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
                    print(label, (left, top), (right, bottom))

                    if top - label_size[1] >= 0:
                        text_origin = np.array([left, top - label_size[1]])
                    else:
                        text_origin = np.array([left, top + 1])

                    duration = time.time() - start_time

                    # Nested 1-px rectangles give the line thickness.
                    for inset in range(thickness):
                        draw.rectangle(
                            [left + inset, top + inset, right - inset, bottom - inset],
                            outline=predictor.colors[c])
                    draw.rectangle(
                        [tuple(text_origin), tuple(text_origin + label_size)],
                        fill=predictor.colors[c])

                    frame_rate = '{:.2f}'.format(1.0 / duration)
                    draw.text(text_origin, label, fill=(0, 0, 0), font=font)
                    draw.text(np.array([0, 0]), frame_rate, fill=(0, 0, 0), font=font)
                    del draw

                if image_path is not None:
                    # Single-image mode: show, save, and stop.
                    print('just one image')
                    image.show()
                    image.save('./result1.jpg')
                    break
                else:
                    # Webcam mode: convert RGB -> BGR for OpenCV display.
                    open_cv_image = np.array(image)[:, :, ::-1].copy()
                    cv2.imshow('cimage', open_cv_image)
                    k = cv2.waitKey(1) & 0xff
                    if k == ord('q') or k == 27:
                        break
    finally:
        if cap is not None:
            cap.release()
def detect(image_path, model_path, yolo_weights=None):
    """
    Introduction
    ------------
        Load the model, run prediction on one image, append every detection
        to ./res/data.txt, then show and save the annotated image.

    Parameters
    ----------
        image_path: image path
        model_path: checkpoint directory, used when ``yolo_weights`` is None
        yolo_weights: optional weights file loaded instead of a checkpoint
    """
    image = Image.open(image_path)
    resize_image = letterbox_image(image, (192, 192))
    image_data = np.array(resize_image, dtype=np.float32)
    image_data /= 255.
    image_data = np.expand_dims(image_data, axis=0)

    input_image_shape = tf.placeholder(dtype=tf.int32, shape=(2, ))
    input_image = tf.placeholder(shape=[None, 192, 192, 3], dtype=tf.float32)
    with tf.variable_scope("model_gd"):
        predictor = yolo_predictor(config.obj_threshold, config.nms_threshold,
                                   config.classes_path, config.anchors_path2)
        boxes, scores, classes = predictor.predict(input_image, input_image_shape)

    with tf.Session() as sess:
        if yolo_weights is not None:
            with tf.variable_scope('predict'):
                boxes, scores, classes = predictor.predict(
                    input_image, input_image_shape)
            load_op = load_weights(tf.global_variables(scope='predict'),
                                   weights_file=yolo_weights)
            sess.run(load_op)
        else:
            saver = tf.train.Saver()
            model_file = tf.train.latest_checkpoint(model_path)
            saver.restore(sess, model_file)

        out_boxes, out_scores, out_classes = sess.run(
            [boxes, scores, classes],
            feed_dict={
                input_image: image_data,
                input_image_shape: [image.size[1], image.size[0]]
            })
        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

        font = ImageFont.truetype(
            font='font/FiraMono-Medium.otf',
            size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
        thickness = (image.size[0] + image.size[1]) // 300

        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = predictor.class_names[c]
            box = out_boxes[i]
            score = out_scores[i]
            label = '{} {:.2f}'.format(predicted_class, score)

            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)

            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

            # One line per detection: label,left,top,right,bottom.
            # Every field is comma-separated (left/top used "." before).
            data = ",".join(
                [str(label), str(left), str(top), str(right), str(bottom)]) + "\n"
            with open('./res/data.txt', "a") as f:
                f.write(data)
            print(label, (left, top), (right, bottom))

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Nested 1-px rectangles give the line thickness.
            for inset in range(thickness):
                draw.rectangle(
                    [left + inset, top + inset, right - inset, bottom - inset],
                    outline=predictor.colors[c])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=predictor.colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw

        image.show()
        image.save('./res/1.jpg')
def detect_image(cls, image):
    """Detect cars and pedestrians in one image and return them as a dict.

    Only 'Car' and 'Pedestrian' detections whose clamped box covers at least
    1024 px**2 are reported.  Every entry carries the current value of
    ``cls.IDvalue`` as its id; this method does not change ``cls.IDvalue``.

    Args:
        image: PIL image to run detection on.

    Returns:
        ``{'Car': [...], 'Pedestrian': [...]}`` where each entry is
        ``{'id': int, 'box2d': [left, top, right, bottom]}``.  A class with
        no detections maps to an empty list.
    """
    start = timer()
    class_names = cls._get_class()

    # The network input must be a multiple of 32 in both dimensions.
    new_image_size = (image.width - (image.width % 32),
                      image.height - (image.height % 32))
    boxed_image = letterbox_image(image, new_image_size)
    image_data = np.array(boxed_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
    image_shape = [image.size[1], image.size[0]]
    out_boxes, out_scores, out_classes = cls.compute_output(image_data, image_shape)

    all_result = {'Car': [], 'Pedestrian': []}

    for i, c in reversed(list(enumerate(out_classes))):
        predicted_class = class_names[c]

        # Convert to plain int because numpy int32 is not JSON serialisable.
        top, left, bottom, right = out_boxes[i]
        top = int(max(0, np.floor(top + 0.5).astype('int32')))
        left = int(max(0, np.floor(left + 0.5).astype('int32')))
        bottom = int(min(image.size[1], np.floor(bottom + 0.5).astype('int32')))
        right = int(min(image.size[0], np.floor(right + 0.5).astype('int32')))

        # Skip boxes below the 1024 px**2 area threshold and other classes.
        if (bottom - top) * (right - left) < 1024:
            continue
        if predicted_class != 'Car' and predicted_class != 'Pedestrian':
            continue

        # Prediction result for this object.
        all_result[predicted_class].append(
            {'id': int(cls.IDvalue), 'box2d': [left, top, right, bottom]})

    end = timer()
    print("1フレームの処理時間 = ", end - start)
    return all_result
import os import config import argparse import numpy as np import colorsys import tensorflow as tf from yolo_predict import yolo_predictor from PIL import Image, ImageFont, ImageDraw from utils import letterbox_image, load_weights with tf.Session() as sess: image_path = "F:\\deeplearning_dataset\\new_ribbon\\split_imge\\1109_(98)_1.jpg" ##########数据准备阶段################ image = Image.open(image_path) resize_image = letterbox_image(image, (416, 416)) image_data = np.array(resize_image, dtype=np.float32) image_data /= 255. image_data = np.expand_dims(image_data, axis=0) ##################################### pb_file_path = 'F:\\github_working\\version_2_190114\\alsochen-tensorflow-yolo3-threeoutput\\tensorflow-yolo3\\pb_file\\model.pb' with tf.gfile.GFile(pb_file_path, 'rb') as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) pred_im_shape, pred_input_img, boxes, scores, classes = tf.graph_util.import_graph_def( graph_def, return_elements=[ 'pred_im_shape:0', 'pred_input_img:0', 'predict/pred_boxes:0', 'predict/pred_scores:0', 'predict/pred_classes:0' ]) out_boxes, out_scores, out_classes = sess.run(