def parse_annotation(self, annotation):
    """Load one annotated sample and fit it to the training input size.

    Args:
        annotation: Whitespace-separated text line. The first token is the
            image path; each following token is one box written as
            comma-separated integers.

    Returns:
        The ``(image, bboxes)`` pair returned by ``utils.image_preporcess``.

    Raises:
        KeyError: If the image path does not exist, or the file exists but
            cannot be decoded as an image.
    """
    line = annotation.split()
    image_path = line[0]
    if not os.path.exists(image_path):
        raise KeyError("%s does not exist ... " % image_path)

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports an undecodable file by returning None instead
        # of raising; np.array(None) would otherwise be passed on silently.
        raise KeyError("%s could not be read as an image ... " % image_path)
    image = np.array(image)
    bboxes = np.array([list(map(int, box.split(','))) for box in line[1:]])

    if self.data_aug:
        image, bboxes = self.random_horizontal_flip(
            np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes))

    image, bboxes = utils.image_preporcess(
        np.copy(image),
        [self.train_input_size, self.train_input_size],
        np.copy(bboxes))
    return image, bboxes
def representative_data_gen():
    """Yield a single batch of preprocessed images read from ``FLAGS.dataset``.

    The dataset file is split on whitespace. For each of the first
    ``FLAGS.loop`` tokens that names an existing, decodable image, the
    preprocessed image is written to the row with the same index. Rows whose
    token is not a readable image keep their initial zeros.

    Yields:
        A 1-tuple containing a ``tf.constant`` built from a float32 array of
        shape ``(FLAGS.loop, input_height, input_width, 3)``.
    """
    # Close the dataset file deterministically instead of leaking the handle.
    with open(FLAGS.dataset) as dataset_file:
        fimage = dataset_file.read().split()

    input_width, input_height = utils.input_size(FLAGS.input_size)
    batched_input = np.zeros(
        (FLAGS.loop, input_height, input_width, 3), dtype=np.float32)

    # Stop at the end of the token list so a short file cannot raise
    # IndexError; the remaining rows simply stay zero.
    for input_value in range(min(FLAGS.loop, len(fimage))):
        if not os.path.exists(fimage[input_value]):
            continue
        original_image = cv2.imread(fimage[input_value])
        if original_image is None:
            # Existing file that is not a decodable image: leave row as zeros.
            continue
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
        image_data = utils.image_preporcess(
            np.copy(original_image), [input_height, input_width])
        img_in = image_data[np.newaxis, ...].astype(np.float32)
        batched_input[input_value, :] = img_in
        print(input_value)

    batched_input = tf.constant(batched_input)
    yield (batched_input, )
def parse_annotation(self, annotation):
    """Load one annotated sample and fit it to the training input size.

    Args:
        annotation: Whitespace-separated text line. The first token is the
            image path; each following token is one box written as
            comma-separated numbers (each value is truncated to ``int`` after
            being parsed as ``float``).

    Returns:
        The ``(image, bboxes)`` pair returned by ``utils.image_preporcess``.

    Raises:
        KeyError: If the image path does not exist, or the file exists but
            cannot be decoded as an image.
    """
    line = annotation.split()
    image_path = line[0]
    if not os.path.exists(image_path):
        raise KeyError("%s does not exist ... " % image_path)

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for an undecodable file; fail here with a
        # clear message rather than deep inside the augmentation code.
        raise KeyError("%s could not be read as an image ... " % image_path)
    image = np.array(image)
    bboxes = np.array([
        [int(float(value)) for value in box.split(',')]
        for box in line[1:]
    ])

    if self.data_aug:
        image, bboxes = self.random_horizontal_flip(
            np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes))

    image, bboxes = utils.image_preporcess(
        np.copy(image),
        [self.train_input_size, self.train_input_size],
        np.copy(bboxes))
    return image, bboxes
def parse_annotation(self, annotation):
    """Load one annotated sample and fit it to the training input size.

    No augmentation is applied in this variant.

    Args:
        annotation: Annotation line as ``str``, or any non-``str`` object
            with a ``decode()`` method (e.g. ``bytes``). The first
            whitespace-separated token is the image path; each following
            token is one box written as comma-separated numbers.

    Returns:
        The ``(image, bboxes)`` pair returned by ``utils.image_preporcess``.

    Raises:
        KeyError: If the image path does not exist, or the file exists but
            cannot be decoded as an image.
    """
    # Type check by isinstance rather than by searching the type's repr.
    if not isinstance(annotation, str):
        annotation = annotation.decode()

    line = annotation.split()
    image_path = line[0]
    if not os.path.exists(image_path):
        raise KeyError("%s does not exist ... " % image_path)

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for an undecodable file instead of raising.
        raise KeyError("%s could not be read as an image ... " % image_path)
    image = np.array(image)
    bboxes = np.array([
        [int(float(value)) for value in box.split(',')]
        for box in line[1:]
    ])

    image, bboxes = utils.image_preporcess(
        np.copy(image),
        [self.train_input_size, self.train_input_size],
        np.copy(bboxes))
    return image, bboxes
def predict(self, image):
    """Run the network on one image and return the boxes kept after NMS.

    Args:
        image: Image array with three dimensions; its first two extents are
            passed to ``utils.postprocess_boxes`` as the original size.

    Returns:
        The result of ``utils.nms`` applied with ``self.iou_threshold`` to
        the boxes that ``utils.postprocess_boxes`` keeps at
        ``self.score_threshold``.
    """
    # Only the shape is needed, so read it without copying the whole image.
    org_h, org_w, _ = np.shape(image)

    image_data = utils.image_preporcess(image, [self.input_size, self.input_size])
    image_data = image_data[np.newaxis, ...]

    pred_mbbox, pred_lbbox = self.sess.run(
        [self.pred_mbbox, self.pred_lbbox],
        feed_dict={
            self.input_data: image_data,
            self.trainable: False
        }
    )

    # Flatten both prediction heads into one (N, 5 + num_classes) table.
    pred_bbox = np.concatenate(
        [np.reshape(pred_mbbox, (-1, 5 + self.num_classes)),
         np.reshape(pred_lbbox, (-1, 5 + self.num_classes))],
        axis=0)
    bboxes = utils.postprocess_boxes(
        pred_bbox, (org_h, org_w), self.input_size, self.score_threshold)
    bboxes = utils.nms(bboxes, self.iou_threshold)
    return bboxes
def parse_annotation(self, annotation):
    """Load one annotated sample and fit it to the network input size.

    Args:
        annotation: Whitespace-separated text line. The first token is the
            image path; each following token is one box written as
            comma-separated numbers. A line with no box tokens is allowed.

    Returns:
        The ``(image, bboxes)`` pair returned by ``utils.image_preporcess``.
        ``None`` is passed in as the boxes when the line carries none.

    Raises:
        KeyError: If the image path does not exist, or the file exists but
            cannot be decoded as an image.
    """
    line = annotation.split()
    image_path = line[0]
    if not os.path.exists(image_path):
        raise KeyError("%s does not exist ... " % image_path)

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for an undecodable file instead of raising.
        raise KeyError("%s could not be read as an image ... " % image_path)
    image = np.array(image)

    if line[1:]:
        bboxes = np.array([
            [int(float(value)) for value in box.split(',')]
            for box in line[1:]
        ])  # shape: [n, 5], where n is the number of boxes
    else:
        bboxes = None

    if self.data_aug:
        image, bboxes = self.img_augment(np.copy(image), bboxes)

    image, bboxes = utils.image_preporcess(
        np.copy(image), [self.input_size, self.input_size], bboxes)
    return image, bboxes
def parse_annotation(self, annotation):
    """Load one annotated sample, resize it, and sanitise its boxes.

    After preprocessing, boxes with non-positive width or height are dropped
    and the remaining corner coordinates are clipped to the image bounds.

    Args:
        annotation: Whitespace-separated text line. The first token is the
            image path; each following token is one box written as five
            comma-separated numbers, unpacked as
            ``x1, y1, x2, y2, cls_label``.

    Returns:
        ``(image, bboxes)`` where ``bboxes`` is an array with one
        ``[x1, y1, x2, y2, cls_label]`` row per surviving box. The array has
        shape ``(0, 5)`` when no box survives.

    Raises:
        KeyError: If the image path does not exist, or the file exists but
            cannot be decoded as an image.
    """
    line = annotation.split()
    image_path = line[0]
    if not os.path.exists(image_path):
        raise KeyError("%s does not exist ... " % image_path)

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for an undecodable file instead of raising.
        raise KeyError("%s could not be read as an image ... " % image_path)
    image = np.array(image)
    bboxes = np.array([
        [int(float(value)) for value in box.split(',')]
        for box in line[1:]
    ])

    if self.data_aug:
        image, bboxes = self.random_horizontal_flip(
            np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes))

    image, bboxes = utils.image_preporcess(
        np.copy(image),
        [self.train_input_size, self.train_input_size],
        np.copy(bboxes))

    max_y, max_x = image.shape[0], image.shape[1]
    updated_bb = []
    for bb in bboxes:
        x1, y1, x2, y2, cls_label = bb
        if x2 <= x1 or y2 <= y1:
            # Don't use such boxes as this may cause nan loss.
            continue
        # Clip coordinates to the image dimensions: negative values or
        # values beyond the image may cause nan loss.
        x1 = int(np.clip(x1, 0, max_x))
        y1 = int(np.clip(y1, 0, max_y))
        x2 = int(np.clip(x2, 0, max_x))
        y2 = int(np.clip(y2, 0, max_y))
        updated_bb.append([x1, y1, x2, y2, cls_label])

    # reshape keeps the two-dimensional layout even when every box was
    # dropped (np.array([]) alone would have shape (0,)).
    return image, np.array(updated_bb).reshape(-1, 5)
def make_vid_output():
    """Run detection on every frame of ``video_path`` and show it in a window.

    Uses the module-level ``graph``, ``return_tensors``, ``input_size``,
    ``num_classes`` and ``video_path``. The loop ends when ``q`` is pressed.

    Raises:
        ValueError: When the capture delivers no frame (including the end of
            the video).
    """
    with tf.Session(graph=graph) as sess:
        vid = cv2.VideoCapture(video_path)
        try:
            while True:
                _, frame = vid.read()
                if frame is None:
                    raise ValueError("No image!")
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                frame_size = frame.shape[:2]
                image_data = utils.image_preporcess(
                    np.copy(frame), [input_size, input_size])
                image_data = image_data[np.newaxis, ...]

                pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
                    [return_tensors[1], return_tensors[2], return_tensors[3]],
                    feed_dict={return_tensors[0]: image_data})

                # Flatten the three heads into one (N, 5 + num_classes) table.
                pred_bbox = np.concatenate([
                    np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                    np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                    np.reshape(pred_lbbox, (-1, 5 + num_classes))
                ], axis=0)

                bboxes = utils.postprocess_boxes(
                    pred_bbox, frame_size, input_size, 0.3)
                bboxes = utils.nms(bboxes, 0.45, method='nms')
                image = utils.draw_bbox(frame, bboxes)

                cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
                result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                cv2.imshow("result", result)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Free the capture on every exit path, including the ValueError.
            vid.release()
def obj_detect(image):
    """Detect objects in one image and describe them as plain dictionaries.

    The image is converted with ``cv2.COLOR_BGR2RGB``, resized to the
    configured ``cfg['input_size']`` and sent to ``obj_detect_infer.infer``.
    Detections are filtered with a score threshold of 0.5 and NMS at 0.45.

    Returns:
        ``{'objs': [...]}`` with one dict per detection holding the keys
        ``x1``, ``y1``, ``x2``, ``y2``, ``confidence`` and ``class``.
    """
    side = cfg['input_size']
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    rgb_hw = rgb_image.shape[:2]

    batch = utils.image_preporcess(np.copy(rgb_image), [side, side])
    batch = batch[np.newaxis, ...]

    # Model inference: one output per detection head.
    heads = obj_detect_infer.infer(batch)
    small_head, medium_head, large_head = heads

    # Post-processing: flatten the heads, then threshold and suppress.
    row_width = 5 + cfg['num_classes']
    flat_predictions = np.concatenate(
        [np.reshape(head, (-1, row_width))
         for head in (small_head, medium_head, large_head)],
        axis=0)
    detections = utils.postprocess_boxes(flat_predictions, rgb_hw, side, 0.5)
    detections = utils.nms(detections, 0.45, method='nms')

    return {
        'objs': [
            {
                'x1': det[0],
                'y1': det[1],
                'x2': det[2],
                'y2': det[3],
                'confidence': det[4],
                'class': det[5]
            }
            for det in detections
        ]
    }
def gen():
    """Yield annotated frames of an RTMP stream as multipart JPEG chunks.

    Inference runs on every other frame; the frames in between are drawn
    with the boxes of the most recent inference. Uses the module-level
    ``sess``, ``return_tensors``, ``input_size`` and ``num_classes``.

    Yields:
        ``bytes``: a ``--frame`` boundary, a ``Content-Type: image/jpeg``
        header and the encoded frame.
    """
    myrtmp_addr = "rtmp://localhost/live/indycar live=1"
    cap = cv2.VideoCapture(myrtmp_addr)
    infer_flag = True
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print('Input source error!')
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            if infer_flag:
                frame_size = frame.shape[:2]
                image_data = utils.image_preporcess(
                    np.copy(frame), [input_size, input_size])
                image_data = image_data[np.newaxis, ...]

                pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
                    [return_tensors[1], return_tensors[2], return_tensors[3]],
                    feed_dict={return_tensors[0]: image_data})

                pred_bbox = np.concatenate(
                    [np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                     np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                     np.reshape(pred_lbbox, (-1, 5 + num_classes))],
                    axis=0)

                bboxes = utils.postprocess_boxes(
                    pred_bbox, frame_size, input_size, 0.3)
                bboxes = utils.nms(bboxes, 0.45, method='nms')

            # Alternate between inferring and reusing the previous boxes.
            infer_flag = not infer_flag

            image = utils.draw_bbox(frame, bboxes)
            result = np.asarray(image)
            frame = cv2.cvtColor(result, cv2.COLOR_RGB2BGR)
            ret, jpeg = cv2.imencode('.jpg', frame)
            if not ret:
                # Encoding failed: drop this frame rather than crash on
                # jpeg.tobytes().
                continue
            frame = jpeg.tobytes()
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n\r\n')
    finally:
        # Runs on normal end, on errors, and when the consumer closes the
        # generator, so the stream connection is always released.
        cap.release()
def run(self):
    """Build the detector, then detect and display frames until ``q``.

    The model is assembled from ``self._input_layer`` and the decoded
    ``self._feature_maps``, with weights loaded from
    ``./checkpoint/yolov3.ckpt``. Frames are read from ``self._vid``.

    Raises:
        ValueError: When ``self._vid.read()`` reports no frame.
    """
    decoded_heads = [
        decode(feature_map, index)
        for index, feature_map in enumerate(self._feature_maps)
    ]
    model = tf.keras.Model(self._input_layer, decoded_heads)
    utils.load_weights(model, "./checkpoint/yolov3.ckpt")
    model.summary()

    while True:
        grabbed, frame = self._vid.read()
        if not grabbed:
            raise ValueError("No image!")
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_hw = frame.shape[:2]

        batch = utils.image_preporcess(
            np.copy(frame), [self._input_size, self._input_size])
        batch = batch[np.newaxis, ...].astype(np.float32)

        # Only the forward pass is timed.
        started = time.time()
        raw_heads = model.predict(batch)
        finished = time.time()
        elapsed = finished - started

        flat_heads = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in raw_heads]
        predictions = tf.concat(flat_heads, axis=0)
        boxes = utils.postprocess_boxes(
            predictions, frame_hw, self._input_size, 0.3)
        boxes = utils.nms(boxes, 0.45, method='nms')
        image = utils.draw_bbox(frame, boxes)

        overlay = np.asarray(image)
        timing_text = "time: %.2f ms" % (1000 * elapsed)
        cv2.putText(overlay, text=timing_text, org=(50, 70),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1,
                    color=(255, 0, 0), thickness=2)
        cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        shown = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        cv2.imshow("result", shown)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
def __hit_model(self):
    """Load the saved model, run it once on a sample image, and return it.

    The sample image at the module-level ``jpg1Path`` is preprocessed to
    ``self.input_size`` and fed to the predictor under the key ``"input"``.

    Returns:
        The predictor created by ``tf.contrib.predictor.from_saved_model``
        for ``self.weightsPath``.

    Raises:
        IOError: If the sample image cannot be read, or if loading the saved
            model fails with ``IOError``.
    """
    temp_inp_yolo = cv.imread(jpg1Path)
    if temp_inp_yolo is None:
        # imread returns None for a missing or undecodable file.
        raise IOError("%s could not be read as an image" % jpg1Path)

    image_data = utils.image_preporcess(
        temp_inp_yolo, [self.input_size, self.input_size])
    image_data = image_data[np.newaxis, ...]
    temp_inp_yolo = np.array(image_data, dtype=np.float32)

    predict_fn = None
    try:
        predict_fn = tf.contrib.predictor.from_saved_model(self.weightsPath)
        predict_fn({"input": temp_inp_yolo})
    except IOError:
        print("Unable to read either array or weight path")
        if predict_fn is None:
            # The loader itself failed, so there is no predictor to retry
            # with; previously this path hit an unbound local variable.
            raise
        # The first prediction failed: try once more, as before.
        predict_fn({"input": temp_inp_yolo})
    return predict_fn
def representative_data_gen():
    """Yield a single batch of preprocessed images, or zeros without a dataset.

    When ``FLAGS.dataset`` is set, the file is split on whitespace and, for
    each of the first ``FLAGS.loop`` tokens that names an existing, decodable
    image, the preprocessed image is written to the row with the same index.
    All other rows keep their initial zeros.

    Yields:
        A 1-tuple containing a ``tf.constant`` built from a float32 array of
        shape ``(FLAGS.loop, FLAGS.input_size, FLAGS.input_size, 3)``.
    """
    batched_input = np.zeros(
        (FLAGS.loop, FLAGS.input_size, FLAGS.input_size, 3), dtype=np.float32)

    if FLAGS.dataset:
        # Fill batched_input with real data; otherwise the 0-valued array is
        # used as a mock input.
        with open(FLAGS.dataset) as dataset_file:
            fimage = dataset_file.read().split()

        # Stop at the end of the token list so a short file cannot raise
        # IndexError.
        for input_value in range(min(FLAGS.loop, len(fimage))):
            if not os.path.exists(fimage[input_value]):
                continue
            original_image = cv2.imread(fimage[input_value])
            if original_image is None:
                # Existing file that is not a decodable image: keep zeros.
                continue
            original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
            image_data = utils.image_preporcess(
                np.copy(original_image), [FLAGS.input_size, FLAGS.input_size])
            img_in = image_data[np.newaxis, ...].astype(np.float32)
            batched_input[input_value, :] = img_in
            print(input_value)

    batched_input = tf.constant(batched_input)
    yield (batched_input,)
def process(frame, input_size, model, object_to_find, FLAGS, ANCHORS, STRIDES, XYSCALE):
    """Detect objects in one frame and draw the boxes.

    Args:
        frame: Frame to analyse; it is converted with ``cv2.COLOR_BGR2RGB``.
        input_size: Side length the frame is resized to for the network.
        model: Object whose ``predict`` is called when
            ``FLAGS.framework == 'tf'``.
        object_to_find: Forwarded unchanged to ``utils.draw_bbox``.
        FLAGS: Object providing ``framework`` and ``model``.
        ANCHORS, STRIDES, XYSCALE: Forwarded to ``utils.postprocess_bbbox``
            (``XYSCALE`` only when ``FLAGS.model == 'yolov4'``).

    Returns:
        ``(result, was_found, coords)``: the drawn image converted with
        ``cv2.COLOR_RGB2BGR``, plus the two extra values returned by
        ``utils.draw_bbox``.
    """
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_size = frame.shape[:2]
    image_data = utils.image_preporcess(np.copy(frame), [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    if FLAGS.framework == 'tf':
        pred_bbox = model.predict(image_data)
    else:
        # NOTE: interpreter, input_details and output_details are not
        # parameters; this branch relies on them existing at module level.
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        pred_bbox = [
            interpreter.get_tensor(output_details[i]['index'])
            for i in range(len(output_details))
        ]

    if FLAGS.model == 'yolov4':
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
    else:
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)

    bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.25)
    bboxes = utils.nms(bboxes, 0.213, method='nms')

    image, was_found, coords = utils.draw_bbox(frame, bboxes, object_to_find)
    result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return result, was_found, coords
def test_video(video_path, model_path):
    """Run the test model over a video and display the annotated frames.

    Args:
        video_path: Source passed to ``cv2.VideoCapture``.
        model_path: Weights passed to ``utils.load_weights``.

    Raises:
        ValueError: When the capture delivers no frame (including the end of
            the video). The loop otherwise ends when ``q`` is pressed.
    """
    input_size = 416

    model = yolov3.build_for_test()
    utils.load_weights(model, model_path)

    vid = cv2.VideoCapture(video_path)
    try:
        while True:
            return_value, frame = vid.read()
            if not return_value:
                raise ValueError("No image!")
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            frame_size = frame.shape[:2]
            image_data = utils.image_preporcess(
                np.copy(frame), [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            # Only the forward pass is timed.
            prev_time = time.time()
            pred_bbox = model.predict_on_batch(image_data)
            curr_time = time.time()
            exec_time = curr_time - prev_time

            pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
            pred_bbox = tf.concat(pred_bbox, axis=0)
            bboxes = utils.postprocess_boxes(
                pred_bbox, frame_size, input_size, 0.3)
            bboxes = utils.nms(bboxes, 0.45, method='nms')
            image = utils.draw_bbox(frame, bboxes)

            result = np.asarray(image)
            info = "time: %.2f ms" % (1000 * exec_time)
            cv2.putText(result, text=info, org=(50, 70),
                        fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1,
                        color=(255, 0, 0), thickness=2)
            cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
            result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            cv2.imshow("result", result)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # The capture is opened here, so it is released here on every exit.
        vid.release()
def parse_annotation(self, annotation):
    """Load one annotated sample whose image path may contain one space.

    If the second whitespace-separated token contains ``.jpg`` it is treated
    as the tail of a path that was split at a space, and is joined back onto
    the first token with a single space.

    Args:
        annotation: Text line holding the image path followed by zero or more
            boxes, each written as comma-separated numbers.

    Returns:
        The ``(image, bboxes)`` pair returned by ``utils.image_preporcess``.

    Raises:
        KeyError: If the image path does not exist, or the file exists but
            cannot be decoded as an image.
    """
    line = annotation.split()

    # Guard the lookup of line[1]: a line with only a path has no second
    # token and previously raised IndexError here.
    if len(line) > 1 and '.jpg' in line[1]:
        line[0:2] = [line[0] + ' ' + line[1]]

    image_path = str(line[0])
    if not os.path.exists(image_path):
        raise KeyError("%s does not exist ... " % image_path)

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for an undecodable file instead of raising.
        raise KeyError("%s could not be read as an image ... " % image_path)
    image = np.array(image)
    bboxes = np.array([
        [int(float(value)) for value in box.split(',')]
        for box in line[1:]
    ])

    if self.data_aug:
        image, bboxes = self.random_horizontal_flip(
            np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes))

    image, bboxes = utils.image_preporcess(
        np.copy(image),
        [self.train_input_size, self.train_input_size],
        np.copy(bboxes))
    return image, bboxes
def predict(self, image, annotation):
    """Run the network on one image and also evaluate the loss.

    Args:
        image: Image array with three dimensions; its first two extents are
            passed to ``utils.postprocess_boxes`` as the original size.
        annotation: Annotation line for the image; it is parsed by
            ``Dataset('train').parse_annotation`` to obtain the ground-truth
            boxes that feed the loss.

    Returns:
        ``(bboxes, loss)``: the boxes kept after NMS and the value fetched
        for ``self.loss``.
    """
    # Only the shape is needed, so read it without copying the whole image.
    org_h, org_w, _ = np.shape(image)

    image_data = utils.image_preporcess(image, [self.input_size, self.input_size])
    image_data = image_data[np.newaxis, ...]

    # Build the dataset helper once and reuse it for both calls.
    dataset = Dataset('train')
    _, gt_bboxes = dataset.parse_annotation(annotation)
    (label_sbbox, label_mbbox, label_lbbox,
     sbboxes, mbboxes, lbboxes) = dataset.preprocess_true_boxes(gt_bboxes)

    # Add the leading batch axis expected by the placeholders.
    label_sbbox = label_sbbox[np.newaxis, ...]
    label_mbbox = label_mbbox[np.newaxis, ...]
    label_lbbox = label_lbbox[np.newaxis, ...]
    sbboxes = sbboxes[np.newaxis, ...]
    mbboxes = mbboxes[np.newaxis, ...]
    lbboxes = lbboxes[np.newaxis, ...]

    pred_sbbox, pred_mbbox, pred_lbbox, loss = self.sess.run(
        [self.pred_sbbox, self.pred_mbbox, self.pred_lbbox, self.loss],
        feed_dict={
            self.input_data: image_data,
            self.label_sbbox: label_sbbox,
            self.label_mbbox: label_mbbox,
            self.label_lbbox: label_lbbox,
            self.sbboxes: sbboxes,
            self.mbboxes: mbboxes,
            self.lbboxes: lbboxes,
            self.trainable: False
        }
    )

    pred_bbox = np.concatenate(
        [np.reshape(pred_sbbox, (-1, 5 + self.num_classes)),
         np.reshape(pred_mbbox, (-1, 5 + self.num_classes)),
         np.reshape(pred_lbbox, (-1, 5 + self.num_classes))],
        axis=0)
    bboxes = utils.postprocess_boxes(
        pred_bbox, (org_h, org_w), self.input_size, self.score_threshold)
    bboxes = utils.nms(bboxes, self.iou_threshold)
    return bboxes, loss
def parse_annotation(self, annotation):
    """Load one annotated sample and fit it to the training input size.

    Args:
        annotation: Text line such as
            ``"./data/images\\Anime_180.jpg 388,532,588,729,0 917,154,1276,533,0"``:
            the image path followed by boxes written as comma-separated
            integers.

    Returns:
        The ``(image, bboxes)`` pair returned by ``utils.image_preporcess``.

    Raises:
        KeyError: If the image path does not exist, or the file exists but
            cannot be decoded as an image.
    """
    # Split on whitespace, e.g. into
    # ['./data/images\\Anime_180.jpg', '388,532,588,729,0', '917,154,1276,533,0']
    line = annotation.split()
    image_path = line[0]
    if not os.path.exists(image_path):
        raise KeyError("%s does not exist ... " % image_path)

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for an undecodable file instead of raising.
        raise KeyError("%s could not be read as an image ... " % image_path)
    image = np.array(image)

    # Turn the box tokens into
    # [[388, 532, 588, 729, 0], [917, 154, 1276, 533, 0]]
    bboxes = np.array([list(map(int, box.split(','))) for box in line[1:]])

    # Training data: apply the random augmentations.
    if self.train_flag:
        image, bboxes = self.random_horizontal_flip(
            np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes))

    image, bboxes = utils.image_preporcess(
        np.copy(image),
        [self.train_input_size, self.train_input_size],
        np.copy(bboxes))
    return image, bboxes
def get_boxes(model, original_image, input_size=608):
    """Detect objects in one image and return boxes, scores and class ids.

    Args:
        model: Object whose ``predict`` returns the raw network outputs.
        original_image: Image to analyse; it is converted with
            ``cv2.COLOR_BGR2RGB``.
        input_size: Side length the image is resized to for the network.

    Returns:
        ``(boxs, confidence, class_idx)``: three lists taken from columns
        ``0:4``, ``4`` and ``5`` of the detections kept after NMS. All three
        are empty when nothing is detected.
    """
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]

    image_data = utils.image_preporcess(
        np.copy(original_image), [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    pred_bbox = model.predict(image_data)
    pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
    bboxes = utils.postprocess_boxes(
        pred_bbox, original_image_size, input_size, 0.25)
    if bboxes is None:
        return [], [], []

    bboxes = utils.nms(bboxes, 0.213, method='nms')
    if len(bboxes) == 0:
        # np.array([]) is one-dimensional, so the column slicing below would
        # raise IndexError when NMS keeps nothing.
        return [], [], []

    detections = np.array(bboxes)
    boxs = list(detections[:, 0:4])
    confidence = list(detections[:, 4])
    class_idx = list(detections[:, 5])
    return boxs, confidence, class_idx
def parse_annotation(self, annotation):
    """Load the RGB/LWIR image pair of one annotated sample.

    Only the last backslash-separated component of the annotated path is
    used; both images are looked up under
    ``D:\\training_data\\<self.dataset_type>\\JPEGImages`` in the ``rgb`` and
    ``lwir`` sub-folders.

    Args:
        annotation: Whitespace-separated text line. The first token is the
            image path; each following token is one box written as
            comma-separated numbers.

    Returns:
        The ``(img_rgb, img_lwir, bboxes)`` triple returned by
        ``utils.image_preporcess``.

    Raises:
        KeyError: If either image is missing or cannot be decoded.
    """
    line = annotation.split()
    image_path = line[0].split("\\")

    # Two-image input: the same file name is read from both modalities.
    im1_file = os.path.join(
        r"D:\training_data", self.dataset_type, "JPEGImages", 'rgb',
        image_path[-1])
    img_rgb = cv2.imread(im1_file)
    im2_file = os.path.join(
        r"D:\training_data", self.dataset_type, "JPEGImages", 'lwir',
        image_path[-1])
    img_lwir = cv2.imread(im2_file)

    # cv2.imread returns None for a missing or undecodable file; without
    # this check the None would be copied and passed into the augmentations.
    for file_name, loaded in ((im1_file, img_rgb), (im2_file, img_lwir)):
        if loaded is None:
            raise KeyError(
                "%s does not exist or cannot be read ... " % file_name)

    bboxes = np.array([
        [int(float(value)) for value in box.split(',')]
        for box in line[1:]
    ])

    if self.data_aug:
        img_rgb, img_lwir, bboxes = self.random_horizontal_flip(
            np.copy(img_rgb), np.copy(img_lwir), np.copy(bboxes))
        img_rgb, img_lwir, bboxes = self.random_crop(
            np.copy(img_rgb), np.copy(img_lwir), np.copy(bboxes))
        img_rgb, img_lwir, bboxes = self.random_translate(
            np.copy(img_rgb), np.copy(img_lwir), np.copy(bboxes))

    img_rgb, img_lwir, bboxes = utils.image_preporcess(
        np.copy(img_rgb), np.copy(img_lwir),
        [self.train_input_size, self.train_input_size], np.copy(bboxes))
    return img_rgb, img_lwir, bboxes
def parse_annotation(self, annotation):
    """Load one annotated sample, augment it, and fit it to the input size.

    When ``self.data_aug`` is set, either the flip/crop/translate chain runs
    or, if ``self.new_aug`` is set, ``self.aug_fun`` is used instead. For
    ``aug_fun`` the first four box columns are divided by ``[w, h, w, h]``
    and reordered with ``[1, 0, 3, 2]``; the result is reordered back and
    multiplied by the size of the augmented image.

    Args:
        annotation: Whitespace-separated text line. The first token is the
            image path; each following token is one box whose comma-separated
            fields are converted with ``self.convert_to_zero``.

    Returns:
        The ``(image, bboxes)`` pair returned by ``utils.image_preporcess``.

    Raises:
        KeyError: If the image path does not exist, or the file exists but
            cannot be decoded as an image.
    """
    line = annotation.split()
    image_path = line[0]
    if not os.path.exists(image_path):
        raise KeyError("%s does not exist ... " % image_path)

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for an undecodable file instead of raising.
        raise KeyError("%s could not be read as an image ... " % image_path)
    image = np.array(image)
    bboxes = np.array([
        list(map(self.convert_to_zero, box.split(',')))
        for box in line[1:]
    ])

    if self.data_aug:
        if not self.new_aug:
            image, bboxes = self.random_horizontal_flip(
                np.copy(image), np.copy(bboxes))
            image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
            image, bboxes = self.random_translate(
                np.copy(image), np.copy(bboxes))
        else:
            h, w, _ = image.shape
            scale = np.array([w, h, w, h], dtype=np.float64)
            bboxes_aug = bboxes[:, :4] / scale
            bboxes_aug = bboxes_aug[:, [1, 0, 3, 2]]

            image, bboxes_aug = self.aug_fun(image, bboxes_aug)

            # Undo the column swap and rescale with the size of the image
            # that came back from aug_fun.
            bboxes_aug = bboxes_aug[:, [1, 0, 3, 2]]
            h, w, _ = image.shape
            scale = np.array([w, h, w, h], dtype=np.float64)
            bboxes[:, :4] = bboxes_aug * scale

    image, bboxes = utils.image_preporcess(
        np.copy(image),
        [self.train_input_size, self.train_input_size],
        np.copy(bboxes))
    return image, bboxes
def test_image():
    """Run detection on every validation image and write one result file each.

    Image paths are the first space-separated field of each line in the
    validation list. For every image a ``<name>.txt`` file is written under
    the module-level ``metrics_path`` with one line per detection:
    ``class_name score x1 y1 x2 y2``. Uses the module-level ``graph``,
    ``return_tensors``, ``input_size``, ``num_classes`` and ``name_map_dict``.
    """
    with tf.Session(graph=graph) as sess:
        val_txt_path = '/home/amax/workspace-fire/tensorflow-yolov3/data/my_data/fire_val_add_longmao.txt'
        with open(val_txt_path) as list_file:
            images = list_file.readlines()
        # Blank lines carry no path and are skipped.
        image_path_list = [i.strip().split(' ')[0] for i in images if i.strip()]

        for image_path in image_path_list:
            original_image = cv2.imread(image_path)
            original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
            original_image_size = original_image.shape[:2]
            image_data = utils.image_preporcess(
                np.copy(original_image), [input_size, input_size])
            image_data = image_data[np.newaxis, ...]

            pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
                [return_tensors[1], return_tensors[2], return_tensors[3]],
                feed_dict={return_tensors[0]: image_data})

            pred_bbox = np.concatenate(
                [np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                 np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                 np.reshape(pred_lbbox, (-1, 5 + num_classes))],
                axis=0)

            bboxes = utils.postprocess_boxes(
                pred_bbox, original_image_size, input_size, 0.1)
            bboxes = utils.nms(bboxes, 0.5, method='nms')

            # Strip the extension from the file name only. Splitting the
            # whole joined path on '.' truncated it at the first dot in
            # metrics_path (e.g. './out' became '').
            base_name = os.path.basename(image_path).split('.')[0]
            detection_txt = os.path.join(metrics_path, base_name + '.txt')
            txt_path = os.path.dirname(detection_txt)
            if txt_path and not os.path.exists(txt_path):
                os.makedirs(txt_path)

            with open(detection_txt, 'w') as result_file:
                for bbox in bboxes:
                    score = bbox[4]
                    class_ind = int(bbox[5])
                    x1, y1, x2, y2 = bbox[0:4]
                    writer_str = ' '.join([
                        name_map_dict[class_ind], str(score),
                        str(x1), str(y1), str(x2), str(y2)])
                    result_file.write(writer_str + '\n')
def main(_argv):
    """Build the YOLOv4 Keras model and time 1000 predictions on one image.

    The image at ``FLAGS.image`` is loaded and preprocessed to
    ``FLAGS.size`` once; each iteration then times ``model.predict`` alone
    and prints the latency in milliseconds and the derived FPS.
    """
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if physical_devices:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    input_layer = tf.keras.layers.Input([FLAGS.size, FLAGS.size, 3])
    feature_maps = YOLOv4(input_layer)
    bbox_tensors = [decode(fm, i) for i, fm in enumerate(feature_maps)]
    model = tf.keras.Model(input_layer, bbox_tensors)
    utils.load_weights(model, FLAGS.weights)
    logging.info('weights loaded')

    # The input never changes, so load and preprocess it once instead of
    # re-reading the file on each of the 1000 iterations.
    original_image = cv2.imread(FLAGS.image)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    image_data = utils.image_preporcess(
        np.copy(original_image), [FLAGS.size, FLAGS.size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    # Benchmark the Keras model on the fixed image.
    for _ in range(1000):
        prev_time = time.time()
        model.predict(image_data)
        curr_time = time.time()
        exec_time = curr_time - prev_time
        info = "time:" + str(round(1000 * exec_time, 2)) + " ms, FPS: " + str(
            round((1000 / (1000 * exec_time)), 1))
        print(info)
def parse_annotation(self, annotation):
    """Load one annotated sample as RGB and fit it to the training size.

    Args:
        annotation: Whitespace-separated text line. The first token is the
            image path; each following token is one box written as
            comma-separated integers.

    Returns:
        The ``(image, bboxes)`` pair returned by ``utils.image_preporcess``.
        The image is converted with ``cv2.COLOR_BGR2RGB`` before resizing.

    Raises:
        KeyError: If the image path does not exist, or the file exists but
            cannot be decoded as an image.
    """
    line = annotation.split()
    image_path = line[0]
    if not os.path.exists(image_path):
        raise KeyError("%s does not exist ... " % image_path)

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for an undecodable file instead of raising.
        raise KeyError("%s could not be read as an image ... " % image_path)
    bboxes = np.array([list(map(int, box.split(','))) for box in line[1:]])

    if self.data_aug:
        image, bboxes = self.random_horizontal_flip(
            np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes))

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image, bboxes = utils.image_preporcess(
        np.copy(image),
        [self.train_input_size, self.train_input_size],
        np.copy(bboxes))

    # Debug note from the original author: printing bboxes.shape here for a
    # batch of four images gave (9, 5), (4, 5), (8, 5) and (2, 5), i.e. the
    # images contained 9, 4, 8 and 2 objects respectively.
    return image, bboxes
def export_yolo_video(video_path, output_path):
    """Run detection on every frame of a video and save the boxes to ``.npz``.

    The number of frames processed is ``cv2.CAP_PROP_FRAME_COUNT``. Uses the
    module-level ``graph``, ``return_tensors``, ``input_size`` and
    ``num_classes``.

    Args:
        video_path: Source passed to ``cv2.VideoCapture``.
        output_path: Directory that receives
            ``<video stem>_yolo_output.npz``.

    Raises:
        ValueError: If a frame cannot be read before the reported frame
            count is reached. Nothing is saved in that case.
    """
    vid = cv2.VideoCapture(video_path)
    try:
        max_frames = vid.get(cv2.CAP_PROP_FRAME_COUNT)
        outputs = []
        with tf.Session(graph=graph) as sess:
            for _ in trange(int(max_frames)):
                return_value, frame = vid.read()
                if not return_value:
                    raise ValueError("No image!")
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                frame_size = frame.shape[:2]
                image_data = utils.image_preporcess(
                    np.copy(frame), [input_size, input_size])
                image_data = image_data[np.newaxis, ...]

                pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
                    [return_tensors[1], return_tensors[2], return_tensors[3]],
                    feed_dict={return_tensors[0]: image_data})

                pred_bbox = np.concatenate([
                    np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                    np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                    np.reshape(pred_lbbox, (-1, 5 + num_classes))
                ], axis=0)

                bboxes = utils.postprocess_boxes(
                    pred_bbox, frame_size, input_size, 0.3)
                bboxes = utils.nms(bboxes, 0.45, method='nms')
                outputs.append(bboxes)
    finally:
        # Release the capture even when a frame read fails mid-way.
        vid.release()

    np.savez(
        str(Path(output_path, Path(video_path).stem + '_yolo_output.npz')),
        outputs)
import cv2
import numpy as np
import core.utils as utils
import tensorflow as tf
from core.yolov3 import YOLOv3, decode
from PIL import Image
from core.config import cfg

# Script: build a YOLOv3 Keras model, load weights, and run it on one image.

num_class = 80
input_size = 416
image_path = "./docs/kite.jpg"

# Load the image, convert it with COLOR_BGR2RGB and remember its first two
# shape extents.
original_image = cv2.imread(image_path)
original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
original_image_size = original_image.shape[:2]

# Preprocess to input_size x input_size and add a leading batch axis.
image_data = utils.image_preporcess(np.copy(original_image), [input_size, input_size])
image_data = image_data[np.newaxis, ...].astype(np.float32)

# Decode every feature map and flatten each one to rows of 5 + num_class
# values, then join them into a single output tensor.
input_layer = tf.keras.layers.Input([input_size, input_size, 3])
feature_maps = YOLOv3(input_layer)
bbox_tensors = []
for i, fm in enumerate(feature_maps):
    bbox_tensor = decode(fm, i)
    bbox_tensors.append(tf.reshape(bbox_tensor, (-1, 5+num_class)))
bbox_tensors = tf.concat(bbox_tensors, axis=0)

model = tf.keras.Model(input_layer, bbox_tensors)
utils.load_weights(model, "./yolov3.weights")
pred_bbox = model(image_data)
frameId = vid.get(1) return_value, frame = vid.read() if return_value: if frameId % frameRate == 0: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: continue else: raise ValueError("No image!") break line = fp.readline() gps_lines = line.split(",") frame_size = frame.shape[:2] image_data = utils.image_preporcess(np.copy(frame), [input_size, input_size]) image_data = image_data[np.newaxis, ...] prev_time = time.time() pred_sbbox, pred_mbbox, pred_lbbox = sess.run( [return_tensors[1], return_tensors[2], return_tensors[3]], feed_dict={return_tensors[0]: image_data}) pred_bbox = np.concatenate([ np.reshape(pred_sbbox, (-1, 5 + num_classes)), np.reshape(pred_mbbox, (-1, 5 + num_classes)), np.reshape(pred_lbbox, (-1, 5 + num_classes)) ], axis=0) bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size,
def getBboxes_for_video(image_dir, image_name, draw_bbox=False):
    """Run detection on a video, display it, and return the last frame's boxes.

    The frozen graph ``./yolov3_coco.pb`` is loaded and every frame of
    ``image_dir/image_name`` is processed and shown in a window until ``q``
    is pressed.

    Args:
        image_dir: Directory containing the video.
        image_name: File name of the video.
        draw_bbox: When true, the detections are drawn on the displayed
            frame; otherwise the plain frame is shown.

    Returns:
        The NMS result of the last processed frame, with that frame's
        zero-based index appended as the final list element.

    Raises:
        ValueError: When the capture delivers no frame (including the end of
            the video).
    """
    return_elements = [
        "input/input_data:0", "pred_sbbox/concat_2:0",
        "pred_mbbox/concat_2:0", "pred_lbbox/concat_2:0"
    ]
    pb_file = "./yolov3_coco.pb"
    video_path = os.path.join(image_dir, image_name)
    num_classes = 80
    input_size = 416
    graph = tf.Graph()
    return_tensors = utils.read_pb_return_tensors(graph, pb_file, return_elements)

    frame_number = 0
    with tf.Session(graph=graph) as sess:
        vid = cv2.VideoCapture(video_path)
        try:
            while True:
                return_value, frame = vid.read()
                if not return_value:
                    raise ValueError("No image!")
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                frame_size = frame.shape[:2]
                image_data = utils.image_preporcess(
                    np.copy(frame), [input_size, input_size])
                image_data = image_data[np.newaxis, ...]

                pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
                    [return_tensors[1], return_tensors[2], return_tensors[3]],
                    feed_dict={return_tensors[0]: image_data})

                pred_bbox = np.concatenate([
                    np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                    np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                    np.reshape(pred_lbbox, (-1, 5 + num_classes))
                ], axis=0)

                bboxes = utils.postprocess_boxes(
                    pred_bbox, frame_size, input_size, 0.4)
                bboxes = utils.nms(bboxes, 0.45, method='nms')

                # Draw before the frame index is appended, so draw_bbox only
                # ever sees real boxes. Without drawing, show the frame
                # itself (an array that cvtColor accepts).
                image = utils.draw_bbox(frame, bboxes) if draw_bbox else frame
                bboxes.append(frame_number)

                cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
                result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                cv2.imshow("result", result)
                frame_number += 1
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Release the capture on every exit path.
            vid.release()
    return bboxes
print('=> ground truth of %s:' % image_name) num_bbox_gt = len(bboxes_gt) with open(ground_truth_path, 'w') as f: for i in range(num_bbox_gt): class_name = CLASSES[classes_gt[i]] xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i])) bbox_mess = ' '.join([class_name, xmin, ymin, xmax, ymax ]) + '\n' f.write(bbox_mess) print('\t' + str(bbox_mess).strip()) print('=> predict result of %s:' % image_name) predict_result_path = os.path.join(predicted_dir_path, str(num) + '.txt') # Predict Process image_size = image.shape[:2] image_data = utils.image_preporcess(np.copy(image), [INPUT_SIZE, INPUT_SIZE]) image_data = image_data[np.newaxis, ...].astype(np.float32) pred_bbox = model.predict(image_data) pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox] pred_bbox = tf.concat(pred_bbox, axis=0) # pred_bbox = tf.concat(pred_bbox, 1) bboxes = utils.postprocess_boxes(pred_bbox, image_size, INPUT_SIZE, cfg.TEST.SCORE_THRESHOLD) bboxes = utils.nms(bboxes, cfg.TEST.IOU_THRESHOLD, method='nms') if cfg.TEST.DECTECTED_IMAGE_PATH is not None: image = utils.draw_bbox(image, bboxes) cv2.imwrite(cfg.TEST.DECTECTED_IMAGE_PATH + image_name, image) with open(predict_result_path, 'w') as f:
def loop():
    """Continuously detect on an RTMP stream and fill the shared frame buffer.

    Each annotated frame is JPEG-encoded, wrapped as a multipart chunk and
    stored in the global ``byte_frame`` at ``current_buffer_index``, which
    wraps to 0 at ``buffer_size``. ``current_frame_index`` counts stored
    frames. Inference runs on every other frame; the frames in between
    reuse the previous boxes. When a read fails the stream is reopened.
    Uses the module-level ``sess``, ``return_tensors``, ``input_size`` and
    ``num_classes``. This function does not return.
    """
    global byte_frame
    global current_buffer_index
    global current_frame_index

    myrtmp_addr = "rtmp://j-093.juliet.futuresystems.org/live/indycar live=1"
    cap = cv2.VideoCapture(myrtmp_addr)
    infer_flag = True

    while True:
        ret, frame = cap.read()
        if not ret:
            print('Input source error!')
            # Release the dead capture before reconnecting so repeated
            # failures do not pile up open stream handles.
            cap.release()
            cap = cv2.VideoCapture(myrtmp_addr)
            continue
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        if infer_flag:
            frame_size = frame.shape[:2]
            image_data = utils.image_preporcess(
                np.copy(frame), [input_size, input_size])
            image_data = image_data[np.newaxis, ...]

            pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
                [return_tensors[1], return_tensors[2], return_tensors[3]],
                feed_dict={return_tensors[0]: image_data})

            pred_bbox = np.concatenate(
                [np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                 np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                 np.reshape(pred_lbbox, (-1, 5 + num_classes))],
                axis=0)

            bboxes = utils.postprocess_boxes(
                pred_bbox, frame_size, input_size, 0.3)
            bboxes = utils.nms(bboxes, 0.45, method='nms')

        # Alternate between inferring and reusing the previous boxes.
        infer_flag = not infer_flag

        image = utils.draw_bbox(frame, bboxes)
        result = np.asarray(image)
        # The encoder expects BGR channel order.
        frame = cv2.cvtColor(result, cv2.COLOR_RGB2BGR)
        ret, jpeg = cv2.imencode('.jpg', frame)
        if not ret:
            # Encoding failed: skip this frame rather than crash on
            # jpeg.tobytes().
            continue
        frame = jpeg.tobytes()

        byte_frame[current_buffer_index] = (
            b'--frame\r\n'
            b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n\r\n')
        current_frame_index = current_frame_index + 1
        current_buffer_index = current_buffer_index + 1
        if current_buffer_index >= buffer_size:
            current_buffer_index = 0