def main(_argv):
    """Run YOLO detection on an aligned RealSense stream and plot object depth.

    Builds the detector selected by ``FLAGS`` (Keras model or TFLite
    interpreter), then loops over frames from the module-level ``pipeline``:
    each colour frame is run through the detector, the depth at the centre of
    every box is estimated as the median of a 3x3 pixel neighbourhood, and the
    de-projected point is plotted on a second canvas shown next to the
    colourised depth image. Press ``q`` in the window to quit.

    Args:
        _argv: Unused positional arguments.

    Raises:
        ValueError: If ``FLAGS.framework == 'tf'``, ``FLAGS.tiny`` is false and
            ``FLAGS.model`` is neither ``'yolov3'`` nor ``'yolov4'``.
    """
    import os

    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            build_backbone, load_weights = YOLOv3_tiny, utils.load_weights_tiny
        elif FLAGS.model == 'yolov3':
            build_backbone, load_weights = YOLOv3, utils.load_weights_v3
        elif FLAGS.model == 'yolov4':
            build_backbone, load_weights = YOLOv4, utils.load_weights
        else:
            # Previously this fell through and failed later with an unbound
            # `model`; fail early with an explicit message instead.
            raise ValueError("Unsupported model: %r" % (FLAGS.model,))
        feature_maps = build_backbone(input_layer, NUM_CLASS)
        bbox_tensors = [
            decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
        ]
        model = tf.keras.Model(input_layer, bbox_tensors)
        load_weights(model, FLAGS.weights)
        model.summary()
    else:
        # Load TFLite model and allocate tensors.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)

    # Size of the second canvas and the scale applied to de-projected
    # coordinates before plotting (values unchanged from the original code).
    VIEW_H, VIEW_W = 480, 640
    VIEW_SCALE = 350
    MARK = [0, 255, 0]

    cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
    try:
        while True:
            frames = pipeline.wait_for_frames()
            # Align the depth frame to the colour frame.
            aligned_frames = align.process(frames)
            depth_frame = aligned_frames.get_depth_frame()
            color_frame = aligned_frames.get_color_frame()
            if not depth_frame or not color_frame:
                continue

            depth_intrin = (
                depth_frame.profile.as_video_stream_profile().intrinsics)
            depth_image = np.asanyarray(depth_frame.get_data())
            color_image = np.asanyarray(color_frame.get_data())
            depth_h, depth_w = depth_image.shape[:2]

            frame = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB)
            frame_size = frame.shape[:2]
            image_data = utils.image_preprocess(np.copy(frame),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            scaled_depth = cv2.convertScaleAbs(depth_image, alpha=0.08)
            depth_colormap = cv2.applyColorMap(scaled_depth, cv2.COLORMAP_JET)

            if FLAGS.framework == 'tf':
                pred_bbox = model.predict(image_data)
            else:
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred_bbox = [
                    interpreter.get_tensor(detail['index'])
                    for detail in output_details
                ]

            if FLAGS.model == 'yolov4':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES, XYSCALE)
            else:
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES)
            bboxes = utils.postprocess_boxes(pred_bbox, frame_size,
                                             input_size, 0.25)
            bboxes = utils.nms(bboxes, 0.213, method='nms')

            view2d = np.zeros((VIEW_H, VIEW_W, 3), np.uint8)
            for box in bboxes:
                x_mid = int((box[0] + box[2]) / 2)
                y_mid = int((box[1] + box[3]) / 2)

                # Median over a 3x3 neighbourhood; neighbours are clamped to
                # the frame so a box centred on the border cannot index
                # outside the depth image.
                pixel_depths = []
                for dx in (-1, 0, 1):
                    for dy in (-1, 0, 1):
                        px = min(max(x_mid + dx, 0), depth_w - 1)
                        py = min(max(y_mid + dy, 0), depth_h - 1)
                        pixel_depths.append(depth_frame.get_distance(px, py))
                object_depth = statistics.median(pixel_depths)
                object_point = rs.rs2_deproject_pixel_to_point(
                    depth_intrin, [x_mid, y_mid], object_depth)

                # NOTE(review): the original indentation was lost; the two
                # diagnostics and the two marker writes below are assumed to
                # run for every box -- confirm against the intended nesting.
                if box[5] == 67.0:
                    print('found phone')
                if object_depth == 0.0:
                    print('depth not found')

                depth_colormap[max(0, min(y_mid, depth_h - 1)),
                               max(0, min(x_mid, depth_w - 1))] = MARK
                view_row = VIEW_H - int(object_point[2] * VIEW_SCALE)
                view_col = int(object_point[0] * VIEW_SCALE) + VIEW_W // 2
                view2d[max(0, min(view_row, VIEW_H - 1)),
                       max(0, min(view_col, VIEW_W - 1))] = MARK

            image_depth = utils.draw_bbox(depth_colormap, bboxes)
            images = np.hstack((view2d, image_depth))
            cv2.imshow("result", images)
            print('-----')
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Stop streaming on every exit path, not only when 'q' is pressed.
        pipeline.stop()
def main(_argv):
    """Detect objects in the image at ``FLAGS.image`` and display the result.

    The image is read with OpenCV, converted to RGB, pre-processed to
    ``FLAGS.size`` and run through either a Keras model built from
    ``FLAGS.model`` / ``FLAGS.tiny`` or a TFLite interpreter loaded from
    ``FLAGS.weights``. The remaining boxes after score filtering and NMS are
    drawn on the image, which is then shown through PIL.

    Args:
        _argv: Unused positional arguments.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``FLAGS.image``.
    """
    # Tell the tiny variant apart from the full-size model.
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        # For the full-size model, pick the anchor set that matches the
        # requested architecture.
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)

    # Class count and XYSCALE come from the config; input size and image path
    # come from the command-line flags.
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size
    image_path = FLAGS.image

    # OpenCV loads images in BGR order; convert to RGB, which is what the
    # final PIL display expects.
    original_image = cv2.imread(image_path)
    if original_image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("Could not read image: %s" % image_path)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]

    # Pre-process, add a leading batch axis and cast to float32.
    image_data = utils.image_preporcess(np.copy(original_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    # With the 'tf' framework the network is built from FLAGS.model and the
    # matching weight loader is used; otherwise a TFLite interpreter is run.
    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            bbox_tensors = [
                decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASS)
                bbox_tensors = [
                    decode(fm, NUM_CLASS, i)
                    for i, fm in enumerate(feature_maps)
                ]
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                # Build the YOLOv4 feature maps, decode each one into a box
                # tensor, wrap them in a Keras model and load the weights.
                feature_maps = YOLOv4(input_layer, NUM_CLASS)
                bbox_tensors = [
                    decode(fm, NUM_CLASS, i)
                    for i, fm in enumerate(feature_maps)
                ]
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights(model, FLAGS.weights)

        model.summary()
        # Run the network on the pre-processed image.
        pred_bbox = model.predict(image_data)
    else:
        # The original had a stray '.' after this `else:` (a syntax error).
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        pred_bbox = [
            interpreter.get_tensor(detail['index'])
            for detail in output_details
        ]

    # Post-process the raw predictions, then keep boxes above the 0.25 score
    # threshold and apply NMS.
    if FLAGS.model == 'yolov4':
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                            XYSCALE)
    else:
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                     input_size, 0.25)
    bboxes = utils.nms(bboxes, 0.213, method='nms')

    # Draw the boxes and show the annotated image through PIL.
    image = utils.draw_bbox(original_image, bboxes)
    image = Image.fromarray(image)
    image.show()
def main(_argv):
    """Run YOLO detection on every frame of ``FLAGS.video`` and display it.

    Builds the detector selected by ``FLAGS`` (Keras model or TFLite
    interpreter), then reads frames until the stream ends or ``q`` is pressed.
    Per-frame inference time is printed.

    Args:
        _argv: Unused positional arguments.

    Raises:
        ValueError: If not even one frame can be read from the video.
    """
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size
    video_path = FLAGS.video

    print("Video from: ", video_path)
    vid = cv2.VideoCapture(video_path)

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            bbox_tensors = [
                decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASS)
                bbox_tensors = [
                    decode(fm, NUM_CLASS, i)
                    for i, fm in enumerate(feature_maps)
                ]
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASS)
                bbox_tensors = [
                    decode(fm, NUM_CLASS, i)
                    for i, fm in enumerate(feature_maps)
                ]
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights(model, FLAGS.weights)

        model.summary()
    else:
        # Load TFLite model and allocate tensors.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)

    got_frame = False
    cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
    try:
        while True:
            return_value, frame = vid.read()
            if not return_value:
                if not got_frame:
                    # Nothing could be decoded at all: keep the original error.
                    raise ValueError(
                        "No image! Try with another video format")
                # A failed read after at least one good frame is the normal
                # end of the stream, not an error.
                break
            got_frame = True

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_size = frame.shape[:2]
            image_data = utils.image_preporcess(np.copy(frame),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            prev_time = time.time()
            if FLAGS.framework == 'tf':
                pred_bbox = model.predict(image_data)
            else:
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred_bbox = [
                    interpreter.get_tensor(detail['index'])
                    for detail in output_details
                ]

            if FLAGS.model == 'yolov4':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES, XYSCALE)
            else:
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES)
            bboxes = utils.postprocess_boxes(pred_bbox, frame_size,
                                             input_size, 0.25)
            bboxes = utils.nms(bboxes, 0.213, method='nms')
            image = utils.draw_bbox(frame, bboxes)

            exec_time = time.time() - prev_time
            info = "time: %.2f ms" % (1000 * exec_time)
            print(info)

            # The frame was converted to RGB above; OpenCV displays BGR.
            result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            cv2.imshow("result", result)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the window on every exit path.
        vid.release()
        cv2.destroyAllWindows()
def main(_argv):
    """Detect objects in ``FLAGS.image`` and write the result to ``FLAGS.output``.

    The image is pre-processed to ``FLAGS.size``, run through a Keras model
    built from ``FLAGS.model`` / ``FLAGS.tiny`` or a TFLite interpreter loaded
    from ``FLAGS.weights``, and the boxes that survive score filtering and NMS
    are drawn on it before it is saved with ``cv2.imwrite``.

    Args:
        _argv: Unused positional arguments.
    """
    # Strides and anchors depend on the tiny flag and the architecture.
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    elif FLAGS.model == 'yolov4':
        STRIDES = np.array(cfg.YOLO.STRIDES)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)

    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size
    target_shape = [input_size, input_size]

    # Load the picture as RGB and remember its (height, width).
    original_image = cv2.cvtColor(cv2.imread(FLAGS.image),
                                  cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]

    # Network input: pre-processed copy with a leading batch axis, float32.
    image_data = utils.image_preprocess(np.copy(original_image), target_shape)
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            heads = [
                decode(fm, NUM_CLASS, idx)
                for idx, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, heads)
            utils.load_weights_tiny(model, FLAGS.weights)
        elif FLAGS.model == 'yolov3':
            feature_maps = YOLOv3(input_layer, NUM_CLASS)
            heads = [
                decode(fm, NUM_CLASS, idx)
                for idx, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, heads)
            utils.load_weights_v3(model, FLAGS.weights)
        elif FLAGS.model == 'yolov4':
            feature_maps = YOLOv4(input_layer, NUM_CLASS)
            heads = [
                decode(fm, NUM_CLASS, idx)
                for idx, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, heads)
            # Files whose last dot-separated component is "weights" go through
            # the project loader; anything else is loaded by Keras.
            weights_suffix = FLAGS.weights.split(".")[-1]
            if weights_suffix == "weights":
                utils.load_weights(model, FLAGS.weights)
            else:
                model.load_weights(FLAGS.weights).expect_partial()

        model.summary()
        pred_bbox = model.predict(image_data)
    else:
        # Load TFLite model and allocate tensors.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        pred_bbox = [
            interpreter.get_tensor(detail['index'])
            for detail in output_details
        ]

    # XYSCALE is only passed to the post-processing step for yolov4.
    if FLAGS.model == 'yolov4':
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                            XYSCALE)
    else:
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)

    bboxes = utils.nms(
        utils.postprocess_boxes(pred_bbox, original_image_size, input_size,
                                0.25),
        0.213,
        method='nms')

    annotated = Image.fromarray(utils.draw_bbox(original_image, bboxes))
    # Swap the channel order back before handing the array to cv2.imwrite.
    to_save = cv2.cvtColor(np.array(annotated), cv2.COLOR_BGR2RGB)
    cv2.imwrite(FLAGS.output, to_save)
def main(argv):
    """Run a two-class YOLOv4 model over a video, optionally saving the result.

    Anchors, strides and XYSCALE are hard-coded; input size, video path,
    thresholds, weights and the optional output path come from ``FLAGS``.
    Every frame is annotated and displayed, and written to
    ``FLAGS.save_path`` when that flag is set. Press ``q`` to stop early.

    Args:
        argv: Unused positional arguments.
    """
    NUM_CLASS = 2
    ANCHORS = np.array(
        [
            12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192,
            243, 459, 401
        ],
        dtype=np.float32,
    ).reshape(3, 3, 2)
    STRIDES = [8, 16, 32]
    XYSCALE = [1.2, 1.1, 1.05]

    input_size = FLAGS.size
    video_path = FLAGS.video_path
    score_thresh = FLAGS.score_thresh
    iou_thresh = FLAGS.iou_thresh
    save_path = FLAGS.save_path

    print(f'[DEBUG][video] input_size : {input_size}')
    print(f'[DEBUG][video] video_path : {video_path}')
    print(f'[DEBUG][video] score_thresh : {score_thresh}')
    print(f'[DEBUG][video] iou_thresh : {iou_thresh}')
    print(f'[DEBUG][video] save_path : {save_path}')

    print('[INFO] Bulding Yolov4 architecture')
    tic = time.perf_counter()
    input_layer = tf.keras.layers.Input([input_size, input_size, 3])
    print(f'[INFO][video] Created input_layer of size {input_size}')
    print(f'[DEBUG][video] input_layer : {input_layer}')
    feature_maps = YOLOv4(input_layer, NUM_CLASS)
    print(f'[DEBUG][video] feature_maps : {feature_maps}')
    bbox_tensors = [
        decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
    ]
    model = tf.keras.Model(input_layer, bbox_tensors)
    utils.load_weights(model, FLAGS.weights)
    toc = time.perf_counter()
    print('[INFO] Architecture built.')
    print(f'[DEBUG][video] Execution took {(1000 * (toc - tic)):0.4f} ms')

    vid = cv2.VideoCapture(video_path)
    out = None
    try:
        if save_path:
            width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = int(vid.get(cv2.CAP_PROP_FPS))
            # Take the text after the LAST dot: split('.')[1] returned a path
            # fragment for "./out/clip.avi" and raised IndexError when the
            # path contained no dot at all.
            extension = save_path.rsplit('.', 1)[-1]
            print(f"[DEBUG][video] Video CODEC : {extension}")
            # A FourCC is exactly four characters; the previous 'MJPEG'
            # unpacked into five arguments.
            codec = cv2.VideoWriter_fourcc(*'MJPG')
            out = cv2.VideoWriter(save_path, codec, fps, (width, height))

        cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        while True:
            return_value, frame = vid.read()
            if not return_value:
                print("[DEBUG][video] Video Over")
                break
            print('[DEBUG] Got video capture')
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            frame_size = frame.shape[:2]
            image_data = utils.image_preprocess(np.copy(frame),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            prev_time = time.perf_counter()
            pred_bbox = model.predict(image_data)
            print('[INFO][video] Finished initial predication on image')
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                                XYSCALE)
            bboxes = utils.postprocess_boxes(pred_bbox, frame_size,
                                             input_size, score_thresh)
            bboxes = utils.nms(bboxes, iou_thresh, method='nms')
            image = utils.draw_bbox(frame, bboxes)
            exec_time = time.perf_counter() - prev_time

            info = "fdpms: %.2f ms" % (1000 * exec_time)
            print(info)

            # Back to BGR for OpenCV display and for the video writer.
            result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            cv2.imshow("result", result)
            print(result.shape)
            if out is not None:
                out.write(result)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release on every exit path (end of video, 'q', or an exception) so
        # the output container is always finalised.
        vid.release()
        if out is not None:
            out.release()
# Build a YOLOv4 Keras model at a fixed input size, restore the checkpoint at
# ./checkpoints/yolov4 and export the model to ./e4.h5.
import tensorflow as tf
import core.utils as utils
from core.config import cfg
from core.yolov4 import YOLOv4, decode
from PIL import Image
from matplotlib.pyplot import imshow
from urllib.request import urlopen
# NOTE(review): scipy.misc.imread was removed in SciPy 1.2 and nothing in this
# script uses it; this import will fail on current SciPy -- confirm it can go.
from scipy.misc import imread

INPUT_SIZE = 320
NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
for summary_item in (NUM_CLASS, CLASSES):
    print(summary_item)

# Build Model
input_layer = tf.keras.layers.Input([INPUT_SIZE, INPUT_SIZE, 3])
feature_maps = YOLOv4(input_layer)
# One decoded box tensor per feature map, in the order YOLOv4 returns them.
bbox_tensors = [decode(fm, idx) for idx, fm in enumerate(feature_maps)]
print(bbox_tensors)

model = tf.keras.Model(input_layer, bbox_tensors)
model.load_weights("./checkpoints/yolov4")
model.summary()
model.save("./e4.h5")
def main(_argv):
    """Count detected vehicles in ``FLAGS.video`` and write an annotated AVI.

    Builds the detector selected by ``FLAGS`` (Keras model or TFLite
    interpreter), runs it on every frame, passes the boxes to
    ``vis_util.visualize_boxes_and_labels_on_image_array_y_axis`` and overlays
    a reference line plus the running total. Annotated frames are written to
    ``output<timestamp>.avi`` in the working directory.

    Args:
        _argv: Unused positional arguments.

    Raises:
        ValueError: If not even one frame can be read from the video.
    """
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size
    video_path = FLAGS.video

    print("Video from: ", video_path)
    vid = cv2.VideoCapture(video_path)
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    output_movie = cv2.VideoWriter(
        'output' + str(round(time.time())) + '.avi', fourcc, fps,
        (width, height))

    try:
        if FLAGS.framework == 'tf':
            input_layer = tf.keras.layers.Input([input_size, input_size, 3])
            if FLAGS.tiny:
                feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
                bbox_tensors = [
                    decode(fm, NUM_CLASS, i)
                    for i, fm in enumerate(feature_maps)
                ]
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_tiny(model, FLAGS.weights)
            else:
                if FLAGS.model == 'yolov3':
                    feature_maps = YOLOv3(input_layer, NUM_CLASS)
                    bbox_tensors = [
                        decode(fm, NUM_CLASS, i)
                        for i, fm in enumerate(feature_maps)
                    ]
                    model = tf.keras.Model(input_layer, bbox_tensors)
                    utils.load_weights_v3(model, FLAGS.weights)
                elif FLAGS.model == 'yolov4':
                    feature_maps = YOLOv4(input_layer, NUM_CLASS)
                    bbox_tensors = [
                        decode(fm, NUM_CLASS, i)
                        for i, fm in enumerate(feature_maps)
                    ]
                    model = tf.keras.Model(input_layer, bbox_tensors)
                    if FLAGS.weights.split(".")[-1] == "weights":
                        utils.load_weights(model, FLAGS.weights)
                    else:
                        model.load_weights(FLAGS.weights).expect_partial()

            model.summary()
        else:
            # Load TFLite model and allocate tensors.
            interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
            interpreter.allocate_tensors()
            # Get input and output tensors.
            input_details = interpreter.get_input_details()
            output_details = interpreter.get_output_details()
            print(input_details)
            print(output_details)

        total_passed_vehicle = 0
        # Loop invariants: reference-line position and the class-name table
        # (previously re-read from disk on every frame).
        roi = 450
        category_index = utils.read_class_names(cfg.YOLO.CLASSES)
        font = cv2.FONT_HERSHEY_SIMPLEX
        got_frame = False

        while True:
            return_value, frame = vid.read()
            if not return_value:
                if not got_frame:
                    raise ValueError(
                        "No image! Try with another video format")
                # A failed read after at least one good frame is the normal
                # end of the stream; leave the loop so the writer is closed.
                break
            got_frame = True

            # NOTE(review): the frame is converted to RGB here and later
            # written to the VideoWriter without converting back, so the saved
            # video may have red/blue swapped -- confirm the intended order.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_size = frame.shape[:2]
            image_data = utils.image_preprocess(np.copy(frame),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            if FLAGS.framework == 'tf':
                pred_bbox = model.predict(image_data)
            else:
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred_bbox = [
                    interpreter.get_tensor(detail['index'])
                    for detail in output_details
                ]

            if FLAGS.model == 'yolov4':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES, XYSCALE)
            else:
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES)
            bboxes = utils.postprocess_boxes(pred_bbox, frame_size,
                                             input_size, 0.25)
            boxes = bboxes[:, 0:4]
            scores = bboxes[:, 4]
            classes = bboxes[:, 5]

            counter, csv_line, counting_mode = (
                vis_util.visualize_boxes_and_labels_on_image_array_y_axis(
                    vid.get(cv2.CAP_PROP_POS_FRAMES),
                    frame,
                    1,
                    False,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    y_reference=roi,
                    use_normalized_coordinates=True,
                    line_thickness=4))

            # The reference line uses one colour when an object was counted
            # on this frame and another otherwise.
            line_color = (0, 0xFF, 0) if counter == 1 else (0, 0, 0xFF)
            cv2.line(frame, (roi, 0), (roi, height), line_color, 5)
            total_passed_vehicle = total_passed_vehicle + counter

            # Insert information text into the video frame. The original
            # referenced an undefined `input_frame` here (NameError) and
            # passed a font constant as the line type.
            cv2.putText(
                frame,
                'Veiculos Detectados: ' + str(total_passed_vehicle),
                (10, 35),
                font,
                0.8,
                (0, 0xFF, 0xFF),
                2,
                cv2.LINE_AA,
            )
            cv2.putText(
                frame,
                'Linha de ROI',
                (545, roi - 10),
                font,
                0.6,
                (0, 0, 0xFF),
                2,
                cv2.LINE_AA,
            )

            output_movie.write(frame)
            print("writing frame")
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always finalise the output file and free the capture.
        vid.release()
        output_movie.release()
        cv2.destroyAllWindows()
def main():
    """Detect people in a video and log occupancy / distancing counts.

    A YOLOv4 model is built inside a one-device distribution strategy and run
    on every ``THROWOUT_NUM``-th frame of the input video. For each processed
    frame the people boxes are drawn, ``pg.draw_radius`` returns an updated
    frame and a count, and both the number of people and that count are
    averaged over a small ring buffer before being written to a text file and
    overlaid on the frame. Frames are optionally displayed and/or recorded.

    Errors raised inside the frame loop are reported and end the run; the
    capture, writer, window and text file are closed on every exit path.
    """
    # NOTE: the original called tf.executing_eagerly() here and discarded the
    # result; that call has no effect, so it was dropped.
    strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
    with strategy.scope():
        # SETTINGS TO ADJUST -------------------------------------------
        # Whether to save video to the output file and/or show it on screen.
        RECORD = False
        INPUT_VID = 'aot1'
        OUTPUT_VID = ('C:/Users/Nikki/Documents/work/inputs-outputs/'
                      'vid_output/' + INPUT_VID + '.avi')
        SHOW_VID = True
        THROWOUT_NUM = 3  # min is 1
        INPUT_SIZE = 419

        # Initialize constants.
        STRIDES = np.array(cfg.YOLO.STRIDES)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS)
        NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
        XYSCALE = cfg.YOLO.XYSCALE
        WEIGHTS = './data/yolov4.weights'  # must end in .weights

        # Set up variables based on what video is being used.
        video_path, GPS_pix, pix_GPS, origin = pg.sample_select(INPUT_VID)
        # NOTE(review): the path returned above is immediately overridden.
        video_path = addresses.TEST

        # Start video capture.
        print("Video from: ", video_path)
        vid = cv2.VideoCapture(video_path)

        # Initialize occupancy and compliance ring buffers.
        buf_size = 5
        count_buf = buf_size * [0]
        people_buf = buf_size * [0]
        ind = 0

        output_f = ('C:/Users/Nikki/Documents/work/inputs-outputs/'
                    'txt_output/' + INPUT_VID + '.txt')
        out_vid = None
        try:
            with open(output_f, 'w') as f:
                print('file started')
                f.write('Time\t\t\t\tPed\t<6ft\n')

                # Define writer and output video properties.
                if RECORD:
                    fps = vid.get(5)
                    wdt = int(vid.get(3))
                    hgt = int(vid.get(4))
                    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
                    out_vid = cv2.VideoWriter(OUTPUT_VID, fourcc,
                                              fps / THROWOUT_NUM, (wdt, hgt))

                # Generate model.
                input_layer = tf.keras.Input([INPUT_SIZE, INPUT_SIZE, 3])
                feature_maps = YOLOv4(input_layer, NUM_CLASS)
                bbox_tensors = [
                    decode(fm, NUM_CLASS, i)
                    for i, fm in enumerate(feature_maps)
                ]
                model = tf.keras.Model(input_layer, bbox_tensors)
                print('model built')
                # Force to run eagerly.
                model.run_eagerly = True
                # Load existing weights into model.
                utils.load_weights(model, WEIGHTS)

                if SHOW_VID:
                    cv2.namedWindow("result", cv2.WINDOW_NORMAL)

                # Continue reading and showing frames until interrupted.
                try:
                    while True:
                        # Skip frames to speed up processing: grab
                        # THROWOUT_NUM frames and decode only the last one.
                        for _ in range(THROWOUT_NUM):
                            vid.grab()

                        # Get current time and next frame.
                        dt = str(datetime.datetime.now())
                        return_value, frame = vid.retrieve()
                        if not return_value:
                            print('Video has ended')
                            break
                        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                        # Resize image and add a leading batch dimension.
                        frame_size = frame.shape[:2]
                        image_data = utils.image_preprocess(
                            np.copy(frame), [INPUT_SIZE, INPUT_SIZE])
                        image_data = image_data[np.newaxis, ...].astype(
                            np.float32)

                        # time1: conversion of the input to a tensor.
                        prev_time = time.time()
                        with tf.device('/GPU:0'):
                            image_data = tf.convert_to_tensor(image_data)
                        print(image_data.device)
                        exec_time = time.time() - prev_time
                        info = "time1: %.2f ms" % (1000 * exec_time)
                        print(info)

                        # time2: inference, post-processing and drawing.
                        prev_time = time.time()
                        pred_bbox = model.predict(image_data)
                        pred_bbox = utils.postprocess_bbbox(
                            pred_bbox, ANCHORS, STRIDES, XYSCALE)
                        all_bboxes, probs, classes = utils.postprocess_boxes(
                            pred_bbox, frame_size, INPUT_SIZE, 0.25)
                        bboxes = utils.filter_people(all_bboxes, probs,
                                                     classes)

                        # Only continue processing if people were identified.
                        if len(bboxes) > 0:
                            # Get rid of redundant boxes.
                            bboxes = utils.nms(bboxes, 0.213, method='nms')
                            # Draw boxes and get the point returned for each.
                            frame = utils.draw_bbox(frame, bboxes,
                                                    show_label=False)
                            pts = utils.get_ftpts(bboxes)
                            # Draw radii and count people.
                            frame, count_buf[ind] = pg.draw_radius(
                                frame, pts, GPS_pix, pix_GPS, origin)
                            people_buf[ind] = pts.shape[0]
                        else:
                            count_buf[ind] = 0
                            people_buf[ind] = 0

                        # Average both counts over the ring buffers.
                        people = int(sum(people_buf) / len(people_buf))
                        count = int(sum(count_buf) / len(count_buf))

                        # Write info to file and overlay it on the frame.
                        utils.video_write_info(f, bboxes, dt, count, people)
                        utils.overlay_occupancy(frame, count, people,
                                                frame_size)

                        exec_time = time.time() - prev_time
                        info = "time2: %.2f ms" % (1000 * exec_time)
                        print(info)

                        # Convert to BGR for OpenCV display / recording.
                        result = cv2.cvtColor(np.asarray(frame),
                                              cv2.COLOR_RGB2BGR)
                        if SHOW_VID:
                            cv2.imshow("result", result)
                        if RECORD:
                            out_vid.write(result)
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break

                        # Advance the ring-buffer index.
                        ind = (ind + 1) % buf_size
                except KeyboardInterrupt:
                    print('Interrupted by user')
                except Exception:
                    # Best-effort as before (the run ends without re-raising),
                    # but report the full traceback instead of only the type.
                    import traceback
                    print("Unexpected error:")
                    traceback.print_exc()
        finally:
            # End video, close viewer, stop recording -- on every exit path.
            vid.release()
            if out_vid is not None:
                out_vid.release()
            if SHOW_VID:
                cv2.destroyAllWindows()
def __init__(self, framework='tf', size=608, tiny=False, model='yolov4', NUM_CLASS=len(utils.read_class_names(cfg.YOLO.CLASSES)), load_h5=False, h5_file=None):
    """Create the detector and store it in ``self.instanciated_model``.

    Args:
        framework: ``'tf'`` builds a Keras model; any other value loads a
            TFLite interpreter from ``self.weights``.
        size: Side length of the square network input.
        tiny: Build the tiny variant of the selected architecture.
        model: ``'yolov3'`` or ``'yolov4'``.
        NUM_CLASS: Number of classes; the default is computed once, when the
            function is defined, from ``cfg.YOLO.CLASSES``.
        load_h5: When true, load a saved Keras model from ``h5_file`` and
            skip everything else.
        h5_file: Path passed to ``tf.keras.models.load_model``.
    """
    self.framework = framework
    self.weights = 'weights'
    self.size = size
    self.tiny = tiny
    self.model = model
    self.instanciated_model = None

    # Shortcut: restore a complete model from an HDF5 file.
    if load_h5:
        print('Loading Model from h5 file')
        self.instanciated_model = tf.keras.models.load_model(h5_file)
        return

    print('Tiny ', self.tiny)

    if self.framework != 'tf':
        # Load TFLite model and allocate tensors.
        # NOTE(review): self.weights is still the bare 'weights' value set
        # above when this runs -- confirm that is a valid .tflite path.
        interpreter = tf.lite.Interpreter(model_path=self.weights)
        interpreter.allocate_tensors()
        self.instanciated_model = interpreter
        return

    input_layer = tf.keras.layers.Input([self.size, self.size, 3])

    if self.tiny:
        if self.model == 'yolov3':
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            self.weights = os.path.join(self.weights, 'yolov3-tiny.weights')
        else:
            feature_maps = YOLOv4_tiny(input_layer, NUM_CLASS)
            self.weights = os.path.join(self.weights, 'yolov4-tiny.weights')

        heads = [
            decode(fm, NUM_CLASS, idx) for idx, fm in enumerate(feature_maps)
        ]
        model = tf.keras.Model(input_layer, heads)
        model.summary()

        # Files whose last dot-separated component is "weights" go through
        # the project loader; anything else is loaded by Keras.
        if self.weights.split(".")[-1] == "weights":
            print('test_0')
            variant = 'yolov3' if self.model == 'yolov3' else 'yolov4'
            utils.load_weights_tiny(model, self.weights, variant)
        else:
            print('test_1')
            model.load_weights(self.weights).expect_partial()
    else:
        if self.model == 'yolov3':
            feature_maps = YOLOv3(input_layer, NUM_CLASS)
            heads = [
                decode(fm, NUM_CLASS, idx)
                for idx, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, heads)
            # NOTE(review): the yolov3 branch points at 'yolov4.weights' and
            # is later loaded with utils.load_weights -- confirm intended.
            yolov3_weights_path = os.path.join(self.weights, 'yolov4.weights')
        elif self.model == 'yolov4':
            feature_maps = YOLOv4(input_layer, NUM_CLASS)
            heads = [
                decode(fm, NUM_CLASS, idx)
                for idx, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, heads)

            # Check if the weights file has already been downloaded.
            yolov4_weights_path = os.path.join(self.weights, 'yolov4.weights')
            if os.path.exists(yolov4_weights_path):
                print('Weights file already downloaded')
                self.weights = yolov4_weights_path
            else:
                print('Downloading weights file')
                self.weights = self.download('yolov4.weights',
                                             local_path=self.weights)
                print('Weight file was downloaded to', self.weights)

        if self.weights.split(".")[-1] != "weights":
            model.load_weights(self.weights).expect_partial()
        elif self.model == 'yolov3':
            utils.load_weights(model, yolov3_weights_path)
        elif self.model == 'yolov4':
            utils.load_weights(model, self.weights)

    # NOTE(review): the original indentation was lost; this assignment is
    # assumed to apply to both the tiny and the full-size branch.
    self.instanciated_model = model
def main(_argv):
    """Train a YOLO model in two stages, saving weights after every epoch.

    The network (tiny YOLOv3, YOLOv3 or YOLOv4, chosen from ``FLAGS``) is
    built with raw and decoded outputs interleaved, optionally initialised
    from ``FLAGS.weights``, then trained with a warm-up + cosine learning-rate
    schedule. Three named layers are frozen during the first stage and
    unfrozen for the second.

    Args:
        _argv: Unused positional command-line arguments.

    Raises:
        ValueError: If ``FLAGS.tiny`` is false and ``FLAGS.model`` is neither
            ``'yolov3'`` nor ``'yolov4'``.
    """
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if physical_devices:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    trainset = Dataset('train')
    testset = Dataset('test')
    logdir = "./data/log"
    isfreeze = False
    steps_per_epoch = len(trainset)
    first_stage_epochs = cfg.TRAIN.FISRT_STAGE_EPOCHS
    second_stage_epochs = cfg.TRAIN.SECOND_STAGE_EPOCHS
    global_steps = tf.Variable(1, trainable=False, dtype=tf.int64)
    warmup_steps = cfg.TRAIN.WARMUP_EPOCHS * steps_per_epoch
    total_steps = (first_stage_epochs + second_stage_epochs) * steps_per_epoch

    input_layer = tf.keras.layers.Input(
        [cfg.TRAIN.INPUT_SIZE, cfg.TRAIN.INPUT_SIZE, 3])
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    STRIDES = np.array(cfg.YOLO.STRIDES)
    IOU_LOSS_THRESH = cfg.YOLO.IOU_LOSS_THRESH
    XYSCALE = cfg.YOLO.XYSCALE
    ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS)

    # Only YOLOv4 passes XYSCALE to decode_train.
    if FLAGS.tiny:
        backbone, decode_extra = YOLOv3_tiny, ()
    elif FLAGS.model == 'yolov3':
        backbone, decode_extra = YOLOv3, ()
    elif FLAGS.model == 'yolov4':
        backbone, decode_extra = YOLOv4, (XYSCALE,)
    else:
        # Previously ``model`` was simply never defined on this path.
        raise ValueError(
            "Unsupported model %r; expected 'yolov3' or 'yolov4'"
            % (FLAGS.model,))

    feature_maps = backbone(input_layer, NUM_CLASS)
    # Outputs are interleaved: [raw_0, decoded_0, raw_1, decoded_1, ...].
    bbox_tensors = []
    for i, fm in enumerate(feature_maps):
        bbox_tensor = decode_train(fm, NUM_CLASS, STRIDES, ANCHORS, i,
                                   *decode_extra)
        bbox_tensors.append(fm)
        bbox_tensors.append(bbox_tensor)
    model = tf.keras.Model(input_layer, bbox_tensors)

    if FLAGS.weights is None:
        print("Training from scratch")
    else:
        if FLAGS.weights.split(".")[-1] == "weights":
            if FLAGS.tiny:
                utils.load_weights_tiny(model, FLAGS.weights)
            elif FLAGS.model == 'yolov3':
                utils.load_weights_v3(model, FLAGS.weights)
            else:
                utils.load_weights(model, FLAGS.weights)
        else:
            model.load_weights(FLAGS.weights)
        print('Restoring weights from: %s ... ' % FLAGS.weights)

    optimizer = tf.keras.optimizers.Adam()
    if os.path.exists(logdir):
        shutil.rmtree(logdir)
    writer = tf.summary.create_file_writer(logdir)

    def forward_losses(image_data, target):
        """Run the model and return (giou, conf, prob, total) losses."""
        pred_result = model(image_data, training=True)
        giou_loss = conf_loss = prob_loss = 0

        # One (raw, decoded) pair per detection head. Deriving the count from
        # the outputs keeps the tiny model (fewer heads) from indexing past
        # the end, which the former hard-coded range(3) did.
        for i in range(len(pred_result) // 2):
            conv, pred = pred_result[i * 2], pred_result[i * 2 + 1]
            loss_items = compute_loss(pred, conv, target[i][0], target[i][1],
                                      STRIDES=STRIDES, NUM_CLASS=NUM_CLASS,
                                      IOU_LOSS_THRESH=IOU_LOSS_THRESH, i=i)
            giou_loss += loss_items[0]
            conf_loss += loss_items[1]
            prob_loss += loss_items[2]

        total_loss = giou_loss + conf_loss + prob_loss
        return giou_loss, conf_loss, prob_loss, total_loss

    def train_step(image_data, target):
        """Apply one optimizer update, then advance the LR schedule."""
        with tf.GradientTape() as tape:
            giou_loss, conf_loss, prob_loss, total_loss = forward_losses(
                image_data, target)

            gradients = tape.gradient(total_loss, model.trainable_variables)
            optimizer.apply_gradients(
                zip(gradients, model.trainable_variables))
            tf.print(
                "=> STEP %4d lr: %.6f giou_loss: %4.2f conf_loss: %4.2f "
                "prob_loss: %4.2f total_loss: %4.2f" %
                (global_steps, optimizer.lr.numpy(), giou_loss, conf_loss,
                 prob_loss, total_loss))

            # Linear warm-up followed by cosine decay to LR_END.
            global_steps.assign_add(1)
            if global_steps < warmup_steps:
                lr = global_steps / warmup_steps * cfg.TRAIN.LR_INIT
            else:
                lr = cfg.TRAIN.LR_END + 0.5 * (
                    cfg.TRAIN.LR_INIT - cfg.TRAIN.LR_END) * ((1 + tf.cos(
                        (global_steps - warmup_steps) /
                        (total_steps - warmup_steps) * np.pi)))
            optimizer.lr.assign(lr.numpy())

            with writer.as_default():
                tf.summary.scalar("lr", optimizer.lr, step=global_steps)
                tf.summary.scalar("loss/total_loss", total_loss,
                                  step=global_steps)
                tf.summary.scalar("loss/giou_loss", giou_loss,
                                  step=global_steps)
                tf.summary.scalar("loss/conf_loss", conf_loss,
                                  step=global_steps)
                tf.summary.scalar("loss/prob_loss", prob_loss,
                                  step=global_steps)
            writer.flush()

    def test_step(image_data, target):
        """Report the losses on one evaluation batch without updating."""
        # No GradientTape here: nothing is differentiated during evaluation.
        # NOTE(review): the forward pass still uses training=True, as the
        # original did -- confirm whether evaluation should use inference mode.
        giou_loss, conf_loss, prob_loss, total_loss = forward_losses(
            image_data, target)
        tf.print(
            "=> TEST STEP %4d giou_loss: %4.2f conf_loss: %4.2f "
            "prob_loss: %4.2f total_loss: %4.2f" %
            (global_steps, giou_loss, conf_loss, prob_loss, total_loss))

    # NOTE(review): these layer names are hard-coded; presumably they are the
    # output convolutions of the full YOLOv4 graph -- verify for other models.
    freeze_layer_names = ['conv2d_93', 'conv2d_101', 'conv2d_109']

    for epoch in range(first_stage_epochs + second_stage_epochs):
        if epoch < first_stage_epochs:
            if not isfreeze:
                isfreeze = True
                for name in freeze_layer_names:
                    freeze_all(model.get_layer(name))
        else:
            if isfreeze:
                isfreeze = False
                for name in freeze_layer_names:
                    unfreeze_all(model.get_layer(name))

        for image_data, target in trainset:
            train_step(image_data, target)
        for image_data, target in testset:
            test_step(image_data, target)
        model.save_weights("./checkpoints/yolov4")
def main(_argv):
    """Detect, identify and track people in a video stream.

    Each frame is run through the YOLO detector selected by ``FLAGS``
    (Keras, TFLite or a TensorRT saved model). Detections are scored with
    ``utils.calculate_safety``, matched against faces found by
    ``face_recognition``, fed to the tracker, and the confirmed tracks are
    drawn, shown in a window and optionally written to ``FLAGS.output``.
    Press ``q`` in the window to stop.

    Args:
        _argv: Unused positional command-line arguments.

    Raises:
        ValueError: If the Keras framework is selected with ``FLAGS.tiny``
            false and ``FLAGS.model`` neither ``'yolov3'`` nor ``'yolov4'``.
    """
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
        XYSCALE = cfg.YOLO.XYSCALE_TINY
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        XYSCALE = cfg.YOLO.XYSCALE
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)

    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
    NUM_CLASS = len(CLASSES)
    input_size = FLAGS.size

    # A numeric value selects a camera index; anything else is used as given.
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except (TypeError, ValueError):
        vid = cv2.VideoCapture(FLAGS.video)

    if FLAGS.output:
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            else:
                feature_maps = YOLOv4_tiny(input_layer, NUM_CLASS)
        elif FLAGS.model == 'yolov3':
            feature_maps = YOLOv3(input_layer, NUM_CLASS)
        elif FLAGS.model == 'yolov4':
            feature_maps = YOLOv4(input_layer, NUM_CLASS)
        else:
            # Previously this path failed later with a NameError on ``model``.
            raise ValueError(
                "Unsupported model %r; expected 'yolov3' or 'yolov4'"
                % (FLAGS.model,))

        bbox_tensors = [
            decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
        ]
        model = tf.keras.Model(input_layer, bbox_tensors)

        if FLAGS.tiny:
            utils.load_weights_tiny(model, FLAGS.weights)
        elif FLAGS.model == 'yolov3':
            utils.load_weights_v3(model, FLAGS.weights)
        elif FLAGS.weights.split(".")[-1] == "weights":
            utils.load_weights(model, FLAGS.weights)
        else:
            model.load_weights(FLAGS.weights).expect_partial()

        model.summary()
    elif FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
    elif FLAGS.framework == 'trt':
        saved_model_loaded = tf.saved_model.load(
            FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # Appearance-feature encoder and tracker.
    max_cosine_distance = 0.5  # 0.5 / 0.7
    nn_budget = None
    model_filename = './weights/tracker/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # Class names to track; an empty list means "track every class".
    track_only = ["person"]

    # Reference face used to label detections.
    nacho_image = face_recognition.load_image_file("data/faces/nacho.jpg")
    nacho_face_encoding = face_recognition.face_encodings(nacho_image)[0]
    known_face_encodings = [nacho_face_encoding]
    known_face_names = ["Nacho"]

    logging.info("Models loaded!")

    try:
        while True:
            return_value, frame = vid.read()
            if not return_value:
                logging.warning("Empty Frame")
                break

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_size = frame.shape[:2]
            image_data = utils.image_preprocess(np.copy(frame),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            if FLAGS.framework == 'tf':
                pred_bbox = model.predict(image_data)
            elif FLAGS.framework == 'tflite':
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred_bbox = [
                    interpreter.get_tensor(detail['index'])
                    for detail in output_details
                ]
            elif FLAGS.framework == 'trt':
                batched_input = tf.constant(image_data)
                pred_bbox = [
                    value.numpy() for value in infer(batched_input).values()
                ]

            if FLAGS.model == 'yolov4':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES, XYSCALE)
            else:
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES)

            bboxes = utils.postprocess_boxes(pred_bbox, frame_size,
                                             input_size, 0.5)  # 0.25
            bboxes = utils.nms(bboxes, 0.213, method='nms')  # 0.213
            bboxes = utils.calculate_safety(bboxes)

            # Face recognition: label every face found in the frame.
            face_locations = face_recognition.face_locations(frame)
            face_encodings = face_recognition.face_encodings(
                frame, face_locations)
            face_names = []
            for face_encoding in face_encodings:
                matches = face_recognition.compare_faces(
                    known_face_encodings, face_encoding)
                name = "Unknown"
                if True in matches:
                    name = known_face_names[matches.index(True)]
                face_names.append(name)

            # Attach the first sufficiently overlapping face name to each
            # detection, or "Unknown" when none was attached.
            for bbox in bboxes:
                person_coor = np.array(bbox[:4], dtype=np.int32)
                for (top, right, bottom, left), name in zip(
                        face_locations, face_names):
                    face_coor = np.array([left, top, right, bottom],
                                         dtype=np.int32)
                    iou_score = utils.calculate_iou(person_coor, face_coor)
                    if iou_score > 0.75:
                        bbox.append(name)
                        break
                if len(bbox) < 8:
                    bbox.append("Unknown")

            boxes, scores, names, safety_scores, face_ids = [], [], [], [], []
            for bbox in bboxes:
                class_label = CLASSES[int(bbox[5])]
                if not track_only or class_label in track_only:
                    # Boxes are passed on as (x, y, width, height).
                    boxes.append([
                        bbox[0].astype(int),
                        bbox[1].astype(int),
                        bbox[2].astype(int) - bbox[0].astype(int),
                        bbox[3].astype(int) - bbox[1].astype(int)
                    ])
                    scores.append(bbox[4])
                    names.append(class_label)
                    safety_scores.append(int(bbox[6]))
                    face_ids.append(bbox[7])

            boxes = np.array(boxes)
            names = np.array(names)
            scores = np.array(scores)
            features = np.array(encoder(frame, boxes))
            safety_scores = np.array(safety_scores)
            face_ids = np.array(face_ids)
            detections = [
                Detection(bbox, score, class_name, feature, face_name_id,
                          safety_score)
                for bbox, score, class_name, feature, face_name_id,
                safety_score in zip(boxes, scores, names, features, face_ids,
                                    safety_scores)
            ]

            tracker.predict()
            tracker.update(detections)

            tracked_bboxes = []
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                face_name_id = track.get_face_name()
                safety_score = track.get_safety_score()
                tracked_bboxes.append(track.to_tlbr().tolist() +
                                      [face_name_id, safety_score])

            image = utils.draw_demo(frame, tracked_bboxes)
            # The frame was converted to RGB above; swap the channels back
            # for OpenCV display and writing.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            cv2.namedWindow("Detections", cv2.WINDOW_AUTOSIZE)
            cv2.imshow("Detections", image)
            if FLAGS.output:
                out.write(image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release capture, writer and windows even if a frame raised.
        vid.release()
        if FLAGS.output:
            out.release()
        cv2.destroyAllWindows()
def main(_argv):
    """Benchmark inference speed of the selected model on a single image.

    The model is loaded either as a Keras graph (``FLAGS.framework == 'tf'``)
    or as a TensorRT saved model (``'trt'``). The image at ``FLAGS.image``
    is preprocessed once and pushed through the model 1000 times; the first
    pass is treated as warm-up and excluded from the running average.

    Args:
        _argv: Unused positional command-line arguments.
    """
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    # Kept in a local so the session stays referenced for the whole run.
    session = InteractiveSession(config=config)

    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    input_size = FLAGS.size

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if physical_devices:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            bbox_tensors = [
                decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights)
        elif FLAGS.model == 'yolov3':
            feature_maps = YOLOv3(input_layer, NUM_CLASS)
            bbox_tensors = [
                decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_v3(model, FLAGS.weights)
        elif FLAGS.model == 'yolov4':
            feature_maps = YOLOv4(input_layer, NUM_CLASS)
            bbox_tensors = [
                decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights(model, FLAGS.weights)
    elif FLAGS.framework == 'trt':
        saved_model_loaded = tf.saved_model.load(
            FLAGS.weights, tags=[tag_constants.SERVING])
        signature_keys = list(saved_model_loaded.signatures.keys())
        print(signature_keys)
        infer = saved_model_loaded.signatures['serving_default']

    logging.info('weights loaded')

    @tf.function
    def run_model(x):
        # Only invoked on the 'tf' path, where ``model`` is defined.
        return model(x)

    original_image = cv2.imread(FLAGS.image)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

    image_data = utils.image_preporcess(np.copy(original_image),
                                        [FLAGS.size, FLAGS.size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    batched_input = tf.constant(image_data)

    # Running total of per-iteration FPS (named to avoid shadowing ``sum``).
    fps_total = 0
    for i in range(1000):
        # perf_counter is monotonic and high-resolution, so the interval is
        # far less likely to be zero or negative than with time.time().
        start = time.perf_counter()
        if FLAGS.framework == 'tf':
            run_model(image_data)
        elif FLAGS.framework == 'trt':
            infer(batched_input)
        exec_time = time.perf_counter() - start

        # The first pass includes tracing/warm-up cost; leave it out.
        if i == 0:
            continue

        fps_total += 1 / exec_time
        info = "{} time:{} average FPS:{}, FPS: {}".format(
            i, round(exec_time, 3), round(fps_total / i, 2),
            round(1 / exec_time, 1))
        print(info)
def main(_argv):
    """Run the detector over the test annotations and dump mAP input files.

    For every annotated image this writes one ground-truth text file and one
    prediction text file (named by line index) under ``./mAP``, and saves the
    image with the predicted boxes drawn into
    ``cfg.TEST.DECTECTED_IMAGE_PATH``. All three output directories are
    deleted and recreated at start-up.

    Args:
        _argv: Unused positional command-line arguments.
    """
    INPUT_SIZE = FLAGS.size
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
    # Read the class file once and derive the count from it.
    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
    NUM_CLASS = len(CLASSES)
    XYSCALE = cfg.YOLO.XYSCALE

    predicted_dir_path = './mAP/predicted'
    ground_truth_dir_path = './mAP/ground-truth'
    output_dirs = (predicted_dir_path, ground_truth_dir_path,
                   cfg.TEST.DECTECTED_IMAGE_PATH)
    # Start from empty output directories.
    for directory in output_dirs:
        if os.path.exists(directory):
            shutil.rmtree(directory)
    for directory in output_dirs:
        os.mkdir(directory)

    # Build Model
    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([INPUT_SIZE, INPUT_SIZE, 3])
        if FLAGS.tiny:
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            bbox_tensors = [
                decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights)
        elif FLAGS.model == 'yolov3':
            feature_maps = YOLOv3(input_layer, NUM_CLASS)
            bbox_tensors = [
                decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_v3(model, FLAGS.weights)
        elif FLAGS.model == 'yolov4':
            feature_maps = YOLOv4(input_layer, NUM_CLASS)
            bbox_tensors = [
                decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights(model, FLAGS.weights)
    else:
        # Load TFLite model and allocate tensors.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)

    # NOTE(review): the line count comes from FLAGS.annotation_path while the
    # loop below reads cfg.TEST.ANNOT_PATH -- confirm both refer to the same
    # file.
    with open(FLAGS.annotation_path) as counted_file:
        num_lines = sum(1 for _ in counted_file)

    with open(cfg.TEST.ANNOT_PATH, 'r') as annotation_file:
        for num, line in enumerate(annotation_file):
            annotation = line.strip().split()
            if not annotation:
                # Blank line: nothing to evaluate (previously an IndexError).
                continue
            image_path = annotation[0]
            image_name = image_path.split('/')[-1]
            image = cv2.imread(image_path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            # Each remaining field is "xmin,ymin,xmax,ymax,class_id".
            bbox_data_gt = np.array(
                [list(map(int, box.split(','))) for box in annotation[1:]])

            if len(bbox_data_gt) == 0:
                bboxes_gt = []
                classes_gt = []
            else:
                bboxes_gt = bbox_data_gt[:, :4]
                classes_gt = bbox_data_gt[:, 4]
            ground_truth_path = os.path.join(ground_truth_dir_path,
                                             str(num) + '.txt')

            print('=> ground truth of %s:' % image_name)
            with open(ground_truth_path, 'w') as f:
                for box_gt, class_gt in zip(bboxes_gt, classes_gt):
                    class_name = CLASSES[class_gt]
                    xmin, ymin, xmax, ymax = list(map(str, box_gt))
                    bbox_mess = ' '.join(
                        [class_name, xmin, ymin, xmax, ymax]) + '\n'
                    f.write(bbox_mess)
                    print('\t' + str(bbox_mess).strip())

            print('=> predict result of %s:' % image_name)
            predict_result_path = os.path.join(predicted_dir_path,
                                               str(num) + '.txt')

            # Predict Process
            image_size = image.shape[:2]
            image_data = utils.image_preporcess(np.copy(image),
                                                [INPUT_SIZE, INPUT_SIZE])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            if FLAGS.framework == "tf":
                pred_bbox = model.predict(image_data)
            else:
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred_bbox = [
                    interpreter.get_tensor(detail['index'])
                    for detail in output_details
                ]

            if FLAGS.model == 'yolov3':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES)
            elif FLAGS.model == 'yolov4':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES, XYSCALE=XYSCALE)

            pred_bbox = tf.concat(pred_bbox, axis=0)
            bboxes = utils.postprocess_boxes(pred_bbox, image_size,
                                             INPUT_SIZE,
                                             cfg.TEST.SCORE_THRESHOLD)
            bboxes = utils.nms(bboxes, cfg.TEST.IOU_THRESHOLD, method='nms')

            if cfg.TEST.DECTECTED_IMAGE_PATH is not None:
                image = utils.draw_bbox(image, bboxes)
                # os.path.join works with or without a trailing separator on
                # the configured directory.
                cv2.imwrite(
                    os.path.join(cfg.TEST.DECTECTED_IMAGE_PATH, image_name),
                    image)

            with open(predict_result_path, 'w') as f:
                for bbox in bboxes:
                    coor = np.array(bbox[:4], dtype=np.int32)
                    score = '%.4f' % bbox[4]
                    class_name = CLASSES[int(bbox[5])]
                    xmin, ymin, xmax, ymax = list(map(str, coor))
                    bbox_mess = ' '.join(
                        [class_name, score, xmin, ymin, xmax, ymax]) + '\n'
                    f.write(bbox_mess)
                    print('\t' + str(bbox_mess).strip())
            print(num, num_lines)
def main(_argv):
    """Train YOLOv4 in two stages, saving weights after every epoch.

    Model hyper-parameters come from ``utils.load_config()`` and the layers
    to freeze during the first stage from ``utils.load_freeze_layer()``.
    Training uses a warm-up + cosine learning-rate schedule and writes scalar
    summaries to ``./data/log``.

    Args:
        _argv: Unused positional command-line arguments.
    """
    trainset = Dataset(is_training=True)
    testset = Dataset(is_training=False)
    logdir = "./data/log"
    isfreeze = False
    steps_per_epoch = len(trainset)
    first_stage_epochs = cfg.TRAIN.FISRT_STAGE_EPOCHS
    second_stage_epochs = cfg.TRAIN.SECOND_STAGE_EPOCHS
    global_steps = tf.Variable(1, trainable=False, dtype=tf.int64)
    warmup_steps = cfg.TRAIN.WARMUP_EPOCHS * steps_per_epoch
    total_steps = (first_stage_epochs + second_stage_epochs) * steps_per_epoch

    input_layer = tf.keras.layers.Input(
        [cfg.TRAIN.INPUT_SIZE, cfg.TRAIN.INPUT_SIZE, 3])
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config()
    IOU_LOSS_THRESH = cfg.YOLO.IOU_LOSS_THRESH

    freeze_layers = utils.load_freeze_layer()

    feature_maps = YOLOv4(input_layer, NUM_CLASS)
    # Divisor applied to SIZE for head 0, head 1, and every later head.
    # NOTE(review): SIZE is not defined in this function; presumably a
    # module-level constant -- verify.
    downsample_by_head = (8, 16, 32)
    # Outputs are interleaved: [raw_0, decoded_0, raw_1, decoded_1, ...].
    bbox_tensors = []
    for i, fm in enumerate(feature_maps):
        downsample = downsample_by_head[min(i, 2)]
        bbox_tensor = decode_train(fm, SIZE // downsample, NUM_CLASS,
                                   STRIDES, ANCHORS, i, XYSCALE)
        bbox_tensors.append(fm)
        bbox_tensors.append(bbox_tensor)

    model = tf.keras.Model(input_layer, bbox_tensors)
    model.summary()

    optimizer = tf.keras.optimizers.Adam()
    if os.path.exists(logdir):
        shutil.rmtree(logdir)
    writer = tf.summary.create_file_writer(logdir)

    def forward_losses(image_data, target):
        """Run the model and return (giou, conf, prob, total) losses."""
        pred_result = model(image_data, training=True)
        giou_loss = conf_loss = prob_loss = 0

        # The number of freeze layers is used as the number of heads.
        for i in range(len(freeze_layers)):
            conv, pred = pred_result[i * 2], pred_result[i * 2 + 1]
            loss_items = compute_loss(pred, conv, target[i][0], target[i][1],
                                      STRIDES=STRIDES, NUM_CLASS=NUM_CLASS,
                                      IOU_LOSS_THRESH=IOU_LOSS_THRESH, i=i)
            giou_loss += loss_items[0]
            conf_loss += loss_items[1]
            prob_loss += loss_items[2]

        total_loss = giou_loss + conf_loss + prob_loss
        return giou_loss, conf_loss, prob_loss, total_loss

    def train_step(image_data, target):
        """Apply one optimizer update, then advance the LR schedule."""
        with tf.GradientTape() as tape:
            giou_loss, conf_loss, prob_loss, total_loss = forward_losses(
                image_data, target)

            gradients = tape.gradient(total_loss, model.trainable_variables)
            optimizer.apply_gradients(
                zip(gradients, model.trainable_variables))
            tf.print(
                "=> STEP %4d/%4d lr: %.6f giou_loss: %4.2f conf_loss: %4.2f "
                "prob_loss: %4.2f total_loss: %4.2f" %
                (global_steps, total_steps, optimizer.lr.numpy(), giou_loss,
                 conf_loss, prob_loss, total_loss))

            # Linear warm-up followed by cosine decay to LR_END.
            global_steps.assign_add(1)
            if global_steps < warmup_steps:
                lr = global_steps / warmup_steps * cfg.TRAIN.LR_INIT
            else:
                lr = cfg.TRAIN.LR_END + 0.5 * (
                    cfg.TRAIN.LR_INIT - cfg.TRAIN.LR_END) * ((1 + tf.cos(
                        (global_steps - warmup_steps) /
                        (total_steps - warmup_steps) * np.pi)))
            optimizer.lr.assign(lr.numpy())

            with writer.as_default():
                tf.summary.scalar("lr", optimizer.lr, step=global_steps)
                tf.summary.scalar("loss/total_loss", total_loss,
                                  step=global_steps)
                tf.summary.scalar("loss/giou_loss", giou_loss,
                                  step=global_steps)
                tf.summary.scalar("loss/conf_loss", conf_loss,
                                  step=global_steps)
                tf.summary.scalar("loss/prob_loss", prob_loss,
                                  step=global_steps)
            writer.flush()

    def test_step(image_data, target):
        """Report the losses on one evaluation batch without updating."""
        # No GradientTape here: nothing is differentiated during evaluation.
        # NOTE(review): the forward pass still uses training=True, as the
        # original did -- confirm whether evaluation should use inference mode.
        giou_loss, conf_loss, prob_loss, total_loss = forward_losses(
            image_data, target)
        tf.print(
            "=> TEST STEP %4d giou_loss: %4.2f conf_loss: %4.2f "
            "prob_loss: %4.2f total_loss: %4.2f" %
            (global_steps, giou_loss, conf_loss, prob_loss, total_loss))

    for epoch in range(first_stage_epochs + second_stage_epochs):
        if epoch < first_stage_epochs:
            if not isfreeze:
                isfreeze = True
                for name in freeze_layers:
                    freeze_all(model.get_layer(name))
        else:
            if isfreeze:
                isfreeze = False
                for name in freeze_layers:
                    unfreeze_all(model.get_layer(name))

        for image_data, target in trainset:
            train_step(image_data, target)
        for image_data, target in testset:
            test_step(image_data, target)
        model.save_weights("./checkpoints/yolov4")
def main(_argv):
    """Serve object-detection requests over a TCP socket.

    After loading the model, the function listens on port 45678 and accepts a
    single client. The first message is a JSON object with the keys
    ``shape``, ``name``, ``type`` and ``object``; ``name`` identifies an
    existing shared-memory block that holds the frame. Each subsequent
    ``b'1'`` message triggers one call to ``process`` on the shared frame, and
    a JSON reply with ``was_found`` (plus ``coords`` when the object was
    found) is sent back. The loop ends when the client disconnects or ``q``
    is pressed in the preview window.

    Args:
        _argv: Unused positional command-line arguments.
    """
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size
    image_path = FLAGS.image

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            bbox_tensors = [
                decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights)
        elif FLAGS.model == 'yolov3':
            feature_maps = YOLOv3(input_layer, NUM_CLASS)
            bbox_tensors = [
                decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_v3(model, FLAGS.weights)
        elif FLAGS.model == 'yolov4':
            feature_maps = YOLOv4(input_layer, NUM_CLASS)
            bbox_tensors = [
                decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights(model, FLAGS.weights)
    else:
        # Load TFLite model and allocate tensors.
        # NOTE(review): the serving loop below passes ``model`` to process(),
        # which is not defined on this path -- confirm how the TFLite
        # interpreter is meant to be used here.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)

    def _json_fallback(obj):
        """Convert NumPy arrays/scalars to builtins for json.dumps."""
        if hasattr(obj, 'tolist'):
            return obj.tolist()
        raise TypeError(
            'Object of type %s is not JSON serializable' % type(obj).__name__)

    host = ''  # Symbolic name meaning all available interfaces
    port = 45678  # Arbitrary non-privileged port

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server:
        server.bind((host, port))
        server.listen(1)
        print('Initializing connection')
        conn, addr = server.accept()
        with conn:
            print('Connected by', addr)

            # Handshake: the client describes the shared frame buffer.
            handshake = json.loads(conn.recv(1024).decode())
            shape = handshake.get("shape")
            shared_memory_name = handshake.get("name")
            type_data = handshake.get("type")
            object_to_find = handshake.get("object")

            # Attach to the existing shared memory block.
            existing_shm = shared_memory.SharedMemory(name=shared_memory_name)
            try:
                print('Esperando imágenes')
                while True:
                    data = conn.recv(1024)
                    if not data:
                        # An empty read means the peer closed the connection;
                        # without this check the loop would spin forever.
                        break
                    if data != b'1':
                        continue

                    # View the client's frame in place; no copy is made here.
                    frame = np.ndarray(shape, dtype=type_data,
                                       buffer=existing_shm.buf)
                    frame, object_found, coords = process(
                        frame, input_size, model, object_to_find, FLAGS,
                        ANCHORS, STRIDES, XYSCALE)

                    cv2.imshow('frame', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

                    if object_found:
                        reply = {"was_found": object_found, "coords": coords}
                    else:
                        reply = {"was_found": object_found}
                    # ``default`` is consulted only for values json cannot
                    # encode natively, so already-serialisable replies are
                    # unchanged.
                    conn.sendall(
                        json.dumps(reply, default=_json_fallback).encode())
            finally:
                existing_shm.close()