def on_run(image):
    """Detect objects in one image and return the boxes that survive NMS.

    The image is copied, preprocessed to an ``input_size`` x ``input_size``
    network input, run through the module-level ``model``, decoded with the
    module-level anchors/strides and then filtered by ``conf_threshold`` and
    non-maximum suppression at ``iou_threshold``.

    Args:
        image: Input image array. ``image.shape[:-1]`` is passed on as the
            original image size (assumes the last axis is channels — TODO
            confirm against callers).

    Returns:
        dict: ``{'bboxes': ndarray}``. Per the original author's note each
        row is ``[xmin, ymin, xmax, ymax, score, class]``.
    """
    batch = utils.image_preporcess(np.copy(image), [input_size, input_size])
    batch = batch[np.newaxis, ...].astype(np.float32)

    raw_prediction = model.predict(batch)

    # Only the non-YOLOv3 models pass XYSCALE to the decoder.
    if model_name == "yolov3":
        decode_args = (anchors, STRIDES)
    else:
        decode_args = (anchors, STRIDES, XYSCALE)
    decoded = utils.postprocess_bbbox(raw_prediction, *decode_args)

    candidates = utils.postprocess_boxes(decoded, image.shape[:-1],
                                         input_size, conf_threshold)
    kept = utils.nms(candidates, iou_threshold, method='nms')
    return {'bboxes': np.array(kept)}
def batch_bboxes(model, frames):
    """Run person detection on several frames in a single ``model.predict`` call.

    Each frame is converted with ``frame_to_gpu``, the results are stacked
    and squeezed into one tensor, and the batched prediction is decoded,
    filtered to people and passed through NMS.

    Args:
        model: Object exposing ``predict``.
        frames: Sequence of image arrays; ``frame.shape[:2]`` is recorded
            per frame.

    Returns:
        None. No ``return`` statement is visible in this view.
        NOTE(review): the function looks experimental or truncated (debug
        prints, unused locals) — confirm what callers expect.
    """
    all_image_data = [None] * len(frames)
    sizes = [None] * len(frames)
    # NOTE(review): ``bbbb`` is never read in the visible code.
    bbbb = [[], []]
    for i, frame in enumerate(frames):
        #move frame to GPU
        all_image_data[i] = frame_to_gpu(frame)
        sizes[i] = frame.shape[:2]
    # Stack along axis 1 and drop size-1 axes; presumably each converted
    # frame carries a leading batch axis of 1 — TODO confirm the resulting
    # shape against frame_to_gpu.
    stacked = tf.stack(all_image_data, axis=1)
    trimmed = tf.squeeze(stacked)
    # print(trimmed.shape)
    # dataset = tf.data.Dataset.from_tensor_slices(stacked)
    # dataset = dataset.batch(2)
    # for im_data in dataset.as_numpy_iterator():
    pred_bbox = model.predict(trimmed)
    print('aww yeah')
    pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
    # NOTE(review): only the first frame's size is used to rescale boxes for
    # the whole batch, and ``image_nums`` is not used afterwards — confirm.
    all_bboxes, probs, classes, image_nums = utils.postprocess_boxes(
        pred_bbox, sizes[0], INPUT_SIZE, 0.25)  #.25
    bboxes = utils.filter_people(all_bboxes, probs, classes)
    bboxes = utils.nms(bboxes, 0.213, method='nms')
    bboxes = np.array(bboxes)
    # bboxes2 = utils.nms(bboxes2, 0.213, method='nms')
    # bboxes2 = np.array(bboxes2)
    print('frame1')
def main(_argv):
    """Detect objects in ``FLAGS.image`` using a gRPC-served YOLO model.

    The image is read from disk, preprocessed to ``FLAGS.size``, sent to
    ``grpc_yolov4_client`` and the returned predictions are decoded,
    score-filtered, passed through NMS and drawn on the image. The annotated
    image is shown and written to ``FLAGS.output``.

    Args:
        _argv: Unused positional arguments.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``FLAGS.image``.
    """
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
        XYSCALE = cfg.YOLO.XYSCALE_TINY
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
        XYSCALE = cfg.YOLO.XYSCALE

    input_size = FLAGS.size
    image_path = FLAGS.image

    original_image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None rather than raising;
    # fail here with a clear message instead of inside cvtColor.
    if original_image is None:
        raise FileNotFoundError(f"Could not read image: {image_path!r}")
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]

    image_data = utils.image_preprocess(np.copy(original_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    pred_bbox = grpc_yolov4_client(FLAGS.host, FLAGS.model, image_data,
                                   shape_size=FLAGS.size)

    # The tiny and full YOLOv4 variants are decoded by the same call; only
    # the non-YOLOv4 models omit XYSCALE.
    if FLAGS.model == 'yolov4':
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                            XYSCALE)
    else:
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)

    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                     input_size, 0.25)
    bboxes = utils.nms(bboxes, 0.213, method='nms')

    image = utils.draw_bbox(original_image, bboxes)
    image = Image.fromarray(image)
    image.show()

    # The image is RGB at this point; cv2.imwrite expects BGR.
    image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    cv2.imwrite(FLAGS.output, image)
def process(frame, input_size, model, object_to_find, FLAGS, ANCHORS, STRIDES,
            XYSCALE):
    """Run YOLO detection on one BGR frame and draw the detections.

    Args:
        frame: Frame as read by OpenCV (BGR channel order).
        input_size: Side length of the square network input.
        model: Object exposing ``predict``; used when
            ``FLAGS.framework == 'tf'``.
        object_to_find: Passed through to ``utils.draw_bbox``.
        FLAGS: Object providing ``framework`` and ``model`` attributes.
        ANCHORS: Anchors passed to ``utils.postprocess_bbbox``.
        STRIDES: Strides passed to ``utils.postprocess_bbbox``.
        XYSCALE: Passed to ``utils.postprocess_bbbox`` only for ``'yolov4'``.

    Returns:
        tuple: ``(result, was_found, coords)`` where ``result`` is the
        annotated frame converted back to BGR and the other two values are
        returned unchanged from ``utils.draw_bbox``.
    """
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_size = frame.shape[:2]

    image_data = utils.image_preporcess(np.copy(frame),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    if FLAGS.framework == 'tf':
        pred_bbox = model.predict(image_data)
    else:
        # The TFLite path relies on the module-level ``interpreter``,
        # ``input_details`` and ``output_details``.
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        pred_bbox = [
            interpreter.get_tensor(detail['index'])
            for detail in output_details
        ]

    if FLAGS.model == 'yolov4':
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                            XYSCALE)
    else:
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)

    bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.25)
    bboxes = utils.nms(bboxes, 0.213, method='nms')

    image, was_found, coords = utils.draw_bbox(frame, bboxes, object_to_find)

    result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return result, was_found, coords
def person_bboxes(model, image_data, frame_size):
    """Predict boxes for one preprocessed frame and keep only people.

    Args:
        model: Object exposing ``predict``.
        image_data: Preprocessed network input passed straight to
            ``model.predict``.
        frame_size: Original frame size forwarded to
            ``utils.postprocess_boxes``.

    Returns:
        The output of ``utils.filter_people`` when it is empty, otherwise the
        result of running ``utils.nms`` over it.
    """
    raw_prediction = model.predict(image_data)
    decoded = utils.postprocess_bbbox(raw_prediction, ANCHORS, STRIDES,
                                      XYSCALE)
    all_bboxes, probs, classes = utils.postprocess_boxes(
        decoded, frame_size, INPUT_SIZE, 0.25)

    people = utils.filter_people(all_bboxes, probs, classes)

    # Nothing to suppress when no person was detected.
    if len(people) == 0:
        return people

    # Drop redundant, overlapping boxes.
    return utils.nms(people, 0.213, method='nms')
def main():
    """Run a YOLOv4 model on ``args.input`` and save the annotated image.

    Weights come from ``./weights/pretrained.h5`` when ``args.pretrained`` is
    set, otherwise from the latest checkpoint under ``args.weights`` (if
    given). The annotated image is written to ``args.output/result.png``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``args.input``.
    """
    STRIDES = np.array(cfg.YOLO.STRIDES)
    ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS)
    # Read the class-name file once and reuse it for counting and drawing.
    class_names = utils.read_class_names(cfg.YOLO.CLASSES)
    NUM_CLASS = len(class_names)
    XYSCALE = cfg.YOLO.XYSCALE

    input_size = args.size
    image_path = args.input

    original_image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None rather than raising.
    if original_image is None:
        raise FileNotFoundError(f"Could not read image: {image_path!r}")
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]

    image_data = utils.image_preprocess(np.copy(original_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    model = YOLOv4(NUM_CLASS, STRIDES, ANCHORS, XYSCALE, 'test')

    if args.pretrained:
        # One dummy forward pass before loading the .h5 weights (presumably
        # to create the model's variables — TODO confirm).
        dummy_input = np.ones((1, args.size, args.size, 3))
        model.predict(dummy_input)
        model.load_weights('./weights/pretrained.h5')
        print('Pretrained weights loaded')
    elif args.weights is not None:
        ckpt = tf.train.Checkpoint(model=model)
        ckpt_manager = tf.train.CheckpointManager(ckpt, args.weights,
                                                  max_to_keep=3)
        if ckpt_manager.latest_checkpoint:
            ckpt.restore(ckpt_manager.latest_checkpoint).expect_partial()
            print('Latest checkpoint restored')
        else:
            print('Failed to load latest checkpoint')

    pred_bbox = model.predict(image_data)
    pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                     input_size, 0.25)
    bboxes = utils.nms(bboxes, 0.213, method='nms')

    image = utils.draw_bbox(original_image, bboxes, class_names)

    # The annotated image is RGB; cv2.imwrite expects BGR.
    image = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
    cv2.imwrite(os.path.join(args.output, 'result.png'), image)
def post_process_boxes(pred_bbox, model_type, frame_size, input_size):
    """Convert raw model output into a list of detection boxes.

    Args:
        pred_bbox: For ``'yolov4'`` the raw prediction handed to
            ``utils.postprocess_bbbox``; for any other model type a
            4-sequence ``(boxes, objectness, classes, nums)`` whose first
            element along axis 0 is used.
        model_type: ``'yolov4'`` selects the YOLOv4 decoding path.
        frame_size: ``(height, width)`` of the original frame.
        input_size: Network input size (used only on the YOLOv4 path).

    Returns:
        For ``'yolov4'``, the result of ``utils.nms``. Otherwise a list of
        ``[x1, y1, x2, y2, objectness, class_index]`` entries, with the box
        coordinates multiplied by the frame width/height and cast to int32.
    """
    if model_type != 'yolov4':
        boxes, objectness, classes, nums = pred_bbox
        boxes = boxes[0]
        objectness = objectness[0]
        classes = classes[0]
        nums = nums[0]

        # Coordinates are scaled by (width, height) of the frame.
        scale = np.array([frame_size[1], frame_size[0]])

        detections = []
        for idx in range(nums):
            x_min, y_min = (np.array(boxes[idx][0:2]) * scale).astype(np.int32)
            x_max, y_max = (np.array(boxes[idx][2:4]) * scale).astype(np.int32)
            detections.append([
                x_min, y_min, x_max, y_max, objectness[idx],
                int(classes[idx])
            ])
        return detections

    decoded = utils.postprocess_bbbox(pred_bbox, ANCHORS_PPE, STRIDES_PPE,
                                      XYSCALE_PPE)
    candidates = utils.postprocess_boxes(decoded, frame_size, input_size,
                                         0.5)  # 0.25
    return utils.nms(candidates, 0.213, method='nms')  # 0.213
def get_boxes(model, original_image, input_size=608):
    """Detect objects in a BGR image and return boxes, scores and classes.

    Args:
        model: Object exposing ``predict``.
        original_image: Image as read by OpenCV (BGR channel order).
        input_size: Side length of the square network input.

    Returns:
        tuple: ``(boxs, confidence, class_idx)`` — three lists of equal
        length taken from columns ``0:4``, ``4`` and ``5`` of the detections
        kept by NMS. All three are empty when nothing is detected.
    """
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]

    image_data = utils.image_preporcess(np.copy(original_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    pred_bbox = model.predict(image_data)
    pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                     input_size, 0.25)
    if bboxes is None:
        return [], [], []

    bboxes = utils.nms(bboxes, 0.213, method='nms')
    # An empty result cannot be sliced as a 2-D array below (IndexError), so
    # report "no detections" explicitly.
    if bboxes is None or len(bboxes) == 0:
        return [], [], []

    detections = np.array(bboxes)
    boxs = list(detections[:, 0:4])
    confidence = list(detections[:, 4])
    class_idx = list(detections[:, 5])
    return boxs, confidence, class_idx
scaled_depth = cv2.convertScaleAbs(depth_image, alpha=0.08) depth_colormap = cv2.applyColorMap(scaled_depth, cv2.COLORMAP_JET) if FLAGS.framework == 'tf': pred_bbox = model.predict(image_data) else: interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred_bbox = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] if FLAGS.model == 'yolov4': pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE) else: pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES) bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.25) bboxes = utils.nms(bboxes, 0.213, method='nms') for box in bboxes: x_mid = int((box[0] + box[2]) / 2) y_mid = int((box[1] + box[3]) / 2) pixel_depths = [] for i in range(3): for j in range(3): pixel_depths.append( depth_frame.get_distance(int(x_mid + i - 1),
def main(_argv):
    """Write ground-truth and prediction files for mAP evaluation.

    For every line of ``cfg.TEST.ANNOT_PATH`` the image is loaded, its
    ground-truth boxes are written to ``./mAP/ground-truth/<n>.txt`` and the
    model's detections to ``./mAP/predicted/<n>.txt``. Annotated images are
    written under ``cfg.TEST.DECTECTED_IMAGE_PATH``. All three output
    directories are deleted and recreated at start-up.

    Args:
        _argv: Unused positional arguments.
    """
    INPUT_SIZE = FLAGS.size
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
    predicted_dir_path = './mAP/predicted'
    ground_truth_dir_path = './mAP/ground-truth'
    # Start from empty output directories on every run.
    if os.path.exists(predicted_dir_path):
        shutil.rmtree(predicted_dir_path)
    if os.path.exists(ground_truth_dir_path):
        shutil.rmtree(ground_truth_dir_path)
    if os.path.exists(cfg.TEST.DECTECTED_IMAGE_PATH):
        shutil.rmtree(cfg.TEST.DECTECTED_IMAGE_PATH)

    os.mkdir(predicted_dir_path)
    os.mkdir(ground_truth_dir_path)
    os.mkdir(cfg.TEST.DECTECTED_IMAGE_PATH)

    # Inference durations; only appended to on the 'tf' framework path.
    times = []
    # Predictions are written only for these class names (see the filter in
    # the prediction loop below).
    classes = [
        'Book', 'Bottle', 'Computer keyboard', 'Computer mouse', 'Laptop',
        'Mobile phone', 'Backpack'
    ]

    # Build Model
    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([INPUT_SIZE, INPUT_SIZE, 3])
        if FLAGS.tiny:
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            bbox_tensors = []
            for i, fm in enumerate(feature_maps):
                bbox_tensor = decode(fm, NUM_CLASS, i)
                bbox_tensors.append(bbox_tensor)
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights(model, FLAGS.weights)
    else:
        # Load TFLite model and allocate tensors.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)

    # NOTE(review): the line count comes from FLAGS.annotation_path while the
    # loop below reads cfg.TEST.ANNOT_PATH, and this file handle is never
    # closed explicitly — confirm both are intended.
    num_lines = sum(1 for line in open(FLAGS.annotation_path))
    with open(cfg.TEST.ANNOT_PATH, 'r') as annotation_file:
        for num, line in enumerate(annotation_file):
            # Each line: image path followed by comma-separated box fields.
            annotation = line.strip().split()
            image_path = annotation[0]
            image_name = image_path.split('/')[-1]
            image = cv2.imread(image_path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            bbox_data_gt = np.array(
                [list(map(float, box.split(','))) for box in annotation[1:]])

            if len(bbox_data_gt) == 0:
                bboxes_gt = []
                classes_gt = []
            else:
                # First four columns are the box, the fifth the class index.
                bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]
            ground_truth_path = os.path.join(ground_truth_dir_path,
                                             str(num) + '.txt')

            current_class = ''
            print('=> ground truth of %s:' % image_name)
            num_bbox_gt = len(bboxes_gt)
            with open(ground_truth_path, 'w') as f:
                for i in range(num_bbox_gt):
                    # this (marker left by the original author)
                    class_name = CLASSES[classes_gt[i]]
                    # this (marker left by the original author)
                    # The first ground-truth box fixes the class for this
                    # image; boxes of any other class are skipped.
                    if i == 0:
                        current_class = class_name
                    class_name = CLASSES[classes_gt[i]]
                    if class_name == current_class:
                        xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i]))
                        bbox_mess = ' '.join(
                            [class_name, xmin, ymin, xmax, ymax]) + '\n'
                        f.write(bbox_mess)
                        print('\t' + str(bbox_mess).strip())
            print('=> predict result of %s:' % image_name)
            predict_result_path = os.path.join(predicted_dir_path,
                                               str(num) + '.txt')
            # Predict Process
            image_size = image.shape[:2]
            image_data = utils.image_preprocess(np.copy(image),
                                                [INPUT_SIZE, INPUT_SIZE])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            if FLAGS.framework == "tf":
                startTime = time.time()
                pred_bbox = model.predict(image_data)
                times.append(time.time() - startTime)
            else:
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred_bbox = [
                    interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))
                ]
            if FLAGS.model == 'yolov3':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES)
            elif FLAGS.model == 'yolov4':
                XYSCALE = cfg.YOLO.XYSCALE
                pred_bbox = utils.postprocess_bbbox(pred_bbox,
                                                    ANCHORS,
                                                    STRIDES,
                                                    XYSCALE=XYSCALE)

            pred_bbox = tf.concat(pred_bbox, axis=0)
            bboxes = utils.postprocess_boxes(pred_bbox, image_size,
                                             INPUT_SIZE,
                                             cfg.TEST.SCORE_THRESHOLD)
            bboxes = utils.nms(bboxes, cfg.TEST.IOU_THRESHOLD, method='nms')

            # NOTE(review): DECTECTED_IMAGE_PATH was already passed to
            # os.path.exists/os.mkdir above, so it cannot be None here.
            if cfg.TEST.DECTECTED_IMAGE_PATH is not None:
                image = utils.draw_bbox(image, bboxes)
                cv2.imwrite(cfg.TEST.DECTECTED_IMAGE_PATH + image_name, image)

            with open(predict_result_path, 'w') as f:
                for bbox in bboxes:
                    # Keep only whitelisted classes that also match the class
                    # of this image's first ground-truth box.
                    if (CLASSES[int(bbox[5])]
                            in classes) and (current_class == CLASSES[int(
                                bbox[5])]):
                        coor = np.array(bbox[:4], dtype=np.int32)
                        score = bbox[4]
                        class_ind = int(bbox[5])
                        class_name = CLASSES[class_ind]
                        score = '%.4f' % score
                        xmin, ymin, xmax, ymax = list(map(str, coor))
                        bbox_mess = ' '.join(
                            [class_name, score, xmin, ymin, xmax, ymax]) + '\n'
                        f.write(bbox_mess)
                        print('\t' + str(bbox_mess).strip())
            print(num, num_lines)
    # NOTE(review): ``times`` stays empty on the TFLite path (and when the
    # annotation file has no lines), which makes this division fail.
    print("Elapsed time: " + str(sum(times) / len(times)))
def main(_argv):
    """Run YOLO detection on every frame of ``FLAGS.video``.

    Builds either a Keras model (``FLAGS.framework == 'tf'``) or a TFLite
    interpreter, then reads the video frame by frame, draws the detections,
    appends each annotated frame to ``/content/output-vid.avi`` and also
    saves it as ``frame<N>.jpg``.

    Args:
        _argv: Unused positional arguments.

    Raises:
        ValueError: If reading stops before the frame position reaches
            ``total_frames - 1``.
    """
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            anchor_cfg = cfg.YOLO.ANCHORS
        else:
            anchor_cfg = cfg.YOLO.ANCHORS_V3
        ANCHORS = utils.get_anchors(anchor_cfg, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size
    video_path = FLAGS.video

    print("Video from: ", video_path)
    vid = cv2.VideoCapture(video_path)

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            bbox_tensors = [
                decode(fm, NUM_CLASS, idx)
                for idx, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights)
        elif FLAGS.model == 'yolov3':
            feature_maps = YOLOv3(input_layer, NUM_CLASS)
            bbox_tensors = [
                decode(fm, NUM_CLASS, idx)
                for idx, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_v3(model, FLAGS.weights)
        elif FLAGS.model == 'yolov4':
            feature_maps = YOLOv4(input_layer, NUM_CLASS)
            bbox_tensors = [
                decode(fm, NUM_CLASS, idx)
                for idx, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, bbox_tensors)
            # Darknet ".weights" files go through the custom loader; anything
            # else is loaded with Keras.
            if FLAGS.weights.split(".")[-1] == "weights":
                utils.load_weights(model, FLAGS.weights)
            else:
                model.load_weights(FLAGS.weights).expect_partial()
    else:
        # TFLite: load the model, allocate tensors and look up the I/O slots.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)

    # Output video writer uses the same frame size as the input video.
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH) + 0.5)
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT) + 0.5)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('/content/output-vid.avi', fourcc, 20.0,
                          (width, height))

    total_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    print('Total Frames:', total_frames)

    while True:
        return_value, frame = vid.read()
        n_frame = int(vid.get(cv2.CAP_PROP_POS_FRAMES))

        if not return_value:
            out.release()
            if total_frames - 1 != n_frame:
                raise ValueError("No image! Try with another video format")
            print("Finished processing video.")
            break

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = Image.fromarray(frame)

        frame_size = frame.shape[:2]
        image_data = utils.image_preporcess(np.copy(frame),
                                            [input_size, input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        started = time.time()

        if FLAGS.framework == 'tf':
            pred_bbox = model.predict(image_data)
        else:
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred_bbox = [
                interpreter.get_tensor(output_details[k]['index'])
                for k in range(len(output_details))
            ]

        if FLAGS.model == 'yolov4':
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                                XYSCALE)
        else:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)

        bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size,
                                         0.25)
        bboxes = utils.nms(bboxes, 0.213, method='nms')

        image = utils.draw_bbox(frame, bboxes)

        elapsed = time.time() - started
        result = np.asarray(image)
        print("time: %.2f ms" % (1000 * elapsed))

        # Back to BGR for the OpenCV writers.
        annotated = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        out.write(annotated)

        print("Frame:", n_frame)
        cv2.imwrite("frame{}.jpg".format(n_frame), annotated)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
def main(_argv):
    """Run YOLO detection on ``FLAGS.image`` and display the result.

    Uses a Keras model when ``FLAGS.framework == 'tf'`` and a TFLite
    interpreter otherwise. Detections are score-filtered, passed through NMS,
    drawn on the image and shown with PIL.

    Args:
        _argv: Unused positional arguments.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``FLAGS.image``.
        ValueError: If ``FLAGS.model`` is neither ``'yolov3'`` nor
            ``'yolov4'`` on the non-tiny Keras path.
    """
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size
    image_path = FLAGS.image

    original_image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None rather than raising.
    if original_image is None:
        raise FileNotFoundError(f"Could not read image: {image_path!r}")
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]

    image_data = utils.image_preporcess(np.copy(original_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])

        # Pick the backbone and its matching weight loader; the decode and
        # model-assembly steps are the same for all three.
        if FLAGS.tiny:
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            load_weights = utils.load_weights_tiny
        elif FLAGS.model == 'yolov3':
            feature_maps = YOLOv3(input_layer, NUM_CLASS)
            load_weights = utils.load_weights_v3
        elif FLAGS.model == 'yolov4':
            feature_maps = YOLOv4(input_layer, NUM_CLASS)
            load_weights = utils.load_weights
        else:
            # Previously this fell through and failed later with a NameError
            # on the unbound ``model``.
            raise ValueError(f"Unsupported model: {FLAGS.model!r}")

        bbox_tensors = [
            decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
        ]
        model = tf.keras.Model(input_layer, bbox_tensors)
        load_weights(model, FLAGS.weights)

        model.summary()
        pred_bbox = model.predict(image_data)
    else:
        # Load TFLite model and allocate tensors.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        pred_bbox = [
            interpreter.get_tensor(detail['index'])
            for detail in output_details
        ]

    if FLAGS.model == 'yolov4':
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                            XYSCALE)
    else:
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)

    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                     input_size, 0.25)
    bboxes = utils.nms(bboxes, 0.213, method='nms')

    image = utils.draw_bbox(original_image, bboxes)
    image = Image.fromarray(image)
    image.show()
def main():
    """Detect people in a video and log occupancy / proximity counts.

    Builds a YOLOv4 Keras model inside a one-device (GPU 0) distribution
    strategy scope, then processes every ``THROWOUT_NUM``-th frame: people are
    detected, drawn, and handed to ``pg.draw_radius``; the averaged counts
    over the last ``buf_size`` processed frames are written to a text file
    and overlaid on the frame. Frames are optionally displayed and/or
    recorded.

    All settings (input video, output paths, sizes) are hard-coded below.
    """
    #not sure whether this is effective or not
    # NOTE(review): tf.executing_eagerly() only reports the eager state and
    # its return value is discarded here.
    tf.executing_eagerly()

    strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
    with strategy.scope():
    # if True:

        #SETTINGS TO ADJUST-------------------------------------------
        #whether or not to save video to output file or show on screen
        RECORD = False
        INPUT_VID = 'aot1'
        #INPUT_VID = 'mrb3'
        #INPUT_VID
        OUTPUT_VID= 'C:/Users/Nikki/Documents/work/inputs-outputs/vid_output/' + INPUT_VID + '.avi'
        SHOW_VID = True
        THROWOUT_NUM = 3           #min is 1
        INPUT_SIZE = 419 #608 #230 #999 #800

        #initialize constants
        STRIDES = np.array(cfg.YOLO.STRIDES)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS)
        NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
        XYSCALE = cfg.YOLO.XYSCALE
        WEIGHTS = './data/yolov4.weights'   #must end in .weights

        #setup variables based on what video is being used
        video_path, GPS_pix, pix_GPS, origin = pg.sample_select(INPUT_VID)
        # NOTE(review): this overrides the path returned by sample_select —
        # confirm that is intended.
        video_path = addresses.TEST

        #start video capture
        print("Video from: ", video_path )
        vid = cv2.VideoCapture(video_path)

        #initialize occupancy and compliance buffers
        # Fixed-size buffers indexed by ``ind`` (wraps around via the modulo
        # at the end of the loop); their averages are what gets reported.
        buf_size = 5
        count_buf = buf_size * [0]
        ind = 0
        people_buf = buf_size * [0]

        #open file to output to
        output_f = 'C:/Users/Nikki/Documents/work/inputs-outputs/txt_output/' + INPUT_VID + '.txt'
        f = open(output_f, 'w')
        print('file started')
        f.write('Time\t\t\t\tPed\t<6ft\n')

        #define writer and output video properties
        if RECORD:
            # Numeric property ids 5, 3 and 4 — presumably FPS, frame width
            # and frame height; verify against the OpenCV property table.
            fps = vid.get(5)
            wdt = int(vid.get(3))
            hgt = int(vid.get(4))
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            # Output frame rate is scaled down because only every
            # THROWOUT_NUM-th frame is processed.
            out_vid = cv2.VideoWriter(OUTPUT_VID, fourcc, fps/THROWOUT_NUM, (wdt, hgt))

        #generate model
        input_layer = tf.keras.Input([INPUT_SIZE, INPUT_SIZE, 3])
        feature_maps = YOLOv4(input_layer, NUM_CLASS)
        bbox_tensors = []
        for i, fm in enumerate(feature_maps):
            bbox_tensor = decode(fm, NUM_CLASS, i)
            bbox_tensors.append(bbox_tensor)
        model = tf.keras.Model(input_layer, bbox_tensors)
        print('model built')

        #force to run eagerly
        model.run_eagerly = True

        #load existing weights into model
        utils.load_weights(model, WEIGHTS)

        #continue reading and showing frames until interrupted
        try:
            while True:

                #skip desired number of frames to speed up processing
                for i in range (THROWOUT_NUM):
                    vid.grab()

                #get current time and next frame
                dt = str(datetime.datetime.now())
                return_value, frame = vid.retrieve()

                # check that the next frame exists, if not, close display window and exit loop
                if return_value:
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    #image = Image.fromarray(frame)
                else:
                    if SHOW_VID:
                        cv2.destroyWindow('result')
                    print('Video has ended')
                    break

                #resize image and add another dimension
                frame_size = frame.shape[:2]
                cur_frame = np.copy(frame)
                image_data = utils.image_preprocess(cur_frame, [INPUT_SIZE, INPUT_SIZE])
                image_data = image_data[np.newaxis, ...].astype(np.float32)

                prev_time = time.time()  #for calculating how long it takes to process a frame

                with tf.device('/GPU:0'):
                    image_data = tf.convert_to_tensor(image_data)
                    print(image_data.device)

                #for calculating how long it takes to process a frame
                curr_time = time.time()
                exec_time = curr_time - prev_time
                info = "time1: %.2f ms" %(1000*exec_time)
                print(info)

                prev_time = time.time()

                #make bboxes
                pred_bbox = model.predict(image_data)
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
                all_bboxes, probs, classes = utils.postprocess_boxes(pred_bbox, frame_size, INPUT_SIZE, 0.25)#.25
                bboxes = utils.filter_people(all_bboxes, probs, classes)

                #only continue processing if there were people identified
                if len(bboxes) > 0:
                    #get rid of redundant boxes
                    bboxes = utils.nms(bboxes, 0.213, method='nms') #.213

                    #draw bbox and get centered point at base of box
                    frame = utils.draw_bbox(frame, bboxes, show_label = False)
                    pts = utils.get_ftpts(bboxes)

                    #draw radii and count people
                    frame, count_buf[ind] = pg.draw_radius(frame, pts, GPS_pix, pix_GPS, origin)
                    people_buf[ind] = pts.shape[0]
                else:
                    count_buf[ind] = 0
                    people_buf[ind] = 0

                #avg people and count within 6ft buffers
                people = int(sum(people_buf)/len(people_buf))
                count = int(sum(count_buf)/len(count_buf))

                #write info to file and overlay on video
                utils.video_write_info(f, bboxes, dt, count, people)
                utils.overlay_occupancy(frame, count, people, frame_size)

                #for calculating how long it takes to process a frame
                curr_time = time.time()
                exec_time = curr_time - prev_time
                info = "time2: %.2f ms" %(1000*exec_time)
                print(info)

                #convert frame to correct cv colors and display/record
                result = np.asarray(frame)
                result = cv2.cvtColor(result, cv2.COLOR_RGB2BGR)

                if SHOW_VID:
                    cv2.namedWindow("result", cv2.WINDOW_NORMAL)
                    cv2.imshow("result", result)
                if RECORD:
                    out_vid.write(result)
                if cv2.waitKey(1) & 0xFF == ord('q'): break

                #increment index
                ind = (ind + 1) % buf_size

            #end video, close viewer, stop writing to file
            vid.release()
            if RECORD:
                out_vid.release()
            if SHOW_VID:
                cv2.destroyAllWindows()
            f.close()

        #if interrupted, end video, close viewer, stop writing to file
        # NOTE(review): the bare except catches every exception (including
        # KeyboardInterrupt), prints only its type and then performs the same
        # cleanup as the normal path.
        except:
            print("Unexpected error:", sys.exc_info()[0])
            vid.release()
            if RECORD == True:
                out_vid.release()
            if SHOW_VID:
                cv2.destroyAllWindows()
            f.close()
def main(argv):
    """Build a two-class YOLOv4 model, run it on one image and show the result.

    Settings are read from ``FLAGS`` (``size``, ``image_path``,
    ``score_thresh``, ``iou_thresh``, ``save_path``, ``weights``). The
    annotated image is displayed and, when ``FLAGS.save_path`` is truthy,
    also saved there.

    Args:
        argv: Unused positional arguments.
    """
    NUM_CLASS = 2
    # Flat list of 18 values reshaped to (3, 3, 2).
    ANCHORS = np.array(
        [
            12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192,
            243, 459, 401
        ],
        dtype=np.float32,
    ).reshape(3, 3, 2)
    STRIDES = [8, 16, 32]
    XYSCALE = [1.2, 1.1, 1.05]

    input_size = FLAGS.size
    image_path = FLAGS.image_path
    score_thresh = FLAGS.score_thresh
    iou_thresh = FLAGS.iou_thresh
    save_path = FLAGS.save_path

    for label, value in (
        ('input_size', input_size),
        ('image_path', image_path),
        ('score_thresh', score_thresh),
        ('iou_thresh', iou_thresh),
    ):
        print(f'[DEBUG][image] {label} : {value}')

    rgb_image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
    original_image_size = rgb_image.shape[:2]
    print(f'[DEBUG][image] original_image_size : {original_image_size}')

    image_data = utils.image_preprocess(np.copy(rgb_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    print('[INFO] Bulding Yolov4 architecture')
    build_start = time.perf_counter()

    input_layer = tf.keras.layers.Input([input_size, input_size, 3])
    print(f'[INFO][image] Created input_layer of size {input_size}')
    print(f'[DEBUG][image] input_layer : {input_layer}')

    feature_maps = YOLOv4(input_layer, NUM_CLASS)
    print(f'[DEBUG][image] feature_maps : {feature_maps}')

    bbox_tensors = [
        decode(fm, NUM_CLASS, idx) for idx, fm in enumerate(feature_maps)
    ]
    model = tf.keras.Model(input_layer, bbox_tensors)
    utils.load_weights(model, FLAGS.weights)

    build_end = time.perf_counter()
    print('[INFO] Architecture built.')
    print(f'[DEBUG][image] Execution took {(1000 * (build_end - build_start)):0.4f} ms')

    pred_bbox = model.predict(image_data)
    print('[INFO][image] Finished initial predication on image')

    pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                     input_size, score_thresh)
    bboxes = utils.nms(bboxes, iou_thresh, method='nms')

    annotated = Image.fromarray(utils.draw_bbox(rgb_image, bboxes))
    annotated.show()

    if save_path:
        annotated.save(save_path)
        print(f'[INFO][image] Detected image saved to {save_path}')
def predict(self, image_path, result_dir='.', save_img=True, image_name=None):
    """Detect objects in one image and return the detections as a DataFrame.

    Args:
        image_path: Path of the image to load, or an already decoded
            ``numpy.ndarray`` image (assumed BGR channel order because it is
            passed through ``cv2.COLOR_BGR2RGB`` -- TODO confirm with callers).
        result_dir: Directory that receives the annotated image(s) and the
            ``.xlsx`` export.
        save_img: When true, write the annotated image into ``result_dir``.
        image_name: Label used for the ``out_<image_name>.jpg`` file.

    Returns:
        A ``pandas.DataFrame`` built from one dict per detection with the keys
        ``coor``, ``probability`` and ``class``.  An empty DataFrame is
        returned when the image cannot be loaded or converted, or when writing
        the results fails.
    """
    # Dispatch on the argument type explicitly instead of relying on a bare
    # ``except`` around os.path.exists().
    if isinstance(image_path, np.ndarray):
        original_image = image_path
        print('Shape2', original_image.shape)
        source_name = None
    else:
        if not os.path.exists(image_path):
            print('No such file or directory', image_path)
            return pd.DataFrame()
        original_image = cv2.imread(image_path)
        if original_image is None:
            # cv2.imread returns None for files it cannot decode.
            print('Could not read image', image_path)
            return pd.DataFrame()
        print('Shape1', original_image.shape)
        source_name = os.path.basename(image_path)

    if self.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, self.tiny)
        XYSCALE = cfg.YOLO.XYSCALE_TINY
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if self.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, self.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, self.tiny)
        XYSCALE = cfg.YOLO.XYSCALE
    input_size = self.size

    try:
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    except cv2.error as err:
        print(err)
        return pd.DataFrame()
    original_image_size = original_image.shape[:2]

    image_data = utils.image_preprocess(np.copy(original_image),
                                        [self.size, self.size])
    # Add a leading batch axis of size 1.
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    if self.framework == 'tf':
        pred_bbox = self.instanciated_model.predict(image_data)
    else:
        interpreter = self.instanciated_model
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        pred_bbox = [
            interpreter.get_tensor(detail['index']) for detail in output_details
        ]

    if self.model == 'yolov4':
        if self.tiny:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                                XYSCALE, RESIZE=1.5)
        else:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                                XYSCALE)
    else:
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                     input_size, 0.25)
    bboxes = utils.nms(bboxes, 0.213, method='nms')

    image = Image.fromarray(utils.draw_bbox(original_image, bboxes))

    classes = utils.read_class_names(cfg.YOLO.CLASSES)
    # Each bbox is indexed as [x_min, y_min, x_max, y_max, score, class].
    list_bboxes = [
        {
            'coor': list(np.array(bbox[:4], dtype=np.int32)),
            'probability': bbox[4],
            'class': classes[int(bbox[5])],
        }
        for bbox in bboxes
    ]

    # NOTE(review): this first save runs even when image_name is None, which
    # produces "out_None.jpg"; kept as-is, confirm whether that is wanted.
    output_name = os.path.join(result_dir, 'out_' + str(image_name) + '.jpg')
    if save_img:
        image.save(output_name)
        print('Img saved to', output_name)

    try:
        if source_name is not None:
            # Path input: additionally save under the source file's own name.
            output_name = os.path.join(result_dir, 'out_' + source_name)
            if save_img:
                image.save(output_name)
                print('Img saved to', output_name)

        output = pd.DataFrame(list_bboxes)
        # splitext strips only the final extension, so dots in result_dir or
        # in the file name no longer corrupt the spreadsheet path.
        output_name = os.path.splitext(output_name)[0] + '.xlsx'
        output.to_excel(output_name)
        print('Result file saved to', output_name)
        return output
    except Exception as e:
        # Best effort: a failed save/export yields an empty result.
        print(e)
        return pd.DataFrame()


#yolo = YoloV4()
#yolo.predict('1fc35a5149379fff131e939f18257341.7.jpeg')
#
Working Class # ============================================================================= # class YoloV4: # # def __init__(self,framework = 'tf', weights=os.path.join(Path(os.path.realpath(__file__)).parent,'data/yolov4.weights'),size=608,tiny=False,model='yolov4'): # self.framework = framework # self.weights = weights # self.size = size # self.tiny = tiny # self.model = model # self.instanciated_model = None # # # Instanciate model # # print('Tiny ',self.tiny) # # #image_path = self.image # NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES)) # input_size = self.size # if self.framework == 'tf': # input_layer = tf.keras.layers.Input([input_size, input_size, 3]) # if self.tiny: # if self.model == 'yolov3': # feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS) # else: # feature_maps = YOLOv4_tiny(input_layer, NUM_CLASS) # bbox_tensors = [] # for i, fm in enumerate(feature_maps): # bbox_tensor = decode(fm, NUM_CLASS, i) # bbox_tensors.append(bbox_tensor) # model = tf.keras.Model(input_layer, bbox_tensors) # model.summary() # utils.load_weights_tiny(model, self.weights, self.model) # else: # if self.model == 'yolov3': # feature_maps = YOLOv3(input_layer, NUM_CLASS) # bbox_tensors = [] # for i, fm in enumerate(feature_maps): # bbox_tensor = decode(fm, NUM_CLASS, i) # bbox_tensors.append(bbox_tensor) # model = tf.keras.Model(input_layer, bbox_tensors) # utils.load_weights_v3(model, self.weights) # elif self.model == 'yolov4': # feature_maps = YOLOv4(input_layer, NUM_CLASS) # bbox_tensors = [] # for i, fm in enumerate(feature_maps): # bbox_tensor = decode(fm, NUM_CLASS, i) # bbox_tensors.append(bbox_tensor) # model = tf.keras.Model(input_layer, bbox_tensors) # # if self.weights.split(".")[len(self.weights.split(".")) - 1] == "weights": # utils.load_weights(model, self.weights) # else: # model.load_weights(self.weights).expect_partial() # # self.instanciated_model = model # # else: # # Load TFLite model and allocate tensors. 
# interpreter = tf.lite.Interpreter(model_path=self.weights) # interpreter.allocate_tensors() # # self.instanciated_model = interpreter # # # def predict(self,image_path,result_dir='results',save_img=True): # # if self.tiny: # STRIDES = np.array(cfg.YOLO.STRIDES_TINY) # ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, self.tiny) # XYSCALE = cfg.YOLO.XYSCALE_TINY # else: # STRIDES = np.array(cfg.YOLO.STRIDES) # if self.model == 'yolov4': # ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, self.tiny) # else: # ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, self.tiny) # XYSCALE = cfg.YOLO.XYSCALE # # input_size = self.size # # original_image = cv2.imread(image_path) # print('image:',original_image) # original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB) # original_image_size = original_image.shape[:2] # # image_data = utils.image_preprocess(np.copy(original_image), [self.size,self.size])#[input_size, input_size]) # image_data = image_data[np.newaxis, ...].astype(np.float32) # # if self.framework == 'tf': # model = self.instanciated_model # model.summary() # pred_bbox = model.predict(image_data) # # else: # interpreter = self.instanciated_model # # # Get input and output tensors. 
# input_details = interpreter.get_input_details() # output_details = interpreter.get_output_details() # print(input_details) # print(output_details) # # interpreter.set_tensor(input_details[0]['index'], image_data) # interpreter.invoke() # pred_bbox = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))] # # if self.model == 'yolov4': # if self.tiny: # pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE, RESIZE=1.5) # else: # pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE) # else: # pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES) # bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.25) # bboxes = utils.nms(bboxes, 0.213, method='nms') # # image = utils.draw_bbox(original_image, bboxes) # image = Image.fromarray(image) # #image.show() # # print('Image path',image_path) # print('Type Image path',type(image_path)) # print('Bboxes type',type(bboxes)) # # classes = utils.read_class_names(cfg.YOLO.CLASSES) # list_bboxes = [] # # for i, bbox in enumerate(bboxes): # coor = np.array(bbox[:4], dtype=np.int32) # score = bbox[4] # class_ind = int(bbox[5]) # #print('type bbox',type(bbox)) # #print('bbox',bbox[:4]) # #print('coor',list(coor)) # bbox_info = {'coor':list(coor),'probability':score,'class':classes[class_ind]} # list_bboxes.append(bbox_info) # # try: # output_name = os.path.join(result_dir+'/out_' + os.path.basename(image_path)) # # if save_img: # image.save(output_name) # #cv2.imwrite(output_name,img) # print('Img saved to',output_name) # # output = pd.DataFrame(list_bboxes) # print('image_path',image_path ) # output_name = '.'.join(output_name.split('.')[:2])+'.xlsx' # #output_name = 'results/out_'+image_path.split('\\')[-1].split('.')[0]+'.xlsx' # print('output_name',output_name) # output.to_excel(output_name) # # except Exception as e: # print(e) # ============================================================================= #yolo = 
YoloV4() #yolo.predict('1fc35a5149379fff131e939f18257341.7.jpeg')
def main(_argv):
    """Count detected vehicles in a video and write the annotated frames to an AVI file.

    Settings are read from the module-level ``FLAGS`` object (``tiny``,
    ``model``, ``size``, ``video``, ``framework``, ``weights``).  The output
    file is named ``output<unix time>.avi`` in the working directory.

    Args:
        _argv: Unused.

    Raises:
        ValueError: If not a single frame can be read from ``FLAGS.video``.
    """
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    # Read once; the same mapping is passed to the visualiser for every frame.
    category_index = utils.read_class_names(cfg.YOLO.CLASSES)
    NUM_CLASS = len(category_index)
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size
    video_path = FLAGS.video

    print("Video from: ", video_path)
    vid = cv2.VideoCapture(video_path)

    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    fps = int(vid.get(cv2.CAP_PROP_FPS))

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    output_movie = cv2.VideoWriter('output' + str(round(time.time())) + '.avi',
                                   fourcc, fps, (width, height))

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])

        def build_model(feature_maps):
            # Decode every feature map and wrap the results in a Keras model.
            bbox_tensors = [decode(fm, NUM_CLASS, i)
                            for i, fm in enumerate(feature_maps)]
            return tf.keras.Model(input_layer, bbox_tensors)

        if FLAGS.tiny:
            model = build_model(YOLOv3_tiny(input_layer, NUM_CLASS))
            utils.load_weights_tiny(model, FLAGS.weights)
        else:
            if FLAGS.model == 'yolov3':
                model = build_model(YOLOv3(input_layer, NUM_CLASS))
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                model = build_model(YOLOv4(input_layer, NUM_CLASS))
                # Files ending in "weights" use the project loader; anything
                # else is loaded through Keras.
                if FLAGS.weights.split(".")[-1] == "weights":
                    utils.load_weights(model, FLAGS.weights)
                else:
                    model.load_weights(FLAGS.weights).expect_partial()

        model.summary()
    else:
        # Load TFLite model and allocate tensors.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)

    total_passed_vehicle = 0
    roi = 450
    font = cv2.FONT_HERSHEY_SIMPLEX
    frames_written = 0

    try:
        while True:
            return_value, frame = vid.read()
            if not return_value:
                if frames_written == 0:
                    raise ValueError("No image! Try with another video format")
                # End of stream after at least one frame: finish cleanly so
                # the writer is released and the output file is finalised.
                break

            # NOTE(review): the frame stays RGB from here on and is written to
            # the VideoWriter unchanged -- confirm the channel order is intended.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_size = frame.shape[:2]
            image_data = utils.image_preprocess(np.copy(frame),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            if FLAGS.framework == 'tf':
                pred_bbox = model.predict(image_data)
            else:
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred_bbox = [interpreter.get_tensor(detail['index'])
                             for detail in output_details]

            if FLAGS.model == 'yolov4':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES, XYSCALE)
            else:
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)

            bboxes = utils.postprocess_boxes(pred_bbox, frame_size,
                                             input_size, 0.25)
            boxes = bboxes[:, 0:4]
            scores = bboxes[:, 4]
            classes = bboxes[:, 5]

            # vid.get(1) reads capture property id 1 (cv2.CAP_PROP_POS_FRAMES).
            counter, _csv_line, _counting_mode = \
                vis_util.visualize_boxes_and_labels_on_image_array_y_axis(
                    vid.get(1),
                    frame,
                    1,
                    False,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    y_reference=roi,
                    use_normalized_coordinates=True,
                    line_thickness=4)

            # The ROI line switches colour on frames where counter == 1.
            if counter == 1:
                cv2.line(frame, (roi, 0), (roi, height), (0, 0xFF, 0), 5)
            else:
                cv2.line(frame, (roi, 0), (roi, height), (0, 0, 0xFF), 5)

            total_passed_vehicle = total_passed_vehicle + counter

            # Insert information text into the video frame.  The original
            # code drew on the undefined name ``input_frame``.
            # NOTE(review): the last positional argument below is putText's
            # lineType slot but receives a font constant -- confirm intent.
            cv2.putText(
                frame,
                'Veiculos Detectados: ' + str(total_passed_vehicle),
                (10, 35),
                font,
                0.8,
                (0, 0xFF, 0xFF),
                2,
                cv2.FONT_HERSHEY_SIMPLEX,
            )
            cv2.putText(
                frame,
                'Linha de ROI',
                (545, roi - 10),
                font,
                0.6,
                (0, 0, 0xFF),
                2,
                cv2.LINE_AA,
            )

            output_movie.write(frame)
            frames_written += 1
            print("writing frame")

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always release the capture and finalise the output file.
        vid.release()
        output_movie.release()
        cv2.destroyAllWindows()
def main(_argv):
    """Run YOLO detection on the image named by ``FLAGS.image`` and display it.

    Settings are read from the module-level ``FLAGS`` object (``tiny``,
    ``model``, ``size``, ``image``, ``framework``, ``weights``).

    Args:
        _argv: Unused.

    Raises:
        FileNotFoundError: If ``FLAGS.image`` cannot be read as an image.
    """
    # Pick strides and anchors depending on whether the tiny variant is used.
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        # Non-tiny: the anchor set depends on the selected model.
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)

    # Class count and XYSCALE come from the config; the input size and image
    # path come from the command-line flags.
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size
    image_path = FLAGS.image

    # Load the image with cv2 and convert it from BGR to RGB.
    original_image = cv2.imread(image_path)
    if original_image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f'Could not read image: {image_path}')
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]

    # Preprocess to the network input size, add a batch axis, cast to float32.
    image_data = utils.image_preporcess(np.copy(original_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    # With the 'tf' framework the network is chosen from FLAGS.tiny / FLAGS.model.
    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])

        def build_model(feature_maps):
            # Decode every feature map and wrap the results in a Keras model.
            bbox_tensors = [decode(fm, NUM_CLASS, i)
                            for i, fm in enumerate(feature_maps)]
            return tf.keras.Model(input_layer, bbox_tensors)

        if FLAGS.tiny:
            model = build_model(YOLOv3_tiny(input_layer, NUM_CLASS))
            utils.load_weights_tiny(model, FLAGS.weights)
        else:
            if FLAGS.model == 'yolov3':
                model = build_model(YOLOv3(input_layer, NUM_CLASS))
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                # Build the YOLOv4 feature maps, decode them into box tensors,
                # assemble the model and then load the weights from disk.
                model = build_model(YOLOv4(input_layer, NUM_CLASS))
                utils.load_weights(model, FLAGS.weights)

        model.summary()
        # Predict boxes for the preprocessed image.
        pred_bbox = model.predict(image_data)
    else:
        # Any other framework value runs the TFLite interpreter.  (The
        # original source had a stray "." after this "else:", which was a
        # syntax error.)
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        pred_bbox = [interpreter.get_tensor(detail['index'])
                     for detail in output_details]

    # Post-process the raw predictions, apply the 0.25 score threshold and
    # run NMS with a 0.213 IoU threshold.
    if FLAGS.model == 'yolov4':
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
    else:
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                     input_size, 0.25)
    bboxes = utils.nms(bboxes, 0.213, method='nms')

    # Draw the surviving boxes and display the result through PIL.
    image = utils.draw_bbox(original_image, bboxes)
    image = Image.fromarray(image)
    image.show()
def main(_argv):
    """Run YOLO detection on a video or camera stream and display the result.

    Settings are read from the module-level ``FLAGS`` object (``tiny``,
    ``model``, ``size``, ``video``, ``output``, ``output_format``,
    ``framework``, ``weights``).  ``FLAGS.framework`` selects between a Keras
    model (``'tf'``), a TFLite interpreter (``'tflite'``) and a SavedModel
    signature (``'trt'``).  When ``FLAGS.output`` is set the annotated frames
    are also written to that file.  Press ``q`` in the window to stop.

    Args:
        _argv: Unused.
    """
    for physical_device in tf.config.experimental.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        XYSCALE = cfg.YOLO.XYSCALE_TINY
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY_V3, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        XYSCALE = cfg.YOLO.XYSCALE
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
    NUM_CLASSES = len(CLASSES)
    input_size = FLAGS.size

    # A value that parses as an integer is used as a device index; anything
    # else is handed to OpenCV unchanged.
    try:
        source = int(FLAGS.video)
    except (TypeError, ValueError):
        source = FLAGS.video
    vid = cv2.VideoCapture(source)

    times = []

    if FLAGS.output:
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        # TODO: switch to get vertical
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])

        def build_model(feature_maps):
            # Decode every feature map and wrap the results in a Keras model.
            bbox_tensors = [decode(fm, NUM_CLASSES, i)
                            for i, fm in enumerate(feature_maps)]
            return tf.keras.Model(input_layer, bbox_tensors)

        if FLAGS.tiny:
            if FLAGS.model == 'yolov3':
                model = build_model(YOLOv3_tiny(input_layer, NUM_CLASSES))
            else:
                model = build_model(YOLOv4_tiny(input_layer, NUM_CLASSES))
            utils.load_weights_tiny(model, FLAGS.weights, FLAGS.model)
        else:
            if FLAGS.model == 'yolov3':
                model = build_model(YOLOv3(input_layer, NUM_CLASSES))
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                model = build_model(YOLOv4(input_layer, NUM_CLASSES))
                # Files ending in "weights" use the project loader; anything
                # else is loaded through Keras.
                if FLAGS.weights.split(".")[-1] == "weights":
                    utils.load_weights(model, FLAGS.weights)
                else:
                    model.load_weights(FLAGS.weights).expect_partial()

        model.summary()
    elif FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
    elif FLAGS.framework == 'trt':
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    logging.info("Model loaded!")

    try:
        while True:
            return_value, frame = vid.read()
            # frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)  # TODO: here
            if not return_value:
                logging.warning("Empty Frame")
                break

            frame_size = frame.shape[:2]
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image_data = utils.image_preprocess(np.copy(frame),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            prev_time = time.time()
            if FLAGS.framework == 'tf':
                pred_bbox = model.predict(image_data)
            elif FLAGS.framework == 'tflite':
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred_bbox = [interpreter.get_tensor(detail['index'])
                             for detail in output_details]
            elif FLAGS.framework == 'trt':
                result = infer(tf.constant(image_data))
                pred_bbox = [value.numpy() for value in result.values()]
            curr_time = time.time()

            # Keep only the 20 most recent inference timings for the average.
            times.append(curr_time - prev_time)
            times = times[-20:]

            if FLAGS.model == 'yolov4':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES, XYSCALE)
            else:
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)

            bboxes = utils.postprocess_boxes(pred_bbox, frame_size,
                                             input_size, 0.5)  # 0.25
            bboxes = utils.nms(bboxes, 0.213, method='nms')  # 0.213

            image = utils.draw_bbox(frame, bboxes, classes=CLASSES)
            image = cv2.putText(
                image,
                "Time: {:.2f}ms".format(sum(times) / len(times) * 1000),
                (0, 24),  # 24
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7,
                (0, 0, 255),
                2)  # 0.7
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            cv2.namedWindow("Detections", cv2.WINDOW_AUTOSIZE)
            cv2.imshow("Detections", image)
            if FLAGS.output:
                out.write(image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture/writer and close windows even if a frame fails.
        vid.release()
        if FLAGS.output:
            out.release()
        cv2.destroyAllWindows()
def main(_argv):
    """Run YOLO detection on ``FLAGS.image`` and write the result to ``FLAGS.output``.

    Settings are read from the module-level ``FLAGS`` object (``tiny``,
    ``model``, ``size``, ``image``, ``output``, ``framework``, ``weights``).
    ``FLAGS.framework`` selects between a Keras model (``'tf'``), a TFLite
    interpreter (``'tflite'``) and a SavedModel signature (``'trt'``).

    Args:
        _argv: Unused.

    Raises:
        FileNotFoundError: If ``FLAGS.image`` cannot be read as an image.
    """
    for physical_device in tf.config.experimental.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        XYSCALE = cfg.YOLO.XYSCALE_TINY
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY_V3, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        XYSCALE = cfg.YOLO.XYSCALE
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
    NUM_CLASSES = len(CLASSES)
    input_size = FLAGS.size
    image_path = FLAGS.image

    original_image = cv2.imread(image_path)
    if original_image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f'Could not read image: {image_path}')
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]

    image_data = utils.image_preprocess(np.copy(original_image),
                                        [input_size, input_size])
    # Add a leading batch axis of size 1.
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])

        def build_model(feature_maps):
            # Decode every feature map and wrap the results in a Keras model.
            bbox_tensors = [decode(fm, NUM_CLASSES, i)
                            for i, fm in enumerate(feature_maps)]
            return tf.keras.Model(input_layer, bbox_tensors)

        if FLAGS.tiny:
            if FLAGS.model == 'yolov3':
                model = build_model(YOLOv3_tiny(input_layer, NUM_CLASSES))
            else:
                model = build_model(YOLOv4_tiny(input_layer, NUM_CLASSES))
            utils.load_weights_tiny(model, FLAGS.weights, FLAGS.model)
        else:
            if FLAGS.model == 'yolov3':
                model = build_model(YOLOv3(input_layer, NUM_CLASSES))
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                model = build_model(YOLOv4(input_layer, NUM_CLASSES))
                # Files ending in "weights" use the project loader; anything
                # else is loaded through Keras.
                if FLAGS.weights.split(".")[-1] == "weights":
                    utils.load_weights(model, FLAGS.weights)
                else:
                    model.load_weights(FLAGS.weights).expect_partial()

        model.summary()
        pred_bbox = model.predict(image_data)
    elif FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        pred_bbox = [interpreter.get_tensor(detail['index'])
                     for detail in output_details]
    elif FLAGS.framework == 'trt':
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']
        result = infer(tf.constant(image_data))
        pred_bbox = [value.numpy() for value in result.values()]

    if FLAGS.model == 'yolov4':
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
    else:
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                     input_size, 0.5)  # 0.25
    bboxes = utils.nms(bboxes, 0.5, method='nms')  # 0.213

    image = utils.draw_bbox(original_image, bboxes, classes=CLASSES)
    # The image is RGB at this point; swap the channels again before imwrite.
    image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
    cv2.imwrite(FLAGS.output, image)
def main(_argv):
    """Count cars, trucks and buses in every image of ``FLAGS.image_dir``.

    For each readable image an annotated copy ``<name>_out<ext>`` is written
    next to it and one row (``filename;cars;trucks;buses``) is appended to
    ``result.csv``.  Settings are read from the module-level ``FLAGS`` object
    (``image_dir``, ``tiny``, ``model``, ``size``, ``framework``, ``weights``).

    NOTE(review): a model is only built for ``FLAGS.framework == 'tf'``;
    other values reach ``model.predict`` with ``model`` undefined.

    Args:
        _argv: Unused.
    """
    # TODO: add valid extensions
    image_list = glob.glob(os.path.join(FLAGS.image_dir, "*"))

    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])

        def build_model(feature_maps):
            # Decode every feature map and wrap the results in a Keras model.
            bbox_tensors = [decode(fm, NUM_CLASS, i)
                            for i, fm in enumerate(feature_maps)]
            return tf.keras.Model(input_layer, bbox_tensors)

        if FLAGS.tiny:
            model = build_model(YOLOv3_tiny(input_layer, NUM_CLASS))
            utils.load_weights_tiny(model, FLAGS.weights)
        else:
            if FLAGS.model == 'yolov3':
                model = build_model(YOLOv3(input_layer, NUM_CLASS))
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                model = build_model(YOLOv4(input_layer, NUM_CLASS))
                utils.load_weights(model, FLAGS.weights)

        model.summary()

    # Class index -> CSV column for the classes that are counted.
    vehicle_columns = {2: "cars", 5: "buses", 7: "trucks"}
    fieldnames = ["filename", "cars", "trucks", "buses"]

    # NOTE(review): orig_dir is not defined inside this function; presumably a
    # module-level name -- confirm it exists and points where intended.
    with open(os.path.join(orig_dir, "result.csv"), 'w+', newline='') as f:
        # Attach a CSV writer to the file with the desired fieldnames.
        writer = csv.DictWriter(f, fieldnames, delimiter=";")
        writer.writeheader()

        for image_path in image_list:
            root, ext = os.path.splitext(image_path)
            # Skip annotated copies produced by an earlier run.  Only the
            # file's own stem is tested so a directory name containing
            # "_out" does not exclude every image.
            if os.path.basename(root).endswith("_out"):
                continue

            original_image = cv2.imread(image_path)
            if original_image is None:
                # The glob matches every directory entry; skip anything that
                # OpenCV cannot decode instead of crashing in cvtColor.
                print(f"Skipping unreadable file: {image_path}")
                continue
            original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
            original_image_size = original_image.shape[:2]

            image_data = utils.image_preporcess(np.copy(original_image),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            pred_bbox = model.predict(image_data)

            if FLAGS.model == 'yolov4':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES, XYSCALE)
            else:
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
            bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                             input_size, 0.25)
            bboxes = utils.nms(bboxes, 0.213, method='nms')

            counts = {"cars": 0, "trucks": 0, "buses": 0}
            vehicle_boxes = []
            for bbox in bboxes:
                column = vehicle_columns.get(int(bbox[5]))
                if column is not None:
                    counts[column] += 1
                    vehicle_boxes.append(bbox)

            # The previous implementation inserted every match at index 0,
            # i.e. it drew the boxes in reverse order; keep that order.
            if vehicle_boxes:
                bboxes_filtered = np.array(vehicle_boxes[::-1])
            else:
                bboxes_filtered = vehicle_boxes

            writer.writerow({"filename": image_path, **counts})

            image = utils.draw_bbox(original_image, bboxes_filtered)
            # The image is RGB at this point; swap channels again for imwrite.
            image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
            # Build the output name from the real extension so that files
            # other than ".jpg" are no longer overwritten in place.
            cv2.imwrite(root + "_out" + ext, image)
def main(_argv):
    """Run YOLO detection on a camera or video stream and display each frame.

    Settings are read from the module-level ``FLAGS`` object (``tiny``,
    ``model``, ``size``, ``video``, ``framework``, ``weights``).  When
    ``FLAGS.video`` is the string ``'none'`` camera indices 5-8 are probed
    and the first one that delivers a frame is used.  Every detection is
    printed to stdout.  Press ``q`` in the window to stop.

    Args:
        _argv: Unused.

    Raises:
        ValueError: If no camera is found, or if not a single frame can be
            read from the source.
    """
    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size
    video_path = FLAGS.video

    if video_path == 'none':
        print("Searching for camera...")
        for camera_index in (5, 6, 7, 8):
            vid = cv2.VideoCapture(camera_index)
            _, probe_frame = vid.read()
            if probe_frame is not None:
                print("Camera found at index", camera_index)
                break
            # Do not leak capture handles for indices that gave no frame.
            vid.release()
        else:
            raise ValueError("No camera found at indices 5, 6, 7, 8")
    else:
        print("Video from: ", video_path)
        vid = cv2.VideoCapture(video_path)

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])

        def build_model(feature_maps):
            # Decode every feature map and wrap the results in a Keras model.
            bbox_tensors = [decode(fm, NUM_CLASS, i)
                            for i, fm in enumerate(feature_maps)]
            return tf.keras.Model(input_layer, bbox_tensors)

        if FLAGS.tiny:
            model = build_model(YOLOv3_tiny(input_layer, NUM_CLASS))
            utils.load_weights_tiny(model, FLAGS.weights)
        else:
            if FLAGS.model == 'yolov3':
                model = build_model(YOLOv3(input_layer, NUM_CLASS))
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                model = build_model(YOLOv4(input_layer, NUM_CLASS))
                utils.load_weights(model, FLAGS.weights)

        model.summary()
    else:
        # Load TFLite model and allocate tensors.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)

    frames_seen = 0
    try:
        while True:
            return_value, frame = vid.read()
            if not return_value:
                if frames_seen == 0:
                    raise ValueError("No image! Try with another video format")
                # End of stream after at least one frame: stop cleanly.
                break
            frames_seen += 1

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_size = frame.shape[:2]
            image_data = utils.image_preprocess(np.copy(frame),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)
            prev_time = time.time()

            if FLAGS.framework == 'tf':
                pred_bbox = model.predict(image_data)
            else:
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred_bbox = [interpreter.get_tensor(detail['index'])
                             for detail in output_details]

            if FLAGS.model == 'yolov4':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES, XYSCALE)
            else:
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)

            bboxes = utils.postprocess_boxes(pred_bbox, frame_size,
                                             input_size, 0.25)
            bboxes = utils.nms(bboxes, 0.213, method='nms')

            for box in bboxes:
                print('x_min', box[0])
                print('y_min', box[1])
                print('x_max', box[2])
                print('y_max', box[3])
                print('probability', box[4])
                print('object_id', box[5])
                print('-----')

            image = utils.draw_bbox(frame, bboxes)

            exec_time = time.time() - prev_time
            print("time: %.2f ms" % (1000 * exec_time))

            cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
            result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            cv2.imshow("result", result)
            print(cv2.getWindowImageRect('result'))
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture on every exit path, including errors.
        vid.release()
def main():
    """Run YOLOv4 over the test set and write one detection file per image.

    For every test image a ``<image stem>.txt`` file is written into
    ``args.output``. Each line holds ``class confidence xmin ymin xmax ymax``
    for one detection, sorted by descending confidence. The average
    inference speed is printed at the end.

    Weights come from ``./weights/pretrained.h5`` when ``args.pretrained`` is
    set, otherwise from the latest checkpoint under ``args.weights`` if one
    is given.
    """
    # makedirs also handles nested output paths and an existing directory.
    os.makedirs(args.output, exist_ok=True)

    testset = Dataset('test')
    test_generator = tf.data.Dataset.from_generator(
        lambda: testset,
        (tf.float32, (tf.string, tf.int32, tf.int32))).batch(
            cfg.TEST.BATCH_SIZE)

    classes = utils.read_class_names(cfg.YOLO.CLASSES)
    STRIDES = np.array(cfg.YOLO.STRIDES)
    ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS)
    NUM_CLASS = len(classes)
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = cfg.TEST.INPUT_SIZE

    model = YOLOv4(NUM_CLASS, STRIDES, ANCHORS, XYSCALE, 'test')
    if args.pretrained:
        # One dummy forward pass before load_weights, as in the original.
        # Presumably this creates the model variables; confirm.
        dummy_input = np.ones((1, input_size, input_size, 3))
        model.predict(dummy_input)
        model.load_weights('./weights/pretrained.h5')
        print('Pretrained weights loaded')
    elif args.weights is not None:
        ckpt = tf.train.Checkpoint(model=model)
        ckpt_manager = tf.train.CheckpointManager(ckpt, args.weights,
                                                  max_to_keep=3)
        if ckpt_manager.latest_checkpoint:
            ckpt.restore(ckpt_manager.latest_checkpoint).expect_partial()
            print('Latest checkpoint restored')
        else:
            print('Failed to load latest checkpoint')

    num_images = len(testset)
    times = 0.0
    for index, (image_data, image_meta) in enumerate(test_generator):
        if index % 100 == 0:
            print('Processing {}/{} images...'.format(index + 1, num_images))

        # NOTE(review): ``.item()`` only works for single-element tensors,
        # so this assumes cfg.TEST.BATCH_SIZE == 1. Confirm.
        original_image_size = (image_meta[1].numpy().item(),
                               image_meta[2].numpy().item())

        start_time = time.time()
        pred_bbox = model.predict(image_data)
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                            XYSCALE)
        bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                         input_size, 0.25)
        bboxes = utils.nms(bboxes, 0.213, method='nms')
        times += time.time() - start_time

        bboxes = sorted(bboxes, key=lambda x: -x[4])

        image_path = image_meta[0].numpy().item().decode('utf-8')
        # splitext strips the extension whatever its length (e.g. ".jpeg").
        stem = os.path.splitext(os.path.basename(image_path))[0]
        rows = [
            ' '.join([
                classes[int(b[5])],
                str(b[4]),
                str(int(b[0])),
                str(int(b[1])),
                str(int(b[2])),
                str(int(b[3])),
            ]) for b in bboxes
        ]
        with open(os.path.join(args.output, stem + '.txt'), 'w') as f:
            # ' \n' reproduces the original layout byte for byte: a trailing
            # space before each newline and no newline after the last row.
            f.write(' \n'.join(rows))

    if times > 0:
        print('All test images were processed. FPS is {:.2f}'.format(
            num_images / times))
    else:
        print('No test images were processed.')
def main(_argv):
    """Run YOLO detection on ``FLAGS.video`` and display annotated frames.

    The model is chosen by ``FLAGS.framework`` ('tf' builds a Keras model,
    anything else loads ``FLAGS.weights`` as a TFLite model),
    ``FLAGS.model`` and ``FLAGS.tiny``. Frames are shown in a window named
    "result" until ``q`` is pressed.

    Args:
        _argv: Positional command-line arguments (unused).

    Raises:
        ValueError: If the model name is unsupported or a frame cannot be
            read (this is also how the end of the video is reported).
    """
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size
    video_path = FLAGS.video

    print("Video from: ", video_path)
    vid = cv2.VideoCapture(video_path)

    try:
        if FLAGS.framework == 'tf':
            input_layer = tf.keras.layers.Input([input_size, input_size, 3])
            if FLAGS.tiny:
                build_backbone = YOLOv3_tiny
            elif FLAGS.model == 'yolov3':
                build_backbone = YOLOv3
            elif FLAGS.model == 'yolov4':
                build_backbone = YOLOv4
            else:
                # Previously this fell through and failed later with a
                # NameError on ``model``.
                raise ValueError("Unsupported model: %r" % (FLAGS.model,))
            feature_maps = build_backbone(input_layer, NUM_CLASS)
            bbox_tensors = [
                decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, bbox_tensors)

            if FLAGS.tiny:
                utils.load_weights_tiny(model, FLAGS.weights)
            elif FLAGS.model == 'yolov3':
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.weights.rsplit(".", 1)[-1] == "weights":
                # A "weights" extension goes through the project loader.
                utils.load_weights(model, FLAGS.weights)
            else:
                model.load_weights(FLAGS.weights).expect_partial()
            model.summary()
        else:
            # Load TFLite model and allocate tensors.
            interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
            interpreter.allocate_tensors()
            # Get input and output tensors.
            input_details = interpreter.get_input_details()
            output_details = interpreter.get_output_details()
            print(input_details)
            print(output_details)

        while True:
            return_value, frame = vid.read()
            if not return_value:
                # Window cleanup happens in ``finally``; destroying "result"
                # here could fail when the very first read fails and the
                # window does not exist yet.
                raise ValueError("No image! Try with another video format")
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_size = frame.shape[:2]
            image_data = utils.image_preprocess(np.copy(frame),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            prev_time = time.time()
            if FLAGS.framework == 'tf':
                pred_bbox = model.predict(image_data)
            else:
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred_bbox = [
                    interpreter.get_tensor(detail['index'])
                    for detail in output_details
                ]

            if FLAGS.model == 'yolov4':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES, XYSCALE)
            else:
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES)
            bboxes = utils.postprocess_boxes(pred_bbox, frame_size,
                                             input_size, 0.25)
            bboxes = utils.nms(bboxes, 0.213, method='nms')

            image = utils.draw_bbox(frame, bboxes)
            exec_time = time.time() - prev_time
            print("time: %.2f ms" % (1000 * exec_time))

            cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
            result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            cv2.imshow("result", result)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        vid.release()
        cv2.destroyAllWindows()
def main(_argv):
    """Detect objects on a RealSense colour stream and plot them with depth.

    Uses the module-level ``pipeline``, ``align`` and ``rs`` objects. For
    each detection, the distance at the box centre is taken as the median of
    the 3x3 pixel neighbourhood and deprojected to a 3D point. The window
    "result" shows a 480x640 top-down plot next to the colour-mapped depth
    image with bounding boxes. Press ``q`` to quit.

    Args:
        _argv: Positional command-line arguments (unused).

    Raises:
        ValueError: If ``FLAGS.model`` is unsupported.
    """
    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            build_backbone = YOLOv3_tiny
            load_weights = utils.load_weights_tiny
        elif FLAGS.model == 'yolov3':
            build_backbone = YOLOv3
            load_weights = utils.load_weights_v3
        elif FLAGS.model == 'yolov4':
            build_backbone = YOLOv4
            load_weights = utils.load_weights
        else:
            # Previously this fell through and failed later with a
            # NameError on ``model``.
            raise ValueError("Unsupported model: %r" % (FLAGS.model,))
        feature_maps = build_backbone(input_layer, NUM_CLASS)
        bbox_tensors = [
            decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
        ]
        model = tf.keras.Model(input_layer, bbox_tensors)
        load_weights(model, FLAGS.weights)
        model.summary()
    else:
        # Load TFLite model and allocate tensors.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)

    # Size of the top-down plot. Its height must equal the depth image
    # height for the np.hstack below to work.
    view_height, view_width = 480, 640
    marker = [0, 255, 0]

    try:
        while True:
            frames = pipeline.wait_for_frames()
            # Align the depth frame to the colour frame.
            aligned_frames = align.process(frames)
            depth_frame = aligned_frames.get_depth_frame()
            color_frame = aligned_frames.get_color_frame()
            if not depth_frame or not color_frame:
                continue

            depth_intrin = (
                depth_frame.profile.as_video_stream_profile().intrinsics)
            depth_image = np.asanyarray(depth_frame.get_data())
            color_image = np.asanyarray(color_frame.get_data())
            depth_height, depth_width = depth_image.shape[:2]

            frame = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB)
            frame_size = frame.shape[:2]
            image_data = utils.image_preprocess(np.copy(frame),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            scaled_depth = cv2.convertScaleAbs(depth_image, alpha=0.08)
            depth_colormap = cv2.applyColorMap(scaled_depth,
                                               cv2.COLORMAP_JET)

            if FLAGS.framework == 'tf':
                pred_bbox = model.predict(image_data)
            else:
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred_bbox = [
                    interpreter.get_tensor(detail['index'])
                    for detail in output_details
                ]

            if FLAGS.model == 'yolov4':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES, XYSCALE)
            else:
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES)
            bboxes = utils.postprocess_boxes(pred_bbox, frame_size,
                                             input_size, 0.25)
            bboxes = utils.nms(bboxes, 0.213, method='nms')

            view2d = np.zeros((view_height, view_width, 3), np.uint8)
            for box in bboxes:
                x_mid = int((box[0] + box[2]) / 2)
                y_mid = int((box[1] + box[3]) / 2)
                # Clamp the 3x3 neighbourhood to the frame so a box centred
                # on the border cannot produce out-of-range coordinates.
                pixel_depths = [
                    depth_frame.get_distance(
                        max(0, min(x_mid + dx, depth_width - 1)),
                        max(0, min(y_mid + dy, depth_height - 1)))
                    for dx in (-1, 0, 1)
                    for dy in (-1, 0, 1)
                ]
                object_depth = statistics.median(pixel_depths)
                object_point = rs.rs2_deproject_pixel_to_point(
                    depth_intrin, [x_mid, y_mid], object_depth)

                if box[5] == 67.0:
                    print('found phone')
                if object_depth == 0.0:
                    print('depth not found')
                # NOTE(review): every detection is plotted, not only phones.
                # Confirm this matches the intended behaviour.
                depth_colormap[max(0, min(y_mid, depth_height - 1)),
                               max(0, min(x_mid, depth_width - 1))] = marker
                view2d[max(0, min(view_height - int(object_point[2] * 350),
                                  view_height - 1)),
                       max(0, min(int(object_point[0] * 350) + 320,
                                  view_width - 1))] = marker

            cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
            image_depth = utils.draw_bbox(depth_colormap, bboxes)
            images = np.hstack((view2d, image_depth))
            cv2.imshow("result", images)
            print('-----')
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Stop streaming on every exit path, not only when 'q' is pressed.
        pipeline.stop()
def main(_argv):
    """Detect objects in ``FLAGS.image`` and save the image and a spreadsheet.

    The annotated image is written to ``results/out_<image name>`` and the
    detections (box coordinates, probability, class name) to an ``.xlsx``
    file with the same stem. Saving is best effort: a failure is reported
    on stdout and does not abort the program.

    Args:
        _argv: Positional command-line arguments (printed for debugging).

    Raises:
        FileNotFoundError: If ``FLAGS.image`` cannot be read as an image.
        ValueError: If ``FLAGS.model`` is unsupported.
    """
    print('Arguments', _argv)
    print('Flags', flags)
    # NOTE(review): this overrides any --tiny flag given on the command
    # line, so the tiny branches below never run. Confirm this is intended.
    FLAGS.tiny = False
    print('Tiny ', FLAGS.tiny)

    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
        XYSCALE = cfg.YOLO.XYSCALE_TINY
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
        XYSCALE = cfg.YOLO.XYSCALE
    classes = utils.read_class_names(cfg.YOLO.CLASSES)
    NUM_CLASS = len(classes)
    input_size = FLAGS.size
    image_path = FLAGS.image

    original_image = cv2.imread(image_path)
    if original_image is None:
        # cv2.imread returns None instead of raising; fail with a clear
        # message rather than an opaque error inside cvtColor.
        raise FileNotFoundError(
            'Could not read image: {}'.format(image_path))
    print('image:', original_image)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]

    image_data = utils.image_preprocess(np.copy(original_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            build_backbone = (YOLOv3_tiny
                              if FLAGS.model == 'yolov3' else YOLOv4_tiny)
        elif FLAGS.model == 'yolov3':
            build_backbone = YOLOv3
        elif FLAGS.model == 'yolov4':
            build_backbone = YOLOv4
        else:
            # Previously this fell through and failed later with a
            # NameError on ``model``.
            raise ValueError('Unsupported model: {!r}'.format(FLAGS.model))
        feature_maps = build_backbone(input_layer, NUM_CLASS)
        bbox_tensors = [
            decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
        ]
        model = tf.keras.Model(input_layer, bbox_tensors)

        if FLAGS.tiny:
            utils.load_weights_tiny(model, FLAGS.weights, FLAGS.model)
        elif FLAGS.model == 'yolov3':
            utils.load_weights_v3(model, FLAGS.weights)
        elif FLAGS.weights.rsplit(".", 1)[-1] == "weights":
            utils.load_weights(model, FLAGS.weights)
        else:
            model.load_weights(FLAGS.weights).expect_partial()
        model.summary()
        pred_bbox = model.predict(image_data)
    else:
        # Load TFLite model and allocate tensors.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        pred_bbox = [
            interpreter.get_tensor(detail['index'])
            for detail in output_details
        ]

    if FLAGS.model == 'yolov4':
        if FLAGS.tiny:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                                XYSCALE, RESIZE=1.5)
        else:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                                XYSCALE)
    else:
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                     input_size, 0.25)
    bboxes = utils.nms(bboxes, 0.213, method='nms')

    image = utils.draw_bbox(original_image, bboxes)
    image = Image.fromarray(image)

    print('Image path', image_path)
    print('Type Image path', type(image_path))
    print('Bboxes type', type(bboxes))

    list_bboxes = [
        {
            'coor': list(np.array(bbox[:4], dtype=np.int32)),
            'probability': bbox[4],
            'class': classes[int(bbox[5])],
        }
        for bbox in bboxes
    ]

    try:
        # Without this, saving fails (silently) on a fresh checkout where
        # the results directory does not exist yet.
        os.makedirs('results', exist_ok=True)
        output_name = 'results/out_' + os.path.basename(image_path)
        image.save(output_name)
        print('Img saved to', output_name)

        output = pd.DataFrame(list_bboxes)
        print('image_path', image_path)
        # Replace the image extension with ".xlsx" whatever the number of
        # dots in the file name.
        output_name = os.path.splitext(output_name)[0] + '.xlsx'
        print('output_name', output_name)
        output.to_excel(output_name)
    except Exception as e:
        # Best effort: the detections were already printed above, so a
        # failed save is reported but does not abort the run.
        print('Failed to save results:', e)
def main():
    """Run YOLOv4 on a hard-coded video, logging detections to a text file.

    Frames are produced by a background thread running ``MyLoop`` into a
    bounded queue and consumed here. Each frame is pre-processed, run
    through the model, written to ``<video stem>.txt`` via
    ``utils.video_write_info`` and displayed in a window named "result".
    Press ``q`` to stop early.

    Errors raised while consuming frames (including Ctrl-C) are reported on
    stdout and end the run. The capture, windows and output file are always
    released.
    """
    strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
    with strategy.scope():
        STRIDES = np.array(cfg.YOLO.STRIDES)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS)
        NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
        XYSCALE = cfg.YOLO.XYSCALE
        WEIGHTS = './data/yolov4.weights'  # must end in .weights
        video_path = './data/AOTsample3.mp4'
        # video_path = './data/road.mp4'
        # video_path = './data/vtest.avi'
        # video_path = './data/20190422_153844_DA4A.mkv'
        print("Video from: ", video_path)
        print('thread started')
        INPUT_SIZE = 419  # 608 #230

        # The output file sits next to the video, with the extension
        # replaced by ".txt".
        output_f = video_path.rsplit('.', 1)[0] + '.txt'
        with open(output_f, 'w') as f:
            print('file started')

            # Generate model.
            input_layer = tf.keras.Input([INPUT_SIZE, INPUT_SIZE, 3])
            print('tensors started 1')
            feature_maps = YOLOv4(input_layer, NUM_CLASS)
            print('tensors started 2')
            bbox_tensors = []
            print('tensors started 3')
            for i, fm in enumerate(feature_maps):
                bbox_tensors.append(decode(fm, NUM_CLASS, i))
            print('tensors started 4')
            model = tf.keras.Model(input_layer, bbox_tensors)
            print('model built')

            # Force to run eagerly.
            model.run_eagerly = True
            if model.run_eagerly:
                print('yeeyee')
            else:
                print('hawhaw')
            utils.load_weights(model, WEIGHTS)

            with tf.device('/GPU:0'):
                buf = Queue(maxsize=8)
                vid = cv2.VideoCapture(video_path)
                coord = tf.train.Coordinator()
                t = Thread(target=MyLoop, args=(video_path, buf, vid, coord))
                t.daemon = True
                t.start()
                # Give the producer a head start so the queue is not empty
                # on the first check below.
                time.sleep(1.0)

                try:
                    # NOTE(review): the loop ends as soon as the queue is
                    # momentarily empty, so a slow producer stops processing
                    # early. MyLoop is not visible here, so the condition is
                    # left unchanged.
                    while not buf.empty():
                        frame = buf.get()
                        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                        dt = str(datetime.datetime.now())
                        frame_size = frame.shape[:2]

                        # Resize image and add a batch dimension.
                        image_data = utils.image_preprocess(
                            np.copy(frame), [INPUT_SIZE, INPUT_SIZE])
                        image_data = image_data[np.newaxis, ...].astype(
                            np.float32)

                        prev_time = time.time()
                        with tf.device('/GPU:0'):
                            image_data = tf.convert_to_tensor(image_data)
                            print(image_data.device)
                        exec_time = time.time() - prev_time
                        print("time1: %.2f ms" % (1000 * exec_time))

                        prev_time = time.time()
                        # Make bboxes.
                        pred_bbox = model.predict(image_data)
                        pred_bbox = utils.postprocess_bbbox(
                            pred_bbox, ANCHORS, STRIDES, XYSCALE)
                        bboxes = utils.postprocess_boxes(
                            pred_bbox, frame_size, INPUT_SIZE, 0.25)
                        bboxes = utils.nms(bboxes, 0.213, method='nms')

                        # Write bbox info to file, then time and show frame.
                        utils.video_write_info(frame, f, bboxes, dt)
                        image = utils.draw_some_bbox(frame, bboxes)
                        exec_time = time.time() - prev_time
                        print("time2: %.2f ms" % (1000 * exec_time))

                        result = np.asarray(image)
                        cv2.namedWindow("result", cv2.WINDOW_NORMAL)
                        result = cv2.cvtColor(result, cv2.COLOR_RGB2BGR)
                        cv2.imshow("result", result)
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break
                except (Exception, KeyboardInterrupt):
                    # Same reporting as before for errors and Ctrl-C, but
                    # SystemExit is no longer swallowed.
                    print("Unexpected error:", sys.exc_info()[0])
                finally:
                    # One cleanup path for normal end, 'q' and errors.
                    vid.release()
                    cv2.destroyAllWindows()
def main(argv):
    """Run a 2-class YOLOv4 model on webcam 0 and optionally record it.

    Annotated frames are shown in a window named "result". When
    ``FLAGS.save_path`` is set they are also written to that file with the
    XVID codec. Press ``q`` to stop.

    Args:
        argv: Positional command-line arguments (unused).

    Raises:
        ValueError: If a frame cannot be read from the camera.
    """
    NUM_CLASS = 2
    ANCHORS = np.array([
        12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243,
        459, 401
    ], dtype=np.float32).reshape(3, 3, 2)
    STRIDES = [8, 16, 32]
    XYSCALE = [1.2, 1.1, 1.05]
    input_size = FLAGS.size
    score_thresh = FLAGS.score_thresh
    iou_thresh = FLAGS.iou_thresh
    save_path = FLAGS.save_path
    print(f'[DEBUG][webcam] input_size : {input_size}')
    print(f'[DEBUG][webcam] score_thresh : {score_thresh}')
    print(f'[DEBUG][webcam] iou_thresh : {iou_thresh}')

    print('[INFO] Bulding Yolov4 architecture')
    tic = time.perf_counter()
    input_layer = tf.keras.layers.Input([input_size, input_size, 3])
    print(f'[INFO][webcam] Created input_layer of size {input_size}')
    print(f'[DEBUG][webcam] input_layer : {input_layer}')
    feature_maps = YOLOv4(input_layer, NUM_CLASS)
    print(f'[DEBUG][webcam] feature_maps : {feature_maps}')
    bbox_tensors = [
        decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
    ]
    model = tf.keras.Model(input_layer, bbox_tensors)
    utils.load_weights(model, FLAGS.weights)
    toc = time.perf_counter()
    print(f'[INFO] Architecture built.')
    print(f'[DEBUG][webcam] Execution took {(1000 * (toc - tic)):0.4f} ms')

    vid = cv2.VideoCapture(0)
    # Stays None when recording is disabled, so cleanup can tell whether a
    # writer exists (it used to fail with NameError on ``out``).
    out = None
    try:
        if save_path:
            width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = int(vid.get(cv2.CAP_PROP_FPS))
            # rsplit gives the real extension and cannot raise IndexError
            # for paths such as "./out.avi" or names without a dot.
            extension = save_path.rsplit('.', 1)[-1]
            print(f"[DEBUG][video] Video CODEC : {extension}")
            codec = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter(save_path, codec, fps, (width, height))

        while True:
            return_value, frame = vid.read()
            if not return_value:
                raise ValueError("No image! Try with another video format")
            print(f'[DEBUG] Got video capture')
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_size = frame.shape[:2]
            image_data = utils.image_preprocess(np.copy(frame),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            prev_time = time.perf_counter()
            pred_bbox = model.predict(image_data)
            print(f'[INFO][webcam] Finished initial predication on image')
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                                XYSCALE)
            bboxes = utils.postprocess_boxes(pred_bbox, frame_size,
                                             input_size, score_thresh)
            bboxes = utils.nms(bboxes, iou_thresh, method='nms')
            image = utils.draw_bbox(frame, bboxes)
            exec_time = time.perf_counter() - prev_time
            print("fdpms: %.2f ms" % (1000 * exec_time))

            cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
            # Back to BGR, which both imshow and VideoWriter expect.
            result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            cv2.imshow("result", result)
            print(result.shape)
            if out is not None:
                out.write(result)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release on every exit path so the recording is finalised even
        # when a read fails.
        vid.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
def main(_argv):
    """Detect and track objects in ``FLAGS.video`` and save an annotated copy.

    Every frame is run through the selected YOLO model. The detected boxes
    (score threshold 0.40, no NMS) are fed to a ``CentroidTracker``, and
    each tracked object's ID and centroid are drawn on the frame. Frames are
    written to ``output<unix time>.avi`` with the XVID codec until the video
    ends or ``q`` is pressed.

    Args:
        _argv: Positional command-line arguments (unused).

    Raises:
        ValueError: If ``FLAGS.model`` is unsupported.
    """
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size
    video_path = FLAGS.video

    print("Video from: ", video_path)
    vid = cv2.VideoCapture(video_path)
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    output_movie = cv2.VideoWriter(
        'output' + str(round(time.time())) + '.avi', fourcc, fps,
        (width, height))

    try:
        # Initialize our centroid tracker.
        ct = CentroidTracker()

        if FLAGS.framework == 'tf':
            input_layer = tf.keras.layers.Input([input_size, input_size, 3])
            if FLAGS.tiny:
                build_backbone = YOLOv3_tiny
            elif FLAGS.model == 'yolov3':
                build_backbone = YOLOv3
            elif FLAGS.model == 'yolov4':
                build_backbone = YOLOv4
            else:
                # Previously this fell through and failed later with a
                # NameError on ``model``.
                raise ValueError("Unsupported model: %r" % (FLAGS.model,))
            feature_maps = build_backbone(input_layer, NUM_CLASS)
            bbox_tensors = [
                decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, bbox_tensors)

            if FLAGS.tiny:
                utils.load_weights_tiny(model, FLAGS.weights)
            elif FLAGS.model == 'yolov3':
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.weights.rsplit(".", 1)[-1] == "weights":
                utils.load_weights(model, FLAGS.weights)
            else:
                model.load_weights(FLAGS.weights).expect_partial()
            model.summary()
        else:
            # Load TFLite model and allocate tensors.
            interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
            interpreter.allocate_tensors()
            # Get input and output tensors.
            input_details = interpreter.get_input_details()
            output_details = interpreter.get_output_details()
            print(input_details)
            print(output_details)

        while True:
            return_value, frame = vid.read()
            if not return_value:
                # An empty read marks the end of the video.
                print("end of the video file...")
                break

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_size = frame.shape[:2]
            image_data = utils.image_preprocess(np.copy(frame),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            if FLAGS.framework == 'tf':
                pred_bbox = model.predict(image_data)
            else:
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred_bbox = [
                    interpreter.get_tensor(detail['index'])
                    for detail in output_details
                ]

            if FLAGS.model == 'yolov4':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES, XYSCALE)
            else:
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES)
            bboxes = utils.postprocess_boxes(pred_bbox, frame_size,
                                             input_size, 0.40)

            # Box rectangles of every detection in this frame.
            rects = bboxes[:, 0:4]
            objects = ct.update(rects)

            # Draw the ID and centroid of each tracked object on the frame.
            for (objectID, centroid) in objects.items():
                text = "ID {}".format(objectID)
                cv2.putText(frame, text,
                            (centroid[0] - 10, centroid[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                cv2.circle(frame, (centroid[0], centroid[1]), 4,
                           (0, 255, 0), -1)

            # VideoWriter expects BGR. The frame was converted to RGB for
            # the model, so convert back, otherwise red and blue are swapped
            # in the saved video.
            output_movie.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
            print("writing frame")
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Finalise the output file and free the capture on every exit path.
        vid.release()
        output_movie.release()
        cv2.destroyAllWindows()
def main(_argv):
    """Benchmark detection speed on a single image.

    Runs inference plus post-processing on ``FLAGS.image`` 1000 times with
    either a Keras model (``FLAGS.framework == 'tf'``) or a SavedModel
    signature (``'trt'``). After each iteration it prints the iteration
    time, the instantaneous FPS and the running average FPS. The first
    iteration is excluded from the statistics.

    Args:
        _argv: Positional command-line arguments (unused).

    Raises:
        FileNotFoundError: If ``FLAGS.image`` cannot be read as an image.
        ValueError: If ``FLAGS.model`` is unsupported for the 'tf' framework.
    """
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE

    config = ConfigProto()
    config.gpu_options.allow_growth = True
    # Not used directly below; kept as in the original (presumably to keep
    # the session and its GPU options alive; confirm).
    session = InteractiveSession(config=config)
    input_size = FLAGS.size
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            build_backbone = YOLOv3_tiny
            load_weights = utils.load_weights_tiny
        elif FLAGS.model == 'yolov3':
            build_backbone = YOLOv3
            load_weights = utils.load_weights_v3
        elif FLAGS.model == 'yolov4':
            build_backbone = YOLOv4
            load_weights = utils.load_weights
        else:
            # Previously this fell through and failed later with a
            # NameError on ``model``.
            raise ValueError("Unsupported model: %r" % (FLAGS.model,))
        feature_maps = build_backbone(input_layer, NUM_CLASS)
        bbox_tensors = [
            decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
        ]
        model = tf.keras.Model(input_layer, bbox_tensors)
        load_weights(model, FLAGS.weights)
    elif FLAGS.framework == 'trt':
        saved_model_loaded = tf.saved_model.load(
            FLAGS.weights, tags=[tag_constants.SERVING])
        signature_keys = list(saved_model_loaded.signatures.keys())
        print(signature_keys)
        infer = saved_model_loaded.signatures['serving_default']

    logging.info('weights loaded')

    @tf.function
    def run_model(x):
        # Only called for the 'tf' framework, where ``model`` is defined.
        return model(x)

    # Prepare the single benchmark input once, outside the timed loop.
    original_image = cv2.imread(FLAGS.image)
    if original_image is None:
        # cv2.imread returns None instead of raising on a bad path.
        raise FileNotFoundError(
            'Could not read image: {}'.format(FLAGS.image))
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]
    image_data = utils.image_preprocess(np.copy(original_image),
                                        [FLAGS.size, FLAGS.size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    batched_input = tf.constant(image_data)

    fps_total = 0
    for i in range(1000):
        # perf_counter is monotonic and high resolution, so very short
        # iterations do not measure as zero seconds.
        prev_time = time.perf_counter()

        pred_bbox = None
        if FLAGS.framework == 'tf':
            pred_bbox = [value.numpy() for value in run_model(image_data)]
        elif FLAGS.framework == 'trt':
            pred_bbox = [
                value.numpy() for value in infer(batched_input).values()
            ]

        if pred_bbox is not None:
            if FLAGS.model == 'yolov4':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES, XYSCALE)
            else:
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES)
            bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                             input_size, 0.25)
            bboxes = utils.nms(bboxes, 0.213, method='nms')

        exec_time = time.perf_counter() - prev_time
        if i == 0:
            # The first iteration is excluded from the statistics.
            continue
        fps = 1 / exec_time
        fps_total += fps
        info = (str(i) + " time:" + str(round(exec_time, 3)) +
                " average FPS:" + str(round(fps_total / i, 2)) +
                ", FPS: " + str(round(fps, 1)))
        print(info)
def main(_argv):
    """Detect objects in a video stream, track them, and display the result.

    Opens ``FLAGS.video`` (a camera index if it parses as an integer,
    otherwise passed to ``cv2.VideoCapture`` as-is), loads the detector
    selected by ``FLAGS.framework`` (``'tf'``, ``'tflite'`` or ``'trt'``) and
    feeds every frame through detection, NMS and the appearance-feature
    tracker. Confirmed tracks are drawn on the frame together with the mean
    inference time of the last 20 frames; the annotated frame is shown in a
    window and, when ``FLAGS.output`` is set, written to that file. The loop
    ends when no frame can be read or when ``q`` is pressed.

    Args:
        _argv: Unused.

    Raises:
        ValueError: If ``FLAGS.framework`` / ``FLAGS.model`` selects a
            combination this function does not handle.
    """
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        XYSCALE = cfg.YOLO.XYSCALE_TINY
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY_V3, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        XYSCALE = cfg.YOLO.XYSCALE
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
    NUM_CLASSES = len(CLASSES)
    input_size = FLAGS.size

    # A value that parses as an integer is used as a camera index; anything
    # else is handed to OpenCV unchanged. Only the int() conversion is
    # guarded so unrelated errors are no longer swallowed.
    try:
        video_source = int(FLAGS.video)
    except (TypeError, ValueError):
        video_source = FLAGS.video
    vid = cv2.VideoCapture(video_source)

    out = None
    try:
        if FLAGS.output:
            width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = int(vid.get(cv2.CAP_PROP_FPS))
            codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
            out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

        if FLAGS.framework == 'tf':
            input_layer = tf.keras.layers.Input([input_size, input_size, 3])
            if FLAGS.tiny:
                if FLAGS.model == 'yolov3':
                    feature_maps = YOLOv3_tiny(input_layer, NUM_CLASSES)
                else:
                    feature_maps = YOLOv4_tiny(input_layer, NUM_CLASSES)
            elif FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASSES)
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASSES)
            else:
                # Previously this fell through and failed later with a
                # NameError on `model`.
                raise ValueError(
                    "Unsupported model: {!r}".format(FLAGS.model))

            bbox_tensors = [
                decode(fm, NUM_CLASSES, i)
                for i, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, bbox_tensors)

            if FLAGS.tiny:
                utils.load_weights_tiny(model, FLAGS.weights, FLAGS.model)
            elif FLAGS.model == 'yolov3':
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.weights.split(".")[-1] == "weights":
                # File ending in "weights": use the project's loader.
                utils.load_weights(model, FLAGS.weights)
            else:
                # Anything else is loaded through Keras' own load_weights.
                model.load_weights(FLAGS.weights).expect_partial()

            # NOTE(review): the original nesting of this call could not be
            # recovered; it is run for every Keras-built model here.
            model.summary()
        elif FLAGS.framework == 'tflite':
            interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
            interpreter.allocate_tensors()
            input_details = interpreter.get_input_details()
            output_details = interpreter.get_output_details()
        elif FLAGS.framework == 'trt':
            saved_model_loaded = tf.saved_model.load(
                FLAGS.weights, tags=[tag_constants.SERVING])
            infer = saved_model_loaded.signatures['serving_default']
        else:
            # Previously this fell through and failed inside the frame loop
            # with a NameError on `pred_bbox`.
            raise ValueError(
                "Unsupported framework: {!r}".format(FLAGS.framework))

        # Tracker setup.
        max_cosine_distance = 0.7  # 0.5 / 0.7
        nn_budget = None
        model_filename = './weights/tracker/mars-small128.pb'
        encoder = gdet.create_box_encoder(model_filename, batch_size=1)
        metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", max_cosine_distance, nn_budget)
        tracker = Tracker(metric)

        # Parallel key/value lists used to map a class name back to its key.
        key_list = list(CLASSES.keys())
        val_list = list(CLASSES.values())
        # Class names to keep; an empty list means "track every class".
        Track_only = []

        logging.info("Models loaded!")

        times = []
        while True:
            return_value, frame = vid.read()
            if not return_value:
                logging.warning("Empty Frame")
                break

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_size = frame.shape[:2]
            image_data = utils.image_preprocess(np.copy(frame),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            # Only the raw inference call is timed.
            t1 = time.time()
            if FLAGS.framework == 'tf':
                pred_bbox = model.predict(image_data)
            elif FLAGS.framework == 'tflite':
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred_bbox = [
                    interpreter.get_tensor(detail['index'])
                    for detail in output_details
                ]
            else:  # 'trt' -- any other value was rejected above
                batched_input = tf.constant(image_data)
                pred_bbox = [
                    value.numpy()
                    for value in infer(batched_input).values()
                ]
            t2 = time.time()

            # Keep a sliding window of the 20 most recent inference times.
            times.append(t2 - t1)
            times = times[-20:]

            if FLAGS.model == 'yolov4':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES, XYSCALE)
            else:
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES)
            bboxes = utils.postprocess_boxes(pred_bbox, frame_size,
                                             input_size, 0.5)  # 0.25
            bboxes = utils.nms(bboxes, 0.5, method='nms')  # 0.213

            # Turn each detection's first four values (two corner points)
            # into [x, y, width, height] and collect score and class name.
            boxes, scores, names = [], [], []
            for bbox in bboxes:
                class_name = CLASSES[int(bbox[5])]
                if Track_only and class_name not in Track_only:
                    continue
                x_min = bbox[0].astype(int)
                y_min = bbox[1].astype(int)
                boxes.append([
                    x_min,
                    y_min,
                    bbox[2].astype(int) - x_min,
                    bbox[3].astype(int) - y_min,
                ])
                scores.append(bbox[4])
                names.append(class_name)

            boxes = np.array(boxes)
            names = np.array(names)
            scores = np.array(scores)
            features = np.array(encoder(frame, boxes))
            detections = [
                Detection(bbox, score, class_name, feature)
                for bbox, score, class_name, feature in zip(
                    boxes, scores, names, features)
            ]

            tracker.predict()
            tracker.update(detections)

            # Draw only confirmed tracks that were updated recently.
            tracked_bboxes = []
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:  # 1 / 5
                    continue
                bbox = track.to_tlbr()
                class_name = track.get_class()
                tracking_id = track.track_id
                index = key_list[val_list.index(class_name)]
                tracked_bboxes.append(bbox.tolist() + [tracking_id, index])

            image = utils.draw_bbox(frame,
                                    tracked_bboxes,
                                    classes=CLASSES,
                                    tracking=True)
            image = cv2.putText(
                image,
                "Time: {:.2f}ms".format(sum(times) / len(times) * 1000),
                (0, 36),  # 24
                cv2.FONT_HERSHEY_SIMPLEX,
                1.5,
                (0, 0, 255),
                2)

            # Swap channels back before handing the frame to OpenCV's
            # display and writer.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            cv2.namedWindow("Detections", cv2.WINDOW_AUTOSIZE)
            cv2.imshow("Detections", image)
            if out is not None:
                out.write(image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture, writer and windows even if setup or the frame
        # loop raised.
        vid.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()