def detect_videos(self, data, mode):
    """
    Detect videos by YOLO, and store the detected bounding boxes and feature maps.

    The video folders and annotation files come from
    ``data_preparation(data[mode], FOR_YOLO=True)``. A video is skipped as soon
    as either ``<detected_folder>/<video>.npy`` or ``<features_folder>/<video>.npy``
    already exists. Otherwise YOLO is run on every frame and two arrays are saved:
    the chosen box per frame (``x, y, w, h``) and the feature returned by
    ``yolo.predict_for_rolo`` per frame.

    The first saved box is the first annotation line (normalized with the shape
    of the first frame), not a YOLO detection; later boxes are the detections
    picked by ``choose_best_box`` relative to the previously chosen box.

    Args:
        data: Mapping indexed by ``mode``; ``data[mode]`` must provide the
            ``'detected_folder'`` and ``'features_folder'`` entries.
        mode: Key selecting which entry of ``data`` to process.

    Raises:
        IOError: If a video has no frames, if the number of frames differs from
            the number of annotation lines, or if a frame cannot be read.
    """
    video_folders_list, annotations_list = data_preparation(data[mode], FOR_YOLO=True)

    # ##############################
    #   Make the model
    # ##############################
    model_config = self.yolo_config['model']
    yolo = YOLO(architecture=model_config['architecture'],
                input_size=model_config['input_size'],
                labels=model_config['labels'],
                max_box_per_image=model_config['max_box_per_image'],
                anchors=model_config['anchors'])

    # ##############################
    #   Load trained weights
    # ##############################
    print("YOLO weights path:", self.yolo_weights_path)
    yolo.load_weights(self.yolo_weights_path)

    for vid, video_folder in enumerate(video_folders_list):
        video_name = basename(video_folder)
        print(video_name)

        # Skip the video when either of its outputs is already on disk.
        detected_label_path = os.path.join(data[mode]['detected_folder'], video_name)
        if os.path.exists(detected_label_path + '.npy'):
            continue
        features_path = os.path.join(data[mode]['features_folder'], video_name)
        if os.path.exists(features_path + '.npy'):
            continue

        # Read the annotation file once inside a context manager so the handle
        # is always closed (the line count and the first label both come from it).
        with open(annotations_list[vid], 'r') as annot_file:
            annotation_lines = annot_file.readlines()

        image_path_list = sorted(glob.glob(video_folder + "/*"))
        # NOTE(review): sort_nicely presumably re-orders the list in place
        # (natural sort) -- confirm against its definition.
        sort_nicely(image_path_list)

        if len(annotation_lines) != len(image_path_list):
            raise IOError("Number of frames in {} does not match annotations.".format(video_name))
        if not image_path_list:
            raise IOError("No frames found in {}.".format(video_name))

        first_box_unnormalized = parse_label(annotation_lines[0])
        first_image = cv2.imread(image_path_list[0])
        if first_image is None:
            raise IOError("Cannot read image {}".format(image_path_list[0]))
        first_box = normalize_box(first_image.shape, first_box_unnormalized)
        last_box = first_box

        # Boxes destined for detected/: entry 0 is the annotated first box.
        detected_boxes = [[first_box.x, first_box.y, first_box.w, first_box.h]]
        # Features destined for features/: one entry per frame.
        features = []

        for i, image_path in enumerate(image_path_list):
            print("============ Detecting {} video, {} frame ===============".format(video_name, basename(image_path)))
            image = cv2.imread(image_path)
            if image is None:
                # Fail loudly here instead of handing None to the detector.
                raise IOError("Cannot read image {}".format(image_path))

            boxes, feature = yolo.predict_for_rolo(image)
            chosen_box = choose_best_box(boxes, last_box)
            last_box = chosen_box

            # Frame 0 is already represented by the annotated box above.
            if i > 0:
                detected_boxes.append([chosen_box.x, chosen_box.y, chosen_box.w, chosen_box.h])
            features.append(feature)

        print("======================= Save detected label result ==========================")
        detected_boxes = np.array(detected_boxes)
        print("Video:{} {} boxes are detected".format(video_name, detected_boxes.shape[0]))
        np.save(detected_label_path + '.npy', detected_boxes)

        print("========================== Save feature map =================================")
        features = np.array(features)
        np.save(features_path + '.npy', features)
def track(self, video_folder_path, initial_box, output_dir='output/', max_frame_index=100):
    """
    Track one target through a folder of frames and write annotated frames to disk.

    For the first frame the given ``initial_box`` is used as the target box.
    For every later frame YOLO proposes boxes, ``choose_best_box`` picks one
    relative to the previously chosen box, and ``self.model`` (ROLO) predicts
    the tracked box from the last ``self.time_step`` features and boxes. Both
    the detected box and the tracked box are drawn on the frame, which is then
    saved as ``<output_dir>/<frame index>.jpg``.

    Args:
        video_folder_path: Folder whose entries are the video frames.
        initial_box: Target box for the first frame; it is converted with
            ``xywh_xymin_to_xycenter`` before use.
            NOTE(review): presumably ``(x_min, y_min, w, h)`` in pixels,
            judging by the converter's name -- confirm.
        output_dir: Directory for the annotated frames; created if missing.
        max_frame_index: Index of the last frame to process (inclusive).
            ``None`` processes every frame.

    Raises:
        IOError: If the folder contains no frames or a frame cannot be read.
    """
    model_config = self.yolo_config['model']
    yolo = YOLO(architecture=model_config['architecture'],
                input_size=model_config['input_size'],
                labels=model_config['labels'],
                max_box_per_image=model_config['max_box_per_image'],
                anchors=model_config['anchors'])

    print("YOLO weights path:", self.yolo_weights_path)
    yolo.load_weights(self.yolo_weights_path)

    # os.path.join works with or without a trailing separator; plain
    # concatenation with "*" would also match the folder itself and siblings.
    frame_path_list = sorted(glob.glob(os.path.join(video_folder_path, "*")))
    if not frame_path_list:
        raise IOError("Found {} frames".format(len(frame_path_list)))
    # Same ordering step as detect_videos, so frame names without zero padding
    # are not visited in lexicographic order.
    # NOTE(review): sort_nicely presumably sorts in place -- confirm.
    sort_nicely(frame_path_list)

    # cv2.imwrite does not create missing directories, so do it up front.
    if output_dir and not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    feature_inputs_list = []
    bbox_inputs_list = []

    initial_box = xywh_xymin_to_xycenter(initial_box)  # x_center, y_center, w, h
    tracking_time = 0.0
    tracked_frames = 0  # frames that actually went through ROLO

    for i, frame_path in enumerate(frame_path_list):
        print("================ {}th frame ==================".format(i))
        frame = cv2.imread(frame_path)
        if frame is None:
            raise IOError("Cannot read frame {}".format(frame_path))

        if i == 0:
            # Extract features from the untouched frame, then draw on it, so
            # the overlay never ends up in the detector input.
            _, feature = yolo.predict_for_rolo(frame)
            normalized_initial_box = normalize_box(frame.shape, initial_box)
            frame = draw_box(frame, initial_box)

            feature_inputs_list.append(feature)
            bbox = np.expand_dims(np.array(normalized_initial_box), axis=0)
            bbox_inputs_list.append(bbox)

            last_box = BoundBox(normalized_initial_box[0],
                                normalized_initial_box[1],
                                normalized_initial_box[2],
                                normalized_initial_box[3])
        else:
            boxes, feature = yolo.predict_for_rolo(frame)
            chosen_box = choose_best_box(boxes, last_box)
            last_box = chosen_box

            feature_inputs_list.append(feature)  # shape: [1,13,13,1024] (per original note)
            bbox = np.expand_dims(
                np.array([chosen_box.x, chosen_box.y, chosen_box.w, chosen_box.h]),
                axis=0)  # shape: [1,4]
            bbox_inputs_list.append(bbox)

            # Window of at most self.time_step most recent frames, ending at i.
            l_bound = max(i - self.time_step + 1, 0)
            feature_input = self.get_test_batch(feature_inputs_list[l_bound:i + 1])
            bbox_input = self.get_test_batch(bbox_inputs_list[l_bound:i + 1])

            # Prediction by ROLO (only this call is timed).
            start_time = time.time()
            predict_bbox = self.model.predict([feature_input, bbox_input])
            end_time = time.time()
            print("ROLO predict time: {} sec per image.".format(end_time-start_time))

            # Draw detected box by YOLO
            detected_box = denormalize_box(frame.shape, chosen_box)
            frame = draw_box(frame, detected_box, color=(255,0,0))
            print("Detected box: [ {:.2f} {:.2f} {:.2f} {:.2f} ]".format(detected_box[0],
                                                                        detected_box[1],
                                                                        detected_box[2],
                                                                        detected_box[3]))

            # While fewer than time_step frames have been seen, read the output
            # at step i; afterwards read the last step.
            if i < self.time_step:
                bbox = predict_bbox[0, i, ...]
            else:
                bbox = predict_bbox[0, -1, ...]

            # Denormalize box: x and w scale with width, y and h with height.
            bbox[0] *= frame.shape[1]
            bbox[1] *= frame.shape[0]
            bbox[2] *= frame.shape[1]
            bbox[3] *= frame.shape[0]

            # Draw Tracked box by ROLO
            frame = draw_box(frame, bbox)
            print("Tracked box : [ {:.2f} {:.2f} {:.2f} {:.2f} ]".format(bbox[0],
                                                                        bbox[1],
                                                                        bbox[2],
                                                                        bbox[3]))

            tracking_time += (end_time - start_time)
            tracked_frames += 1
            print("==============================================")

        cv2.imwrite(os.path.join(output_dir, str(i) + '.jpg'), frame)

        if max_frame_index is not None and i >= max_frame_index:
            break

    # Report speed over the frames that were really tracked; the loop may stop
    # early, and a single-frame video tracks nothing at all.
    if tracked_frames > 0 and tracking_time > 0:
        print("Tracking speed: {:.3f} FPS".format(tracked_frames / tracking_time))
    else:
        print("Tracking speed: n/a (no frames were tracked)")
def detect_videos(annotations_list, video_folders_list, detected_folder,
                  yolo_config_path="../config_aerial.json",
                  yolo_weights_path="../yolo_coco_aerial_person.h5"):
    """
    Detect videos by YOLO, and store the detected bounding boxes.

    For every video folder whose ``<detected_folder>/<video>.npy`` does not
    exist yet, YOLO is run on each frame and the chosen box per frame
    (``x, y, w, h``) is saved to that file. The first saved box is the first
    annotation line (normalized with the shape of the first frame); later boxes
    are the detections picked by ``choose_best_box`` relative to the previously
    chosen box.

    Args:
        annotations_list: Annotation file paths, one per video, in the same
            order as ``video_folders_list``; each must be named
            ``<video folder name>.txt``.
        video_folders_list: Folders holding the frames of each video.
        detected_folder: Directory receiving the ``.npy`` result files.
        yolo_config_path: JSON file with the YOLO ``'model'`` settings.
        yolo_weights_path: Trained YOLO weights to load.

    Raises:
        IOError: If the two lists differ in length, if an annotation file name
            does not match its video folder, if a video has no frames, if the
            frame count differs from the annotation line count, or if a frame
            cannot be read.
    """
    with open(yolo_config_path) as config_buffer:
        yolo_config = json.load(config_buffer)

    # ##############################
    #   Make the model
    # ##############################
    model_config = yolo_config['model']
    yolo = YOLO(architecture=model_config['architecture'],
                input_size=model_config['input_size'],
                labels=model_config['labels'],
                max_box_per_image=model_config['max_box_per_image'],
                anchors=model_config['anchors'])

    # ##############################
    #   Load trained weights
    # ##############################
    print("YOLO weights path:", yolo_weights_path)
    yolo.load_weights(yolo_weights_path)

    if len(annotations_list) != len(video_folders_list):
        raise IOError("Mismatch # videos {} {}.".format(len(annotations_list), len(video_folders_list)))

    for vid, video_folder in enumerate(video_folders_list):
        video_name = basename(video_folder)
        print(video_name)

        detected_label_path = os.path.join(detected_folder, video_name)
        if os.path.exists(detected_label_path + '.npy'):
            continue

        # The two lists are paired by position; verify the pairing by name.
        annotation_name = basename(annotations_list[vid])
        if annotation_name != (video_name + ".txt"):
            print("Annot: {}".format(annotation_name))
            print("image: {}".format(video_name))
            raise IOError("Mismatch video {}.".format(video_name))

        # Read the annotation file once inside a context manager so the handle
        # is always closed (the line count and the first label both come from it).
        with open(annotations_list[vid], 'r') as annot_file:
            annotation_lines = annot_file.readlines()

        image_path_list = sorted(glob.glob(video_folder + "/*"))
        # NOTE(review): sort_nicely presumably re-orders the list in place
        # (natural sort) -- confirm against its definition.
        sort_nicely(image_path_list)

        if len(annotation_lines) != len(image_path_list):
            raise IOError("Number of frames in {} does not match annotations.".format(video_name))
        if not image_path_list:
            raise IOError("No frames found in {}.".format(video_name))

        first_box_unnormalized = parse_label(annotation_lines[0])
        first_image = cv2.imread(image_path_list[0])
        if first_image is None:
            raise IOError("Cannot read image {}".format(image_path_list[0]))
        first_box = normalize_box(first_image.shape, first_box_unnormalized)
        last_box = first_box

        # Boxes destined for detected/: entry 0 is the annotated first box.
        detected_boxes = [[first_box.x, first_box.y, first_box.w, first_box.h]]

        for i, image_path in enumerate(image_path_list):
            print("============ Detecting {} video, {} frame ===============".format(video_name, basename(image_path)))
            image = cv2.imread(image_path)
            if image is None:
                # Fail loudly here instead of handing None to the detector.
                raise IOError("Cannot read image {}".format(image_path))

            # Only the boxes are needed here; the feature map is discarded.
            boxes, _ = yolo.predict_for_rolo(image)
            chosen_box = choose_best_box(boxes, last_box)
            last_box = chosen_box

            # Frame 0 is already represented by the annotated box above.
            if i > 0:
                detected_boxes.append([chosen_box.x, chosen_box.y, chosen_box.w, chosen_box.h])

        print("======================= Save detected label result ==========================")
        detected_boxes = np.array(detected_boxes)
        print("Video:{} {} boxes are detected".format(video_name, detected_boxes.shape[0]))
        np.save(detected_label_path + '.npy', detected_boxes)