Example #1
0
    def detect_videos(self, data, mode):
        """Detect videos by YOLO and store the chosen bounding boxes and feature maps.

        For every video folder returned by
        ``data_preparation(data[mode], FOR_YOLO=True)`` each frame is passed
        through ``yolo.predict_for_rolo``. The box selected by
        ``choose_best_box`` (relative to the box of the previous frame) and the
        returned feature are collected per frame. The first stored box is the
        normalized box parsed from the first annotation line, not a detection.

        Two files named ``<video folder name>.npy`` are written, one into
        ``data[mode]['detected_folder']`` (boxes as ``[x, y, w, h]`` rows) and
        one into ``data[mode]['features_folder']`` (stacked features).
        A video is skipped as soon as either of those files already exists.

        Args:
            data: Mapping indexed by ``mode``. ``data[mode]`` is handed to
                ``data_preparation`` and must provide the keys
                ``'detected_folder'`` and ``'features_folder'``.
            mode: Key selecting which entry of ``data`` to process.

        Raises:
            IOError: If the number of annotation lines differs from the number
                of frames, if a video has no frames, or if a frame image
                cannot be read.
        """
        video_folders_list, annotations_list = data_preparation(data[mode], FOR_YOLO=True)

        # ##############################
        #   Make the model
        # ##############################
        model_config = self.yolo_config['model']
        yolo = YOLO(architecture=model_config['architecture'],
                    input_size=model_config['input_size'],
                    labels=model_config['labels'],
                    max_box_per_image=model_config['max_box_per_image'],
                    anchors=model_config['anchors'])

        # ###############################
        #   Load trained weights
        # ###############################
        print("YOLO weights path:", self.yolo_weights_path)
        yolo.load_weights(self.yolo_weights_path)

        for vid, video_folder in enumerate(video_folders_list):
            video_name = basename(video_folder)
            print(video_name)

            # NOTE(review): a video is skipped when *either* output exists, so a
            # run interrupted between the two saves leaves the features file
            # missing for good -- confirm whether that is intended.
            detected_label_path = os.path.join(data[mode]['detected_folder'], video_name)
            if os.path.exists(detected_label_path + '.npy'):
                continue

            features_path = os.path.join(data[mode]['features_folder'], video_name)
            if os.path.exists(features_path + '.npy'):
                continue

            # Read the annotation file once, inside a context manager, so the
            # file handle is always closed (one annotation line per frame).
            with open(annotations_list[vid], 'r') as annot_file:
                annotation_lines = annot_file.readlines()
            num_frames = len(annotation_lines)

            image_path_list = sorted(glob.glob(video_folder + "/*"))
            sort_nicely(image_path_list)

            if num_frames != len(image_path_list):
                raise IOError("Number of frames in {} does not match annotations.".format(video_name))
            if not image_path_list:
                raise IOError("No frames found in {}.".format(video_name))

            first_box_unnormalized = parse_label(annotation_lines[0])

            first_image = cv2.imread(image_path_list[0])
            if first_image is None:
                # cv2.imread signals failure by returning None instead of raising.
                raise IOError("Cannot read image {}".format(image_path_list[0]))
            first_box = normalize_box(first_image.shape, first_box_unnormalized)
            last_box = first_box

            # Boxes destined for detected/: frame 0 uses the annotated box.
            detected_boxes = [[first_box.x, first_box.y, first_box.w, first_box.h]]

            # Features destined for features/: one entry per frame.
            features = []

            for i, image_path in enumerate(image_path_list):
                print("============ Detecting {} video, {} frame ===============".format(video_name, basename(image_path)))
                image = cv2.imread(image_path)
                if image is None:
                    # Fail with a clear message instead of feeding None to YOLO.
                    raise IOError("Cannot read image {}".format(image_path))

                boxes, feature = yolo.predict_for_rolo(image)
                chosen_box = choose_best_box(boxes, last_box)
                last_box = chosen_box

                if i > 0:
                    # Frame 0 already contributed the annotated box above.
                    detected_boxes.append([chosen_box.x, chosen_box.y, chosen_box.w, chosen_box.h])

                features.append(feature)

            print("======================= Save detected label result ==========================")
            detected_boxes = np.array(detected_boxes)
            print("Video:{} {} boxes are detected".format(video_name, detected_boxes.shape[0]))
            np.save(detected_label_path + '.npy', detected_boxes)

            print("========================== Save feature map =================================")
            features = np.array(features)
            np.save(features_path + '.npy', features)
Example #2
0
    def track(self, video_folder_path, initial_box):
        """Track a target through a sequence of frames using YOLO and ROLO.

        A YOLO model is built from ``self.yolo_config`` and loaded with
        ``self.yolo_weights_path``. Frames are the sorted matches of
        ``glob.glob(video_folder_path + "*")``. For every frame the YOLO
        feature and a box are appended to running histories; from the second
        frame on, the last ``self.time_step`` history entries are batched with
        ``self.get_test_batch`` and fed to ``self.model.predict``. The YOLO box
        and the ROLO box are drawn on the frame, which is written to
        ``output/<frame index>.jpg``. Processing stops after frame index 100.
        Finally the ROLO prediction speed is printed.

        Args:
            video_folder_path: Path prefix of the frames. ``"*"`` is appended
                directly, so it should end with a path separator to list the
                contents of a folder.
            initial_box: Box of the target in the first frame; it is converted
                with ``xywh_xymin_to_xycenter`` (presumably from
                x_min, y_min, w, h to x_center, y_center, w, h -- confirm
                against that helper).

        Raises:
            IOError: If no frame matches the glob pattern or a frame cannot be
                read.
        """
        model_config = self.yolo_config['model']
        yolo = YOLO(architecture=model_config['architecture'],
                    input_size=model_config['input_size'],
                    labels=model_config['labels'],
                    max_box_per_image=model_config['max_box_per_image'],
                    anchors=model_config['anchors'])
        print("YOLO weights path:", self.yolo_weights_path)
        yolo.load_weights(self.yolo_weights_path)

        frame_path_list = sorted(glob.glob((video_folder_path + "*")))
        if len(frame_path_list) == 0:
            raise IOError("Found {} frames".format(len(frame_path_list)))

        feature_inputs_list = []
        bbox_inputs_list = []

        initial_box = xywh_xymin_to_xycenter(initial_box)  # x_center, y_center, w, h

        # Accumulated ROLO prediction time and the number of frames it covers.
        tracking_time = 0.0
        tracked_frames = 0

        for i, frame_path in enumerate(frame_path_list):
            print("================ {}th frame ==================".format(i))
            frame = cv2.imread(frame_path)
            if frame is None:
                # cv2.imread returns None on failure; stop with a clear error
                # instead of failing later on frame.shape.
                raise IOError("Cannot read frame {}".format(frame_path))

            if i == 0:
                # Extract the feature from the untouched frame first, so the
                # drawn rectangle cannot leak into the YOLO feature (same
                # order as for all later frames).
                _, feature = yolo.predict_for_rolo(frame)
                frame = draw_box(frame, initial_box)
                normalized_initial_box = normalize_box(frame.shape, initial_box)

                feature_inputs_list.append(feature)
                bbox = np.expand_dims(np.array(normalized_initial_box), axis=0)
                bbox_inputs_list.append(bbox)

                last_box = BoundBox(normalized_initial_box[0], normalized_initial_box[1],
                                    normalized_initial_box[2], normalized_initial_box[3])
            else:
                boxes, feature = yolo.predict_for_rolo(frame)

                chosen_box = choose_best_box(boxes, last_box)
                last_box = chosen_box

                feature_inputs_list.append(feature)  # original author's note: shape [1,13,13,1024]
                bbox = np.expand_dims(
                    np.array([chosen_box.x, chosen_box.y, chosen_box.w, chosen_box.h]),
                    axis=0)  # shape: [1,4]
                bbox_inputs_list.append(bbox)

                # Sliding window over the most recent `time_step` frames.
                l_bound = max(i - self.time_step + 1, 0)
                feature_input = self.get_test_batch(feature_inputs_list[l_bound:i+1])
                bbox_input = self.get_test_batch(bbox_inputs_list[l_bound:i+1])

                # Prediction by ROLO (only this call is timed).
                start_time = time.time()
                predict_bbox = self.model.predict([feature_input, bbox_input])
                end_time = time.time()
                print("ROLO predict time: {} sec per image.".format(end_time-start_time))

                # Draw detected box by YOLO
                detected_box = denormalize_box(frame.shape, chosen_box)
                frame = draw_box(frame, detected_box, color=(255,0,0))
                print("Detected box: [ {:.2f}  {:.2f}  {:.2f}  {:.2f} ]".format(detected_box[0], detected_box[1], detected_box[2], detected_box[3]))

                # While fewer than `time_step` frames exist, read the output at
                # the current frame's index; afterwards read the last step.
                if i < self.time_step:
                    bbox = predict_bbox[0, i, ...]
                else:
                    bbox = predict_bbox[0, -1, ...]

                # Denormalize box: x and w scale with width, y and h with height.
                bbox[0] *= frame.shape[1]
                bbox[1] *= frame.shape[0]
                bbox[2] *= frame.shape[1]
                bbox[3] *= frame.shape[0]
                # Draw Tracked box by ROLO
                frame = draw_box(frame, bbox)
                print("Tracked box : [ {:.2f}  {:.2f}  {:.2f}  {:.2f} ]".format(bbox[0], bbox[1], bbox[2], bbox[3]))

                tracking_time += (end_time - start_time)
                tracked_frames += 1

            print("==============================================")

            cv2.imwrite('output/' + str(i) + '.jpg', frame)

            if i >= 100:
                break

        # Report speed over the frames that were actually tracked: the loop may
        # stop early, and a single-frame video never calls ROLO at all.
        if tracked_frames > 0 and tracking_time > 0:
            print("Tracking speed: {:.3f} FPS".format(tracked_frames / tracking_time))
        else:
            print("Tracking speed: n/a (no frames were tracked by ROLO)")
Example #3
0
def detect_videos(annotations_list, video_folders_list, detected_folder,
                  yolo_config_path="../config_aerial.json",
                  yolo_weights_path="../yolo_coco_aerial_person.h5"):
    """Detect videos by YOLO and store the chosen bounding boxes.

    A YOLO model is built from the JSON config and loaded with the given
    weights. For every video folder, each frame is passed through
    ``yolo.predict_for_rolo`` and the box selected by ``choose_best_box``
    (relative to the previous frame's box) is recorded as ``[x, y, w, h]``.
    The first stored box is the normalized box parsed from the first
    annotation line, not a detection. The boxes of a video are saved to
    ``<detected_folder>/<video folder name>.npy``; videos whose output file
    already exists are skipped.

    Args:
        annotations_list: Annotation file paths, one per video, in the same
            order as ``video_folders_list``. Each file must be named
            ``<video folder name>.txt`` and hold one line per frame.
        video_folders_list: Folders containing the frames of each video.
        detected_folder: Folder that receives the ``.npy`` result files.
        yolo_config_path: Path of the YOLO JSON config; its ``'model'`` entry
            supplies the constructor arguments.
        yolo_weights_path: Path of the trained YOLO weights file.

    Raises:
        IOError: If the two lists differ in length, an annotation file name
            does not match its video folder, the frame count differs from the
            annotation line count, a video has no frames, or a frame image
            cannot be read.
    """
    with open(yolo_config_path) as config_buffer:
        yolo_config = json.load(config_buffer)

    # ##############################
    #   Make the model
    # ##############################
    model_config = yolo_config['model']
    yolo = YOLO(architecture=model_config['architecture'],
                input_size=model_config['input_size'],
                labels=model_config['labels'],
                max_box_per_image=model_config['max_box_per_image'],
                anchors=model_config['anchors'])

    # ###############################
    #   Load trained weights
    # ###############################
    print("YOLO weights path:", yolo_weights_path)
    yolo.load_weights(yolo_weights_path)

    if len(annotations_list) != len(video_folders_list):
        raise IOError("Mismatch # videos {} {}.".format(len(annotations_list), len(video_folders_list)))

    for vid, video_folder in enumerate(video_folders_list):
        video_name = basename(video_folder)
        print(video_name)
        detected_label_path = os.path.join(detected_folder, video_name)
        if os.path.exists(detected_label_path + '.npy'):
            continue

        annotation_path = annotations_list[vid]
        if basename(annotation_path) != (video_name + ".txt"):
            print("Annot: {}".format(basename(annotation_path)))
            print("image: {}".format(video_name))
            raise IOError("Mismatch video {}.".format(video_name))

        # Read the annotation file once, inside a context manager, so the
        # file handle is always closed (one annotation line per frame).
        with open(annotation_path, 'r') as annot_file:
            annotation_lines = annot_file.readlines()
        num_frames = len(annotation_lines)

        image_path_list = sorted(glob.glob(video_folder + "/*"))
        sort_nicely(image_path_list)

        if num_frames != len(image_path_list):
            raise IOError("Number of frames in {} does not match annotations.".format(video_name))
        if not image_path_list:
            raise IOError("No frames found in {}.".format(video_name))

        first_box_unnormalized = parse_label(annotation_lines[0])

        first_image = cv2.imread(image_path_list[0])
        if first_image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError("Cannot read image {}".format(image_path_list[0]))
        first_box = normalize_box(first_image.shape, first_box_unnormalized)
        last_box = first_box

        # Boxes destined for detected/: frame 0 uses the annotated box.
        detected_boxes = [[first_box.x, first_box.y, first_box.w, first_box.h]]

        for i, image_path in enumerate(image_path_list):
            print("============ Detecting {} video, {} frame ===============".format(video_name, basename(image_path)))
            image = cv2.imread(image_path)
            if image is None:
                # Fail with a clear message instead of feeding None to YOLO.
                raise IOError("Cannot read image {}".format(image_path))

            # The feature returned alongside the boxes is not needed here.
            boxes, _ = yolo.predict_for_rolo(image)
            chosen_box = choose_best_box(boxes, last_box)
            last_box = chosen_box

            if i > 0:
                # Frame 0 already contributed the annotated box above.
                detected_boxes.append([chosen_box.x, chosen_box.y, chosen_box.w, chosen_box.h])

        print("======================= Save detected label result ==========================")
        detected_boxes = np.array(detected_boxes)
        print("Video:{} {} boxes are detected".format(video_name, detected_boxes.shape[0]))
        np.save(detected_label_path + '.npy', detected_boxes)