Code Example #1
    def __init__(self, path, transform=None, queue_size=128):
        """
        Initialize the class
        :param path: path to the video file
        :param transform: a function to transform the read images
        :param queue_size: the size of the queue
        """

        setup_environment()

        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.stream = cv2.VideoCapture(path)
        self.stopped = False
        self.transform = transform

        # Get attributes of the video
        self.width = int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.height = int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.frames_per_second = self.stream.get(cv2.CAP_PROP_FPS)
        self.num_frames = int(self.stream.get(cv2.CAP_PROP_FRAME_COUNT))
        self.video_length_seconds = self.get_video_length_in_seconds(path)

        # initialize the queue used to store frames read from
        # the video file
        self.video_image_queue = Queue(maxsize=queue_size)
        # The idx of an image starting from 0
        self.img_idx = -1
        # initialize the thread
        self.thread = Thread(target=self.update, args=())
        self.thread.daemon = True
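
The constructor above only prepares a bounded frame queue and a daemon thread; the update/read loop itself is not shown. Below is a minimal, self-contained sketch of the same producer pattern (the _reader helper, the "video.mp4" path and the None sentinel are illustrative assumptions, not part of the project code):

import cv2
from queue import Queue
from threading import Thread

def _reader(path, frame_queue):
    # Producer: decode frames and push them into the bounded queue.
    stream = cv2.VideoCapture(path)
    while True:
        grabbed, frame = stream.read()
        if not grabbed:            # end of file (or unreadable source)
            frame_queue.put(None)  # sentinel for the consumer
            break
        frame_queue.put(frame)     # blocks when the queue is full
    stream.release()

frame_queue = Queue(maxsize=128)
thread = Thread(target=_reader, args=("video.mp4", frame_queue), daemon=True)
thread.start()

while True:
    frame = frame_queue.get()
    if frame is None:
        break
    # ... process frame: ndarray (H, W, C), BGR, values in [0, 255] ...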
Code Example #2
    def __init__(self,
                 cfg,
                 input_queue=None,
                 output_queue_vis=None,
                 output_queue_action_pred=None,
                 use_gpu=True,
                 show_video=False):
        """
        :param cfg: the prototype config
        :param input_queue: the input queue that provides the detection results
        :param output_queue_vis: the output queue used for visualization
        :param output_queue_action_pred: the output queue used for action prediction
        :param use_gpu: (boolean) whether the gpu should be used for inference
        :param show_video: (boolean) whether the video will be shown
        """

        setup_environment()
        # Setup logging format
        logging.setup_logging(cfg.OUTPUT_DIR)

        self.cfg = cfg
        self.show_video = show_video

        # The queues from the main thread used for multiprocessing
        self.output_detection_queue = input_queue
        if self.show_video:
            self.output_tracker_queue_visualization = output_queue_vis
        self.output_tracker_queue_action_recognition = output_queue_action_pred

        # Used to order images retrieved from the two queues used as input
        self.get_idx = -1

        # Has the previous process terminated?
        self.first_poison_pill_received = False
        # True if self.first_poison_pill_received and get does not lead to any results and self.result_rank is empty
        self.output_detection_queue_is_finished = False

        # Whether we will use a gpu or not
        use_gpu = use_gpu and torch.cuda.is_available()

        # The DeepSORT tracker we will use for person tracking
        self.deepsort = build_tracker(cfg, use_cuda=use_gpu)

        # A list that contains the get_idxs and is sorted ascending -> result_rank[0] is the smallest get_idx
        self.result_rank = []
        # A list that contains the images (ndarray with shape (H, W, C), in BGR order, values in [0, 255])
        self.result_img_data = []
        # A list that contains the prediction results (predictions {dict}) and that is
        # also sorted by the get_idxs -> corresponding to result_rank
        self.result_prediction_data = []

        # The process for person detection
        self.update_tracker_with_next_image_prcocess = mp.Process(
            target=self.update_tracker_with_next_image, args=())
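
The three result_* lists above act as a small reordering buffer: detection results can leave the parallel detector slightly out of order, so they are inserted keyed by get_idx and only released in frame order. A minimal sketch of that idea follows (names and payloads are illustrative, not the project's actual helpers):

import bisect

result_rank = []             # sorted get_idxs, ascending
result_img_data = []         # images, kept parallel to result_rank
result_prediction_data = []  # predictions, kept parallel to result_rank
next_idx_to_emit = 0

def put_result(get_idx, image, predictions):
    # Insert while keeping all three lists sorted by get_idx.
    pos = bisect.bisect(result_rank, get_idx)
    result_rank.insert(pos, get_idx)
    result_img_data.insert(pos, image)
    result_prediction_data.insert(pos, predictions)

def pop_next_in_order():
    # Release a buffered result only if it is the next expected frame.
    global next_idx_to_emit
    if result_rank and result_rank[0] == next_idx_to_emit:
        next_idx_to_emit += 1
        return (result_rank.pop(0),
                result_img_data.pop(0),
                result_prediction_data.pop(0))
    return None  # still waiting for an earlier frame

# Frames 1 and 0 arrive out of order:
put_result(1, "img1", {"pred_boxes": []})
put_result(0, "img0", {"pred_boxes": []})
print(pop_next_in_order())  # (0, 'img0', {...}): frame 0 is released first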
Code Example #3
def run_demo(cfg, progress_callback=None):
    """
    :param cfg:
    :return:
    """
    # Set up environment.
    setup_environment()
    # Setup logging format
    logging.setup_logging(cfg.OUTPUT_DIR)
    logger.info("=== Demo started ===")
    multi_process_demo = MultiProcessDemo(cfg, progress_callback)
    multi_process_demo.run_demo()
    logger.info("=== Demo finished ===")
Code Example #4
    def __init__(self,
                 cfg,
                 img_height,
                 img_width,
                 parallel=False,
                 num_gpu=None,
                 input_queue=None,
                 output_queue=None,
                 gpuid_action_recognition=None):
        """
        Creates a Detectron2-based prediction class that is optimized for the
        demo and should be used for it.
        The code is slightly modified from the original Detectron2 demo code.
        :param cfg: the config file for the prototype
        :param img_height: (int) the height of the input images
        :param img_width: (int) the width of input images
        :param parallel: (boolean) whether we will do asynchronous computation
        :param num_gpu: (int) number of gpus we will use for asynchronous computation
        :param input_queue: (multiprocessing.queue) containing the input images
                            (img_idx, image of shape (H, W, C) (in BGR order) and [0,255])
        :param output_queue: (multiprocessing.queue) containing the computed predictions
        :param gpuid_action_recognition: (int) the gpu id reserved for action recognition

        """

        setup_environment()
        # Setup logging format
        logging.setup_logging(cfg.OUTPUT_DIR)

        # The cfg file for the prototype
        self.cfg = cfg

        # The original image resolution: used for resizing provided images
        self.img_height = img_height
        self.img_width = img_width

        # We only use the demo config
        self.detectron2_cfg_file = self.cfg.DETECTRON.DETECTION_MODEL_CFG
        self.detectron2_model_weights = self.cfg.DETECTRON.MODEL_WEIGHTS
        self.detectron2_score_tresh_test = self.cfg.DETECTRON.DEMO_PERSON_SCORE_THRESH

        # Load the detectron config
        self.detectron_config = self.setup_detectron_config()

        # Can be useful for displaying the object classes
        self.metadata = MetadataCatalog.get(
            self.detectron_config.DATASETS.TEST[0]
            if len(self.detectron_config.DATASETS.TEST) else "__unused")
        self.cpu_device = torch.device("cpu")

        # Determines whether we will use async processing
        self.parallel = parallel
        if self.parallel:
            # Used for async processing
            self.predictor = AsyncPredictor(
                self.cfg,
                self.detectron_config,
                self.img_height,
                self.img_width,
                num_gpus=num_gpu,
                input_queue=input_queue,
                output_queue=output_queue,
                gpuid_action_recognition=gpuid_action_recognition)
            # Used to count the frames provided for detect_persons
            self.provided_image_count = 0
            self.buffer_size = self.predictor.default_buffer_size
            # In the original version this attribute stored the images in chronological order and
            # also served as a counter for the size of the task_queue attribute. Since we do not
            # return the images, we only use it as a counter representing the task_queue and thus
            # insert a dummy int variable instead of an image, because it is more memory efficient.
            self.frame_data = deque()
        else:
            # Use the modified predictor for the demo
            self.predictor = DemoDefaultPredictor(self.cfg,
                                                  self.detectron_config,
                                                  self.img_height,
                                                  self.img_width)
Code Example #5
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    setup_environment()
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model, cfg, is_train=False)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load a checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            len(test_loader.dataset)
            % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
            == 0
        )
        # Create meters for multi-view testing.
        test_meter = TestMeter(
            len(test_loader.dataset)
            // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
            cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
            cfg.MODEL.NUM_CLASSES,
            len(test_loader),
        )

    # Perform multi-view test on the entire dataset.
    perform_test(test_loader, model, test_meter, cfg)
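
A small worked example of the multi-view bookkeeping above, with illustrative numbers: every video appears NUM_ENSEMBLE_VIEWS * NUM_SPATIAL_CROPS times in the test set, and the TestMeter averages those views back into one prediction per video:

num_ensemble_views = 10      # temporal clips per video (illustrative value)
num_spatial_crops = 3        # spatial crops per clip (illustrative value)
dataset_size = 4500          # len(test_loader.dataset): one entry per (video, view, crop)

assert dataset_size % (num_ensemble_views * num_spatial_crops) == 0
num_videos = dataset_size // (num_ensemble_views * num_spatial_crops)
print(num_videos)  # 150 unique videos; the 30 predictions per video are aggregated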
Code Example #6
    def __init__(self, cfg, progress_callback):

        # Set up environment.
        setup_environment()
        # Setup logging format
        logging.setup_logging(cfg.OUTPUT_DIR)

        logger.info("Demo with config:")
        logger.info(pprint.pformat(cfg))

        # Prepare the input video for best demo results
        cfg.DEMO.VIDEO_SOURCE_PATH_AT_FPS = self.create_demo_video_at_target_framerate(
            cfg.DEMO.VIDEO_SOURCE_PATH, cfg.CUSTOM_DATASET.FRAME_RATE)

        self.cfg = cfg

        # An output folder for all demo-related output
        output_datetime = datetime.datetime.now().strftime("%Y-%m-%d_%H_%M_%S")
        self.cfg.DEMO.OUTPUT_FOLDER = os.path.join(
            self.cfg.CUSTOM_DATASET.DEMO_DIR, output_datetime)
        create_folder(self.cfg.DEMO.OUTPUT_FOLDER)
        logger.info("Created output-folder for demo results at: " +
                    self.cfg.DEMO.OUTPUT_FOLDER)

        # (pyqtSignal) used for signaling back the progress for the GUI
        # We currently take the progress as the percentage of distributed images
        self.progress_callback = progress_callback

        # Used for extracting the data frames from the video file
        self.file_video_stream = FileVideoStream(
            self.cfg.DEMO.VIDEO_SOURCE_PATH_AT_FPS)
        self.video_file_name = Path(self.cfg.DEMO.VIDEO_SOURCE_PATH).stem

        # Whether we display our results
        self.use_video_visualizer = self.cfg.DEMO.VIDEO_SHOW_VIDEO_ENABLE or self.cfg.DEMO.VIDEO_EXPORT_VIDEO_ENABLE

        # Whether we export our output
        self.export_output = self.cfg.DEMO.EXPORT_EXPORT_RESULTS

        # The fps of the video source
        self.frames_per_second = self.file_video_stream.frames_per_second
        self.video_length_seconds = self.file_video_stream.video_length_seconds

        # Information on the sampling requirements for the
        # video data
        self.sample_rate = self.cfg.DATA.SAMPLING_RATE
        self.num_frames = self.cfg.DATA.NUM_FRAMES
        self.seq_len = self.sample_rate * self.num_frames
        self.half_seq_len = int(self.seq_len / 2)
        self.half_seq_len_seconds = self.half_seq_len / self.frames_per_second

        # The seconds in the video that are suited for inference
        self.earliest_full_start_second = np.math.ceil(
            self.half_seq_len_seconds)
        self.final_full_second = math.floor(
            self.video_length_seconds) - math.ceil(self.half_seq_len_seconds)
        # Set the current_second to start. The current second is the second for which we make the prediction
        self.current_video_second = self.earliest_full_start_second

        # Used for telling the GUI the progress of our distribute-images function ([0, final_full_second] seconds)
        self.number_of_relevant_frames = (self.final_full_second +
                                          1) * self.frames_per_second

        # The corresponding frame index to any middle_frame_timestamp of interest
        self.first_middle_frame_index = sec_to_frame(
            self.earliest_full_start_second, self.cfg, mode="demo") - 1
        # Used to determine whether an index is a middle frame index for which action recognition is done
        self.current_middle_frame_index = self.first_middle_frame_index

        # The inference frame indices are sampled around the middle frame as defined for slowfast
        # when using ava_dataset.
        # Here we have indices. index = frame number - 1
        self.inference_frame_indices = list(
            range(self.current_middle_frame_index + 1 - self.half_seq_len,
                  self.current_middle_frame_index + 1 + self.half_seq_len,
                  self.sample_rate))
        # Indicates whether the main process should put the next image in the input_detection_queue
        self.next_image_in_relevant_range = self.current_video_second <= self.final_full_second

        # Multiprocessing configs:
        # How many cpus we have
        self.num_cpu = mp.cpu_count()

        # We have 5 processes in parallel in the simplest case of the demo
        # 1. Main, 2. Object Predictor, 3. Deep Sort Tracker, 4. Video Visualizer, 5. Action Recognizer
        self.num_occupied_processes = 5

        assert self.num_cpu >= self.num_occupied_processes, "You need at least " + str(
            self.num_occupied_processes
        ) + " cores for the multiprocessing demo"

        self.free_cpu_cores = self.num_cpu - self.num_occupied_processes
        # How many gpus we have for the demo
        self.num_gpu = self.cfg.NUM_GPUS

        # How many gpus should be used for object detection (increasing number)
        self.num_gpu_object_detection = min(self.free_cpu_cores, self.num_gpu)

        # The gpu id for action recognition (decreasing, or in our case the last gpu id).
        # We take the last possible gpu id for action recognition because this is beneficial if we have
        # fewer processes than free_cpu_cores (object detection and action recognition are separated this way)
        self.gpuid_action_recognition = self.num_gpu - 1

        # The queue sizes as specified in the config files
        self.queue_size = self.cfg.DEMO.QSIZE_SECONDS * self.cfg.CUSTOM_DATASET.FRAME_RATE

        # Queues
        # Contains the original images with an idx each:
        #   1. img_idx (int)
        #   2. image of shape (H, W, C) (in BGR order) and [0,255])
        self.input_detection_queue = mp.Queue(maxsize=self.queue_size)
        # Queue containing the detections per image in form
        #   1. img_idx (int),
        #   2. image of shape (H, W, C) (in BGR order) and [0,255]),
        #   3. predictions {dict}: a dict with the following keys
        #       pred_boxes: tensor of shape num_predictions, 4 =
        #                   the coordinates of the predicted boxes [x1, y1, x2, y2]) --> if empty it is []
        #       scores: tensor of shape (num_predictions) containing the confidence scores [0,1]) --> if empty it is []
        self.output_detection_queue = mp.Queue(maxsize=self.queue_size)
        # Contains the images with the corresponding ids and person_tracking_outputs -> used for visualization
        #   1. img_idx (int)
        #   2. image of shape (H, W, C) (in BGR order) and [0,255])
        #   3. person_tracking_outputs: ndarray with shape (num_identities, 5(int)= x1,y1,x2,y2,identity_number)
        #                          --> if empty it is a list []
        self.output_tracker_queue_visualization = mp.Queue(
            maxsize=self.queue_size)

        # Contains the images with the corresponding ids and person_tracking_outputs -> used for action recognition
        #   1. img_idx (int)
        #   2. person_tracking_outputs: ndarray with shape (num_identities, 5(int)= x1,y1,x2,y2,identity_number)
        #                          --> if empty it is a list []
        self.output_tracker_queue_action_recognition = mp.Queue(
            maxsize=self.queue_size)

        # Contains the input for action_recognition (only for img_idxs that are middle_frames)
        #   1. current_video_second: (int) the current video second for which the prediction data is given
        #   2. img_idxs=current_middle_frame_index (int) the image img_idx, which is always the next middle_frame_index
        #   3. img_idx (int) = the idx of the current middle_frame
        #   4. image of shape (H, W, C) (in BGR order) and [0,255])
        # It is bigger than the other queues
        self.input_action_recognition_queue = mp.Queue(
            maxsize=int(self.queue_size * 1.5))

        # Contains the output of action recognition used for visualization (only for img_idxs that are middle_frames)
        #   1. img_idx (int), only for middle frames
        #   2. person_tracking_outputs: ndarray with shape (num_identities, 5(int)= x1,y1,x2,y2,identity_number)
        #                          --> if empty it is a list []
        #   3. pred_action_category_scores (ndarray float32) shape(num_person_ids, num_categories),
        #                                       the scores for each person and each action category
        #                                   --> if empty it is a list []
        self.output_action_recognition_queue_visualization = mp.Queue(
            maxsize=self.queue_size)

        # Contains the output of action recognition used for result export (only for img_idxs that are middle_frames)
        #   1. current_video_second: (int) the current video second for which the prediction data is given
        #   2. person_tracking_outputs: ndarray with shape (num_identities, 5(int)= x1,y1,x2,y2,identity_number)
        #                          --> if empty it is a list []
        #   3. pred_action_category_scores (ndarray float32) shape(num_person_ids, num_categories),
        #                                       the scores for each person and each action category
        #                                   --> if empty it is a list []
        self.output_action_recognition_queue_result_export = mp.Queue(
            maxsize=int(self.video_length_seconds * self.frames_per_second))

        # A list of dicts that contains detected middle_frame_seconds
        self.middle_frame_seconds = []

        # The detectron2_object_predictor_class for person detection
        self.object_predictor = DemoDetectron2ObjectPredictor(
            self.cfg,
            self.file_video_stream.height,
            self.file_video_stream.width,
            parallel=True,
            num_gpu=self.num_gpu_object_detection,
            input_queue=self.input_detection_queue,
            output_queue=self.output_detection_queue,
            gpuid_action_recognition=self.gpuid_action_recognition)

        # The deep sort tracker class for person tracking
        self.deep_sort_tracker = DeepSortTracker(
            self.cfg,
            input_queue=self.output_detection_queue,
            output_queue_vis=self.output_tracker_queue_visualization,
            output_queue_action_pred=self.
            output_tracker_queue_action_recognition,
            show_video=self.use_video_visualizer)

        # The action recognition class
        self.action_recognizer = ActionRecognizer(
            self.cfg,
            self.file_video_stream.height,
            self.file_video_stream.width,
            model_device=self.gpuid_action_recognition,
            first_middle_frame_index=self.first_middle_frame_index,
            sample_rate=self.sample_rate,
            half_seq_len=self.half_seq_len,
            current_video_second=self.current_video_second,
            input_queue_tracker=self.output_tracker_queue_action_recognition,
            input_queue_images=self.input_action_recognition_queue,
            output_queue=self.output_action_recognition_queue_visualization,
            output_action_recognition_queue_result_export=self.
            output_action_recognition_queue_result_export)

        if self.export_output:
            # Our demo meter to store and finally print the results
            self.demo_meter = DemoMeter(self.cfg,
                                        self.file_video_stream.height,
                                        self.file_video_stream.width)
            # Used to control the completeness of our export
            self.current_export_second = self.earliest_full_start_second - 1

        if self.use_video_visualizer:
            self.demo_visualizer = VideoVisualizer(
                self.cfg,
                self.file_video_stream.height,
                self.first_middle_frame_index,
                self.frames_per_second,
                input_detection_queue=self.input_detection_queue,
                output_detection_queue=self.output_detection_queue,
                output_tracker_queue_visualization=self.
                output_tracker_queue_visualization,
                output_tracker_queue_action_recognition=self.
                output_tracker_queue_action_recognition,
                input_action_recognition_queue=self.
                input_action_recognition_queue,
                output_action_recognition_queue_visualization=self.
                output_action_recognition_queue_visualization,
                output_action_recognition_queue_result_export=self.
                output_action_recognition_queue_result_export)
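
The sampling attributes computed near the top of this constructor (seq_len, half_seq_len, earliest_full_start_second, final_full_second, inference_frame_indices) are easier to follow with concrete numbers. The sketch below reproduces the arithmetic with assumed config values; sec_to_frame is project-specific and is approximated here as second * fps:

import math

sample_rate = 2          # assumed cfg.DATA.SAMPLING_RATE
num_frames = 32          # assumed cfg.DATA.NUM_FRAMES
fps = 30.0               # frames_per_second of the re-encoded video (assumed)
video_length_seconds = 12.4

seq_len = sample_rate * num_frames            # 64 frames per clip
half_seq_len = seq_len // 2                   # 32
half_seq_len_seconds = half_seq_len / fps     # ~1.07 s

earliest_full_start_second = math.ceil(half_seq_len_seconds)                            # 2
final_full_second = math.floor(video_length_seconds) - math.ceil(half_seq_len_seconds)  # 10

# Index of the middle frame for the earliest full second (index = frame number - 1);
# the project's sec_to_frame helper is approximated as second * fps here.
first_middle_frame_index = int(earliest_full_start_second * fps) - 1                    # 59

inference_frame_indices = list(
    range(first_middle_frame_index + 1 - half_seq_len,
          first_middle_frame_index + 1 + half_seq_len,
          sample_rate))
print(len(inference_frame_indices))  # 32 frames sampled around the middle frame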
Code Example #7
    def __init__(self,
                 cfg,
                 img_height,
                 first_middle_frame_index,
                 frames_per_second,
                 input_detection_queue=None,
                 output_detection_queue=None,
                 output_tracker_queue_visualization=None,
                 output_tracker_queue_action_recognition=None,
                 input_action_recognition_queue=None,
                 output_action_recognition_queue_visualization=None,
                 output_action_recognition_queue_result_export=None):
        """
        Initialize the object
        :param cfg: our demo config
        :param img_height: (int) the height of the image
        :param first_middle_frame_index: (int) the index of the first middle_frame index
        :param frames_per_second: (float) the fps of the video -> required for determining middle frames
        :param input_detection_queue: please refer to class MultiProcessDemo
        :param output_detection_queue: please refer to class MultiProcessDemo
        :param output_tracker_queue_visualization: please refer to class MultiProcessDemo
        :param output_tracker_queue_action_recognition: please refer to class MultiProcessDemo
        :param input_action_recognition_queue: please refer to class MultiProcessDemo
        :param output_action_recognition_queue_visualization: please refer to class MultiProcessDemo
        :param output_action_recognition_queue_result_export: please refer to class MultiProcessDemo
        """

        setup_environment()
        # Setup logging format
        logging.setup_logging(cfg.OUTPUT_DIR)

        self.cfg = cfg

        # The name of the input video
        self.demo_video_name = Path(self.cfg.DEMO.VIDEO_SOURCE_PATH).stem

        # Whether we will export the annotated video
        self.export_video = self.cfg.DEMO.VIDEO_EXPORT_VIDEO_ENABLE

        if self.export_video:
            # number of digits for exporting the images (determines how many images can be stored)
            self.number_of_digits_for_image_export = 10
            # The path of the video to be created
            self.export_video_path = os.path.join(
                self.cfg.DEMO.OUTPUT_FOLDER,
                self.demo_video_name + "_annotated.mp4")

        # Whether we will display the video
        self.display_video = self.cfg.DEMO.VIDEO_SHOW_VIDEO_ENABLE

        self.cv2_display_name = "Demo: " + self.demo_video_name

        # Whether we will display the meta information (Queues Sizes and img idx)
        self.display_meta_info = cfg.DEMO.VIDEO_SHOW_VIDEO_DEBUGGING_INFO
        # Used for finding the position of meta info
        self.img_height = img_height
        # Used for determining middle_frame_indices (they have the action prediction)
        self.first_middle_frame_index = first_middle_frame_index
        self.frames_per_second = frames_per_second

        # Additional options for displaying the video
        self.video_display_scaling_factor = cfg.DEMO.VIDEO_DISPLAY_SCALING_FACTOR
        self.video_action_display_duration_milliseconds = cfg.DEMO.VIDEO_ACTION_DISPLAY_DURATION_MILLISECONDS

        # The queues containing relevant information
        self.input_detection_queue = input_detection_queue
        self.output_detection_queue = output_detection_queue
        self.output_tracker_queue_visualization = output_tracker_queue_visualization
        self.output_tracker_queue_action_recognition = output_tracker_queue_action_recognition
        self.input_action_recognition_queue = input_action_recognition_queue
        self.output_action_recognition_queue_visualization = output_action_recognition_queue_visualization
        self.output_action_recognition_queue_result_export = output_action_recognition_queue_result_export
        # The queue sizes as specified in the config files
        self.queue_size = self.cfg.DEMO.QSIZE_SECONDS * self.cfg.CUSTOM_DATASET.FRAME_RATE

        # Used for terminating the process successfully
        self.action_recognition_input_finished = False

        # The information for displaying actions
        # Load the categories:
        self.path_to_label_map_file = os.path.join(cfg.CUSTOM_DATASET.ANNOTATION_DIR, cfg.CUSTOM_DATASET.LABEL_MAP_FILE) \
            if not os.path.isfile(cfg.ACTIONRECOGNIZER.LABEL_MAP_FILE) \
            else cfg.ACTIONRECOGNIZER.LABEL_MAP_FILE
        # List of dicts (id, name)
        self.action_categories, _ = read_labelmap(self.path_to_label_map_file)
        # A color value for every category
        self.palette_actions = np.random.randint(
            64, 128, (len(self.action_categories), 3)).tolist()

        # The information required for displaying person_tracking info
        self.palette_person_ids = (2**11 - 1, 2**15 - 1, 2**20 - 1)

        # The process for displaying and/or exporting the frames
        self.display_next_frame_process = mp.Process(
            target=self.display_and_or_export_next_frame, args=())

        # Used to test the correct order of images
        self.display_img_idx = -1

        # The information for action info display
        self.current_action_output_img_idx = ""
        self.current_pred_action_category_scores = ""
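
The stages of the demo shut down cooperatively once their input queues are exhausted (compare first_poison_pill_received in Code Example #2 and action_recognition_input_finished above). A minimal, self-contained sketch of that poison-pill handshake between two processes; the payloads and helper names are illustrative, not the project's actual ones:

import multiprocessing as mp

POISON_PILL = None

def producer(queue):
    for idx in range(5):
        queue.put(idx)          # the real code puts (img_idx, image, predictions) tuples
    queue.put(POISON_PILL)      # tell the next stage that no more items will come

def consumer(in_queue, out_queue):
    while True:
        item = in_queue.get()
        if item is POISON_PILL:
            out_queue.put(POISON_PILL)  # forward the pill downstream
            break
        out_queue.put(item * 2)         # stand-in for the tracking/recognition work

if __name__ == "__main__":
    q1, q2 = mp.Queue(), mp.Queue()
    p1 = mp.Process(target=producer, args=(q1,))
    p2 = mp.Process(target=consumer, args=(q1, q2))
    p1.start()
    p2.start()
    results = []
    while True:
        item = q2.get()
        if item is POISON_PILL:
            break
        results.append(item)
    p1.join()
    p2.join()
    print(results)  # [0, 2, 4, 6, 8]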
Code Example #8
def benchmark_data_loading(cfg):
    """
    Benchmark the speed of data loading in PySlowFast.
    Args:

        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    setup_environment()
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Benchmark data loading with config:")
    logger.info(pprint.pformat(cfg))

    timer = Timer()
    dataloader = loader.construct_loader(cfg, "train")
    logger.info("Initialize loader using {:.2f} seconds.".format(
        timer.seconds()))
    # Total batch size across different machines.
    batch_size = cfg.TRAIN.BATCH_SIZE * cfg.NUM_SHARDS
    log_period = cfg.BENCHMARK.LOG_PERIOD
    epoch_times = []
    # Test for a few epochs.
    for cur_epoch in range(cfg.BENCHMARK.NUM_EPOCHS):
        timer = Timer()
        timer_epoch = Timer()
        iter_times = []
        for cur_iter, _ in enumerate(tqdm.tqdm(dataloader)):
            if cur_iter > 0 and cur_iter % log_period == 0:
                iter_times.append(timer.seconds())
                ram_usage, ram_total = misc.cpu_mem_usage()
                logger.info(
                    "Epoch {}: {} iters ({} videos) in {:.2f} seconds. "
                    "RAM Usage: {:.2f}/{:.2f} GB.".format(
                        cur_epoch,
                        log_period,
                        log_period * batch_size,
                        iter_times[-1],
                        ram_usage,
                        ram_total,
                    ))
                timer.reset()
        epoch_times.append(timer_epoch.seconds())
        ram_usage, ram_total = misc.cpu_mem_usage()
        logger.info(
            "Epoch {}: in total {} iters ({} videos) in {:.2f} seconds. "
            "RAM Usage: {:.2f}/{:.2f} GB.".format(
                cur_epoch,
                len(dataloader),
                len(dataloader) * batch_size,
                epoch_times[-1],
                ram_usage,
                ram_total,
            ))
        logger.info(
            "Epoch {}: on average every {} iters ({} videos) take {:.2f}/{:.2f} "
            "(avg/std) seconds.".format(
                cur_epoch,
                log_period,
                log_period * batch_size,
                np.mean(iter_times),
                np.std(iter_times),
            ))
    logger.info("On average every epoch ({} videos) takes {:.2f}/{:.2f} "
                "(avg/std) seconds.".format(
                    len(dataloader) * batch_size,
                    np.mean(epoch_times),
                    np.std(epoch_times),
                ))
Code Example #9
    def __init__(self, cfg, img_height, img_width):
        """
        Initialize the DemoMeter with the relevant parameters
        :param cfg: the prototype config
        :param img_height: (int) the height of the input images
        :param img_width: (int) the width of input images
        """

        # Set up environment.
        setup_environment()

        # Setup logging format
        logging.setup_logging(cfg.OUTPUT_DIR)

        self.cfg = cfg

        # In the case of an AVA-like predictor it is necessary to specify a
        # cfg.ACTIONRECOGNIZER.LABEL_MAP_FILE, because it comprises all 80 categories.
        # During the AVA challenge only 60 categories were evaluated, and we
        # want all categories.
        path_to_label_map_file = os.path.join(cfg.CUSTOM_DATASET.ANNOTATION_DIR, cfg.CUSTOM_DATASET.LABEL_MAP_FILE) \
            if not os.path.isfile(cfg.ACTIONRECOGNIZER.LABEL_MAP_FILE) \
            else cfg.ACTIONRECOGNIZER.LABEL_MAP_FILE

        # Export properties
        datetime_for_filenames = datetime.datetime.now().strftime(
            "%Y-%m-%d_%H_%M_%S")
        self.delimiter = ","
        assert cfg.DEMO.OUTPUT_FOLDER != "", "Please specify cfg.DEMO.OUTPUT_FOLDER to be able to export the output"
        self.output_dir_path = self.cfg.DEMO.OUTPUT_FOLDER
        self.file_name_demo_log = datetime_for_filenames + "_" + "demo_log"
        self.file_name_demo_gt_like_file = datetime_for_filenames + "_" + "demo_gt_format"
        self.results_gt_like_csv_path = os.path.join(
            self.output_dir_path, self.file_name_demo_gt_like_file + ".csv")
        self.results_log_path_prefix = os.path.join(self.output_dir_path,
                                                    self.file_name_demo_log)
        self.results_log_csv_path = ""
        self.results_xes_path = ""
        # The minimum score for a predicted category to be exported
        self.min_category_export_score = cfg.DEMO.EXPORT_MIN_CATEGORY_EXPORT_SCORE
        # Whether a person can do multiple actions at the same time or not
        # This influences the export options, since only the option with max value is chosen
        self.multiple_action_possible = cfg.CUSTOM_DATASET.MULTIPLE_ACTION_POSSIBLE

        # Resolution used for export
        self.img_height = img_height
        self.img_width = img_width

        # List of dict with items "id" and "name"
        self.categories, _ = read_labelmap(path_to_label_map_file)
        # Replace delimiter out of category_name to guarantee good csv export
        for idx in range(0, len(self.categories)):
            self.categories[idx]["name"] = self.categories[idx][
                "name"].replace(self.delimiter, "")

        # The list-variables we use to store the demo prediction results.
        # They will be used to export the information into csv or xes
        self.res_person_tracking_outputs = []
        self.res_pred_action_category_scores = []
        self.res_all_metadata = []
        self.res_case_ids = []

        # This df is used to assign correct case_concept_name and concept_instance values
        self.case_and_instance_df = self.create_empty_case_and_instance_df()
        # Used to indicate that the activity instance has not yet completed
        self.video_second_not_complete = -1

        # Used as test for correct functionality of lifecycle_transition
        self.not_closed_list = []
Code Example #10
def benchmark_data(cfg):
    # Set up environment.
    setup_environment()
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Benchmark data loading with config:")
    logger.info(pprint.pformat(cfg))

    timer = Timer()
    dataloader = loader.construct_loader(cfg, "train")
    logger.info("Initialize loader using {:.2f} seconds.".format(
        timer.seconds()))
    batch_size = cfg.TRAIN.BATCH_SIZE
    log_period = cfg.BENCHMARK.LOG_PERIOD
    epoch_times = []
    # Test for a few epochs.
    for cur_epoch in range(cfg.BENCHMARK.NUM_EPOCHS):
        timer = Timer()
        timer_epoch = Timer()
        iter_times = []
        for cur_iter, _ in enumerate(tqdm.tqdm(dataloader)):
            if cur_iter > 0 and cur_iter % log_period == 0:
                iter_times.append(timer.seconds())
                vram = psutil.virtual_memory()
                logger.info(
                    "Epoch {}: {} iters ({} videos) in {:.2f} seconds. "
                    "RAM Usage: {:.2f}/{:.2f} GB.".format(
                        cur_epoch,
                        log_period,
                        log_period * batch_size,
                        iter_times[-1],
                        (vram.total - vram.available) / 1024**3,
                        vram.total / 1024**3,
                    ))
                timer.reset()
        epoch_times.append(timer_epoch.seconds())
        vram = psutil.virtual_memory()
        logger.info(
            "Epoch {}: in total {} iters ({} videos) in {:.2f} seconds. "
            "RAM Usage: {:.2f}/{:.2f} GB.".format(
                cur_epoch,
                len(dataloader),
                len(dataloader) * batch_size,
                epoch_times[-1],
                (vram.total - vram.available) / 1024**3,
                vram.total / 1024**3,
            ))
        logger.info(
            "Epoch {}: on average every {} iters ({} videos) take {:.2f}/{:.2f} "
            "(avg/std) seconds.".format(
                cur_epoch,
                log_period,
                log_period * batch_size,
                np.mean(iter_times),
                np.std(iter_times),
            ))
    logger.info("On average every epoch ({} videos) takes {:.2f}/{:.2f} "
                "(avg/std) seconds.".format(
                    len(dataloader) * batch_size,
                    np.mean(epoch_times),
                    np.std(epoch_times),
                ))
Code Example #11
    def __init__(self,
                 cfg,
                 img_height,
                 img_width,
                 model_device,
                 first_middle_frame_index,
                 sample_rate,
                 half_seq_len,
                 current_video_second,
                 input_queue_tracker=None,
                 input_queue_images=None,
                 output_queue=None,
                 output_action_recognition_queue_result_export=None):
        """
        Initialize the ActionRecognizer
        :param cfg: the prototype config
        :param img_height: (int) the height of the images
        :param img_width: (int) the width of the images
        :param model_device: (int) the GPU id to which the model is transferred
        :param first_middle_frame_index: (int) the index of the first middle_frame corresponding to current_video_second
        :param sample_rate: (int) the sample rate
        :param half_seq_len: (int) the half length of a sequence, where each sequence has a defined length and
                        comprises the relevant images for action prediction
        :param current_video_second: (int) the video second, corresponding to the first_middle_frame_index
        :param input_queue_tracker: the queue that provides the person tracking outputs
        :param input_queue_images: the queue that provides the images for action inference (only middle frames)
        :param output_queue: the queue that stores the predicted categories with the corresponding people
        :param output_action_recognition_queue_result_export: the queue that stores the prediction results for export
        """

        setup_environment()
        # Setup logging format
        logging.setup_logging(cfg.OUTPUT_DIR)

        self.cfg = cfg
        self.show_video = self.cfg.DEMO.VIDEO_SHOW_VIDEO_ENABLE or self.cfg.DEMO.VIDEO_EXPORT_VIDEO_ENABLE

        self.model_device = model_device

        # Build the video model and print model statistics.
        self.activity_prediction_model = build_model_for_demo(
            self.cfg, self.model_device)
        # Load the pretrained model used for demo
        cu.load_demo_checkpoint(self.cfg, self.activity_prediction_model)
        # Set model to eval mode
        self.activity_prediction_model.eval()

        # Register the queues
        self.output_tracker_queue_action_recognition = input_queue_tracker
        self.input_action_recognition_queue = input_queue_images
        self.output_action_recognition_queue_visualization = output_queue
        self.output_action_recognition_queue_result_export = output_action_recognition_queue_result_export

        # Relevant information for image preprocessing
        self.img_height = img_height
        self.img_width = img_width
        # The short side of our images is scaled to this size
        self.crop_size = cfg.DATA.TEST_CROP_SIZE
        self.data_mean = cfg.DATA.MEAN
        self.data_std = cfg.DATA.STD
        # This is very important. Our images are in BGR format from thread_video_reader
        # Note that Kinetics pre-training uses RGB, which may require changing our
        # BGR images (only for inference) to RGB
        self.use_bgr = cfg.ACTIONRECOGNIZER.BGR

        # The process for action recognition
        self.recognize_actions_process = mp.Process(
            target=self.recognize_actions_multi_processing, args=())

        # A list that stores all image_idx data from queue and is sorted by get_idxs (ascending)
        # -> image_idx_from_queue[0] is smallest image_idx
        self.image_idx_from_queue = []
        # A list that stores the corresponding image data also from queue. It is also sorted by the image_idx
        # -> corresponding to image_idx_from_queue
        self.image_data_from_queue = []

        # Stores the relevant image_idx for an action prediction, sorted by image_idx
        self.image_idx_for_prediction = []

        # Stores the relevant image data for an action prediction, sorted by image_idx from image_idx_for_prediction
        self.image_data_for_prediction = []

        ######### All the relevant data for retrieving the process data in the correct form

        # The corresponding frame index to any middle_frame_timestamp of interest
        self.first_middle_frame_index = first_middle_frame_index
        # Used to determine whether an index is a middle frame index for which action recognition is done
        self.current_middle_frame_index = self.first_middle_frame_index

        # Used to test the validity of an image_idx to be added
        self.last_image_idx = -1

        self.sample_rate = sample_rate
        self.half_seq_len = half_seq_len
        self.current_video_second = current_video_second

        # The inference frame indices are sampled around the middle frame as defined for slowfast
        # when using ava_dataset.
        # Here we have indices. index = frame number - 1
        self.inference_frame_indices = list(
            range(self.current_middle_frame_index + 1 - self.half_seq_len,
                  self.current_middle_frame_index + 1 + self.half_seq_len,
                  self.sample_rate))

        # The length of our "raw" data list has to be equal to this value
        self.batch_size = len(self.inference_frame_indices)
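
The attributes crop_size, data_mean, data_std and use_bgr above imply the usual SlowFast test-time preprocessing: scale the short side to TEST_CROP_SIZE, normalize with the dataset mean/std, and flip BGR to RGB when the checkpoint expects RGB. A hedged sketch of that preprocessing for a single frame (the helper and the exact order of operations are assumptions, not the project's actual code):

import cv2
import numpy as np
import torch

def preprocess_frame(frame_bgr, crop_size=256,
                     mean=(0.45, 0.45, 0.45), std=(0.225, 0.225, 0.225),
                     use_bgr=False):
    """frame_bgr: ndarray (H, W, C), BGR, values in [0, 255]."""
    h, w = frame_bgr.shape[:2]
    # Scale the short side of the image to crop_size, keeping the aspect ratio.
    scale = crop_size / min(h, w)
    frame = cv2.resize(frame_bgr, (int(round(w * scale)), int(round(h * scale))))
    frame = frame.astype(np.float32) / 255.0
    frame = (frame - np.array(mean, dtype=np.float32)) / np.array(std, dtype=np.float32)
    if not use_bgr:
        # Kinetics pre-training uses RGB, so flip the channel order.
        frame = frame[:, :, ::-1]
    # (H, W, C) -> (C, H, W) tensor for the model.
    return torch.from_numpy(np.ascontiguousarray(frame.transpose(2, 0, 1)))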
Code Example #12
def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    setup_environment()
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model, cfg, is_train=True)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    if cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        checkpoint_epoch = cu.load_checkpoint(last_checkpoint, model,
                                              cfg.NUM_GPUS > 1, optimizer)
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        checkpoint_epoch = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
        start_epoch = checkpoint_epoch + 1
    else:
        start_epoch = 0

    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        train_meter = TrainMeter(len(train_loader), cfg)
        val_meter = ValMeter(len(val_loader), cfg)

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)
        # Train for one epoch.
        train_epoch(train_loader, model, optimizer, train_meter, cur_epoch,
                    cfg)

        # Compute precise BN stats.
        if cfg.BN.USE_PRECISE_STATS and len(get_bn_modules(model)) > 0:
            calculate_and_update_precise_bn(train_loader, model,
                                            cfg.BN.NUM_BATCHES_PRECISE)

        # Save a checkpoint.
        if cu.is_checkpoint_epoch(cur_epoch, cfg.TRAIN.CHECKPOINT_PERIOD):
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch,
                               cfg)
        # Evaluate the model on validation set.
        if misc.is_eval_epoch(cfg, cur_epoch):
            eval_epoch(val_loader, model, val_meter, cur_epoch, cfg)
Code Example #13
File: __init__.py  Project: AK391/X3D
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.

from slowfast.utils.env import setup_environment

setup_environment()