def __init__(self, path, transform=None, queue_size=128):
    """
    Initialize the class
    :param path: path to the video file
    :param transform: a function to transform the read images
    :param queue_size: the size of the queue
    """
    setup_environment()
    # Initialize the file video stream along with the boolean
    # used to indicate whether the thread should be stopped or not
    self.stream = cv2.VideoCapture(path)
    self.stopped = False
    self.transform = transform

    # Get the attributes of the video
    self.width = int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH))
    self.height = int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT))
    self.frames_per_second = self.stream.get(cv2.CAP_PROP_FPS)
    self.num_frames = int(self.stream.get(cv2.CAP_PROP_FRAME_COUNT))
    self.video_length_seconds = self.get_video_length_in_seconds(path)

    # Initialize the queue used to store the frames read from the video file
    self.video_image_queue = Queue(maxsize=queue_size)
    # The idx of an image, starting from 0
    self.img_idx = -1

    # Initialize the reader thread
    self.thread = Thread(target=self.update, args=())
    self.thread.daemon = True
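# A minimal sketch of the producer loop that the thread above targets. The
# real `update` method lives elsewhere in this class; the body below is an
# assumption reconstructed from the attributes set in __init__ (stream,
# stopped, transform, video_image_queue, img_idx), not the actual code.
def _update_sketch(self):
    """Hypothetical reader loop: fills video_image_queue until the video ends."""
    while not self.stopped:
        grabbed, frame = self.stream.read()
        if not grabbed:
            # No more frames: mark the stream as finished and stop producing.
            self.stopped = True
            break
        self.img_idx += 1
        if self.transform is not None:
            frame = self.transform(frame)
        # put() blocks while the queue is full, throttling the reader.
        self.video_image_queue.put((self.img_idx, frame))
    self.stream.release()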
def __init__(self,
             cfg,
             input_queue=None,
             output_queue_vis=None,
             output_queue_action_pred=None,
             use_gpu=True,
             show_video=False):
    """
    :param cfg: the prototype config
    :param input_queue: (multiprocessing.Queue) the detections per image, produced by the object predictor
    :param output_queue_vis: (multiprocessing.Queue) tracking outputs destined for visualization
    :param output_queue_action_pred: (multiprocessing.Queue) tracking outputs destined for action recognition
    :param use_gpu: (boolean) whether a gpu should be used for inference
    :param show_video: (boolean) whether the visualization queue is served
    """
    setup_environment()
    # Setup logging format
    logging.setup_logging(cfg.OUTPUT_DIR)
    self.cfg = cfg
    self.show_video = show_video

    # The queues from the main thread used for multiprocessing
    self.output_detection_queue = input_queue
    if self.show_video:
        self.output_tracker_queue_visualization = output_queue_vis
    self.output_tracker_queue_action_recognition = output_queue_action_pred

    # Used to order the images retrieved from the two queues used as input
    self.get_idx = -1
    # Has the previous process terminated?
    self.first_poison_pill_received = False
    # True if first_poison_pill_received is set, get() does not lead to any
    # further results, and result_rank is empty
    self.output_detection_queue_is_finished = False

    # Whether we will use a gpu or not
    use_gpu = use_gpu and torch.cuda.is_available()
    # The deep sort tracker we will use for person tracking
    self.deepsort = build_tracker(cfg, use_cuda=use_gpu)

    # A list that contains the get_idxs and is sorted ascending
    # -> result_rank[0] is the smallest get_idx
    self.result_rank = []
    # A list that contains the images (ndarray with shape (H, W, C),
    # in BGR order and [0, 255])
    self.result_img_data = []
    # A list that contains the prediction results (predictions {dict}) and
    # that is also sorted by the get_idxs -> corresponding to result_rank
    self.result_prediction_data = []

    # The process for person tracking
    self.update_tracker_with_next_image_prcocess = mp.Process(
        target=self.update_tracker_with_next_image, args=())
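# A minimal sketch of the reordering idea behind result_rank /
# result_img_data / result_prediction_data: results may leave the input
# queue out of order, so each one is inserted at its sorted position and
# only emitted once the next expected index sits at the front. The helper
# below is illustrative, not the class's actual method.
import bisect

def _insert_result_sketch(result_rank, result_data, idx, data):
    """Keep result_rank sorted ascending; keep result_data aligned with it."""
    pos = bisect.bisect(result_rank, idx)
    result_rank.insert(pos, idx)
    result_data.insert(pos, data)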
def run_demo(cfg, progress_callback=None):
    """
    Run the multi-process demo on the configured video source.
    :param cfg: the prototype config
    :param progress_callback: (pyqtSignal, optional) used for signaling the progress back to the GUI
    """
    # Set up environment.
    setup_environment()
    # Setup logging format
    logging.setup_logging(cfg.OUTPUT_DIR)
    logger.info("=== Demo started ===")
    multi_process_demo = MultiProcessDemo(cfg, progress_callback)
    multi_process_demo.run_demo()
    logger.info("=== Demo finished ===")
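# A hedged usage sketch: load the prototype config and start the demo. The
# exact config entry point may differ in this repo; `get_cfg` and the paths
# below are assumptions based on the PySlowFast conventions referenced
# elsewhere in this codebase.
from slowfast.config.defaults import get_cfg  # assumed location

def _run_demo_example():
    cfg = get_cfg()
    cfg.merge_from_file("configs/demo_config.yaml")  # hypothetical path
    cfg.DEMO.VIDEO_SOURCE_PATH = "videos/input.mp4"  # hypothetical path
    run_demo(cfg)  # progress_callback omitted: runs without a GUI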
def __init__(self,
             cfg,
             img_height,
             img_width,
             parallel=False,
             num_gpu=None,
             input_queue=None,
             output_queue=None,
             gpuid_action_recognition=None):
    """
    Creates a Detectron2-based prediction class that is optimized for the
    demo and should be used for it. The code is slightly modified from the
    original detectron2 demo code.
    :param cfg: the config file for the prototype
    :param img_height: (int) the height of the input images
    :param img_width: (int) the width of the input images
    :param parallel: (boolean) whether we will do asynchronous computation
    :param num_gpu: (int) the number of gpus we will use for asynchronous computation
    :param input_queue: (multiprocessing.Queue) contains the input images
        (img_idx, image of shape (H, W, C) (in BGR order) and [0, 255])
    :param output_queue: (multiprocessing.Queue) contains the computed predictions
    :param gpuid_action_recognition: (int) the gpuid reserved for action recognition
    """
    setup_environment()
    # Setup logging format
    logging.setup_logging(cfg.OUTPUT_DIR)
    # The cfg file for the prototype
    self.cfg = cfg
    # The original image resolution: used for resizing the provided images
    self.img_height = img_height
    self.img_width = img_width
    # We only use the demo config
    self.detectron2_cfg_file = self.cfg.DETECTRON.DETECTION_MODEL_CFG
    self.detectron2_model_weights = self.cfg.DETECTRON.MODEL_WEIGHTS
    self.detectron2_score_tresh_test = self.cfg.DETECTRON.DEMO_PERSON_SCORE_THRESH
    # Load the detectron config
    self.detectron_config = self.setup_detectron_config()
    # Can be useful for displaying the object classes
    self.metadata = MetadataCatalog.get(
        self.detectron_config.DATASETS.TEST[0]
        if len(self.detectron_config.DATASETS.TEST) else "__unused")
    self.cpu_device = torch.device("cpu")
    # Determines whether we will use async processing
    self.parallel = parallel
    if self.parallel:
        # Used for async processing
        self.predictor = AsyncPredictor(
            self.cfg,
            self.detectron_config,
            self.img_height,
            self.img_width,
            num_gpus=num_gpu,
            input_queue=input_queue,
            output_queue=output_queue,
            gpuid_action_recognition=gpuid_action_recognition)
        # Used to count the frames provided to detect_persons
        self.provided_image_count = 0
        self.buffer_size = self.predictor.default_buffer_size
        # In the original version this attribute was used to store the images
        # in chronological order as well as a counter that represents the size
        # of the task_queue attribute. Since we do not return the images, we
        # only use it as a counter representing the task_queue and thus insert
        # a dummy int variable instead of an image, because that is more
        # memory efficient.
        self.frame_data = deque()
    else:
        # Use the modified predictor for the demo
        self.predictor = DemoDefaultPredictor(self.cfg,
                                              self.detectron_config,
                                              self.img_height,
                                              self.img_width)
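# For intuition, a minimal sketch of the buffering pattern the parallel
# branch relies on (mirroring detectron2's AsyncPredictor demo usage):
# results are only consumed once `default_buffer_size` frames are in flight,
# which keeps all worker GPUs busy. Names and the generator structure are
# illustrative, not this class's actual detect_persons implementation.
def _detect_persons_sketch(predictor, frame_data, buffer_size, frame_gen):
    for frame in frame_gen:
        frame_data.append(1)          # dummy counter entry, not the image
        predictor.put(frame)          # enqueue work for the async workers
        if len(frame_data) >= buffer_size:
            frame_data.popleft()
            yield predictor.get()     # results come back in request order
    while len(frame_data):            # drain the remaining in-flight frames
        frame_data.popleft()
        yield predictor.get()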
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
        slowfast/config/defaults.py
    """
    # Set up environment.
    setup_environment()
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model, cfg, is_train=False)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the
        # current checkpoint folder, try to load a checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (len(test_loader.dataset) %
                (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
                == 0)
        # Create meters for multi-view testing.
        test_meter = TestMeter(
            len(test_loader.dataset) //
            (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
            cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
            cfg.MODEL.NUM_CLASSES,
            len(test_loader),
        )

    # Perform multi-view test on the entire dataset.
    perform_test(test_loader, model, test_meter, cfg)
def __init__(self, cfg, progress_callback):
    """
    :param cfg: the prototype config
    :param progress_callback: (pyqtSignal) used for signaling the progress back to the GUI
    """
    # Set up environment.
    setup_environment()
    # Setup logging format
    logging.setup_logging(cfg.OUTPUT_DIR)
    logger.info("Demo with config:")
    logger.info(pprint.pformat(cfg))

    # Prepare the input video for best demo results
    cfg.DEMO.VIDEO_SOURCE_PATH_AT_FPS = self.create_demo_video_at_target_framerate(
        cfg.DEMO.VIDEO_SOURCE_PATH, cfg.CUSTOM_DATASET.FRAME_RATE)
    self.cfg = cfg

    # An output folder for all demo-related output
    output_datetime = datetime.datetime.now().strftime("%Y-%m-%d_%H_%M_%S")
    self.cfg.DEMO.OUTPUT_FOLDER = os.path.join(
        self.cfg.CUSTOM_DATASET.DEMO_DIR, output_datetime)
    create_folder(self.cfg.DEMO.OUTPUT_FOLDER)
    logger.info("Created output-folder for demo results at: " +
                self.cfg.DEMO.OUTPUT_FOLDER)

    # (pyqtSignal) used for signaling the progress back to the GUI.
    # We currently take the progress as the percentage of distributed images
    self.progress_callback = progress_callback

    # Used for extracting the data frames from the video file
    self.file_video_stream = FileVideoStream(
        self.cfg.DEMO.VIDEO_SOURCE_PATH_AT_FPS)
    self.video_file_name = Path(self.cfg.DEMO.VIDEO_SOURCE_PATH).stem

    # Whether we display our results
    self.use_video_visualizer = self.cfg.DEMO.VIDEO_SHOW_VIDEO_ENABLE or self.cfg.DEMO.VIDEO_EXPORT_VIDEO_ENABLE
    # Whether we export our output
    self.export_output = self.cfg.DEMO.EXPORT_EXPORT_RESULTS

    # The fps of the video source
    self.frames_per_second = self.file_video_stream.frames_per_second
    self.video_length_seconds = self.file_video_stream.video_length_seconds

    # Information on the sampling requirements for the video data
    self.sample_rate = self.cfg.DATA.SAMPLING_RATE
    self.num_frames = self.cfg.DATA.NUM_FRAMES
    self.seq_len = self.sample_rate * self.num_frames
    self.half_seq_len = int(self.seq_len / 2)
    self.half_seq_len_seconds = self.half_seq_len / self.frames_per_second

    # The seconds in the video that are suited for inference
    self.earliest_full_start_second = math.ceil(self.half_seq_len_seconds)
    self.final_full_second = math.floor(
        self.video_length_seconds) - math.ceil(self.half_seq_len_seconds)
    # Set the current second to the start. The current second is the second
    # for which we make the prediction
    self.current_video_second = self.earliest_full_start_second
    # Used for telling the gui the progress of our distribute images function
    # over the [0, final_full_second] seconds
    self.number_of_relevant_frames = (self.final_full_second +
                                      1) * self.frames_per_second

    # The corresponding frame index to any middle_frame_timestamp of interest
    self.first_middle_frame_index = sec_to_frame(
        self.earliest_full_start_second, self.cfg, mode="demo") - 1
    # Used to determine whether an index is a middle frame index for which
    # action recognition is done
    self.current_middle_frame_index = self.first_middle_frame_index
    # The inference frame indices are sampled around the middle frame as
    # defined for slowfast when using ava_dataset.
    # Here we have indices: index = frame number - 1
    self.inference_frame_indices = list(
        range(self.current_middle_frame_index + 1 - self.half_seq_len,
              self.current_middle_frame_index + 1 + self.half_seq_len,
              self.sample_rate))
    # Indicates whether the main process should put the next image into the
    # input_detection_queue
    self.next_image_in_relevant_range = self.current_video_second <= self.final_full_second

    # Multiprocessing configs:
    # How many cpus we have
    self.num_cpu = mp.cpu_count()
    # We have 5 processes in parallel in the simplest case of the demo:
    # 1. Main, 2. Object Predictor, 3. Deep Sort Tracker,
    # 4. Video Visualizer, 5. Action Recognizer
    self.num_occupied_processes = 5
    assert self.num_cpu >= self.num_occupied_processes, (
        "You need at least " + str(self.num_occupied_processes) +
        " cores for the multiprocessing demo")
    self.free_cpu_cores = self.num_cpu - self.num_occupied_processes
    # How many gpus we have for the demo
    self.num_gpu = self.cfg.NUM_GPUS
    # How many gpus should be used for object detection (assigned in
    # increasing gpuid order)
    self.num_gpu_object_detection = min(self.free_cpu_cores, self.num_gpu)
    # The gpuid for action recognition. We take the last possible gpuid,
    # which is beneficial if we have fewer object detection processes than
    # free_cpu_cores (object detection and action recognition are separated
    # this way)
    self.gpuid_action_recognition = self.num_gpu - 1

    # The queue sizes as specified in the config files
    self.queue_size = self.cfg.DEMO.QSIZE_SECONDS * self.cfg.CUSTOM_DATASET.FRAME_RATE

    # Queues
    # Contains the original images with an idx each:
    # 1. img_idx (int)
    # 2. image of shape (H, W, C) (in BGR order) and [0, 255]
    self.input_detection_queue = mp.Queue(maxsize=self.queue_size)
    # Queue containing the detections per image in the form:
    # 1. img_idx (int)
    # 2. image of shape (H, W, C) (in BGR order) and [0, 255]
    # 3. predictions {dict}: a dict with the following keys:
    #    pred_boxes: tensor of shape (num_predictions, 4) = the coordinates
    #      of the predicted boxes [x1, y1, x2, y2] --> if empty it is []
    #    scores: tensor of shape (num_predictions) containing the confidence
    #      scores [0, 1] --> if empty it is []
    self.output_detection_queue = mp.Queue(maxsize=self.queue_size)
    # Contains the images with the corresponding ids and
    # person_tracking_outputs -> used for visualization
    # 1. img_idx (int)
    # 2. image of shape (H, W, C) (in BGR order) and [0, 255]
    # 3. person_tracking_outputs: ndarray with shape
    #    (num_identities, 5(int) = x1, y1, x2, y2, identity_number)
    #    --> if empty it is a list []
    self.output_tracker_queue_visualization = mp.Queue(
        maxsize=self.queue_size)
    # Contains the corresponding ids and person_tracking_outputs
    # -> used for action recognition
    # 1. img_idx (int)
    # 2. person_tracking_outputs: ndarray with shape
    #    (num_identities, 5(int) = x1, y1, x2, y2, identity_number)
    #    --> if empty it is a list []
    self.output_tracker_queue_action_recognition = mp.Queue(
        maxsize=self.queue_size)
    # Contains the input for action recognition (only for img_idxs that are
    # middle_frames)
    # 1. current_video_second: (int) the video second for which the
    #    prediction data is given
    # 2. img_idxs=current_middle_frame_index (int) the image img_idx, which
    #    is always the next middle_frame_index
    # 3. img_idx (int) = the idx of the current middle_frame
    # 4. image of shape (H, W, C) (in BGR order) and [0, 255]
    # It is bigger than the other queues
    self.input_action_recognition_queue = mp.Queue(
        maxsize=int(self.queue_size * 1.5))
    # Contains the output of action recognition (only for img_idxs that are
    # middle_frames) -> used for visualization
    # 1. img_idx (int), only for middle frames
    # 2. person_tracking_outputs: ndarray with shape
    #    (num_identities, 5(int) = x1, y1, x2, y2, identity_number)
    #    --> if empty it is a list []
    # 3. pred_action_category_scores (ndarray float32) with shape
    #    (num_person_ids, num_categories), the scores for each person and
    #    each action category --> if empty it is a list []
    self.output_action_recognition_queue_visualization = mp.Queue(
        maxsize=self.queue_size)
    # Contains the output of action recognition (only for img_idxs that are
    # middle_frames) -> used for the result export
    # 1. current_video_second: (int) the video second for which the
    #    prediction data is given
    # 2. person_tracking_outputs: ndarray with shape
    #    (num_identities, 5(int) = x1, y1, x2, y2, identity_number)
    #    --> if empty it is a list []
    # 3. pred_action_category_scores (ndarray float32) with shape
    #    (num_person_ids, num_categories), the scores for each person and
    #    each action category --> if empty it is a list []
    self.output_action_recognition_queue_result_export = mp.Queue(
        maxsize=int(self.video_length_seconds * self.frames_per_second))

    # A list that contains the detected middle_frame_seconds
    self.middle_frame_seconds = []

    # The detectron2 object predictor class for person detection
    self.object_predictor = DemoDetectron2ObjectPredictor(
        self.cfg,
        self.file_video_stream.height,
        self.file_video_stream.width,
        parallel=True,
        num_gpu=self.num_gpu_object_detection,
        input_queue=self.input_detection_queue,
        output_queue=self.output_detection_queue,
        gpuid_action_recognition=self.gpuid_action_recognition)
    # The deep sort tracker class for person tracking
    self.deep_sort_tracker = DeepSortTracker(
        self.cfg,
        input_queue=self.output_detection_queue,
        output_queue_vis=self.output_tracker_queue_visualization,
        output_queue_action_pred=self.output_tracker_queue_action_recognition,
        show_video=self.use_video_visualizer)
    # The action recognition class
    self.action_recognizer = ActionRecognizer(
        self.cfg,
        self.file_video_stream.height,
        self.file_video_stream.width,
        model_device=self.gpuid_action_recognition,
        first_middle_frame_index=self.first_middle_frame_index,
        sample_rate=self.sample_rate,
        half_seq_len=self.half_seq_len,
        current_video_second=self.current_video_second,
        input_queue_tracker=self.output_tracker_queue_action_recognition,
        input_queue_images=self.input_action_recognition_queue,
        output_queue=self.output_action_recognition_queue_visualization,
        output_action_recognition_queue_result_export=self.
        output_action_recognition_queue_result_export)

    if self.export_output:
        # Our demo meter to store and finally print the results
        self.demo_meter = DemoMeter(self.cfg,
                                    self.file_video_stream.height,
                                    self.file_video_stream.width)
        # Used to control the completeness of our export
        self.current_export_second = self.earliest_full_start_second - 1

    if self.use_video_visualizer:
        self.demo_visualizer = VideoVisualizer(
            self.cfg,
            self.file_video_stream.height,
            self.first_middle_frame_index,
            self.frames_per_second,
            input_detection_queue=self.input_detection_queue,
            output_detection_queue=self.output_detection_queue,
            output_tracker_queue_visualization=self.
            output_tracker_queue_visualization,
            output_tracker_queue_action_recognition=self.
            output_tracker_queue_action_recognition,
            input_action_recognition_queue=self.
            input_action_recognition_queue,
            output_action_recognition_queue_visualization=self.
            output_action_recognition_queue_visualization,
            output_action_recognition_queue_result_export=self.
            output_action_recognition_queue_result_export)
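# A minimal sketch of the pipeline pattern the queues above implement: each
# stage consumes from its input queue until it receives a poison pill (a
# sentinel such as None), then forwards the pill so the downstream stages
# can terminate too. Illustrative only; the real stage bodies live in the
# respective classes (see e.g. first_poison_pill_received in DeepSortTracker).
def _pipeline_stage_sketch(input_queue, output_queue, process_item):
    while True:
        item = input_queue.get()
        if item is None:
            # Poison pill: propagate it and terminate this stage.
            output_queue.put(None)
            break
        output_queue.put(process_item(item))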
def __init__(self,
             cfg,
             img_height,
             first_middle_frame_index,
             frames_per_second,
             input_detection_queue=None,
             output_detection_queue=None,
             output_tracker_queue_visualization=None,
             output_tracker_queue_action_recognition=None,
             input_action_recognition_queue=None,
             output_action_recognition_queue_visualization=None,
             output_action_recognition_queue_result_export=None):
    """
    Initialize the object
    :param cfg: our demo config
    :param img_height: (int) the height of the image
    :param first_middle_frame_index: (int) the index of the first middle_frame
    :param frames_per_second: (float) the fps of the video -> required for determining middle frames
    :param input_detection_queue: please refer to class MultiProcessDemo
    :param output_detection_queue: please refer to class MultiProcessDemo
    :param output_tracker_queue_visualization: please refer to class MultiProcessDemo
    :param output_tracker_queue_action_recognition: please refer to class MultiProcessDemo
    :param input_action_recognition_queue: please refer to class MultiProcessDemo
    :param output_action_recognition_queue_visualization: please refer to class MultiProcessDemo
    :param output_action_recognition_queue_result_export: please refer to class MultiProcessDemo
    """
    setup_environment()
    # Setup logging format
    logging.setup_logging(cfg.OUTPUT_DIR)
    self.cfg = cfg
    # The name of the input video
    self.demo_video_name = Path(self.cfg.DEMO.VIDEO_SOURCE_PATH).stem

    # Whether we will export a video
    self.export_video = self.cfg.DEMO.VIDEO_EXPORT_VIDEO_ENABLE
    if self.export_video:
        # The number of digits used for exporting the images (determines how
        # many images can be stored)
        self.number_of_digits_for_image_export = 10
        # The path of the to-be-created video
        self.export_video_path = os.path.join(
            self.cfg.DEMO.OUTPUT_FOLDER,
            self.demo_video_name + "_annotated.mp4")

    # Whether we will display the video
    self.display_video = self.cfg.DEMO.VIDEO_SHOW_VIDEO_ENABLE
    self.cv2_display_name = "Demo: " + self.demo_video_name
    # Whether we will display the meta information (queue sizes and img_idx)
    self.display_meta_info = cfg.DEMO.VIDEO_SHOW_VIDEO_DEBUGGING_INFO
    # Used for finding the position of the meta info
    self.img_height = img_height

    # Used for determining the middle_frame_indices (they carry the action
    # prediction)
    self.first_middle_frame_index = first_middle_frame_index
    self.frames_per_second = frames_per_second

    # Additional options for displaying the video
    self.video_display_scaling_factor = cfg.DEMO.VIDEO_DISPLAY_SCALING_FACTOR
    self.video_action_display_duration_milliseconds = cfg.DEMO.VIDEO_ACTION_DISPLAY_DURATION_MILLISECONDS

    # The queues containing the relevant information
    self.input_detection_queue = input_detection_queue
    self.output_detection_queue = output_detection_queue
    self.output_tracker_queue_visualization = output_tracker_queue_visualization
    self.output_tracker_queue_action_recognition = output_tracker_queue_action_recognition
    self.input_action_recognition_queue = input_action_recognition_queue
    self.output_action_recognition_queue_visualization = output_action_recognition_queue_visualization
    self.output_action_recognition_queue_result_export = output_action_recognition_queue_result_export

    # The queue sizes as specified in the config files
    self.queue_size = self.cfg.DEMO.QSIZE_SECONDS * self.cfg.CUSTOM_DATASET.FRAME_RATE
    # Used for terminating the process successfully
    self.action_recognition_input_finished = False

    # The information for displaying actions
    # Load the categories:
    self.path_to_label_map_file = os.path.join(cfg.CUSTOM_DATASET.ANNOTATION_DIR,
                                               cfg.CUSTOM_DATASET.LABEL_MAP_FILE) \
        if not os.path.isfile(cfg.ACTIONRECOGNIZER.LABEL_MAP_FILE) \
        else cfg.ACTIONRECOGNIZER.LABEL_MAP_FILE
    # List of dicts (id, name)
    self.action_categories, _ = read_labelmap(self.path_to_label_map_file)
    # A color value for every category
    self.palette_actions = np.random.randint(
        64, 128, (len(self.action_categories), 3)).tolist()
    # The information required for displaying the person tracking info
    self.palette_person_ids = (2**11 - 1, 2**15 - 1, 2**20 - 1)

    # The process for displaying and/or exporting the frames
    self.display_next_frame_process = mp.Process(
        target=self.display_and_or_export_next_frame, args=())
    # Used to test the correct order of images
    self.display_img_idx = -1
    # The information for the action info display
    self.current_action_output_img_idx = ""
    self.current_pred_action_category_scores = ""
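# A hedged sketch of how a frame index can be classified as a middle frame
# from first_middle_frame_index and frames_per_second. It assumes one middle
# frame per video second, spaced by the (integer) frame rate -- plausible
# here because the demo re-encodes the video at cfg.CUSTOM_DATASET.FRAME_RATE,
# but this helper is illustrative rather than the class's actual logic.
def _is_middle_frame_sketch(img_idx, first_middle_frame_index,
                            frames_per_second):
    if img_idx < first_middle_frame_index:
        return False
    return (img_idx - first_middle_frame_index) % int(
        round(frames_per_second)) == 0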
def benchmark_data_loading(cfg):
    """
    Benchmark the speed of data loading in PySlowFast.
    Args:
        cfg (CfgNode): configs. Details can be found in
        slowfast/config/defaults.py
    """
    # Set up environment.
    setup_environment()
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Benchmark data loading with config:")
    logger.info(pprint.pformat(cfg))

    timer = Timer()
    dataloader = loader.construct_loader(cfg, "train")
    logger.info("Initialize loader using {:.2f} seconds.".format(
        timer.seconds()))
    # Total batch size across different machines.
    batch_size = cfg.TRAIN.BATCH_SIZE * cfg.NUM_SHARDS
    log_period = cfg.BENCHMARK.LOG_PERIOD
    epoch_times = []
    # Test for a few epochs.
    for cur_epoch in range(cfg.BENCHMARK.NUM_EPOCHS):
        timer = Timer()
        timer_epoch = Timer()
        iter_times = []
        for cur_iter, _ in enumerate(tqdm.tqdm(dataloader)):
            if cur_iter > 0 and cur_iter % log_period == 0:
                iter_times.append(timer.seconds())
                ram_usage, ram_total = misc.cpu_mem_usage()
                logger.info(
                    "Epoch {}: {} iters ({} videos) in {:.2f} seconds. "
                    "RAM Usage: {:.2f}/{:.2f} GB.".format(
                        cur_epoch,
                        log_period,
                        log_period * batch_size,
                        iter_times[-1],
                        ram_usage,
                        ram_total,
                    ))
                timer.reset()
        epoch_times.append(timer_epoch.seconds())
        ram_usage, ram_total = misc.cpu_mem_usage()
        logger.info(
            "Epoch {}: in total {} iters ({} videos) in {:.2f} seconds. "
            "RAM Usage: {:.2f}/{:.2f} GB.".format(
                cur_epoch,
                len(dataloader),
                len(dataloader) * batch_size,
                epoch_times[-1],
                ram_usage,
                ram_total,
            ))
        logger.info(
            "Epoch {}: on average every {} iters ({} videos) take {:.2f}/{:.2f} "
            "(avg/std) seconds.".format(
                cur_epoch,
                log_period,
                log_period * batch_size,
                np.mean(iter_times),
                np.std(iter_times),
            ))
    logger.info("On average every epoch ({} videos) takes {:.2f}/{:.2f} "
                "(avg/std) seconds.".format(
                    len(dataloader) * batch_size,
                    np.mean(epoch_times),
                    np.std(epoch_times),
                ))
def __init__(self, cfg, img_height, img_width):
    """
    Initialize the DemoMeter with the relevant parameters
    :param cfg: the prototype config
    :param img_height: (int) the height of the input images
    :param img_width: (int) the width of the input images
    """
    # Set up environment.
    setup_environment()
    # Setup logging format
    logging.setup_logging(cfg.OUTPUT_DIR)
    self.cfg = cfg

    # In the case of an AVA-like predictor it is necessary to specify a
    # cfg.ACTIONRECOGNIZER.LABEL_MAP_FILE, because it comprises all 80
    # categories. During the AVA challenge only 60 categories were evaluated,
    # and we want all categories.
    path_to_label_map_file = os.path.join(cfg.CUSTOM_DATASET.ANNOTATION_DIR,
                                          cfg.CUSTOM_DATASET.LABEL_MAP_FILE) \
        if not os.path.isfile(cfg.ACTIONRECOGNIZER.LABEL_MAP_FILE) \
        else cfg.ACTIONRECOGNIZER.LABEL_MAP_FILE

    # Export properties
    datetime_for_filenames = datetime.datetime.now().strftime(
        "%Y-%m-%d_%H_%M_%S")
    self.delimiter = ","
    assert cfg.DEMO.OUTPUT_FOLDER != "", (
        "Please specify cfg.DEMO.OUTPUT_FOLDER to be able to export the output")
    self.output_dir_path = self.cfg.DEMO.OUTPUT_FOLDER
    self.file_name_demo_log = datetime_for_filenames + "_" + "demo_log"
    self.file_name_demo_gt_like_file = datetime_for_filenames + "_" + "demo_gt_format"
    self.results_gt_like_csv_path = os.path.join(
        self.output_dir_path, self.file_name_demo_gt_like_file + ".csv")
    self.results_log_path_prefix = os.path.join(self.output_dir_path,
                                                self.file_name_demo_log)
    self.results_log_csv_path = ""
    self.results_xes_path = ""

    # The minimum score for a predicted category to be exported
    self.min_category_export_score = cfg.DEMO.EXPORT_MIN_CATEGORY_EXPORT_SCORE
    # Whether a person can do multiple actions at the same time or not.
    # This influences the export options, since otherwise only the option
    # with the max value is chosen
    self.multiple_action_possible = cfg.CUSTOM_DATASET.MULTIPLE_ACTION_POSSIBLE

    # The resolution used for the export
    self.img_height = img_height
    self.img_width = img_width

    # List of dicts with the items "id" and "name"
    self.categories, _ = read_labelmap(path_to_label_map_file)
    # Remove the delimiter from the category names to guarantee a clean csv
    # export
    for idx in range(0, len(self.categories)):
        self.categories[idx]["name"] = self.categories[idx]["name"].replace(
            self.delimiter, "")

    # The list-variables we use to store the demo prediction results.
    # They will be used to export the information into csv or xes
    self.res_person_tracking_outputs = []
    self.res_pred_action_category_scores = []
    self.res_all_metadata = []
    self.res_case_ids = []

    # This df is used to assign the correct case_concept_name and
    # concept_instance values
    self.case_and_instance_df = self.create_empty_case_and_instance_df()
    # Used to indicate that the activity instance has not yet completed
    self.video_second_not_complete = -1
    # Used as a test for the correct functionality of lifecycle_transition
    self.not_closed_list = []
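# For orientation, a hedged sketch of writing one row of the "gt-like" csv.
# The AVA ground-truth layout is: video_id, second, x1, y1, x2, y2 (box
# coordinates normalized by image width/height), action_id, person_id.
# Whether the demo export matches this column order exactly is an assumption
# based on the file name "demo_gt_format"; the helper is illustrative only.
def _write_gt_like_row_sketch(csv_writer, video_id, second, box, action_id,
                              person_id, img_width, img_height):
    x1, y1, x2, y2 = box
    csv_writer.writerow([
        video_id, second,
        x1 / img_width, y1 / img_height,
        x2 / img_width, y2 / img_height,
        action_id, person_id,
    ])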
def benchmark_data(cfg):
    """
    Benchmark the speed of data loading.
    Args:
        cfg (CfgNode): configs. Details can be found in
        slowfast/config/defaults.py
    """
    # Set up environment.
    setup_environment()
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Benchmark data loading with config:")
    logger.info(pprint.pformat(cfg))

    timer = Timer()
    dataloader = loader.construct_loader(cfg, "train")
    logger.info("Initialize loader using {:.2f} seconds.".format(
        timer.seconds()))
    batch_size = cfg.TRAIN.BATCH_SIZE
    log_period = cfg.BENCHMARK.LOG_PERIOD
    epoch_times = []
    # Test for a few epochs.
    for cur_epoch in range(cfg.BENCHMARK.NUM_EPOCHS):
        timer = Timer()
        timer_epoch = Timer()
        iter_times = []
        for cur_iter, _ in enumerate(tqdm.tqdm(dataloader)):
            if cur_iter > 0 and cur_iter % log_period == 0:
                iter_times.append(timer.seconds())
                vram = psutil.virtual_memory()
                logger.info(
                    "Epoch {}: {} iters ({} videos) in {:.2f} seconds. "
                    "RAM Usage: {:.2f}/{:.2f} GB.".format(
                        cur_epoch,
                        log_period,
                        log_period * batch_size,
                        iter_times[-1],
                        (vram.total - vram.available) / 1024**3,
                        vram.total / 1024**3,
                    ))
                timer.reset()
        epoch_times.append(timer_epoch.seconds())
        vram = psutil.virtual_memory()
        logger.info(
            "Epoch {}: in total {} iters ({} videos) in {:.2f} seconds. "
            "RAM Usage: {:.2f}/{:.2f} GB.".format(
                cur_epoch,
                len(dataloader),
                len(dataloader) * batch_size,
                epoch_times[-1],
                (vram.total - vram.available) / 1024**3,
                vram.total / 1024**3,
            ))
        logger.info(
            "Epoch {}: on average every {} iters ({} videos) take {:.2f}/{:.2f} "
            "(avg/std) seconds.".format(
                cur_epoch,
                log_period,
                log_period * batch_size,
                np.mean(iter_times),
                np.std(iter_times),
            ))
    logger.info("On average every epoch ({} videos) takes {:.2f}/{:.2f} "
                "(avg/std) seconds.".format(
                    len(dataloader) * batch_size,
                    np.mean(epoch_times),
                    np.std(epoch_times),
                ))
def __init__(self,
             cfg,
             img_height,
             img_width,
             model_device,
             first_middle_frame_index,
             sample_rate,
             half_seq_len,
             current_video_second,
             input_queue_tracker=None,
             input_queue_images=None,
             output_queue=None,
             output_action_recognition_queue_result_export=None):
    """
    Initialize the ActionRecognizer
    :param cfg: the prototype config
    :param img_height: (int) the height of the images
    :param img_width: (int) the width of the images
    :param model_device: (int) the GPU-ID to which to transfer the model
    :param first_middle_frame_index: (int) the index of the first middle_frame
        corresponding to current_video_second
    :param sample_rate: (int) the sample rate
    :param half_seq_len: (int) the half length of a sequence, where each
        sequence has a defined length and comprises the relevant images for
        action prediction
    :param current_video_second: (int) the video second corresponding to the
        first_middle_frame_index
    :param input_queue_tracker: the queue that provides the person tracking outputs
    :param input_queue_images: the queue that provides the images for action
        inference (only middle frames)
    :param output_queue: the queue that stores the predicted categories with
        the corresponding people
    :param output_action_recognition_queue_result_export: please refer to
        class MultiProcessDemo
    """
    setup_environment()
    # Setup logging format
    logging.setup_logging(cfg.OUTPUT_DIR)
    self.cfg = cfg
    self.show_video = self.cfg.DEMO.VIDEO_SHOW_VIDEO_ENABLE or self.cfg.DEMO.VIDEO_EXPORT_VIDEO_ENABLE
    self.model_device = model_device

    # Build the video model and print model statistics.
    self.activity_prediction_model = build_model_for_demo(
        self.cfg, self.model_device)
    # Load the pretrained model used for the demo
    cu.load_demo_checkpoint(self.cfg, self.activity_prediction_model)
    # Set the model to eval mode
    self.activity_prediction_model.eval()

    # Register the queues
    self.output_tracker_queue_action_recognition = input_queue_tracker
    self.input_action_recognition_queue = input_queue_images
    self.output_action_recognition_queue_visualization = output_queue
    self.output_action_recognition_queue_result_export = output_action_recognition_queue_result_export

    # Relevant information for image preprocessing
    self.img_height = img_height
    self.img_width = img_width
    # The short side of our images is scaled to this size
    self.crop_size = cfg.DATA.TEST_CROP_SIZE
    self.data_mean = cfg.DATA.MEAN
    self.data_std = cfg.DATA.STD
    # This is very important: our images arrive in BGR format from the video
    # reader thread. Kinetics pre-training uses RGB, which may require
    # converting our BGR images to RGB (only for inference).
    self.use_bgr = cfg.ACTIONRECOGNIZER.BGR

    # The process for action recognition
    self.recognize_actions_process = mp.Process(
        target=self.recognize_actions_multi_processing, args=())

    # A list that stores all image_idx data from the queue, sorted ascending
    # -> image_idx_from_queue[0] is the smallest image_idx
    self.image_idx_from_queue = []
    # A list that stores the corresponding image data, also from the queue
    # and also sorted by the image_idx -> corresponding to
    # image_idx_from_queue
    self.image_data_from_queue = []
    # Stores the relevant image_idxs for an action prediction, sorted by
    # image_idx
    self.image_idx_for_prediction = []
    # Stores the relevant image data for an action prediction, sorted by the
    # image_idxs from image_idx_for_prediction
    self.image_data_for_prediction = []

    # All the relevant data for retrieving the process data in the correct
    # form:
    # The corresponding frame index to any middle_frame_timestamp of interest
    self.first_middle_frame_index = first_middle_frame_index
    # Used to determine whether an index is a middle frame index for which
    # action recognition is done
    self.current_middle_frame_index = self.first_middle_frame_index
    # Used to test the validity of a to-be-added image_idx
    self.last_image_idx = -1
    self.sample_rate = sample_rate
    self.half_seq_len = half_seq_len
    self.current_video_second = current_video_second
    # The inference frame indices are sampled around the middle frame as
    # defined for slowfast when using ava_dataset.
    # Here we have indices: index = frame number - 1
    self.inference_frame_indices = list(
        range(self.current_middle_frame_index + 1 - self.half_seq_len,
              self.current_middle_frame_index + 1 + self.half_seq_len,
              self.sample_rate))
    # The length of our "raw" data list has to be equal to this value
    self.batch_size = len(self.inference_frame_indices)
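# A hedged sketch of the per-frame preprocessing implied by the attributes
# above: scale the short side to crop_size, optionally convert BGR to RGB
# (Kinetics pre-training uses RGB), and normalize with the dataset mean and
# std. Function and variable names are illustrative, not the class's API.
import cv2
import numpy as np

def _preprocess_frame_sketch(frame_bgr, crop_size, mean, std, use_bgr):
    h, w = frame_bgr.shape[:2]
    scale = crop_size / min(h, w)
    frame = cv2.resize(frame_bgr, (int(w * scale), int(h * scale)))
    if not use_bgr:
        # The pipeline delivers BGR; RGB-pretrained models need a channel swap.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame = frame.astype(np.float32) / 255.0
    return (frame - np.array(mean)) / np.array(std)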
def train(cfg):
    """
    Train a video model for many epochs on the train set and evaluate it on
    the val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
        slowfast/config/defaults.py
    """
    # Set up environment.
    setup_environment()
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model, cfg, is_train=True)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    if cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        checkpoint_epoch = cu.load_checkpoint(last_checkpoint, model,
                                              cfg.NUM_GPUS > 1, optimizer)
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        checkpoint_epoch = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
        start_epoch = checkpoint_epoch + 1
    else:
        start_epoch = 0

    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        train_meter = TrainMeter(len(train_loader), cfg)
        val_meter = ValMeter(len(val_loader), cfg)

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)
        # Train for one epoch.
        train_epoch(train_loader, model, optimizer, train_meter, cur_epoch,
                    cfg)

        # Compute precise BN stats.
        if cfg.BN.USE_PRECISE_STATS and len(get_bn_modules(model)) > 0:
            calculate_and_update_precise_bn(train_loader, model,
                                            cfg.BN.NUM_BATCHES_PRECISE)

        # Save a checkpoint.
        if cu.is_checkpoint_epoch(cur_epoch, cfg.TRAIN.CHECKPOINT_PERIOD):
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch,
                               cfg)
        # Evaluate the model on the validation set.
        if misc.is_eval_epoch(cfg, cur_epoch):
            eval_epoch(val_loader, model, val_meter, cur_epoch, cfg)
#!/usr/bin/env python3 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. from slowfast.utils.env import setup_environment setup_environment()