def start_input_processing(self, process_function):
    """Run ``process_function`` once for every lecture that is not skipped.

    For each lecture in ``self.database.lectures`` this loads the stored
    input (if any) from ``self.temp_dir``, runs and times the process, and
    optionally stores the results back into ``self.temp_dir``.

    ``self.input_temp_prefix`` and ``self.output_temp_prefix`` may each be
    ``None`` (nothing is loaded / nothing is saved), a single prefix, or a
    list of prefixes. With a list of output prefixes, ``results`` is indexed
    by the position of each prefix.

    :param process_function: callable invoked as
        ``process_function(self, input_data)``.
    """
    for lecture in self.database.lectures:
        self.current_lecture = lecture

        _main_videos, lecture_file, skip = self.get_lecture_params(lecture)
        if skip:
            continue

        # gather the stored input for this lecture
        if self.input_temp_prefix is None:
            # null-input process (convenient way to just iterate lectures)
            input_data = None
        elif isinstance(self.input_temp_prefix, list):
            input_data = [
                MiscHelper.dump_load(self.temp_dir + '/' + prefix + lecture_file)
                for prefix in self.input_temp_prefix
            ]
        else:
            input_data = MiscHelper.dump_load(
                self.temp_dir + '/' + self.input_temp_prefix + lecture_file)

        # run the actual process and report how long it took
        timer = TimeHelper()
        timer.startTimer()
        results = process_function(self, input_data)
        timer.endTimer()
        print("Process Finished in: " + timer.totalElapsedStamp())

        # store the results (one file per output prefix)
        if self.output_temp_prefix is None:
            continue

        if isinstance(self.output_temp_prefix, list):
            for position, prefix in enumerate(self.output_temp_prefix):
                MiscHelper.dump_save(
                    results[position],
                    self.temp_dir + '/' + prefix + lecture_file)
        else:
            MiscHelper.dump_save(
                results,
                self.temp_dir + '/' + self.output_temp_prefix + lecture_file)
def FromUniformSample(database, lecture, step, sample_name, binary_source):
    """Export a uniform sub-sample of the stored binary frames as key-frames.

    Loads the ``(frame_times, frame_indices, compressed_frames)`` tuple
    stored for ``lecture``, keeps every ``step``-th entry, inverts each
    decompressed frame (``255 - frame``) and passes the frames together with
    the interval assigned to each one to ``KeyframeExporter.Export``.

    Interval boundaries are the midpoints between consecutive sampled frames.
    The first interval starts at half of the first sample's own index/time,
    and the last interval ends at the last sample's own index/time.

    :param database: object providing ``output_temporal``,
        ``output_summaries`` and ``name``.
    :param lecture: object providing ``id`` and ``title``.
    :param step: stride used to sub-sample the stored frames.
    :param sample_name: prefix used for the exported summary.
    :param binary_source: file-name prefix of the stored binary frames.
    """
    # locate and load the binary frames stored for this lecture
    dat_suffix = str(lecture.id) + ".dat"
    source_path = database.output_temporal + "/" + binary_source + dat_suffix
    all_times, all_indices, all_compressed = MiscHelper.dump_load(source_path)

    # keep one out of every `step` entries
    frame_times = list(all_times[::step])
    frame_indices = list(all_indices[::step])
    frame_compressed = list(all_compressed[::step])

    print("Expanding loaded frames .... ")
    binary_frames = Helper.decompress_binary_images(frame_compressed)

    output_prefix = (database.output_summaries + "/" + sample_name + "_" +
                     database.name + "_" + lecture.title.lower())
    print("Saving data to: " + output_prefix)

    intervals = []        # (start, end) in frame indices
    abs_intervals = []    # (start, end) in frame times
    sample_count = len(frame_indices)
    for pos, frame in enumerate(binary_frames):
        # where this sample's interval begins
        if pos > 0:
            start_idx = int((frame_indices[pos - 1] + frame_indices[pos]) / 2)
            start_time = (frame_times[pos - 1] + frame_times[pos]) / 2.0
        else:
            start_idx = int(frame_indices[pos] / 2)
            start_time = frame_times[pos] / 2.0

        # where it ends
        if pos + 1 < sample_count:
            end_idx = int((frame_indices[pos + 1] + frame_indices[pos]) / 2)
            end_time = (frame_times[pos + 1] + frame_times[pos]) / 2.0
        else:
            end_idx = frame_indices[pos]
            end_time = frame_times[pos]

        # invert binarization (done in place on the decompressed list)
        binary_frames[pos] = 255 - frame

        intervals.append((start_idx, end_idx))
        abs_intervals.append((start_time, end_time))

    KeyframeExporter.Export(output_prefix, database, lecture, intervals,
                            abs_intervals, frame_indices, frame_times,
                            binary_frames)
def main():
    """Train the speaker action classifier and save the fitted model.

    Command line: ``python <script> config``.

    Loads the per-lecture feature pickles of the training set named by
    ``SPEAKER_TRAINING_SET_NAME``, combines them with
    ``PoseFeatureExtractor.combine_datasets``, fits a
    ``RandomForestClassifier`` (``random_state=0``) and stores it with
    ``MiscHelper.dump_save``. Prints a usage message and returns when the
    configuration argument is missing.
    """
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl+C and
        # ``SystemExit`` are not reported as an invalid database file
        print("Invalid AccessMath Database file")
        return

    # paths and other configuration parameters
    output_dir = config.get_str("OUTPUT_PATH")
    features_dir = output_dir + "/" + config.get("SPEAKER_ACTION_FEATURES_DIR")
    classifier_dir = output_dir + "/" + config.get_str("SPEAKER_ACTION_CLASSIFIER_DIR")
    os.makedirs(classifier_dir, exist_ok=True)
    classifier_filename = classifier_dir + "/" + config.get_str(
        "SPEAKER_ACTION_CLASSIFIER_FILENAME")

    dataset_name = config.get("SPEAKER_TRAINING_SET_NAME")
    training_set = database.datasets[dataset_name]
    training_titles = [lecture.title.lower() for lecture in training_set]

    # classifier parameters (defaults apply when the config omits them)
    rf_n_trees = config.get_int("SPEAKER_ACTION_CLASSIFIER_RF_TREES", 64)
    rf_depth = config.get_int("SPEAKER_ACTION_CLASSIFIER_RF_DEPTH", 16)

    # read all training data: files use the title as-is, keys are lower-cased
    train_dataset = {}
    for lecture in training_set:
        input_filename = features_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        train_dataset[lecture.title.lower()] = MiscHelper.dump_load(input_filename)

    train_x, train_y, _frame_infos = PoseFeatureExtractor.combine_datasets(
        training_titles, train_dataset)

    # fit and persist the classifier
    clf = RandomForestClassifier(n_estimators=rf_n_trees, max_depth=rf_depth,
                                 random_state=0)
    clf = clf.fit(train_x, train_y)
    MiscHelper.dump_save(clf, classifier_filename)
def prepare(args):
    """Load the database and cache the stored 3D structures of every lecture.

    Sets ``TangentV_Helper.VisualizerServer`` from the database and fills
    ``TangentV_Helper.cache_3DSTs`` (keyed by lecture title) with the data
    loaded from each lecture's ``Parameters.Output_ST3D`` file.

    :param args: mapping with a ``'database'`` entry holding the path of the
        database file.
    :return: ``None``; returns early (after printing a message) when the
        database cannot be loaded.
    """
    try:
        database = MetaDataDB.from_file(args['database'])
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl+C and
        # ``SystemExit`` are not reported as an invalid database file
        print("Invalid AccessMath database file")
        return

    TangentV_Helper.VisualizerServer = database.indexing.visualization_server

    print("Loading CC indices per lecture ... ")
    for lecture in database.lectures:
        struct_filename = (database.output_temporal + '/' +
                           Parameters.Output_ST3D + str(lecture.id) + ".dat")
        TangentV_Helper.cache_3DSTs[lecture.title] = MiscHelper.dump_load(struct_filename)
def main():
    """Extract pose features for every lecture of the training set.

    Command line: ``python <script> config``.

    For each lecture in the dataset named by ``SPEAKER_TRAINING_SET_NAME``,
    loads its pose-segment pickle, runs
    ``PoseFeatureExtractor.get_feature_dataset`` on it and saves the result
    under the ``SPEAKER_ACTION_FEATURES_DIR`` directory. Prints a usage
    message and returns when the configuration argument is missing.
    """
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl+C and
        # ``SystemExit`` are not reported as an invalid database file
        print("Invalid AccessMath Database file")
        return

    # paths and other configuration parameters
    output_dir = config.get_str("OUTPUT_PATH")
    output_segment_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_SEGMENT_POSE_DATA_OUTPUT_DIR")

    dataset_name = config.get("SPEAKER_TRAINING_SET_NAME")
    training_set = database.datasets[dataset_name]

    # prepare the feature extractor
    feature_points = config.get("SPEAKER_ACTION_FEATURE_POINTS")
    segment_length = config.get_int("SPEAKER_ACTION_SEGMENT_LENGTH", 15)
    feat_extractor = PoseFeatureExtractor(feature_points, segment_length)

    features_dir = output_dir + "/" + config.get("SPEAKER_ACTION_FEATURES_DIR")
    os.makedirs(features_dir, exist_ok=True)

    # one input pickle -> one feature pickle per lecture
    for lecture in training_set:
        lecture_file = database.name + "_" + lecture.title + ".pickle"
        input_filename = output_segment_dir + "/" + lecture_file
        output_filename = features_dir + "/" + lecture_file

        lecture_pose_segments = MiscHelper.dump_load(input_filename)
        vid_data = feat_extractor.get_feature_dataset(lecture_pose_segments)
        MiscHelper.dump_save(vid_data, output_filename)
def ExportVideo(database, lecture, binary_source, video_prefix, invert_binary,
                skip_interpolation=0, workers=7, block_size=100):
    """Render the stored binary frames of a lecture into an ``.mp4`` video.

    Steps:

    1. Load the ``(frame_times, frame_indices, compressed_frames)`` tuple
       stored for ``lecture``.
    2. Call ``KeyframeExporter.ExpandGenerateSaveTemp`` once per block of
       ``block_size`` frames on a thread pool. The following steps assume
       those calls write ``<temporary_prefix><n>.png`` for every frame
       number in ``range(n_blocks * block_size)``.
    3. Read the frame rate of the first source video with OpenCV.
    4. Run ``ffmpeg`` through the shell to combine the images with the
       concatenated audio of the lecture's source videos.
    5. Delete the temporary images.

    :param database: object providing ``output_temporal``, ``output_images``,
        ``output_videos`` and ``name``.
    :param lecture: object providing ``id``, ``title`` and ``main_videos``
        (each entry indexed with ``"path"``).
    :param binary_source: file-name prefix of the stored binary frames.
    :param video_prefix: prefix used for the temporary and output files.
    :param invert_binary: forwarded to ``ExpandGenerateSaveTemp``.
    :param skip_interpolation: forwarded to ``ExpandGenerateSaveTemp``.
    :param workers: maximum number of threads in the pool.
    :param block_size: number of frames handled by each call.
    """
    # paths
    lecture_sufix = str(lecture.id) + ".dat"
    tempo_binary_filename = database.output_temporal + "/" + binary_source + lecture_sufix
    lecture_str = video_prefix + "_" + database.name + "_" + lecture.title.lower()
    temporary_prefix = database.output_images + "/" + lecture_str + "_"
    first_video_filename = lecture.main_videos[0]["path"]

    # load binary images (the stored frame times are not needed here)
    binary_data = MiscHelper.dump_load(tempo_binary_filename)
    _frame_times, frame_indices, frame_compressed = binary_data

    print("Generating Temporary Files")
    n_blocks = int(math.ceil(frame_indices[-1] / block_size))
    total_frames = n_blocks * block_size
    start_list = [idx * block_size for idx in range(n_blocks)]

    with ThreadPoolExecutor(max_workers=workers) as executor:
        # every argument list has one entry per block; only the start changes
        block_results = executor.map(
            KeyframeExporter.ExpandGenerateSaveTemp,
            [temporary_prefix] * n_blocks,
            [frame_compressed] * n_blocks,
            [frame_indices] * n_blocks,
            [invert_binary] * n_blocks,
            start_list,
            [block_size] * n_blocks,
            [skip_interpolation] * n_blocks)

        for idx, _ in enumerate(block_results):
            prc_progress = ((idx + 1) * 100) / n_blocks
            print("-> Exporting: {0:.4f}% (Block {1:d} of {2:d})".format(
                prc_progress, idx + 1, n_blocks), end="\r", flush=True)

        print("", flush=True)

    # find source sampling frames per second; always release the capture so
    # the video file handle is not kept open while ffmpeg runs
    capture = cv2.VideoCapture(first_video_filename)
    try:
        video_fps = capture.get(cv2.CAP_PROP_FPS)
    finally:
        capture.release()

    # ffmpeg input 0 is the image sequence, so source videos start at input 1
    n_videos = len(lecture.main_videos)
    source_videos_str = " ".join(
        ["-i " + video["path"] for video in lecture.main_videos])
    audio_filter_complex = " ".join(
        ["[{0:d}:a:0]".format(idx + 1) for idx in range(n_videos)])
    audio_filter_complex += " concat=n={0:d}:v=0:a=1 [audio]".format(n_videos)

    video_output = database.output_videos + "/" + lecture_str + ".mp4"

    input_framerate = video_fps
    output_framerate = video_fps

    video_inputs = "-hwaccel dxva2 -framerate {0:.2f} -start_number 0 -i {1:s}%d.png".format(
        input_framerate, temporary_prefix)
    audio_inputs = "{0:s} -filter_complex \"{1:s}\"".format(
        source_videos_str, audio_filter_complex)
    output_flags = "-pix_fmt yuv420p -vf \"scale=trunc(iw/2)*2:trunc(ih/2)*2\" "
    output_flags += "-s:v 1920x1080 -codec:v mpeg4 -c:v libx264 -r {0:s} -shortest".format(
        str(output_framerate))

    export_command = "ffmpeg -y {0:s} {1:s} -map 0:0 -map \"[audio]\" {2:s} {3:s}"
    export_command = export_command.format(video_inputs, audio_inputs,
                                           output_flags, video_output)

    # generate video using ffmpeg
    # NOTE(review): paths are inserted unquoted into a shell command, so a
    # path containing spaces would presumably break it - confirm whether the
    # databases in use can contain such paths.
    print("Saving data to: " + video_output)
    print(export_command)
    os.system(export_command)

    # delete temporary images
    print("Deleting Temporary Files")
    for idx in range(total_frames):
        os.remove(temporary_prefix + str(idx) + ".png")
def main():
    """Write per-frame body and hand bounding boxes for the testing lectures.

    Command line: ``python <script> config [gt_labels]``.

    For each lecture in the dataset named by ``SPEAKER_TESTING_SET_NAME``,
    loads its pose segments, picks the label of every segment (ground truth
    when ``gt_labels`` is greater than zero, otherwise the labels read from
    the action-probabilities CSV), computes the bounding box of the body
    joints and of one hand for every frame, and records everything in a CSV
    file under ``SPEAKER_ACTION_CLASSIFICATION_BBOXES_DIR``. Prints a usage
    message and returns when the configuration argument is missing.
    """
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config [gt_labels]".format(sys.argv[0]))
        print("\n\tWhere:")
        print("\tgt_lablels:\t(Optional) Set to 1 to use Ground Truth labels instead of predictions")
        return

    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl+C and
        # ``SystemExit`` are not reported as an invalid database file
        print("Invalid AccessMath Database file")
        return

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]
    valid_actions = config.get("SPEAKER_VALID_ACTIONS")

    # paths to the outputs from previous scripts
    output_dir = config.get_str("OUTPUT_PATH")
    output_segment_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_SEGMENT_POSE_DATA_OUTPUT_DIR")
    action_class_probabilities_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_PROBABILITIES_DIR")
    output_bboxes_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_BBOXES_DIR")
    os.makedirs(output_bboxes_dir, exist_ok=True)

    remove_confidence = config.get("SPEAKER_REMOVE_JOINT_CONFIDENCE")
    speaker_right_handed = config.get("SPEAKER_IS_RIGHT_HANDED")

    n_joints_body = 25
    n_joints_hand = 21
    total_joints = n_joints_body + n_joints_hand * 2
    # (x, y) column counts once confidence values are gone
    body_cols = n_joints_body * 2
    hand_cols = n_joints_hand * 2

    use_ground_truth = len(sys.argv) >= 3 and int(sys.argv[2]) > 0

    col_name = [
        'frame_id', ('ground_truth' if use_ground_truth else 'pred_label'),
        'body_xmin', 'body_xmax', 'body_ymin', 'body_ymax',
        'rh_xmin', 'rh_xmax', 'rh_ymin', 'rh_ymax',
    ]

    segment_length = config.get_int("SPEAKER_ACTION_SEGMENT_LENGTH")

    for lecture in testing_set:
        lecture_file = database.name + "_" + lecture.title

        # load data + labels
        lec_segments = MiscHelper.dump_load(
            output_segment_dir + "/" + lecture_file + ".pickle")

        if use_ground_truth:
            labels = lec_segments.get_all_labels()
        else:
            input_proba_filename = (action_class_probabilities_dir + "/" +
                                    lecture_file + ".csv")
            _, _, labels, _ = ResultReader.read_actions_probabilities_file(
                input_proba_filename, valid_actions)

        output_file = ResultRecorder(output_bboxes_dir + "/" + lecture_file + ".csv")
        output_file.write_headers(col_name)

        # per-frame columns accumulated over all segments
        frames = []
        segment_labels = []
        body_bbox = []
        rh_bbox = []

        for ind, segment in enumerate(lec_segments.segments):
            if not remove_confidence:
                # stored data still holds (x, y, confidence) per joint:
                # keep only the x and y columns
                base_pose_data = segment.pose_data
                seg_pose_data = np.zeros(
                    (base_pose_data.shape[0], total_joints * 2),
                    dtype=base_pose_data.dtype)
                seg_pose_data[:, ::2] = base_pose_data[:, ::3]
                seg_pose_data[:, 1::2] = base_pose_data[:, 1::3]
            else:
                # confidence has already been removed
                seg_pose_data = segment.pose_data

            body_features = seg_pose_data[:, 0:body_cols]
            if speaker_right_handed:
                # right hand data: the columns after body and first hand
                rh_features = seg_pose_data[:, body_cols + hand_cols:]
            else:
                # left hand data: the columns right after the body
                rh_features = seg_pose_data[:, body_cols:body_cols + hand_cols]

            # bounding boxes of body and hand, one row per frame
            body_bbox += PoseSegmentData.get_bbox_frame_data(body_features, 2).tolist()
            rh_bbox += PoseSegmentData.get_bbox_frame_data(rh_features, 2).tolist()

            # frame range covered by this segment (both ends included)
            frames += list(range(segment.frame_start, segment.frame_end + 1))

            # NOTE(review): the label is repeated ``segment_length`` times
            # while the frame column comes from frame_start..frame_end; the
            # columns presumably only line up when every segment spans
            # exactly ``segment_length`` frames - the original code carried a
            # comment suggesting this dependency be removed. Confirm.
            segment_labels += [[labels[ind]] for _ in range(segment_length)]

        paras = frames, segment_labels, body_bbox, rh_bbox
        output_file.record_results(paras)
def main():
    """Estimate and store the foreground mask of every testing lecture.

    Command line: ``python <script> config [gt_labels]``.

    For each lecture in the dataset named by ``SPEAKER_TESTING_SET_NAME``,
    reads its bounding-box CSV and video metadata, asks a
    ``ForegroundEstimator`` for the mask, encodes the mask as PNG and saves
    the encoded buffer under ``SPEAKER_FG_ESTIMATION_MASK_DIR``. Prints a
    usage message and returns when the configuration argument is missing.

    :raises RuntimeError: if OpenCV reports that a mask could not be encoded.
    """
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config [gt_labels]".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        print("\tgt_labels:\tuse ground truth action labels (Default= False)")
        return

    # read the configuration file
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl+C and
        # ``SystemExit`` are not reported as an invalid database file
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    output_bboxes_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_BBOXES_DIR")
    video_metadata_dir = output_dir + "/" + config.get_str(
        "SPEAKER_ACTION_VIDEO_META_DATA_DIR")
    fg_mask_dir = output_dir + "/" + config.get_str(
        "SPEAKER_FG_ESTIMATION_MASK_DIR")
    os.makedirs(fg_mask_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    speaker_exp_factor = config.get_float(
        "SPEAKER_FG_ESTIMATION_SPK_EXPANSION_FACTOR")
    min_mask_frames = config.get_int("SPEAKER_FG_ESTIMATION_MIN_MASK_FRAMES")
    mask_exp_radius = config.get_int(
        "SPEAKER_FG_ESTIMATION_MASK_EXPANSION_RADIUS")

    use_ground_truth = len(sys.argv) >= 3 and int(sys.argv[2]) > 0

    for current_lecture in testing_set:
        lecture_file = database.name + "_" + current_lecture.title

        bbox_filename = output_bboxes_dir + "/" + lecture_file + ".csv"
        frame_idxs, actions, body_bboxes, rh_bboxes = ResultReader.read_bbox_file(
            bbox_filename, use_ground_truth)

        info_filename = video_metadata_dir + "/" + lecture_file + ".pickle"
        video_info = MiscHelper.dump_load(info_filename)

        fg_estimator = ForegroundEstimator(video_info["width"],
                                           video_info["height"],
                                           speaker_exp_factor,
                                           min_mask_frames,
                                           mask_exp_radius)
        fg_mask = fg_estimator.get_mask(frame_idxs, actions, body_bboxes,
                                        rh_bboxes)

        # the mask is stored PNG-encoded; do not save a buffer that OpenCV
        # reports as not successfully encoded
        encoded_ok, raw_data = cv2.imencode(".png", fg_mask)
        if not encoded_ok:
            raise RuntimeError(
                "Could not encode foreground mask for: " + current_lecture.title)

        output_filename = fg_mask_dir + "/" + lecture_file + ".pickle"
        MiscHelper.dump_save(raw_data, output_filename)
def main():
    """Classify the speaker actions of every testing lecture.

    Command line: ``python <script> config``.

    Loads the saved classifier, then for each lecture in the dataset named
    by ``SPEAKER_TESTING_SET_NAME`` predicts a label per feature row and
    writes two CSV files: one with the frame information plus the
    prediction, and one that additionally holds the probability of every
    class in ``clf.classes_``. Prints a usage message and returns when the
    configuration argument is missing.
    """
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl+C and
        # ``SystemExit`` are not reported as an invalid database file
        print("Invalid AccessMath Database file")
        return

    # paths and other configuration parameters
    output_dir = config.get_str("OUTPUT_PATH")
    features_dir = output_dir + "/" + config.get("SPEAKER_ACTION_FEATURES_DIR")
    action_class_output_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_OUTPUT_DIR")
    action_class_probabilities_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_PROBABILITIES_DIR")
    os.makedirs(action_class_output_dir, exist_ok=True)
    os.makedirs(action_class_probabilities_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    # load the saved model for action classification
    classifier_dir = output_dir + "/" + config.get_str(
        "SPEAKER_ACTION_CLASSIFIER_DIR")
    classifier_filename = classifier_dir + "/" + config.get_str(
        "SPEAKER_ACTION_CLASSIFIER_FILENAME")
    clf = MiscHelper.dump_load(classifier_filename)

    csv_col = ['frame_start', 'frame_end', 'prediction']

    for lecture in testing_set:
        lecture_file = database.name + "_" + lecture.title
        input_filename = features_dir + "/" + lecture_file + ".pickle"
        output_actions_filename = action_class_output_dir + "/" + lecture_file + ".csv"
        output_proba_filename = action_class_probabilities_dir + "/" + lecture_file + ".csv"

        data_xy = MiscHelper.dump_load(input_filename)

        test_x = data_xy["features"]
        y_pred = clf.predict(test_x)
        # predictions as a single column so they can be stacked next to the
        # frame information (one row per sample)
        y_pred_column = y_pred.reshape((y_pred.shape[0], 1))

        # save prediction result; the recorder takes a list of columns, so
        # the row-wise array is transposed before it is recorded
        output_csv = ResultRecorder(output_actions_filename)
        output_csv.write_headers(csv_col)
        paras = np.hstack((data_xy["frame_infos"], y_pred_column)).transpose()
        output_csv.record_results(paras)

        # save label probabilities (same layout plus one column per class)
        all_classes = clf.classes_
        y_prob = clf.predict_proba(test_x)
        infos = np.concatenate((y_pred_column, y_prob), axis=1)

        output_csv = ResultRecorder(output_proba_filename)
        output_csv.write_headers(csv_col + all_classes.tolist())
        paras = np.hstack((data_xy["frame_infos"], infos)).transpose()
        output_csv.record_results(paras)
def main():
    """Sample the key-frames of every testing lecture from its videos.

    Command line: ``python <script> config``.

    For each lecture in the dataset named by ``SPEAKER_TESTING_SET_NAME``,
    reads the key-frame indices stored with its temporal segments, samples
    those frames from the lecture's main videos with a
    ``SequentialVideoSampler`` / ``SimpleFrameSampler`` pair, and saves the
    ``(frame_times, frame_indices, compressed_frames)`` tuple under
    ``SPEAKER_ACTION_KEYFRAMES_DIR``. Prints a usage message and returns
    when the configuration argument is missing.
    """
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        return

    # read the configuration file
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl+C and
        # ``SystemExit`` are not reported as an invalid database file
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    temporal_segments_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_TEMPORAL_SEGMENTS_DIR")
    keyframes_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_KEYFRAMES_DIR")
    os.makedirs(keyframes_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    # the videos root is the same for every video: read it only once
    video_files_path = config.get_str("VIDEO_FILES_PATH")

    for current_lecture in testing_set:
        lecture_file = database.name + "_" + current_lecture.title + ".pickle"

        # read segment data
        video_segment_data = MiscHelper.dump_load(
            temporal_segments_dir + "/" + lecture_file)

        # key-frames that must be extracted from video: only the frame index
        # of each (index, bbox) pair is needed here
        _segments, keyframes_per_segment = video_segment_data
        all_keyframes = [
            keyframe_idx
            for segment_keyframes in keyframes_per_segment
            for keyframe_idx, _bbox in segment_keyframes
        ]

        print("")
        print("processing: " + current_lecture.title)

        # the simple frame sampling worker
        worker = SimpleFrameSampler()

        # main video file names
        m_videos = [
            video_files_path + "/" + video["path"]
            for video in current_lecture.main_videos
        ]

        # execute the actual process
        processor = SequentialVideoSampler(m_videos, all_keyframes)
        if "forced_width" in current_lecture.parameters:
            processor.force_resolution(
                current_lecture.parameters["forced_width"],
                current_lecture.parameters["forced_height"])
        processor.doProcessing(worker, 0, True)

        sampled_frame_data = (worker.frame_times, worker.frame_indices,
                              worker.compressed_frames)

        # save results
        MiscHelper.dump_save(sampled_frame_data,
                             keyframes_dir + "/" + lecture_file)
def main():
    """Export the summary of every testing lecture.

    Command line: ``python <script> config``.

    For each lecture in the dataset named by ``SPEAKER_TESTING_SET_NAME``,
    loads its temporal segments, its sampled key-frames and its PNG-encoded
    foreground mask, and passes them to ``SummaryGenerator.export_summary``
    together with an output prefix inside the database's summaries
    directory. Prints a usage message and returns when the configuration
    argument is missing.
    """
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        return

    # read the configuration file
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl+C and
        # ``SystemExit`` are not reported as an invalid database file
        print("Invalid AccessMath Database file")
        return

    # inputs / output paths
    output_dir = config.get_str("OUTPUT_PATH")
    temporal_segments_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_TEMPORAL_SEGMENTS_DIR")
    keyframes_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_KEYFRAMES_DIR")
    fg_mask_dir = output_dir + "/" + config.get_str(
        "SPEAKER_FG_ESTIMATION_MASK_DIR")

    summaries_dir = output_dir + "/" + database.output_summaries
    os.makedirs(summaries_dir, exist_ok=True)
    summary_prefix = (summaries_dir + "/" +
                      config.get_str("SPEAKER_SUMMARY_PREFIX") + "_" +
                      database.name + "_")

    # current dataset
    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    print("... preparing summary generator ...")
    summ_generator = SummaryGenerator(config)

    for current_lecture in testing_set:
        print("")
        print("Processing: " + current_lecture.title)

        # all three inputs share the same per-lecture file name
        lecture_file = database.name + "_" + current_lecture.title + ".pickle"

        # read segment data
        video_segment_data = MiscHelper.dump_load(
            temporal_segments_dir + "/" + lecture_file)

        # read key-frames data
        video_keyframes_data = MiscHelper.dump_load(
            keyframes_dir + "/" + lecture_file)

        # read mask data: stored as an encoded PNG buffer, decoded here as a
        # single-channel image
        fg_mask_png = MiscHelper.dump_load(fg_mask_dir + "/" + lecture_file)
        fg_mask = cv2.imdecode(fg_mask_png, cv2.IMREAD_GRAYSCALE)

        output_prefix = summary_prefix + current_lecture.title.lower()

        summ_generator.export_summary(database, current_lecture,
                                      video_segment_data,
                                      video_keyframes_data, fg_mask,
                                      output_prefix)
def main():
    """Compute and store the temporal segments of every testing lecture.

    Command line: ``python <script> config [gt_labels]``.

    For each lecture in the dataset named by ``SPEAKER_TESTING_SET_NAME``,
    reads its video metadata, action probabilities and bounding boxes, asks
    a ``VideoSegmenter`` for the key-frames, prints the two elements of the
    result and saves it under ``SPEAKER_ACTION_TEMPORAL_SEGMENTS_DIR``.
    Prints a usage message and returns when the configuration argument is
    missing.
    """
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config [gt_labels]".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        print("\tgt_labels:\tuse ground truth action labels (Default= False)")
        return

    # read the configuration file
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl+C and
        # ``SystemExit`` are not reported as an invalid database file
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    video_metadata_dir = output_dir + "/" + config.get_str(
        "SPEAKER_ACTION_VIDEO_META_DATA_DIR")
    action_class_probabilities_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_PROBABILITIES_DIR")
    output_bboxes_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_BBOXES_DIR")
    temporal_segments_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_TEMPORAL_SEGMENTS_DIR")
    os.makedirs(temporal_segments_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    valid_actions = config.get("SPEAKER_VALID_ACTIONS")

    use_ground_truth = len(sys.argv) >= 3 and int(sys.argv[2]) > 0

    for current_lecture in testing_set:
        lecture_file = database.name + "_" + current_lecture.title

        info_filename = video_metadata_dir + "/" + lecture_file + ".pickle"
        proba_filename = action_class_probabilities_dir + "/" + lecture_file + ".csv"

        video_info = MiscHelper.dump_load(info_filename)
        segmenter = VideoSegmenter.FromConfig(config, video_info["width"],
                                              video_info["height"])

        # read label data
        prob_info = ResultReader.read_actions_probabilities_file(
            proba_filename, valid_actions)
        segments, _gt_actions, pred_actions, _prob_actions = prob_info

        # read bbox data
        bbox_filename = output_bboxes_dir + "/" + lecture_file + ".csv"
        frame_idxs, _frame_actions, body_bboxes, rh_bboxes = ResultReader.read_bbox_file(
            bbox_filename, use_ground_truth)

        # NOTE(review): ``use_ground_truth`` only affects how the bbox file
        # is read; the segmenter always receives the predicted actions.
        # Confirm whether ground-truth actions were meant to be used here
        # when ``gt_labels`` is set.
        # (splits_frames, video_keyframes)
        video_data = segmenter.get_keyframes(pred_actions, segments,
                                             frame_idxs, body_bboxes,
                                             rh_bboxes)

        print("")
        print("video key_frames")
        print(video_data[0])
        print(video_data[1])
        print("")

        output_filename = temporal_segments_dir + "/" + lecture_file + ".pickle"
        MiscHelper.dump_save(video_data, output_filename)